Coverage Report

Created: 2025-10-31 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/quick-xml-0.38.3/src/reader/mod.rs
Line
Count
Source
1
//! Contains high-level interface for a pull-based XML parser.
2
3
#[cfg(feature = "encoding")]
4
use encoding_rs::Encoding;
5
use std::io;
6
use std::ops::Range;
7
8
use crate::encoding::Decoder;
9
use crate::errors::{Error, IllFormedError, SyntaxError};
10
use crate::events::{BytesRef, Event};
11
use crate::parser::{ElementParser, Parser, PiParser};
12
use crate::reader::state::ReaderState;
13
14
/// A struct that holds a parser configuration.
15
///
16
/// Current parser configuration can be retrieved by calling [`Reader::config()`]
17
/// and changed by changing properties of the object returned by a call to
18
/// [`Reader::config_mut()`].
19
///
20
/// [`Reader::config()`]: crate::reader::Reader::config
21
/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut
22
#[derive(Debug, Clone, PartialEq, Eq)]
23
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
24
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
25
#[non_exhaustive]
26
pub struct Config {
27
    /// Whether lone ampersand character (without a paired semicolon) should be
28
    /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
29
    /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
30
    ///
31
    /// Default: `false`
32
    ///
33
    /// # Example
34
    ///
35
    /// ```
36
    /// # use quick_xml::events::{BytesRef, BytesText, Event};
37
    /// # use quick_xml::reader::Reader;
38
    /// # use pretty_assertions::assert_eq;
39
    /// let mut reader = Reader::from_str("text with & & & alone");
40
    /// reader.config_mut().allow_dangling_amp = true;
41
    ///
42
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
43
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
44
    /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
45
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
46
    /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
47
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
48
    /// ```
49
    ///
50
    /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
51
    pub allow_dangling_amp: bool,
52
53
    /// Whether unmatched closing tag names should be allowed. Unless enabled,
54
    /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
55
    /// is returned from read methods.
56
    ///
57
    /// When set to `true`, it won't check if a closing tag has a corresponding
58
    /// opening tag at all. For example, `<a></a></b>` will be permitted.
59
    ///
60
    /// Note that the emitted [`End`] event will not be modified if this is enabled,
61
    /// ie. it will contain the data of the unmatched end tag.
62
    ///
63
    /// Note, that setting this to `true` will lead to additional allocations that are
64
    /// needed to store tag name for an [`End`] event.
65
    ///
66
    /// Default: `false`
67
    ///
68
    /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag
69
    /// [`End`]: crate::events::Event::End
70
    pub allow_unmatched_ends: bool,
71
72
    /// Whether comments should be validated. If enabled, in case of invalid comment
73
    /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods.
74
    ///
75
    /// When set to `true`, every [`Comment`] event will be checked for not
76
    /// containing `--`, which [is not allowed] in XML comments. Most of the time
77
    /// we don't want comments at all so we don't really care about comment
78
    /// correctness, thus the default value is `false` to improve performance.
79
    ///
80
    /// Default: `false`
81
    ///
82
    /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment
83
    /// [`Comment`]: crate::events::Event::Comment
84
    /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments
85
    pub check_comments: bool,
86
87
    /// Whether mismatched closing tag names should be detected. If enabled, in
88
    /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from
89
    /// read methods.
90
    ///
91
    /// Note, that start and end tags [should match literally][spec], they cannot
92
    /// have different prefixes even if both prefixes resolve to the same namespace.
93
    /// The XML
94
    ///
95
    /// ```xml
96
    /// <outer xmlns="namespace" xmlns:p="namespace">
97
    /// </p:outer>
98
    /// ```
99
    ///
100
    /// is not valid, even though semantically the start tag is the same as the
101
    /// end tag. The reason is that namespaces are an extension of the original
102
    /// XML specification (without namespaces) and it should be backward-compatible.
103
    ///
104
    /// When set to `false`, it won't check if a closing tag matches the corresponding
105
    /// opening tag. For example, `<mytag></different_tag>` will be permitted.
106
    ///
107
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
108
    ///
109
    /// Note that the emitted [`End`] event will not be modified if this is disabled,
110
    /// ie. it will contain the data of the mismatched end tag.
111
    ///
112
    /// Note, that setting this to `true` will lead to additional allocations that are
113
    /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`]
114
    /// is also set, only one additional allocation will be performed that supports
115
    /// both these options.
116
    ///
117
    /// Default: `true`
118
    ///
119
    /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag
120
    /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
121
    /// [`End`]: crate::events::Event::End
122
    /// [`expand_empty_elements`]: Self::expand_empty_elements
123
    pub check_end_names: bool,
124
125
    /// Whether empty elements should be split into a `Start` and an `End` event.
126
    ///
127
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag
128
    /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`]
129
    /// event. When set to `false` (the default), those tags are represented by
130
    /// an [`Empty`] event instead.
131
    ///
132
    /// Note, that setting this to `true` will lead to additional allocations that are
133
    /// needed to store tag name for an [`End`] event. However if [`check_end_names`]
134
    /// is also set, only one additional allocation will be performed that supports
135
    /// both these options.
136
    ///
137
    /// Default: `false`
138
    ///
139
    /// [`Empty`]: crate::events::Event::Empty
140
    /// [`Start`]: crate::events::Event::Start
141
    /// [`End`]: crate::events::Event::End
142
    /// [`check_end_names`]: Self::check_end_names
143
    pub expand_empty_elements: bool,
144
145
    /// Whether trailing whitespace after the markup name is trimmed in closing
146
    /// tags `</a >`.
147
    ///
148
    /// If `true` the emitted [`End`] event is stripped of trailing whitespace
149
    /// after the markup name.
150
    ///
151
    /// Note that if set to `false` and [`check_end_names`] is `true` the comparison
152
    /// of markup names is going to fail erroneously if a closing tag contains
153
    /// trailing whitespace.
154
    ///
155
    /// Default: `true`
156
    ///
157
    /// [`End`]: crate::events::Event::End
158
    /// [`check_end_names`]: Self::check_end_names
159
    pub trim_markup_names_in_closing_tags: bool,
160
161
    /// Whether whitespace before character data should be removed.
162
    ///
163
    /// When set to `true`, leading whitespace is trimmed in [`Text`] events.
164
    /// If after that the event is empty it will not be pushed.
165
    ///
166
    /// Default: `false`
167
    ///
168
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
169
    ///
170
    /// WARNING: With this option all text events will be trimmed, which is
170
    /// incorrect behavior when text events are delimited by comments, processing
172
    /// instructions or CDATA sections. To correctly trim data manually apply
173
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
174
    /// only to necessary events.
175
    /// </div>
176
    ///
177
    /// [`Text`]: crate::events::Event::Text
178
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
179
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
180
    pub trim_text_start: bool,
181
182
    /// Whether whitespace after character data should be removed.
183
    ///
184
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
185
    /// If after that the event is empty it will not be pushed.
186
    ///
187
    /// Default: `false`
188
    ///
189
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
190
    ///
191
    /// WARNING: With this option all text events will be trimmed, which is
191
    /// incorrect behavior when text events are delimited by comments, processing
193
    /// instructions or CDATA sections. To correctly trim data manually apply
194
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
195
    /// only to necessary events.
196
    /// </div>
197
    ///
198
    /// [`Text`]: crate::events::Event::Text
199
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
200
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
201
    pub trim_text_end: bool,
202
}
203
204
impl Config {
205
    /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value.
206
    ///
207
    /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
208
    ///
209
    /// WARNING: With this option all text events will be trimmed, which is
209
    /// incorrect behavior when text events are delimited by comments, processing
211
    /// instructions or CDATA sections. To correctly trim data manually apply
212
    /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
213
    /// only to necessary events.
214
    /// </div>
215
    ///
216
    /// [`trim_text_start`]: Self::trim_text_start
217
    /// [`trim_text_end`]: Self::trim_text_end
218
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
219
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
220
    #[inline]
221
0
    pub fn trim_text(&mut self, trim: bool) {
222
0
        self.trim_text_start = trim;
223
0
        self.trim_text_end = trim;
224
0
    }
225
226
    /// Turn on or off all checks for well-formedness. Currently these are the following settings:
227
    /// - [`check_comments`](Self::check_comments)
228
    /// - [`check_end_names`](Self::check_end_names)
229
    #[inline]
230
0
    pub fn enable_all_checks(&mut self, enable: bool) {
231
0
        self.check_comments = enable;
232
0
        self.check_end_names = enable;
233
0
    }
234
}
235
236
impl Default for Config {
237
0
    fn default() -> Self {
238
0
        Self {
239
0
            allow_dangling_amp: false,
240
0
            allow_unmatched_ends: false,
241
0
            check_comments: false,
242
0
            check_end_names: true,
243
0
            expand_empty_elements: false,
244
0
            trim_markup_names_in_closing_tags: true,
245
0
            trim_text_start: false,
246
0
            trim_text_end: false,
247
0
        }
248
0
    }
249
}
250
251
////////////////////////////////////////////////////////////////////////////////////////////////////
252
253
macro_rules! read_event_impl {
254
    (
255
        $self:ident, $buf:ident,
256
        $reader:expr,
257
        $read_until_close:ident
258
        $(, $await:ident)?
259
    ) => {{
260
        let event = loop {
261
            break match $self.state.state {
262
                ParseState::Init => { // Go to InsideText state
263
                    // If the encoding is set explicitly, we do not need to detect it. For example,
264
                    // explicit UTF-8 is set automatically if the Reader was created using `from_str`.
265
                    // But we still need to remove BOM for consistency with no encoding
266
                    // feature enabled path
267
                    #[cfg(feature = "encoding")]
268
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
269
                        if $self.state.encoding.can_be_refined() {
270
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
271
                        }
272
                    }
273
274
                    // Removes UTF-8 BOM if it is present
275
                    #[cfg(not(feature = "encoding"))]
276
                    $reader.remove_utf8_bom() $(.$await)? ?;
277
278
                    $self.state.state = ParseState::InsideText;
279
                    continue;
280
                },
281
                ParseState::InsideRef => { // Go to InsideText
282
                    let start = $self.state.offset;
283
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
284
                        // Emit reference, go to InsideText state
285
                        ReadRefResult::Ref(bytes) => {
286
                            $self.state.state = ParseState::InsideText;
287
                            // +1 to skip start `&`
288
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
289
                        }
290
                        // Go to Done state
291
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
292
                            $self.state.state = ParseState::Done;
293
                            Ok(Event::Text($self.state.emit_text(bytes)))
294
                        }
295
                        ReadRefResult::UpToEof(_) => {
296
                            $self.state.state = ParseState::Done;
297
                            $self.state.last_error_offset = start;
298
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
299
                        }
300
                        // Do not change state, stay in InsideRef
301
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
302
                            Ok(Event::Text($self.state.emit_text(bytes)))
303
                        }
304
                        ReadRefResult::UpToRef(_) => {
305
                            $self.state.last_error_offset = start;
306
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
307
                        }
308
                        // Go to InsideMarkup state
309
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
310
                            $self.state.state = ParseState::InsideMarkup;
311
                            Ok(Event::Text($self.state.emit_text(bytes)))
312
                        }
313
                        ReadRefResult::UpToMarkup(_) => {
314
                            $self.state.state = ParseState::InsideMarkup;
315
                            $self.state.last_error_offset = start;
316
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
317
                        }
318
                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
319
                    }
320
                }
321
                ParseState::InsideText => { // Go to InsideMarkup or Done state
322
                    if $self.state.config.trim_text_start {
323
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
324
                    }
325
326
                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
327
                        ReadTextResult::Markup(buf) => {
328
                            $self.state.state = ParseState::InsideMarkup;
329
                            // Pass `buf` to the next iteration of the parsing loop
330
                            $buf = buf;
331
                            continue;
332
                        }
333
                        ReadTextResult::Ref(buf) => {
334
                            $self.state.state = ParseState::InsideRef;
335
                            // Pass `buf` to the next iteration of the parsing loop
336
                            $buf = buf;
337
                            continue;
338
                        }
339
                        ReadTextResult::UpToMarkup(bytes) => {
340
                            $self.state.state = ParseState::InsideMarkup;
341
                            // FIXME: Can produce an empty event if:
342
                            // - event contains only spaces
343
                            // - trim_text_start = false
344
                            // - trim_text_end = true
345
                            Ok(Event::Text($self.state.emit_text(bytes)))
346
                        }
347
                        ReadTextResult::UpToRef(bytes) => {
348
                            $self.state.state = ParseState::InsideRef;
349
                            // Return Text event with `bytes` content
350
                            Ok(Event::Text($self.state.emit_text(bytes)))
351
                        }
352
                        ReadTextResult::UpToEof(bytes) => {
353
                            $self.state.state = ParseState::Done;
354
                            // Trim bytes from end if required
355
                            let event = $self.state.emit_text(bytes);
356
                            if event.is_empty() {
357
                                Ok(Event::Eof)
358
                            } else {
359
                                Ok(Event::Text(event))
360
                            }
361
                        }
362
                        ReadTextResult::Err(e) => Err(Error::Io(e.into())),
363
                    }
364
                },
365
                // Go to InsideText state in next two arms
366
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
367
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
368
                ParseState::Done => Ok(Event::Eof),
369
            };
370
        };
371
        match event {
372
            // #513: In case of ill-formed errors we already consume the wrong data
373
            // and change the state. We can continue parsing if we wish
374
            Err(Error::IllFormed(_)) => {}
375
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
376
            _ => {}
377
        }
378
        event
379
    }};
380
}
381
382
/// Read bytes up to the `>` and skip it. This method is expected to be called
383
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
384
/// symbol and returns an appropriate [`Event`]:
385
///
386
/// |Symbol |Event
387
/// |-------|-------------------------------------
388
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
389
/// |`/`    |[`End`]
390
/// |`?`    |[`PI`]
391
/// |_other_|[`Start`] or [`Empty`]
392
///
393
/// Moves parser to the `InsideText` state.
394
///
395
/// [`Comment`]: Event::Comment
396
/// [`CData`]: Event::CData
397
/// [`DocType`]: Event::DocType
398
/// [`End`]: Event::End
399
/// [`PI`]: Event::PI
400
/// [`Start`]: Event::Start
401
/// [`Empty`]: Event::Empty
402
macro_rules! read_until_close {
403
    (
404
        $self:ident, $buf:ident,
405
        $reader:expr
406
        $(, $await:ident)?
407
    ) => {{
408
        $self.state.state = ParseState::InsideText;
409
410
        let start = $self.state.offset;
411
        match $reader.peek_one() $(.$await)? {
412
            // `<!` - comment, CDATA or DOCTYPE declaration
413
            Ok(Some(b'!')) => match $reader
414
                .read_bang_element($buf, &mut $self.state.offset)
415
                $(.$await)?
416
            {
417
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
418
                Err(e) => {
419
                    // We want to report error at `<`, but offset was increased,
420
                    // so return it back (-1 for `<`)
421
                    $self.state.last_error_offset = start - 1;
422
                    Err(e)
423
                }
424
            },
425
            // `</` - closing tag
426
            // #776: We parse using ElementParser which allows us to have attributes
427
            // in close tags. While such tags are not allowed by the specification,
428
            // we anyway allow to parse them because:
429
            // - we do not check constraints during parsing. This is performed by the
430
            //   optional validate step which user should call manually
431
            // - if we just look for `>` we will parse `</tag attr=">" >` as end tag
432
            //   `</tag attr=">` and text `" >`, which probably no existing parser
433
            //   does. This is malformed XML, however it is tolerated by some parsers
434
            //   (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
435
            Ok(Some(b'/')) => match $reader
436
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
437
                $(.$await)?
438
            {
439
                Ok(bytes) => $self.state.emit_end(bytes),
440
                Err(e) => {
441
                    // We want to report error at `<`, but offset was increased,
442
                    // so return it back (-1 for `<`)
443
                    $self.state.last_error_offset = start - 1;
444
                    Err(e)
445
                }
446
            },
447
            // `<?` - processing instruction
448
            Ok(Some(b'?')) => match $reader
449
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
450
                $(.$await)?
451
            {
452
                Ok(bytes) => $self.state.emit_question_mark(bytes),
453
                Err(e) => {
454
                    // We want to report error at `<`, but offset was increased,
455
                    // so return it back (-1 for `<`)
456
                    $self.state.last_error_offset = start - 1;
457
                    Err(e)
458
                }
459
            },
460
            // `<...` - opening or self-closed tag
461
            Ok(Some(_)) => match $reader
462
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
463
                $(.$await)?
464
            {
465
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
466
                Err(e) => {
467
                    // We want to report error at `<`, but offset was increased,
468
                    // so return it back (-1 for `<`)
469
                    $self.state.last_error_offset = start - 1;
470
                    Err(e)
471
                }
472
            },
473
            // `<` - syntax error, tag not closed
474
            Ok(None) => {
475
                // We want to report error at `<`, but offset was increased,
476
                // so return it back (-1 for `<`)
477
                $self.state.last_error_offset = start - 1;
478
                Err(Error::Syntax(SyntaxError::UnclosedTag))
479
            }
480
            Err(e) => Err(Error::Io(e.into())),
481
        }
482
    }};
483
}
484
485
/// Generalization of `read_to_end` method for buffered and borrowed readers
486
macro_rules! read_to_end {
487
    (
488
        // $self: &mut Reader
489
        $self:expr, $end:expr, $buf:expr,
490
        $read_event:ident,
491
        // Code block that performs clearing of internal buffer after read of each event
492
        $clear:block
493
        $(, $await:ident)?
494
    ) => {{
495
        // Because we take position after the event before the End event,
496
        // it is important that this position indicates beginning of the End event.
497
        // If between last event and the End event would be only spaces, then we
498
        // take position before the spaces, but spaces would be skipped without
499
        // generating event if `trim_text_start` is set to `true`. To prevent that
500
        // we temporarily disable start text trimming.
501
        //
502
        // We also cannot take position after getting End event, because if
503
        // `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
504
        // we do not know the real size that the End event occupies in
505
        // the source and cannot correct the position after the End event.
506
        // So, we in any case should tweak parser configuration.
507
        let config = $self.config_mut();
508
        let trim = config.trim_text_start;
509
        config.trim_text_start = false;
510
511
        let start = $self.buffer_position();
512
        let mut depth = 0;
513
        loop {
514
            $clear
515
            let end = $self.buffer_position();
516
            match $self.$read_event($buf) $(.$await)? {
517
                Err(e) => {
518
                    $self.config_mut().trim_text_start = trim;
519
                    return Err(e);
520
                }
521
522
                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
523
                Ok(Event::End(e)) if e.name() == $end => {
524
                    if depth == 0 {
525
                        $self.config_mut().trim_text_start = trim;
526
                        break start..end;
527
                    }
528
                    depth -= 1;
529
                }
530
                Ok(Event::Eof) => {
531
                    $self.config_mut().trim_text_start = trim;
532
                    return Err(Error::missed_end($end, $self.decoder()));
533
                }
534
                _ => (),
535
            }
536
        }
537
    }};
538
}
539
540
#[cfg(feature = "async-tokio")]
541
mod async_tokio;
542
mod buffered_reader;
543
mod ns_reader;
544
mod slice_reader;
545
mod state;
546
547
pub use ns_reader::NsReader;
548
549
/// Range of input in bytes, that corresponds to some piece of XML
550
pub type Span = Range<u64>;
551
552
////////////////////////////////////////////////////////////////////////////////////////////////////
553
554
/// Possible reader states. The state transition diagram (`true` and `false` shows
555
/// value of [`Config::expand_empty_elements`] option):
556
///
557
/// ```mermaid
558
/// flowchart LR
559
///   subgraph _
560
///     direction LR
561
///
562
///     Init         -- "(no event)"\n                                       --> InsideMarkup
563
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
564
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
565
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
566
///   end
567
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
568
///   InsideEmpty    -- End                   --> InsideText
569
///   _ -. Eof .-> Done
570
/// ```
571
#[derive(Clone, Debug)]
572
enum ParseState {
573
    /// Initial state in which the reader stays after creation. During the
574
    /// transition from this state the reader detects the encoding (or just
575
    /// removes a UTF-8 BOM when the `encoding` feature is disabled). No event
576
    /// is emitted by the transition. The next state is always `InsideText`.
577
    /// The reader will never return to this state.
578
    Init,
579
    /// State after seeing the `&` symbol in textual content. Depending on the next symbol all other
580
    /// events could be generated.
581
    ///
582
    /// After emitting a `GeneralRef` event the reader moves to the `InsideText` state.
583
    InsideRef,
584
    /// State after seeing the `<` symbol. Depending on the next symbol all other
585
    /// events could be generated.
586
    ///
587
    /// After generating one event the reader moves to the `InsideText` state.
588
    InsideMarkup,
589
    /// State in which reader searches the `<` symbol of a markup. All bytes before
590
    /// that symbol will be returned in the [`Event::Text`] event. After that
591
    /// the reader moves to the `InsideMarkup` state.
592
    InsideText,
593
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
594
    /// Reader enters to this state when it is in a `InsideText` state and emits an
595
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
596
    /// after which the reader returns to the `InsideText` state.
597
    ///
598
    /// [`expand_empty_elements`]: Config::expand_empty_elements
599
    InsideEmpty,
600
    /// Reader enters this state when `Eof` event generated or an error occurred.
601
    /// This is the last state, the reader stay in it forever.
602
    Done,
603
}
604
605
/// A reference to an encoding together with information about how it was retrieved.
606
///
607
/// The state transition diagram:
608
///
609
/// ```mermaid
610
/// flowchart LR
611
///   Implicit    -- from_str       --> Explicit
612
///   Implicit    -- BOM            --> BomDetected
613
///   Implicit    -- "encoding=..." --> XmlDetected
614
///   BomDetected -- "encoding=..." --> XmlDetected
615
/// ```
616
#[cfg(feature = "encoding")]
617
#[derive(Clone, Copy, Debug)]
618
enum EncodingRef {
619
    /// Encoding was implicitly assumed to have a specified value. It can be refined
620
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
621
    Implicit(&'static Encoding),
622
    /// Encoding was explicitly set to the desired value. It cannot be changed
623
    /// either by a BOM or by parsing the XML declaration (`<?xml encoding=... ?>`)
624
    Explicit(&'static Encoding),
625
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
626
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
627
    BomDetected(&'static Encoding),
628
    /// Encoding was detected using XML declaration event (`<?xml encoding=... ?>`).
629
    /// It can no longer change
630
    XmlDetected(&'static Encoding),
631
}
632
#[cfg(feature = "encoding")]
633
impl EncodingRef {
634
    #[inline]
635
    const fn encoding(&self) -> &'static Encoding {
636
        match self {
637
            Self::Implicit(e) => e,
638
            Self::Explicit(e) => e,
639
            Self::BomDetected(e) => e,
640
            Self::XmlDetected(e) => e,
641
        }
642
    }
643
    #[inline]
644
    const fn can_be_refined(&self) -> bool {
645
        match self {
646
            Self::Implicit(_) | Self::BomDetected(_) => true,
647
            Self::Explicit(_) | Self::XmlDetected(_) => false,
648
        }
649
    }
650
}
651
652
////////////////////////////////////////////////////////////////////////////////////////////////////
653
654
/// A direct stream to the underlying [`Reader`]'s reader, which updates
655
/// [`Reader::buffer_position()`] when read from it.
656
#[derive(Debug)]
657
#[must_use = "streams do nothing unless read or polled"]
658
pub struct BinaryStream<'r, R> {
659
    inner: &'r mut R,
660
    offset: &'r mut u64,
661
}
662
663
impl<'r, R> BinaryStream<'r, R> {
664
    /// Returns current position in bytes in the original source.
665
    #[inline]
666
0
    pub const fn offset(&self) -> u64 {
667
0
        *self.offset
668
0
    }
669
670
    /// Gets a reference to the underlying reader.
671
    #[inline]
672
0
    pub const fn get_ref(&self) -> &R {
673
0
        self.inner
674
0
    }
675
676
    /// Gets a mutable reference to the underlying reader.
677
    ///
678
    /// Avoid read from this reader because this will not update reader's position
679
    /// and will lead to incorrect positions of errors. Read from this stream instead.
680
    #[inline]
681
0
    pub fn get_mut(&mut self) -> &mut R {
682
0
        self.inner
683
0
    }
684
}
685
686
impl<'r, R> io::Read for BinaryStream<'r, R>
687
where
688
    R: io::Read,
689
{
690
    #[inline]
691
0
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
692
0
        let amt = self.inner.read(buf)?;
693
0
        *self.offset += amt as u64;
694
0
        Ok(amt)
695
0
    }
696
}
697
698
impl<'r, R> io::BufRead for BinaryStream<'r, R>
699
where
700
    R: io::BufRead,
701
{
702
    #[inline]
703
0
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
704
0
        self.inner.fill_buf()
705
0
    }
706
707
    #[inline]
708
0
    fn consume(&mut self, amt: usize) {
709
0
        self.inner.consume(amt);
710
0
        *self.offset += amt as u64;
711
0
    }
712
}
713
714
////////////////////////////////////////////////////////////////////////////////////////////////////
715
716
/// A low level encoding-agnostic XML event reader.
717
///
718
/// Consumes bytes and streams XML [`Event`]s.
719
///
720
/// This reader does not manage namespace declarations and not able to resolve
721
/// prefixes. If you want these features, use the [`NsReader`].
722
///
723
/// # Examples
724
///
725
/// ```
726
/// use quick_xml::events::Event;
727
/// use quick_xml::reader::Reader;
728
///
729
/// let xml = r#"<tag1 att1 = "test">
730
///                 <tag2><!--Test comment-->Test</tag2>
731
///                 <tag2>Test 2</tag2>
732
///              </tag1>"#;
733
/// let mut reader = Reader::from_str(xml);
734
/// reader.config_mut().trim_text(true);
735
///
736
/// let mut count = 0;
737
/// let mut txt = Vec::new();
738
/// let mut buf = Vec::new();
739
///
740
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
741
/// loop {
742
///     // NOTE: this is the generic case when we don't know about the input BufRead.
743
///     // when the input is a &str or a &[u8], we don't actually need to use another
744
///     // buffer, we could directly call `reader.read_event()`
745
///     match reader.read_event_into(&mut buf) {
746
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
747
///         // exits the loop when reaching end of file
748
///         Ok(Event::Eof) => break,
749
///
750
///         Ok(Event::Start(e)) => {
751
///             match e.name().as_ref() {
752
///                 b"tag1" => println!("attributes values: {:?}",
753
///                                     e.attributes().map(|a| a.unwrap().value)
754
///                                     .collect::<Vec<_>>()),
755
///                 b"tag2" => count += 1,
756
///                 _ => (),
757
///             }
758
///         }
759
///         Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()),
760
///
761
///         // There are several other `Event`s we do not consider here
762
///         _ => (),
763
///     }
764
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
765
///     buf.clear();
766
/// }
767
/// ```
768
///
769
/// [`NsReader`]: crate::reader::NsReader
770
#[derive(Debug, Clone)]
771
pub struct Reader<R> {
772
    /// Source of data for parse
773
    reader: R,
774
    /// Configuration and current parse state
775
    state: ReaderState,
776
}
777
778
/// Builder methods
779
impl<R> Reader<R> {
780
    /// Creates a `Reader` that reads from a given reader.
781
0
    pub fn from_reader(reader: R) -> Self {
782
0
        Self {
783
0
            reader,
784
0
            state: ReaderState::default(),
785
0
        }
786
0
    }
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::from_reader
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::from_reader
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::from_reader
Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::from_reader
787
788
    /// Returns reference to the parser configuration
789
0
    pub const fn config(&self) -> &Config {
790
0
        &self.state.config
791
0
    }
792
793
    /// Returns mutable reference to the parser configuration
794
0
    pub fn config_mut(&mut self) -> &mut Config {
795
0
        &mut self.state.config
796
0
    }
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::config_mut
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::config_mut
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::config_mut
Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::config_mut
797
}
798
799
/// Getters
800
impl<R> Reader<R> {
801
    /// Consumes `Reader` returning the underlying reader
802
    ///
803
    /// Can be used to compute line and column of a parsing error position
804
    ///
805
    /// # Examples
806
    ///
807
    /// ```
808
    /// # use pretty_assertions::assert_eq;
809
    /// use std::{str, io::Cursor};
810
    /// use quick_xml::events::Event;
811
    /// use quick_xml::reader::Reader;
812
    ///
813
    /// let xml = r#"<tag1 att1 = "test">
814
    ///                 <tag2><!--Test comment-->Test</tag2>
815
    ///                 <tag3>Test 2</tag3>
816
    ///              </tag1>"#;
817
    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
818
    /// let mut buf = Vec::new();
819
    ///
820
    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
821
    ///     // We known that size cannot exceed usize::MAX because we created parser from single &[u8]
822
    ///     let end_pos = reader.buffer_position() as usize;
823
    ///     let mut cursor = reader.into_inner();
824
    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
825
    ///         .expect("can't make a string");
826
    ///     let mut line = 1;
827
    ///     let mut column = 0;
828
    ///     for c in s.chars() {
829
    ///         if c == '\n' {
830
    ///             line += 1;
831
    ///             column = 0;
832
    ///         } else {
833
    ///             column += 1;
834
    ///         }
835
    ///     }
836
    ///     (line, column)
837
    /// }
838
    ///
839
    /// loop {
840
    ///     match reader.read_event_into(&mut buf) {
841
    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
842
    ///             b"tag1" | b"tag2" => (),
843
    ///             tag => {
844
    ///                 assert_eq!(b"tag3", tag);
845
    ///                 assert_eq!((3, 22), into_line_and_column(reader));
846
    ///                 break;
847
    ///             }
848
    ///         },
849
    ///         Ok(Event::Eof) => unreachable!(),
850
    ///         _ => (),
851
    ///     }
852
    ///     buf.clear();
853
    /// }
854
    /// ```
855
0
    pub fn into_inner(self) -> R {
856
0
        self.reader
857
0
    }
858
859
    /// Gets a reference to the underlying reader.
860
0
    pub const fn get_ref(&self) -> &R {
861
0
        &self.reader
862
0
    }
863
864
    /// Gets a mutable reference to the underlying reader.
865
    ///
866
    /// Avoid read from this reader because this will not update reader's position
867
    /// and will lead to incorrect positions of errors. If you want to read, use
868
    /// [`stream()`] instead.
869
    ///
870
    /// [`stream()`]: Self::stream
871
0
    pub fn get_mut(&mut self) -> &mut R {
872
0
        &mut self.reader
873
0
    }
874
875
    /// Gets the byte position in the input data just after the last emitted event
876
    /// (i.e. this is position where data of last event ends).
877
    ///
878
    /// Note, that for text events which is originally ended with whitespace characters
879
    /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position
880
    /// before trim, not the position of the last byte of the [`Event::Text`] content.
881
0
    pub const fn buffer_position(&self) -> u64 {
882
        // when internal state is InsideMarkup, we have actually read until '<',
883
        // which we don't want to show
884
0
        if let ParseState::InsideMarkup = self.state.state {
885
0
            self.state.offset - 1
886
        } else {
887
0
            self.state.offset
888
        }
889
0
    }
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::buffer_position
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::buffer_position
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::buffer_position
Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::buffer_position
890
891
    /// Gets the last error byte position in the input data. If there is no errors
892
    /// yet, returns `0`.
893
    ///
894
    /// Unlike `buffer_position` it will point to the place where it is rational
895
    /// to report error to the end user. For example, all [`SyntaxError`]s are
896
    /// reported when the parser sees EOF inside of some kind of markup. The
897
    /// `buffer_position()` will point to the last byte of input which is not
898
    /// very useful. `error_position()` will point to the start of corresponding
899
    /// markup element (i. e. to the `<` character).
900
    ///
901
    /// This position is always `<= buffer_position()`.
902
0
    pub const fn error_position(&self) -> u64 {
903
0
        self.state.last_error_offset
904
0
    }
905
906
    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
907
    ///
908
    /// If [`encoding`] feature is enabled, the used encoding may change after
909
    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
910
    ///
911
    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
912
    /// defaults to UTF-8.
913
    ///
914
    /// [`encoding`]: ../index.html#encoding
915
    #[inline]
916
0
    pub const fn decoder(&self) -> Decoder {
917
0
        self.state.decoder()
918
0
    }
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::decoder
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::decoder
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::decoder
Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::decoder
919
920
    /// Get the direct access to the underlying reader, but tracks the amount of
921
    /// read data and update [`Reader::buffer_position()`] accordingly.
922
    ///
923
    /// Note, that this method gives you access to the internal reader and read
924
    /// data will not be returned in any subsequent events read by `read_event`
925
    /// family of methods.
926
    ///
927
    /// # Example
928
    ///
929
    /// This example demonstrates how to read stream raw bytes from an XML document.
930
    /// This could be used to implement streaming read of text, or to read raw binary
931
    /// bytes embedded in an XML document. (Documents with embedded raw bytes are not
932
    /// valid XML, but XML-derived file formats exist where such documents are valid).
933
    ///
934
    /// ```
935
    /// # use pretty_assertions::assert_eq;
936
    /// use std::io::{BufRead, Read};
937
    /// use quick_xml::events::{BytesEnd, BytesStart, Event};
938
    /// use quick_xml::reader::Reader;
939
    ///
940
    /// let mut reader = Reader::from_str("<tag>binary << data&></tag>");
941
    /// //                                 ^    ^               ^     ^
942
    /// //                                 0    5              21    27
943
    ///
944
    /// assert_eq!(
945
    ///     (reader.read_event().unwrap(), reader.buffer_position()),
946
    ///     // 5 - end of the `<tag>`
947
    ///     (Event::Start(BytesStart::new("tag")), 5)
948
    /// );
949
    ///
950
    /// // Reading directly from underlying reader will not update position
951
    /// // let mut inner = reader.get_mut();
952
    ///
953
    /// // Reading from the stream() advances position
954
    /// let mut inner = reader.stream();
955
    ///
956
    /// // Read binary data. We must know its size
957
    /// let mut binary = [0u8; 16];
958
    /// inner.read_exact(&mut binary).unwrap();
959
    /// assert_eq!(&binary, b"binary << data&>");
960
    /// // 21 - end of the `binary << data&>`
961
    /// assert_eq!(inner.offset(), 21);
962
    /// assert_eq!(reader.buffer_position(), 21);
963
    ///
964
    /// assert_eq!(
965
    ///     (reader.read_event().unwrap(), reader.buffer_position()),
966
    ///     // 27 - end of the `</tag>`
967
    ///     (Event::End(BytesEnd::new("tag")), 27)
968
    /// );
969
    ///
970
    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
971
    /// ```
972
    #[inline]
973
0
    pub fn stream(&mut self) -> BinaryStream<'_, R> {
974
0
        BinaryStream {
975
0
            inner: &mut self.reader,
976
0
            offset: &mut self.state.offset,
977
0
        }
978
0
    }
979
}
980
981
/// Private sync reading methods
982
impl<R> Reader<R> {
983
    /// Read text into the given buffer, and return an event that borrows from
984
    /// either that buffer or from the input itself, based on the type of the
985
    /// reader.
986
0
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
987
0
    where
988
0
        R: XmlSource<'i, B>,
989
    {
990
0
        read_event_impl!(self, buf, self.reader, read_until_close)
991
0
    }
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::read_event_impl::<&mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::read_event_impl::<&mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::read_event_impl::<&mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::read_event_impl::<()>
992
993
    /// Private function to read until `>` is found. This function expects that
994
    /// it was called just after encounter a `<` symbol.
995
0
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
996
0
    where
997
0
        R: XmlSource<'i, B>,
998
    {
999
0
        read_until_close!(self, buf, self.reader)
1000
0
    }
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::read_until_close::<&mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::read_until_close::<&mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::read_until_close::<&mut alloc::vec::Vec<u8>>
Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::read_until_close::<()>
1001
}
1002
1003
////////////////////////////////////////////////////////////////////////////////////////////////////
1004
1005
/// Outcome of an attempt to read XML textual data from the source.
#[derive(Debug)]
enum ReadTextResult<'r, B> {
    /// The very first byte was a start of markup (`<` character), which was consumed.
    /// Carries the buffer, which is handed back for the next iteration cycle
    /// to satisfy borrow checker requirements.
    Markup(B),
    /// The very first byte was a start of reference (`&` character), which
    /// was not consumed.
    /// Carries the buffer, which is handed back for the next iteration cycle
    /// to satisfy borrow checker requirements.
    Ref(B),
    /// A text block that extends up to a start of markup (`<` character).
    /// The `<` was consumed.
    UpToMarkup(&'r [u8]),
    /// A text block that extends up to a start of reference (`&` character).
    /// The `&` was not consumed.
    UpToRef(&'r [u8]),
    /// A text block that extends up to EOF: neither a start of markup (`<` character)
    /// nor a start of reference (`&` character) was found.
    UpToEof(&'r [u8]),
    /// An IO error occurred.
    Err(io::Error),
}
1028
1029
/// Outcome of an attempt to read a general reference from the reader.
#[derive(Debug)]
enum ReadRefResult<'r> {
    /// A text block that extends up to the end of reference (`;` character).
    /// The leading `&` is included in the result, the trailing `;` is not.
    Ref(&'r [u8]),
    /// A text block that extends up to EOF. No end of reference (`;`), start of
    /// another reference (`&`) or start of markup (`<`) character was found.
    /// The leading `&` is included in the result.
    UpToEof(&'r [u8]),
    /// A text block that extends up to the next possible reference (`&` character).
    /// The leading `&` is included in the result.
    UpToRef(&'r [u8]),
    /// A text block that extends up to a start of markup (`<` character).
    /// The leading `&` is included in the result.
    UpToMarkup(&'r [u8]),
    /// An IO error occurred.
    Err(io::Error),
}
1048
1049
/// Represents an input for a reader that can return borrowed data.
1050
///
1051
/// There are two implementors of this trait: generic one that read data from
1052
/// `Self`, copies some part of it into a provided buffer of type `B` and then
1053
/// returns data that borrow from that buffer.
1054
///
1055
/// The other implementor is for `&[u8]` and instead of copying data returns
1056
/// borrowed data from `Self` instead. This implementation allows zero-copy
1057
/// deserialization.
1058
///
1059
/// # Parameters
1060
/// - `'r`: lifetime of a buffer from which events will borrow
1061
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
1062
///   from which events can borrow
1063
trait XmlSource<'r, B> {
1064
    /// Removes UTF-8 BOM if it is present
1065
    #[cfg(not(feature = "encoding"))]
1066
    fn remove_utf8_bom(&mut self) -> io::Result<()>;
1067
1068
    /// Determines encoding from the start of input and removes BOM if it is present
1069
    #[cfg(feature = "encoding")]
1070
    fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;
1071
1072
    /// Read input until start of markup (the `<`) is found, start of general entity
1073
    /// reference (the `&`) is found or end of input is reached.
1074
    ///
1075
    /// # Parameters
1076
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1077
    ///   from which [events] could borrow their data
1078
    /// - `position`: Will be increased by amount of bytes consumed
1079
    ///
1080
    /// [events]: crate::events::Event
1081
    fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;
1082
1083
    /// Read input until end of general reference (the `;`) is found, start of
1084
    /// another general reference (the `&`) is found or end of input is reached.
1085
    ///
1086
    /// This method must be called when current character is `&`.
1087
    ///
1088
    /// # Parameters
1089
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1090
    ///   from which [events] could borrow their data
1091
    /// - `position`: Will be increased by amount of bytes consumed
1092
    ///
1093
    /// [events]: crate::events::Event
1094
    fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;
1095
1096
    /// Read input until processing instruction is finished.
1097
    ///
1098
    /// This method expect that start sequence of a parser already was read.
1099
    ///
1100
    /// Returns a slice of data read up to the end of the thing being parsed.
1101
    /// The end of thing and the returned content is determined by the used parser.
1102
    ///
1103
    /// If input (`Self`) is exhausted and no bytes was read, or if the specified
1104
    /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
1105
    ///
1106
    /// # Parameters
1107
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1108
    ///   from which [events] could borrow their data
1109
    /// - `position`: Will be increased by amount of bytes consumed
1110
    ///
1111
    /// A `P` type parameter is used to preserve state between calls to the underlying
1112
    /// reader which provides bytes fed into the parser.
1113
    ///
1114
    /// [events]: crate::events::Event
1115
    fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
1116
    where
1117
        P: Parser;
1118
1119
    /// Read input until comment or CDATA is finished.
1120
    ///
1121
    /// This method expect that `<` already was read.
1122
    ///
1123
    /// Returns a slice of data read up to end of comment or CDATA (`>`),
1124
    /// which does not include into result.
1125
    ///
1126
    /// If input (`Self`) is exhausted and nothing was read, returns `None`.
1127
    ///
1128
    /// # Parameters
1129
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
1130
    ///   from which [events] could borrow their data
1131
    /// - `position`: Will be increased by amount of bytes consumed
1132
    ///
1133
    /// [events]: crate::events::Event
1134
    fn read_bang_element(
1135
        &mut self,
1136
        buf: B,
1137
        position: &mut u64,
1138
    ) -> Result<(BangType, &'r [u8]), Error>;
1139
1140
    /// Consume and discard all the whitespace until the next non-whitespace
1141
    /// character or EOF.
1142
    ///
1143
    /// # Parameters
1144
    /// - `position`: Will be increased by amount of bytes consumed
1145
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;
1146
1147
    /// Return one character without consuming it, so that future `read_*` calls
1148
    /// will still include it. On EOF, return `None`.
1149
    fn peek_one(&mut self) -> io::Result<Option<u8>>;
1150
}
1151
1152
/// The kinds of markup that can start with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
    /// CDATA section: `<![CDATA[...]]>`
    CData,
    /// Comment: `<!--...-->`
    Comment,
    /// Document type declaration: `<!DOCTYPE...>`. The payload is the balance
    /// of `<` (each counts as +1) and `>` (each counts as -1) seen so far
    DocType(i32),
}
1162
impl BangType {
1163
    #[inline(always)]
1164
0
    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
1165
0
        Ok(match byte {
1166
0
            Some(b'[') => Self::CData,
1167
0
            Some(b'-') => Self::Comment,
1168
0
            Some(b'D') | Some(b'd') => Self::DocType(0),
1169
0
            _ => return Err(SyntaxError::InvalidBangMarkup),
1170
        })
1171
0
    }
1172
1173
    /// If element is finished, returns its content up to `>` symbol and
1174
    /// an index of this symbol, otherwise returns `None`
1175
    ///
1176
    /// # Parameters
1177
    /// - `buf`: buffer with data consumed on previous iterations
1178
    /// - `chunk`: data read on current iteration and not yet consumed from reader
1179
    #[inline(always)]
1180
0
    fn parse<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
1181
0
        match self {
1182
            Self::Comment => {
1183
0
                for i in memchr::memchr_iter(b'>', chunk) {
1184
                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
1185
                    // <!----> - XML comment
1186
                    //  012345 - i
1187
0
                    if buf.len() + i > 4 {
1188
0
                        if chunk[..i].ends_with(b"--") {
1189
                            // We cannot strip last `--` from the buffer because we need it in case of
1190
                            // check_comments enabled option. XML standard requires that comment
1191
                            // will not end with `--->` sequence because this is a special case of
1192
                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
1193
0
                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1194
0
                        }
1195
                        // End sequence `-|->` was splitted at |
1196
                        //        buf --/   \-- chunk
1197
0
                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
1198
0
                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1199
0
                        }
1200
                        // End sequence `--|>` was splitted at |
1201
                        //         buf --/   \-- chunk
1202
0
                        if i == 0 && buf.ends_with(b"--") {
1203
0
                            return Some((&[], i + 1)); // +1 for `>`
1204
0
                        }
1205
0
                    }
1206
                }
1207
            }
1208
            Self::CData => {
1209
0
                for i in memchr::memchr_iter(b'>', chunk) {
1210
0
                    if chunk[..i].ends_with(b"]]") {
1211
0
                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1212
0
                    }
1213
                    // End sequence `]|]>` was splitted at |
1214
                    //        buf --/   \-- chunk
1215
0
                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
1216
0
                        return Some((&chunk[..i], i + 1)); // +1 for `>`
1217
0
                    }
1218
                    // End sequence `]]|>` was splitted at |
1219
                    //         buf --/   \-- chunk
1220
0
                    if i == 0 && buf.ends_with(b"]]") {
1221
0
                        return Some((&[], i + 1)); // +1 for `>`
1222
0
                    }
1223
                }
1224
            }
1225
0
            Self::DocType(ref mut balance) => {
1226
0
                for i in memchr::memchr2_iter(b'<', b'>', chunk) {
1227
0
                    if chunk[i] == b'<' {
1228
0
                        *balance += 1;
1229
0
                    } else {
1230
0
                        if *balance == 0 {
1231
0
                            return Some((&chunk[..i], i + 1)); // +1 for `>`
1232
0
                        }
1233
0
                        *balance -= 1;
1234
                    }
1235
                }
1236
            }
1237
        }
1238
0
        None
1239
0
    }
1240
    #[inline]
1241
0
    const fn to_err(&self) -> SyntaxError {
1242
0
        match self {
1243
0
            Self::CData => SyntaxError::UnclosedCData,
1244
0
            Self::Comment => SyntaxError::UnclosedComment,
1245
0
            Self::DocType(_) => SyntaxError::UnclosedDoctype,
1246
        }
1247
0
    }
Unexecuted instantiation: <quick_xml::reader::BangType>::to_err
Unexecuted instantiation: <quick_xml::reader::BangType>::to_err
1248
}
1249
1250
////////////////////////////////////////////////////////////////////////////////////////////////////
1251
1252
#[cfg(test)]
1253
mod test {
1254
    /// Checks the internal implementation of the various reader methods
1255
    macro_rules! check {
1256
        (
1257
            #[$test:meta]
1258
            $read_event:ident,
1259
            $read_until_close:ident,
1260
            // constructor of the XML source on which internal functions will be called
1261
            $source:path,
1262
            // constructor of the buffer to which read data will be stored
1263
            $buf:expr
1264
            $(, $async:ident, $await:ident)?
1265
        ) => {
1266
            mod read_bang_element {
1267
                use super::*;
1268
                use crate::errors::{Error, SyntaxError};
1269
                use crate::reader::BangType;
1270
                use crate::utils::Bytes;
1271
1272
                /// Checks that reading CDATA content works correctly
1273
                mod cdata {
1274
                    use super::*;
1275
                    use pretty_assertions::assert_eq;
1276
1277
                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1278
                    /// is not finished, parsing ends with an error
1279
                    #[$test]
1280
                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1281
                    $($async)? fn not_properly_start() {
1282
                        let buf = $buf;
1283
                        let mut position = 1;
1284
                        let mut input = b"![]]>other content".as_ref();
1285
                        //                ^= 1
1286
1287
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1288
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1289
                            x => panic!(
1290
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1291
                                x
1292
                            ),
1293
                        }
1294
                        assert_eq!(position, 1);
1295
                    }
1296
1297
                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1298
                    /// is not found, parsing ends with an error
1299
                    #[$test]
1300
                    $($async)? fn not_closed() {
1301
                        let buf = $buf;
1302
                        let mut position = 1;
1303
                        let mut input = b"![CDATA[other content".as_ref();
1304
                        //                ^= 1                 ^= 22
1305
1306
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1307
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
1308
                            x => panic!(
1309
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1310
                                x
1311
                            ),
1312
                        }
1313
                        assert_eq!(position, 22);
1314
                    }
1315
1316
                    /// Checks that CDATA element without content inside is parsed successfully
1317
                    #[$test]
1318
                    $($async)? fn empty() {
1319
                        let buf = $buf;
1320
                        let mut position = 1;
1321
                        let mut input = b"![CDATA[]]>other content".as_ref();
1322
                        //                ^= 1       ^= 12
1323
1324
                        let (ty, bytes) = $source(&mut input)
1325
                            .read_bang_element(buf, &mut position)
1326
                            $(.$await)?
1327
                            .unwrap();
1328
                        assert_eq!(
1329
                            (ty, Bytes(bytes)),
1330
                            (BangType::CData, Bytes(b"![CDATA[]]"))
1331
                        );
1332
                        assert_eq!(position, 12);
1333
                    }
1334
1335
                    /// Checks that CDATA element with content is parsed successfully.
1336
                    /// Additionally checks that sequences inside CDATA that may look like
1337
                    /// a CDATA end sequence do not interrupt CDATA parsing
1338
                    #[$test]
1339
                    $($async)? fn with_content() {
1340
                        let buf = $buf;
1341
                        let mut position = 1;
1342
                        let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref();
1343
                        //                ^= 1                        ^= 29
1344
1345
                        let (ty, bytes) = $source(&mut input)
1346
                            .read_bang_element(buf, &mut position)
1347
                            $(.$await)?
1348
                            .unwrap();
1349
                        assert_eq!(
1350
                            (ty, Bytes(bytes)),
1351
                            (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]"))
1352
                        );
1353
                        assert_eq!(position, 29);
1354
                    }
1355
                }
1356
1357
                /// Checks that reading XML comments works correctly. According to the [specification],
1358
                /// comment data can contain any sequence except `--`:
1359
                ///
1360
                /// ```peg
1361
                /// comment = '<!--' (!'--' char)* '-->';
1362
                /// char = [#x1-#x2C]
1363
                ///      / [#x2E-#xD7FF]
1364
                ///      / [#xE000-#xFFFD]
1365
                ///      / [#x10000-#x10FFFF]
1366
                /// ```
1367
                ///
1368
                /// The presence of this limitation, however, is simply a poorly designed specification
1369
                /// (maybe for the purpose of building an LL(1) XML parser) and quick-xml does not check for
1370
                /// presence of these sequences by default. These tests allow such content.
1371
                ///
1372
                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1373
                mod comment {
1374
                    use super::*;
1375
                    use pretty_assertions::assert_eq;
1376
1377
                    #[$test]
1378
                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1379
                    $($async)? fn not_properly_start() {
1380
                        let buf = $buf;
1381
                        let mut position = 1;
1382
                        let mut input = b"!- -->other content".as_ref();
1383
                        //                ^= 1
1384
1385
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1386
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1387
                            x => panic!(
1388
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1389
                                x
1390
                            ),
1391
                        }
1392
                        assert_eq!(position, 1);
1393
                    }
1394
1395
                    #[$test]
1396
                    $($async)? fn not_properly_end() {
1397
                        let buf = $buf;
1398
                        let mut position = 1;
1399
                        let mut input = b"!->other content".as_ref();
1400
                        //                ^= 1            ^= 17
1401
1402
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1403
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1404
                            x => panic!(
1405
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1406
                                x
1407
                            ),
1408
                        }
1409
                        assert_eq!(position, 17);
1410
                    }
1411
1412
                    #[$test]
1413
                    $($async)? fn not_closed1() {
1414
                        let buf = $buf;
1415
                        let mut position = 1;
1416
                        let mut input = b"!--other content".as_ref();
1417
                        //                ^= 1            ^= 17
1418
1419
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1420
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1421
                            x => panic!(
1422
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1423
                                x
1424
                            ),
1425
                        }
1426
                        assert_eq!(position, 17);
1427
                    }
1428
1429
                    #[$test]
1430
                    $($async)? fn not_closed2() {
1431
                        let buf = $buf;
1432
                        let mut position = 1;
1433
                        let mut input = b"!-->other content".as_ref();
1434
                        //                ^= 1             ^= 18
1435
1436
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1437
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1438
                            x => panic!(
1439
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1440
                                x
1441
                            ),
1442
                        }
1443
                        assert_eq!(position, 18);
1444
                    }
1445
1446
                    #[$test]
1447
                    $($async)? fn not_closed3() {
1448
                        let buf = $buf;
1449
                        let mut position = 1;
1450
                        let mut input = b"!--->other content".as_ref();
1451
                        //                ^= 1              ^= 19
1452
1453
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1454
                            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1455
                            x => panic!(
1456
                                "Expected `Err(Syntax(_))`, but got `{:?}`",
1457
                                x
1458
                            ),
1459
                        }
1460
                        assert_eq!(position, 19);
1461
                    }
1462
1463
                    #[$test]
1464
                    $($async)? fn empty() {
1465
                        let buf = $buf;
1466
                        let mut position = 1;
1467
                        let mut input = b"!---->other content".as_ref();
1468
                        //                ^= 1  ^= 7
1469
1470
                        let (ty, bytes) = $source(&mut input)
1471
                            .read_bang_element(buf, &mut position)
1472
                            $(.$await)?
1473
                            .unwrap();
1474
                        assert_eq!(
1475
                            (ty, Bytes(bytes)),
1476
                            (BangType::Comment, Bytes(b"!----"))
1477
                        );
1478
                        assert_eq!(position, 7);
1479
                    }
1480
1481
                    #[$test]
1482
                    $($async)? fn with_content() {
1483
                        let buf = $buf;
1484
                        let mut position = 1;
1485
                        let mut input = b"!--->comment<--->other content".as_ref();
1486
                        //                ^= 1             ^= 18
1487
1488
                        let (ty, bytes) = $source(&mut input)
1489
                            .read_bang_element(buf, &mut position)
1490
                            $(.$await)?
1491
                            .unwrap();
1492
                        assert_eq!(
1493
                            (ty, Bytes(bytes)),
1494
                            (BangType::Comment, Bytes(b"!--->comment<---"))
1495
                        );
1496
                        assert_eq!(position, 18);
1497
                    }
1498
                }
1499
1500
                /// Checks that reading DOCTYPE definition works correctly
1501
                mod doctype {
1502
                    use super::*;
1503
1504
                    mod uppercase {
1505
                        use super::*;
1506
                        use pretty_assertions::assert_eq;
1507
1508
                        #[$test]
1509
                        $($async)? fn not_properly_start() {
1510
                            let buf = $buf;
1511
                            let mut position = 1;
1512
                            let mut input = b"!D other content".as_ref();
1513
                            //                ^= 1            ^= 17
1514
1515
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1516
                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1517
                                x => panic!(
1518
                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1519
                                    x
1520
                                ),
1521
                            }
1522
                            assert_eq!(position, 17);
1523
                        }
1524
1525
                        #[$test]
1526
                        $($async)? fn without_space() {
1527
                            let buf = $buf;
1528
                            let mut position = 1;
1529
                            let mut input = b"!DOCTYPEother content".as_ref();
1530
                            //                ^= 1                 ^= 22
1531
1532
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1533
                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1534
                                x => panic!(
1535
                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1536
                                    x
1537
                                ),
1538
                            }
1539
                            assert_eq!(position, 22);
1540
                        }
1541
1542
                        #[$test]
1543
                        $($async)? fn empty() {
1544
                            let buf = $buf;
1545
                            let mut position = 1;
1546
                            let mut input = b"!DOCTYPE>other content".as_ref();
1547
                            //                ^= 1     ^= 10
1548
1549
                            let (ty, bytes) = $source(&mut input)
1550
                                .read_bang_element(buf, &mut position)
1551
                                $(.$await)?
1552
                                .unwrap();
1553
                            assert_eq!(
1554
                                (ty, Bytes(bytes)),
1555
                                (BangType::DocType(0), Bytes(b"!DOCTYPE"))
1556
                            );
1557
                            assert_eq!(position, 10);
1558
                        }
1559
1560
                        #[$test]
1561
                        $($async)? fn not_closed() {
1562
                            let buf = $buf;
1563
                            let mut position = 1;
1564
                            let mut input = b"!DOCTYPE other content".as_ref();
1565
                            //                ^= 1                  ^= 23
1566
1567
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1568
                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1569
                                x => panic!(
1570
                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1571
                                    x
1572
                                ),
1573
                            }
1574
                            assert_eq!(position, 23);
1575
                        }
1576
                    }
1577
1578
                    mod lowercase {
1579
                        use super::*;
1580
                        use pretty_assertions::assert_eq;
1581
1582
                        #[$test]
1583
                        $($async)? fn not_properly_start() {
1584
                            let buf = $buf;
1585
                            let mut position = 1;
1586
                            let mut input = b"!d other content".as_ref();
1587
                            //                ^= 1            ^= 17
1588
1589
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1590
                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1591
                                x => panic!(
1592
                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1593
                                    x
1594
                                ),
1595
                            }
1596
                            assert_eq!(position, 17);
1597
                        }
1598
1599
                        #[$test]
1600
                        $($async)? fn without_space() {
1601
                            let buf = $buf;
1602
                            let mut position = 1;
1603
                            let mut input = b"!doctypeother content".as_ref();
1604
                            //                ^= 1                 ^= 22
1605
1606
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1607
                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1608
                                x => panic!(
1609
                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1610
                                    x
1611
                                ),
1612
                            }
1613
                            assert_eq!(position, 22);
1614
                        }
1615
1616
                        #[$test]
1617
                        $($async)? fn empty() {
1618
                            let buf = $buf;
1619
                            let mut position = 1;
1620
                            let mut input = b"!doctype>other content".as_ref();
1621
                            //                ^= 1     ^= 10
1622
1623
                            let (ty, bytes) = $source(&mut input)
1624
                                .read_bang_element(buf, &mut position)
1625
                                $(.$await)?
1626
                                .unwrap();
1627
                            assert_eq!(
1628
                                (ty, Bytes(bytes)),
1629
                                (BangType::DocType(0), Bytes(b"!doctype"))
1630
                            );
1631
                            assert_eq!(position, 10);
1632
                        }
1633
1634
                        #[$test]
1635
                        $($async)? fn not_closed() {
1636
                            let buf = $buf;
1637
                            let mut position = 1;
1638
                            let mut input = b"!doctype other content".as_ref();
1639
                            //                ^= 1                  ^= 23
1640
1641
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1642
                                Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1643
                                x => panic!(
1644
                                    "Expected `Err(Syntax(_))`, but got `{:?}`",
1645
                                    x
1646
                                ),
1647
                            }
1648
                            assert_eq!(position, 23);
1649
                        }
1650
                    }
1651
                }
1652
            }
1653
1654
            mod read_text {
1655
                use super::*;
1656
                use crate::reader::ReadTextResult;
1657
                use crate::utils::Bytes;
1658
                use pretty_assertions::assert_eq;
1659
1660
                #[$test]
1661
                $($async)? fn empty() {
1662
                    let buf = $buf;
1663
                    let mut position = 1;
1664
                    let mut input = b"".as_ref();
1665
                    //                ^= 1
1666
1667
                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1668
                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1669
                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1670
                    }
1671
                    assert_eq!(position, 1);
1672
                }
1673
1674
                #[$test]
1675
                $($async)? fn markup() {
1676
                    let buf = $buf;
1677
                    let mut position = 1;
1678
                    let mut input = b"<".as_ref();
1679
                    //                 ^= 2
1680
1681
                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1682
                        ReadTextResult::Markup(b) => assert_eq!(b, $buf),
1683
                        x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1684
                    }
1685
                    assert_eq!(position, 2);
1686
                }
1687
1688
                #[$test]
1689
                $($async)? fn ref_() {
1690
                    let buf = $buf;
1691
                    let mut position = 1;
1692
                    let mut input = b"&".as_ref();
1693
                    //                ^= 1
1694
1695
                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1696
                        ReadTextResult::Ref(b) => assert_eq!(b, $buf),
1697
                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1698
                    }
1699
                    assert_eq!(position, 1);
1700
                }
1701
1702
                #[$test]
1703
                $($async)? fn up_to_markup() {
1704
                    let buf = $buf;
1705
                    let mut position = 1;
1706
                    let mut input = b"a<".as_ref();
1707
                    //                1 ^= 3
1708
1709
                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1710
                        ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1711
                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1712
                    }
1713
                    assert_eq!(position, 3);
1714
                }
1715
1716
                #[$test]
1717
                $($async)? fn up_to_ref() {
1718
                    let buf = $buf;
1719
                    let mut position = 1;
1720
                    let mut input = b"a&".as_ref();
1721
                    //                 ^= 2
1722
1723
                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1724
                        ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1725
                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1726
                    }
1727
                    assert_eq!(position, 2);
1728
                }
1729
1730
                #[$test]
1731
                $($async)? fn up_to_eof() {
1732
                    let buf = $buf;
1733
                    let mut position = 1;
1734
                    let mut input = b"a".as_ref();
1735
                    //                 ^= 2
1736
1737
                    match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1738
                        ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1739
                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1740
                    }
1741
                    assert_eq!(position, 2);
1742
                }
1743
            }
1744
1745
            mod read_ref {
1746
                use super::*;
1747
                use crate::reader::ReadRefResult;
1748
                use crate::utils::Bytes;
1749
                use pretty_assertions::assert_eq;
1750
1751
                // Empty input is not allowed for `read_ref` so not tested.
1752
                // Borrowed source triggers debug assertion,
1753
                // buffered does nothing due to implementation details.
1754
1755
                #[$test]
1756
                $($async)? fn up_to_eof() {
1757
                    let buf = $buf;
1758
                    let mut position = 1;
1759
                    let mut input = b"&".as_ref();
1760
                    //                 ^= 2
1761
1762
                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1763
                        ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1764
                        x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1765
                    }
1766
                    assert_eq!(position, 2);
1767
                }
1768
1769
                #[$test]
1770
                $($async)? fn up_to_ref() {
1771
                    let buf = $buf;
1772
                    let mut position = 1;
1773
                    let mut input = b"&&".as_ref();
1774
                    //                 ^= 2
1775
1776
                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1777
                        ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1778
                        x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1779
                    }
1780
                    assert_eq!(position, 2);
1781
                }
1782
1783
                #[$test]
1784
                $($async)? fn up_to_markup() {
1785
                    let buf = $buf;
1786
                    let mut position = 1;
1787
                    let mut input = b"&<".as_ref();
1788
                    //                  ^= 3
1789
1790
                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1791
                        ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1792
                        x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1793
                    }
1794
                    assert_eq!(position, 3);
1795
                }
1796
1797
                #[$test]
1798
                $($async)? fn empty_ref() {
1799
                    let buf = $buf;
1800
                    let mut position = 1;
1801
                    let mut input = b"&;".as_ref();
1802
                    //                  ^= 3
1803
1804
                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1805
                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1806
                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1807
                    }
1808
                    assert_eq!(position, 3);
1809
                }
1810
1811
                #[$test]
1812
                $($async)? fn normal() {
1813
                    let buf = $buf;
1814
                    let mut position = 1;
1815
                    let mut input = b"&lt;".as_ref();
1816
                    //                    ^= 5
1817
1818
                    match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1819
                        ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt")),
1820
                        x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1821
                    }
1822
                    assert_eq!(position, 5);
1823
                }
1824
            }
1825
1826
            mod read_element {
1827
                use super::*;
1828
                use crate::errors::{Error, SyntaxError};
1829
                use crate::parser::ElementParser;
1830
                use crate::utils::Bytes;
1831
                use pretty_assertions::assert_eq;
1832
1833
                /// Checks that nothing was read from empty buffer
1834
                #[$test]
1835
                $($async)? fn empty() {
1836
                    let buf = $buf;
1837
                    let mut position = 1;
1838
                    let mut input = b"".as_ref();
1839
                    //                ^= 1
1840
1841
                    match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1842
                        Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1843
                        x => panic!(
1844
                            "Expected `Err(Syntax(_))`, but got `{:?}`",
1845
                            x
1846
                        ),
1847
                    }
1848
                    assert_eq!(position, 1);
1849
                }
1850
1851
                mod open {
1852
                    use super::*;
1853
                    use pretty_assertions::assert_eq;
1854
1855
                    #[$test]
1856
                    $($async)? fn empty_tag() {
1857
                        let buf = $buf;
1858
                        let mut position = 1;
1859
                        let mut input = b">".as_ref();
1860
                        //                 ^= 2
1861
1862
                        assert_eq!(
1863
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1864
                            Bytes(b"")
1865
                        );
1866
                        assert_eq!(position, 2);
1867
                    }
1868
1869
                    #[$test]
1870
                    $($async)? fn normal() {
1871
                        let buf = $buf;
1872
                        let mut position = 1;
1873
                        let mut input = b"tag>".as_ref();
1874
                        //                    ^= 5
1875
1876
                        assert_eq!(
1877
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1878
                            Bytes(b"tag")
1879
                        );
1880
                        assert_eq!(position, 5);
1881
                    }
1882
1883
                    #[$test]
1884
                    $($async)? fn empty_ns_empty_tag() {
1885
                        let buf = $buf;
1886
                        let mut position = 1;
1887
                        let mut input = b":>".as_ref();
1888
                        //                  ^= 3
1889
1890
                        assert_eq!(
1891
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1892
                            Bytes(b":")
1893
                        );
1894
                        assert_eq!(position, 3);
1895
                    }
1896
1897
                    /// Reads `:tag>` (a name with nothing before the colon) with `position`
                    /// starting at 1. The expected content is `:tag` and the expected final
                    /// `position` is 6.
                    #[$test]
1898
                    $($async)? fn empty_ns() {
1899
                        let buf = $buf;
1900
                        let mut position = 1;
1901
                        let mut input = b":tag>".as_ref();
1902
                        //                     ^= 6
1903
1904
                        assert_eq!(
1905
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1906
                            Bytes(b":tag")
1907
                        );
1908
                        assert_eq!(position, 6);
1909
                    }
1910
1911
                    /// Reads a 38-byte input whose attribute values contain `>` inside both
                    /// double and single quotes. Those quoted `>` bytes are expected to stay
                    /// in the returned content; only the final, unquoted `>` is absent from
                    /// it. `position` is expected to go from 1 to 39.
                    #[$test]
1912
                    $($async)? fn with_attributes() {
1913
                        let buf = $buf;
1914
                        let mut position = 1;
1915
                        let mut input = br#"tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
1916
                        //                                                        ^= 39
1917
1918
                        assert_eq!(
1919
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1920
                            Bytes(br#"tag  attr-1=">"  attr2  =  '>'  3attr"#)
1921
                        );
1922
                        assert_eq!(position, 39);
1923
                    }
1924
                }
1925
1926
                mod self_closed {
1927
                    use super::*;
1928
                    use pretty_assertions::assert_eq;
1929
1930
                    /// Reads `/>` with `position` starting at 1. The expected content is
                    /// `/` (the slash is kept, the `>` is not) and the expected final
                    /// `position` is 3.
                    #[$test]
1931
                    $($async)? fn empty_tag() {
1932
                        let buf = $buf;
1933
                        let mut position = 1;
1934
                        let mut input = b"/>".as_ref();
1935
                        //                  ^= 3
1936
1937
                        assert_eq!(
1938
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1939
                            Bytes(b"/")
1940
                        );
1941
                        assert_eq!(position, 3);
1942
                    }
1943
1944
                    /// Reads `tag/>` with `position` starting at 1. The expected content is
                    /// `tag/` (trailing slash kept, `>` dropped) and the expected final
                    /// `position` is 6.
                    #[$test]
1945
                    $($async)? fn normal() {
1946
                        let buf = $buf;
1947
                        let mut position = 1;
1948
                        let mut input = b"tag/>".as_ref();
1949
                        //                     ^= 6
1950
1951
                        assert_eq!(
1952
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1953
                            Bytes(b"tag/")
1954
                        );
1955
                        assert_eq!(position, 6);
1956
                    }
1957
1958
                    /// Reads `:/>` with `position` starting at 1. The expected content is
                    /// `:/` and the expected final `position` is 4.
                    #[$test]
1959
                    $($async)? fn empty_ns_empty_tag() {
1960
                        let buf = $buf;
1961
                        let mut position = 1;
1962
                        let mut input = b":/>".as_ref();
1963
                        //                   ^= 4
1964
1965
                        assert_eq!(
1966
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1967
                            Bytes(b":/")
1968
                        );
1969
                        assert_eq!(position, 4);
1970
                    }
1971
1972
                    /// Reads `:tag/>` with `position` starting at 1. The expected content
                    /// is `:tag/` and the expected final `position` is 7.
                    #[$test]
1973
                    $($async)? fn empty_ns() {
1974
                        let buf = $buf;
1975
                        let mut position = 1;
1976
                        let mut input = b":tag/>".as_ref();
1977
                        //                      ^= 7
1978
1979
                        assert_eq!(
1980
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1981
                            Bytes(b":tag/")
1982
                        );
1983
                        assert_eq!(position, 7);
1984
                    }
1985
1986
                    /// Reads a 41-byte input whose attribute values contain `/>` inside both
                    /// double and single quotes. Those quoted `/>` sequences are expected to
                    /// stay in the returned content, which ends with the final `/`; only the
                    /// last `>` is absent. `position` is expected to go from 1 to 42.
                    #[$test]
1987
                    $($async)? fn with_attributes() {
1988
                        let buf = $buf;
1989
                        let mut position = 1;
1990
                        let mut input = br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#.as_ref();
1991
                        //                                                           ^= 42
1992
1993
                        assert_eq!(
1994
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1995
                            Bytes(br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/"#)
1996
                        );
1997
                        assert_eq!(position, 42);
1998
                    }
1999
                }
2000
2001
                mod close {
2002
                    use super::*;
2003
                    use pretty_assertions::assert_eq;
2004
2005
                    /// Reads `/ >` (slash, space, `>`) with `position` starting at 1. The
                    /// expected content is `/ ` (the space is kept) and the expected final
                    /// `position` is 4.
                    #[$test]
2006
                    $($async)? fn empty_tag() {
2007
                        let buf = $buf;
2008
                        let mut position = 1;
2009
                        let mut input = b"/ >".as_ref();
2010
                        //                   ^= 4
2011
2012
                        assert_eq!(
2013
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2014
                            Bytes(b"/ ")
2015
                        );
2016
                        assert_eq!(position, 4);
2017
                    }
2018
2019
                    /// Reads `/tag>` with `position` starting at 1. The expected content is
                    /// `/tag` (leading slash kept, `>` dropped) and the expected final
                    /// `position` is 6.
                    #[$test]
2020
                    $($async)? fn normal() {
2021
                        let buf = $buf;
2022
                        let mut position = 1;
2023
                        let mut input = b"/tag>".as_ref();
2024
                        //                     ^= 6
2025
2026
                        assert_eq!(
2027
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2028
                            Bytes(b"/tag")
2029
                        );
2030
                        assert_eq!(position, 6);
2031
                    }
2032
2033
                    /// Reads `/:>` with `position` starting at 1. The expected content is
                    /// `/:` and the expected final `position` is 4.
                    #[$test]
2034
                    $($async)? fn empty_ns_empty_tag() {
2035
                        let buf = $buf;
2036
                        let mut position = 1;
2037
                        let mut input = b"/:>".as_ref();
2038
                        //                   ^= 4
2039
2040
                        assert_eq!(
2041
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2042
                            Bytes(b"/:")
2043
                        );
2044
                        assert_eq!(position, 4);
2045
                    }
2046
2047
                    /// Reads `/:tag>` with `position` starting at 1. The expected content
                    /// is `/:tag` and the expected final `position` is 7.
                    #[$test]
2048
                    $($async)? fn empty_ns() {
2049
                        let buf = $buf;
2050
                        let mut position = 1;
2051
                        let mut input = b"/:tag>".as_ref();
2052
                        //                      ^= 7
2053
2054
                        assert_eq!(
2055
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2056
                            Bytes(b"/:tag")
2057
                        );
2058
                        assert_eq!(position, 7);
2059
                    }
2060
2061
                    /// Reads a 39-byte input that starts with `/` and whose attribute-like
                    /// parts contain `>` inside both double and single quotes. Those quoted
                    /// `>` bytes are expected to stay in the returned content; only the
                    /// final, unquoted `>` is absent. `position` is expected to go from 1
                    /// to 40.
                    #[$test]
2062
                    $($async)? fn with_attributes() {
2063
                        let buf = $buf;
2064
                        let mut position = 1;
2065
                        let mut input = br#"/tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
2066
                        //                                                         ^= 40
2067
2068
                        assert_eq!(
2069
                            Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2070
                            Bytes(br#"/tag  attr-1=">"  attr2  =  '>'  3attr"#)
2071
                        );
2072
                        assert_eq!(position, 40);
2073
                    }
2074
                }
2075
            }
2076
2077
            /// Ensures that no empty `Text` events are generated
2078
            mod $read_event {
2079
                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2080
                use crate::reader::Reader;
2081
                use pretty_assertions::assert_eq;
2082
2083
                /// When the `encoding` feature is enabled, the encoding should be detected
2084
                /// from the BOM (UTF-8) and the BOM should be stripped.
2085
                ///
2086
                /// When the `encoding` feature is disabled, UTF-8 is assumed and the BOM
2087
                /// character should be stripped for consistency.
2088
                #[$test]
2089
                $($async)? fn bom_from_reader() {
2090
                    // The input holds two U+FEFF characters in a row.
                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2091
2092
                    // Only one U+FEFF is expected to come back, as a `Text` event.
                    assert_eq!(
2093
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2094
                        Event::Text(BytesText::from_escaped("\u{feff}"))
2095
                    );
2096
2097
                    // Nothing else is expected after that single text event.
                    assert_eq!(
2098
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2099
                        Event::Eof
2100
                    );
2101
                }
2102
2103
                /// When parsing from `&str`, the encoding is fixed (UTF-8), so
2104
                /// - when the `encoding` feature is disabled, the behavior is the
2105
                ///   same as in the `bom_from_reader` test
2106
                /// - when the `encoding` feature is enabled, the behavior should
2107
                ///   stay consistent, so the first BOM character is stripped
2108
                #[$test]
2109
                $($async)? fn bom_from_str() {
2110
                    // The input holds two U+FEFF characters in a row.
                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
2111
2112
                    // Only one U+FEFF is expected to come back, as a `Text` event.
                    assert_eq!(
2113
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2114
                        Event::Text(BytesText::from_escaped("\u{feff}"))
2115
                    );
2116
2117
                    // Nothing else is expected after that single text event.
                    assert_eq!(
2118
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2119
                        Event::Eof
2120
                    );
2121
                }
2122
2123
                /// The first event read from `<?xml ?>` is expected to be `Event::Decl`
                /// with the content `xml ` (the trailing space is kept).
                #[$test]
2124
                $($async)? fn declaration() {
2125
                    let mut reader = Reader::from_str("<?xml ?>");
2126
2127
                    assert_eq!(
2128
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2129
                        // NOTE(review): `3` is presumably the length of the name `xml`;
                        // confirm against `BytesStart::from_content`.
                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2130
                    );
2131
                }
2132
2133
                /// The first event read from `<!DOCTYPE x>` is expected to be
                /// `Event::DocType` with the content `x`.
                #[$test]
2134
                $($async)? fn doctype() {
2135
                    let mut reader = Reader::from_str("<!DOCTYPE x>");
2136
2137
                    assert_eq!(
2138
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2139
                        Event::DocType(BytesText::from_escaped("x"))
2140
                    );
2141
                }
2142
2143
                /// The first event read from a processing instruction is expected to be
                /// `Event::PI`. The content contains quotes and a `? >` sequence (with a
                /// space between `?` and `>`), all of which are expected to be kept; the
                /// expected content lacks only the leading `<?` and the trailing `?>`.
                #[$test]
2144
                $($async)? fn processing_instruction() {
2145
                    let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2146
2147
                    assert_eq!(
2148
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2149
                        Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2150
                    );
2151
                }
2152
2153
                /// Lone closing tags are not allowed, so the closing tag is tested together
2154
                /// with a start tag: `<tag></tag>` is expected to yield `Event::Start`
                /// followed directly by `Event::End`, both named `tag`.
                #[$test]
2155
                $($async)? fn start_and_end() {
2156
                    let mut reader = Reader::from_str("<tag></tag>");
2157
2158
                    assert_eq!(
2159
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2160
                        Event::Start(BytesStart::new("tag"))
2161
                    );
2162
2163
                    assert_eq!(
2164
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2165
                        Event::End(BytesEnd::new("tag"))
2166
                    );
2167
                }
2168
2169
                /// The first event read from the self-closed tag `<tag/>` is expected to
                /// be `Event::Empty` named `tag`.
                #[$test]
2170
                $($async)? fn empty() {
2171
                    let mut reader = Reader::from_str("<tag/>");
2172
2173
                    assert_eq!(
2174
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2175
                        Event::Empty(BytesStart::new("tag"))
2176
                    );
2177
                }
2178
2179
                /// The first event read from the markup-free input `text` is expected to
                /// be `Event::Text` carrying the same four characters.
                #[$test]
2180
                $($async)? fn text() {
2181
                    let mut reader = Reader::from_str("text");
2182
2183
                    assert_eq!(
2184
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2185
                        Event::Text(BytesText::from_escaped("text"))
2186
                    );
2187
                }
2188
2189
                /// The first event read from the empty CDATA section `<![CDATA[]]>` is
                /// expected to be `Event::CData` with empty content.
                #[$test]
2190
                $($async)? fn cdata() {
2191
                    let mut reader = Reader::from_str("<![CDATA[]]>");
2192
2193
                    assert_eq!(
2194
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2195
                        Event::CData(BytesCData::new(""))
2196
                    );
2197
                }
2198
2199
                /// The first event read from the empty comment `<!---->` is expected to
                /// be `Event::Comment` with empty content.
                #[$test]
2200
                $($async)? fn comment() {
2201
                    let mut reader = Reader::from_str("<!---->");
2202
2203
                    assert_eq!(
2204
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2205
                        Event::Comment(BytesText::from_escaped(""))
2206
                    );
2207
                }
2208
2209
                /// Reading from an empty input is expected to yield `Event::Eof` as the
                /// very first event, with no `Text` event before it.
                #[$test]
2210
                $($async)? fn eof() {
2211
                    let mut reader = Reader::from_str("");
2212
2213
                    assert_eq!(
2214
                        reader.$read_event($buf) $(.$await)? .unwrap(),
2215
                        Event::Eof
2216
                    );
2217
                }
2218
            }
2219
        };
2220
    }
2221
2222
    // Export macros for the child modules:
2223
    // - buffered_reader
2224
    // - slice_reader
2225
    pub(super) use check;
2226
}