Coverage Report

Created: 2025-09-27 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/html5ever/markup5ever/util/buffer_queue.rs
Line
Count
Source
1
// Copyright 2014-2017 The html5ever Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! The `BufferQueue` struct and helper types.
11
//!
12
//! This type is designed for the efficient parsing of string data, especially where many
13
//! significant characters are from the ascii range 0-63. This includes, for example, important
14
//! characters in xml/html parsing.
15
//!
16
//! Good and predictable performance is achieved by avoiding allocation where possible (a.k.a. zero
17
//! copy).
18
//!
19
//! [`BufferQueue`]: struct.BufferQueue.html
20
21
use std::{
22
    cell::{RefCell, RefMut},
23
    collections::VecDeque,
24
    mem,
25
};
26
27
use tendril::StrTendril;
28
29
pub use self::SetResult::{FromSet, NotFromSet};
30
use crate::util::smallcharset::SmallCharSet;
31
32
/// Result from [`pop_except_from`] containing either a character from a [`SmallCharSet`], or a
33
/// string buffer of characters not from the set.
34
///
35
/// [`pop_except_from`]: struct.BufferQueue.html#method.pop_except_from
36
/// [`SmallCharSet`]: ../struct.SmallCharSet.html
37
#[derive(PartialEq, Eq, Debug)]
38
pub enum SetResult {
39
    /// A character from the `SmallCharSet`.
40
    FromSet(char),
41
    /// A string buffer containing no characters from the `SmallCharSet`.
42
    NotFromSet(StrTendril),
43
}
44
45
/// A queue of owned string buffers, which supports incrementally consuming characters.
46
///
47
/// Internally it uses [`VecDeque`] and has the same complexity properties.
48
///
49
/// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
50
#[derive(Clone, Debug)]
51
pub struct BufferQueue {
52
    /// Buffers to process.
53
    buffers: RefCell<VecDeque<StrTendril>>,
54
}
55
56
impl Default for BufferQueue {
57
    /// Create an empty BufferQueue.
58
    #[inline]
59
24.7k
    fn default() -> Self {
60
24.7k
        Self {
61
24.7k
            buffers: RefCell::new(VecDeque::with_capacity(16)),
62
24.7k
        }
63
24.7k
    }
64
}
65
66
impl BufferQueue {
67
    /// Returns whether the queue is empty.
68
    #[inline]
69
36.9M
    pub fn is_empty(&self) -> bool {
70
36.9M
        self.buffers.borrow().is_empty()
71
36.9M
    }
72
73
    /// Get the buffer at the beginning of the queue.
74
    #[inline]
75
    pub fn pop_front(&self) -> Option<StrTendril> {
76
        self.buffers.borrow_mut().pop_front()
77
    }
78
79
    /// Add a buffer to the beginning of the queue.
80
    ///
81
    /// If the buffer is empty, it will be skipped.
82
11.0M
    pub fn push_front(&self, buf: StrTendril) {
83
11.0M
        if buf.len32() == 0 {
84
10.8M
            return;
85
201k
        }
86
201k
        self.buffers.borrow_mut().push_front(buf);
87
11.0M
    }
88
89
    /// Add a buffer to the end of the queue.
90
    ///
91
    /// If the buffer is empty, it will be skipped.
92
36.9M
    pub fn push_back(&self, buf: StrTendril) {
93
36.9M
        if buf.len32() == 0 {
94
0
            return;
95
36.9M
        }
96
36.9M
        self.buffers.borrow_mut().push_back(buf);
97
36.9M
    }
98
99
    /// Look at the next available character without removing it, if the queue is not empty.
100
37.8M
    pub fn peek(&self) -> Option<char> {
101
37.8M
        debug_assert!(
102
0
            !self.buffers.borrow().iter().any(|el| el.len32() == 0),
103
0
            "invariant \"all buffers in the queue are non-empty\" failed"
104
        );
105
37.8M
        self.buffers
106
37.8M
            .borrow()
107
37.8M
            .front()
108
37.8M
            .map(|b| b.chars().next().unwrap())
109
37.8M
    }
110
111
    /// Pops and returns either a single character from the given set, or
112
    /// a buffer of characters none of which are in the set.
113
    ///
114
    /// # Examples
115
    ///
116
    /// ```
117
    /// # #[macro_use] extern crate markup5ever;
118
    /// # #[macro_use] extern crate tendril;
119
    /// # fn main() {
120
    /// use markup5ever::buffer_queue::{BufferQueue, SetResult};
121
    ///
122
    /// let mut queue = BufferQueue::default();
123
    /// queue.push_back(format_tendril!(r#"<some_tag attr="text">SomeText</some_tag>"#));
124
    /// let set = small_char_set!(b'<' b'>' b' ' b'=' b'"' b'/');
125
    /// let tag = format_tendril!("some_tag");
126
    /// let attr = format_tendril!("attr");
127
    /// let attr_val = format_tendril!("text");
128
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('<')));
129
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(tag)));
130
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet(' ')));
131
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr)));
132
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('=')));
133
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('"')));
134
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr_val)));
135
    /// // ...
136
    /// # }
137
    /// ```
138
43.4M
    pub fn pop_except_from(&self, set: SmallCharSet) -> Option<SetResult> {
139
43.4M
        let (result, now_empty) = match self.buffers.borrow_mut().front_mut() {
140
15.6M
            None => (None, false),
141
27.7M
            Some(buf) => {
142
27.7M
                let n = set.nonmember_prefix_len(buf);
143
27.7M
                if n > 0 {
144
                    let out;
145
19.8M
                    unsafe {
146
19.8M
                        out = buf.unsafe_subtendril(0, n);
147
19.8M
                        buf.unsafe_pop_front(n);
148
19.8M
                    }
149
19.8M
                    (Some(NotFromSet(out)), buf.is_empty())
150
                } else {
151
7.96M
                    let c = buf.pop_front_char().expect("empty buffer in queue");
152
7.96M
                    (Some(FromSet(c)), buf.is_empty())
153
                }
154
            },
155
        };
156
157
        // Unborrow self for this part.
158
43.4M
        if now_empty {
159
16.0M
            self.buffers.borrow_mut().pop_front();
160
27.4M
        }
161
162
43.4M
        result
163
43.4M
    }
164
165
    /// Consume bytes matching the pattern, using a custom comparison function `eq`.
166
    ///
167
    /// Returns `Some(true)` if there is a match, `Some(false)` if there is no match, or `None` if
168
    /// it wasn't possible to know (more data is needed).
169
    ///
170
    /// The custom comparison function is used elsewhere to compare ascii-case-insensitively.
171
    ///
172
    /// # Examples
173
    ///
174
    /// ```
175
    /// # extern crate markup5ever;
176
    /// # #[macro_use] extern crate tendril;
177
    /// # fn main() {
178
    /// use markup5ever::buffer_queue::BufferQueue;
179
    ///
180
    /// let mut queue = BufferQueue::default();
181
    /// queue.push_back(format_tendril!("testtext"));
182
    /// let test_str = "test";
183
    /// assert_eq!(queue.eat("test", |&a, &b| a == b), Some(true));
184
    /// assert_eq!(queue.eat("text", |&a, &b| a == b), Some(true));
185
    /// assert!(queue.is_empty());
186
    /// # }
187
    /// ```
188
10.8M
    pub fn eat<F: Fn(&u8, &u8) -> bool>(&self, pat: &str, eq: F) -> Option<bool> {
189
10.8M
        let mut buffers_exhausted = 0;
190
10.8M
        let mut consumed_from_last = 0;
191
192
10.8M
        self.buffers.borrow().front()?;
193
194
13.0M
        for pattern_byte in pat.bytes() {
195
13.0M
            if buffers_exhausted >= self.buffers.borrow().len() {
196
6.29k
                return None;
197
13.0M
            }
198
13.0M
            let buf = &self.buffers.borrow()[buffers_exhausted];
199
200
13.0M
            if !eq(&buf.as_bytes()[consumed_from_last], &pattern_byte) {
201
10.4M
                return Some(false);
202
2.63M
            }
203
204
2.63M
            consumed_from_last += 1;
205
2.63M
            if consumed_from_last >= buf.len() {
206
150k
                buffers_exhausted += 1;
207
150k
                consumed_from_last = 0;
208
2.48M
            }
209
        }
210
211
        // We have a match. Commit changes to the BufferQueue.
212
403k
        for _ in 0..buffers_exhausted {
213
138k
            self.buffers.borrow_mut().pop_front();
214
138k
        }
215
216
403k
        match self.buffers.borrow_mut().front_mut() {
217
129k
            None => assert_eq!(consumed_from_last, 0),
218
273k
            Some(ref mut buf) => buf.pop_front(consumed_from_last as u32),
219
        }
220
221
403k
        Some(true)
222
10.8M
    }
223
224
    /// Get the next character if one is available, removing it from the queue.
225
    ///
226
    /// This function manages the buffers, removing them as they become empty.
227
91.9M
    pub fn next(&self) -> Option<char> {
228
91.9M
        let (result, now_empty) = match self.buffers.borrow_mut().front_mut() {
229
20.2M
            None => (None, false),
230
71.6M
            Some(buf) => {
231
71.6M
                let c = buf.pop_front_char().expect("empty buffer in queue");
232
71.6M
                (Some(c), buf.is_empty())
233
            },
234
        };
235
236
91.9M
        if now_empty {
237
20.7M
            self.buffers.borrow_mut().pop_front();
238
71.2M
        }
239
240
91.9M
        result
241
91.9M
    }
242
243
0
    pub fn replace_with(&self, other: BufferQueue) {
244
0
        let _ = mem::replace(&mut *self.buffers.borrow_mut(), other.buffers.take());
245
0
    }
246
247
0
    pub fn swap_with(&self, other: &BufferQueue) {
248
0
        mem::swap(
249
0
            &mut *self.buffers.borrow_mut(),
250
0
            &mut *other.buffers.borrow_mut(),
251
        );
252
0
    }
253
254
    /// Return a mutable reference to the first tendril in the queue.
255
0
    pub fn peek_front_chunk_mut(&self) -> Option<RefMut<'_, StrTendril>> {
256
0
        let buffers = self.buffers.borrow_mut();
257
0
        if buffers.is_empty() {
258
0
            return None;
259
0
        }
260
261
0
        let front_buffer = RefMut::map(buffers, |buffers| {
262
0
            buffers.front_mut().expect("there is at least one buffer")
263
0
        });
264
0
        Some(front_buffer)
265
0
    }
266
}
267
268
#[cfg(test)]
269
#[allow(non_snake_case)]
270
mod test {
271
    use tendril::SliceExt;
272
273
    use super::BufferQueue;
274
    use super::SetResult::{FromSet, NotFromSet};
275
276
    #[test]
277
    fn smoke_test() {
278
        let bq = BufferQueue::default();
279
        assert_eq!(bq.peek(), None);
280
        assert_eq!(bq.next(), None);
281
282
        bq.push_back("abc".to_tendril());
283
        assert_eq!(bq.peek(), Some('a'));
284
        assert_eq!(bq.next(), Some('a'));
285
        assert_eq!(bq.peek(), Some('b'));
286
        assert_eq!(bq.peek(), Some('b'));
287
        assert_eq!(bq.next(), Some('b'));
288
        assert_eq!(bq.peek(), Some('c'));
289
        assert_eq!(bq.next(), Some('c'));
290
        assert_eq!(bq.peek(), None);
291
        assert_eq!(bq.next(), None);
292
    }
293
294
    #[test]
295
    fn can_unconsume() {
296
        let bq = BufferQueue::default();
297
        bq.push_back("abc".to_tendril());
298
        assert_eq!(bq.next(), Some('a'));
299
300
        bq.push_front("xy".to_tendril());
301
        assert_eq!(bq.next(), Some('x'));
302
        assert_eq!(bq.next(), Some('y'));
303
        assert_eq!(bq.next(), Some('b'));
304
        assert_eq!(bq.next(), Some('c'));
305
        assert_eq!(bq.next(), None);
306
    }
307
308
    #[test]
309
    fn can_pop_except_set() {
310
        let bq = BufferQueue::default();
311
        bq.push_back("abc&def".to_tendril());
312
        let pop = || bq.pop_except_from(small_char_set!('&'));
313
        assert_eq!(pop(), Some(NotFromSet("abc".to_tendril())));
314
        assert_eq!(pop(), Some(FromSet('&')));
315
        assert_eq!(pop(), Some(NotFromSet("def".to_tendril())));
316
        assert_eq!(pop(), None);
317
    }
318
319
    #[test]
320
    fn can_eat() {
321
        // This is not very comprehensive.  We rely on the tokenizer
322
        // integration tests for more thorough testing with many
323
        // different input buffer splits.
324
        let bq = BufferQueue::default();
325
        bq.push_back("a".to_tendril());
326
        bq.push_back("bc".to_tendril());
327
        assert_eq!(bq.eat("abcd", u8::eq_ignore_ascii_case), None);
328
        assert_eq!(bq.eat("ax", u8::eq_ignore_ascii_case), Some(false));
329
        assert_eq!(bq.eat("ab", u8::eq_ignore_ascii_case), Some(true));
330
        assert_eq!(bq.next(), Some('c'));
331
        assert_eq!(bq.next(), None);
332
    }
333
}