Coverage Report

Created: 2025-05-08 06:13

/src/html5ever/markup5ever/util/buffer_queue.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2014-2017 The html5ever Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! The `BufferQueue` struct and helper types.
11
//!
12
//! This type is designed for the efficient parsing of string data, especially where many
13
//! significant characters are from the ascii range 0-63. This includes, for example, important
14
//! characters in xml/html parsing.
15
//!
16
//! Good and predictable performance is achieved by avoiding allocation where possible (a.k.a. zero
17
//! copy).
18
//!
19
//! [`BufferQueue`]: struct.BufferQueue.html
20
21
use std::{cell::RefCell, collections::VecDeque, mem};
22
23
use tendril::StrTendril;
24
25
pub use self::SetResult::{FromSet, NotFromSet};
26
use crate::util::smallcharset::SmallCharSet;
27
28
/// Result from [`pop_except_from`] containing either a character from a [`SmallCharSet`], or a
29
/// string buffer of characters not from the set.
30
///
31
/// [`pop_except_from`]: struct.BufferQueue.html#method.pop_except_from
32
/// [`SmallCharSet`]: ../struct.SmallCharSet.html
33
#[derive(PartialEq, Eq, Debug)]
34
pub enum SetResult {
35
    /// A character from the `SmallCharSet`.
36
    FromSet(char),
37
    /// A string buffer containing no characters from the `SmallCharSet`.
38
    NotFromSet(StrTendril),
39
}
40
41
/// A queue of owned string buffers, which supports incrementally consuming characters.
42
///
43
/// Internally it uses [`VecDeque`] and has the same complexity properties.
44
///
45
/// [`VecDeque`]: https://doc.rust-lang.org/std/collections/struct.VecDeque.html
46
#[derive(Debug)]
47
pub struct BufferQueue {
48
    /// Buffers to process.
49
    buffers: RefCell<VecDeque<StrTendril>>,
50
}
51
52
impl Default for BufferQueue {
53
    /// Create an empty BufferQueue.
54
    #[inline]
55
24.2k
    fn default() -> Self {
56
24.2k
        Self {
57
24.2k
            buffers: RefCell::new(VecDeque::with_capacity(16)),
58
24.2k
        }
59
24.2k
    }
<markup5ever::util::buffer_queue::BufferQueue as core::default::Default>::default
Line
Count
Source
55
24.2k
    fn default() -> Self {
56
24.2k
        Self {
57
24.2k
            buffers: RefCell::new(VecDeque::with_capacity(16)),
58
24.2k
        }
59
24.2k
    }
Unexecuted instantiation: <markup5ever::util::buffer_queue::BufferQueue as core::default::Default>::default
60
}
61
62
impl BufferQueue {
63
    /// Returns whether the queue is empty.
64
    #[inline]
65
39.5M
    pub fn is_empty(&self) -> bool {
66
39.5M
        self.buffers.borrow().is_empty()
67
39.5M
    }
<markup5ever::util::buffer_queue::BufferQueue>::is_empty
Line
Count
Source
65
39.5M
    pub fn is_empty(&self) -> bool {
66
39.5M
        self.buffers.borrow().is_empty()
67
39.5M
    }
Unexecuted instantiation: <markup5ever::util::buffer_queue::BufferQueue>::is_empty
68
69
    /// Get the buffer at the beginning of the queue.
70
    #[inline]
71
0
    pub fn pop_front(&self) -> Option<StrTendril> {
72
0
        self.buffers.borrow_mut().pop_front()
73
0
    }
74
75
    /// Add a buffer to the beginning of the queue.
76
    ///
77
    /// If the buffer is empty, it will be skipped.
78
9.38M
    pub fn push_front(&self, buf: StrTendril) {
79
9.38M
        if buf.len32() == 0 {
80
9.02M
            return;
81
360k
        }
82
360k
        self.buffers.borrow_mut().push_front(buf);
83
9.38M
    }
84
85
    /// Add a buffer to the end of the queue.
86
    ///
87
    /// If the buffer is empty, it will be skipped.
88
39.5M
    pub fn push_back(&self, buf: StrTendril) {
89
39.5M
        if buf.len32() == 0 {
90
0
            return;
91
39.5M
        }
92
39.5M
        self.buffers.borrow_mut().push_back(buf);
93
39.5M
    }
94
95
    /// Look at the next available character without removing it, if the queue is not empty.
96
41.2M
    pub fn peek(&self) -> Option<char> {
97
41.2M
        debug_assert!(
98
0
            !self.buffers.borrow().iter().any(|el| el.len32() == 0),
99
0
            "invariant \"all buffers in the queue are non-empty\" failed"
100
        );
101
41.2M
        self.buffers
102
41.2M
            .borrow()
103
41.2M
            .front()
104
41.2M
            .map(|b| b.chars().next().unwrap())
105
41.2M
    }
106
107
    /// Pops and returns either a single character from the given set, or
108
    /// a buffer of characters none of which are in the set.
109
    ///
110
    /// # Examples
111
    ///
112
    /// ```
113
    /// # #[macro_use] extern crate markup5ever;
114
    /// # #[macro_use] extern crate tendril;
115
    /// # fn main() {
116
    /// use markup5ever::buffer_queue::{BufferQueue, SetResult};
117
    ///
118
    /// let mut queue = BufferQueue::default();
119
    /// queue.push_back(format_tendril!(r#"<some_tag attr="text">SomeText</some_tag>"#));
120
    /// let set = small_char_set!(b'<' b'>' b' ' b'=' b'"' b'/');
121
    /// let tag = format_tendril!("some_tag");
122
    /// let attr = format_tendril!("attr");
123
    /// let attr_val = format_tendril!("text");
124
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('<')));
125
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(tag)));
126
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet(' ')));
127
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr)));
128
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('=')));
129
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::FromSet('"')));
130
    /// assert_eq!(queue.pop_except_from(set), Some(SetResult::NotFromSet(attr_val)));
131
    /// // ...
132
    /// # }
133
    /// ```
134
25.6M
    pub fn pop_except_from(&self, set: SmallCharSet) -> Option<SetResult> {
135
25.6M
        let (result, now_empty) = match self.buffers.borrow_mut().front_mut() {
136
7.46M
            None => (None, false),
137
18.1M
            Some(buf) => {
138
18.1M
                let n = set.nonmember_prefix_len(buf);
139
18.1M
                if n > 0 {
140
                    let out;
141
10.5M
                    unsafe {
142
10.5M
                        out = buf.unsafe_subtendril(0, n);
143
10.5M
                        buf.unsafe_pop_front(n);
144
10.5M
                    }
145
10.5M
                    (Some(NotFromSet(out)), buf.is_empty())
146
                } else {
147
7.62M
                    let c = buf.pop_front_char().expect("empty buffer in queue");
148
7.62M
                    (Some(FromSet(c)), buf.is_empty())
149
                }
150
            },
151
        };
152
153
        // Unborrow self for this part.
154
25.6M
        if now_empty {
155
7.78M
            self.buffers.borrow_mut().pop_front();
156
17.8M
        }
157
158
25.6M
        result
159
25.6M
    }
160
161
    /// Consume bytes matching the pattern, using a custom comparison function `eq`.
162
    ///
163
    /// Returns `Some(true)` if there is a match, `Some(false)` if there is no match, or `None` if
164
    /// it wasn't possible to know (more data is needed).
165
    ///
166
    /// The custom comparison function is used elsewhere to compare ascii-case-insensitively.
167
    ///
168
    /// # Examples
169
    ///
170
    /// ```
171
    /// # extern crate markup5ever;
172
    /// # #[macro_use] extern crate tendril;
173
    /// # fn main() {
174
    /// use markup5ever::buffer_queue::BufferQueue;
175
    ///
176
    /// let mut queue = BufferQueue::default();
177
    /// queue.push_back(format_tendril!("testtext"));
178
    /// let test_str = "test";
179
    /// assert_eq!(queue.eat("test", |&a, &b| a == b), Some(true));
180
    /// assert_eq!(queue.eat("text", |&a, &b| a == b), Some(true));
181
    /// assert!(queue.is_empty());
182
    /// # }
183
    /// ```
184
9.02M
    pub fn eat<F: Fn(&u8, &u8) -> bool>(&self, pat: &str, eq: F) -> Option<bool> {
185
9.02M
        let mut buffers_exhausted = 0;
186
9.02M
        let mut consumed_from_last = 0;
187
9.02M
188
9.02M
        self.buffers.borrow().front()?;
189
190
11.7M
        for pattern_byte in pat.bytes() {
191
11.7M
            if buffers_exhausted >= self.buffers.borrow().len() {
192
2.67k
                return None;
193
11.7M
            }
194
11.7M
            let buf = &self.buffers.borrow()[buffers_exhausted];
195
11.7M
196
11.7M
            if !eq(&buf.as_bytes()[consumed_from_last], &pattern_byte) {
197
8.56M
                return Some(false);
198
3.16M
            }
199
3.16M
200
3.16M
            consumed_from_last += 1;
201
3.16M
            if consumed_from_last >= buf.len() {
202
121k
                buffers_exhausted += 1;
203
121k
                consumed_from_last = 0;
204
3.04M
            }
205
        }
206
207
        // We have a match. Commit changes to the BufferQueue.
208
452k
        for _ in 0..buffers_exhausted {
209
116k
            self.buffers.borrow_mut().pop_front();
210
116k
        }
211
212
452k
        match self.buffers.borrow_mut().front_mut() {
213
111k
            None => assert_eq!(consumed_from_last, 0),
214
340k
            Some(ref mut buf) => buf.pop_front(consumed_from_last as u32),
215
        }
216
217
452k
        Some(true)
218
9.02M
    }
<markup5ever::util::buffer_queue::BufferQueue>::eat::<<u8>::eq_ignore_ascii_case>
Line
Count
Source
184
9.02M
    pub fn eat<F: Fn(&u8, &u8) -> bool>(&self, pat: &str, eq: F) -> Option<bool> {
185
9.02M
        let mut buffers_exhausted = 0;
186
9.02M
        let mut consumed_from_last = 0;
187
9.02M
188
9.02M
        self.buffers.borrow().front()?;
189
190
11.7M
        for pattern_byte in pat.bytes() {
191
11.7M
            if buffers_exhausted >= self.buffers.borrow().len() {
192
2.67k
                return None;
193
11.7M
            }
194
11.7M
            let buf = &self.buffers.borrow()[buffers_exhausted];
195
11.7M
196
11.7M
            if !eq(&buf.as_bytes()[consumed_from_last], &pattern_byte) {
197
8.56M
                return Some(false);
198
3.16M
            }
199
3.16M
200
3.16M
            consumed_from_last += 1;
201
3.16M
            if consumed_from_last >= buf.len() {
202
121k
                buffers_exhausted += 1;
203
121k
                consumed_from_last = 0;
204
3.04M
            }
205
        }
206
207
        // We have a match. Commit changes to the BufferQueue.
208
452k
        for _ in 0..buffers_exhausted {
209
116k
            self.buffers.borrow_mut().pop_front();
210
116k
        }
211
212
452k
        match self.buffers.borrow_mut().front_mut() {
213
111k
            None => assert_eq!(consumed_from_last, 0),
214
340k
            Some(ref mut buf) => buf.pop_front(consumed_from_last as u32),
215
        }
216
217
452k
        Some(true)
218
9.02M
    }
Unexecuted instantiation: <markup5ever::util::buffer_queue::BufferQueue>::eat::<_>
219
220
    /// Get the next character if one is available, removing it from the queue.
221
    ///
222
    /// This function manages the buffers, removing them as they become empty.
223
104M
    pub fn next(&self) -> Option<char> {
224
104M
        let (result, now_empty) = match self.buffers.borrow_mut().front_mut() {
225
31.2M
            None => (None, false),
226
73.5M
            Some(buf) => {
227
73.5M
                let c = buf.pop_front_char().expect("empty buffer in queue");
228
73.5M
                (Some(c), buf.is_empty())
229
            },
230
        };
231
232
104M
        if now_empty {
233
31.7M
            self.buffers.borrow_mut().pop_front();
234
73.0M
        }
235
236
104M
        result
237
104M
    }
238
239
0
    pub fn replace_with(&self, other: BufferQueue) {
240
0
        let _ = mem::replace(&mut *self.buffers.borrow_mut(), other.buffers.take());
241
0
    }
242
243
0
    pub fn swap_with(&self, other: &BufferQueue) {
244
0
        mem::swap(
245
0
            &mut *self.buffers.borrow_mut(),
246
0
            &mut *other.buffers.borrow_mut(),
247
0
        );
248
0
    }
249
}
250
251
#[cfg(test)]
252
#[allow(non_snake_case)]
253
mod test {
254
    use tendril::SliceExt;
255
256
    use super::BufferQueue;
257
    use super::SetResult::{FromSet, NotFromSet};
258
259
    #[test]
260
    fn smoke_test() {
261
        let bq = BufferQueue::default();
262
        assert_eq!(bq.peek(), None);
263
        assert_eq!(bq.next(), None);
264
265
        bq.push_back("abc".to_tendril());
266
        assert_eq!(bq.peek(), Some('a'));
267
        assert_eq!(bq.next(), Some('a'));
268
        assert_eq!(bq.peek(), Some('b'));
269
        assert_eq!(bq.peek(), Some('b'));
270
        assert_eq!(bq.next(), Some('b'));
271
        assert_eq!(bq.peek(), Some('c'));
272
        assert_eq!(bq.next(), Some('c'));
273
        assert_eq!(bq.peek(), None);
274
        assert_eq!(bq.next(), None);
275
    }
276
277
    #[test]
278
    fn can_unconsume() {
279
        let bq = BufferQueue::default();
280
        bq.push_back("abc".to_tendril());
281
        assert_eq!(bq.next(), Some('a'));
282
283
        bq.push_front("xy".to_tendril());
284
        assert_eq!(bq.next(), Some('x'));
285
        assert_eq!(bq.next(), Some('y'));
286
        assert_eq!(bq.next(), Some('b'));
287
        assert_eq!(bq.next(), Some('c'));
288
        assert_eq!(bq.next(), None);
289
    }
290
291
    #[test]
292
    fn can_pop_except_set() {
293
        let bq = BufferQueue::default();
294
        bq.push_back("abc&def".to_tendril());
295
        let pop = || bq.pop_except_from(small_char_set!('&'));
296
        assert_eq!(pop(), Some(NotFromSet("abc".to_tendril())));
297
        assert_eq!(pop(), Some(FromSet('&')));
298
        assert_eq!(pop(), Some(NotFromSet("def".to_tendril())));
299
        assert_eq!(pop(), None);
300
    }
301
302
    #[test]
303
    fn can_eat() {
304
        // This is not very comprehensive.  We rely on the tokenizer
305
        // integration tests for more thorough testing with many
306
        // different input buffer splits.
307
        let bq = BufferQueue::default();
308
        bq.push_back("a".to_tendril());
309
        bq.push_back("bc".to_tendril());
310
        assert_eq!(bq.eat("abcd", u8::eq_ignore_ascii_case), None);
311
        assert_eq!(bq.eat("ax", u8::eq_ignore_ascii_case), Some(false));
312
        assert_eq!(bq.eat("ab", u8::eq_ignore_ascii_case), Some(true));
313
        assert_eq!(bq.next(), Some('c'));
314
        assert_eq!(bq.next(), None);
315
    }
316
}