Coverage Report

Created: 2025-05-08 06:13

/src/html5ever/markup5ever/util/smallcharset.rs
Line
Count
Source
1
// Copyright 2014-2017 The html5ever Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! This module contains a single struct [`SmallCharSet`]. See its documentation for details.
11
//!
12
//! [`SmallCharSet`]: struct.SmallCharSet.html
13
14
/// Represents a set of "small characters", those with Unicode scalar
15
/// values less than 64.
16
///
17
/// This is stored as a bitmap, with 1 bit for each value.
18
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
19
pub struct SmallCharSet {
20
    pub bits: u64,
21
}
22
23
impl SmallCharSet {
24
    /// Checks whether a character (u8 value below 64) is stored in the SmallCharSet.
25
    ///
26
    /// # Examples
27
    ///
28
    /// ```ignore
29
    /// # use markup5ever::SmallCharSet;
30
    /// let set = SmallCharSet {
31
    ///     bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
32
    /// };
33
    /// assert!(set.contains(54));
34
    /// assert!(set.contains(b'6')); // `b'6'` is the same as 54u8
35
    /// ```
36
    #[inline]
37
18.3M
    fn contains(&self, n: u8) -> bool {
38
18.3M
        0 != (self.bits & (1 << (n as usize)))
39
18.3M
    }
40
41
    /// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
42
    ///
43
    /// This functionality is used in [`BufferQueue::pop_except_from`].
44
    ///
45
    /// # Examples
46
    ///
47
    /// ```
48
    /// # #[macro_use] extern crate markup5ever;
49
    /// # fn main() {
50
    /// let set = small_char_set!(48 49 50); // '0' '1' '2'
51
    /// // `test` is 4 bytes, 😁 is 4 bytes, then we meet a character in the set
52
    /// let test_str = "test😁01232afd";
53
    /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
54
    /// # }
55
    /// ```
56
    ///
57
    /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
58
18.1M
    pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
59
18.1M
        let mut n = 0;
60
48.0M
        for b in buf.bytes() {
61
48.0M
            if b >= 64 || !self.contains(b) {
62
37.3M
                n += 1;
63
37.3M
            } else {
64
10.7M
                break;
65
            }
66
        }
67
18.1M
        n
68
18.1M
    }
69
}
70
71
#[cfg(test)]
72
mod test {
73
    #[test]
74
    fn nonmember_prefix() {
75
        for &c in ['&', '\0'].iter() {
76
            for x in 0..48u32 {
77
                for y in 0..48u32 {
78
                    let mut s = "x".repeat(x as usize);
79
                    s.push(c);
80
                    s.push_str(&"x".repeat(y as usize));
81
                    let set = small_char_set!('&' '\0');
82
83
                    assert_eq!(x, set.nonmember_prefix_len(&s));
84
                }
85
            }
86
        }
87
    }
88
}