/src/html5ever/markup5ever/util/smallcharset.rs
Line | Count | Source |
1 | | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | | // COPYRIGHT file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
10 | | //! This module contains a single struct [`SmallCharSet`]. See its documentation for details. |
11 | | //! |
12 | | //! [`SmallCharSet`]: struct.SmallCharSet.html |
13 | | |
/// Represents a set of "small characters", those with Unicode scalar
/// values less than 64.
///
/// This is stored as a bitmap, with 1 bit for each value: bit `n` of
/// [`bits`](#structfield.bits) is set exactly when the character with
/// scalar value `n` belongs to the set.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub struct SmallCharSet {
    /// Membership bitmap; bit `n` (counting from the least significant bit)
    /// stands for the character with scalar value `n`.
    pub bits: u64,
}

impl SmallCharSet {
    /// Checks whether a character (u8 value below 64) is stored in the SmallCharSet.
    ///
    /// Values of 64 and above cannot be represented in the 64-bit bitmap, so
    /// they are never members and `false` is returned for them. The explicit
    /// range check also keeps the shift below from overflowing.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// # use markup5ever::SmallCharSet;
    /// let set = SmallCharSet {
    ///     bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000
    /// };
    /// assert!(set.contains(54));
    /// assert!(set.contains(b'6')); // `b'6'` is the same as 54u8
    /// assert!(!set.contains(64)); // out of range, never a member
    /// ```
    #[inline]
    fn contains(&self, n: u8) -> bool {
        // Shifting a `u64` by 64 or more is an overflow (a panic in debug
        // builds, a masked shift in release), so reject such values first.
        n < 64 && 0 != (self.bits & (1u64 << n))
    }

    /// Count the number of bytes of characters at the beginning of `buf` which are not in the set.
    ///
    /// Every byte of a multi-byte UTF-8 sequence is at least 128 and therefore
    /// never a member, so the scan can only stop on an ASCII byte.
    ///
    /// This functionality is used in [`BufferQueue::pop_except_from`].
    ///
    /// # Examples
    ///
    /// ```
    /// # #[macro_use] extern crate markup5ever;
    /// # fn main() {
    /// let set = small_char_set!(48 49 50); // '0' '1' '2'
    /// // `test` is 4 bytes, U+1F601 is 4 bytes in UTF-8, then we meet a character in the set
    /// let test_str = "test\u{1F601}01232afd";
    /// assert_eq!(set.nonmember_prefix_len(test_str), 8);
    /// # }
    /// ```
    ///
    /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from
    pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
        // The `u32` return type is part of the public interface; the cast
        // truncates only for prefixes longer than `u32::MAX` bytes.
        buf.bytes().take_while(|&b| !self.contains(b)).count() as u32
    }
}
70 | | |
#[cfg(test)]
mod test {
    /// Exhaustively checks `nonmember_prefix_len` on inputs of the form
    /// `"x" * prefix + member + "x" * suffix`, for each member of the set and
    /// every prefix/suffix length below 48. The reported length must always
    /// equal the number of leading `x` bytes.
    #[test]
    fn nonmember_prefix() {
        // The set under test does not depend on the loop variables.
        let set = small_char_set!('&' '\0');

        for &member in &['&', '\0'] {
            for prefix_len in 0..48u32 {
                for suffix_len in 0..48u32 {
                    let input = format!(
                        "{}{}{}",
                        "x".repeat(prefix_len as usize),
                        member,
                        "x".repeat(suffix_len as usize)
                    );

                    assert_eq!(prefix_len, set.nonmember_prefix_len(&input));
                }
            }
        }
    }
}