Coverage Report

Created: 2026-01-13 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rust-url/percent_encoding/src/ascii_set.rs
Line
Count
Source
1
// Copyright 2013-2016 The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
use core::{mem, ops};
10
11
/// Represents a set of characters or bytes in the ASCII range.
12
///
13
/// This is used in [`percent_encode`] and [`utf8_percent_encode`].
14
/// This is similar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes).
15
///
16
/// Use the `add` method of an existing set to define a new set. For example:
17
///
18
/// [`percent_encode`]: crate::percent_encode
19
/// [`utf8_percent_encode`]: crate::utf8_percent_encode
20
///
21
/// ```
22
/// use percent_encoding::{AsciiSet, CONTROLS};
23
///
24
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
25
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
26
/// ```
27
#[derive(Debug, PartialEq, Eq)]
28
pub struct AsciiSet {
29
    mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK], // bitmask with one bit per ASCII byte, packed into `Chunk` words
30
}
31
32
type Chunk = u32; // word type backing the bitmask; its size determines BITS_PER_CHUNK
33
34
const ASCII_RANGE_LEN: usize = 0x80; // number of ASCII byte values, 0x00 through 0x7F
35
36
const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>(); // 32 while `Chunk` is `u32`
37
38
impl AsciiSet {
39
    /// An empty set.
40
    pub const EMPTY: Self = Self {
41
        mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
42
    };
43
    /// Return whether `byte` is a member of this set.
44
    /// Called with UTF-8 bytes rather than code points.
45
    /// Not used for non-ASCII bytes: a byte >= 0x80 would index past the end of `mask` and panic.
46
812M
    pub(crate) const fn contains(&self, byte: u8) -> bool {
47
812M
        let chunk = self.mask[byte as usize / BITS_PER_CHUNK]; // chunk that holds this byte's bit
48
812M
        let mask = 1 << (byte as usize % BITS_PER_CHUNK); // single-bit mask for the byte's position inside that chunk
49
812M
        (chunk & mask) != 0
50
812M
    }
51
    /// Return whether `byte` must be percent-encoded: every non-ASCII byte, plus the ASCII bytes in this set.
52
843M
    pub(crate) fn should_percent_encode(&self, byte: u8) -> bool {
53
843M
        !byte.is_ascii() || self.contains(byte) // `||` short-circuits, so `contains` only ever sees ASCII bytes here
54
843M
    }
55
    /// Return a copy of this set with `byte` added; `self` is unchanged. Panics (index out of bounds) if `byte` is not ASCII.
56
0
    pub const fn add(&self, byte: u8) -> Self {
57
0
        let mut mask = self.mask; // the array is `Copy`, so the edit below touches a copy only
58
0
        mask[byte as usize / BITS_PER_CHUNK] |= 1 << (byte as usize % BITS_PER_CHUNK);
59
0
        Self { mask }
60
0
    }
61
    /// Return a copy of this set with `byte` removed; `self` is unchanged. Panics (index out of bounds) if `byte` is not ASCII.
62
0
    pub const fn remove(&self, byte: u8) -> Self {
63
0
        let mut mask = self.mask; // the array is `Copy`, so the edit below touches a copy only
64
0
        mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK));
65
0
        Self { mask }
66
0
    }
67
68
    /// Return the union of two sets: every byte that is in `self` or in `other`.
69
0
    pub const fn union(&self, other: Self) -> Self {
70
0
        let mask = [ // written out per chunk; the mask has four `u32` chunks (0x80 / 32)
71
0
            self.mask[0] | other.mask[0],
72
0
            self.mask[1] | other.mask[1],
73
0
            self.mask[2] | other.mask[2],
74
0
            self.mask[3] | other.mask[3],
75
0
        ];
76
0
        Self { mask }
77
0
    }
78
79
    /// Return the negation of the set: every ASCII byte that is not in `self`.
80
0
    pub const fn complement(&self) -> Self {
81
0
        let mask = [!self.mask[0], !self.mask[1], !self.mask[2], !self.mask[3]]; // flip all 128 membership bits
82
0
        Self { mask }
83
0
    }
84
}
85
86
impl ops::Add for AsciiSet {
87
    type Output = Self;
88
    /// `left + right` is the union of the two sets; delegates to [`AsciiSet::union`].
89
0
    fn add(self, other: Self) -> Self {
90
0
        self.union(other)
91
0
    }
92
}
93
94
impl ops::Not for AsciiSet {
95
    type Output = Self;
96
    /// `!set` is the complement of the set; delegates to [`AsciiSet::complement`].
97
0
    fn not(self) -> Self {
98
0
        self.complement()
99
0
    }
100
}
101
102
/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
103
///
104
/// Note that this includes the newline and tab characters, but not the space 0x20.
105
///
106
/// <https://url.spec.whatwg.org/#c0-control-percent-encode-set>
107
pub const CONTROLS: &AsciiSet = &AsciiSet {
108
    mask: [
109
        !0_u32, // C0: 0x00 to 0x1F (32 bits set)
110
        0, // 0x20 to 0x3F: nothing set
111
        0, // 0x40 to 0x5F: nothing set
112
        1 << (0x7F_u32 % 32), // DEL: 0x7F (one bit set)
113
    ],
114
};
115
116
macro_rules! static_assert { // compile-time assertions over a comma-terminated list of constant boolean expressions
117
    ($( $bool: expr, )+) => {
118
0
        fn _static_assert() { // not called anywhere in this file; it exists so that its body gets type-checked
119
            $(
120
0
                let _ = mem::transmute::<[u8; $bool as usize], u8>; // `true` gives `[u8; 1]` (same size as `u8`); `false` gives `[u8; 0]`, a size mismatch meant to fail the build
121
            )+
122
0
        }
123
    }
124
}
125
126
static_assert! { // boundary checks for the hand-written CONTROLS mask
127
    CONTROLS.contains(0x00), // first C0 control
128
    CONTROLS.contains(0x1F), // last C0 control
129
    !CONTROLS.contains(0x20), // space is not a control
130
    !CONTROLS.contains(0x7E), // `~`, the byte just below DEL
131
    CONTROLS.contains(0x7F), // DEL
132
}
133
134
/// Everything that is not an ASCII letter or digit.
135
///
136
/// This is probably more eager than necessary in any context.
137
pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS // start from the controls, then add every remaining non-alphanumeric ASCII byte
138
    .add(b' ') // 0x20..=0x2F: space and the punctuation below the digits
139
    .add(b'!')
140
    .add(b'"')
141
    .add(b'#')
142
    .add(b'$')
143
    .add(b'%')
144
    .add(b'&')
145
    .add(b'\'')
146
    .add(b'(')
147
    .add(b')')
148
    .add(b'*')
149
    .add(b'+')
150
    .add(b',')
151
    .add(b'-')
152
    .add(b'.')
153
    .add(b'/')
154
    .add(b':') // 0x3A..=0x40: between the digits and the uppercase letters
155
    .add(b';')
156
    .add(b'<')
157
    .add(b'=')
158
    .add(b'>')
159
    .add(b'?')
160
    .add(b'@')
161
    .add(b'[') // 0x5B..=0x60: between the uppercase and the lowercase letters
162
    .add(b'\\')
163
    .add(b']')
164
    .add(b'^')
165
    .add(b'_')
166
    .add(b'`')
167
    .add(b'{') // 0x7B..=0x7E: after the lowercase letters (0x7F DEL is already in CONTROLS)
168
    .add(b'|')
169
    .add(b'}')
170
    .add(b'~');
171
172
#[cfg(test)]
173
mod tests {
174
    use super::*;
175
    /// The `+` operator on two sets yields the same set as adding both bytes to one set.
176
    #[test]
177
    fn add_op() {
178
        let left = AsciiSet::EMPTY.add(b'A');
179
        let right = AsciiSet::EMPTY.add(b'B');
180
        let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
181
        assert_eq!(left + right, expected);
182
    }
183
    /// The `!` operator drops the bytes that were in the set and includes a byte that was not.
184
    #[test]
185
    fn not_op() {
186
        let set = AsciiSet::EMPTY.add(b'A').add(b'B');
187
        let not_set = !set;
188
        assert!(!not_set.contains(b'A'));
189
        assert!(not_set.contains(b'C'));
190
    }
191
192
    /// This test ensures that we can get the union of two sets as a constant value, which is
193
    /// useful for defining sets in a modular way.
194
    #[test]
195
    fn union() {
196
        const A: AsciiSet = AsciiSet::EMPTY.add(b'A');
197
        const B: AsciiSet = AsciiSet::EMPTY.add(b'B');
198
        const UNION: AsciiSet = A.union(B); // evaluated at compile time because `union` is a `const fn`
199
        const EXPECTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
200
        assert_eq!(UNION, EXPECTED);
201
    }
202
203
    /// This test ensures that we can get the complement of a set as a constant value, which is
204
    /// useful for defining sets in a modular way.
205
    #[test]
206
    fn complement() {
207
        const BOTH: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
208
        const COMPLEMENT: AsciiSet = BOTH.complement(); // evaluated at compile time because `complement` is a `const fn`
209
        assert!(!COMPLEMENT.contains(b'A'));
210
        assert!(!COMPLEMENT.contains(b'B'));
211
        assert!(COMPLEMENT.contains(b'C'));
212
    }
213
}