Coverage Report

Created: 2026-04-12 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/tinystr-0.8.3/src/int_ops.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use crate::asciibyte::AsciiByte;
6
7
/// Internal helper struct that performs operations on aligned integers.
8
/// Supports strings up to 4 bytes long.
9
#[repr(transparent)]
10
pub struct Aligned4(u32);
11
12
impl Aligned4 {
13
    /// # Panics
14
    /// Panics if N is greater than 4
15
    #[inline]
16
0
    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
17
0
        let mut bytes = [0; 4];
18
0
        let mut i = 0;
19
        // The function documentation defines when panics may occur
20
        #[expect(clippy::indexing_slicing)]
21
0
        while i < N {
22
0
            bytes[i] = src[i] as u8;
23
0
            i += 1;
24
0
        }
25
0
        Self(u32::from_ne_bytes(bytes))
26
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<2>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<3>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<4>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<8>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<_>
27
28
0
    pub const fn len(&self) -> usize {
29
0
        let word = self.0;
30
        #[cfg(target_endian = "little")]
31
0
        let len = (4 - word.leading_zeros() / 8) as usize;
32
        #[cfg(target_endian = "big")]
33
        let len = (4 - word.trailing_zeros() / 8) as usize;
34
0
        len
35
0
    }
36
37
0
    pub const fn is_ascii_alphabetic(&self) -> bool {
38
0
        let word = self.0;
39
        // Each of the following bitmasks sets *the high bit* of each byte (0x80) to 0 for valid and 1 for invalid.
40
        // `mask` sets all NUL bytes to 0.
41
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
42
        // `lower` converts the string to lowercase. It may also change the value of non-alpha
43
        // characters, but this does not matter for the alphabetic test that follows.
44
0
        let lower = word | 0x2020_2020;
45
        // `alpha` sets all alphabetic bytes to 0. We only need check for lowercase characters.
46
0
        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
47
        // The overall string is valid if every character passes at least one test.
48
        // We performed two tests here: non-NUL (`mask`) and alphabetic (`alpha`).
49
0
        (alpha & mask) == 0
50
0
    }
51
52
0
    pub const fn is_ascii_alphanumeric(&self) -> bool {
53
0
        let word = self.0;
54
        // See explanatory comments in is_ascii_alphabetic
55
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
56
0
        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
57
0
        let lower = word | 0x2020_2020;
58
0
        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
59
0
        (alpha & numeric & mask) == 0
60
0
    }
61
62
0
    pub const fn is_ascii_numeric(&self) -> bool {
63
0
        let word = self.0;
64
        // See explanatory comments in is_ascii_alphabetic
65
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
66
0
        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
67
0
        (numeric & mask) == 0
68
0
    }
69
70
0
    pub const fn is_ascii_lowercase(&self) -> bool {
71
0
        let word = self.0;
72
        // For efficiency, this function tests for an invalid string rather than a valid string.
73
        // A string is ASCII lowercase iff it contains no uppercase ASCII characters.
74
        // `invalid_case` sets all uppercase ASCII characters to 0 and all others to 1.
75
0
        let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
76
        // The string is valid if it contains no invalid characters (if all high bits are 1).
77
0
        (invalid_case & 0x8080_8080) == 0x8080_8080
78
0
    }
79
80
0
    pub const fn is_ascii_titlecase(&self) -> bool {
81
0
        let word = self.0;
82
        // See explanatory comments in is_ascii_lowercase
83
0
        let invalid_case = if cfg!(target_endian = "little") {
84
0
            !(word + 0x3f3f_3f1f) | (word + 0x2525_2505)
85
        } else {
86
0
            !(word + 0x1f3f_3f3f) | (word + 0x0525_2525)
87
        };
88
0
        (invalid_case & 0x8080_8080) == 0x8080_8080
89
0
    }
90
91
0
    pub const fn is_ascii_uppercase(&self) -> bool {
92
0
        let word = self.0;
93
        // See explanatory comments in is_ascii_lowercase
94
0
        let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
95
0
        (invalid_case & 0x8080_8080) == 0x8080_8080
96
0
    }
97
98
0
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
99
0
        let word = self.0;
100
        // `mask` sets all NUL bytes to 0.
101
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
102
        // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1.
103
0
        let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
104
        // The overall string is valid if every character passes at least one test.
105
        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`lower_alpha`).
106
0
        (lower_alpha & mask) == 0
107
0
    }
108
109
0
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
110
0
        let word = self.0;
111
        // See explanatory comments in is_ascii_alphabetic_lowercase
112
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
113
0
        let title_case = if cfg!(target_endian = "little") {
114
0
            !(word + 0x1f1f_1f3f) | (word + 0x0505_0525)
115
        } else {
116
0
            !(word + 0x3f1f_1f1f) | (word + 0x2505_0505)
117
        };
118
0
        (title_case & mask) == 0
119
0
    }
120
121
0
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
122
0
        let word = self.0;
123
        // See explanatory comments in is_ascii_alphabetic_lowercase
124
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
125
0
        let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
126
0
        (upper_alpha & mask) == 0
127
0
    }
128
129
0
    pub const fn to_ascii_lowercase(&self) -> [AsciiByte; 4] {
130
0
        let word = self.0;
131
0
        let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2);
132
0
        unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) }
133
0
    }
134
135
0
    pub const fn to_ascii_titlecase(&self) -> [AsciiByte; 4] {
136
0
        let word = self.0.to_le();
137
0
        let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2;
138
0
        let result = (word | mask) & !(0x20 & mask);
139
0
        unsafe { AsciiByte::to_ascii_byte_array(&u32::from_le(result).to_ne_bytes()) }
140
0
    }
141
142
0
    pub const fn to_ascii_uppercase(&self) -> [AsciiByte; 4] {
143
0
        let word = self.0;
144
0
        let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2);
145
0
        unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) }
146
0
    }
147
}
148
149
/// Internal helper struct that performs operations on aligned integers.
150
/// Supports strings up to 8 bytes long.
151
#[repr(transparent)]
152
pub struct Aligned8(u64);
153
154
impl Aligned8 {
155
    /// # Panics
156
    /// Panics if N is greater than 8
157
    #[inline]
158
0
    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
159
0
        let mut bytes = [0; 8];
160
0
        let mut i = 0;
161
        // The function documentation defines when panics may occur
162
        #[expect(clippy::indexing_slicing)]
163
0
        while i < N {
164
0
            bytes[i] = src[i] as u8;
165
0
            i += 1;
166
0
        }
167
0
        Self(u64::from_ne_bytes(bytes))
168
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<2>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<3>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<4>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<8>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<_>
169
170
0
    pub const fn len(&self) -> usize {
171
0
        let word = self.0;
172
        #[cfg(target_endian = "little")]
173
0
        let len = (8 - word.leading_zeros() / 8) as usize;
174
        #[cfg(target_endian = "big")]
175
        let len = (8 - word.trailing_zeros() / 8) as usize;
176
0
        len
177
0
    }
178
179
0
    pub const fn is_ascii_alphabetic(&self) -> bool {
180
0
        let word = self.0;
181
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
182
0
        let lower = word | 0x2020_2020_2020_2020;
183
0
        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
184
0
        (alpha & mask) == 0
185
0
    }
186
187
0
    pub const fn is_ascii_alphanumeric(&self) -> bool {
188
0
        let word = self.0;
189
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
190
0
        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
191
0
        let lower = word | 0x2020_2020_2020_2020;
192
0
        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
193
0
        (alpha & numeric & mask) == 0
194
0
    }
195
196
0
    pub const fn is_ascii_numeric(&self) -> bool {
197
0
        let word = self.0;
198
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
199
0
        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
200
0
        (numeric & mask) == 0
201
0
    }
202
203
0
    pub const fn is_ascii_lowercase(&self) -> bool {
204
0
        let word = self.0;
205
0
        let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
206
0
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
207
0
    }
208
209
0
    pub const fn is_ascii_titlecase(&self) -> bool {
210
0
        let word = self.0;
211
0
        let invalid_case = if cfg!(target_endian = "little") {
212
0
            !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505)
213
        } else {
214
0
            !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525)
215
        };
216
0
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
217
0
    }
218
219
0
    pub const fn is_ascii_uppercase(&self) -> bool {
220
0
        let word = self.0;
221
0
        let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
222
0
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
223
0
    }
224
225
0
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
226
0
        let word = self.0;
227
        // `mask` sets all NUL bytes to 0.
228
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
229
        // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1.
230
0
        let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
231
        // The overall string is valid if every character passes at least one test.
232
        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`lower_alpha`).
233
0
        (lower_alpha & mask) == 0
234
0
    }
235
236
0
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
237
0
        let word = self.0;
238
        // See explanatory comments in is_ascii_alphabetic_lowercase
239
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
240
0
        let title_case = if cfg!(target_endian = "little") {
241
0
            !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525)
242
        } else {
243
0
            !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505)
244
        };
245
0
        (title_case & mask) == 0
246
0
    }
247
248
0
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
249
0
        let word = self.0;
250
        // See explanatory comments in is_ascii_alphabetic_lowercase
251
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
252
0
        let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
253
0
        (upper_alpha & mask) == 0
254
0
    }
255
256
0
    pub const fn to_ascii_lowercase(&self) -> [AsciiByte; 8] {
257
0
        let word = self.0;
258
0
        let result = word
259
0
            | (((word + 0x3f3f_3f3f_3f3f_3f3f)
260
0
                & !(word + 0x2525_2525_2525_2525)
261
0
                & 0x8080_8080_8080_8080)
262
0
                >> 2);
263
0
        unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) }
264
0
    }
265
266
0
    pub const fn to_ascii_titlecase(&self) -> [AsciiByte; 8] {
267
0
        let word = self.0.to_le();
268
0
        let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f)
269
0
            & !(word + 0x2525_2525_2525_2505)
270
0
            & 0x8080_8080_8080_8080)
271
0
            >> 2;
272
0
        let result = (word | mask) & !(0x20 & mask);
273
0
        unsafe { AsciiByte::to_ascii_byte_array(&u64::from_le(result).to_ne_bytes()) }
274
0
    }
275
276
0
    pub const fn to_ascii_uppercase(&self) -> [AsciiByte; 8] {
277
0
        let word = self.0;
278
0
        let result = word
279
0
            & !(((word + 0x1f1f_1f1f_1f1f_1f1f)
280
0
                & !(word + 0x0505_0505_0505_0505)
281
0
                & 0x8080_8080_8080_8080)
282
0
                >> 2);
283
0
        unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) }
284
0
    }
285
}