Coverage Report

Created: 2025-10-31 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/tinystr-0.8.1/src/int_ops.rs
Line
Count
Source
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5
use crate::asciibyte::AsciiByte;
6
7
/// Internal helper struct that performs operations on aligned integers.
8
/// Supports strings up to 4 bytes long.
///
/// The string bytes are packed into a single `u32` in native byte order; positions that are
/// not filled by the constructor hold NUL (0) bytes.
9
#[repr(transparent)]
10
pub struct Aligned4(u32);
11
12
impl Aligned4 {
13
    /// Packs the `N` bytes of `src` into a native-endian `u32`, leaving the remaining
    /// positions zero-filled.
    ///
    /// # Panics
14
    /// Panics if N is greater than 4
15
    #[inline]
16
0
    pub const fn from_utf8<const N: usize>(src: &[u8; N]) -> Self {
17
0
        // Zero-initialised so that positions past `N` stay NUL.
        let mut bytes = [0; 4];
18
0
        let mut i = 0;
19
        // The function documentation defines when panics may occur
        // (`bytes[i]` is out of bounds as soon as `i` reaches 4).
20
        #[allow(clippy::indexing_slicing)]
21
0
        while i < N {
22
0
            bytes[i] = src[i];
23
0
            i += 1;
24
0
        }
25
0
        // Native byte order keeps the in-memory order of the integer equal to the order of `src`.
        Self(u32::from_ne_bytes(bytes))
26
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_utf8::<2>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_utf8::<3>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_utf8::<4>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_utf8::<8>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_utf8::<_>
27
28
    /// Same as `from_utf8`, but takes the input as an array of `AsciiByte`.
    ///
    /// # Panics
    /// Panics if N is greater than 4 (the panic originates in `from_utf8`).
    #[inline]
29
0
    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
30
0
        // SAFETY: NOTE(review): this reinterprets `&[AsciiByte; N]` as `&[u8; N]`, which relies on
        // `AsciiByte` having the same size and layout as `u8`. Its definition is not visible
        // here -- confirm in `crate::asciibyte`.
        Self::from_utf8::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) })
31
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<2>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<3>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<4>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<8>
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<_>
32
33
    /// Returns the four stored bytes in native byte order, i.e. the inverse of the
    /// `u32::from_ne_bytes` packing performed by `from_utf8`.
    #[inline]
34
0
    pub const fn to_bytes(&self) -> [u8; 4] {
35
0
        self.0.to_ne_bytes()
36
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::to_bytes
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::to_bytes
37
38
    /// Returns the four stored bytes (see `to_bytes`) as `AsciiByte` values.
    #[inline]
39
0
    pub const fn to_ascii_bytes(&self) -> [AsciiByte; 4] {
40
0
        // SAFETY: NOTE(review): this reinterprets `[u8; 4]` as `[AsciiByte; 4]`, which presumably
        // relies on `AsciiByte` being a one-byte type for which every stored byte is a valid
        // value. Its definition is not visible here -- confirm in `crate::asciibyte`.
        unsafe { core::mem::transmute(self.to_bytes()) }
41
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::to_ascii_bytes
Unexecuted instantiation: <tinystr::int_ops::Aligned4>::to_ascii_bytes
42
43
0
    /// Returns 4 minus the number of whole zero bytes at the end of the stored bytes (in memory
    /// order), i.e. the string length when the unused tail is NUL padding.
    pub const fn len(&self) -> usize {
44
0
        let word = self.0;
45
        // Little-endian: the last byte in memory is the most significant one, so trailing NUL
        // bytes appear as leading zero bits. Dividing by 8 counts whole zero bytes only.
        #[cfg(target_endian = "little")]
46
0
        let len = (4 - word.leading_zeros() / 8) as usize;
47
        // Big-endian: the last byte in memory is the least significant one.
        #[cfg(target_endian = "big")]
48
        let len = (4 - word.trailing_zeros() / 8) as usize;
49
0
        len
50
0
    }
51
52
0
    /// Returns `true` if every byte is either NUL or an ASCII letter (`A-Z`, `a-z`).
    ///
    /// All four bytes are tested at once with per-byte arithmetic on the packed `u32`.
    /// NOTE(review): the additions assume every byte is below 0x80, so that no carry crosses
    /// into the neighbouring byte -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic(&self) -> bool {
53
0
        let word = self.0;
54
        // Each of the following bitmasks sets *the high bit* (0x80) of a byte to 0 for valid and
        // 1 for invalid.
55
        // `mask` sets all NUL bytes to 0 (a byte plus 0x7f reaches 0x80 only if it is non-zero).
56
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
57
        // `lower` converts the string to lowercase. It may also change the value of non-alpha
58
        // characters, but this does not matter for the alphabetic test that follows.
59
0
        let lower = word | 0x2020_2020;
60
        // `alpha` sets all alphabetic bytes to 0. We only need to check for lowercase characters:
        // the high bit is 1 if the byte is below 0x61 ('a') or at least 0x7b (one past 'z').
61
0
        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
62
        // The overall string is valid if every character passes at least one test.
63
        // We performed two tests here: non-NUL (`mask`) and alphabetic (`alpha`).
64
0
        (alpha & mask) == 0
65
0
    }
66
67
0
    /// Returns `true` if every byte is either NUL, an ASCII letter or an ASCII digit.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphanumeric(&self) -> bool {
68
0
        let word = self.0;
69
        // See explanatory comments in is_ascii_alphabetic
        // High bit of `mask` is 1 for every non-NUL byte.
70
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
        // High bit of `numeric` is 0 only for bytes in 0x30..=0x39 ('0'..='9').
71
0
        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
72
0
        let lower = word | 0x2020_2020;
        // High bit of `alpha` is 0 only for bytes that are letters once bit 0x20 is forced on.
73
0
        let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505);
        // A byte fails only if it is non-NUL, not a letter and not a digit.
74
0
        (alpha & numeric & mask) == 0
75
0
    }
76
77
0
    /// Returns `true` if every byte is either NUL or an ASCII digit (`0-9`).
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_numeric(&self) -> bool {
78
0
        let word = self.0;
79
        // See explanatory comments in is_ascii_alphabetic
        // High bit of `mask` is 1 for every non-NUL byte.
80
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
        // High bit of `numeric` is 1 if the byte is below 0x30 ('0') or at least 0x3a (one past
        // '9'), and 0 for digits.
81
0
        let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646);
82
0
        (numeric & mask) == 0
83
0
    }
84
85
0
    /// Returns `true` if no byte is an uppercase ASCII letter (`A-Z`). Non-letters and NUL
    /// bytes do not affect the result.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_lowercase(&self) -> bool {
86
0
        let word = self.0;
87
        // For efficiency, this function tests for an invalid string rather than a valid string.
88
        // A string is ASCII lowercase iff it contains no uppercase ASCII characters.
89
        // `invalid_case` sets the high bit of all uppercase ASCII characters to 0 and of all
        // others to 1: the bit is 1 if the byte is below 0x41 ('A') or at least 0x5b (past 'Z').
90
0
        let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
91
        // The string is valid if it contains no invalid characters (if all high bits are 1).
92
0
        (invalid_case & 0x8080_8080) == 0x8080_8080
93
0
    }
94
95
0
    /// Returns `true` if the first byte is not a lowercase ASCII letter and none of the
    /// remaining bytes is an uppercase ASCII letter.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_titlecase(&self) -> bool {
96
0
        let word = self.0;
97
        // See explanatory comments in is_ascii_lowercase
        // The first character uses the "lowercase letter" constants (0x1f / 0x05), the other
        // three the "uppercase letter" constants (0x3f / 0x25). The first character is the
        // least significant byte on little-endian targets and the most significant on big-endian.
98
0
        let invalid_case = if cfg!(target_endian = "little") {
99
0
            !(word + 0x3f3f_3f1f) | (word + 0x2525_2505)
100
        } else {
101
0
            !(word + 0x1f3f_3f3f) | (word + 0x0525_2525)
102
        };
103
0
        // Valid only if no byte was flagged, i.e. every high bit is 1.
        (invalid_case & 0x8080_8080) == 0x8080_8080
104
0
    }
105
106
0
    /// Returns `true` if no byte is a lowercase ASCII letter (`a-z`). Non-letters and NUL
    /// bytes do not affect the result.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_uppercase(&self) -> bool {
107
0
        let word = self.0;
108
        // See explanatory comments in is_ascii_lowercase
        // Here the high bit is 0 for lowercase letters (0x61..=0x7a) and 1 for everything else.
109
0
        let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
110
0
        (invalid_case & 0x8080_8080) == 0x8080_8080
111
0
    }
112
113
0
    /// Returns `true` if every byte is either NUL or a lowercase ASCII letter (`a-z`).
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
114
0
        let word = self.0;
115
        // `mask` sets all NUL bytes to 0 (only the high bit of each byte is kept).
116
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
117
        // `lower_alpha` sets the high bit of all lowercase ASCII characters to 0 and of all
        // others to 1.
118
0
        let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505);
119
        // The overall string is valid if every character passes at least one test.
120
        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character
        // (`lower_alpha`).
121
0
        (lower_alpha & mask) == 0
122
0
    }
123
124
0
    /// Returns `true` if every non-NUL byte is a letter of the expected case: an uppercase
    /// ASCII letter in the first position, a lowercase ASCII letter in the other positions.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
125
0
        let word = self.0;
126
        // See explanatory comments in is_ascii_alphabetic_lowercase
        // High bit of `mask` is 1 for every non-NUL byte.
127
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
        // The first character uses the "uppercase letter" constants (0x3f / 0x25), the others the
        // "lowercase letter" constants (0x1f / 0x05); the high bit is 0 where the test passes.
        // The first character is the lowest byte on little-endian, the highest on big-endian.
128
0
        let title_case = if cfg!(target_endian = "little") {
129
0
            !(word + 0x1f1f_1f3f) | (word + 0x0505_0525)
130
        } else {
131
0
            !(word + 0x3f1f_1f1f) | (word + 0x2505_0505)
132
        };
133
0
        (title_case & mask) == 0
134
0
    }
135
136
0
    /// Returns `true` if every byte is either NUL or an uppercase ASCII letter (`A-Z`).
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
137
0
        let word = self.0;
138
        // See explanatory comments in is_ascii_alphabetic_lowercase
        // High bit of `mask` is 1 for every non-NUL byte.
139
0
        let mask = (word + 0x7f7f_7f7f) & 0x8080_8080;
        // High bit of `upper_alpha` is 0 only for bytes in 0x41..=0x5a ('A'..='Z').
140
0
        let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525);
141
0
        (upper_alpha & mask) == 0
142
0
    }
143
144
0
    /// Returns a copy in which every uppercase ASCII letter is replaced by its lowercase
    /// counterpart; all other bytes are unchanged.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn to_ascii_lowercase(&self) -> Self {
145
0
        let word = self.0;
146
0
        // The inner expression leaves 0x80 set exactly in the bytes that lie in 0x41..=0x5a
        // ('A'..='Z'); shifting right by 2 turns that into 0x20, the ASCII case bit, which is
        // then OR-ed into those bytes.
        let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2);
147
0
        Self(result)
148
0
    }
149
150
0
    /// Returns a copy in which a lowercase ASCII letter in the first position is uppercased and
    /// uppercase ASCII letters in the other positions are lowercased; other bytes are unchanged.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn to_ascii_titlecase(&self) -> Self {
150
0
        // Normalise to little-endian so the first character is always the lowest byte and a
        // single set of constants works on every target.
        let word = self.0.to_le();
152
0
        // `mask` holds 0x20 in every byte whose case must change: the lowest byte if it is a
        // lowercase letter (constants 0x1f / 0x05), any other byte if it is an uppercase letter
        // (constants 0x3f / 0x25).
        let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2;
153
0
        // Set the case bit in all flagged bytes, then clear it again in the first byte only
        // (`0x20 & mask` keeps just the lowest byte's flag).
        let result = (word | mask) & !(0x20 & mask);
154
0
        // Convert back to the native byte order used for storage.
        Self(u32::from_le(result))
155
0
    }
156
157
0
    /// Returns a copy in which every lowercase ASCII letter is replaced by its uppercase
    /// counterpart; all other bytes are unchanged.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn to_ascii_uppercase(&self) -> Self {
158
0
        let word = self.0;
159
0
        // The inner expression leaves 0x80 set exactly in the bytes that lie in 0x61..=0x7a
        // ('a'..='z'); shifted right by 2 this is 0x20, the ASCII case bit, which is then
        // cleared in those bytes.
        let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2);
160
0
        Self(result)
161
0
    }
162
}
163
164
/// Internal helper struct that performs operations on aligned integers.
165
/// Supports strings up to 8 bytes long.
///
/// The string bytes are packed into a single `u64` in native byte order; positions that are
/// not filled by the constructor hold NUL (0) bytes.
166
#[repr(transparent)]
167
pub struct Aligned8(u64);
168
169
impl Aligned8 {
170
    /// Packs the `N` bytes of `src` into a native-endian `u64`, leaving the remaining
    /// positions zero-filled.
    ///
    /// # Panics
171
    /// Panics if N is greater than 8
172
    #[inline]
173
0
    pub const fn from_utf8<const N: usize>(src: &[u8; N]) -> Self {
174
0
        // Zero-initialised so that positions past `N` stay NUL.
        let mut bytes = [0; 8];
175
0
        let mut i = 0;
176
        // The function documentation defines when panics may occur
        // (`bytes[i]` is out of bounds as soon as `i` reaches 8).
177
        #[allow(clippy::indexing_slicing)]
178
0
        while i < N {
179
0
            bytes[i] = src[i];
180
0
            i += 1;
181
0
        }
182
0
        // Native byte order keeps the in-memory order of the integer equal to the order of `src`.
        Self(u64::from_ne_bytes(bytes))
183
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_utf8::<2>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_utf8::<3>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_utf8::<4>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_utf8::<8>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_utf8::<_>
184
185
    /// Same as `from_utf8`, but takes the input as an array of `AsciiByte`.
    ///
    /// # Panics
    /// Panics if N is greater than 8 (the panic originates in `from_utf8`).
    #[inline]
186
0
    pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self {
187
0
        // SAFETY: NOTE(review): this reinterprets `&[AsciiByte; N]` as `&[u8; N]`, which relies on
        // `AsciiByte` having the same size and layout as `u8`. Its definition is not visible
        // here -- confirm in `crate::asciibyte`.
        Self::from_utf8::<N>(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) })
188
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<2>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<3>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<4>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<8>
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<_>
189
190
    /// Returns the eight stored bytes in native byte order, i.e. the inverse of the
    /// `u64::from_ne_bytes` packing performed by `from_utf8`.
    #[inline]
191
0
    pub const fn to_bytes(&self) -> [u8; 8] {
192
0
        self.0.to_ne_bytes()
193
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::to_bytes
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::to_bytes
194
195
    /// Returns the eight stored bytes (see `to_bytes`) as `AsciiByte` values.
    #[inline]
196
0
    pub const fn to_ascii_bytes(&self) -> [AsciiByte; 8] {
197
0
        // SAFETY: NOTE(review): this reinterprets `[u8; 8]` as `[AsciiByte; 8]`, which presumably
        // relies on `AsciiByte` being a one-byte type for which every stored byte is a valid
        // value. Its definition is not visible here -- confirm in `crate::asciibyte`.
        unsafe { core::mem::transmute(self.to_bytes()) }
198
0
    }
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::to_ascii_bytes
Unexecuted instantiation: <tinystr::int_ops::Aligned8>::to_ascii_bytes
199
200
0
    /// Returns 8 minus the number of whole zero bytes at the end of the stored bytes (in memory
    /// order), i.e. the string length when the unused tail is NUL padding.
    pub const fn len(&self) -> usize {
201
0
        let word = self.0;
202
        // Little-endian: the last byte in memory is the most significant one, so trailing NUL
        // bytes appear as leading zero bits. Dividing by 8 counts whole zero bytes only.
        #[cfg(target_endian = "little")]
203
0
        let len = (8 - word.leading_zeros() / 8) as usize;
204
        // Big-endian: the last byte in memory is the least significant one.
        #[cfg(target_endian = "big")]
205
        let len = (8 - word.trailing_zeros() / 8) as usize;
206
0
        len
207
0
    }
208
209
0
    /// Returns `true` if every byte is either NUL or an ASCII letter (`A-Z`, `a-z`).
    ///
    /// All eight bytes are tested at once with per-byte arithmetic on the packed `u64`.
    /// NOTE(review): the additions assume every byte is below 0x80, so that no carry crosses
    /// into the neighbouring byte -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic(&self) -> bool {
210
0
        let word = self.0;
211
0
        // High bit (0x80) of each byte of `mask` is 1 for non-NUL bytes and 0 for NUL bytes.
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
212
0
        // Force the ASCII case bit on so that only the lowercase range has to be tested; this
        // may alter non-letters, which does not matter for the test below.
        let lower = word | 0x2020_2020_2020_2020;
213
0
        // High bit is 1 if the byte is below 0x61 ('a') or at least 0x7b (one past 'z'), and 0
        // for letters.
        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
214
0
        // A byte fails only if it is both non-NUL and not a letter.
        (alpha & mask) == 0
215
0
    }
216
217
0
    /// Returns `true` if every byte is either NUL, an ASCII letter or an ASCII digit.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphanumeric(&self) -> bool {
218
0
        let word = self.0;
219
0
        // High bit of each byte of `mask` is 1 for non-NUL bytes.
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
220
0
        // High bit of `numeric` is 0 only for bytes in 0x30..=0x39 ('0'..='9').
        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
221
0
        let lower = word | 0x2020_2020_2020_2020;
222
0
        // High bit of `alpha` is 0 only for bytes that are letters once bit 0x20 is forced on.
        let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505);
223
0
        // A byte fails only if it is non-NUL, not a letter and not a digit.
        (alpha & numeric & mask) == 0
224
0
    }
225
226
0
    /// Returns `true` if every byte is either NUL or an ASCII digit (`0-9`).
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_numeric(&self) -> bool {
227
0
        let word = self.0;
228
0
        // High bit of each byte of `mask` is 1 for non-NUL bytes.
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
229
0
        // High bit is 1 if the byte is below 0x30 ('0') or at least 0x3a (one past '9'), and 0
        // for digits.
        let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646);
230
0
        (numeric & mask) == 0
231
0
    }
232
233
0
    /// Returns `true` if no byte is an uppercase ASCII letter (`A-Z`). Non-letters and NUL
    /// bytes do not affect the result.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_lowercase(&self) -> bool {
234
0
        let word = self.0;
235
0
        // High bit of each byte is 0 for uppercase letters (0x41..=0x5a) and 1 for all others.
        let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
236
0
        // Valid only if no byte was flagged, i.e. every high bit is 1.
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
237
0
    }
238
239
0
    /// Returns `true` if the first byte is not a lowercase ASCII letter and none of the
    /// remaining bytes is an uppercase ASCII letter.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_titlecase(&self) -> bool {
240
0
        let word = self.0;
241
0
        // The first character uses the "lowercase letter" constants (0x1f / 0x05), the other
        // seven the "uppercase letter" constants (0x3f / 0x25); the high bit of a byte is 0 where
        // the disallowed case is found. The first character is the least significant byte on
        // little-endian targets and the most significant on big-endian.
        let invalid_case = if cfg!(target_endian = "little") {
242
0
            !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505)
243
        } else {
244
0
            !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525)
245
        };
246
0
        // Valid only if no byte was flagged, i.e. every high bit is 1.
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
247
0
    }
248
249
0
    /// Returns `true` if no byte is a lowercase ASCII letter (`a-z`). Non-letters and NUL
    /// bytes do not affect the result.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_uppercase(&self) -> bool {
250
0
        let word = self.0;
251
0
        // High bit of each byte is 0 for lowercase letters (0x61..=0x7a) and 1 for all others.
        let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
252
0
        // Valid only if no byte was flagged, i.e. every high bit is 1.
        (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080
253
0
    }
254
255
0
    /// Returns `true` if every byte is either NUL or a lowercase ASCII letter (`a-z`).
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic_lowercase(&self) -> bool {
256
0
        let word = self.0;
257
        // `mask` sets all NUL bytes to 0 (only the high bit of each byte is kept).
258
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
259
        // `lower_alpha` sets the high bit of all lowercase ASCII characters to 0 and of all
        // others to 1.
260
0
        let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505);
261
        // The overall string is valid if every character passes at least one test.
262
        // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character
        // (`lower_alpha`).
263
0
        (lower_alpha & mask) == 0
264
0
    }
265
266
0
    /// Returns `true` if every non-NUL byte is a letter of the expected case: an uppercase
    /// ASCII letter in the first position, a lowercase ASCII letter in the other positions.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic_titlecase(&self) -> bool {
267
0
        let word = self.0;
268
        // See explanatory comments in is_ascii_alphabetic_lowercase
        // High bit of each byte of `mask` is 1 for non-NUL bytes.
269
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
270
0
        // The first character uses the "uppercase letter" constants (0x3f / 0x25), the others the
        // "lowercase letter" constants (0x1f / 0x05); the high bit is 0 where the test passes.
        // The first character is the lowest byte on little-endian, the highest on big-endian.
        let title_case = if cfg!(target_endian = "little") {
271
0
            !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525)
272
        } else {
273
0
            !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505)
274
        };
275
0
        (title_case & mask) == 0
276
0
    }
277
278
0
    /// Returns `true` if every byte is either NUL or an uppercase ASCII letter (`A-Z`).
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn is_ascii_alphabetic_uppercase(&self) -> bool {
279
0
        let word = self.0;
280
        // See explanatory comments in is_ascii_alphabetic_lowercase
        // High bit of each byte of `mask` is 1 for non-NUL bytes.
281
0
        let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080;
282
0
        // High bit of `upper_alpha` is 0 only for bytes in 0x41..=0x5a ('A'..='Z').
        let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525);
283
0
        (upper_alpha & mask) == 0
284
0
    }
285
286
0
    /// Returns a copy in which every uppercase ASCII letter is replaced by its lowercase
    /// counterpart; all other bytes are unchanged.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn to_ascii_lowercase(&self) -> Self {
287
0
        let word = self.0;
288
0
        // The parenthesised expression leaves 0x80 set exactly in the bytes that lie in
        // 0x41..=0x5a ('A'..='Z'); shifted right by 2 this is 0x20, the ASCII case bit, which is
        // OR-ed into those bytes.
        let result = word
289
0
            | (((word + 0x3f3f_3f3f_3f3f_3f3f)
290
0
                & !(word + 0x2525_2525_2525_2525)
291
0
                & 0x8080_8080_8080_8080)
292
0
                >> 2);
293
0
        Self(result)
294
0
    }
295
296
0
    /// Returns a copy in which a lowercase ASCII letter in the first position is uppercased and
    /// uppercase ASCII letters in the other positions are lowercased; other bytes are unchanged.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn to_ascii_titlecase(&self) -> Self {
297
0
        // Normalise to little-endian so the first character is always the lowest byte and a
        // single set of constants works on every target.
        let word = self.0.to_le();
298
0
        // `mask` holds 0x20 in every byte whose case must change: the lowest byte if it is a
        // lowercase letter (constants 0x1f / 0x05), any other byte if it is an uppercase letter
        // (constants 0x3f / 0x25).
        let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f)
299
0
            & !(word + 0x2525_2525_2525_2505)
300
0
            & 0x8080_8080_8080_8080)
301
0
            >> 2;
302
0
        // Set the case bit in all flagged bytes, then clear it again in the first byte only
        // (`0x20 & mask` keeps just the lowest byte's flag).
        let result = (word | mask) & !(0x20 & mask);
303
0
        // Convert back to the native byte order used for storage.
        Self(u64::from_le(result))
304
0
    }
305
306
0
    /// Returns a copy in which every lowercase ASCII letter is replaced by its uppercase
    /// counterpart; all other bytes are unchanged.
    ///
    /// NOTE(review): the per-byte additions assume every byte is below 0x80 (no carry between
    /// bytes) -- presumably guaranteed by the callers; confirm.
    pub const fn to_ascii_uppercase(&self) -> Self {
307
0
        let word = self.0;
308
0
        // The parenthesised expression leaves 0x80 set exactly in the bytes that lie in
        // 0x61..=0x7a ('a'..='z'); shifted right by 2 this is 0x20, the ASCII case bit, which is
        // cleared in those bytes.
        let result = word
309
0
            & !(((word + 0x1f1f_1f1f_1f1f_1f1f)
310
0
                & !(word + 0x0505_0505_0505_0505)
311
0
                & 0x8080_8080_8080_8080)
312
0
                >> 2);
313
0
        Self(result)
314
0
    }
315
}