/rust/registry/src/index.crates.io-1949cf8c6b5b557f/tinystr-0.8.3/src/int_ops.rs
Line | Count | Source |
1 | | // This file is part of ICU4X. For terms of use, please see the file |
2 | | // called LICENSE at the top level of the ICU4X source tree |
3 | | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | | |
5 | | use crate::asciibyte::AsciiByte; |
6 | | |
7 | | /// Internal helper struct that performs operations on aligned integers. |
8 | | /// Supports strings up to 4 bytes long. |
9 | | #[repr(transparent)] |
10 | | pub struct Aligned4(u32); |
11 | | |
12 | | impl Aligned4 { |
13 | | /// # Panics |
14 | | /// Panics if N is greater than 4 |
15 | | #[inline] |
16 | 0 | pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self { |
17 | 0 | let mut bytes = [0; 4]; |
18 | 0 | let mut i = 0; |
19 | | // The function documentation defines when panics may occur |
20 | | #[expect(clippy::indexing_slicing)] |
21 | 0 | while i < N { |
22 | 0 | bytes[i] = src[i] as u8; |
23 | 0 | i += 1; |
24 | 0 | } |
25 | 0 | Self(u32::from_ne_bytes(bytes)) |
26 | 0 | } Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<2> Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<3> Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<4> Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<8> Unexecuted instantiation: <tinystr::int_ops::Aligned4>::from_ascii_bytes::<_> |
27 | | |
28 | 0 | pub const fn len(&self) -> usize { |
29 | 0 | let word = self.0; |
30 | | #[cfg(target_endian = "little")] |
31 | 0 | let len = (4 - word.leading_zeros() / 8) as usize; |
32 | | #[cfg(target_endian = "big")] |
33 | | let len = (4 - word.trailing_zeros() / 8) as usize; |
34 | 0 | len |
35 | 0 | } |
36 | | |
37 | 0 | pub const fn is_ascii_alphabetic(&self) -> bool { |
38 | 0 | let word = self.0; |
39 | | // Each of the following bitmasks sets *the high bit* (0x80) of every byte to 0 for valid and 1 for invalid. |
40 | | // `mask` sets all NUL bytes to 0. |
41 | 0 | let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; |
42 | | // `lower` converts the string to lowercase. It may also change the value of non-alpha |
43 | | // characters, but this does not matter for the alphabetic test that follows. |
44 | 0 | let lower = word | 0x2020_2020; |
45 | | // `alpha` sets all alphabetic bytes to 0. We only need to check for lowercase characters because `lower` has already folded uppercase letters to lowercase. |
46 | 0 | let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); |
47 | | // The overall string is valid if every character passes at least one test. |
48 | | // We performed two tests here: NUL padding (`mask` is 0 for NUL bytes) and alphabetic (`alpha`). |
49 | 0 | (alpha & mask) == 0 |
50 | 0 | } |
51 | | |
52 | 0 | pub const fn is_ascii_alphanumeric(&self) -> bool { |
53 | 0 | let word = self.0; |
54 | | // See explanatory comments in is_ascii_alphabetic |
55 | 0 | let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; |
56 | 0 | let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); |
57 | 0 | let lower = word | 0x2020_2020; |
58 | 0 | let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); |
59 | 0 | (alpha & numeric & mask) == 0 |
60 | 0 | } |
61 | | |
62 | 0 | pub const fn is_ascii_numeric(&self) -> bool { |
63 | 0 | let word = self.0; |
64 | | // See explanatory comments in is_ascii_alphabetic |
65 | 0 | let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; |
66 | 0 | let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); |
67 | 0 | (numeric & mask) == 0 |
68 | 0 | } |
69 | | |
70 | 0 | pub const fn is_ascii_lowercase(&self) -> bool { |
71 | 0 | let word = self.0; |
72 | | // For efficiency, this function tests for an invalid string rather than a valid string. |
73 | | // A string is ASCII lowercase iff it contains no uppercase ASCII characters. |
74 | | // `invalid_case` sets all uppercase ASCII characters to 0 and all others to 1. |
75 | 0 | let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525); |
76 | | // The string is valid if it contains no invalid characters (if all high bits are 1). |
77 | 0 | (invalid_case & 0x8080_8080) == 0x8080_8080 |
78 | 0 | } |
79 | | |
80 | 0 | pub const fn is_ascii_titlecase(&self) -> bool { |
81 | 0 | let word = self.0; |
82 | | // See explanatory comments in is_ascii_lowercase |
83 | 0 | let invalid_case = if cfg!(target_endian = "little") { |
84 | 0 | !(word + 0x3f3f_3f1f) | (word + 0x2525_2505) |
85 | | } else { |
86 | 0 | !(word + 0x1f3f_3f3f) | (word + 0x0525_2525) |
87 | | }; |
88 | 0 | (invalid_case & 0x8080_8080) == 0x8080_8080 |
89 | 0 | } |
90 | | |
91 | 0 | pub const fn is_ascii_uppercase(&self) -> bool { |
92 | 0 | let word = self.0; |
93 | | // See explanatory comments in is_ascii_lowercase |
94 | 0 | let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505); |
95 | 0 | (invalid_case & 0x8080_8080) == 0x8080_8080 |
96 | 0 | } |
97 | | |
98 | 0 | pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { |
99 | 0 | let word = self.0; |
100 | | // `mask` sets all NUL bytes to 0. |
101 | 0 | let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; |
102 | | // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1. |
103 | 0 | let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505); |
104 | | // The overall string is valid if every character passes at least one test. |
105 | | // We performed two tests here: NUL padding (`mask` is 0 for NUL bytes) and lowercase ASCII character (`lower_alpha`). |
106 | 0 | (lower_alpha & mask) == 0 |
107 | 0 | } |
108 | | |
109 | 0 | pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { |
110 | 0 | let word = self.0; |
111 | | // See explanatory comments in is_ascii_alphabetic_lowercase |
112 | 0 | let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; |
113 | 0 | let title_case = if cfg!(target_endian = "little") { |
114 | 0 | !(word + 0x1f1f_1f3f) | (word + 0x0505_0525) |
115 | | } else { |
116 | 0 | !(word + 0x3f1f_1f1f) | (word + 0x2505_0505) |
117 | | }; |
118 | 0 | (title_case & mask) == 0 |
119 | 0 | } |
120 | | |
121 | 0 | pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { |
122 | 0 | let word = self.0; |
123 | | // See explanatory comments in is_ascii_alphabetic_lowercase |
124 | 0 | let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; |
125 | 0 | let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525); |
126 | 0 | (upper_alpha & mask) == 0 |
127 | 0 | } |
128 | | |
129 | 0 | pub const fn to_ascii_lowercase(&self) -> [AsciiByte; 4] { |
130 | 0 | let word = self.0; |
131 | 0 | let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2); |
132 | 0 | unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) } |
133 | 0 | } |
134 | | |
135 | 0 | pub const fn to_ascii_titlecase(&self) -> [AsciiByte; 4] { |
136 | 0 | let word = self.0.to_le(); |
137 | 0 | let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2; |
138 | 0 | let result = (word | mask) & !(0x20 & mask); |
139 | 0 | unsafe { AsciiByte::to_ascii_byte_array(&u32::from_le(result).to_ne_bytes()) } |
140 | 0 | } |
141 | | |
142 | 0 | pub const fn to_ascii_uppercase(&self) -> [AsciiByte; 4] { |
143 | 0 | let word = self.0; |
144 | 0 | let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2); |
145 | 0 | unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) } |
146 | 0 | } |
147 | | } |
148 | | |
149 | | /// Internal helper struct that performs operations on aligned integers. |
150 | | /// Supports strings up to 8 bytes long. |
151 | | #[repr(transparent)] |
152 | | pub struct Aligned8(u64); |
153 | | |
154 | | impl Aligned8 { |
155 | | /// # Panics |
156 | | /// Panics if N is greater than 8 |
157 | | #[inline] |
158 | 0 | pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self { |
159 | 0 | let mut bytes = [0; 8]; |
160 | 0 | let mut i = 0; |
161 | | // The function documentation defines when panics may occur |
162 | | #[expect(clippy::indexing_slicing)] |
163 | 0 | while i < N { |
164 | 0 | bytes[i] = src[i] as u8; |
165 | 0 | i += 1; |
166 | 0 | } |
167 | 0 | Self(u64::from_ne_bytes(bytes)) |
168 | 0 | } Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<2> Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<3> Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<4> Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<8> Unexecuted instantiation: <tinystr::int_ops::Aligned8>::from_ascii_bytes::<_> |
169 | | |
170 | 0 | pub const fn len(&self) -> usize { |
171 | 0 | let word = self.0; |
172 | | #[cfg(target_endian = "little")] |
173 | 0 | let len = (8 - word.leading_zeros() / 8) as usize; |
174 | | #[cfg(target_endian = "big")] |
175 | | let len = (8 - word.trailing_zeros() / 8) as usize; |
176 | 0 | len |
177 | 0 | } |
178 | | |
179 | 0 | pub const fn is_ascii_alphabetic(&self) -> bool { |
180 | 0 | let word = self.0; |
181 | 0 | let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; |
182 | 0 | let lower = word | 0x2020_2020_2020_2020; |
183 | 0 | let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); |
184 | 0 | (alpha & mask) == 0 |
185 | 0 | } |
186 | | |
187 | 0 | pub const fn is_ascii_alphanumeric(&self) -> bool { |
188 | 0 | let word = self.0; |
189 | 0 | let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; |
190 | 0 | let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); |
191 | 0 | let lower = word | 0x2020_2020_2020_2020; |
192 | 0 | let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); |
193 | 0 | (alpha & numeric & mask) == 0 |
194 | 0 | } |
195 | | |
196 | 0 | pub const fn is_ascii_numeric(&self) -> bool { |
197 | 0 | let word = self.0; |
198 | 0 | let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; |
199 | 0 | let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); |
200 | 0 | (numeric & mask) == 0 |
201 | 0 | } |
202 | | |
203 | 0 | pub const fn is_ascii_lowercase(&self) -> bool { |
204 | 0 | let word = self.0; |
205 | 0 | let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); |
206 | 0 | (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 |
207 | 0 | } |
208 | | |
209 | 0 | pub const fn is_ascii_titlecase(&self) -> bool { |
210 | 0 | let word = self.0; |
211 | 0 | let invalid_case = if cfg!(target_endian = "little") { |
212 | 0 | !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505) |
213 | | } else { |
214 | 0 | !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525) |
215 | | }; |
216 | 0 | (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 |
217 | 0 | } |
218 | | |
219 | 0 | pub const fn is_ascii_uppercase(&self) -> bool { |
220 | 0 | let word = self.0; |
221 | 0 | let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); |
222 | 0 | (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 |
223 | 0 | } |
224 | | |
225 | 0 | pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { |
226 | 0 | let word = self.0; |
227 | | // `mask` sets all NUL bytes to 0. |
228 | 0 | let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; |
229 | | // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1. |
230 | 0 | let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); |
231 | | // The overall string is valid if every character passes at least one test. |
232 | | // We performed two tests here: NUL padding (`mask` is 0 for NUL bytes) and lowercase ASCII character (`lower_alpha`). |
233 | 0 | (lower_alpha & mask) == 0 |
234 | 0 | } |
235 | | |
236 | 0 | pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { |
237 | 0 | let word = self.0; |
238 | | // See explanatory comments in is_ascii_alphabetic_lowercase |
239 | 0 | let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; |
240 | 0 | let title_case = if cfg!(target_endian = "little") { |
241 | 0 | !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525) |
242 | | } else { |
243 | 0 | !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505) |
244 | | }; |
245 | 0 | (title_case & mask) == 0 |
246 | 0 | } |
247 | | |
248 | 0 | pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { |
249 | 0 | let word = self.0; |
250 | | // See explanatory comments in is_ascii_alphabetic_lowercase |
251 | 0 | let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; |
252 | 0 | let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); |
253 | 0 | (upper_alpha & mask) == 0 |
254 | 0 | } |
255 | | |
256 | 0 | pub const fn to_ascii_lowercase(&self) -> [AsciiByte; 8] { |
257 | 0 | let word = self.0; |
258 | 0 | let result = word |
259 | 0 | | (((word + 0x3f3f_3f3f_3f3f_3f3f) |
260 | 0 | & !(word + 0x2525_2525_2525_2525) |
261 | 0 | & 0x8080_8080_8080_8080) |
262 | 0 | >> 2); |
263 | 0 | unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) } |
264 | 0 | } |
265 | | |
266 | 0 | pub const fn to_ascii_titlecase(&self) -> [AsciiByte; 8] { |
267 | 0 | let word = self.0.to_le(); |
268 | 0 | let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f) |
269 | 0 | & !(word + 0x2525_2525_2525_2505) |
270 | 0 | & 0x8080_8080_8080_8080) |
271 | 0 | >> 2; |
272 | 0 | let result = (word | mask) & !(0x20 & mask); |
273 | 0 | unsafe { AsciiByte::to_ascii_byte_array(&u64::from_le(result).to_ne_bytes()) } |
274 | 0 | } |
275 | | |
276 | 0 | pub const fn to_ascii_uppercase(&self) -> [AsciiByte; 8] { |
277 | 0 | let word = self.0; |
278 | 0 | let result = word |
279 | 0 | & !(((word + 0x1f1f_1f1f_1f1f_1f1f) |
280 | 0 | & !(word + 0x0505_0505_0505_0505) |
281 | 0 | & 0x8080_8080_8080_8080) |
282 | 0 | >> 2); |
283 | 0 | unsafe { AsciiByte::to_ascii_byte_array(&result.to_ne_bytes()) } |
284 | 0 | } |
285 | | } |