/rust/registry/src/index.crates.io-1949cf8c6b5b557f/iri-string-0.7.9/src/parser/str.rs
Line | Count | Source |
1 | | //! Functions for common string operations. |
2 | | |
3 | | pub(crate) use self::maybe_pct_encoded::{ |
4 | | process_percent_encoded_best_effort, PctEncodedFragments, |
5 | | }; |
6 | | |
7 | | mod maybe_pct_encoded; |
8 | | |
/// Returns the inner string if `s` is wrapped by the given delimiters.
///
/// `s` is considered wrapped when it is at least two bytes long, its first
/// byte equals `open`, and its last byte equals `close`. In that case the
/// content between the two delimiter bytes is returned (possibly empty).
///
/// Returns `None` when `s` is not wrapped. `None` is also returned when a
/// delimiter byte matches in the middle of a multi-byte character (only
/// possible for non-ASCII `open`/`close`), since the inner part would not be
/// a valid string slice.
#[must_use]
pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
    match s.as_bytes() {
        // `[first, .., last]` matches any slice with two or more bytes.
        [first, .., last] if *first == open && *last == close => {
            // Checked slicing: never panics on a non-char-boundary index.
            s.get(1..s.len() - 1)
        }
        _ => None,
    }
}
22 | | |
/// Returns whichever of the two needle bytes occurs first in `haystack`.
///
/// Returns `None` if neither needle is present.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    for &byte in haystack {
        if byte == needle1 || byte == needle2 {
            return Some(byte);
        }
    }
    None
}
33 | | |
/// Returns whichever of the two needle bytes occurs first in `haystack`.
///
/// Returns `None` if neither needle is present.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    let pos = memchr::memchr2(needle1, needle2, haystack)?;
    Some(haystack[pos])
}
41 | | |
/// Returns the index of the last occurrence of `needle` in `haystack`.
///
/// (Possibly) faster alternative to `haystack.rfind(needle)` when `needle` is
/// an ASCII character.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    // Walk the indices from the back and stop at the first match.
    (0..haystack.len())
        .rev()
        .find(|&idx| haystack[idx] == needle)
}
49 | | |
/// Returns the index of the last occurrence of `needle` in `haystack`.
///
/// (Possibly) faster alternative to `haystack.rfind(needle)` when `needle` is
/// an ASCII character.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    let last_pos = memchr::memrchr(needle, haystack);
    last_pos
}
57 | | |
/// Splits `haystack` right before the first occurrence of `needle`.
///
/// Returns `(before, rest)` where `rest` starts with the needle itself.
/// Returns `None` if `needle` does not occur.
///
/// The needle is expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    Some(haystack.split_at(pos))
}
69 | | |
/// Splits `haystack` right before the first occurrence of `needle`.
///
/// Returns `(before, rest)` where `rest` starts with the needle itself.
/// Returns `None` if `needle` does not occur.
///
/// The needle is expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
78 | | |
/// Splits `haystack` right before the last occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the matched needle.
/// Returns `None` if neither needle occurs.
///
/// The needles are expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let needles = [needle1, needle2];
    let pos = haystack
        .as_bytes()
        .iter()
        .rposition(|b| needles.contains(b))?;
    Some(haystack.split_at(pos))
}
90 | | |
/// Splits `haystack` right before the last occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the matched needle.
/// Returns `None` if neither needle occurs.
///
/// The needles are expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
99 | | |
/// Splits `haystack` right before the first occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the matched needle.
/// Returns `None` if neither needle occurs.
///
/// The needles are expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let needles = [needle1, needle2];
    let pos = haystack
        .as_bytes()
        .iter()
        .position(|b| needles.contains(b))?;
    Some(haystack.split_at(pos))
}
111 | | |
/// Splits `haystack` right before the first occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the matched needle.
/// Returns `None` if neither needle occurs.
///
/// The needles are expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
120 | | |
/// Splits `haystack` right before the first occurrence of any of the three needles.
///
/// Returns `(before, rest)` where `rest` starts with the matched needle.
/// Returns `None` if none of the needles occurs.
///
/// The needles are expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let needles = [needle1, needle2, needle3];
    let pos = haystack
        .as_bytes()
        .iter()
        .position(|b| needles.contains(b))?;
    Some(haystack.split_at(pos))
}
137 | | |
/// Splits `haystack` right before the first occurrence of any of the three needles.
///
/// Returns `(before, rest)` where `rest` starts with the matched needle.
/// Returns `None` if none of the needles occurs.
///
/// The needles are expected to be ASCII; the split panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let pos = memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
152 | | |
/// Splits `haystack` around the first occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`; the needle byte belongs to neither part.
/// Returns `None` if `needle` does not occur.
///
/// The needle is expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[pos + 1..];
    Some((before, after))
}
164 | | |
/// Splits `haystack` around the first occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`; the needle byte belongs to neither part.
/// Returns `None` if `needle` does not occur.
///
/// The needle is expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[pos + 1..];
    Some((before, after))
}
174 | | |
/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, matched_needle, after)`; the matched byte is reported
/// separately and belongs to neither string part.
/// Returns `None` if neither needle occurs.
///
/// The needles are expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let (pos, found) = haystack
        .as_bytes()
        .iter()
        .copied()
        .enumerate()
        .find(|&(_, b)| b == needle1 || b == needle2)?;
    Some((&haystack[..pos], found, &haystack[pos + 1..]))
}
196 | | |
/// Splits `haystack` around the first occurrence of either needle.
///
/// Returns `(before, matched_needle, after)`; the matched byte is reported
/// separately and belongs to neither string part.
/// Returns `None` if neither needle occurs.
///
/// The needles are expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = memchr::memchr2(needle1, needle2, bytes)?;
    Some((&haystack[..pos], bytes[pos], &haystack[pos + 1..]))
}
215 | | |
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, matched_needle, after)`; the matched byte is reported
/// separately and belongs to neither string part.
/// Returns `None` if none of the needles occurs.
///
/// The needles are expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let needles = [needle1, needle2, needle3, needle4];
    let (pos, found) = haystack
        .as_bytes()
        .iter()
        .copied()
        .enumerate()
        .find(|(_, b)| needles.contains(b))?;
    Some((&haystack[..pos], found, &haystack[pos + 1..]))
}
239 | | |
/// Splits `haystack` around the first occurrence of any of the four needles.
///
/// Returns `(before, matched_needle, after)`; the matched byte is reported
/// separately and belongs to neither string part.
/// Returns `None` if none of the needles occurs.
///
/// The needles are expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    // Search the first three needles in one pass, then look for the fourth
    // only in the part that precedes that match (or everywhere if no match).
    let first_of_three = memchr::memchr3(needle1, needle2, needle3, bytes);
    let window = match first_of_three {
        Some(end) => &bytes[..end],
        None => bytes,
    };
    let pos = memchr::memchr(needle4, window).or(first_of_three)?;
    Some((&haystack[..pos], bytes[pos], &haystack[pos + 1..]))
}
265 | | |
/// Splits `haystack` around the last occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`; the needle byte belongs to neither part.
/// Returns `None` if `needle` does not occur.
///
/// The needle is expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().rposition(|&b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[pos + 1..];
    Some((before, after))
}
277 | | |
/// Splits `haystack` around the last occurrence of `needle`, dropping the needle.
///
/// Returns `(before, after)`; the needle byte belongs to neither part.
/// Returns `None` if `needle` does not occur.
///
/// The needle is expected to be ASCII; slicing panics if the match falls
/// inside a multi-byte character.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[pos + 1..];
    Some((before, after))
}
287 | | |
/// Returns `true` if every character of the string is allowed.
///
/// ASCII bytes are checked with `pred_ascii` and non-ASCII characters with
/// `pred_nonascii`. The string is scanned left to right as alternating runs
/// of ASCII and non-ASCII content, stopping at the first rejected character.
/// An empty string is accepted.
#[must_use]
fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    loop {
        // Validate the (possibly empty) leading ASCII run.
        let ascii_len = s.bytes().position(|b| !b.is_ascii()).unwrap_or(s.len());
        let (ascii_run, tail) = s.split_at(ascii_len);
        if !ascii_run.bytes().all(pred_ascii) {
            return false;
        }
        if tail.is_empty() {
            // Nothing is left after the ASCII run.
            return true;
        }

        // `tail` starts with a non-ASCII character: validate that whole run.
        let nonascii_len = tail
            .bytes()
            .position(|b| b.is_ascii())
            .unwrap_or(tail.len());
        let (nonascii_run, rest) = tail.split_at(nonascii_len);
        if !nonascii_run.chars().all(pred_nonascii) {
            return false;
        }
        s = rest;
    }
}
326 | | |
327 | | /// Returns `true` if the string only contains the allowed characters and percent-encoded char. |
328 | | #[must_use] |
329 | 0 | pub(crate) fn satisfy_chars_with_pct_encoded<F, G>( |
330 | 0 | mut s: &str, |
331 | 0 | pred_ascii: F, |
332 | 0 | pred_nonascii: G, |
333 | 0 | ) -> bool |
334 | 0 | where |
335 | 0 | F: Copy + Fn(u8) -> bool, |
336 | 0 | G: Copy + Fn(char) -> bool, |
337 | | { |
338 | 0 | while let Some((prefix, suffix)) = find_split_hole(s, b'%') { |
339 | | // Verify strings before the percent-encoded char. |
340 | 0 | if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) { |
341 | 0 | return false; |
342 | 0 | } |
343 | | |
344 | | // Verify the percent-encoded char. |
345 | 0 | if !starts_with_double_hexdigits(suffix.as_bytes()) { |
346 | 0 | return false; |
347 | 0 | } |
348 | | |
349 | | // Advance the cursor. |
350 | 0 | s = &suffix[2..]; |
351 | | } |
352 | | |
353 | | // Verify the rest. |
354 | 0 | satisfy_chars(s, pred_ascii, pred_nonascii) |
355 | 0 | } Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_regname, iri_string::parser::char::is_nonascii_regname<iri_string::spec::UriSpec>> Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_query<iri_string::spec::UriSpec>> Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_fragment<iri_string::spec::UriSpec>> Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_pchar_slash, <iri_string::spec::UriSpec as iri_string::spec::internal::SpecInternal>::is_nonascii_char_unreserved> Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_userinfo_ipvfutureaddr, iri_string::parser::char::is_nonascii_userinfo<iri_string::spec::UriSpec>> Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::template::parser::char::is_ascii_varchar_continue, iri_string::template::parser::validate::validate_varname::{closure#0}> |
356 | | |
/// Returns `true` if the first two bytes of `s` are both ASCII hexadecimal digits.
///
/// Returns `false` when `s` has fewer than two bytes.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    matches!(s, [hi, lo, ..] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit())
}
365 | | |
/// Removes the leading byte of `s` if it equals `prefix`, and returns the remainder.
///
/// Returns `None` when `s` is empty or starts with a different byte.
///
/// # Precondition
///
/// `prefix` should be an ASCII character; this is checked in debug builds.
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    match s.as_bytes().split_first() {
        Some((&head, _)) if head == prefix => Some(&s[1..]),
        _ => None,
    }
}
380 | | |
/// Separates the first character of `s` from the remainder.
///
/// Returns `(first_char, rest_str)`, or `None` if `s` is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // The remainder begins right after the UTF-8 encoding of the first character.
    Some((first, &s[first.len_utf8()..]))
}