Coverage Report

Created: 2026-01-15 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/iri-string-0.7.9/src/parser/str.rs
Line
Count
Source
1
//! Functions for common string operations.
2
3
pub(crate) use self::maybe_pct_encoded::{
4
    process_percent_encoded_best_effort, PctEncodedFragments,
5
};
6
7
mod maybe_pct_encoded;
8
9
/// Returns the inner string if wrapped.
10
#[must_use]
11
0
pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
12
0
    let (prefix, suffix) = match s.as_bytes() {
13
0
        [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix),
14
0
        _ => return None,
15
    };
16
0
    if (prefix == open) && (suffix == close) {
17
0
        Some(&s[1..(s.len() - 1)])
18
    } else {
19
0
        None
20
    }
21
0
}
22
23
/// Returns whichever of the two needle bytes appears first in the haystack, if any.
24
#[cfg(not(feature = "memchr"))]
25
#[inline]
26
#[must_use]
27
0
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
28
0
    haystack
29
0
        .iter()
30
0
        .copied()
31
0
        .find(|&b| b == needle1 || b == needle2)
32
0
}
33
34
/// Returns whichever of the two needle bytes appears first in the haystack, if any.
35
#[cfg(feature = "memchr")]
36
#[inline]
37
#[must_use]
38
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
39
    memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos])
40
}
41
42
/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character.
43
#[cfg(not(feature = "memchr"))]
44
#[inline]
45
#[must_use]
46
0
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
47
0
    haystack.iter().rposition(|&b| b == needle)
48
0
}
49
50
/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character.
51
#[cfg(feature = "memchr")]
52
#[inline]
53
#[must_use]
54
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
55
    memchr::memrchr(needle, haystack)
56
}
57
58
/// Finds the first needle, and returns the string before it and the rest.
59
///
60
/// If `needle` is not found, returns `None`.
61
#[cfg(not(feature = "memchr"))]
62
#[must_use]
63
0
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
64
0
    haystack
65
0
        .bytes()
66
0
        .position(|b| b == needle)
67
0
        .map(|pos| haystack.split_at(pos))
68
0
}
69
70
/// Finds the first needle, and returns the string before it and the rest.
71
///
72
/// If `needle` is not found, returns `None`.
73
#[cfg(feature = "memchr")]
74
#[must_use]
75
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
76
    memchr::memchr(needle, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
77
}
78
79
/// Finds the last needle, and returns the string before it and the rest.
80
///
81
/// If no needles are found, returns `None`.
82
#[cfg(not(feature = "memchr"))]
83
#[must_use]
84
0
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
85
0
    haystack
86
0
        .bytes()
87
0
        .rposition(|b| b == needle1 || b == needle2)
88
0
        .map(|pos| haystack.split_at(pos))
89
0
}
90
91
/// Finds the last needle, and returns the string before it and the rest.
92
///
93
/// If no needles are found, returns `None`.
94
#[cfg(feature = "memchr")]
95
#[must_use]
96
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
97
    memchr::memrchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
98
}
99
100
/// Finds the first needle, and returns the string before it and the rest.
101
///
102
/// If no needles are found, returns `None`.
103
#[cfg(not(feature = "memchr"))]
104
#[must_use]
105
0
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
106
0
    haystack
107
0
        .bytes()
108
0
        .position(|b| b == needle1 || b == needle2)
109
0
        .map(|pos| haystack.split_at(pos))
110
0
}
111
112
/// Finds the first needle, and returns the string before it and the rest.
113
///
114
/// If no needles are found, returns `None`.
115
#[cfg(feature = "memchr")]
116
#[must_use]
117
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
118
    memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos))
119
}
120
121
/// Finds the first needle, and returns the string before it and the rest.
122
///
123
/// If no needles are found, returns `None`.
124
#[cfg(not(feature = "memchr"))]
125
#[must_use]
126
0
pub(crate) fn find_split3(
127
0
    haystack: &str,
128
0
    needle1: u8,
129
0
    needle2: u8,
130
0
    needle3: u8,
131
0
) -> Option<(&str, &str)> {
132
0
    haystack
133
0
        .bytes()
134
0
        .position(|b| b == needle1 || b == needle2 || b == needle3)
135
0
        .map(|pos| haystack.split_at(pos))
136
0
}
137
138
/// Finds the first needle, and returns the string before it and the rest.
139
///
140
/// If no needles are found, returns `None`.
141
#[cfg(feature = "memchr")]
142
#[must_use]
143
pub(crate) fn find_split3(
144
    haystack: &str,
145
    needle1: u8,
146
    needle2: u8,
147
    needle3: u8,
148
) -> Option<(&str, &str)> {
149
    memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())
150
        .map(|pos| haystack.split_at(pos))
151
}
152
153
/// Finds the first needle, and returns the string before it and after it.
154
///
155
/// If `needle` is not found, returns `None`.
156
#[cfg(not(feature = "memchr"))]
157
#[must_use]
158
0
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
159
0
    haystack
160
0
        .bytes()
161
0
        .position(|b| b == needle)
162
0
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
163
0
}
164
165
/// Finds the first needle, and returns the string before it and after it.
166
///
167
/// If `needle` is not found, returns `None`.
168
#[cfg(feature = "memchr")]
169
#[must_use]
170
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
171
    memchr::memchr(needle, haystack.as_bytes())
172
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
173
}
174
175
/// Finds the first needle, and returns the string before it, the needle, and the string after it.
176
///
177
/// If no needles are found, returns `None`.
178
#[cfg(not(feature = "memchr"))]
179
#[must_use]
180
0
pub(crate) fn find_split2_hole(
181
0
    haystack: &str,
182
0
    needle1: u8,
183
0
    needle2: u8,
184
0
) -> Option<(&str, u8, &str)> {
185
0
    haystack
186
0
        .bytes()
187
0
        .position(|b| b == needle1 || b == needle2)
188
0
        .map(|pos| {
189
0
            (
190
0
                &haystack[..pos],
191
0
                haystack.as_bytes()[pos],
192
0
                &haystack[(pos + 1)..],
193
0
            )
194
0
        })
195
0
}
196
197
/// Finds the first needle, and returns the string before it, the needle, and the string after it.
198
///
199
/// If no needles are found, returns `None`.
200
#[cfg(feature = "memchr")]
201
#[must_use]
202
pub(crate) fn find_split2_hole(
203
    haystack: &str,
204
    needle1: u8,
205
    needle2: u8,
206
) -> Option<(&str, u8, &str)> {
207
    memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| {
208
        (
209
            &haystack[..pos],
210
            haystack.as_bytes()[pos],
211
            &haystack[(pos + 1)..],
212
        )
213
    })
214
}
215
216
/// Finds the first needle, and returns the string before it, the needle, and the string after it.
217
///
218
/// If no needles are found, returns `None`.
219
#[cfg(not(feature = "memchr"))]
220
#[must_use]
221
0
pub(crate) fn find_split4_hole(
222
0
    haystack: &str,
223
0
    needle1: u8,
224
0
    needle2: u8,
225
0
    needle3: u8,
226
0
    needle4: u8,
227
0
) -> Option<(&str, u8, &str)> {
228
0
    haystack
229
0
        .bytes()
230
0
        .position(|b| b == needle1 || b == needle2 || b == needle3 || b == needle4)
231
0
        .map(|pos| {
232
0
            (
233
0
                &haystack[..pos],
234
0
                haystack.as_bytes()[pos],
235
0
                &haystack[(pos + 1)..],
236
0
            )
237
0
        })
238
0
}
239
240
/// Finds the first needle, and returns the string before it, the needle, and the string after it.
241
///
242
/// If no needles are found, returns `None`.
243
#[cfg(feature = "memchr")]
244
#[must_use]
245
pub(crate) fn find_split4_hole(
246
    haystack: &str,
247
    needle1: u8,
248
    needle2: u8,
249
    needle3: u8,
250
    needle4: u8,
251
) -> Option<(&str, u8, &str)> {
252
    let bytes = haystack.as_bytes();
253
    let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) {
254
        Some(prefix_len) => memchr::memchr(needle4, &bytes[..prefix_len]).or(Some(prefix_len)),
255
        None => memchr::memchr(needle4, bytes),
256
    };
257
    pos.map(|pos| {
258
        (
259
            &haystack[..pos],
260
            haystack.as_bytes()[pos],
261
            &haystack[(pos + 1)..],
262
        )
263
    })
264
}
265
266
/// Finds the last needle, and returns the string before it and after it.
267
///
268
/// If `needle` is not found, returns `None`.
269
#[cfg(not(feature = "memchr"))]
270
#[must_use]
271
0
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
272
0
    haystack
273
0
        .bytes()
274
0
        .rposition(|b| b == needle)
275
0
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
276
0
}
277
278
/// Finds the last needle, and returns the string before it and after it.
279
///
280
/// If `needle` is not found, returns `None`.
281
#[cfg(feature = "memchr")]
282
#[must_use]
283
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
284
    memchr::memrchr(needle, haystack.as_bytes())
285
        .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..]))
286
}
287
288
/// Returns `true` if the string only contains the allowed characters.
289
#[must_use]
290
0
fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
291
0
where
292
0
    F: Copy + Fn(u8) -> bool,
293
0
    G: Copy + Fn(char) -> bool,
294
{
295
0
    while !s.is_empty() {
296
0
        match s.bytes().position(|b| !b.is_ascii()) {
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_regname, iri_string::parser::char::is_nonascii_regname<iri_string::spec::UriSpec>>::{closure#0}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_query<iri_string::spec::UriSpec>>::{closure#0}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_fragment<iri_string::spec::UriSpec>>::{closure#0}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_pchar_slash, <iri_string::spec::UriSpec as iri_string::spec::internal::SpecInternal>::is_nonascii_char_unreserved>::{closure#0}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_userinfo_ipvfutureaddr, iri_string::parser::char::is_nonascii_userinfo<iri_string::spec::UriSpec>>::{closure#0}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::template::parser::char::is_ascii_varchar_continue, iri_string::template::parser::validate::validate_varname::{closure#0}>::{closure#0}
297
0
            Some(nonascii_pos) => {
298
                // Validate ASCII prefix.
299
0
                if nonascii_pos != 0 {
300
0
                    let (prefix, rest) = s.split_at(nonascii_pos);
301
0
                    if !prefix.bytes().all(pred_ascii) {
302
0
                        return false;
303
0
                    }
304
0
                    s = rest;
305
0
                }
306
307
                // Extract non-ASCII part and validate it.
308
0
                let (prefix, rest) = match s.bytes().position(|b| b.is_ascii()) {
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_regname, iri_string::parser::char::is_nonascii_regname<iri_string::spec::UriSpec>>::{closure#1}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_query<iri_string::spec::UriSpec>>::{closure#1}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_fragment<iri_string::spec::UriSpec>>::{closure#1}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_pchar_slash, <iri_string::spec::UriSpec as iri_string::spec::internal::SpecInternal>::is_nonascii_char_unreserved>::{closure#1}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_userinfo_ipvfutureaddr, iri_string::parser::char::is_nonascii_userinfo<iri_string::spec::UriSpec>>::{closure#1}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::template::parser::char::is_ascii_varchar_continue, iri_string::template::parser::validate::validate_varname::{closure#0}>::{closure#1}
309
0
                    Some(ascii_pos) => s.split_at(ascii_pos),
310
0
                    None => (s, ""),
311
                };
312
0
                if !prefix.chars().all(pred_nonascii) {
313
0
                    return false;
314
0
                }
315
0
                s = rest;
316
            }
317
            None => {
318
                // All chars are ASCII.
319
0
                return s.bytes().all(pred_ascii);
320
            }
321
        }
322
    }
323
324
0
    true
325
0
}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_regname, iri_string::parser::char::is_nonascii_regname<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_query<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_fragment<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_pchar_slash, <iri_string::spec::UriSpec as iri_string::spec::internal::SpecInternal>::is_nonascii_char_unreserved>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::parser::char::is_ascii_userinfo_ipvfutureaddr, iri_string::parser::char::is_nonascii_userinfo<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars::<iri_string::template::parser::char::is_ascii_varchar_continue, iri_string::template::parser::validate::validate_varname::{closure#0}>
326
327
/// Returns `true` if the string only contains the allowed characters and percent-encoded char.
328
#[must_use]
329
0
pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
330
0
    mut s: &str,
331
0
    pred_ascii: F,
332
0
    pred_nonascii: G,
333
0
) -> bool
334
0
where
335
0
    F: Copy + Fn(u8) -> bool,
336
0
    G: Copy + Fn(char) -> bool,
337
{
338
0
    while let Some((prefix, suffix)) = find_split_hole(s, b'%') {
339
        // Verify strings before the percent-encoded char.
340
0
        if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) {
341
0
            return false;
342
0
        }
343
344
        // Verify the percent-encoded char.
345
0
        if !starts_with_double_hexdigits(suffix.as_bytes()) {
346
0
            return false;
347
0
        }
348
349
        // Advance the cursor.
350
0
        s = &suffix[2..];
351
    }
352
353
    // Verify the rest.
354
0
    satisfy_chars(s, pred_ascii, pred_nonascii)
355
0
}
Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_regname, iri_string::parser::char::is_nonascii_regname<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_query<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_frag_query, iri_string::parser::char::is_nonascii_fragment<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_pchar_slash, <iri_string::spec::UriSpec as iri_string::spec::internal::SpecInternal>::is_nonascii_char_unreserved>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::parser::char::is_ascii_userinfo_ipvfutureaddr, iri_string::parser::char::is_nonascii_userinfo<iri_string::spec::UriSpec>>
Unexecuted instantiation: iri_string::parser::str::satisfy_chars_with_pct_encoded::<iri_string::template::parser::char::is_ascii_varchar_continue, iri_string::template::parser::validate::validate_varname::{closure#0}>
356
357
/// Returns `true` if the given string starts with two hexadecimal digits.
358
#[must_use]
359
0
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
360
0
    match s {
361
0
        [x, y] | [x, y, ..] => x.is_ascii_hexdigit() && y.is_ascii_hexdigit(),
362
0
        _ => false,
363
    }
364
0
}
365
366
/// Strips the first character if it is the given ASCII character, and returns the rest.
367
///
368
/// # Precondition
369
///
370
/// The given byte (`prefix`) must be an ASCII character.
371
#[must_use]
372
0
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
373
0
    debug_assert!(prefix.is_ascii());
374
0
    if s.as_bytes().first().copied() == Some(prefix) {
375
0
        Some(&s[1..])
376
    } else {
377
0
        None
378
    }
379
0
}
380
381
/// Splits the given string into the first character and the rest.
382
///
383
/// Returns `(first_char, rest_str)`.
384
#[must_use]
385
0
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
386
0
    let mut chars = s.chars();
387
0
    let c = chars.next()?;
388
0
    let rest = chars.as_str();
389
0
    Some((c, rest))
390
0
}