Coverage Report

Created: 2026-01-30 06:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/iri-string-0.7.9/src/parser/trusted.rs
Line
Count
Source
1
//! Fast parsers for trusted (already validated) input.
2
//!
3
//! Using this in wrong way will lead to unexpected wrong result.
4
5
pub(crate) mod authority;
6
7
use core::cmp::Ordering;
8
use core::num::NonZeroUsize;
9
10
use crate::components::{RiReferenceComponents, Splitter};
11
use crate::format::eq_str_display;
12
use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode};
13
use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole};
14
use crate::spec::Spec;
15
use crate::types::RiReferenceStr;
16
17
/// Eats a `scheme` and a following colon, and returns the rest and the scheme.
18
///
19
/// Returns `(rest, scheme)`.
20
///
21
/// This should be called at the head of an absolute IRI/URI.
22
#[must_use]
23
0
fn scheme_colon(i: &str) -> (&str, &str) {
24
0
    let (scheme, rest) =
25
0
        find_split_hole(i, b':').expect("[precondition] absolute IRIs must have `scheme` part");
26
0
    (rest, scheme)
27
0
}
28
29
/// Eats a `scheme` and a following colon if available, and returns the rest and the scheme.
30
///
31
/// This should be called at the head of an `IRI-reference` or similar.
32
#[must_use]
33
0
fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) {
34
0
    match find_split4_hole(i, b':', b'/', b'?', b'#') {
35
0
        Some((scheme, b':', rest)) => (rest, Some(scheme)),
36
0
        _ => (i, None),
37
    }
38
0
}
39
40
/// Eats double slash and the following authority if available, and returns the authority.
41
///
42
/// This should be called at the head of an `IRI-reference`, or at the result of `scheme_colon`.
43
#[must_use]
44
0
fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) {
45
0
    let s = match i.strip_prefix("//") {
46
0
        Some(rest) => rest,
47
0
        None => return (i, None),
48
    };
49
    // `i` might match `path-abempty` (which can start with `//`), but it is not
50
    // allowed as `relative-part`, so no need to care about the `path-abempty` rule here.
51
    // A slash, question mark, and hash character won't appear in `authority`.
52
0
    match find_split3(s, b'/', b'?', b'#') {
53
0
        Some((authority, rest)) => (rest, Some(authority)),
54
0
        None => ("", Some(s)),
55
    }
56
0
}
57
58
/// Eats a string until the query, and returns that part (excluding `?` for the query).
59
#[must_use]
60
0
fn until_query(i: &str) -> (&str, &str) {
61
    // `?` won't appear before the query part.
62
0
    match find_split2(i, b'?', b'#') {
63
0
        Some((before_query, rest)) => (rest, before_query),
64
0
        None => ("", i),
65
    }
66
0
}
67
68
/// Decomposes query and fragment, if available.
69
///
70
/// The string must start with `?` or `#`, or be empty.
71
#[must_use]
72
0
fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) {
73
0
    match i.as_bytes().first().copied() {
74
0
        None => (None, None),
75
        Some(b'?') => {
76
0
            let rest = &i[1..];
77
0
            match find_split_hole(rest, b'#') {
78
0
                Some((query, fragment)) => (Some(query), Some(fragment)),
79
0
                None => (Some(rest), None),
80
            }
81
        }
82
0
        Some(c) => {
83
0
            debug_assert_eq!(c, b'#');
84
0
            (None, Some(&i[1..]))
85
        }
86
    }
87
0
}
88
89
/// Decomposes the given valid `IRI-reference`.
90
#[must_use]
91
0
pub(crate) fn decompose_iri_reference<S: Spec>(
92
0
    i: &RiReferenceStr<S>,
93
0
) -> RiReferenceComponents<'_, S> {
94
    /// Inner function to avoid unnecessary monomorphizations on `S`.
95
0
    fn decompose(i: &str) -> Splitter {
96
0
        let len = i.len();
97
98
0
        let (i, scheme_end) = {
99
0
            let (i, scheme) = scheme_colon_opt(i);
100
0
            let end = scheme.and_then(|s| NonZeroUsize::new(s.len()));
101
0
            (i, end)
102
        };
103
0
        let (i, authority_end) = {
104
            // 2: "//".len()
105
0
            let start = len - i.len() + 2;
106
            // `authority` does not contain the two slashes of `://`.
107
0
            let (i, authority) = slash_slash_authority_opt(i);
108
0
            let end = authority.and_then(|s| NonZeroUsize::new(start + s.len()));
109
0
            (i, end)
110
        };
111
0
        let (i, _path) = until_query(i);
112
113
0
        let (query_start, fragment_start) = {
114
            // This could theoretically be zero if `len` is `usize::MAX` and
115
            // `i` has neither a query nor a fragment. However, this is
116
            // practically impossible.
117
0
            let after_first_prefix = NonZeroUsize::new((len - i.len()).wrapping_add(1));
118
119
0
            let (query, fragment) = decompose_query_and_fragment(i);
120
0
            match (query.is_some(), fragment) {
121
0
                (true, Some(fragment)) => {
122
0
                    (after_first_prefix, NonZeroUsize::new(len - fragment.len()))
123
                }
124
0
                (true, None) => (after_first_prefix, None),
125
0
                (false, Some(_fragment)) => (None, after_first_prefix),
126
0
                (false, None) => (None, None),
127
            }
128
        };
129
130
0
        Splitter::new(scheme_end, authority_end, query_start, fragment_start)
131
0
    }
132
133
0
    RiReferenceComponents {
134
0
        iri: i,
135
0
        splitter: decompose(i.as_str()),
136
0
    }
137
0
}
Unexecuted instantiation: iri_string::parser::trusted::decompose_iri_reference::<iri_string::spec::UriSpec>
Unexecuted instantiation: iri_string::parser::trusted::decompose_iri_reference::<_>
138
139
/// Extracts `scheme` part from an IRI reference.
140
///
141
/// # Precondition
142
///
143
/// The given string must be a valid IRI reference.
144
#[inline]
145
#[must_use]
146
0
pub(crate) fn extract_scheme(i: &str) -> Option<&str> {
147
0
    scheme_colon_opt(i).1
148
0
}
149
150
/// Extracts `scheme` part from an absolute IRI.
151
///
152
/// # Precondition
153
///
154
/// The given string must be a valid absolute IRI.
155
#[inline]
156
#[must_use]
157
0
pub(crate) fn extract_scheme_absolute(i: &str) -> &str {
158
0
    scheme_colon(i).1
159
0
}
160
161
/// Extracts `authority` part from an IRI reference.
162
///
163
/// # Precondition
164
///
165
/// The given string must be a valid IRI reference.
166
#[inline]
167
#[must_use]
168
0
pub(crate) fn extract_authority(i: &str) -> Option<&str> {
169
0
    let (i, _scheme) = scheme_colon_opt(i);
170
0
    slash_slash_authority_opt(i).1
171
0
}
172
173
/// Extracts `authority` part from an absolute IRI.
174
///
175
/// # Precondition
176
///
177
/// The given string must be a valid absolute IRI.
178
#[inline]
179
#[must_use]
180
0
pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> {
181
0
    let (i, _scheme) = scheme_colon(i);
182
0
    slash_slash_authority_opt(i).1
183
0
}
184
185
/// Extracts `authority` part from a relative IRI.
186
///
187
/// # Precondition
188
///
189
/// The given string must be a valid relative IRI.
190
#[inline]
191
#[must_use]
192
0
pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> {
193
0
    slash_slash_authority_opt(i).1
194
0
}
195
196
/// Extracts `path` part from an IRI reference.
197
///
198
/// # Precondition
199
///
200
/// The given string must be a valid IRI reference.
201
#[inline]
202
#[must_use]
203
0
pub(crate) fn extract_path(i: &str) -> &str {
204
0
    let (i, _scheme) = scheme_colon_opt(i);
205
0
    let (i, _authority) = slash_slash_authority_opt(i);
206
0
    until_query(i).1
207
0
}
208
209
/// Extracts `path` part from an absolute IRI.
210
///
211
/// # Precondition
212
///
213
/// The given string must be a valid absolute IRI.
214
#[inline]
215
#[must_use]
216
0
pub(crate) fn extract_path_absolute(i: &str) -> &str {
217
0
    let (i, _scheme) = scheme_colon(i);
218
0
    let (i, _authority) = slash_slash_authority_opt(i);
219
0
    until_query(i).1
220
0
}
221
222
/// Extracts `path` part from a relative IRI.
223
///
224
/// # Precondition
225
///
226
/// The given string must be a valid relative IRI.
227
#[inline]
228
#[must_use]
229
0
pub(crate) fn extract_path_relative(i: &str) -> &str {
230
0
    let (i, _authority) = slash_slash_authority_opt(i);
231
0
    until_query(i).1
232
0
}
233
234
/// Extracts `query` part from an IRI reference.
235
///
236
/// # Precondition
237
///
238
/// The given string must be a valid IRI reference.
239
#[inline]
240
#[must_use]
241
0
pub(crate) fn extract_query(i: &str) -> Option<&str> {
242
0
    let (i, _before_query) = until_query(i);
243
0
    decompose_query_and_fragment(i).0
244
0
}
245
246
/// Extracts `query` part from an `absolute-IRI` string.
247
///
248
/// # Precondition
249
///
250
/// The given string must be a valid `absolute-IRI` string.
251
#[must_use]
252
0
pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> {
253
0
    let (i, _before_query) = until_query(i);
254
0
    if i.is_empty() {
255
0
        None
256
    } else {
257
0
        debug_assert_eq!(
258
0
            i.as_bytes().first(),
259
            Some(&b'?'),
260
0
            "`absolute-IRI` string must not have `fragment part"
261
        );
262
0
        Some(&i[1..])
263
    }
264
0
}
265
266
/// Splits an IRI string into the prefix and the fragment part.
267
///
268
/// A leading `#` character is truncated if the fragment part exists.
269
///
270
/// # Precondition
271
///
272
/// The given string must be a valid IRI reference.
273
#[inline]
274
#[must_use]
275
0
pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) {
276
    // It is completely OK to find the first `#` character from valid IRI to get fragment part,
277
    // because the spec says that there are no `#` characters before the fragment part.
278
    //
279
    // > ```
280
    // > scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
281
    // > ```
282
    // >
283
    // > --- [RFC 3986, section 3.1. Scheme](https://tools.ietf.org/html/rfc3986#section-3.1)
284
    //
285
    // > The authority component is preceded by a double slash ("//") and is terminated by the
286
    // > next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end
287
    // > of the URI.
288
    // >
289
    // > --- [RFC 3986, section 3.2. Authority](https://tools.ietf.org/html/rfc3986#section-3.2)
290
    //
291
    // > The path is terminated by the first question mark ("?") or number sign ("#")
292
    // > character, or by the end of the URI.
293
    // >
294
    // > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3)
295
    //
296
    // > The query component is indicated by the first question mark ("?") character and
297
    // > terminated by a number sign ("#") character or by the end of the URI.
298
    // >
299
    // > --- [RFC 3986, section 3.4. Query](https://tools.ietf.org/html/rfc3986#section-3.4)
300
0
    match find_split_hole(iri, b'#') {
301
0
        Some((prefix, fragment)) => (prefix, Some(fragment)),
302
0
        None => (iri, None),
303
    }
304
0
}
305
306
/// Returns the fragment part of the given IRI.
307
///
308
/// A leading `#` character of the fragment is truncated.
309
#[inline]
310
#[must_use]
311
0
pub(crate) fn extract_fragment(iri: &str) -> Option<&str> {
312
0
    split_fragment(iri).1
313
0
}
314
315
/// Returns `true` if the string is normalized.
316
///
317
/// If this function returns `true`, normalization input and output will be identical.
318
///
319
/// In this function, "normalized" means that any of the normalization below
320
/// won't change the input on normalization:
321
///
322
/// * syntax-based normalization,
323
/// * case normalization,
324
/// * percent-encoding normalization, and
325
/// * path segment normalization.
326
///
327
/// Note that scheme-based normalization is not considered.
328
#[must_use]
329
0
pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool {
330
0
    let (i, scheme) = scheme_colon(i);
331
0
    let (after_authority, authority) = slash_slash_authority_opt(i);
332
0
    let (_after_path, path) = until_query(after_authority);
333
334
    // Syntax-based normalization: uppercase chars in `scheme` should be
335
    // converted to lowercase.
336
0
    if scheme.bytes().any(|b| b.is_ascii_uppercase()) {
337
0
        return false;
338
0
    }
339
340
    // Case normalization: ASCII alphabets in US-ASCII only `host` should be
341
    // normalized to lowercase.
342
    // Case normalization: ASCII alphabets in percent-encoding triplet should be
343
    // normalized to uppercase.
344
    // Percent-encoding normalization: unreserved characters should be decoded
345
    // in `userinfo`, `host`, `path`, `query`, and `fragments`.
346
    // Path segment normalization: the path should not have dot segments (`.`
347
    // and/or `..`).
348
    //
349
    // Note that `authority` can have percent-encoded `userinfo`.
350
0
    if let Some(authority) = authority {
351
0
        let authority_components = authority::decompose_authority(authority);
352
353
        // Check `host`.
354
0
        let host = authority_components.host();
355
0
        let host_is_normalized = if is_ascii_only_host(host) {
356
0
            eq_str_display(host, &NormalizedAsciiOnlyHost::new(host))
357
        } else {
358
            // If the host is not ASCII-only, conversion to lowercase is not performed.
359
0
            is_pct_case_normalized::<S>(host)
360
        };
361
0
        if !host_is_normalized {
362
0
            return false;
363
0
        }
364
365
        // Check percent encodings in `userinfo`.
366
0
        if let Some(userinfo) = authority_components.userinfo() {
367
0
            if !is_pct_case_normalized::<S>(userinfo) {
368
0
                return false;
369
0
            }
370
0
        }
371
0
    }
372
373
    // Check `path`.
374
    //
375
    // Syntax-based normalization: Dot segments might be removed.
376
    // Note that we don't have to care about `%2e` and `%2E` since `.` is unreserved
377
    // and they will be decoded if not normalized.
378
    // Also note that WHATWG serialization will use `/.//` as a path prefix if
379
    // the path is absolute and won't modify the path if the path is relative.
380
    //
381
    // Percent-encoding normalization: unreserved characters should be decoded
382
    // in `path`, `query`, and `fragments`.
383
0
    let path_span_no_dot_segments = if authority.is_some() {
384
0
        Some(path)
385
    } else {
386
0
        match mode {
387
0
            NormalizednessCheckMode::Default => Some(path.strip_prefix("/.//").unwrap_or(path)),
388
0
            NormalizednessCheckMode::Rfc3986 => Some(path),
389
            NormalizednessCheckMode::PreserveAuthoritylessRelativePath => {
390
0
                if path.starts_with('/') {
391
                    // Absolute.
392
0
                    Some(path.strip_prefix("/.//").unwrap_or(path))
393
                } else {
394
                    // Relative. Treat the path as "opaque". No span to check.
395
0
                    None
396
                }
397
            }
398
        }
399
    };
400
0
    if let Some(path_span_no_dot_segments) = path_span_no_dot_segments {
401
0
        if path_span_no_dot_segments
402
0
            .split('/')
403
0
            .any(|segment| matches!(segment, "." | ".."))
404
        {
405
0
            return false;
406
0
        }
407
0
    }
408
0
    is_pct_case_normalized::<S>(after_authority)
409
0
}
410
411
/// Decodes two hexdigits into a byte.
412
///
413
/// # Preconditions
414
///
415
/// The parameters `upper` and `lower` should be an ASCII hexadecimal digit.
416
#[must_use]
417
0
pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 {
418
0
    let i_upper = match (upper & 0xf0).cmp(&0x40) {
419
0
        Ordering::Less => upper - b'0',
420
0
        Ordering::Equal => upper - (b'A' - 10),
421
0
        Ordering::Greater => upper - (b'a' - 10),
422
    };
423
0
    let i_lower = match (lower & 0xf0).cmp(&0x40) {
424
0
        Ordering::Less => lower - b'0',
425
0
        Ordering::Equal => lower - (b'A' - 10),
426
0
        Ordering::Greater => lower - (b'a' - 10),
427
    };
428
0
    (i_upper << 4) + i_lower
429
0
}
430
431
/// Converts the first two hexdigit bytes in the buffer into a byte.
432
///
433
/// # Panics
434
///
435
/// Panics if the string does not start with two hexdigits.
436
#[must_use]
437
0
pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) {
438
0
    let mut bytes = s.bytes();
439
0
    let upper_xdigit = bytes
440
0
        .next()
441
0
        .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference");
442
0
    let lower_xdigit = bytes
443
0
        .next()
444
0
        .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference");
445
0
    let v = hexdigits_to_byte([upper_xdigit, lower_xdigit]);
446
0
    (v, &s[2..])
447
0
}
448
449
/// Returns true if the given `host`/`ihost` string consists of only US-ASCII characters.
450
///
451
/// # Precondition
452
///
453
/// The given string should be a valid `host` or `host ":" port` string.
454
#[must_use]
455
0
pub(crate) fn is_ascii_only_host(mut host: &str) -> bool {
456
0
    while let Some((i, c)) = host
457
0
        .char_indices()
458
0
        .find(|(_i, c)| !c.is_ascii() || *c == '%')
459
    {
460
0
        if c != '%' {
461
            // Non-ASCII character found.
462
0
            debug_assert!(!c.is_ascii());
463
0
            return false;
464
0
        }
465
        // Percent-encoded character found.
466
0
        let after_pct = &host[(i + 1)..];
467
0
        let (byte, rest) = take_xdigits2(after_pct);
468
0
        if !byte.is_ascii() {
469
0
            return false;
470
0
        }
471
0
        host = rest;
472
    }
473
474
    // Neither non-ASCII characters nor percent-encoded characters found.
475
0
    true
476
0
}