Coverage Report

Created: 2025-12-31 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rust-url/url/src/parser.rs
Line
Count
Source
1
// Copyright 2013-2016 The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
use alloc::borrow::Cow;
10
use alloc::string::String;
11
use core::fmt::{self, Formatter, Write};
12
use core::str;
13
14
use crate::host::{Host, HostInternal};
15
use crate::Url;
16
use form_urlencoded::EncodingOverride;
17
use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
18
19
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
20
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
21
22
/// https://url.spec.whatwg.org/#path-percent-encode-set
23
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
24
25
/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
26
pub(crate) const USERINFO: &AsciiSet = &PATH
27
    .add(b'/')
28
    .add(b':')
29
    .add(b';')
30
    .add(b'=')
31
    .add(b'@')
32
    .add(b'[')
33
    .add(b'\\')
34
    .add(b']')
35
    .add(b'^')
36
    .add(b'|');
37
38
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
39
40
// The backslash (\) character is treated as a path separator in special URLs
41
// so it needs to be additionally escaped in that case.
42
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
43
44
// https://url.spec.whatwg.org/#query-state
45
const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
46
const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
47
48
pub type ParseResult<T> = Result<T, ParseError>;
49
50
macro_rules! simple_enum_error {
51
    ($($name: ident => $description: expr,)+) => {
52
        /// Errors that can occur during parsing.
53
        ///
54
        /// This may be extended in the future so exhaustive matching is
55
        /// forbidden.
56
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
57
        #[non_exhaustive]
58
        pub enum ParseError {
59
            $(
60
                $name,
61
            )+
62
        }
63
64
        impl fmt::Display for ParseError {
65
0
            fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
66
0
                match *self {
67
                    $(
68
0
                        ParseError::$name => fmt.write_str($description),
69
                    )+
70
                }
71
0
            }
72
        }
73
    }
74
}
75
76
macro_rules! ascii_tab_or_new_line_pattern {
77
    () => {
78
        '\t' | '\n' | '\r'
79
    };
80
}
81
82
#[cfg(feature = "std")]
83
impl std::error::Error for ParseError {}
84
85
#[cfg(not(feature = "std"))]
86
impl core::error::Error for ParseError {}
87
88
simple_enum_error! {
89
    EmptyHost => "empty host",
90
    IdnaError => "invalid international domain name",
91
    InvalidPort => "invalid port number",
92
    InvalidIpv4Address => "invalid IPv4 address",
93
    InvalidIpv6Address => "invalid IPv6 address",
94
    InvalidDomainCharacter => "invalid domain character",
95
    RelativeUrlWithoutBase => "relative URL without a base",
96
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
97
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
98
    Overflow => "URLs more than 4 GB are not supported",
99
}
100
101
impl From<::idna::Errors> for ParseError {
102
6.35k
    fn from(_: ::idna::Errors) -> Self {
103
6.35k
        Self::IdnaError
104
6.35k
    }
105
}
106
107
macro_rules! syntax_violation_enum {
108
    ($($name: ident => $description: literal,)+) => {
109
        /// Non-fatal syntax violations that can occur during parsing.
110
        ///
111
        /// This may be extended in the future so exhaustive matching is
112
        /// forbidden.
113
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
114
        #[non_exhaustive]
115
        pub enum SyntaxViolation {
116
            $(
117
                /// ```text
118
                #[doc = $description]
119
                /// ```
120
                $name,
121
            )+
122
        }
123
124
        impl SyntaxViolation {
125
0
            pub fn description(&self) -> &'static str {
126
0
                match *self {
127
                    $(
128
                        SyntaxViolation::$name => $description,
129
                    )+
130
                }
131
0
            }
132
        }
133
    }
134
}
135
136
syntax_violation_enum! {
137
    Backslash => "backslash",
138
    C0SpaceIgnored =>
139
        "leading or trailing control or space character are ignored in URLs",
140
    EmbeddedCredentials =>
141
        "embedding authentication information (username or password) \
142
         in an URL is not recommended",
143
    ExpectedDoubleSlash => "expected //",
144
    ExpectedFileDoubleSlash => "expected // after file:",
145
    FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
146
    NonUrlCodePoint => "non-URL code point",
147
    NullInFragment => "NULL characters are ignored in URL fragment identifiers",
148
    PercentDecode => "expected 2 hex digits after %",
149
    TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
150
    UnencodedAtSign => "unencoded @ sign in username or password",
151
}
152
153
impl fmt::Display for SyntaxViolation {
154
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
155
0
        fmt::Display::fmt(self.description(), f)
156
0
    }
157
}
158
159
#[derive(Copy, Clone, PartialEq, Eq)]
160
pub enum SchemeType {
161
    File,
162
    SpecialNotFile,
163
    NotSpecial,
164
}
165
166
impl SchemeType {
167
114k
    pub fn is_special(&self) -> bool {
168
114k
        !matches!(*self, Self::NotSpecial)
169
114k
    }
170
171
323M
    pub fn is_file(&self) -> bool {
172
323M
        matches!(*self, Self::File)
173
323M
    }
174
}
175
176
impl<T: AsRef<str>> From<T> for SchemeType {
177
79.6k
    fn from(s: T) -> Self {
178
79.6k
        match s.as_ref() {
179
79.6k
            "http" | "https" | "ws" | "wss" | "ftp" => Self::SpecialNotFile,
180
55.7k
            "file" => Self::File,
181
48.8k
            _ => Self::NotSpecial,
182
        }
183
79.6k
    }
<url::parser::SchemeType as core::convert::From<&alloc::string::String>>::from
Line
Count
Source
177
74.8k
    fn from(s: T) -> Self {
178
74.8k
        match s.as_ref() {
179
74.8k
            "http" | "https" | "ws" | "wss" | "ftp" => Self::SpecialNotFile,
180
52.0k
            "file" => Self::File,
181
46.7k
            _ => Self::NotSpecial,
182
        }
183
74.8k
    }
<url::parser::SchemeType as core::convert::From<&str>>::from
Line
Count
Source
177
4.85k
    fn from(s: T) -> Self {
178
4.85k
        match s.as_ref() {
179
4.85k
            "http" | "https" | "ws" | "wss" | "ftp" => Self::SpecialNotFile,
180
3.67k
            "file" => Self::File,
181
2.17k
            _ => Self::NotSpecial,
182
        }
183
4.85k
    }
184
}
185
186
1.80k
pub fn default_port(scheme: &str) -> Option<u16> {
187
1.80k
    match scheme {
188
1.80k
        "http" | "ws" => Some(80),
189
670
        "https" | "wss" => Some(443),
190
529
        "ftp" => Some(21),
191
520
        _ => None,
192
    }
193
1.80k
}
194
195
#[derive(Clone, Debug)]
196
pub struct Input<'i> {
197
    chars: str::Chars<'i>,
198
}
199
200
impl<'i> Input<'i> {
201
3.18k
    pub fn new_no_trim(input: &'i str) -> Self {
202
3.18k
        Input {
203
3.18k
            chars: input.chars(),
204
3.18k
        }
205
3.18k
    }
206
207
607
    pub fn new_trim_tab_and_newlines(
208
607
        original_input: &'i str,
209
607
        vfn: Option<&dyn Fn(SyntaxViolation)>,
210
607
    ) -> Self {
211
607
        let input = original_input.trim_matches(ascii_tab_or_new_line);
212
607
        if let Some(vfn) = vfn {
213
0
            if input.len() < original_input.len() {
214
0
                vfn(SyntaxViolation::C0SpaceIgnored)
215
0
            }
216
0
            if input.chars().any(ascii_tab_or_new_line) {
217
0
                vfn(SyntaxViolation::TabOrNewlineIgnored)
218
0
            }
219
607
        }
220
607
        Input {
221
607
            chars: input.chars(),
222
607
        }
223
607
    }
224
225
74.3k
    pub fn new_trim_c0_control_and_space(
226
74.3k
        original_input: &'i str,
227
74.3k
        vfn: Option<&dyn Fn(SyntaxViolation)>,
228
74.3k
    ) -> Self {
229
74.3k
        let input = original_input.trim_matches(c0_control_or_space);
230
74.3k
        if let Some(vfn) = vfn {
231
0
            if input.len() < original_input.len() {
232
0
                vfn(SyntaxViolation::C0SpaceIgnored)
233
0
            }
234
0
            if input.chars().any(ascii_tab_or_new_line) {
235
0
                vfn(SyntaxViolation::TabOrNewlineIgnored)
236
0
            }
237
74.3k
        }
238
74.3k
        Input {
239
74.3k
            chars: input.chars(),
240
74.3k
        }
241
74.3k
    }
242
243
    #[inline]
244
721
    pub fn is_empty(&self) -> bool {
245
721
        self.clone().next().is_none()
246
721
    }
247
248
    #[inline]
249
76.6k
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
76.6k
        p.split_prefix(&mut self.clone())
251
76.6k
    }
<url::parser::Input>::starts_with::<url::parser::ascii_alpha>
Line
Count
Source
249
75.8k
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
75.8k
        p.split_prefix(&mut self.clone())
251
75.8k
    }
<url::parser::Input>::starts_with::<&str>
Line
Count
Source
249
376
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
376
        p.split_prefix(&mut self.clone())
251
376
    }
<url::parser::Input>::starts_with::<char>
Line
Count
Source
249
389
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
389
        p.split_prefix(&mut self.clone())
251
389
    }
252
253
    #[inline]
254
108k
    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
255
108k
        let mut remaining = self.clone();
256
108k
        if p.split_prefix(&mut remaining) {
257
6.47k
            Some(remaining)
258
        } else {
259
101k
            None
260
        }
261
108k
    }
<url::parser::Input>::split_prefix::<&str>
Line
Count
Source
254
46.4k
    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
255
46.4k
        let mut remaining = self.clone();
256
46.4k
        if p.split_prefix(&mut remaining) {
257
2.48k
            Some(remaining)
258
        } else {
259
43.9k
            None
260
        }
261
46.4k
    }
<url::parser::Input>::split_prefix::<char>
Line
Count
Source
254
61.5k
    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
255
61.5k
        let mut remaining = self.clone();
256
61.5k
        if p.split_prefix(&mut remaining) {
257
3.98k
            Some(remaining)
258
        } else {
259
57.5k
            None
260
        }
261
61.5k
    }
262
263
    #[inline]
264
35.8k
    fn split_first(&self) -> (Option<char>, Self) {
265
35.8k
        let mut remaining = self.clone();
266
35.8k
        (remaining.next(), remaining)
267
35.8k
    }
268
269
    #[inline]
270
22.4k
    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
271
22.4k
        let mut count = 0;
272
22.4k
        let mut remaining = self.clone();
273
        loop {
274
33.5k
            let mut input = remaining.clone();
275
33.5k
            if matches!(input.next(), Some(c) if f(c)) {
276
11.0k
                remaining = input;
277
11.0k
                count += 1;
278
11.0k
            } else {
279
22.4k
                return (count, remaining);
280
            }
281
        }
282
22.4k
    }
<url::parser::Input>::count_matching::<<url::parser::Parser>::parse_relative::{closure#0}>
Line
Count
Source
270
240
    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
271
240
        let mut count = 0;
272
240
        let mut remaining = self.clone();
273
        loop {
274
480
            let mut input = remaining.clone();
275
480
            if matches!(input.next(), Some(c) if f(c)) {
276
240
                remaining = input;
277
240
                count += 1;
278
240
            } else {
279
240
                return (count, remaining);
280
            }
281
        }
282
240
    }
<url::parser::Input>::count_matching::<<url::parser::Parser>::parse_with_scheme::{closure#2}>
Line
Count
Source
270
22.2k
    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
271
22.2k
        let mut count = 0;
272
22.2k
        let mut remaining = self.clone();
273
        loop {
274
33.0k
            let mut input = remaining.clone();
275
33.0k
            if matches!(input.next(), Some(c) if f(c)) {
276
10.8k
                remaining = input;
277
10.8k
                count += 1;
278
10.8k
            } else {
279
22.2k
                return (count, remaining);
280
            }
281
        }
282
22.2k
    }
283
284
    #[inline]
285
313M
    fn next_utf8(&mut self) -> Option<(char, &'i str)> {
286
        loop {
287
313M
            let utf8 = self.chars.as_str();
288
313M
            match self.chars.next() {
289
313M
                Some(c) => {
290
313M
                    if !ascii_tab_or_new_line(c) {
291
313M
                        return Some((c, &utf8[..c.len_utf8()]));
292
77.0k
                    }
293
                }
294
40.4k
                None => return None,
295
            }
296
        }
297
313M
    }
298
}
299
300
pub trait Pattern {
301
    fn split_prefix(self, input: &mut Input) -> bool;
302
}
303
304
impl Pattern for char {
305
61.9k
    fn split_prefix(self, input: &mut Input) -> bool {
306
61.9k
        input.next() == Some(self)
307
61.9k
    }
308
}
309
310
impl Pattern for &str {
311
46.8k
    fn split_prefix(self, input: &mut Input) -> bool {
312
51.5k
        for c in self.chars() {
313
51.5k
            if input.next() != Some(c) {
314
44.3k
                return false;
315
7.20k
            }
316
        }
317
2.49k
        true
318
46.8k
    }
319
}
320
321
impl<F: FnMut(char) -> bool> Pattern for F {
322
75.8k
    fn split_prefix(self, input: &mut Input) -> bool {
323
75.8k
        input.next().map_or(false, self)
324
75.8k
    }
325
}
326
327
impl Iterator for Input<'_> {
328
    type Item = char;
329
1.27G
    fn next(&mut self) -> Option<char> {
330
1.27G
        self.chars.by_ref().find(|&c| !ascii_tab_or_new_line(c))
331
1.27G
    }
332
333
1.74k
    fn size_hint(&self) -> (usize, Option<usize>) {
334
1.74k
        (0, Some(self.chars.as_str().len()))
335
1.74k
    }
336
}
337
338
pub struct Parser<'a> {
339
    pub serialization: String,
340
    pub base_url: Option<&'a Url>,
341
    pub query_encoding_override: EncodingOverride<'a>,
342
    pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
343
    pub context: Context,
344
}
345
346
#[derive(PartialEq, Eq, Copy, Clone)]
347
pub enum Context {
348
    UrlParser,
349
    Setter,
350
    PathSegmentSetter,
351
}
352
353
impl Parser<'_> {
354
39.5k
    fn log_violation(&self, v: SyntaxViolation) {
355
39.5k
        if let Some(f) = self.violation_fn {
356
0
            f(v)
357
39.5k
        }
358
39.5k
    }
359
360
33.5k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
33.5k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
33.5k
        }
366
33.5k
    }
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_file::{closure#0}>
Line
Count
Source
360
3.75k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
3.75k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
3.75k
        }
366
3.75k
    }
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_file::{closure#1}>
Line
Count
Source
360
2.89k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
2.89k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
2.89k
        }
366
2.89k
    }
Unexecuted instantiation: <url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_relative::{closure#1}>
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_with_scheme::{closure#0}>
Line
Count
Source
360
4.63k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
4.63k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
4.63k
        }
366
4.63k
    }
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_with_scheme::{closure#3}>
Line
Count
Source
360
22.2k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
22.2k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
22.2k
        }
366
22.2k
    }
367
368
3.78k
    pub fn for_setter(serialization: String) -> Self {
369
3.78k
        Parser {
370
3.78k
            serialization,
371
3.78k
            base_url: None,
372
3.78k
            query_encoding_override: None,
373
3.78k
            violation_fn: None,
374
3.78k
            context: Context::Setter,
375
3.78k
        }
376
3.78k
    }
377
378
    /// https://url.spec.whatwg.org/#concept-basic-url-parser
379
74.3k
    pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
380
74.3k
        let input = Input::new_trim_c0_control_and_space(input, self.violation_fn);
381
74.3k
        if let Ok(remaining) = self.parse_scheme(input.clone()) {
382
73.3k
            return self.parse_with_scheme(remaining);
383
1.02k
        }
384
385
        // No-scheme state
386
1.02k
        if let Some(base_url) = self.base_url {
387
389
            if input.starts_with('#') {
388
0
                self.fragment_only(base_url, input)
389
389
            } else if base_url.cannot_be_a_base() {
390
21
                Err(ParseError::RelativeUrlWithCannotBeABaseBase)
391
            } else {
392
368
                let scheme_type = SchemeType::from(base_url.scheme());
393
368
                if scheme_type.is_file() {
394
128
                    self.parse_file(input, scheme_type, Some(base_url))
395
                } else {
396
240
                    self.parse_relative(input, scheme_type, base_url)
397
                }
398
            }
399
        } else {
400
633
            Err(ParseError::RelativeUrlWithoutBase)
401
        }
402
74.3k
    }
403
404
75.8k
    pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
405
        // starts_with will also fail for empty strings so we can skip that comparison for perf
406
75.8k
        if !input.starts_with(ascii_alpha) {
407
650
            return Err(());
408
75.2k
        }
409
75.2k
        debug_assert!(self.serialization.is_empty());
410
45.5M
        while let Some(c) = input.next() {
411
45.5M
            match c {
412
43.1M
                'a'..='z' | '0'..='9' | '+' | '-' | '.' => self.serialization.push(c),
413
2.33M
                'A'..='Z' => self.serialization.push(c.to_ascii_lowercase()),
414
73.3k
                ':' => return Ok(input),
415
                _ => {
416
197
                    self.serialization.clear();
417
197
                    return Err(());
418
                }
419
            }
420
        }
421
        // EOF before ':'
422
1.64k
        if self.context == Context::Setter {
423
1.47k
            Ok(input)
424
        } else {
425
178
            self.serialization.clear();
426
178
            Err(())
427
        }
428
75.8k
    }
429
430
73.3k
    fn parse_with_scheme(mut self, input: Input<'_>) -> ParseResult<Url> {
431
        use crate::SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash};
432
73.3k
        let scheme_end = to_u32(self.serialization.len())?;
433
73.3k
        let scheme_type = SchemeType::from(&self.serialization);
434
73.3k
        self.serialization.push(':');
435
73.3k
        match scheme_type {
436
            SchemeType::File => {
437
4.63k
                self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//"));
438
4.63k
                let base_file_url = self.base_url.and_then(|base| {
439
0
                    if base.scheme() == "file" {
440
0
                        Some(base)
441
                    } else {
442
0
                        None
443
                    }
444
0
                });
445
4.63k
                self.serialization.clear();
446
4.63k
                self.parse_file(input, scheme_type, base_file_url)
447
            }
448
            SchemeType::SpecialNotFile => {
449
                // special relative or authority state
450
33.0k
                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
451
22.2k
                if let Some(base_url) = self.base_url {
452
0
                    if slashes_count < 2
453
0
                        && base_url.scheme() == &self.serialization[..scheme_end as usize]
454
                    {
455
                        // "Cannot-be-a-base" URLs only happen with "not special" schemes.
456
0
                        debug_assert!(!base_url.cannot_be_a_base());
457
0
                        self.serialization.clear();
458
0
                        return self.parse_relative(input, scheme_type, base_url);
459
0
                    }
460
22.2k
                }
461
                // special authority slashes state
462
22.2k
                self.log_violation_if(ExpectedDoubleSlash, || {
463
0
                    input
464
0
                        .clone()
465
0
                        .take_while(|&c| matches!(c, '/' | '\\'))
466
0
                        .collect::<String>()
467
0
                        != "//"
468
0
                });
469
22.2k
                self.after_double_slash(remaining, scheme_type, scheme_end)
470
            }
471
46.4k
            SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
472
        }
473
73.3k
    }
474
475
    /// Scheme other than file, http, https, ws, ws, ftp.
476
46.4k
    fn parse_non_special(
477
46.4k
        mut self,
478
46.4k
        input: Input<'_>,
479
46.4k
        scheme_type: SchemeType,
480
46.4k
        scheme_end: u32,
481
46.4k
    ) -> ParseResult<Url> {
482
        // path or authority state (
483
46.4k
        if let Some(input) = input.split_prefix("//") {
484
2.48k
            return self.after_double_slash(input, scheme_type, scheme_end);
485
43.9k
        }
486
        // Anarchist URL (no authority)
487
43.9k
        let path_start = to_u32(self.serialization.len())?;
488
43.9k
        let username_end = path_start;
489
43.9k
        let host_start = path_start;
490
43.9k
        let host_end = path_start;
491
43.9k
        let host = HostInternal::None;
492
43.9k
        let port = None;
493
43.9k
        let remaining = if let Some(input) = input.split_prefix('/') {
494
2.22k
            self.serialization.push('/');
495
2.22k
            self.parse_path(scheme_type, &mut false, path_start as usize, input)
496
        } else {
497
41.7k
            self.parse_cannot_be_a_base_path(input)
498
        };
499
43.9k
        self.with_query_and_fragment(
500
43.9k
            scheme_type,
501
43.9k
            scheme_end,
502
43.9k
            username_end,
503
43.9k
            host_start,
504
43.9k
            host_end,
505
43.9k
            host,
506
43.9k
            port,
507
43.9k
            path_start,
508
43.9k
            remaining,
509
        )
510
46.4k
    }
511
512
4.76k
    fn parse_file(
513
4.76k
        mut self,
514
4.76k
        input: Input<'_>,
515
4.76k
        scheme_type: SchemeType,
516
4.76k
        base_file_url: Option<&Url>,
517
4.76k
    ) -> ParseResult<Url> {
518
        use crate::SyntaxViolation::Backslash;
519
        // file state
520
4.76k
        debug_assert!(self.serialization.is_empty());
521
4.76k
        let (first_char, input_after_first_char) = input.split_first();
522
4.76k
        if matches!(first_char, Some('/') | Some('\\')) {
523
3.75k
            self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\'));
524
            // file slash state
525
3.75k
            let (next_char, input_after_next_char) = input_after_first_char.split_first();
526
3.75k
            if matches!(next_char, Some('/') | Some('\\')) {
527
2.89k
                self.log_violation_if(Backslash, || next_char == Some('\\'));
528
                // file host state
529
2.89k
                self.serialization.push_str("file://");
530
2.89k
                let scheme_end = "file".len() as u32;
531
2.89k
                let host_start = "file://".len() as u32;
532
2.67k
                let (path_start, mut host, remaining) =
533
2.89k
                    self.parse_file_host(input_after_next_char)?;
534
2.67k
                let mut host_end = to_u32(self.serialization.len())?;
535
2.67k
                let mut has_host = !matches!(host, HostInternal::None);
536
2.67k
                let remaining = if path_start {
537
1.56k
                    self.parse_path_start(SchemeType::File, &mut has_host, remaining)
538
                } else {
539
1.11k
                    let path_start = self.serialization.len();
540
1.11k
                    self.serialization.push('/');
541
1.11k
                    self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
542
                };
543
544
                // For file URLs that have a host and whose path starts
545
                // with the windows drive letter we just remove the host.
546
2.67k
                if !has_host {
547
1.37k
                    self.serialization
548
1.37k
                        .drain(host_start as usize..host_end as usize);
549
1.37k
                    host_end = host_start;
550
1.37k
                    host = HostInternal::None;
551
1.37k
                }
552
2.67k
                let (query_start, fragment_start) =
553
2.67k
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
554
2.67k
                return Ok(Url {
555
2.67k
                    serialization: self.serialization,
556
2.67k
                    scheme_end,
557
2.67k
                    username_end: host_start,
558
2.67k
                    host_start,
559
2.67k
                    host_end,
560
2.67k
                    host,
561
2.67k
                    port: None,
562
2.67k
                    path_start: host_end,
563
2.67k
                    query_start,
564
2.67k
                    fragment_start,
565
2.67k
                });
566
            } else {
567
863
                self.serialization.push_str("file://");
568
863
                let scheme_end = "file".len() as u32;
569
863
                let host_start = "file://".len();
570
863
                let mut host_end = host_start;
571
863
                let mut host = HostInternal::None;
572
863
                if !starts_with_windows_drive_letter_segment(&input_after_first_char) {
573
855
                    if let Some(base_url) = base_file_url {
574
128
                        let first_segment = base_url.path_segments().unwrap().next().unwrap();
575
128
                        if is_normalized_windows_drive_letter(first_segment) {
576
12
                            self.serialization.push('/');
577
12
                            self.serialization.push_str(first_segment);
578
116
                        } else if let Some(host_str) = base_url.host_str() {
579
41
                            self.serialization.push_str(host_str);
580
41
                            host_end = self.serialization.len();
581
41
                            host = base_url.host;
582
75
                        }
583
727
                    }
584
8
                }
585
                // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
586
863
                let parse_path_input = if let Some(c) = first_char {
587
863
                    if c == '/' || c == '\\' || c == '?' || c == '#' {
588
863
                        input
589
                    } else {
590
0
                        input_after_first_char
591
                    }
592
                } else {
593
0
                    input_after_first_char
594
                };
595
596
863
                let remaining =
597
863
                    self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
598
599
863
                let host_start = host_start as u32;
600
601
863
                let (query_start, fragment_start) =
602
863
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
603
604
863
                let host_end = host_end as u32;
605
863
                return Ok(Url {
606
863
                    serialization: self.serialization,
607
863
                    scheme_end,
608
863
                    username_end: host_start,
609
863
                    host_start,
610
863
                    host_end,
611
863
                    host,
612
863
                    port: None,
613
863
                    path_start: host_end,
614
863
                    query_start,
615
863
                    fragment_start,
616
863
                });
617
            }
618
1.00k
        }
619
1.00k
        if let Some(base_url) = base_file_url {
620
0
            match first_char {
621
                None => {
622
                    // Copy everything except the fragment
623
0
                    let before_fragment = match base_url.fragment_start {
624
0
                        Some(i) => &base_url.serialization[..i as usize],
625
0
                        None => &*base_url.serialization,
626
                    };
627
0
                    self.serialization.push_str(before_fragment);
628
0
                    Ok(Url {
629
0
                        serialization: self.serialization,
630
0
                        fragment_start: None,
631
0
                        ..*base_url
632
0
                    })
633
                }
634
                Some('?') => {
635
                    // Copy everything up to the query string
636
0
                    let before_query = match (base_url.query_start, base_url.fragment_start) {
637
0
                        (None, None) => &*base_url.serialization,
638
0
                        (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
639
                    };
640
0
                    self.serialization.push_str(before_query);
641
0
                    let (query_start, fragment_start) =
642
0
                        self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
643
0
                    Ok(Url {
644
0
                        serialization: self.serialization,
645
0
                        query_start,
646
0
                        fragment_start,
647
0
                        ..*base_url
648
0
                    })
649
                }
650
0
                Some('#') => self.fragment_only(base_url, input),
651
                _ => {
652
0
                    if !starts_with_windows_drive_letter_segment(&input) {
653
0
                        let before_query = match (base_url.query_start, base_url.fragment_start) {
654
0
                            (None, None) => &*base_url.serialization,
655
0
                            (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
656
                        };
657
0
                        self.serialization.push_str(before_query);
658
0
                        self.shorten_path(SchemeType::File, base_url.path_start as usize);
659
0
                        let remaining = self.parse_path(
660
0
                            SchemeType::File,
661
0
                            &mut true,
662
0
                            base_url.path_start as usize,
663
0
                            input,
664
                        );
665
0
                        self.with_query_and_fragment(
666
0
                            SchemeType::File,
667
0
                            base_url.scheme_end,
668
0
                            base_url.username_end,
669
0
                            base_url.host_start,
670
0
                            base_url.host_end,
671
0
                            base_url.host,
672
0
                            base_url.port,
673
0
                            base_url.path_start,
674
0
                            remaining,
675
                        )
676
                    } else {
677
0
                        self.serialization.push_str("file:///");
678
0
                        let scheme_end = "file".len() as u32;
679
0
                        let path_start = "file://".len();
680
0
                        let remaining =
681
0
                            self.parse_path(SchemeType::File, &mut false, path_start, input);
682
0
                        let (query_start, fragment_start) =
683
0
                            self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
684
0
                        let path_start = path_start as u32;
685
0
                        Ok(Url {
686
0
                            serialization: self.serialization,
687
0
                            scheme_end,
688
0
                            username_end: path_start,
689
0
                            host_start: path_start,
690
0
                            host_end: path_start,
691
0
                            host: HostInternal::None,
692
0
                            port: None,
693
0
                            path_start,
694
0
                            query_start,
695
0
                            fragment_start,
696
0
                        })
697
                    }
698
                }
699
            }
700
        } else {
701
1.00k
            self.serialization.push_str("file:///");
702
1.00k
            let scheme_end = "file".len() as u32;
703
1.00k
            let path_start = "file://".len();
704
1.00k
            let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
705
1.00k
            let (query_start, fragment_start) =
706
1.00k
                self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
707
1.00k
            let path_start = path_start as u32;
708
1.00k
            Ok(Url {
709
1.00k
                serialization: self.serialization,
710
1.00k
                scheme_end,
711
1.00k
                username_end: path_start,
712
1.00k
                host_start: path_start,
713
1.00k
                host_end: path_start,
714
1.00k
                host: HostInternal::None,
715
1.00k
                port: None,
716
1.00k
                path_start,
717
1.00k
                query_start,
718
1.00k
                fragment_start,
719
1.00k
            })
720
        }
721
4.76k
    }
722
723
240
    fn parse_relative(
724
240
        mut self,
725
240
        input: Input<'_>,
726
240
        scheme_type: SchemeType,
727
240
        base_url: &Url,
728
240
    ) -> ParseResult<Url> {
729
        // relative state
730
240
        debug_assert!(self.serialization.is_empty());
731
240
        let (first_char, input_after_first_char) = input.split_first();
732
240
        match first_char {
733
            None => {
734
                // Copy everything except the fragment
735
0
                let before_fragment = match base_url.fragment_start {
736
0
                    Some(i) => &base_url.serialization[..i as usize],
737
0
                    None => &*base_url.serialization,
738
                };
739
0
                self.serialization.push_str(before_fragment);
740
0
                Ok(Url {
741
0
                    serialization: self.serialization,
742
0
                    fragment_start: None,
743
0
                    ..*base_url
744
0
                })
745
            }
746
            Some('?') => {
747
                // Copy everything up to the query string
748
0
                let before_query = match (base_url.query_start, base_url.fragment_start) {
749
0
                    (None, None) => &*base_url.serialization,
750
0
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
751
                };
752
0
                self.serialization.push_str(before_query);
753
0
                let (query_start, fragment_start) =
754
0
                    self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
755
0
                Ok(Url {
756
0
                    serialization: self.serialization,
757
0
                    query_start,
758
0
                    fragment_start,
759
0
                    ..*base_url
760
0
                })
761
            }
762
0
            Some('#') => self.fragment_only(base_url, input),
763
            Some('/') | Some('\\') => {
764
480
                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
765
240
                if slashes_count >= 2 {
766
0
                    self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || {
767
0
                        input
768
0
                            .clone()
769
0
                            .take_while(|&c| matches!(c, '/' | '\\'))
770
0
                            .collect::<String>()
771
0
                            != "//"
772
0
                    });
773
0
                    let scheme_end = base_url.scheme_end;
774
0
                    debug_assert!(base_url.byte_at(scheme_end) == b':');
775
0
                    self.serialization
776
0
                        .push_str(base_url.slice(..scheme_end + 1));
777
0
                    if let Some(after_prefix) = input.split_prefix("//") {
778
0
                        return self.after_double_slash(after_prefix, scheme_type, scheme_end);
779
0
                    }
780
0
                    return self.after_double_slash(remaining, scheme_type, scheme_end);
781
240
                }
782
240
                let path_start = base_url.path_start;
783
240
                self.serialization.push_str(base_url.slice(..path_start));
784
240
                self.serialization.push('/');
785
240
                let remaining = self.parse_path(
786
240
                    scheme_type,
787
240
                    &mut true,
788
240
                    path_start as usize,
789
240
                    input_after_first_char,
790
                );
791
240
                self.with_query_and_fragment(
792
240
                    scheme_type,
793
240
                    base_url.scheme_end,
794
240
                    base_url.username_end,
795
240
                    base_url.host_start,
796
240
                    base_url.host_end,
797
240
                    base_url.host,
798
240
                    base_url.port,
799
240
                    base_url.path_start,
800
240
                    remaining,
801
                )
802
            }
803
            _ => {
804
0
                let before_query = match (base_url.query_start, base_url.fragment_start) {
805
0
                    (None, None) => &*base_url.serialization,
806
0
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
807
                };
808
0
                self.serialization.push_str(before_query);
809
                // FIXME spec says just "remove last entry", not the "pop" algorithm
810
0
                self.pop_path(scheme_type, base_url.path_start as usize);
811
                // A special url always has a path.
812
                // A path always starts with '/'
813
0
                if self.serialization.len() == base_url.path_start as usize
814
0
                    && (SchemeType::from(base_url.scheme()).is_special() || !input.is_empty())
815
0
                {
816
0
                    self.serialization.push('/');
817
0
                }
818
0
                let remaining = match input.split_first() {
819
0
                    (Some('/'), remaining) => self.parse_path(
820
0
                        scheme_type,
821
0
                        &mut true,
822
0
                        base_url.path_start as usize,
823
0
                        remaining,
824
                    ),
825
                    _ => {
826
0
                        self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input)
827
                    }
828
                };
829
0
                self.with_query_and_fragment(
830
0
                    scheme_type,
831
0
                    base_url.scheme_end,
832
0
                    base_url.username_end,
833
0
                    base_url.host_start,
834
0
                    base_url.host_end,
835
0
                    base_url.host,
836
0
                    base_url.port,
837
0
                    base_url.path_start,
838
0
                    remaining,
839
                )
840
            }
841
        }
842
240
    }
843
844
24.7k
    fn after_double_slash(
845
24.7k
        mut self,
846
24.7k
        input: Input<'_>,
847
24.7k
        scheme_type: SchemeType,
848
24.7k
        scheme_end: u32,
849
24.7k
    ) -> ParseResult<Url> {
850
24.7k
        self.serialization.push('/');
851
24.7k
        self.serialization.push('/');
852
        // authority state
853
24.7k
        let before_authority = self.serialization.len();
854
24.7k
        let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
855
24.7k
        let has_authority = before_authority != self.serialization.len();
856
        // host state
857
24.7k
        let host_start = to_u32(self.serialization.len())?;
858
17.2k
        let (host_end, host, port, remaining) =
859
24.7k
            self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
860
17.2k
        if host == HostInternal::None && has_authority {
861
10
            return Err(ParseError::EmptyHost);
862
17.2k
        }
863
        // path state
864
17.2k
        let path_start = to_u32(self.serialization.len())?;
865
17.2k
        let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
866
17.2k
        self.with_query_and_fragment(
867
17.2k
            scheme_type,
868
17.2k
            scheme_end,
869
17.2k
            username_end,
870
17.2k
            host_start,
871
17.2k
            host_end,
872
17.2k
            host,
873
17.2k
            port,
874
17.2k
            path_start,
875
17.2k
            remaining,
876
        )
877
24.7k
    }
878
879
    /// Return (username_end, remaining)
880
24.7k
    fn parse_userinfo<'i>(
881
24.7k
        &mut self,
882
24.7k
        mut input: Input<'i>,
883
24.7k
        scheme_type: SchemeType,
884
24.7k
    ) -> ParseResult<(u32, Input<'i>)> {
885
24.7k
        let mut last_at = None;
886
24.7k
        let mut remaining = input.clone();
887
24.7k
        let mut char_count = 0;
888
608M
        while let Some(c) = remaining.next() {
889
3.91k
            match c {
890
                '@' => {
891
10.4k
                    if last_at.is_some() {
892
8.77k
                        self.log_violation(SyntaxViolation::UnencodedAtSign)
893
                    } else {
894
1.72k
                        self.log_violation(SyntaxViolation::EmbeddedCredentials)
895
                    }
896
10.4k
                    last_at = Some((char_count, remaining.clone()))
897
                }
898
6.52k
                '/' | '?' | '#' => break,
899
3.91k
                '\\' if scheme_type.is_special() => break,
900
608M
                _ => (),
901
            }
902
608M
            char_count += 1;
903
        }
904
24.7k
        let (mut userinfo_char_count, remaining) = match last_at {
905
23.0k
            None => return Ok((to_u32(self.serialization.len())?, input)),
906
271
            Some((0, remaining)) => {
907
                // Otherwise, if one of the following is true
908
                // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
909
                // url is special and c is U+005C (\)
910
                // If @ flag is set and buffer is the empty string, validation error, return failure.
911
271
                if let (Some(c), _) = remaining.split_first() {
912
243
                    if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') {
913
19
                        return Err(ParseError::EmptyHost);
914
224
                    }
915
28
                }
916
252
                return Ok((to_u32(self.serialization.len())?, remaining));
917
            }
918
1.45k
            Some(x) => x,
919
        };
920
921
1.45k
        let mut username_end = None;
922
1.45k
        let mut has_password = false;
923
1.45k
        let mut has_username = false;
924
105M
        while userinfo_char_count > 0 {
925
105M
            let (c, utf8_c) = input.next_utf8().unwrap();
926
105M
            userinfo_char_count -= 1;
927
105M
            if c == ':' && username_end.is_none() {
928
                // Start parsing password
929
625
                username_end = Some(to_u32(self.serialization.len())?);
930
                // We don't add a colon if the password is empty
931
625
                if userinfo_char_count > 0 {
932
596
                    self.serialization.push(':');
933
596
                    has_password = true;
934
596
                }
935
            } else {
936
105M
                if !has_password {
937
39.3M
                    has_username = true;
938
66.5M
                }
939
105M
                self.check_url_code_point(c, &input);
940
105M
                self.serialization
941
105M
                    .extend(utf8_percent_encode(utf8_c, USERINFO));
942
            }
943
        }
944
1.45k
        let username_end = match username_end {
945
625
            Some(i) => i,
946
827
            None => to_u32(self.serialization.len())?,
947
        };
948
1.45k
        if has_username || has_password {
949
1.44k
            self.serialization.push('@');
950
1.44k
        }
951
1.45k
        Ok((username_end, remaining))
952
24.7k
    }
953
954
24.7k
    fn parse_host_and_port<'i>(
955
24.7k
        &mut self,
956
24.7k
        input: Input<'i>,
957
24.7k
        scheme_end: u32,
958
24.7k
        scheme_type: SchemeType,
959
24.7k
    ) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
960
24.7k
        let (host, remaining) = Parser::parse_host(input, scheme_type)?;
961
17.5k
        write!(&mut self.serialization, "{host}").unwrap();
962
17.5k
        let host_end = to_u32(self.serialization.len())?;
963
17.5k
        if let Host::Domain(h) = &host {
964
16.2k
            if h.is_empty() {
965
                // Port with an empty host
966
376
                if remaining.starts_with(":") {
967
7
                    return Err(ParseError::EmptyHost);
968
369
                }
969
369
                if scheme_type.is_special() {
970
0
                    return Err(ParseError::EmptyHost);
971
369
                }
972
15.8k
            }
973
1.30k
        };
974
975
17.5k
        let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
976
1.76k
            let scheme = || default_port(&self.serialization[..scheme_end as usize]);
977
1.76k
            let (port, remaining) = Parser::parse_port(remaining, scheme, self.context)?;
978
1.48k
            if let Some(port) = port {
979
1.13k
                self.serialization.push(':');
980
1.13k
                let mut buffer = [0u8; 5];
981
1.13k
                let port_str = fast_u16_to_str(&mut buffer, port);
982
1.13k
                self.serialization.push_str(port_str);
983
1.13k
            }
984
1.48k
            (port, remaining)
985
        } else {
986
15.8k
            (None, remaining)
987
        };
988
17.2k
        Ok((host_end, host.into(), port, remaining))
989
24.7k
    }
990
991
24.7k
    pub fn parse_host(
992
24.7k
        mut input: Input<'_>,
993
24.7k
        scheme_type: SchemeType,
994
24.7k
    ) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
995
24.7k
        if scheme_type.is_file() {
996
0
            return Parser::get_file_host(input);
997
24.7k
        }
998
        // Undo the Input abstraction here to avoid allocating in the common case
999
        // where the host part of the input does not contain any tab or newline
1000
24.7k
        let input_str = input.chars.as_str();
1001
24.7k
        let mut inside_square_brackets = false;
1002
24.7k
        let mut has_ignored_chars = false;
1003
24.7k
        let mut non_ignored_chars = 0;
1004
24.7k
        let mut bytes = 0;
1005
500M
        for c in input_str.chars() {
1006
4.74k
            match c {
1007
1.87k
                ':' if !inside_square_brackets => break,
1008
981
                '\\' if scheme_type.is_special() => break,
1009
5.71k
                '/' | '?' | '#' => break,
1010
25.9k
                ascii_tab_or_new_line_pattern!() => {
1011
25.9k
                    has_ignored_chars = true;
1012
25.9k
                }
1013
                '[' => {
1014
1.48k
                    inside_square_brackets = true;
1015
1.48k
                    non_ignored_chars += 1
1016
                }
1017
                ']' => {
1018
3.67k
                    inside_square_brackets = false;
1019
3.67k
                    non_ignored_chars += 1
1020
                }
1021
500M
                _ => non_ignored_chars += 1,
1022
            }
1023
500M
            bytes += c.len_utf8();
1024
        }
1025
        let host_str;
1026
        {
1027
24.7k
            let host_input = input.by_ref().take(non_ignored_chars);
1028
24.7k
            if has_ignored_chars {
1029
1.65k
                host_str = Cow::Owned(host_input.collect());
1030
1.65k
            } else {
1031
435M
                for _ in host_input {}
1032
23.0k
                host_str = Cow::Borrowed(&input_str[..bytes]);
1033
            }
1034
        }
1035
24.7k
        if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
1036
372
            return Err(ParseError::EmptyHost);
1037
24.3k
        }
1038
24.3k
        if !scheme_type.is_special() {
1039
2.48k
            let host = Host::parse_opaque_cow(host_str)?;
1040
2.38k
            return Ok((host, input));
1041
21.8k
        }
1042
21.8k
        let host = Host::parse_cow(host_str)?;
1043
15.1k
        Ok((host, input))
1044
24.7k
    }
1045
1046
0
    fn get_file_host(input: Input<'_>) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
1047
0
        let (_, host_str, remaining) = Parser::file_host(input)?;
1048
0
        let host = match Host::parse(&host_str)? {
1049
0
            Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")),
1050
0
            Host::Domain(s) => Host::Domain(Cow::Owned(s)),
1051
0
            Host::Ipv4(ip) => Host::Ipv4(ip),
1052
0
            Host::Ipv6(ip) => Host::Ipv6(ip),
1053
        };
1054
0
        Ok((host, remaining))
1055
0
    }
1056
1057
2.89k
    fn parse_file_host<'i>(
1058
2.89k
        &mut self,
1059
2.89k
        input: Input<'i>,
1060
2.89k
    ) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1061
        let has_host;
1062
2.89k
        let (_, host_str, remaining) = Parser::file_host(input)?;
1063
2.89k
        let host = if host_str.is_empty() {
1064
1.11k
            has_host = false;
1065
1.11k
            HostInternal::None
1066
        } else {
1067
1.78k
            match Host::parse_cow(host_str)? {
1068
1.51k
                Host::Domain(ref d) if d == "localhost" => {
1069
4
                    has_host = false;
1070
4
                    HostInternal::None
1071
                }
1072
1.56k
                host => {
1073
1.56k
                    write!(&mut self.serialization, "{host}").unwrap();
1074
1.56k
                    has_host = true;
1075
1.56k
                    host.into()
1076
                }
1077
            }
1078
        };
1079
2.67k
        Ok((has_host, host, remaining))
1080
2.89k
    }
1081
1082
2.89k
    pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> {
1083
        // Undo the Input abstraction here to avoid allocating in the common case
1084
        // where the host part of the input does not contain any tab or newline
1085
2.89k
        let input_str = input.chars.as_str();
1086
2.89k
        let mut has_ignored_chars = false;
1087
2.89k
        let mut non_ignored_chars = 0;
1088
2.89k
        let mut bytes = 0;
1089
115M
        for c in input_str.chars() {
1090
115M
            match c {
1091
2.29k
                '/' | '\\' | '?' | '#' => break,
1092
2.11k
                ascii_tab_or_new_line_pattern!() => has_ignored_chars = true,
1093
115M
                _ => non_ignored_chars += 1,
1094
            }
1095
115M
            bytes += c.len_utf8();
1096
        }
1097
        let host_str;
1098
2.89k
        let mut remaining = input.clone();
1099
        {
1100
2.89k
            let host_input = remaining.by_ref().take(non_ignored_chars);
1101
2.89k
            if has_ignored_chars {
1102
164
                host_str = Cow::Owned(host_input.collect());
1103
164
            } else {
1104
110M
                for _ in host_input {}
1105
2.73k
                host_str = Cow::Borrowed(&input_str[..bytes]);
1106
            }
1107
        }
1108
2.89k
        if is_windows_drive_letter(&host_str) {
1109
20
            return Ok((false, "".into(), input));
1110
2.87k
        }
1111
2.87k
        Ok((true, host_str, remaining))
1112
2.89k
    }
1113
1114
1.76k
    pub fn parse_port<P>(
1115
1.76k
        mut input: Input<'_>,
1116
1.76k
        default_port: P,
1117
1.76k
        context: Context,
1118
1.76k
    ) -> ParseResult<(Option<u16>, Input<'_>)>
1119
1.76k
    where
1120
1.76k
        P: Fn() -> Option<u16>,
1121
    {
1122
1.76k
        let mut port: u32 = 0;
1123
1.76k
        let mut has_any_digit = false;
1124
7.44k
        while let (Some(c), remaining) = input.split_first() {
1125
6.83k
            if let Some(digit) = c.to_digit(10) {
1126
5.69k
                port = port * 10 + digit;
1127
5.69k
                if port > u16::MAX as u32 {
1128
14
                    return Err(ParseError::InvalidPort);
1129
5.67k
                }
1130
5.67k
                has_any_digit = true;
1131
1.14k
            } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
1132
261
                return Err(ParseError::InvalidPort);
1133
            } else {
1134
881
                break;
1135
            }
1136
5.67k
            input = remaining;
1137
        }
1138
1139
1.48k
        if !has_any_digit && context == Context::Setter && !input.is_empty() {
1140
0
            return Err(ParseError::InvalidPort);
1141
1.48k
        }
1142
1143
1.48k
        let mut opt_port = Some(port as u16);
1144
1.48k
        if !has_any_digit || opt_port == default_port() {
1145
351
            opt_port = None;
1146
1.13k
        }
1147
1.48k
        Ok((opt_port, input))
1148
1.76k
    }
<url::parser::Parser>::parse_port::<<url::parser::Parser>::parse_host_and_port::{closure#0}>
Line
Count
Source
1114
1.76k
    pub fn parse_port<P>(
1115
1.76k
        mut input: Input<'_>,
1116
1.76k
        default_port: P,
1117
1.76k
        context: Context,
1118
1.76k
    ) -> ParseResult<(Option<u16>, Input<'_>)>
1119
1.76k
    where
1120
1.76k
        P: Fn() -> Option<u16>,
1121
    {
1122
1.76k
        let mut port: u32 = 0;
1123
1.76k
        let mut has_any_digit = false;
1124
7.44k
        while let (Some(c), remaining) = input.split_first() {
1125
6.83k
            if let Some(digit) = c.to_digit(10) {
1126
5.69k
                port = port * 10 + digit;
1127
5.69k
                if port > u16::MAX as u32 {
1128
14
                    return Err(ParseError::InvalidPort);
1129
5.67k
                }
1130
5.67k
                has_any_digit = true;
1131
1.14k
            } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
1132
261
                return Err(ParseError::InvalidPort);
1133
            } else {
1134
881
                break;
1135
            }
1136
5.67k
            input = remaining;
1137
        }
1138
1139
1.48k
        if !has_any_digit && context == Context::Setter && !input.is_empty() {
1140
0
            return Err(ParseError::InvalidPort);
1141
1.48k
        }
1142
1143
1.48k
        let mut opt_port = Some(port as u16);
1144
1.48k
        if !has_any_digit || opt_port == default_port() {
1145
351
            opt_port = None;
1146
1.13k
        }
1147
1.48k
        Ok((opt_port, input))
1148
1.76k
    }
Unexecuted instantiation: <url::parser::Parser>::parse_port::<url::quirks::set_host::{closure#0}>
Unexecuted instantiation: <url::parser::Parser>::parse_port::<url::quirks::set_port::{closure#0}>
1149
1150
19.4k
    pub fn parse_path_start<'i>(
1151
19.4k
        &mut self,
1152
19.4k
        scheme_type: SchemeType,
1153
19.4k
        has_host: &mut bool,
1154
19.4k
        input: Input<'i>,
1155
19.4k
    ) -> Input<'i> {
1156
19.4k
        let path_start = self.serialization.len();
1157
19.4k
        let (maybe_c, remaining) = input.split_first();
1158
        // If url is special, then:
1159
19.4k
        if scheme_type.is_special() {
1160
16.9k
            if maybe_c == Some('\\') {
1161
280
                // If c is U+005C (\), validation error.
1162
280
                self.log_violation(SyntaxViolation::Backslash);
1163
16.6k
            }
1164
            // A special URL always has a non-empty path.
1165
16.9k
            if !self.serialization.ends_with('/') {
1166
16.6k
                self.serialization.push('/');
1167
                // We have already made sure the forward slash is present.
1168
16.6k
                if maybe_c == Some('/') || maybe_c == Some('\\') {
1169
6.89k
                    return self.parse_path(scheme_type, has_host, path_start, remaining);
1170
9.78k
                }
1171
284
            }
1172
10.0k
            return self.parse_path(scheme_type, has_host, path_start, input);
1173
2.44k
        } else if maybe_c == Some('?') || maybe_c == Some('#') {
1174
            // Otherwise, if state override is not given and c is U+003F (?),
1175
            // set url’s query to the empty string and state to query state.
1176
            // Otherwise, if state override is not given and c is U+0023 (#),
1177
            // set url’s fragment to the empty string and state to fragment state.
1178
            // The query and path states will be handled by the caller.
1179
165
            return input;
1180
2.28k
        }
1181
1182
2.28k
        if maybe_c.is_some() && maybe_c != Some('/') {
1183
139
            self.serialization.push('/');
1184
2.14k
        }
1185
        // Otherwise, if c is not the EOF code point:
1186
2.28k
        self.parse_path(scheme_type, has_host, path_start, input)
1187
19.4k
    }
1188
1189
25.3k
    pub fn parse_path<'i>(
1190
25.3k
        &mut self,
1191
25.3k
        scheme_type: SchemeType,
1192
25.3k
        has_host: &mut bool,
1193
25.3k
        path_start: usize,
1194
25.3k
        mut input: Input<'i>,
1195
25.3k
    ) -> Input<'i> {
1196
        // it's much faster to call utf8_percent_encode in bulk
1197
2.87M
        fn push_pending(
1198
2.87M
            serialization: &mut String,
1199
2.87M
            start_str: &str,
1200
2.87M
            remaining_len: usize,
1201
2.87M
            context: Context,
1202
2.87M
            scheme_type: SchemeType,
1203
2.87M
        ) {
1204
2.87M
            let text = &start_str[..start_str.len() - remaining_len];
1205
2.87M
            if text.is_empty() {
1206
79.5k
                return;
1207
2.79M
            }
1208
2.79M
            if context == Context::PathSegmentSetter {
1209
666
                if scheme_type.is_special() {
1210
350
                    serialization.extend(utf8_percent_encode(text, SPECIAL_PATH_SEGMENT));
1211
350
                } else {
1212
316
                    serialization.extend(utf8_percent_encode(text, PATH_SEGMENT));
1213
316
                }
1214
2.79M
            } else {
1215
2.79M
                serialization.extend(utf8_percent_encode(text, PATH));
1216
2.79M
            }
1217
2.87M
        }
1218
1219
        // Relative path state
1220
        loop {
1221
2.81M
            let mut segment_start = self.serialization.len();
1222
2.81M
            let mut ends_with_slash = false;
1223
2.81M
            let mut start_str = input.chars.as_str();
1224
            loop {
1225
323M
                let input_before_c = input.clone();
1226
                // bypass input.next() and manually handle ascii_tab_or_new_line
1227
                // in order to encode string slices in bulk
1228
323M
                let c = if let Some(c) = input.chars.next() {
1229
323M
                    c
1230
                } else {
1231
24.1k
                    push_pending(
1232
24.1k
                        &mut self.serialization,
1233
24.1k
                        start_str,
1234
                        0,
1235
24.1k
                        self.context,
1236
24.1k
                        scheme_type,
1237
                    );
1238
24.1k
                    break;
1239
                };
1240
2.75M
                match c {
1241
61.1k
                    ascii_tab_or_new_line_pattern!() => {
1242
61.1k
                        push_pending(
1243
61.1k
                            &mut self.serialization,
1244
61.1k
                            start_str,
1245
61.1k
                            input_before_c.chars.as_str().len(),
1246
61.1k
                            self.context,
1247
61.1k
                            scheme_type,
1248
61.1k
                        );
1249
61.1k
                        start_str = input.chars.as_str();
1250
61.1k
                    }
1251
2.75M
                    '/' if self.context != Context::PathSegmentSetter => {
1252
2.75M
                        push_pending(
1253
2.75M
                            &mut self.serialization,
1254
2.75M
                            start_str,
1255
2.75M
                            input_before_c.chars.as_str().len(),
1256
2.75M
                            self.context,
1257
2.75M
                            scheme_type,
1258
                        );
1259
2.75M
                        self.serialization.push(c);
1260
2.75M
                        ends_with_slash = true;
1261
2.75M
                        break;
1262
                    }
1263
57.0k
                    '\\' if self.context != Context::PathSegmentSetter
1264
57.0k
                        && scheme_type.is_special() =>
1265
                    {
1266
28.5k
                        push_pending(
1267
28.5k
                            &mut self.serialization,
1268
28.5k
                            start_str,
1269
28.5k
                            input_before_c.chars.as_str().len(),
1270
28.5k
                            self.context,
1271
28.5k
                            scheme_type,
1272
                        );
1273
28.5k
                        self.log_violation(SyntaxViolation::Backslash);
1274
28.5k
                        self.serialization.push('/');
1275
28.5k
                        ends_with_slash = true;
1276
28.5k
                        break;
1277
                    }
1278
642
                    '?' | '#' if self.context == Context::UrlParser => {
1279
1.22k
                        push_pending(
1280
1.22k
                            &mut self.serialization,
1281
1.22k
                            start_str,
1282
1.22k
                            input_before_c.chars.as_str().len(),
1283
1.22k
                            self.context,
1284
1.22k
                            scheme_type,
1285
                        );
1286
1.22k
                        input = input_before_c;
1287
1.22k
                        break;
1288
                    }
1289
                    _ => {
1290
320M
                        self.check_url_code_point(c, &input);
1291
320M
                        if scheme_type.is_file()
1292
188M
                            && self.serialization.len() > path_start
1293
188M
                            && is_normalized_windows_drive_letter(
1294
188M
                                &self.serialization[path_start + 1..],
1295
                            )
1296
76
                        {
1297
76
                            push_pending(
1298
76
                                &mut self.serialization,
1299
76
                                start_str,
1300
76
                                input_before_c.chars.as_str().len(),
1301
76
                                self.context,
1302
76
                                scheme_type,
1303
76
                            );
1304
76
                            start_str = input_before_c.chars.as_str();
1305
76
                            self.serialization.push('/');
1306
76
                            segment_start += 1;
1307
320M
                        }
1308
                    }
1309
                }
1310
            }
1311
1312
2.81M
            let segment_before_slash = if ends_with_slash {
1313
2.78M
                &self.serialization[segment_start..self.serialization.len() - 1]
1314
            } else {
1315
25.3k
                &self.serialization[segment_start..self.serialization.len()]
1316
            };
1317
2.81M
            match segment_before_slash {
1318
                // If buffer is a double-dot path segment, shorten url’s path,
1319
2.81M
                ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
1320
2.79M
                | ".%2E" => {
1321
18.8k
                    debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1322
18.8k
                    self.serialization.truncate(segment_start);
1323
18.8k
                    if self.serialization.ends_with('/')
1324
18.8k
                        && Parser::last_slash_can_be_removed(&self.serialization, path_start)
1325
10.7k
                    {
1326
10.7k
                        self.serialization.pop();
1327
10.7k
                    }
1328
18.8k
                    self.shorten_path(scheme_type, path_start);
1329
1330
                    // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1331
18.8k
                    if ends_with_slash && !self.serialization.ends_with('/') {
1332
0
                        self.serialization.push('/');
1333
18.8k
                    }
1334
                }
1335
                // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
1336
                // nor url is special and c is U+005C (\), append the empty string to url’s path.
1337
2.79M
                "." | "%2e" | "%2E" => {
1338
11.1k
                    self.serialization.truncate(segment_start);
1339
11.1k
                    if !self.serialization.ends_with('/') {
1340
0
                        self.serialization.push('/');
1341
11.1k
                    }
1342
                }
1343
                _ => {
1344
                    // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then
1345
2.78M
                    if scheme_type.is_file()
1346
37.2k
                        && segment_start == path_start + 1
1347
8.90k
                        && is_windows_drive_letter(segment_before_slash)
1348
                    {
1349
                        // Replace the second code point in buffer with U+003A (:).
1350
301
                        if let Some(c) = segment_before_slash.chars().next() {
1351
301
                            self.serialization.truncate(segment_start);
1352
301
                            self.serialization.push(c);
1353
301
                            self.serialization.push(':');
1354
301
                            if ends_with_slash {
1355
82
                                self.serialization.push('/');
1356
219
                            }
1357
0
                        }
1358
                        // If url’s host is neither the empty string nor null,
1359
                        // validation error, set url’s host to the empty string.
1360
301
                        if *has_host {
1361
254
                            self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
1362
254
                            *has_host = false; // FIXME account for this in callers
1363
254
                        }
1364
2.78M
                    }
1365
                }
1366
            }
1367
2.81M
            if !ends_with_slash {
1368
25.3k
                break;
1369
2.78M
            }
1370
        }
1371
25.3k
        if scheme_type.is_file() {
1372
5.08k
            // while url’s path’s size is greater than 1
1373
5.08k
            // and url’s path[0] is the empty string,
1374
5.08k
            // validation error, remove the first item from url’s path.
1375
5.08k
            //FIXME: log violation
1376
5.08k
            let path = self.serialization.split_off(path_start);
1377
5.08k
            self.serialization.push('/');
1378
5.08k
            self.serialization.push_str(path.trim_start_matches('/'));
1379
20.2k
        }
1380
1381
25.3k
        input
1382
25.3k
    }
1383
1384
18.8k
    fn last_slash_can_be_removed(serialization: &str, path_start: usize) -> bool {
1385
18.8k
        let url_before_segment = &serialization[..serialization.len() - 1];
1386
18.8k
        if let Some(segment_before_start) = url_before_segment.rfind('/') {
1387
            // Do not remove the root slash
1388
16.2k
            segment_before_start >= path_start
1389
                // Or a windows drive letter slash
1390
13.5k
                && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..])
1391
        } else {
1392
2.59k
            false
1393
        }
1394
18.8k
    }
1395
1396
    /// https://url.spec.whatwg.org/#shorten-a-urls-path
1397
18.8k
    fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1398
        // If path is empty, then return.
1399
18.8k
        if self.serialization.len() == path_start {
1400
0
            return;
1401
18.8k
        }
1402
        // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
1403
18.8k
        if scheme_type.is_file()
1404
10.2k
            && is_normalized_windows_drive_letter(&self.serialization[path_start..])
1405
        {
1406
0
            return;
1407
18.8k
        }
1408
        // Remove path’s last item.
1409
18.8k
        self.pop_path(scheme_type, path_start);
1410
18.8k
    }
1411
1412
    /// https://url.spec.whatwg.org/#pop-a-urls-path
1413
18.8k
    fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1414
18.8k
        if self.serialization.len() > path_start {
1415
18.8k
            let slash_position = self.serialization[path_start..].rfind('/').unwrap();
1416
            // + 1 since rfind returns the position before the slash.
1417
18.8k
            let segment_start = path_start + slash_position + 1;
1418
            // Don’t pop a Windows drive letter
1419
18.8k
            if !(scheme_type.is_file()
1420
10.2k
                && is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
1421
18.8k
            {
1422
18.8k
                self.serialization.truncate(segment_start);
1423
18.8k
            }
1424
0
        }
1425
18.8k
    }
1426
1427
41.8k
    pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1428
        loop {
1429
207M
            let input_before_c = input.clone();
1430
207M
            match input.next_utf8() {
1431
756
                Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => {
1432
1.41k
                    return input_before_c
1433
                }
1434
207M
                Some((c, utf8_c)) => {
1435
207M
                    self.check_url_code_point(c, &input);
1436
207M
                    self.serialization
1437
207M
                        .extend(utf8_percent_encode(utf8_c, CONTROLS));
1438
207M
                }
1439
40.4k
                None => return input,
1440
            }
1441
        }
1442
41.8k
    }
1443
1444
    #[allow(clippy::too_many_arguments)]
1445
61.5k
    fn with_query_and_fragment(
1446
61.5k
        mut self,
1447
61.5k
        scheme_type: SchemeType,
1448
61.5k
        scheme_end: u32,
1449
61.5k
        username_end: u32,
1450
61.5k
        host_start: u32,
1451
61.5k
        host_end: u32,
1452
61.5k
        host: HostInternal,
1453
61.5k
        port: Option<u16>,
1454
61.5k
        mut path_start: u32,
1455
61.5k
        remaining: Input<'_>,
1456
61.5k
    ) -> ParseResult<Url> {
1457
        // Special case for anarchist URL's with a leading empty path segment
1458
        // This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/,
1459
        // when parsed and then serialized, from ending up as web+demo://not-a-host/
1460
        // (they end up as web+demo:/.//not-a-host/).
1461
        //
1462
        // If url’s host is null, url does not have an opaque path,
1463
        // url’s path’s size is greater than 1, and url’s path[0] is the empty string,
1464
        // then append U+002F (/) followed by U+002E (.) to output.
1465
61.5k
        let scheme_end_as_usize = scheme_end as usize;
1466
61.5k
        let path_start_as_usize = path_start as usize;
1467
61.5k
        if path_start_as_usize == scheme_end_as_usize + 1 {
1468
            // Anarchist URL
1469
44.0k
            if self.serialization[path_start_as_usize..].starts_with("//") {
1470
158
                // Case 1: The base URL did not have an empty path segment, but the resulting one does
1471
158
                // Insert the "/." prefix
1472
158
                self.serialization.insert_str(path_start_as_usize, "/.");
1473
158
                path_start += 2;
1474
43.9k
            }
1475
44.0k
            assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1476
17.4k
        } else if path_start_as_usize == scheme_end_as_usize + 3
1477
396
            && &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/."
1478
        {
1479
            // Anarchist URL with leading empty path segment
1480
            // The base URL has a "/." between the host and the path
1481
23
            assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/');
1482
23
            if self
1483
23
                .serialization
1484
23
                .as_bytes()
1485
23
                .get(path_start_as_usize + 1)
1486
23
                .copied()
1487
23
                != Some(b'/')
1488
23
            {
1489
23
                // Case 2: The base URL had an empty path segment, but the resulting one does not
1490
23
                // Remove the "/." prefix
1491
23
                self.serialization
1492
23
                    .replace_range(scheme_end_as_usize..path_start_as_usize, ":");
1493
23
                path_start -= 2;
1494
23
            }
1495
23
            assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1496
17.4k
        }
1497
1498
61.5k
        let (query_start, fragment_start) =
1499
61.5k
            self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
1500
61.5k
        Ok(Url {
1501
61.5k
            serialization: self.serialization,
1502
61.5k
            scheme_end,
1503
61.5k
            username_end,
1504
61.5k
            host_start,
1505
61.5k
            host_end,
1506
61.5k
            host,
1507
61.5k
            port,
1508
61.5k
            path_start,
1509
61.5k
            query_start,
1510
61.5k
            fragment_start,
1511
61.5k
        })
1512
61.5k
    }
1513
1514
    /// Return (query_start, fragment_start)
1515
66.0k
    fn parse_query_and_fragment(
1516
66.0k
        &mut self,
1517
66.0k
        scheme_type: SchemeType,
1518
66.0k
        scheme_end: u32,
1519
66.0k
        mut input: Input<'_>,
1520
66.0k
    ) -> ParseResult<(Option<u32>, Option<u32>)> {
1521
66.0k
        let mut query_start = None;
1522
66.0k
        match input.next() {
1523
1.37k
            Some('#') => {}
1524
            Some('?') => {
1525
1.44k
                query_start = Some(to_u32(self.serialization.len())?);
1526
1.44k
                self.serialization.push('?');
1527
1.44k
                let remaining = self.parse_query(scheme_type, scheme_end, input);
1528
1.44k
                if let Some(remaining) = remaining {
1529
233
                    input = remaining
1530
                } else {
1531
1.21k
                    return Ok((query_start, None));
1532
                }
1533
            }
1534
63.2k
            None => return Ok((None, None)),
1535
0
            _ => panic!("Programming error. parse_query_and_fragment() called without ? or #"),
1536
        }
1537
1538
1.60k
        let fragment_start = to_u32(self.serialization.len())?;
1539
1.60k
        self.serialization.push('#');
1540
1.60k
        self.parse_fragment(input);
1541
1.60k
        Ok((query_start, Some(fragment_start)))
1542
66.0k
    }
1543
1544
2.05k
    pub fn parse_query<'i>(
1545
2.05k
        &mut self,
1546
2.05k
        scheme_type: SchemeType,
1547
2.05k
        scheme_end: u32,
1548
2.05k
        input: Input<'i>,
1549
2.05k
    ) -> Option<Input<'i>> {
1550
        struct QueryPartIter<'i, 'p> {
1551
            is_url_parser: bool,
1552
            input: Input<'i>,
1553
            violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
1554
        }
1555
1556
        impl<'i> Iterator for QueryPartIter<'i, '_> {
1557
            type Item = (&'i str, bool);
1558
1559
85.7k
            fn next(&mut self) -> Option<Self::Item> {
1560
85.7k
                let start = self.input.chars.as_str();
1561
                // bypass self.input.next() in order to get string slices
1562
                // which are faster to operate on
1563
48.5M
                while let Some(c) = self.input.chars.next() {
1564
1.03k
                    match c {
1565
                        ascii_tab_or_new_line_pattern!() => {
1566
82.1k
                            return Some((
1567
82.1k
                                &start[..start.len() - self.input.chars.as_str().len() - 1],
1568
82.1k
                                false,
1569
82.1k
                            ));
1570
                        }
1571
233
                        '#' if self.is_url_parser => {
1572
233
                            return Some((
1573
233
                                &start[..start.len() - self.input.chars.as_str().len() - 1],
1574
233
                                true,
1575
233
                            ));
1576
                        }
1577
48.4M
                        c => {
1578
48.4M
                            if let Some(vfn) = &self.violation_fn {
1579
0
                                check_url_code_point(vfn, c, &self.input);
1580
48.4M
                            }
1581
                        }
1582
                    }
1583
                }
1584
3.43k
                if start.is_empty() {
1585
1.81k
                    None
1586
                } else {
1587
1.61k
                    Some((start, false))
1588
                }
1589
85.7k
            }
1590
        }
1591
1592
2.05k
        let mut part_iter = QueryPartIter {
1593
2.05k
            is_url_parser: self.context == Context::UrlParser,
1594
2.05k
            input,
1595
2.05k
            violation_fn: self.violation_fn,
1596
2.05k
        };
1597
2.05k
        let set = if scheme_type.is_special() {
1598
531
            SPECIAL_QUERY
1599
        } else {
1600
1.51k
            QUERY
1601
        };
1602
2.05k
        let query_encoding_override = self.query_encoding_override.filter(|_| {
1603
0
            matches!(
1604
0
                &self.serialization[..scheme_end as usize],
1605
0
                "http" | "https" | "file" | "ftp"
1606
            )
1607
0
        });
1608
1609
85.7k
        while let Some((part, is_finished)) = part_iter.next() {
1610
83.9k
            match query_encoding_override {
1611
                // slightly faster to be repetitive and not convert text to Cow
1612
0
                Some(o) => self.serialization.extend(percent_encode(&o(part), set)),
1613
83.9k
                None => self
1614
83.9k
                    .serialization
1615
83.9k
                    .extend(percent_encode(part.as_bytes(), set)),
1616
            }
1617
83.9k
            if is_finished {
1618
233
                return Some(part_iter.input);
1619
83.7k
            }
1620
        }
1621
1622
1.81k
        None
1623
2.05k
    }
1624
1625
0
    fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult<Url> {
1626
0
        let before_fragment = match base_url.fragment_start {
1627
0
            Some(i) => base_url.slice(..i),
1628
0
            None => &*base_url.serialization,
1629
        };
1630
0
        debug_assert!(self.serialization.is_empty());
1631
0
        self.serialization
1632
0
            .reserve(before_fragment.len() + input.chars.as_str().len());
1633
0
        self.serialization.push_str(before_fragment);
1634
0
        self.serialization.push('#');
1635
0
        let next = input.next();
1636
0
        debug_assert!(next == Some('#'));
1637
0
        self.parse_fragment(input);
1638
        Ok(Url {
1639
0
            serialization: self.serialization,
1640
0
            fragment_start: Some(to_u32(before_fragment.len())?),
1641
            ..*base_url
1642
        })
1643
0
    }
1644
1645
1.96k
    pub fn parse_fragment(&mut self, input: Input<'_>) {
1646
        struct FragmentPartIter<'i, 'p> {
1647
            input: Input<'i>,
1648
            violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
1649
        }
1650
1651
        impl<'i> Iterator for FragmentPartIter<'i, '_> {
1652
            type Item = &'i str;
1653
1654
28.6k
            fn next(&mut self) -> Option<Self::Item> {
1655
28.6k
                let start = self.input.chars.as_str();
1656
                // bypass self.input.next() in order to get string slices
1657
                // which are faster to operate on
1658
56.8M
                while let Some(c) = self.input.chars.next() {
1659
56.8M
                    match c {
1660
                        ascii_tab_or_new_line_pattern!() => {
1661
25.1k
                            return Some(
1662
25.1k
                                &start[..start.len() - self.input.chars.as_str().len() - 1],
1663
25.1k
                            );
1664
                        }
1665
                        '\0' => {
1666
3.12M
                            if let Some(vfn) = &self.violation_fn {
1667
0
                                vfn(SyntaxViolation::NullInFragment);
1668
3.12M
                            }
1669
                        }
1670
53.7M
                        c => {
1671
53.7M
                            if let Some(vfn) = &self.violation_fn {
1672
0
                                check_url_code_point(vfn, c, &self.input);
1673
53.7M
                            }
1674
                        }
1675
                    }
1676
                }
1677
3.50k
                if start.is_empty() {
1678
1.96k
                    None
1679
                } else {
1680
1.54k
                    Some(start)
1681
                }
1682
28.6k
            }
1683
        }
1684
1685
1.96k
        let part_iter = FragmentPartIter {
1686
1.96k
            input,
1687
1.96k
            violation_fn: self.violation_fn,
1688
1.96k
        };
1689
1690
28.6k
        for part in part_iter {
1691
26.6k
            self.serialization
1692
26.6k
                .extend(utf8_percent_encode(part, FRAGMENT));
1693
26.6k
        }
1694
1.96k
    }
1695
1696
    #[inline]
1697
633M
    fn check_url_code_point(&self, c: char, input: &Input<'_>) {
1698
633M
        if let Some(vfn) = self.violation_fn {
1699
0
            check_url_code_point(vfn, c, input)
1700
633M
        }
1701
633M
    }
1702
}
1703
1704
0
fn check_url_code_point(vfn: &dyn Fn(SyntaxViolation), c: char, input: &Input<'_>) {
1705
0
    if c == '%' {
1706
0
        let mut input = input.clone();
1707
0
        if !matches!((input.next(), input.next()), (Some(a), Some(b))
1708
0
                             if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
1709
        {
1710
0
            vfn(SyntaxViolation::PercentDecode)
1711
0
        }
1712
0
    } else if !is_url_code_point(c) {
1713
0
        vfn(SyntaxViolation::NonUrlCodePoint)
1714
0
    }
1715
0
}
1716
1717
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates
// U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
//
/// Returns `true` if `c` is a URL code point: an ASCII alphanumeric, one of
/// the listed ASCII punctuation characters, or any code point from U+00A0 to
/// U+10FFFD that is not a noncharacter.
///
/// Every supplementary plane is accepted from its first code point
/// (U+__0000) up to U+__FFFD. This includes plane 14, which starts at
/// U+E0000.
#[inline]
fn is_url_code_point(c: char) -> bool {
    matches!(c,
        'a'..='z' |
        'A'..='Z' |
        '0'..='9' |
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' |
        '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' |
        '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' |
        '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' |
        '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' |
        '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' |
        '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' |
        '\u{D0000}'..='\u{DFFFD}' | '\u{E0000}'..='\u{EFFFD}' |
        '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
}
1742
1743
/// https://url.spec.whatwg.org/#c0-controls-and-space
///
/// Returns `true` for every character from U+0000 up to and including
/// U+0020 (space).
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    matches!(ch, '\u{0}'..=' ')
}
1748
1749
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
1750
#[inline]
1751
1.58G
fn ascii_tab_or_new_line(ch: char) -> bool {
1752
1.58G
    matches!(ch, ascii_tab_or_new_line_pattern!())
1753
1.58G
}
1754
1755
/// https://url.spec.whatwg.org/#ascii-alpha
///
/// Returns `true` for the ASCII letters `A`–`Z` and `a`–`z` only.
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | 'a'..='z')
}
1760
1761
#[inline]
1762
212k
pub fn to_u32(i: usize) -> ParseResult<u32> {
1763
212k
    if i <= u32::MAX as usize {
1764
212k
        Ok(i as u32)
1765
    } else {
1766
0
        Err(ParseError::Overflow)
1767
    }
1768
212k
}
1769
1770
188M
fn is_normalized_windows_drive_letter(segment: &str) -> bool {
1771
188M
    is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':'
1772
188M
}
1773
1774
/// Whether the scheme is file:, the path has a single segment, and that segment
1775
/// is a Windows drive letter
1776
#[inline]
1777
188M
pub fn is_windows_drive_letter(segment: &str) -> bool {
1778
188M
    segment.len() == 2 && starts_with_windows_drive_letter(segment)
1779
188M
}
1780
1781
/// Whether path starts with a root slash
1782
/// and a windows drive letter eg: "/c:" or "/a:/"
1783
13.5k
fn path_starts_with_windows_drive_letter(s: &str) -> bool {
1784
13.5k
    if let Some(c) = s.as_bytes().first() {
1785
13.5k
        matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..])
1786
    } else {
1787
0
        false
1788
    }
1789
13.5k
}
1790
1791
225k
fn starts_with_windows_drive_letter(s: &str) -> bool {
1792
225k
    s.len() >= 2
1793
224k
        && ascii_alpha(s.as_bytes()[0] as char)
1794
15.1k
        && matches!(s.as_bytes()[1], b':' | b'|')
1795
3.73k
        && (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
1796
225k
}
1797
1798
/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
1799
863
fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool {
1800
863
    let mut input = input.clone();
1801
863
    match (input.next(), input.next(), input.next()) {
1802
        // its first two code points are a Windows drive letter
1803
        // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
1804
4
        (Some(a), Some(b), Some(c))
1805
445
            if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') =>
1806
        {
1807
4
            true
1808
        }
1809
        // its first two code points are a Windows drive letter
1810
        // its length is 2
1811
231
        (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true,
1812
855
        _ => false,
1813
    }
1814
863
}
1815
1816
/// Writes the decimal form of `value` right-aligned into `buffer` and
/// returns the written digits as a string slice borrowed from `buffer`.
///
/// Bytes of `buffer` to the left of the digits are left as they were.
#[inline]
fn fast_u16_to_str(
    // max 5 digits for u16 (65535)
    buffer: &mut [u8; 5],
    mut value: u16,
) -> &str {
    let mut first_digit = buffer.len();

    // Fill from the least significant digit backwards. At least one digit is
    // always written, so 0 is rendered as "0".
    for slot in buffer.iter_mut().rev() {
        *slot = b'0' + (value % 10) as u8;
        first_digit -= 1;
        value /= 10;
        if value == 0 {
            break;
        }
    }

    // SAFETY: every byte from `first_digit` onwards was just written as an
    // ASCII digit (`b'0'..=b'9'`), which is valid UTF-8.
    unsafe { core::str::from_utf8_unchecked(&buffer[first_digit..]) }
}