Coverage Report

Created: 2026-02-14 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rust-url/url/src/parser.rs
Line
Count
Source
1
// Copyright 2013-2016 The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
use alloc::borrow::Cow;
10
use alloc::string::String;
11
use core::fmt::{self, Formatter, Write};
12
use core::str;
13
14
use crate::host::{Host, HostInternal};
15
use crate::Url;
16
use form_urlencoded::EncodingOverride;
17
use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
18
19
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
20
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
21
22
/// https://url.spec.whatwg.org/#path-percent-encode-set
23
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
24
25
/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
26
pub(crate) const USERINFO: &AsciiSet = &PATH
27
    .add(b'/')
28
    .add(b':')
29
    .add(b';')
30
    .add(b'=')
31
    .add(b'@')
32
    .add(b'[')
33
    .add(b'\\')
34
    .add(b']')
35
    .add(b'^')
36
    .add(b'|');
37
38
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
39
40
// The backslash (\) character is treated as a path separator in special URLs
41
// so it needs to be additionally escaped in that case.
42
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
43
44
// https://url.spec.whatwg.org/#query-state
45
const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
46
const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
47
48
pub type ParseResult<T> = Result<T, ParseError>;
49
50
macro_rules! simple_enum_error {
51
    ($($name: ident => $description: expr,)+) => {
52
        /// Errors that can occur during parsing.
53
        ///
54
        /// This may be extended in the future so exhaustive matching is
55
        /// forbidden.
56
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
57
        #[non_exhaustive]
58
        pub enum ParseError {
59
            $(
60
                $name,
61
            )+
62
        }
63
64
        impl fmt::Display for ParseError {
65
0
            fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
66
0
                match *self {
67
                    $(
68
0
                        ParseError::$name => fmt.write_str($description),
69
                    )+
70
                }
71
0
            }
72
        }
73
    }
74
}
75
76
macro_rules! ascii_tab_or_new_line_pattern {
77
    () => {
78
        '\t' | '\n' | '\r'
79
    };
80
}
81
82
#[cfg(feature = "std")]
83
impl std::error::Error for ParseError {}
84
85
#[cfg(not(feature = "std"))]
86
impl core::error::Error for ParseError {}
87
88
simple_enum_error! {
89
    EmptyHost => "empty host",
90
    IdnaError => "invalid international domain name",
91
    InvalidPort => "invalid port number",
92
    InvalidIpv4Address => "invalid IPv4 address",
93
    InvalidIpv6Address => "invalid IPv6 address",
94
    InvalidDomainCharacter => "invalid domain character",
95
    RelativeUrlWithoutBase => "relative URL without a base",
96
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
97
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
98
    Overflow => "URLs more than 4 GB are not supported",
99
}
100
101
impl From<::idna::Errors> for ParseError {
102
6.34k
    fn from(_: ::idna::Errors) -> Self {
103
6.34k
        Self::IdnaError
104
6.34k
    }
105
}
106
107
macro_rules! syntax_violation_enum {
108
    ($($name: ident => $description: literal,)+) => {
109
        /// Non-fatal syntax violations that can occur during parsing.
110
        ///
111
        /// This may be extended in the future so exhaustive matching is
112
        /// forbidden.
113
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
114
        #[non_exhaustive]
115
        pub enum SyntaxViolation {
116
            $(
117
                /// ```text
118
                #[doc = $description]
119
                /// ```
120
                $name,
121
            )+
122
        }
123
124
        impl SyntaxViolation {
125
0
            pub fn description(&self) -> &'static str {
126
0
                match *self {
127
                    $(
128
                        SyntaxViolation::$name => $description,
129
                    )+
130
                }
131
0
            }
132
        }
133
    }
134
}
135
136
syntax_violation_enum! {
137
    Backslash => "backslash",
138
    C0SpaceIgnored =>
139
        "leading or trailing control or space character are ignored in URLs",
140
    EmbeddedCredentials =>
141
        "embedding authentication information (username or password) \
142
         in an URL is not recommended",
143
    ExpectedDoubleSlash => "expected //",
144
    ExpectedFileDoubleSlash => "expected // after file:",
145
    FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
146
    NonUrlCodePoint => "non-URL code point",
147
    NullInFragment => "NULL characters are ignored in URL fragment identifiers",
148
    PercentDecode => "expected 2 hex digits after %",
149
    TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
150
    UnencodedAtSign => "unencoded @ sign in username or password",
151
}
152
153
impl fmt::Display for SyntaxViolation {
154
0
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
155
0
        fmt::Display::fmt(self.description(), f)
156
0
    }
157
}
158
159
#[derive(Copy, Clone, PartialEq, Eq)]
160
pub enum SchemeType {
161
    File,
162
    SpecialNotFile,
163
    NotSpecial,
164
}
165
166
impl SchemeType {
167
104k
    pub fn is_special(&self) -> bool {
168
104k
        !matches!(*self, Self::NotSpecial)
169
104k
    }
170
171
305M
    pub fn is_file(&self) -> bool {
172
305M
        matches!(*self, Self::File)
173
305M
    }
174
}
175
176
impl<T: AsRef<str>> From<T> for SchemeType {
177
74.6k
    fn from(s: T) -> Self {
178
74.6k
        match s.as_ref() {
179
74.6k
            "http" | "https" | "ws" | "wss" | "ftp" => Self::SpecialNotFile,
180
50.5k
            "file" => Self::File,
181
43.8k
            _ => Self::NotSpecial,
182
        }
183
74.6k
    }
<url::parser::SchemeType as core::convert::From<&alloc::string::String>>::from
Line
Count
Source
177
69.7k
    fn from(s: T) -> Self {
178
69.7k
        match s.as_ref() {
179
69.7k
            "http" | "https" | "ws" | "wss" | "ftp" => Self::SpecialNotFile,
180
46.8k
            "file" => Self::File,
181
41.6k
            _ => Self::NotSpecial,
182
        }
183
69.7k
    }
<url::parser::SchemeType as core::convert::From<&str>>::from
Line
Count
Source
177
4.88k
    fn from(s: T) -> Self {
178
4.88k
        match s.as_ref() {
179
4.88k
            "http" | "https" | "ws" | "wss" | "ftp" => Self::SpecialNotFile,
180
3.65k
            "file" => Self::File,
181
2.14k
            _ => Self::NotSpecial,
182
        }
183
4.88k
    }
184
}
185
186
1.79k
pub fn default_port(scheme: &str) -> Option<u16> {
187
1.79k
    match scheme {
188
1.79k
        "http" | "ws" => Some(80),
189
686
        "https" | "wss" => Some(443),
190
542
        "ftp" => Some(21),
191
531
        _ => None,
192
    }
193
1.79k
}
194
195
#[derive(Clone, Debug)]
196
pub struct Input<'i> {
197
    chars: str::Chars<'i>,
198
}
199
200
impl<'i> Input<'i> {
201
3.17k
    pub fn new_no_trim(input: &'i str) -> Self {
202
3.17k
        Input {
203
3.17k
            chars: input.chars(),
204
3.17k
        }
205
3.17k
    }
206
207
635
    pub fn new_trim_tab_and_newlines(
208
635
        original_input: &'i str,
209
635
        vfn: Option<&dyn Fn(SyntaxViolation)>,
210
635
    ) -> Self {
211
635
        let input = original_input.trim_matches(ascii_tab_or_new_line);
212
635
        if let Some(vfn) = vfn {
213
0
            if input.len() < original_input.len() {
214
0
                vfn(SyntaxViolation::C0SpaceIgnored)
215
0
            }
216
0
            if input.chars().any(ascii_tab_or_new_line) {
217
0
                vfn(SyntaxViolation::TabOrNewlineIgnored)
218
0
            }
219
635
        }
220
635
        Input {
221
635
            chars: input.chars(),
222
635
        }
223
635
    }
224
225
69.3k
    pub fn new_trim_c0_control_and_space(
226
69.3k
        original_input: &'i str,
227
69.3k
        vfn: Option<&dyn Fn(SyntaxViolation)>,
228
69.3k
    ) -> Self {
229
69.3k
        let input = original_input.trim_matches(c0_control_or_space);
230
69.3k
        if let Some(vfn) = vfn {
231
0
            if input.len() < original_input.len() {
232
0
                vfn(SyntaxViolation::C0SpaceIgnored)
233
0
            }
234
0
            if input.chars().any(ascii_tab_or_new_line) {
235
0
                vfn(SyntaxViolation::TabOrNewlineIgnored)
236
0
            }
237
69.3k
        }
238
69.3k
        Input {
239
69.3k
            chars: input.chars(),
240
69.3k
        }
241
69.3k
    }
242
243
    #[inline]
244
738
    pub fn is_empty(&self) -> bool {
245
738
        self.clone().next().is_none()
246
738
    }
247
248
    #[inline]
249
71.5k
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
71.5k
        p.split_prefix(&mut self.clone())
251
71.5k
    }
<url::parser::Input>::starts_with::<url::parser::ascii_alpha>
Line
Count
Source
249
70.7k
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
70.7k
        p.split_prefix(&mut self.clone())
251
70.7k
    }
<url::parser::Input>::starts_with::<&str>
Line
Count
Source
249
350
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
350
        p.split_prefix(&mut self.clone())
251
350
    }
<url::parser::Input>::starts_with::<char>
Line
Count
Source
249
383
    pub fn starts_with<P: Pattern>(&self, p: P) -> bool {
250
383
        p.split_prefix(&mut self.clone())
251
383
    }
252
253
    #[inline]
254
98.0k
    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
255
98.0k
        let mut remaining = self.clone();
256
98.0k
        if p.split_prefix(&mut remaining) {
257
6.44k
            Some(remaining)
258
        } else {
259
91.6k
            None
260
        }
261
98.0k
    }
<url::parser::Input>::split_prefix::<&str>
Line
Count
Source
254
41.4k
    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
255
41.4k
        let mut remaining = self.clone();
256
41.4k
        if p.split_prefix(&mut remaining) {
257
2.44k
            Some(remaining)
258
        } else {
259
39.0k
            None
260
        }
261
41.4k
    }
<url::parser::Input>::split_prefix::<char>
Line
Count
Source
254
56.6k
    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
255
56.6k
        let mut remaining = self.clone();
256
56.6k
        if p.split_prefix(&mut remaining) {
257
4.00k
            Some(remaining)
258
        } else {
259
52.6k
            None
260
        }
261
56.6k
    }
262
263
    #[inline]
264
36.1k
    fn split_first(&self) -> (Option<char>, Self) {
265
36.1k
        let mut remaining = self.clone();
266
36.1k
        (remaining.next(), remaining)
267
36.1k
    }
268
269
    #[inline]
270
22.5k
    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
271
22.5k
        let mut count = 0;
272
22.5k
        let mut remaining = self.clone();
273
        loop {
274
33.7k
            let mut input = remaining.clone();
275
33.7k
            if matches!(input.next(), Some(c) if f(c)) {
276
11.2k
                remaining = input;
277
11.2k
                count += 1;
278
11.2k
            } else {
279
22.5k
                return (count, remaining);
280
            }
281
        }
282
22.5k
    }
<url::parser::Input>::count_matching::<<url::parser::Parser>::parse_relative::{closure#0}>
Line
Count
Source
270
240
    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
271
240
        let mut count = 0;
272
240
        let mut remaining = self.clone();
273
        loop {
274
480
            let mut input = remaining.clone();
275
480
            if matches!(input.next(), Some(c) if f(c)) {
276
240
                remaining = input;
277
240
                count += 1;
278
240
            } else {
279
240
                return (count, remaining);
280
            }
281
        }
282
240
    }
<url::parser::Input>::count_matching::<<url::parser::Parser>::parse_with_scheme::{closure#2}>
Line
Count
Source
270
22.2k
    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
271
22.2k
        let mut count = 0;
272
22.2k
        let mut remaining = self.clone();
273
        loop {
274
33.3k
            let mut input = remaining.clone();
275
33.3k
            if matches!(input.next(), Some(c) if f(c)) {
276
11.0k
                remaining = input;
277
11.0k
                count += 1;
278
11.0k
            } else {
279
22.2k
                return (count, remaining);
280
            }
281
        }
282
22.2k
    }
283
284
    #[inline]
285
278M
    fn next_utf8(&mut self) -> Option<(char, &'i str)> {
286
        loop {
287
278M
            let utf8 = self.chars.as_str();
288
278M
            match self.chars.next() {
289
278M
                Some(c) => {
290
278M
                    if !ascii_tab_or_new_line(c) {
291
278M
                        return Some((c, &utf8[..c.len_utf8()]));
292
68.3k
                    }
293
                }
294
35.5k
                None => return None,
295
            }
296
        }
297
278M
    }
298
}
299
300
pub trait Pattern {
301
    fn split_prefix(self, input: &mut Input) -> bool;
302
}
303
304
impl Pattern for char {
305
57.0k
    fn split_prefix(self, input: &mut Input) -> bool {
306
57.0k
        input.next() == Some(self)
307
57.0k
    }
308
}
309
310
impl Pattern for &str {
311
41.8k
    fn split_prefix(self, input: &mut Input) -> bool {
312
46.5k
        for c in self.chars() {
313
46.5k
            if input.next() != Some(c) {
314
39.3k
                return false;
315
7.14k
            }
316
        }
317
2.44k
        true
318
41.8k
    }
319
}
320
321
impl<F: FnMut(char) -> bool> Pattern for F {
322
70.7k
    fn split_prefix(self, input: &mut Input) -> bool {
323
70.7k
        input.next().map_or(false, self)
324
70.7k
    }
325
}
326
327
impl Iterator for Input<'_> {
328
    type Item = char;
329
1.10G
    fn next(&mut self) -> Option<char> {
330
1.10G
        self.chars.by_ref().find(|&c| !ascii_tab_or_new_line(c))
331
1.10G
    }
332
333
1.70k
    fn size_hint(&self) -> (usize, Option<usize>) {
334
1.70k
        (0, Some(self.chars.as_str().len()))
335
1.70k
    }
336
}
337
338
pub struct Parser<'a> {
339
    pub serialization: String,
340
    pub base_url: Option<&'a Url>,
341
    pub query_encoding_override: EncodingOverride<'a>,
342
    pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
343
    pub context: Context,
344
}
345
346
#[derive(PartialEq, Eq, Copy, Clone)]
347
pub enum Context {
348
    UrlParser,
349
    Setter,
350
    PathSegmentSetter,
351
}
352
353
impl Parser<'_> {
354
32.1k
    fn log_violation(&self, v: SyntaxViolation) {
355
32.1k
        if let Some(f) = self.violation_fn {
356
0
            f(v)
357
32.1k
        }
358
32.1k
    }
359
360
33.3k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
33.3k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
33.3k
        }
366
33.3k
    }
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_file::{closure#0}>
Line
Count
Source
360
3.67k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
3.67k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
3.67k
        }
366
3.67k
    }
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_file::{closure#1}>
Line
Count
Source
360
2.82k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
2.82k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
2.82k
        }
366
2.82k
    }
Unexecuted instantiation: <url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_relative::{closure#1}>
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_with_scheme::{closure#0}>
Line
Count
Source
360
4.53k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
4.53k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
4.53k
        }
366
4.53k
    }
<url::parser::Parser>::log_violation_if::<<url::parser::Parser>::parse_with_scheme::{closure#3}>
Line
Count
Source
360
22.2k
    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
361
22.2k
        if let Some(f) = self.violation_fn {
362
0
            if test() {
363
0
                f(v)
364
0
            }
365
22.2k
        }
366
22.2k
    }
367
368
3.81k
    pub fn for_setter(serialization: String) -> Self {
369
3.81k
        Parser {
370
3.81k
            serialization,
371
3.81k
            base_url: None,
372
3.81k
            query_encoding_override: None,
373
3.81k
            violation_fn: None,
374
3.81k
            context: Context::Setter,
375
3.81k
        }
376
3.81k
    }
377
378
    /// https://url.spec.whatwg.org/#concept-basic-url-parser
379
69.3k
    pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
380
69.3k
        let input = Input::new_trim_c0_control_and_space(input, self.violation_fn);
381
69.3k
        if let Ok(remaining) = self.parse_scheme(input.clone()) {
382
68.2k
            return self.parse_with_scheme(remaining);
383
1.02k
        }
384
385
        // No-scheme state
386
1.02k
        if let Some(base_url) = self.base_url {
387
383
            if input.starts_with('#') {
388
0
                self.fragment_only(base_url, input)
389
383
            } else if base_url.cannot_be_a_base() {
390
19
                Err(ParseError::RelativeUrlWithCannotBeABaseBase)
391
            } else {
392
364
                let scheme_type = SchemeType::from(base_url.scheme());
393
364
                if scheme_type.is_file() {
394
124
                    self.parse_file(input, scheme_type, Some(base_url))
395
                } else {
396
240
                    self.parse_relative(input, scheme_type, base_url)
397
                }
398
            }
399
        } else {
400
645
            Err(ParseError::RelativeUrlWithoutBase)
401
        }
402
69.3k
    }
403
404
70.7k
    pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
405
        // starts_with will also fail for empty strings so we can skip that comparison for perf
406
70.7k
        if !input.starts_with(ascii_alpha) {
407
645
            return Err(());
408
70.1k
        }
409
70.1k
        debug_assert!(self.serialization.is_empty());
410
46.3M
        while let Some(c) = input.next() {
411
46.3M
            match c {
412
43.8M
                'a'..='z' | '0'..='9' | '+' | '-' | '.' => self.serialization.push(c),
413
2.35M
                'A'..='Z' => self.serialization.push(c.to_ascii_lowercase()),
414
68.2k
                ':' => return Ok(input),
415
                _ => {
416
210
                    self.serialization.clear();
417
210
                    return Err(());
418
                }
419
            }
420
        }
421
        // EOF before ':'
422
1.62k
        if self.context == Context::Setter {
423
1.44k
            Ok(input)
424
        } else {
425
178
            self.serialization.clear();
426
178
            Err(())
427
        }
428
70.7k
    }
429
430
68.2k
    fn parse_with_scheme(mut self, input: Input<'_>) -> ParseResult<Url> {
431
        use crate::SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash};
432
68.2k
        let scheme_end = to_u32(self.serialization.len())?;
433
68.2k
        let scheme_type = SchemeType::from(&self.serialization);
434
68.2k
        self.serialization.push(':');
435
68.2k
        match scheme_type {
436
            SchemeType::File => {
437
4.53k
                self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//"));
438
4.53k
                let base_file_url = self.base_url.and_then(|base| {
439
0
                    if base.scheme() == "file" {
440
0
                        Some(base)
441
                    } else {
442
0
                        None
443
                    }
444
0
                });
445
4.53k
                self.serialization.clear();
446
4.53k
                self.parse_file(input, scheme_type, base_file_url)
447
            }
448
            SchemeType::SpecialNotFile => {
449
                // special relative or authority state
450
33.2k
                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
451
22.2k
                if let Some(base_url) = self.base_url {
452
0
                    if slashes_count < 2
453
0
                        && base_url.scheme() == &self.serialization[..scheme_end as usize]
454
                    {
455
                        // "Cannot-be-a-base" URLs only happen with "not special" schemes.
456
0
                        debug_assert!(!base_url.cannot_be_a_base());
457
0
                        self.serialization.clear();
458
0
                        return self.parse_relative(input, scheme_type, base_url);
459
0
                    }
460
22.2k
                }
461
                // special authority slashes state
462
22.2k
                self.log_violation_if(ExpectedDoubleSlash, || {
463
0
                    input
464
0
                        .clone()
465
0
                        .take_while(|&c| matches!(c, '/' | '\\'))
466
0
                        .collect::<String>()
467
0
                        != "//"
468
0
                });
469
22.2k
                self.after_double_slash(remaining, scheme_type, scheme_end)
470
            }
471
41.4k
            SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
472
        }
473
68.2k
    }
474
475
    /// Scheme other than file, http, https, ws, ws, ftp.
476
41.4k
    fn parse_non_special(
477
41.4k
        mut self,
478
41.4k
        input: Input<'_>,
479
41.4k
        scheme_type: SchemeType,
480
41.4k
        scheme_end: u32,
481
41.4k
    ) -> ParseResult<Url> {
482
        // path or authority state (
483
41.4k
        if let Some(input) = input.split_prefix("//") {
484
2.44k
            return self.after_double_slash(input, scheme_type, scheme_end);
485
39.0k
        }
486
        // Anarchist URL (no authority)
487
39.0k
        let path_start = to_u32(self.serialization.len())?;
488
39.0k
        let username_end = path_start;
489
39.0k
        let host_start = path_start;
490
39.0k
        let host_end = path_start;
491
39.0k
        let host = HostInternal::None;
492
39.0k
        let port = None;
493
39.0k
        let remaining = if let Some(input) = input.split_prefix('/') {
494
2.25k
            self.serialization.push('/');
495
2.25k
            self.parse_path(scheme_type, &mut false, path_start as usize, input)
496
        } else {
497
36.7k
            self.parse_cannot_be_a_base_path(input)
498
        };
499
39.0k
        self.with_query_and_fragment(
500
39.0k
            scheme_type,
501
39.0k
            scheme_end,
502
39.0k
            username_end,
503
39.0k
            host_start,
504
39.0k
            host_end,
505
39.0k
            host,
506
39.0k
            port,
507
39.0k
            path_start,
508
39.0k
            remaining,
509
        )
510
41.4k
    }
511
512
4.65k
    fn parse_file(
513
4.65k
        mut self,
514
4.65k
        input: Input<'_>,
515
4.65k
        scheme_type: SchemeType,
516
4.65k
        base_file_url: Option<&Url>,
517
4.65k
    ) -> ParseResult<Url> {
518
        use crate::SyntaxViolation::Backslash;
519
        // file state
520
4.65k
        debug_assert!(self.serialization.is_empty());
521
4.65k
        let (first_char, input_after_first_char) = input.split_first();
522
4.65k
        if matches!(first_char, Some('/') | Some('\\')) {
523
3.67k
            self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\'));
524
            // file slash state
525
3.67k
            let (next_char, input_after_next_char) = input_after_first_char.split_first();
526
3.67k
            if matches!(next_char, Some('/') | Some('\\')) {
527
2.82k
                self.log_violation_if(Backslash, || next_char == Some('\\'));
528
                // file host state
529
2.82k
                self.serialization.push_str("file://");
530
2.82k
                let scheme_end = "file".len() as u32;
531
2.82k
                let host_start = "file://".len() as u32;
532
2.60k
                let (path_start, mut host, remaining) =
533
2.82k
                    self.parse_file_host(input_after_next_char)?;
534
2.60k
                let mut host_end = to_u32(self.serialization.len())?;
535
2.60k
                let mut has_host = !matches!(host, HostInternal::None);
536
2.60k
                let remaining = if path_start {
537
1.50k
                    self.parse_path_start(SchemeType::File, &mut has_host, remaining)
538
                } else {
539
1.10k
                    let path_start = self.serialization.len();
540
1.10k
                    self.serialization.push('/');
541
1.10k
                    self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
542
                };
543
544
                // For file URLs that have a host and whose path starts
545
                // with the windows drive letter we just remove the host.
546
2.60k
                if !has_host {
547
1.35k
                    self.serialization
548
1.35k
                        .drain(host_start as usize..host_end as usize);
549
1.35k
                    host_end = host_start;
550
1.35k
                    host = HostInternal::None;
551
1.35k
                }
552
2.60k
                let (query_start, fragment_start) =
553
2.60k
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
554
2.60k
                return Ok(Url {
555
2.60k
                    serialization: self.serialization,
556
2.60k
                    scheme_end,
557
2.60k
                    username_end: host_start,
558
2.60k
                    host_start,
559
2.60k
                    host_end,
560
2.60k
                    host,
561
2.60k
                    port: None,
562
2.60k
                    path_start: host_end,
563
2.60k
                    query_start,
564
2.60k
                    fragment_start,
565
2.60k
                });
566
            } else {
567
850
                self.serialization.push_str("file://");
568
850
                let scheme_end = "file".len() as u32;
569
850
                let host_start = "file://".len();
570
850
                let mut host_end = host_start;
571
850
                let mut host = HostInternal::None;
572
850
                if !starts_with_windows_drive_letter_segment(&input_after_first_char) {
573
838
                    if let Some(base_url) = base_file_url {
574
124
                        let first_segment = base_url.path_segments().unwrap().next().unwrap();
575
124
                        if is_normalized_windows_drive_letter(first_segment) {
576
10
                            self.serialization.push('/');
577
10
                            self.serialization.push_str(first_segment);
578
114
                        } else if let Some(host_str) = base_url.host_str() {
579
42
                            self.serialization.push_str(host_str);
580
42
                            host_end = self.serialization.len();
581
42
                            host = base_url.host;
582
72
                        }
583
714
                    }
584
12
                }
585
                // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
586
850
                let parse_path_input = if let Some(c) = first_char {
587
850
                    if c == '/' || c == '\\' || c == '?' || c == '#' {
588
850
                        input
589
                    } else {
590
0
                        input_after_first_char
591
                    }
592
                } else {
593
0
                    input_after_first_char
594
                };
595
596
850
                let remaining =
597
850
                    self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
598
599
850
                let host_start = host_start as u32;
600
601
850
                let (query_start, fragment_start) =
602
850
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
603
604
850
                let host_end = host_end as u32;
605
850
                return Ok(Url {
606
850
                    serialization: self.serialization,
607
850
                    scheme_end,
608
850
                    username_end: host_start,
609
850
                    host_start,
610
850
                    host_end,
611
850
                    host,
612
850
                    port: None,
613
850
                    path_start: host_end,
614
850
                    query_start,
615
850
                    fragment_start,
616
850
                });
617
            }
618
986
        }
619
986
        if let Some(base_url) = base_file_url {
620
0
            match first_char {
621
                None => {
622
                    // Copy everything except the fragment
623
0
                    let before_fragment = match base_url.fragment_start {
624
0
                        Some(i) => &base_url.serialization[..i as usize],
625
0
                        None => &*base_url.serialization,
626
                    };
627
0
                    self.serialization.push_str(before_fragment);
628
0
                    Ok(Url {
629
0
                        serialization: self.serialization,
630
0
                        fragment_start: None,
631
0
                        ..*base_url
632
0
                    })
633
                }
634
                Some('?') => {
635
                    // Copy everything up to the query string
636
0
                    let before_query = match (base_url.query_start, base_url.fragment_start) {
637
0
                        (None, None) => &*base_url.serialization,
638
0
                        (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
639
                    };
640
0
                    self.serialization.push_str(before_query);
641
0
                    let (query_start, fragment_start) =
642
0
                        self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
643
0
                    Ok(Url {
644
0
                        serialization: self.serialization,
645
0
                        query_start,
646
0
                        fragment_start,
647
0
                        ..*base_url
648
0
                    })
649
                }
650
0
                Some('#') => self.fragment_only(base_url, input),
651
                _ => {
652
0
                    if !starts_with_windows_drive_letter_segment(&input) {
653
0
                        let before_query = match (base_url.query_start, base_url.fragment_start) {
654
0
                            (None, None) => &*base_url.serialization,
655
0
                            (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
656
                        };
657
0
                        self.serialization.push_str(before_query);
658
0
                        self.shorten_path(SchemeType::File, base_url.path_start as usize);
659
0
                        let remaining = self.parse_path(
660
0
                            SchemeType::File,
661
0
                            &mut true,
662
0
                            base_url.path_start as usize,
663
0
                            input,
664
                        );
665
0
                        self.with_query_and_fragment(
666
0
                            SchemeType::File,
667
0
                            base_url.scheme_end,
668
0
                            base_url.username_end,
669
0
                            base_url.host_start,
670
0
                            base_url.host_end,
671
0
                            base_url.host,
672
0
                            base_url.port,
673
0
                            base_url.path_start,
674
0
                            remaining,
675
                        )
676
                    } else {
677
0
                        self.serialization.push_str("file:///");
678
0
                        let scheme_end = "file".len() as u32;
679
0
                        let path_start = "file://".len();
680
0
                        let remaining =
681
0
                            self.parse_path(SchemeType::File, &mut false, path_start, input);
682
0
                        let (query_start, fragment_start) =
683
0
                            self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
684
0
                        let path_start = path_start as u32;
685
0
                        Ok(Url {
686
0
                            serialization: self.serialization,
687
0
                            scheme_end,
688
0
                            username_end: path_start,
689
0
                            host_start: path_start,
690
0
                            host_end: path_start,
691
0
                            host: HostInternal::None,
692
0
                            port: None,
693
0
                            path_start,
694
0
                            query_start,
695
0
                            fragment_start,
696
0
                        })
697
                    }
698
                }
699
            }
700
        } else {
701
986
            self.serialization.push_str("file:///");
702
986
            let scheme_end = "file".len() as u32;
703
986
            let path_start = "file://".len();
704
986
            let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
705
986
            let (query_start, fragment_start) =
706
986
                self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
707
986
            let path_start = path_start as u32;
708
986
            Ok(Url {
709
986
                serialization: self.serialization,
710
986
                scheme_end,
711
986
                username_end: path_start,
712
986
                host_start: path_start,
713
986
                host_end: path_start,
714
986
                host: HostInternal::None,
715
986
                port: None,
716
986
                path_start,
717
986
                query_start,
718
986
                fragment_start,
719
986
            })
720
        }
721
4.65k
    }
722
723
240
    fn parse_relative(
724
240
        mut self,
725
240
        input: Input<'_>,
726
240
        scheme_type: SchemeType,
727
240
        base_url: &Url,
728
240
    ) -> ParseResult<Url> {
729
        // relative state
730
240
        debug_assert!(self.serialization.is_empty());
731
240
        let (first_char, input_after_first_char) = input.split_first();
732
240
        match first_char {
733
            None => {
734
                // Copy everything except the fragment
735
0
                let before_fragment = match base_url.fragment_start {
736
0
                    Some(i) => &base_url.serialization[..i as usize],
737
0
                    None => &*base_url.serialization,
738
                };
739
0
                self.serialization.push_str(before_fragment);
740
0
                Ok(Url {
741
0
                    serialization: self.serialization,
742
0
                    fragment_start: None,
743
0
                    ..*base_url
744
0
                })
745
            }
746
            Some('?') => {
747
                // Copy everything up to the query string
748
0
                let before_query = match (base_url.query_start, base_url.fragment_start) {
749
0
                    (None, None) => &*base_url.serialization,
750
0
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
751
                };
752
0
                self.serialization.push_str(before_query);
753
0
                let (query_start, fragment_start) =
754
0
                    self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
755
0
                Ok(Url {
756
0
                    serialization: self.serialization,
757
0
                    query_start,
758
0
                    fragment_start,
759
0
                    ..*base_url
760
0
                })
761
            }
762
0
            Some('#') => self.fragment_only(base_url, input),
763
            Some('/') | Some('\\') => {
764
480
                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
765
240
                if slashes_count >= 2 {
766
0
                    self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || {
767
0
                        input
768
0
                            .clone()
769
0
                            .take_while(|&c| matches!(c, '/' | '\\'))
770
0
                            .collect::<String>()
771
0
                            != "//"
772
0
                    });
773
0
                    let scheme_end = base_url.scheme_end;
774
0
                    debug_assert!(base_url.byte_at(scheme_end) == b':');
775
0
                    self.serialization
776
0
                        .push_str(base_url.slice(..scheme_end + 1));
777
0
                    if let Some(after_prefix) = input.split_prefix("//") {
778
0
                        return self.after_double_slash(after_prefix, scheme_type, scheme_end);
779
0
                    }
780
0
                    return self.after_double_slash(remaining, scheme_type, scheme_end);
781
240
                }
782
240
                let path_start = base_url.path_start;
783
240
                self.serialization.push_str(base_url.slice(..path_start));
784
240
                self.serialization.push('/');
785
240
                let remaining = self.parse_path(
786
240
                    scheme_type,
787
240
                    &mut true,
788
240
                    path_start as usize,
789
240
                    input_after_first_char,
790
                );
791
240
                self.with_query_and_fragment(
792
240
                    scheme_type,
793
240
                    base_url.scheme_end,
794
240
                    base_url.username_end,
795
240
                    base_url.host_start,
796
240
                    base_url.host_end,
797
240
                    base_url.host,
798
240
                    base_url.port,
799
240
                    base_url.path_start,
800
240
                    remaining,
801
                )
802
            }
803
            _ => {
804
0
                let before_query = match (base_url.query_start, base_url.fragment_start) {
805
0
                    (None, None) => &*base_url.serialization,
806
0
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
807
                };
808
0
                self.serialization.push_str(before_query);
809
                // FIXME spec says just "remove last entry", not the "pop" algorithm
810
0
                self.pop_path(scheme_type, base_url.path_start as usize);
811
                // A special url always has a path.
812
                // A path always starts with '/'
813
0
                if self.serialization.len() == base_url.path_start as usize
814
0
                    && (SchemeType::from(base_url.scheme()).is_special() || !input.is_empty())
815
0
                {
816
0
                    self.serialization.push('/');
817
0
                }
818
0
                let remaining = match input.split_first() {
819
0
                    (Some('/'), remaining) => self.parse_path(
820
0
                        scheme_type,
821
0
                        &mut true,
822
0
                        base_url.path_start as usize,
823
0
                        remaining,
824
                    ),
825
                    _ => {
826
0
                        self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input)
827
                    }
828
                };
829
0
                self.with_query_and_fragment(
830
0
                    scheme_type,
831
0
                    base_url.scheme_end,
832
0
                    base_url.username_end,
833
0
                    base_url.host_start,
834
0
                    base_url.host_end,
835
0
                    base_url.host,
836
0
                    base_url.port,
837
0
                    base_url.path_start,
838
0
                    remaining,
839
                )
840
            }
841
        }
842
240
    }
843
844
24.7k
    fn after_double_slash(
845
24.7k
        mut self,
846
24.7k
        input: Input<'_>,
847
24.7k
        scheme_type: SchemeType,
848
24.7k
        scheme_end: u32,
849
24.7k
    ) -> ParseResult<Url> {
850
24.7k
        self.serialization.push('/');
851
24.7k
        self.serialization.push('/');
852
        // authority state
853
24.7k
        let before_authority = self.serialization.len();
854
24.7k
        let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
855
24.7k
        let has_authority = before_authority != self.serialization.len();
856
        // host state
857
24.7k
        let host_start = to_u32(self.serialization.len())?;
858
17.3k
        let (host_end, host, port, remaining) =
859
24.7k
            self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
860
17.3k
        if host == HostInternal::None && has_authority {
861
10
            return Err(ParseError::EmptyHost);
862
17.3k
        }
863
        // path state
864
17.3k
        let path_start = to_u32(self.serialization.len())?;
865
17.3k
        let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
866
17.3k
        self.with_query_and_fragment(
867
17.3k
            scheme_type,
868
17.3k
            scheme_end,
869
17.3k
            username_end,
870
17.3k
            host_start,
871
17.3k
            host_end,
872
17.3k
            host,
873
17.3k
            port,
874
17.3k
            path_start,
875
17.3k
            remaining,
876
        )
877
24.7k
    }
878
879
    /// Return (username_end, remaining)
880
24.7k
    fn parse_userinfo<'i>(
881
24.7k
        &mut self,
882
24.7k
        mut input: Input<'i>,
883
24.7k
        scheme_type: SchemeType,
884
24.7k
    ) -> ParseResult<(u32, Input<'i>)> {
885
24.7k
        let mut last_at = None;
886
24.7k
        let mut remaining = input.clone();
887
24.7k
        let mut char_count = 0;
888
546M
        while let Some(c) = remaining.next() {
889
1.59k
            match c {
890
                '@' => {
891
8.25k
                    if last_at.is_some() {
892
6.57k
                        self.log_violation(SyntaxViolation::UnencodedAtSign)
893
                    } else {
894
1.67k
                        self.log_violation(SyntaxViolation::EmbeddedCredentials)
895
                    }
896
8.25k
                    last_at = Some((char_count, remaining.clone()))
897
                }
898
6.51k
                '/' | '?' | '#' => break,
899
1.59k
                '\\' if scheme_type.is_special() => break,
900
546M
                _ => (),
901
            }
902
546M
            char_count += 1;
903
        }
904
24.7k
        let (mut userinfo_char_count, remaining) = match last_at {
905
23.0k
            None => return Ok((to_u32(self.serialization.len())?, input)),
906
264
            Some((0, remaining)) => {
907
                // Otherwise, if one of the following is true
908
                // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
909
                // url is special and c is U+005C (\)
910
                // If @ flag is set and buffer is the empty string, validation error, return failure.
911
264
                if let (Some(c), _) = remaining.split_first() {
912
236
                    if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') {
913
24
                        return Err(ParseError::EmptyHost);
914
212
                    }
915
28
                }
916
240
                return Ok((to_u32(self.serialization.len())?, remaining));
917
            }
918
1.40k
            Some(x) => x,
919
        };
920
921
1.40k
        let mut username_end = None;
922
1.40k
        let mut has_password = false;
923
1.40k
        let mut has_username = false;
924
108M
        while userinfo_char_count > 0 {
925
108M
            let (c, utf8_c) = input.next_utf8().unwrap();
926
108M
            userinfo_char_count -= 1;
927
108M
            if c == ':' && username_end.is_none() {
928
                // Start parsing password
929
573
                username_end = Some(to_u32(self.serialization.len())?);
930
                // We don't add a colon if the password is empty
931
573
                if userinfo_char_count > 0 {
932
548
                    self.serialization.push(':');
933
548
                    has_password = true;
934
548
                }
935
            } else {
936
108M
                if !has_password {
937
43.2M
                    has_username = true;
938
65.2M
                }
939
108M
                self.check_url_code_point(c, &input);
940
108M
                self.serialization
941
108M
                    .extend(utf8_percent_encode(utf8_c, USERINFO));
942
            }
943
        }
944
1.40k
        let username_end = match username_end {
945
573
            Some(i) => i,
946
835
            None => to_u32(self.serialization.len())?,
947
        };
948
1.40k
        if has_username || has_password {
949
1.40k
            self.serialization.push('@');
950
1.40k
        }
951
1.40k
        Ok((username_end, remaining))
952
24.7k
    }
953
954
24.7k
    fn parse_host_and_port<'i>(
955
24.7k
        &mut self,
956
24.7k
        input: Input<'i>,
957
24.7k
        scheme_end: u32,
958
24.7k
        scheme_type: SchemeType,
959
24.7k
    ) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
960
24.7k
        let (host, remaining) = Parser::parse_host(input, scheme_type)?;
961
17.5k
        write!(&mut self.serialization, "{host}").unwrap();
962
17.5k
        let host_end = to_u32(self.serialization.len())?;
963
17.5k
        if let Host::Domain(h) = &host {
964
16.3k
            if h.is_empty() {
965
                // Port with an empty host
966
350
                if remaining.starts_with(":") {
967
6
                    return Err(ParseError::EmptyHost);
968
344
                }
969
344
                if scheme_type.is_special() {
970
0
                    return Err(ParseError::EmptyHost);
971
344
                }
972
15.9k
            }
973
1.27k
        };
974
975
17.5k
        let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
976
1.74k
            let scheme = || default_port(&self.serialization[..scheme_end as usize]);
977
1.74k
            let (port, remaining) = Parser::parse_port(remaining, scheme, self.context)?;
978
1.46k
            if let Some(port) = port {
979
1.13k
                self.serialization.push(':');
980
1.13k
                let mut buffer = [0u8; 5];
981
1.13k
                let port_str = fast_u16_to_str(&mut buffer, port);
982
1.13k
                self.serialization.push_str(port_str);
983
1.13k
            }
984
1.46k
            (port, remaining)
985
        } else {
986
15.8k
            (None, remaining)
987
        };
988
17.3k
        Ok((host_end, host.into(), port, remaining))
989
24.7k
    }
990
991
24.7k
    pub fn parse_host(
992
24.7k
        mut input: Input<'_>,
993
24.7k
        scheme_type: SchemeType,
994
24.7k
    ) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
995
24.7k
        if scheme_type.is_file() {
996
0
            return Parser::get_file_host(input);
997
24.7k
        }
998
        // Undo the Input abstraction here to avoid allocating in the common case
999
        // where the host part of the input does not contain any tab or newline
1000
24.7k
        let input_str = input.chars.as_str();
1001
24.7k
        let mut inside_square_brackets = false;
1002
24.7k
        let mut has_ignored_chars = false;
1003
24.7k
        let mut non_ignored_chars = 0;
1004
24.7k
        let mut bytes = 0;
1005
434M
        for c in input_str.chars() {
1006
4.77k
            match c {
1007
1.85k
                ':' if !inside_square_brackets => break,
1008
929
                '\\' if scheme_type.is_special() => break,
1009
5.70k
                '/' | '?' | '#' => break,
1010
29.4k
                ascii_tab_or_new_line_pattern!() => {
1011
29.4k
                    has_ignored_chars = true;
1012
29.4k
                }
1013
                '[' => {
1014
1.48k
                    inside_square_brackets = true;
1015
1.48k
                    non_ignored_chars += 1
1016
                }
1017
                ']' => {
1018
3.67k
                    inside_square_brackets = false;
1019
3.67k
                    non_ignored_chars += 1
1020
                }
1021
434M
                _ => non_ignored_chars += 1,
1022
            }
1023
434M
            bytes += c.len_utf8();
1024
        }
1025
        let host_str;
1026
        {
1027
24.7k
            let host_input = input.by_ref().take(non_ignored_chars);
1028
24.7k
            if has_ignored_chars {
1029
1.59k
                host_str = Cow::Owned(host_input.collect());
1030
1.59k
            } else {
1031
371M
                for _ in host_input {}
1032
23.1k
                host_str = Cow::Borrowed(&input_str[..bytes]);
1033
            }
1034
        }
1035
24.7k
        if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
1036
381
            return Err(ParseError::EmptyHost);
1037
24.3k
        }
1038
24.3k
        if !scheme_type.is_special() {
1039
2.44k
            let host = Host::parse_opaque_cow(host_str)?;
1040
2.34k
            return Ok((host, input));
1041
21.8k
        }
1042
21.8k
        let host = Host::parse_cow(host_str)?;
1043
15.2k
        Ok((host, input))
1044
24.7k
    }
1045
1046
0
    fn get_file_host(input: Input<'_>) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
1047
0
        let (_, host_str, remaining) = Parser::file_host(input)?;
1048
0
        let host = match Host::parse(&host_str)? {
1049
0
            Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")),
1050
0
            Host::Domain(s) => Host::Domain(Cow::Owned(s)),
1051
0
            Host::Ipv4(ip) => Host::Ipv4(ip),
1052
0
            Host::Ipv6(ip) => Host::Ipv6(ip),
1053
        };
1054
0
        Ok((host, remaining))
1055
0
    }
1056
1057
2.82k
    fn parse_file_host<'i>(
1058
2.82k
        &mut self,
1059
2.82k
        input: Input<'i>,
1060
2.82k
    ) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1061
        let has_host;
1062
2.82k
        let (_, host_str, remaining) = Parser::file_host(input)?;
1063
2.82k
        let host = if host_str.is_empty() {
1064
1.10k
            has_host = false;
1065
1.10k
            HostInternal::None
1066
        } else {
1067
1.71k
            match Host::parse_cow(host_str)? {
1068
1.45k
                Host::Domain(ref d) if d == "localhost" => {
1069
4
                    has_host = false;
1070
4
                    HostInternal::None
1071
                }
1072
1.50k
                host => {
1073
1.50k
                    write!(&mut self.serialization, "{host}").unwrap();
1074
1.50k
                    has_host = true;
1075
1.50k
                    host.into()
1076
                }
1077
            }
1078
        };
1079
2.60k
        Ok((has_host, host, remaining))
1080
2.82k
    }
1081
1082
2.82k
    pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> {
1083
        // Undo the Input abstraction here to avoid allocating in the common case
1084
        // where the host part of the input does not contain any tab or newline
1085
2.82k
        let input_str = input.chars.as_str();
1086
2.82k
        let mut has_ignored_chars = false;
1087
2.82k
        let mut non_ignored_chars = 0;
1088
2.82k
        let mut bytes = 0;
1089
78.6M
        for c in input_str.chars() {
1090
78.6M
            match c {
1091
2.24k
                '/' | '\\' | '?' | '#' => break,
1092
2.14k
                ascii_tab_or_new_line_pattern!() => has_ignored_chars = true,
1093
78.6M
                _ => non_ignored_chars += 1,
1094
            }
1095
78.6M
            bytes += c.len_utf8();
1096
        }
1097
        let host_str;
1098
2.82k
        let mut remaining = input.clone();
1099
        {
1100
2.82k
            let host_input = remaining.by_ref().take(non_ignored_chars);
1101
2.82k
            if has_ignored_chars {
1102
159
                host_str = Cow::Owned(host_input.collect());
1103
159
            } else {
1104
73.6M
                for _ in host_input {}
1105
2.66k
                host_str = Cow::Borrowed(&input_str[..bytes]);
1106
            }
1107
        }
1108
2.82k
        if is_windows_drive_letter(&host_str) {
1109
25
            return Ok((false, "".into(), input));
1110
2.79k
        }
1111
2.79k
        Ok((true, host_str, remaining))
1112
2.82k
    }
1113
1114
1.74k
    pub fn parse_port<P>(
1115
1.74k
        mut input: Input<'_>,
1116
1.74k
        default_port: P,
1117
1.74k
        context: Context,
1118
1.74k
    ) -> ParseResult<(Option<u16>, Input<'_>)>
1119
1.74k
    where
1120
1.74k
        P: Fn() -> Option<u16>,
1121
    {
1122
1.74k
        let mut port: u32 = 0;
1123
1.74k
        let mut has_any_digit = false;
1124
7.98k
        while let (Some(c), remaining) = input.split_first() {
1125
7.39k
            if let Some(digit) = c.to_digit(10) {
1126
6.25k
                port = port * 10 + digit;
1127
6.25k
                if port > u16::MAX as u32 {
1128
15
                    return Err(ParseError::InvalidPort);
1129
6.23k
                }
1130
6.23k
                has_any_digit = true;
1131
1.14k
            } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
1132
263
                return Err(ParseError::InvalidPort);
1133
            } else {
1134
880
                break;
1135
            }
1136
6.23k
            input = remaining;
1137
        }
1138
1139
1.46k
        if !has_any_digit && context == Context::Setter && !input.is_empty() {
1140
0
            return Err(ParseError::InvalidPort);
1141
1.46k
        }
1142
1143
1.46k
        let mut opt_port = Some(port as u16);
1144
1.46k
        if !has_any_digit || opt_port == default_port() {
1145
337
            opt_port = None;
1146
1.13k
        }
1147
1.46k
        Ok((opt_port, input))
1148
1.74k
    }
<url::parser::Parser>::parse_port::<<url::parser::Parser>::parse_host_and_port::{closure#0}>
Line
Count
Source
1114
1.74k
    pub fn parse_port<P>(
1115
1.74k
        mut input: Input<'_>,
1116
1.74k
        default_port: P,
1117
1.74k
        context: Context,
1118
1.74k
    ) -> ParseResult<(Option<u16>, Input<'_>)>
1119
1.74k
    where
1120
1.74k
        P: Fn() -> Option<u16>,
1121
    {
1122
1.74k
        let mut port: u32 = 0;
1123
1.74k
        let mut has_any_digit = false;
1124
7.98k
        while let (Some(c), remaining) = input.split_first() {
1125
7.39k
            if let Some(digit) = c.to_digit(10) {
1126
6.25k
                port = port * 10 + digit;
1127
6.25k
                if port > u16::MAX as u32 {
1128
15
                    return Err(ParseError::InvalidPort);
1129
6.23k
                }
1130
6.23k
                has_any_digit = true;
1131
1.14k
            } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
1132
263
                return Err(ParseError::InvalidPort);
1133
            } else {
1134
880
                break;
1135
            }
1136
6.23k
            input = remaining;
1137
        }
1138
1139
1.46k
        if !has_any_digit && context == Context::Setter && !input.is_empty() {
1140
0
            return Err(ParseError::InvalidPort);
1141
1.46k
        }
1142
1143
1.46k
        let mut opt_port = Some(port as u16);
1144
1.46k
        if !has_any_digit || opt_port == default_port() {
1145
337
            opt_port = None;
1146
1.13k
        }
1147
1.46k
        Ok((opt_port, input))
1148
1.74k
    }
Unexecuted instantiation: <url::parser::Parser>::parse_port::<url::quirks::set_host::{closure#0}>
Unexecuted instantiation: <url::parser::Parser>::parse_port::<url::quirks::set_port::{closure#0}>
1149
1150
19.3k
    pub fn parse_path_start<'i>(
1151
19.3k
        &mut self,
1152
19.3k
        scheme_type: SchemeType,
1153
19.3k
        has_host: &mut bool,
1154
19.3k
        input: Input<'i>,
1155
19.3k
    ) -> Input<'i> {
1156
19.3k
        let path_start = self.serialization.len();
1157
19.3k
        let (maybe_c, remaining) = input.split_first();
1158
        // If url is special, then:
1159
19.3k
        if scheme_type.is_special() {
1160
16.9k
            if maybe_c == Some('\\') {
1161
287
                // If c is U+005C (\), validation error.
1162
287
                self.log_violation(SyntaxViolation::Backslash);
1163
16.6k
            }
1164
            // A special URL always has a non-empty path.
1165
16.9k
            if !self.serialization.ends_with('/') {
1166
16.6k
                self.serialization.push('/');
1167
                // We have already made sure the forward slash is present.
1168
16.6k
                if maybe_c == Some('/') || maybe_c == Some('\\') {
1169
6.90k
                    return self.parse_path(scheme_type, has_host, path_start, remaining);
1170
9.77k
                }
1171
301
            }
1172
10.0k
            return self.parse_path(scheme_type, has_host, path_start, input);
1173
2.39k
        } else if maybe_c == Some('?') || maybe_c == Some('#') {
1174
            // Otherwise, if state override is not given and c is U+003F (?),
1175
            // set url’s query to the empty string and state to query state.
1176
            // Otherwise, if state override is not given and c is U+0023 (#),
1177
            // set url’s fragment to the empty string and state to fragment state.
1178
            // The query and path states will be handled by the caller.
1179
156
            return input;
1180
2.23k
        }
1181
1182
2.23k
        if maybe_c.is_some() && maybe_c != Some('/') {
1183
141
            self.serialization.push('/');
1184
2.09k
        }
1185
        // Otherwise, if c is not the EOF code point:
1186
2.23k
        self.parse_path(scheme_type, has_host, path_start, input)
1187
19.3k
    }
1188
1189
25.3k
    pub fn parse_path<'i>(
1190
25.3k
        &mut self,
1191
25.3k
        scheme_type: SchemeType,
1192
25.3k
        has_host: &mut bool,
1193
25.3k
        path_start: usize,
1194
25.3k
        mut input: Input<'i>,
1195
25.3k
    ) -> Input<'i> {
1196
        // it's much faster to call utf8_percent_encode in bulk
1197
3.17M
        fn push_pending(
1198
3.17M
            serialization: &mut String,
1199
3.17M
            start_str: &str,
1200
3.17M
            remaining_len: usize,
1201
3.17M
            context: Context,
1202
3.17M
            scheme_type: SchemeType,
1203
3.17M
        ) {
1204
3.17M
            let text = &start_str[..start_str.len() - remaining_len];
1205
3.17M
            if text.is_empty() {
1206
76.8k
                return;
1207
3.10M
            }
1208
3.10M
            if context == Context::PathSegmentSetter {
1209
682
                if scheme_type.is_special() {
1210
374
                    serialization.extend(utf8_percent_encode(text, SPECIAL_PATH_SEGMENT));
1211
374
                } else {
1212
308
                    serialization.extend(utf8_percent_encode(text, PATH_SEGMENT));
1213
308
                }
1214
3.10M
            } else {
1215
3.10M
                serialization.extend(utf8_percent_encode(text, PATH));
1216
3.10M
            }
1217
3.17M
        }
1218
1219
        // Relative path state
1220
        loop {
1221
3.11M
            let mut segment_start = self.serialization.len();
1222
3.11M
            let mut ends_with_slash = false;
1223
3.11M
            let mut start_str = input.chars.as_str();
1224
            loop {
1225
305M
                let input_before_c = input.clone();
1226
                // bypass input.next() and manually handle ascii_tab_or_new_line
1227
                // in order to encode string slices in bulk
1228
305M
                let c = if let Some(c) = input.chars.next() {
1229
305M
                    c
1230
                } else {
1231
24.1k
                    push_pending(
1232
24.1k
                        &mut self.serialization,
1233
24.1k
                        start_str,
1234
                        0,
1235
24.1k
                        self.context,
1236
24.1k
                        scheme_type,
1237
                    );
1238
24.1k
                    break;
1239
                };
1240
3.07M
                match c {
1241
59.1k
                    ascii_tab_or_new_line_pattern!() => {
1242
59.1k
                        push_pending(
1243
59.1k
                            &mut self.serialization,
1244
59.1k
                            start_str,
1245
59.1k
                            input_before_c.chars.as_str().len(),
1246
59.1k
                            self.context,
1247
59.1k
                            scheme_type,
1248
59.1k
                        );
1249
59.1k
                        start_str = input.chars.as_str();
1250
59.1k
                    }
1251
3.07M
                    '/' if self.context != Context::PathSegmentSetter => {
1252
3.07M
                        push_pending(
1253
3.07M
                            &mut self.serialization,
1254
3.07M
                            start_str,
1255
3.07M
                            input_before_c.chars.as_str().len(),
1256
3.07M
                            self.context,
1257
3.07M
                            scheme_type,
1258
                        );
1259
3.07M
                        self.serialization.push(c);
1260
3.07M
                        ends_with_slash = true;
1261
3.07M
                        break;
1262
                    }
1263
50.3k
                    '\\' if self.context != Context::PathSegmentSetter
1264
50.3k
                        && scheme_type.is_special() =>
1265
                    {
1266
23.3k
                        push_pending(
1267
23.3k
                            &mut self.serialization,
1268
23.3k
                            start_str,
1269
23.3k
                            input_before_c.chars.as_str().len(),
1270
23.3k
                            self.context,
1271
23.3k
                            scheme_type,
1272
                        );
1273
23.3k
                        self.log_violation(SyntaxViolation::Backslash);
1274
23.3k
                        self.serialization.push('/');
1275
23.3k
                        ends_with_slash = true;
1276
23.3k
                        break;
1277
                    }
1278
592
                    '?' | '#' if self.context == Context::UrlParser => {
1279
1.18k
                        push_pending(
1280
1.18k
                            &mut self.serialization,
1281
1.18k
                            start_str,
1282
1.18k
                            input_before_c.chars.as_str().len(),
1283
1.18k
                            self.context,
1284
1.18k
                            scheme_type,
1285
                        );
1286
1.18k
                        input = input_before_c;
1287
1.18k
                        break;
1288
                    }
1289
                    _ => {
1290
301M
                        self.check_url_code_point(c, &input);
1291
301M
                        if scheme_type.is_file()
1292
162M
                            && self.serialization.len() > path_start
1293
162M
                            && is_normalized_windows_drive_letter(
1294
162M
                                &self.serialization[path_start + 1..],
1295
                            )
1296
72
                        {
1297
72
                            push_pending(
1298
72
                                &mut self.serialization,
1299
72
                                start_str,
1300
72
                                input_before_c.chars.as_str().len(),
1301
72
                                self.context,
1302
72
                                scheme_type,
1303
72
                            );
1304
72
                            start_str = input_before_c.chars.as_str();
1305
72
                            self.serialization.push('/');
1306
72
                            segment_start += 1;
1307
301M
                        }
1308
                    }
1309
                }
1310
            }
1311
1312
3.11M
            let segment_before_slash = if ends_with_slash {
1313
3.09M
                &self.serialization[segment_start..self.serialization.len() - 1]
1314
            } else {
1315
25.3k
                &self.serialization[segment_start..self.serialization.len()]
1316
            };
1317
3.11M
            match segment_before_slash {
1318
                // If buffer is a double-dot path segment, shorten url’s path,
1319
3.11M
                ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
1320
3.10M
                | ".%2E" => {
1321
19.1k
                    debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
1322
19.1k
                    self.serialization.truncate(segment_start);
1323
19.1k
                    if self.serialization.ends_with('/')
1324
19.1k
                        && Parser::last_slash_can_be_removed(&self.serialization, path_start)
1325
11.1k
                    {
1326
11.1k
                        self.serialization.pop();
1327
11.1k
                    }
1328
19.1k
                    self.shorten_path(scheme_type, path_start);
1329
1330
                    // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
1331
19.1k
                    if ends_with_slash && !self.serialization.ends_with('/') {
1332
0
                        self.serialization.push('/');
1333
19.1k
                    }
1334
                }
1335
                // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
1336
                // nor url is special and c is U+005C (\), append the empty string to url’s path.
1337
3.10M
                "." | "%2e" | "%2E" => {
1338
9.53k
                    self.serialization.truncate(segment_start);
1339
9.53k
                    if !self.serialization.ends_with('/') {
1340
0
                        self.serialization.push('/');
1341
9.53k
                    }
1342
                }
1343
                _ => {
1344
                    // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then
1345
3.09M
                    if scheme_type.is_file()
1346
96.5k
                        && segment_start == path_start + 1
1347
9.11k
                        && is_windows_drive_letter(segment_before_slash)
1348
                    {
1349
                        // Replace the second code point in buffer with U+003A (:).
1350
302
                        if let Some(c) = segment_before_slash.chars().next() {
1351
302
                            self.serialization.truncate(segment_start);
1352
302
                            self.serialization.push(c);
1353
302
                            self.serialization.push(':');
1354
302
                            if ends_with_slash {
1355
80
                                self.serialization.push('/');
1356
222
                            }
1357
0
                        }
1358
                        // If url’s host is neither the empty string nor null,
1359
                        // validation error, set url’s host to the empty string.
1360
302
                        if *has_host {
1361
245
                            self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
1362
245
                            *has_host = false; // FIXME account for this in callers
1363
245
                        }
1364
3.09M
                    }
1365
                }
1366
            }
1367
3.11M
            if !ends_with_slash {
1368
25.3k
                break;
1369
3.09M
            }
1370
        }
1371
25.3k
        if scheme_type.is_file() {
1372
5.01k
            // while url’s path’s size is greater than 1
1373
5.01k
            // and url’s path[0] is the empty string,
1374
5.01k
            // validation error, remove the first item from url’s path.
1375
5.01k
            //FIXME: log violation
1376
5.01k
            let path = self.serialization.split_off(path_start);
1377
5.01k
            self.serialization.push('/');
1378
5.01k
            self.serialization.push_str(path.trim_start_matches('/'));
1379
20.3k
        }
1380
1381
25.3k
        input
1382
25.3k
    }
1383
1384
19.1k
    fn last_slash_can_be_removed(serialization: &str, path_start: usize) -> bool {
1385
19.1k
        let url_before_segment = &serialization[..serialization.len() - 1];
1386
19.1k
        if let Some(segment_before_start) = url_before_segment.rfind('/') {
1387
            // Do not remove the root slash
1388
16.4k
            segment_before_start >= path_start
1389
                // Or a windows drive letter slash
1390
13.6k
                && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..])
1391
        } else {
1392
2.69k
            false
1393
        }
1394
19.1k
    }
1395
1396
    /// https://url.spec.whatwg.org/#shorten-a-urls-path
1397
19.1k
    fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1398
        // If path is empty, then return.
1399
19.1k
        if self.serialization.len() == path_start {
1400
0
            return;
1401
19.1k
        }
1402
        // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
1403
19.1k
        if scheme_type.is_file()
1404
9.50k
            && is_normalized_windows_drive_letter(&self.serialization[path_start..])
1405
        {
1406
0
            return;
1407
19.1k
        }
1408
        // Remove path’s last item.
1409
19.1k
        self.pop_path(scheme_type, path_start);
1410
19.1k
    }
1411
1412
    /// https://url.spec.whatwg.org/#pop-a-urls-path
1413
19.1k
    fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1414
19.1k
        if self.serialization.len() > path_start {
1415
19.1k
            let slash_position = self.serialization[path_start..].rfind('/').unwrap();
1416
            // + 1 since rfind returns the position before the slash.
1417
19.1k
            let segment_start = path_start + slash_position + 1;
1418
            // Don’t pop a Windows drive letter
1419
19.1k
            if !(scheme_type.is_file()
1420
9.50k
                && is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
1421
19.1k
            {
1422
19.1k
                self.serialization.truncate(segment_start);
1423
19.1k
            }
1424
0
        }
1425
19.1k
    }
1426
1427
36.8k
    pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1428
        loop {
1429
169M
            let input_before_c = input.clone();
1430
169M
            match input.next_utf8() {
1431
727
                Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => {
1432
1.39k
                    return input_before_c
1433
                }
1434
169M
                Some((c, utf8_c)) => {
1435
169M
                    self.check_url_code_point(c, &input);
1436
169M
                    self.serialization
1437
169M
                        .extend(utf8_percent_encode(utf8_c, CONTROLS));
1438
169M
                }
1439
35.5k
                None => return input,
1440
            }
1441
        }
1442
36.8k
    }
1443
1444
    #[allow(clippy::too_many_arguments)]
1445
56.5k
    fn with_query_and_fragment(
1446
56.5k
        mut self,
1447
56.5k
        scheme_type: SchemeType,
1448
56.5k
        scheme_end: u32,
1449
56.5k
        username_end: u32,
1450
56.5k
        host_start: u32,
1451
56.5k
        host_end: u32,
1452
56.5k
        host: HostInternal,
1453
56.5k
        port: Option<u16>,
1454
56.5k
        mut path_start: u32,
1455
56.5k
        remaining: Input<'_>,
1456
56.5k
    ) -> ParseResult<Url> {
1457
        // Special case for anarchist URL's with a leading empty path segment
1458
        // This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/,
1459
        // when parsed and then serialized, from ending up as web+demo://not-a-host/
1460
        // (they end up as web+demo:/.//not-a-host/).
1461
        //
1462
        // If url’s host is null, url does not have an opaque path,
1463
        // url’s path’s size is greater than 1, and url’s path[0] is the empty string,
1464
        // then append U+002F (/) followed by U+002E (.) to output.
1465
56.5k
        let scheme_end_as_usize = scheme_end as usize;
1466
56.5k
        let path_start_as_usize = path_start as usize;
1467
56.5k
        if path_start_as_usize == scheme_end_as_usize + 1 {
1468
            // Anarchist URL
1469
39.1k
            if self.serialization[path_start_as_usize..].starts_with("//") {
1470
153
                // Case 1: The base URL did not have an empty path segment, but the resulting one does
1471
153
                // Insert the "/." prefix
1472
153
                self.serialization.insert_str(path_start_as_usize, "/.");
1473
153
                path_start += 2;
1474
38.9k
            }
1475
39.1k
            assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1476
17.4k
        } else if path_start_as_usize == scheme_end_as_usize + 3
1477
369
            && &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/."
1478
        {
1479
            // Anarchist URL with leading empty path segment
1480
            // The base URL has a "/." between the host and the path
1481
24
            assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/');
1482
24
            if self
1483
24
                .serialization
1484
24
                .as_bytes()
1485
24
                .get(path_start_as_usize + 1)
1486
24
                .copied()
1487
24
                != Some(b'/')
1488
24
            {
1489
24
                // Case 2: The base URL had an empty path segment, but the resulting one does not
1490
24
                // Remove the "/." prefix
1491
24
                self.serialization
1492
24
                    .replace_range(scheme_end_as_usize..path_start_as_usize, ":");
1493
24
                path_start -= 2;
1494
24
            }
1495
24
            assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1496
17.4k
        }
1497
1498
56.5k
        let (query_start, fragment_start) =
1499
56.5k
            self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
1500
56.5k
        Ok(Url {
1501
56.5k
            serialization: self.serialization,
1502
56.5k
            scheme_end,
1503
56.5k
            username_end,
1504
56.5k
            host_start,
1505
56.5k
            host_end,
1506
56.5k
            host,
1507
56.5k
            port,
1508
56.5k
            path_start,
1509
56.5k
            query_start,
1510
56.5k
            fragment_start,
1511
56.5k
        })
1512
56.5k
    }
1513
1514
    /// Return (query_start, fragment_start)
1515
61.0k
    fn parse_query_and_fragment(
1516
61.0k
        &mut self,
1517
61.0k
        scheme_type: SchemeType,
1518
61.0k
        scheme_end: u32,
1519
61.0k
        mut input: Input<'_>,
1520
61.0k
    ) -> ParseResult<(Option<u32>, Option<u32>)> {
1521
61.0k
        let mut query_start = None;
1522
61.0k
        match input.next() {
1523
1.37k
            Some('#') => {}
1524
            Some('?') => {
1525
1.35k
                query_start = Some(to_u32(self.serialization.len())?);
1526
1.35k
                self.serialization.push('?');
1527
1.35k
                let remaining = self.parse_query(scheme_type, scheme_end, input);
1528
1.35k
                if let Some(remaining) = remaining {
1529
238
                    input = remaining
1530
                } else {
1531
1.11k
                    return Ok((query_start, None));
1532
                }
1533
            }
1534
58.2k
            None => return Ok((None, None)),
1535
0
            _ => panic!("Programming error. parse_query_and_fragment() called without ? or #"),
1536
        }
1537
1538
1.61k
        let fragment_start = to_u32(self.serialization.len())?;
1539
1.61k
        self.serialization.push('#');
1540
1.61k
        self.parse_fragment(input);
1541
1.61k
        Ok((query_start, Some(fragment_start)))
1542
61.0k
    }
1543
1544
1.99k
    pub fn parse_query<'i>(
1545
1.99k
        &mut self,
1546
1.99k
        scheme_type: SchemeType,
1547
1.99k
        scheme_end: u32,
1548
1.99k
        input: Input<'i>,
1549
1.99k
    ) -> Option<Input<'i>> {
1550
        struct QueryPartIter<'i, 'p> {
1551
            is_url_parser: bool,
1552
            input: Input<'i>,
1553
            violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
1554
        }
1555
1556
        impl<'i> Iterator for QueryPartIter<'i, '_> {
1557
            type Item = (&'i str, bool);
1558
1559
83.4k
            fn next(&mut self) -> Option<Self::Item> {
1560
83.4k
                let start = self.input.chars.as_str();
1561
                // bypass self.input.next() in order to get string slices
1562
                // which are faster to operate on
1563
47.9M
                while let Some(c) = self.input.chars.next() {
1564
1.07k
                    match c {
1565
                        ascii_tab_or_new_line_pattern!() => {
1566
79.8k
                            return Some((
1567
79.8k
                                &start[..start.len() - self.input.chars.as_str().len() - 1],
1568
79.8k
                                false,
1569
79.8k
                            ));
1570
                        }
1571
238
                        '#' if self.is_url_parser => {
1572
238
                            return Some((
1573
238
                                &start[..start.len() - self.input.chars.as_str().len() - 1],
1574
238
                                true,
1575
238
                            ));
1576
                        }
1577
47.8M
                        c => {
1578
47.8M
                            if let Some(vfn) = &self.violation_fn {
1579
0
                                check_url_code_point(vfn, c, &self.input);
1580
47.8M
                            }
1581
                        }
1582
                    }
1583
                }
1584
3.33k
                if start.is_empty() {
1585
1.75k
                    None
1586
                } else {
1587
1.58k
                    Some((start, false))
1588
                }
1589
83.4k
            }
1590
        }
1591
1592
1.99k
        let mut part_iter = QueryPartIter {
1593
1.99k
            is_url_parser: self.context == Context::UrlParser,
1594
1.99k
            input,
1595
1.99k
            violation_fn: self.violation_fn,
1596
1.99k
        };
1597
1.99k
        let set = if scheme_type.is_special() {
1598
501
            SPECIAL_QUERY
1599
        } else {
1600
1.48k
            QUERY
1601
        };
1602
1.99k
        let query_encoding_override = self.query_encoding_override.filter(|_| {
1603
0
            matches!(
1604
0
                &self.serialization[..scheme_end as usize],
1605
0
                "http" | "https" | "file" | "ftp"
1606
            )
1607
0
        });
1608
1609
83.4k
        while let Some((part, is_finished)) = part_iter.next() {
1610
81.6k
            match query_encoding_override {
1611
                // slightly faster to be repetitive and not convert text to Cow
1612
0
                Some(o) => self.serialization.extend(percent_encode(&o(part), set)),
1613
81.6k
                None => self
1614
81.6k
                    .serialization
1615
81.6k
                    .extend(percent_encode(part.as_bytes(), set)),
1616
            }
1617
81.6k
            if is_finished {
1618
238
                return Some(part_iter.input);
1619
81.4k
            }
1620
        }
1621
1622
1.75k
        None
1623
1.99k
    }
1624
1625
0
    fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult<Url> {
1626
0
        let before_fragment = match base_url.fragment_start {
1627
0
            Some(i) => base_url.slice(..i),
1628
0
            None => &*base_url.serialization,
1629
        };
1630
0
        debug_assert!(self.serialization.is_empty());
1631
0
        self.serialization
1632
0
            .reserve(before_fragment.len() + input.chars.as_str().len());
1633
0
        self.serialization.push_str(before_fragment);
1634
0
        self.serialization.push('#');
1635
0
        let next = input.next();
1636
0
        debug_assert!(next == Some('#'));
1637
0
        self.parse_fragment(input);
1638
        Ok(Url {
1639
0
            serialization: self.serialization,
1640
0
            fragment_start: Some(to_u32(before_fragment.len())?),
1641
            ..*base_url
1642
        })
1643
0
    }
1644
1645
1.97k
    pub fn parse_fragment(&mut self, input: Input<'_>) {
1646
        struct FragmentPartIter<'i, 'p> {
1647
            input: Input<'i>,
1648
            violation_fn: Option<&'p dyn Fn(SyntaxViolation)>,
1649
        }
1650
1651
        impl<'i> Iterator for FragmentPartIter<'i, '_> {
1652
            type Item = &'i str;
1653
1654
25.6k
            fn next(&mut self) -> Option<Self::Item> {
1655
25.6k
                let start = self.input.chars.as_str();
1656
                // bypass self.input.next() in order to get string slices
1657
                // which are faster to operate on
1658
56.0M
                while let Some(c) = self.input.chars.next() {
1659
56.0M
                    match c {
1660
                        ascii_tab_or_new_line_pattern!() => {
1661
22.1k
                            return Some(
1662
22.1k
                                &start[..start.len() - self.input.chars.as_str().len() - 1],
1663
22.1k
                            );
1664
                        }
1665
                        '\0' => {
1666
2.95M
                            if let Some(vfn) = &self.violation_fn {
1667
0
                                vfn(SyntaxViolation::NullInFragment);
1668
2.95M
                            }
1669
                        }
1670
53.0M
                        c => {
1671
53.0M
                            if let Some(vfn) = &self.violation_fn {
1672
0
                                check_url_code_point(vfn, c, &self.input);
1673
53.0M
                            }
1674
                        }
1675
                    }
1676
                }
1677
3.50k
                if start.is_empty() {
1678
1.97k
                    None
1679
                } else {
1680
1.53k
                    Some(start)
1681
                }
1682
25.6k
            }
1683
        }
1684
1685
1.97k
        let part_iter = FragmentPartIter {
1686
1.97k
            input,
1687
1.97k
            violation_fn: self.violation_fn,
1688
1.97k
        };
1689
1690
25.6k
        for part in part_iter {
1691
23.6k
            self.serialization
1692
23.6k
                .extend(utf8_percent_encode(part, FRAGMENT));
1693
23.6k
        }
1694
1.97k
    }
1695
1696
    #[inline]
1697
580M
    fn check_url_code_point(&self, c: char, input: &Input<'_>) {
1698
580M
        if let Some(vfn) = self.violation_fn {
1699
0
            check_url_code_point(vfn, c, input)
1700
580M
        }
1701
580M
    }
1702
}
1703
1704
0
fn check_url_code_point(vfn: &dyn Fn(SyntaxViolation), c: char, input: &Input<'_>) {
1705
0
    if c == '%' {
1706
0
        let mut input = input.clone();
1707
0
        if !matches!((input.next(), input.next()), (Some(a), Some(b))
1708
0
                             if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
1709
        {
1710
0
            vfn(SyntaxViolation::PercentDecode)
1711
0
        }
1712
0
    } else if !is_url_code_point(c) {
1713
0
        vfn(SyntaxViolation::NonUrlCodePoint)
1714
0
    }
1715
0
}
1716
1717
/// Whether `c` is a URL code point.
///
/// https://url.spec.whatwg.org/#url-code-points
///
/// URL code points are ASCII alphanumerics, the punctuation listed in the
/// match below, and U+00A0 to U+10FFFD excluding surrogates and
/// noncharacters.
///
/// Non URL code points:
/// U+0000 to U+0020 (space)
/// " # % < > [ \ ] ^ ` { | }
/// U+007F to U+009F
/// surrogates
/// U+FDD0 to U+FDEF
/// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
#[inline]
fn is_url_code_point(c: char) -> bool {
    matches!(c,
        'a'..='z' |
        'A'..='Z' |
        '0'..='9' |
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' |
        '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' |
        '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' |
        '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' |
        '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' |
        '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' |
        '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' |
        // Plane 14 starts at U+E0000: U+E0000..=U+E0FFF are neither surrogates
        // nor noncharacters, so they are URL code points like the rest of the plane.
        '\u{D0000}'..='\u{DFFFD}' | '\u{E0000}'..='\u{EFFFD}' |
        '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
}
1742
1743
/// https://url.spec.whatwg.org/#c0-controls-and-space
///
/// True for every code point from U+0000 up to and including U+0020 (space).
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    matches!(ch, '\u{0}'..=' ')
}
1748
1749
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
1750
#[inline]
1751
1.38G
fn ascii_tab_or_new_line(ch: char) -> bool {
1752
1.38G
    matches!(ch, ascii_tab_or_new_line_pattern!())
1753
1.38G
}
1754
1755
/// https://url.spec.whatwg.org/#ascii-alpha
///
/// True for the ASCII letters `A`–`Z` and `a`–`z` only.
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | 'a'..='z')
}
1760
1761
#[inline]
1762
202k
pub fn to_u32(i: usize) -> ParseResult<u32> {
1763
202k
    if i <= u32::MAX as usize {
1764
202k
        Ok(i as u32)
1765
    } else {
1766
0
        Err(ParseError::Overflow)
1767
    }
1768
202k
}
1769
1770
162M
fn is_normalized_windows_drive_letter(segment: &str) -> bool {
1771
162M
    is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':'
1772
162M
}
1773
1774
/// Whether the scheme is file:, the path has a single segment, and that segment
1775
/// is a Windows drive letter
1776
#[inline]
1777
162M
pub fn is_windows_drive_letter(segment: &str) -> bool {
1778
162M
    segment.len() == 2 && starts_with_windows_drive_letter(segment)
1779
162M
}
1780
1781
/// Whether path starts with a root slash
1782
/// and a windows drive letter eg: "/c:" or "/a:/"
1783
13.6k
fn path_starts_with_windows_drive_letter(s: &str) -> bool {
1784
13.6k
    if let Some(c) = s.as_bytes().first() {
1785
13.6k
        matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..])
1786
    } else {
1787
0
        false
1788
    }
1789
13.6k
}
1790
1791
373k
fn starts_with_windows_drive_letter(s: &str) -> bool {
1792
373k
    s.len() >= 2
1793
371k
        && ascii_alpha(s.as_bytes()[0] as char)
1794
16.7k
        && matches!(s.as_bytes()[1], b':' | b'|')
1795
3.71k
        && (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
1796
373k
}
1797
1798
/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
1799
850
fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool {
1800
850
    let mut input = input.clone();
1801
850
    match (input.next(), input.next(), input.next()) {
1802
        // its first two code points are a Windows drive letter
1803
        // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
1804
2
        (Some(a), Some(b), Some(c))
1805
431
            if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') =>
1806
        {
1807
2
            true
1808
        }
1809
        // its first two code points are a Windows drive letter
1810
        // its length is 2
1811
223
        (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true,
1812
838
        _ => false,
1813
    }
1814
850
}
1815
1816
/// Writes the decimal representation of `value` right-aligned into `buffer`
/// and returns the written digits as a string slice borrowed from it.
///
/// The buffer holds 5 bytes because a `u16` has at most 5 decimal digits
/// (65535). Bytes in front of the returned slice are left untouched.
#[inline]
fn fast_u16_to_str(buffer: &mut [u8; 5], mut value: u16) -> &str {
    let mut first_digit = buffer.len();

    // Fill from the last slot backwards, least significant digit first.
    for slot in buffer.iter_mut().rev() {
        *slot = b'0' + (value % 10) as u8;
        value /= 10;
        first_digit -= 1;
        if value == 0 {
            break;
        }
    }

    // SAFETY: every byte from `first_digit` to the end of the buffer was just
    // written as an ASCII digit (`b'0'..=b'9'`), which is valid UTF-8.
    unsafe { core::str::from_utf8_unchecked(&buffer[first_digit..]) }
}