Coverage Report

Created: 2025-05-08 06:13

/src/rust-url/url/src/host.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2013-2016 The rust-url developers.
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6
// option. This file may not be copied, modified, or distributed
7
// except according to those terms.
8
9
use crate::net::{Ipv4Addr, Ipv6Addr};
10
use alloc::borrow::Cow;
11
use alloc::borrow::ToOwned;
12
use alloc::string::String;
13
use alloc::vec::Vec;
14
use core::cmp;
15
use core::fmt::{self, Formatter};
16
17
use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
18
#[cfg(feature = "serde")]
19
use serde::{Deserialize, Serialize};
20
21
use crate::parser::{ParseError, ParseResult};
22
23
/// Payload-light, `Copy` summary of a host.
///
/// Unlike [`Host`], the `Domain` variant carries no text; only the IP
/// variants keep their address. An empty domain is represented as `None`
/// (see the `From<Host<Cow<'_, str>>>` conversion).
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum HostInternal {
    /// No host.
    None,
    /// A (non-empty) domain; the name itself is not stored here.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
31
32
impl From<Host<Cow<'_, str>>> for HostInternal {
33
16.2k
    fn from(host: Host<Cow<'_, str>>) -> HostInternal {
34
15.1k
        match host {
35
15.1k
            Host::Domain(ref s) if s.is_empty() => HostInternal::None,
36
14.7k
            Host::Domain(_) => HostInternal::Domain,
37
695
            Host::Ipv4(address) => HostInternal::Ipv4(address),
38
410
            Host::Ipv6(address) => HostInternal::Ipv6(address),
39
        }
40
16.2k
    }
41
}
42
43
/// The host name of an URL.
44
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
45
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
46
pub enum Host<S = String> {
47
    /// A DNS domain name, as '.' dot-separated labels.
48
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
49
    /// a special URL, or percent encoded for non-special URLs. Hosts for
50
    /// non-special URLs are also called opaque hosts.
51
    Domain(S),
52
53
    /// An IPv4 address.
54
    /// `Url::host_str` returns the serialization of this address,
55
    /// as four decimal integers separated by `.` dots.
56
    Ipv4(Ipv4Addr),
57
58
    /// An IPv6 address.
59
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
60
    /// in the format per [RFC 5952 *A Recommendation
61
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
62
    /// lowercase hexadecimal with maximal `::` compression.
63
    Ipv6(Ipv6Addr),
64
}
65
66
impl Host<&str> {
67
    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
68
162
    pub fn to_owned(&self) -> Host<String> {
69
162
        match *self {
70
138
            Host::Domain(domain) => Host::Domain(domain.to_owned()),
71
6
            Host::Ipv4(address) => Host::Ipv4(address),
72
18
            Host::Ipv6(address) => Host::Ipv6(address),
73
        }
74
162
    }
75
}
76
77
impl Host<String> {
78
    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
79
    ///
80
    /// <https://url.spec.whatwg.org/#host-parsing>
81
1.91k
    pub fn parse(input: &str) -> Result<Self, ParseError> {
82
1.91k
        Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
83
1.91k
    }
84
85
    /// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
86
0
    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
87
0
        Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())
88
0
    }
89
}
90
91
impl<'a> Host<Cow<'a, str>> {
92
22.2k
    pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
93
22.2k
        if input.starts_with('[') {
94
678
            if !input.ends_with(']') {
95
51
                return Err(ParseError::InvalidIpv6Address);
96
627
            }
97
627
            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
98
21.5k
        }
99
21.5k
        let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
100
21.5k
        let domain: Cow<'a, [u8]> = match domain {
101
1.04k
            Cow::Owned(v) => Cow::Owned(v),
102
            // if borrowed then we can use the original cow
103
20.4k
            Cow::Borrowed(_) => match input {
104
19.4k
                Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
105
1.06k
                Cow::Owned(input) => Cow::Owned(input.into_bytes()),
106
            },
107
        };
108
109
21.5k
        let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?;
110
111
15.6k
        if domain.is_empty() {
112
20
            return Err(ParseError::EmptyHost);
113
15.6k
        }
114
15.6k
115
15.6k
        if ends_in_a_number(&domain) {
116
972
            let address = parse_ipv4addr(&domain)?;
117
755
            Ok(Host::Ipv4(address))
118
        } else {
119
14.6k
            Ok(Host::Domain(domain))
120
        }
121
22.2k
    }
122
123
2.47k
    pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
124
2.47k
        if input.starts_with('[') {
125
113
            if !input.ends_with(']') {
126
38
                return Err(ParseError::InvalidIpv6Address);
127
75
            }
128
75
            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
129
2.36k
        }
130
2.36k
131
41.0M
        let is_invalid_host_char = |c| {
132
41.0M
            matches!(
133
41.0M
                c,
134
                '\0' | '\t'
135
                    | '\n'
136
                    | '\r'
137
                    | ' '
138
                    | '#'
139
                    | '/'
140
                    | ':'
141
                    | '<'
142
                    | '>'
143
                    | '?'
144
                    | '@'
145
                    | '['
146
                    | '\\'
147
                    | ']'
148
                    | '^'
149
                    | '|'
150
            )
151
41.0M
        };
152
153
2.36k
        if input.find(is_invalid_host_char).is_some() {
154
60
            Err(ParseError::InvalidDomainCharacter)
155
        } else {
156
            Ok(Host::Domain(
157
2.30k
                match utf8_percent_encode(&input, CONTROLS).into() {
158
791
                    Cow::Owned(v) => Cow::Owned(v),
159
                    // if we're borrowing, then we can return the original Cow
160
1.50k
                    Cow::Borrowed(_) => input,
161
                },
162
            ))
163
        }
164
2.47k
    }
165
166
1.75k
    pub(crate) fn into_owned(self) -> Host<String> {
167
1.75k
        match self {
168
1.66k
            Host::Domain(s) => Host::Domain(s.into_owned()),
169
42
            Host::Ipv4(ip) => Host::Ipv4(ip),
170
44
            Host::Ipv6(ip) => Host::Ipv6(ip),
171
        }
172
1.75k
    }
173
}
174
175
impl<S: AsRef<str>> fmt::Display for Host<S> {
176
16.5k
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
177
16.5k
        match *self {
178
15.4k
            Host::Domain(ref domain) => domain.as_ref().fmt(f),
179
719
            Host::Ipv4(ref addr) => addr.fmt(f),
180
445
            Host::Ipv6(ref addr) => {
181
445
                f.write_str("[")?;
182
445
                write_ipv6(addr, f)?;
183
445
                f.write_str("]")
184
            }
185
        }
186
16.5k
    }
<url::host::Host<alloc::borrow::Cow<str>> as core::fmt::Display>::fmt
Line
Count
Source
176
16.4k
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
177
16.4k
        match *self {
178
15.2k
            Host::Domain(ref domain) => domain.as_ref().fmt(f),
179
713
            Host::Ipv4(ref addr) => addr.fmt(f),
180
427
            Host::Ipv6(ref addr) => {
181
427
                f.write_str("[")?;
182
427
                write_ipv6(addr, f)?;
183
427
                f.write_str("]")
184
            }
185
        }
186
16.4k
    }
<url::host::Host as core::fmt::Display>::fmt
Line
Count
Source
176
162
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
177
162
        match *self {
178
138
            Host::Domain(ref domain) => domain.as_ref().fmt(f),
179
6
            Host::Ipv4(ref addr) => addr.fmt(f),
180
18
            Host::Ipv6(ref addr) => {
181
18
                f.write_str("[")?;
182
18
                write_ipv6(addr, f)?;
183
18
                f.write_str("]")
184
            }
185
        }
186
162
    }
187
}
188
189
impl<S, T> PartialEq<Host<T>> for Host<S>
190
where
191
    S: PartialEq<T>,
192
{
193
1.34k
    fn eq(&self, other: &Host<T>) -> bool {
194
1.34k
        match (self, other) {
195
1.23k
            (Host::Domain(a), Host::Domain(b)) => a == b,
196
0
            (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
197
0
            (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
198
114
            (_, _) => false,
199
        }
200
1.34k
    }
Unexecuted instantiation: <url::host::Host<alloc::borrow::Cow<str>> as core::cmp::PartialEq<url::host::Host>>::eq
<url::host::Host<&str> as core::cmp::PartialEq>::eq
Line
Count
Source
193
1.34k
    fn eq(&self, other: &Host<T>) -> bool {
194
1.34k
        match (self, other) {
195
1.23k
            (Host::Domain(a), Host::Domain(b)) => a == b,
196
0
            (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
197
0
            (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
198
114
            (_, _) => false,
199
        }
200
1.34k
    }
201
}
202
203
445
fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
204
445
    let segments = addr.segments();
205
445
    let (compress_start, compress_end) = longest_zero_sequence(&segments);
206
445
    let mut i = 0;
207
1.67k
    while i < 8 {
208
1.41k
        if i == compress_start {
209
424
            f.write_str(":")?;
210
424
            if i == 0 {
211
198
                f.write_str(":")?;
212
226
            }
213
424
            if compress_end < 8 {
214
244
                i = compress_end;
215
244
            } else {
216
180
                break;
217
            }
218
989
        }
219
1.23k
        write!(f, "{:x}", segments[i as usize])?;
220
1.23k
        if i < 7 {
221
968
            f.write_str(":")?;
222
265
        }
223
1.23k
        i += 1;
224
    }
225
445
    Ok(())
226
445
}
227
228
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Returns the half-open range `(start, end)` of the first longest run of
// zero pieces, or `(-1, -2)` when no run is at least two pieces long.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;
    let mut run_start: isize = -1;

    for (idx, &piece) in (0isize..).zip(pieces.iter()) {
        if piece == 0 {
            if run_start < 0 {
                run_start = idx;
            }
            continue;
        }
        // A non-zero piece closes the current run, if any. The strict `>`
        // keeps the earliest run when several share the maximum length.
        if run_start >= 0 && idx - run_start > best_len {
            best_start = run_start;
            best_len = idx - run_start;
        }
        run_start = -1;
    }

    // Close a run that extends to the last piece.
    if run_start >= 0 && 8 - run_start > best_len {
        best_start = run_start;
        best_len = 8 - run_start;
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    if best_len < 2 {
        (-1, -2)
    } else {
        (best_start, best_start + best_len)
    }
}
263
264
/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
265
15.6k
fn ends_in_a_number(input: &str) -> bool {
266
15.6k
    let mut parts = input.rsplit('.');
267
15.6k
    let last = parts.next().unwrap();
268
15.6k
    let last = if last.is_empty() {
269
1.50k
        if let Some(last) = parts.next() {
270
1.50k
            last
271
        } else {
272
0
            return false;
273
        }
274
    } else {
275
14.1k
        last
276
    };
277
52.5k
    if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
278
913
        return true;
279
14.7k
    }
280
14.7k
281
14.7k
    parse_ipv4number(last).is_ok()
282
15.6k
}
283
284
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// Parses one `.`-separated part of an IPv4 address. A `0x`/`0X` prefix
/// selects hexadecimal, any other leading `0` (with more characters after
/// it) selects octal, and everything else is decimal. A prefix with no
/// digits after it (e.g. `"0x"`) is the number zero.
///
/// Returns:
/// * `Err(())` if the input is empty or contains a character that is not a
///   digit of the selected radix.
/// * `Ok(Some(n))` for a valid number that fits in a `u32`.
/// * `Ok(None)` for a valid number that overflows a `u32`.
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let hex_digits = input
        .strip_prefix("0x")
        .or_else(|| input.strip_prefix("0X"));
    let (digits, radix) = if let Some(hex) = hex_digits {
        (hex, 16)
    } else if input.len() >= 2 && input.starts_with('0') {
        (&input[1..], 8)
    } else {
        (input, 10)
    };

    if digits.is_empty() {
        return Ok(Some(0));
    }

    // Validate up front so that the only way `from_str_radix` can fail
    // below is an integer overflow (this also rejects a leading `+`).
    if !digits.bytes().all(|b| char::from(b).is_digit(radix)) {
        return Err(());
    }

    Ok(u32::from_str_radix(digits, radix).ok())
}
320
321
/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
322
972
fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
323
972
    let mut parts: Vec<&str> = input.split('.').collect();
324
972
    if parts.last() == Some(&"") {
325
42
        parts.pop();
326
930
    }
327
972
    if parts.len() > 4 {
328
161
        return Err(ParseError::InvalidIpv4Address);
329
811
    }
330
811
    let mut numbers: Vec<u32> = Vec::new();
331
2.40k
    for part in parts {
332
1.64k
        match parse_ipv4number(part) {
333
1.59k
            Ok(Some(n)) => numbers.push(n),
334
5
            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
335
47
            Err(()) => return Err(ParseError::InvalidIpv4Address),
336
        };
337
    }
338
759
    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
339
759
    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
340
759
    if ipv4 > u32::MAX >> (8 * numbers.len() as u32) {
341
2
        return Err(ParseError::InvalidIpv4Address);
342
757
    }
343
826
    if numbers.iter().any(|x| *x > 255) {
344
2
        return Err(ParseError::InvalidIpv4Address);
345
755
    }
346
824
    for (counter, n) in numbers.iter().enumerate() {
347
824
        ipv4 += n << (8 * (3 - counter as u32))
348
    }
349
755
    Ok(Ipv4Addr::from(ipv4))
350
972
}
351
352
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
353
702
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
354
702
    let input = input.as_bytes();
355
702
    let len = input.len();
356
702
    let mut is_ip_v4 = false;
357
702
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
358
702
    let mut piece_pointer = 0;
359
702
    let mut compress_pointer = None;
360
702
    let mut i = 0;
361
702
362
702
    if len < 2 {
363
14
        return Err(ParseError::InvalidIpv6Address);
364
688
    }
365
688
366
688
    if input[0] == b':' {
367
233
        if input[1] != b':' {
368
12
            return Err(ParseError::InvalidIpv6Address);
369
221
        }
370
221
        i = 2;
371
221
        piece_pointer = 1;
372
221
        compress_pointer = Some(1);
373
455
    }
374
375
2.32k
    while i < len {
376
1.83k
        if piece_pointer == 8 {
377
2
            return Err(ParseError::InvalidIpv6Address);
378
1.83k
        }
379
1.83k
        if input[i] == b':' {
380
268
            if compress_pointer.is_some() {
381
4
                return Err(ParseError::InvalidIpv6Address);
382
264
            }
383
264
            i += 1;
384
264
            piece_pointer += 1;
385
264
            compress_pointer = Some(piece_pointer);
386
264
            continue;
387
1.56k
        }
388
1.56k
        let start = i;
389
1.56k
        let end = cmp::min(len, start + 4);
390
1.56k
        let mut value = 0u16;
391
4.19k
        while i < end {
392
3.69k
            match (input[i] as char).to_digit(16) {
393
2.62k
                Some(digit) => {
394
2.62k
                    value = value * 0x10 + digit as u16;
395
2.62k
                    i += 1;
396
2.62k
                }
397
1.07k
                None => break,
398
            }
399
        }
400
1.56k
        if i < len {
401
1.26k
            match input[i] {
402
                b'.' => {
403
100
                    if i == start {
404
7
                        return Err(ParseError::InvalidIpv6Address);
405
93
                    }
406
93
                    i = start;
407
93
                    if piece_pointer > 6 {
408
0
                        return Err(ParseError::InvalidIpv6Address);
409
93
                    }
410
93
                    is_ip_v4 = true;
411
                }
412
                b':' => {
413
1.08k
                    i += 1;
414
1.08k
                    if i == len {
415
3
                        return Err(ParseError::InvalidIpv6Address);
416
1.08k
                    }
417
                }
418
79
                _ => return Err(ParseError::InvalidIpv6Address),
419
            }
420
303
        }
421
1.48k
        if is_ip_v4 {
422
93
            break;
423
1.38k
        }
424
1.38k
        pieces[piece_pointer] = value;
425
1.38k
        piece_pointer += 1;
426
    }
427
428
581
    if is_ip_v4 {
429
93
        if piece_pointer > 6 {
430
0
            return Err(ParseError::InvalidIpv6Address);
431
93
        }
432
93
        let mut numbers_seen = 0;
433
239
        while i < len {
434
224
            if numbers_seen > 0 {
435
131
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
436
119
                    i += 1
437
                } else {
438
12
                    return Err(ParseError::InvalidIpv6Address);
439
                }
440
93
            }
441
442
212
            let mut ipv4_piece = None;
443
492
            while i < len {
444
444
                let digit = match input[i] {
445
316
                    c @ b'0'..=b'9' => c - b'0',
446
148
                    _ => break,
447
                };
448
296
                match ipv4_piece {
449
162
                    None => ipv4_piece = Some(digit as u16),
450
2
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
451
132
                    Some(ref mut v) => {
452
132
                        *v = *v * 10 + digit as u16;
453
132
                        if *v > 255 {
454
14
                            return Err(ParseError::InvalidIpv6Address);
455
118
                        }
456
                    }
457
                }
458
280
                i += 1;
459
            }
460
461
196
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
462
146
                pieces[piece_pointer] * 0x100 + v
463
            } else {
464
50
                return Err(ParseError::InvalidIpv6Address);
465
            };
466
146
            numbers_seen += 1;
467
146
468
146
            if numbers_seen == 2 || numbers_seen == 4 {
469
46
                piece_pointer += 1;
470
100
            }
471
        }
472
473
15
        if numbers_seen != 4 {
474
11
            return Err(ParseError::InvalidIpv6Address);
475
4
        }
476
488
    }
477
478
492
    if i < len {
479
0
        return Err(ParseError::InvalidIpv6Address);
480
492
    }
481
492
482
492
    match compress_pointer {
483
455
        Some(compress_pointer) => {
484
455
            let mut swaps = piece_pointer - compress_pointer;
485
455
            piece_pointer = 7;
486
1.06k
            while swaps > 0 {
487
613
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
488
613
                swaps -= 1;
489
613
                piece_pointer -= 1;
490
613
            }
491
        }
492
        _ => {
493
37
            if piece_pointer != 8 {
494
21
                return Err(ParseError::InvalidIpv6Address);
495
16
            }
496
        }
497
    }
498
471
    Ok(Ipv6Addr::new(
499
471
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
500
471
    ))
501
702
}