/rust/registry/src/index.crates.io-1949cf8c6b5b557f/url-2.5.8/src/host.rs
Line | Count | Source |
1 | | // Copyright 2013-2016 The rust-url developers. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | | // option. This file may not be copied, modified, or distributed |
7 | | // except according to those terms. |
8 | | |
9 | | use crate::net::{Ipv4Addr, Ipv6Addr}; |
10 | | use alloc::borrow::Cow; |
11 | | use alloc::borrow::ToOwned; |
12 | | use alloc::string::String; |
13 | | use alloc::vec::Vec; |
14 | | use core::cmp; |
15 | | use core::fmt::{self, Formatter}; |
16 | | |
17 | | use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS}; |
18 | | #[cfg(feature = "serde")] |
19 | | use serde_derive::{Deserialize, Serialize}; |
20 | | |
21 | | use crate::parser::{ParseError, ParseResult}; |
22 | | |
/// Allocation-free summary of which kind of host a URL carries.
///
/// Unlike [`Host`], the `Domain` variant stores no string; only the IP
/// variants carry their address.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host. The conversion from [`Host`] yields this for an empty domain.
    None,
    /// A non-empty domain name (the text itself is not stored here).
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
31 | | |
32 | | impl From<Host<Cow<'_, str>>> for HostInternal { |
33 | 0 | fn from(host: Host<Cow<'_, str>>) -> Self { |
34 | 0 | match host { |
35 | 0 | Host::Domain(ref s) if s.is_empty() => Self::None, |
36 | 0 | Host::Domain(_) => Self::Domain, |
37 | 0 | Host::Ipv4(address) => Self::Ipv4(address), |
38 | 0 | Host::Ipv6(address) => Self::Ipv6(address), |
39 | | } |
40 | 0 | } |
41 | | } |
42 | | |
43 | | /// The host name of an URL. |
44 | | #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] |
45 | | #[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)] |
46 | | pub enum Host<S = String> { |
47 | | /// A DNS domain name, as '.' dot-separated labels. |
48 | | /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of |
49 | | /// a special URL, or percent encoded for non-special URLs. Hosts for |
50 | | /// non-special URLs are also called opaque hosts. |
51 | | Domain(S), |
52 | | |
53 | | /// An IPv4 address. |
54 | | /// `Url::host_str` returns the serialization of this address, |
55 | | /// as four decimal integers separated by `.` dots. |
56 | | Ipv4(Ipv4Addr), |
57 | | |
58 | | /// An IPv6 address. |
59 | | /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets, |
60 | | /// in the format per [RFC 5952 *A Recommendation |
61 | | /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952): |
62 | | /// lowercase hexadecimal with maximal `::` compression. |
63 | | Ipv6(Ipv6Addr), |
64 | | } |
65 | | |
66 | | impl Host<&str> { |
67 | | /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. |
68 | 0 | pub fn to_owned(&self) -> Host<String> { |
69 | 0 | match *self { |
70 | 0 | Host::Domain(domain) => Host::Domain(domain.to_owned()), |
71 | 0 | Host::Ipv4(address) => Host::Ipv4(address), |
72 | 0 | Host::Ipv6(address) => Host::Ipv6(address), |
73 | | } |
74 | 0 | } |
75 | | } |
76 | | |
77 | | impl Host<String> { |
78 | | /// Parse a host: either an IPv6 address in [] square brackets, or a domain. |
79 | | /// |
80 | | /// <https://url.spec.whatwg.org/#host-parsing> |
81 | 0 | pub fn parse(input: &str) -> Result<Self, ParseError> { |
82 | 0 | Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned()) |
83 | 0 | } |
84 | | |
85 | | /// <https://url.spec.whatwg.org/#concept-opaque-host-parser> |
86 | 0 | pub fn parse_opaque(input: &str) -> Result<Self, ParseError> { |
87 | 0 | Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned()) |
88 | 0 | } |
89 | | } |
90 | | |
91 | | impl<'a> Host<Cow<'a, str>> { |
92 | 0 | pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> { |
93 | 0 | if input.starts_with('[') { |
94 | 0 | if !input.ends_with(']') { |
95 | 0 | return Err(ParseError::InvalidIpv6Address); |
96 | 0 | } |
97 | 0 | return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); |
98 | 0 | } |
99 | 0 | let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into(); |
100 | 0 | let domain: Cow<'a, [u8]> = match domain { |
101 | 0 | Cow::Owned(v) => Cow::Owned(v), |
102 | | // if borrowed then we can use the original cow |
103 | 0 | Cow::Borrowed(_) => match input { |
104 | 0 | Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()), |
105 | 0 | Cow::Owned(input) => Cow::Owned(input.into_bytes()), |
106 | | }, |
107 | | }; |
108 | | |
109 | 0 | let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?; |
110 | | |
111 | 0 | if domain.is_empty() { |
112 | 0 | return Err(ParseError::EmptyHost); |
113 | 0 | } |
114 | | |
115 | 0 | if ends_in_a_number(&domain) { |
116 | 0 | let address = parse_ipv4addr(&domain)?; |
117 | 0 | Ok(Host::Ipv4(address)) |
118 | | } else { |
119 | 0 | Ok(Host::Domain(domain)) |
120 | | } |
121 | 0 | } |
122 | | |
123 | 0 | pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> { |
124 | 0 | if input.starts_with('[') { |
125 | 0 | if !input.ends_with(']') { |
126 | 0 | return Err(ParseError::InvalidIpv6Address); |
127 | 0 | } |
128 | 0 | return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); |
129 | 0 | } |
130 | | |
131 | 0 | let is_invalid_host_char = |c| { |
132 | 0 | matches!( |
133 | 0 | c, |
134 | | '\0' | '\t' |
135 | | | '\n' |
136 | | | '\r' |
137 | | | ' ' |
138 | | | '#' |
139 | | | '/' |
140 | | | ':' |
141 | | | '<' |
142 | | | '>' |
143 | | | '?' |
144 | | | '@' |
145 | | | '[' |
146 | | | '\\' |
147 | | | ']' |
148 | | | '^' |
149 | | | '|' |
150 | | ) |
151 | 0 | }; |
152 | | |
153 | 0 | if input.find(is_invalid_host_char).is_some() { |
154 | 0 | return Err(ParseError::InvalidDomainCharacter); |
155 | 0 | } |
156 | | |
157 | | // Call utf8_percent_encode and use the result. |
158 | | // Note: This returns Cow::Borrowed for single-item results (either from input |
159 | | // or from the static encoding table), and Cow::Owned for multi-item results. |
160 | | // We cannot distinguish between "borrowed from input" vs "borrowed from static table" |
161 | | // based on the Cow variant alone. |
162 | | Ok(Host::Domain( |
163 | 0 | match utf8_percent_encode(&input, CONTROLS).into() { |
164 | 0 | Cow::Owned(v) => Cow::Owned(v), |
165 | | // If we're borrowing, we need to check if it's the same as the input |
166 | 0 | Cow::Borrowed(v) => { |
167 | 0 | if v == &*input { |
168 | 0 | input // No encoding happened, reuse original |
169 | | } else { |
170 | 0 | Cow::Owned(v.to_owned()) // Borrowed from static table, need to own it |
171 | | } |
172 | | } |
173 | | }, |
174 | | )) |
175 | 0 | } |
176 | | |
177 | 0 | pub(crate) fn into_owned(self) -> Host<String> { |
178 | 0 | match self { |
179 | 0 | Host::Domain(s) => Host::Domain(s.into_owned()), |
180 | 0 | Host::Ipv4(ip) => Host::Ipv4(ip), |
181 | 0 | Host::Ipv6(ip) => Host::Ipv6(ip), |
182 | | } |
183 | 0 | } |
184 | | } |
185 | | |
186 | | impl<S: AsRef<str>> fmt::Display for Host<S> { |
187 | 0 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
188 | 0 | match *self { |
189 | 0 | Self::Domain(ref domain) => domain.as_ref().fmt(f), |
190 | 0 | Self::Ipv4(ref addr) => addr.fmt(f), |
191 | 0 | Self::Ipv6(ref addr) => { |
192 | 0 | f.write_str("[")?; |
193 | 0 | write_ipv6(addr, f)?; |
194 | 0 | f.write_str("]") |
195 | | } |
196 | | } |
197 | 0 | } Unexecuted instantiation: <url::host::Host<alloc::borrow::Cow<str>> as core::fmt::Display>::fmt Unexecuted instantiation: <url::host::Host as core::fmt::Display>::fmt |
198 | | } |
199 | | |
200 | | impl<S, T> PartialEq<Host<T>> for Host<S> |
201 | | where |
202 | | S: PartialEq<T>, |
203 | | { |
204 | 0 | fn eq(&self, other: &Host<T>) -> bool { |
205 | 0 | match (self, other) { |
206 | 0 | (Self::Domain(a), Host::Domain(b)) => a == b, |
207 | 0 | (Self::Ipv4(a), Host::Ipv4(b)) => a == b, |
208 | 0 | (Self::Ipv6(a), Host::Ipv6(b)) => a == b, |
209 | 0 | (_, _) => false, |
210 | | } |
211 | 0 | } Unexecuted instantiation: <url::host::Host<alloc::borrow::Cow<str>> as core::cmp::PartialEq<url::host::Host>>::eq Unexecuted instantiation: <url::host::Host<&str> as core::cmp::PartialEq>::eq |
212 | | } |
213 | | |
214 | 0 | fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result { |
215 | 0 | let segments = addr.segments(); |
216 | 0 | let (compress_start, compress_end) = longest_zero_sequence(&segments); |
217 | 0 | let mut i = 0; |
218 | 0 | while i < 8 { |
219 | 0 | if i == compress_start { |
220 | 0 | f.write_str(":")?; |
221 | 0 | if i == 0 { |
222 | 0 | f.write_str(":")?; |
223 | 0 | } |
224 | 0 | if compress_end < 8 { |
225 | 0 | i = compress_end; |
226 | 0 | } else { |
227 | 0 | break; |
228 | | } |
229 | 0 | } |
230 | 0 | write!(f, "{:x}", segments[i as usize])?; |
231 | 0 | if i < 7 { |
232 | 0 | f.write_str(":")?; |
233 | 0 | } |
234 | 0 | i += 1; |
235 | | } |
236 | 0 | Ok(()) |
237 | 0 | } |
238 | | |
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Finds the longest run of consecutive zero pieces; the earliest run wins a
// tie. Returns the half-open index range `(start, end)` of that run, or
// `(-1, -2)` when no run is at least two pieces long.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    // `(start, length)` of the best run seen so far.
    let mut best: Option<(isize, isize)> = None;
    // Start index of the run currently being scanned, if any.
    let mut run_start: Option<isize> = None;

    // A non-zero sentinel is appended so that a run touching the end of the
    // address is closed by the same code path as any other run.
    let with_sentinel = pieces.iter().copied().chain(core::iter::once(1u16));
    for (pos, piece) in with_sentinel.enumerate() {
        let pos = pos as isize;
        if piece == 0 {
            run_start = run_start.or(Some(pos));
        } else if let Some(start) = run_start.take() {
            let length = pos - start;
            if best.map_or(true, |(_, best_length)| length > best_length) {
                best = Some((start, length));
            }
        }
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    match best {
        Some((start, length)) if length >= 2 => (start, start + length),
        _ => (-1, -2),
    }
}
274 | | |
275 | | /// <https://url.spec.whatwg.org/#ends-in-a-number-checker> |
276 | 0 | fn ends_in_a_number(input: &str) -> bool { |
277 | 0 | let mut parts = input.rsplit('.'); |
278 | 0 | let last = parts.next().unwrap(); |
279 | 0 | let last = if last.is_empty() { |
280 | 0 | if let Some(last) = parts.next() { |
281 | 0 | last |
282 | | } else { |
283 | 0 | return false; |
284 | | } |
285 | | } else { |
286 | 0 | last |
287 | | }; |
288 | 0 | if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) { |
289 | 0 | return true; |
290 | 0 | } |
291 | | |
292 | 0 | parse_ipv4number(last).is_ok() |
293 | 0 | } |
294 | | |
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// Parses one IPv4 label. A `0x`/`0X` prefix selects hexadecimal; any other
/// leading `0` followed by more characters selects octal; everything else is
/// decimal. A prefix with nothing after it counts as zero.
///
/// Returns `Err(())` for empty input or for characters that are not digits of
/// the selected radix.
/// Ok(None) means the input is a valid number, but it overflows a `u32`.
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let hex_digits = input
        .strip_prefix("0x")
        .or_else(|| input.strip_prefix("0X"));
    let (digits, radix) = if let Some(hex) = hex_digits {
        (hex, 16)
    } else if input.len() >= 2 && input.starts_with('0') {
        (&input[1..], 8)
    } else {
        (input, 10)
    };

    if digits.is_empty() {
        return Ok(Some(0));
    }

    // Check every byte up front so that the only way the conversion below can
    // fail is by overflowing.
    let well_formed = digits.bytes().all(|b| char::from(b).is_digit(radix));
    if !well_formed {
        return Err(());
    }

    // A conversion error here can only be an integer overflow.
    Ok(u32::from_str_radix(digits, radix).ok())
}
331 | | |
332 | | /// <https://url.spec.whatwg.org/#concept-ipv4-parser> |
333 | 0 | fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> { |
334 | 0 | let mut parts: Vec<&str> = input.split('.').collect(); |
335 | 0 | if parts.last() == Some(&"") { |
336 | 0 | parts.pop(); |
337 | 0 | } |
338 | 0 | if parts.len() > 4 { |
339 | 0 | return Err(ParseError::InvalidIpv4Address); |
340 | 0 | } |
341 | 0 | let mut numbers: Vec<u32> = Vec::new(); |
342 | 0 | for part in parts { |
343 | 0 | match parse_ipv4number(part) { |
344 | 0 | Ok(Some(n)) => numbers.push(n), |
345 | 0 | Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow |
346 | 0 | Err(()) => return Err(ParseError::InvalidIpv4Address), |
347 | | }; |
348 | | } |
349 | 0 | let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); |
350 | | // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) |
351 | 0 | if ipv4 > u32::MAX >> (8 * numbers.len() as u32) { |
352 | 0 | return Err(ParseError::InvalidIpv4Address); |
353 | 0 | } |
354 | 0 | if numbers.iter().any(|x| *x > 255) { |
355 | 0 | return Err(ParseError::InvalidIpv4Address); |
356 | 0 | } |
357 | 0 | for (counter, n) in numbers.iter().enumerate() { |
358 | 0 | ipv4 += n << (8 * (3 - counter as u32)) |
359 | | } |
360 | 0 | Ok(Ipv4Addr::from(ipv4)) |
361 | 0 | } |
362 | | |
363 | | /// <https://url.spec.whatwg.org/#concept-ipv6-parser> |
364 | 0 | fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> { |
365 | 0 | let input = input.as_bytes(); |
366 | 0 | let len = input.len(); |
367 | 0 | let mut is_ip_v4 = false; |
368 | 0 | let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; |
369 | 0 | let mut piece_pointer = 0; |
370 | 0 | let mut compress_pointer = None; |
371 | 0 | let mut i = 0; |
372 | | |
373 | 0 | if len < 2 { |
374 | 0 | return Err(ParseError::InvalidIpv6Address); |
375 | 0 | } |
376 | | |
377 | 0 | if input[0] == b':' { |
378 | 0 | if input[1] != b':' { |
379 | 0 | return Err(ParseError::InvalidIpv6Address); |
380 | 0 | } |
381 | 0 | i = 2; |
382 | 0 | piece_pointer = 1; |
383 | 0 | compress_pointer = Some(1); |
384 | 0 | } |
385 | | |
386 | 0 | while i < len { |
387 | 0 | if piece_pointer == 8 { |
388 | 0 | return Err(ParseError::InvalidIpv6Address); |
389 | 0 | } |
390 | 0 | if input[i] == b':' { |
391 | 0 | if compress_pointer.is_some() { |
392 | 0 | return Err(ParseError::InvalidIpv6Address); |
393 | 0 | } |
394 | 0 | i += 1; |
395 | 0 | piece_pointer += 1; |
396 | 0 | compress_pointer = Some(piece_pointer); |
397 | 0 | continue; |
398 | 0 | } |
399 | 0 | let start = i; |
400 | 0 | let end = cmp::min(len, start + 4); |
401 | 0 | let mut value = 0u16; |
402 | 0 | while i < end { |
403 | 0 | match (input[i] as char).to_digit(16) { |
404 | 0 | Some(digit) => { |
405 | 0 | value = value * 0x10 + digit as u16; |
406 | 0 | i += 1; |
407 | 0 | } |
408 | 0 | None => break, |
409 | | } |
410 | | } |
411 | 0 | if i < len { |
412 | 0 | match input[i] { |
413 | | b'.' => { |
414 | 0 | if i == start { |
415 | 0 | return Err(ParseError::InvalidIpv6Address); |
416 | 0 | } |
417 | 0 | i = start; |
418 | 0 | if piece_pointer > 6 { |
419 | 0 | return Err(ParseError::InvalidIpv6Address); |
420 | 0 | } |
421 | 0 | is_ip_v4 = true; |
422 | | } |
423 | | b':' => { |
424 | 0 | i += 1; |
425 | 0 | if i == len { |
426 | 0 | return Err(ParseError::InvalidIpv6Address); |
427 | 0 | } |
428 | | } |
429 | 0 | _ => return Err(ParseError::InvalidIpv6Address), |
430 | | } |
431 | 0 | } |
432 | 0 | if is_ip_v4 { |
433 | 0 | break; |
434 | 0 | } |
435 | 0 | pieces[piece_pointer] = value; |
436 | 0 | piece_pointer += 1; |
437 | | } |
438 | | |
439 | 0 | if is_ip_v4 { |
440 | 0 | if piece_pointer > 6 { |
441 | 0 | return Err(ParseError::InvalidIpv6Address); |
442 | 0 | } |
443 | 0 | let mut numbers_seen = 0; |
444 | 0 | while i < len { |
445 | 0 | if numbers_seen > 0 { |
446 | 0 | if numbers_seen < 4 && (i < len && input[i] == b'.') { |
447 | 0 | i += 1 |
448 | | } else { |
449 | 0 | return Err(ParseError::InvalidIpv6Address); |
450 | | } |
451 | 0 | } |
452 | | |
453 | 0 | let mut ipv4_piece = None; |
454 | 0 | while i < len { |
455 | 0 | let digit = match input[i] { |
456 | 0 | c @ b'0'..=b'9' => c - b'0', |
457 | 0 | _ => break, |
458 | | }; |
459 | 0 | match ipv4_piece { |
460 | 0 | None => ipv4_piece = Some(digit as u16), |
461 | 0 | Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero |
462 | 0 | Some(ref mut v) => { |
463 | 0 | *v = *v * 10 + digit as u16; |
464 | 0 | if *v > 255 { |
465 | 0 | return Err(ParseError::InvalidIpv6Address); |
466 | 0 | } |
467 | | } |
468 | | } |
469 | 0 | i += 1; |
470 | | } |
471 | | |
472 | 0 | pieces[piece_pointer] = if let Some(v) = ipv4_piece { |
473 | 0 | pieces[piece_pointer] * 0x100 + v |
474 | | } else { |
475 | 0 | return Err(ParseError::InvalidIpv6Address); |
476 | | }; |
477 | 0 | numbers_seen += 1; |
478 | | |
479 | 0 | if numbers_seen == 2 || numbers_seen == 4 { |
480 | 0 | piece_pointer += 1; |
481 | 0 | } |
482 | | } |
483 | | |
484 | 0 | if numbers_seen != 4 { |
485 | 0 | return Err(ParseError::InvalidIpv6Address); |
486 | 0 | } |
487 | 0 | } |
488 | | |
489 | 0 | if i < len { |
490 | 0 | return Err(ParseError::InvalidIpv6Address); |
491 | 0 | } |
492 | | |
493 | 0 | match compress_pointer { |
494 | 0 | Some(compress_pointer) => { |
495 | 0 | let mut swaps = piece_pointer - compress_pointer; |
496 | 0 | piece_pointer = 7; |
497 | 0 | while swaps > 0 { |
498 | 0 | pieces.swap(piece_pointer, compress_pointer + swaps - 1); |
499 | 0 | swaps -= 1; |
500 | 0 | piece_pointer -= 1; |
501 | 0 | } |
502 | | } |
503 | | _ => { |
504 | 0 | if piece_pointer != 8 { |
505 | 0 | return Err(ParseError::InvalidIpv6Address); |
506 | 0 | } |
507 | | } |
508 | | } |
509 | 0 | Ok(Ipv6Addr::new( |
510 | 0 | pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7], |
511 | 0 | )) |
512 | 0 | } |