Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/_url.py: 38%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2import sys
3import warnings
4from collections.abc import Mapping, Sequence
5from enum import Enum
6from functools import _CacheInfo, lru_cache
7from ipaddress import ip_address
8from typing import (
9 TYPE_CHECKING,
10 Any,
11 NoReturn,
12 TypedDict,
13 TypeVar,
14 Union,
15 cast,
16 overload,
17)
18from urllib.parse import SplitResult, uses_relative
20import idna
21from multidict import MultiDict, MultiDictProxy, istr
22from propcache.api import under_cached_property as cached_property
24from ._parse import (
25 USES_AUTHORITY,
26 SplitURLType,
27 make_netloc,
28 query_to_pairs,
29 split_netloc,
30 split_url,
31 unsplit_result,
32)
33from ._path import normalize_path, normalize_path_segments
34from ._query import (
35 Query,
36 QueryVariable,
37 SimpleQuery,
38 get_str_query,
39 get_str_query_from_iterable,
40 get_str_query_from_sequence_iterable,
41)
42from ._quoters import (
43 FRAGMENT_QUOTER,
44 FRAGMENT_REQUOTER,
45 PATH_QUOTER,
46 PATH_REQUOTER,
47 PATH_SAFE_UNQUOTER,
48 PATH_UNQUOTER,
49 QS_UNQUOTER,
50 QUERY_QUOTER,
51 QUERY_REQUOTER,
52 QUOTER,
53 REQUOTER,
54 UNQUOTER,
55 human_quote,
56)
# Port that is implied by each scheme when the URL does not spell one out.
DEFAULT_PORTS = dict(http=80, https=443, ws=80, wss=443, ftp=21)
# Immutable snapshot of the schemes urllib.parse lists in ``uses_relative``.
USES_RELATIVE = frozenset(uses_relative)

# Special schemes https://url.spec.whatwg.org/#special-scheme
# are not allowed to have an empty host https://url.spec.whatwg.org/#url-representation
SCHEME_REQUIRES_HOST = frozenset({"http", "https", "ws", "wss", "ftp"})


# reg-name: unreserved / pct-encoded / sub-delims
# The pattern below matches anything that falls *outside* of those classes.
# It is only ever applied to lower-cased ASCII values.
NOT_REG_NAME = re.compile(
    r"""
        # any character not in the unreserved or sub-delims sets, plus %
        # (validated with the additional check for pct-encoded sequences below)
        [^a-z0-9\-._~!$&'()*+,;=%]
    |
        # % only allowed if it is part of a pct-encoded
        # sequence of 2 hex digits.
        %(?![0-9a-f]{2})
    """,
    re.VERBOSE,
)

_T = TypeVar("_T")

# ``typing.Self`` only exists from Python 3.11; older interpreters use ``Any``.
if sys.version_info < (3, 11):
    Self = Any
else:
    from typing import Self
class UndefinedType(Enum):
    """Single-member enum whose only value serves as a "not set" sentinel."""

    # The one and only member; exposed module-wide as ``UNDEFINED``.
    _singleton = 0


# Sentinel meaning "no value was supplied".
UNDEFINED = UndefinedType._singleton
class CacheInfo(TypedDict):
    """Host encoding cache.

    Each key holds a ``functools`` ``_CacheInfo`` record (the statistics
    tuple produced by ``lru_cache``-wrapped callables).
    """

    # NOTE(review): the key names presumably match the cached helper
    # functions defined further down in this module -- confirm there.
    idna_encode: _CacheInfo
    idna_decode: _CacheInfo
    ip_address: _CacheInfo
    host_validate: _CacheInfo
    encode_host: _CacheInfo
class _InternalURLCache(TypedDict, total=False):
    # Typed layout of the per-instance ``URL._cache`` dictionary.
    # ``total=False``: every key is optional and is only present once the
    # corresponding value has been computed or pre-seeded.
    # Most keys carry the name of a ``cached_property`` on ``URL``; ``hash``
    # is written by ``URL.__hash__``.

    # -- whole-URL values
    _val: SplitURLType
    _origin: "URL"
    absolute: bool
    hash: int
    scheme: str
    # -- authority (netloc) components
    raw_authority: str
    authority: str
    raw_user: Union[str, None]
    user: Union[str, None]
    raw_password: Union[str, None]
    password: Union[str, None]
    raw_host: Union[str, None]
    host: Union[str, None]
    host_subcomponent: Union[str, None]
    host_port_subcomponent: Union[str, None]
    port: Union[int, None]
    explicit_port: Union[int, None]
    # -- path, query and fragment
    raw_path: str
    path: str
    _parsed_query: list[tuple[str, str]]
    query: "MultiDictProxy[str]"
    raw_query_string: str
    query_string: str
    path_qs: str
    raw_path_qs: str
    raw_fragment: str
    fragment: str
    # -- values derived from the path segments
    raw_parts: tuple[str, ...]
    parts: tuple[str, ...]
    parent: "URL"
    raw_name: str
    name: str
    raw_suffix: str
    suffix: str
    raw_suffixes: tuple[str, ...]
    suffixes: tuple[str, ...]
def rewrite_module(obj: _T) -> _T:
    """Set ``obj.__module__`` to ``"yarl"`` and hand the same object back.

    Usable as a decorator: the object is modified in place and returned.
    """
    setattr(obj, "__module__", "yarl")
    return obj
@lru_cache
def encode_url(url_str: str) -> "URL":
    """Parse unencoded URL.

    Splits ``url_str`` into its five components, re-quotes each of them and
    returns a new ``URL`` whose cache is pre-seeded with the values that
    were computed along the way.

    The function is wrapped in ``lru_cache``, so repeated calls with the
    same string return the same ``URL`` object.

    Raises:
        ValueError: the netloc has no host and the scheme is listed in
            ``SCHEME_REQUIRES_HOST``.
    """
    cache: _InternalURLCache = {}
    host: Union[str, None]
    scheme, netloc, path, query, fragment = split_url(url_str)
    if not netloc:  # netloc
        # No authority at all: nothing netloc-related is cached.
        host = ""
    else:
        if ":" in netloc or "@" in netloc or "[" in netloc:
            # Complex netloc
            username, password, host, port = split_netloc(netloc)
        else:
            # Bare host: the whole netloc is the host.
            username = password = port = None
            host = netloc
        if host is None:
            if scheme in SCHEME_REQUIRES_HOST:
                msg = (
                    "Invalid URL: host is required for "
                    f"absolute urls with the {scheme} scheme"
                )
                raise ValueError(msg)
            else:
                host = ""
        host = _encode_host(host, validate_host=False)
        # Remove brackets as host encoder adds back brackets for IPv6 addresses
        cache["raw_host"] = host[1:-1] if "[" in host else host
        cache["explicit_port"] = port
        if password is None and username is None:
            # Fast path for URLs without user, password
            netloc = host if port is None else f"{host}:{port}"
            cache["raw_user"] = None
            cache["raw_password"] = None
        else:
            # Empty strings and None are passed through without requoting.
            raw_user = REQUOTER(username) if username else username
            raw_password = REQUOTER(password) if password else password
            netloc = make_netloc(raw_user, raw_password, host, port)
            cache["raw_user"] = raw_user
            cache["raw_password"] = raw_password

    if path:
        path = PATH_REQUOTER(path)
        # Paths are only normalized when an authority is present.
        if netloc and "." in path:
            path = normalize_path(path)
    if query:
        query = QUERY_REQUOTER(query)
    if fragment:
        fragment = FRAGMENT_REQUOTER(fragment)

    cache["scheme"] = scheme
    # Same rule as the ``raw_path`` property: "/" when there is an
    # authority but no path.
    cache["raw_path"] = "/" if not path and netloc else path
    cache["raw_query_string"] = query
    cache["raw_fragment"] = fragment

    # Build the instance directly instead of going through ``URL.__new__``.
    self = object.__new__(URL)
    self._scheme = scheme
    self._netloc = netloc
    self._path = path
    self._query = query
    self._fragment = fragment
    self._cache = cache
    return self
@lru_cache
def pre_encoded_url(url_str: str) -> "URL":
    """Build a URL from a string whose parts are stored exactly as split.

    No quoting or normalization is applied; results are memoized by
    ``lru_cache``.
    """
    scheme, netloc, path, query, fragment = split_url(url_str)
    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query
    url._fragment = fragment
    url._cache = {}
    return url
@lru_cache
def build_pre_encoded_url(
    scheme: str,
    authority: str,
    user: Union[str, None],
    password: Union[str, None],
    host: str,
    port: Union[int, None],
    path: str,
    query_string: str,
    fragment: str,
) -> "URL":
    """Assemble a URL from already-encoded components.

    ``authority`` wins when given; otherwise the netloc is composed from
    ``user``, ``password``, ``host`` and ``port``, with a port equal to the
    scheme default left out.  Without a host the netloc is empty.
    """
    if authority:
        netloc = authority
    elif not host:
        netloc = ""
    else:
        # A port equal to the scheme's default is not rendered.
        if port is not None and port == DEFAULT_PORTS.get(scheme):
            port = None
        if user is None and password is None:
            netloc = host if port is None else f"{host}:{port}"
        else:
            netloc = make_netloc(user, password, host, port)

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query_string
    url._fragment = fragment
    url._cache = {}
    return url
def from_parts_uncached(
    scheme: str, netloc: str, path: str, query: str, fragment: str
) -> "URL":
    """Wrap five ready-made components in a fresh URL with an empty cache."""
    url = object.__new__(URL)
    url._scheme, url._netloc, url._path, url._query, url._fragment = (
        scheme,
        netloc,
        path,
        query,
        fragment,
    )
    url._cache = {}
    return url


# Memoized variant of ``from_parts_uncached``.
from_parts = lru_cache(from_parts_uncached)
277@rewrite_module
278class URL:
279 # Don't derive from str
280 # follow pathlib.Path design
281 # probably URL will not suffer from pathlib problems:
282 # it's intended for libraries like aiohttp,
283 # not to be passed into standard library functions like os.open etc.
285 # URL grammar (RFC 3986)
286 # pct-encoded = "%" HEXDIG HEXDIG
287 # reserved = gen-delims / sub-delims
288 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
289 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
290 # / "*" / "+" / "," / ";" / "="
291 # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
292 # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
293 # hier-part = "//" authority path-abempty
294 # / path-absolute
295 # / path-rootless
296 # / path-empty
297 # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
298 # authority = [ userinfo "@" ] host [ ":" port ]
299 # userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
300 # host = IP-literal / IPv4address / reg-name
301 # IP-literal = "[" ( IPv6address / IPvFuture ) "]"
302 # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
303 # IPv6address = 6( h16 ":" ) ls32
304 # / "::" 5( h16 ":" ) ls32
305 # / [ h16 ] "::" 4( h16 ":" ) ls32
306 # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
307 # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
308 # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
309 # / [ *4( h16 ":" ) h16 ] "::" ls32
310 # / [ *5( h16 ":" ) h16 ] "::" h16
311 # / [ *6( h16 ":" ) h16 ] "::"
312 # ls32 = ( h16 ":" h16 ) / IPv4address
313 # ; least-significant 32 bits of address
314 # h16 = 1*4HEXDIG
315 # ; 16 bits of address represented in hexadecimal
316 # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
317 # dec-octet = DIGIT ; 0-9
318 # / %x31-39 DIGIT ; 10-99
319 # / "1" 2DIGIT ; 100-199
320 # / "2" %x30-34 DIGIT ; 200-249
321 # / "25" %x30-35 ; 250-255
322 # reg-name = *( unreserved / pct-encoded / sub-delims )
323 # port = *DIGIT
324 # path = path-abempty ; begins with "/" or is empty
325 # / path-absolute ; begins with "/" but not "//"
326 # / path-noscheme ; begins with a non-colon segment
327 # / path-rootless ; begins with a segment
328 # / path-empty ; zero characters
329 # path-abempty = *( "/" segment )
330 # path-absolute = "/" [ segment-nz *( "/" segment ) ]
331 # path-noscheme = segment-nz-nc *( "/" segment )
332 # path-rootless = segment-nz *( "/" segment )
333 # path-empty = 0<pchar>
334 # segment = *pchar
335 # segment-nz = 1*pchar
336 # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
337 # ; non-zero-length segment without any colon ":"
338 # pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
339 # query = *( pchar / "/" / "?" )
340 # fragment = *( pchar / "/" / "?" )
341 # URI-reference = URI / relative-ref
342 # relative-ref = relative-part [ "?" query ] [ "#" fragment ]
343 # relative-part = "//" authority path-abempty
344 # / path-absolute
345 # / path-noscheme
346 # / path-empty
347 # absolute-URI = scheme ":" hier-part [ "?" query ]
348 __slots__ = ("_cache", "_scheme", "_netloc", "_path", "_query", "_fragment")
350 _cache: _InternalURLCache
351 _scheme: str
352 _netloc: str
353 _path: str
354 _query: str
355 _fragment: str
357 def __new__(
358 cls,
359 val: Union[str, SplitResult, "URL", UndefinedType] = UNDEFINED,
360 *,
361 encoded: bool = False,
362 strict: Union[bool, None] = None,
363 ) -> "URL":
364 if strict is not None: # pragma: no cover
365 warnings.warn("strict parameter is ignored")
366 if type(val) is str:
367 return pre_encoded_url(val) if encoded else encode_url(val)
368 if type(val) is cls:
369 return val
370 if type(val) is SplitResult:
371 if not encoded:
372 raise ValueError("Cannot apply decoding to SplitResult")
373 return from_parts(*val)
374 if isinstance(val, str):
375 return pre_encoded_url(str(val)) if encoded else encode_url(str(val))
376 if val is UNDEFINED:
377 # Special case for UNDEFINED since it might be unpickling and we do
378 # not want to cache as the `__set_state__` call would mutate the URL
379 # object in the `pre_encoded_url` or `encoded_url` caches.
380 self = object.__new__(URL)
381 self._scheme = self._netloc = self._path = self._query = self._fragment = ""
382 self._cache = {}
383 return self
384 raise TypeError("Constructor parameter should be str")
    @classmethod
    def build(
        cls,
        *,
        scheme: str = "",
        authority: str = "",
        user: Union[str, None] = None,
        password: Union[str, None] = None,
        host: str = "",
        port: Union[int, None] = None,
        path: str = "",
        query: Union[Query, None] = None,
        query_string: str = "",
        fragment: str = "",
        encoded: bool = False,
    ) -> "URL":
        """Creates and returns a new URL

        All arguments are keyword-only.  The authority is given either as a
        single ``authority`` string or as separate ``user`` / ``password`` /
        ``host`` / ``port`` values; the query is given either as ``query``
        or as ``query_string``.

        With ``encoded=True`` the parts are handed to
        ``build_pre_encoded_url`` unchanged; otherwise host, path, query
        string and fragment are quoted here.

        Raises:
            ValueError: ``authority`` is mixed with ``user``, ``password``,
                ``host`` or ``port``; ``port`` is given without ``host``;
                both ``query`` and ``query_string`` are given; or (when not
                ``encoded``) a path combined with an authority does not
                start with ``/``.
            TypeError: ``port`` is not an ``int``, or ``None`` is passed for
                ``scheme``, ``authority``, ``host``, ``path``,
                ``query_string`` or ``fragment``.
        """

        if authority and (user or password or host or port):
            raise ValueError(
                'Can\'t mix "authority" with "user", "password", "host" or "port".'
            )
        if port is not None and not isinstance(port, int):
            raise TypeError(f"The port is required to be int, got {type(port)!r}.")
        if port and not host:
            raise ValueError('Can\'t build URL with "port" but without "host".')
        if query and query_string:
            raise ValueError('Only one of "query" or "query_string" should be passed')
        if (
            scheme is None  # type: ignore[redundant-expr]
            or authority is None  # type: ignore[redundant-expr]
            or host is None  # type: ignore[redundant-expr]
            or path is None  # type: ignore[redundant-expr]
            or query_string is None  # type: ignore[redundant-expr]
            or fragment is None
        ):
            raise TypeError(
                'NoneType is illegal for "scheme", "authority", "host", "path", '
                '"query_string", and "fragment" args, use empty string instead.'
            )

        if query:
            # From here on the query travels as a string.
            query_string = get_str_query(query) or ""

        if encoded:
            return build_pre_encoded_url(
                scheme,
                authority,
                user,
                password,
                host,
                port,
                path,
                query_string,
                fragment,
            )

        self = object.__new__(URL)
        self._scheme = scheme
        # ``_host`` stays None only when neither authority nor host was given.
        _host: Union[str, None] = None
        if authority:
            user, password, _host, port = split_netloc(authority)
            _host = _encode_host(_host, validate_host=False) if _host else ""
        elif host:
            _host = _encode_host(host, validate_host=True)
        else:
            self._netloc = ""

        if _host is not None:
            if port is not None:
                # A port equal to the scheme's default is dropped.
                port = None if port == DEFAULT_PORTS.get(scheme) else port
            if user is None and password is None:
                self._netloc = _host if port is None else f"{_host}:{port}"
            else:
                self._netloc = make_netloc(user, password, _host, port, True)

        path = PATH_QUOTER(path) if path else path
        if path and self._netloc:
            if "." in path:
                path = normalize_path(path)
            if path[0] != "/":
                msg = (
                    "Path in a URL with authority should "
                    "start with a slash ('/') if set"
                )
                raise ValueError(msg)

        self._path = path
        # A string produced from ``query`` above is not passed through
        # QUERY_QUOTER; only a caller-supplied ``query_string`` is.
        if not query and query_string:
            query_string = QUERY_QUOTER(query_string)
        self._query = query_string
        self._fragment = FRAGMENT_QUOTER(fragment) if fragment else fragment
        self._cache = {}
        return self
481 def __init_subclass__(cls) -> NoReturn:
482 raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden")
484 def __str__(self) -> str:
485 if not self._path and self._netloc and (self._query or self._fragment):
486 path = "/"
487 else:
488 path = self._path
489 if (port := self.explicit_port) is not None and port == DEFAULT_PORTS.get(
490 self._scheme
491 ):
492 # port normalization - using None for default ports to remove from rendering
493 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3
494 host = self.host_subcomponent
495 netloc = make_netloc(self.raw_user, self.raw_password, host, None)
496 else:
497 netloc = self._netloc
498 return unsplit_result(self._scheme, netloc, path, self._query, self._fragment)
500 def __repr__(self) -> str:
501 return f"{self.__class__.__name__}('{str(self)}')"
503 def __bytes__(self) -> bytes:
504 return str(self).encode("ascii")
506 def __eq__(self, other: object) -> bool:
507 if type(other) is not URL:
508 return NotImplemented
510 path1 = "/" if not self._path and self._netloc else self._path
511 path2 = "/" if not other._path and other._netloc else other._path
512 return (
513 self._scheme == other._scheme
514 and self._netloc == other._netloc
515 and path1 == path2
516 and self._query == other._query
517 and self._fragment == other._fragment
518 )
520 def __hash__(self) -> int:
521 if (ret := self._cache.get("hash")) is None:
522 path = "/" if not self._path and self._netloc else self._path
523 ret = self._cache["hash"] = hash(
524 (self._scheme, self._netloc, path, self._query, self._fragment)
525 )
526 return ret
528 def __le__(self, other: object) -> bool:
529 if type(other) is not URL:
530 return NotImplemented
531 return self._val <= other._val
533 def __lt__(self, other: object) -> bool:
534 if type(other) is not URL:
535 return NotImplemented
536 return self._val < other._val
538 def __ge__(self, other: object) -> bool:
539 if type(other) is not URL:
540 return NotImplemented
541 return self._val >= other._val
543 def __gt__(self, other: object) -> bool:
544 if type(other) is not URL:
545 return NotImplemented
546 return self._val > other._val
548 def __truediv__(self, name: str) -> "URL":
549 if not isinstance(name, str):
550 return NotImplemented # type: ignore[unreachable]
551 return self._make_child((str(name),))
553 def __mod__(self, query: Query) -> "URL":
554 return self.update_query(query)
556 def __bool__(self) -> bool:
557 return bool(self._netloc or self._path or self._query or self._fragment)
559 def __getstate__(self) -> tuple[SplitResult]:
560 return (tuple.__new__(SplitResult, self._val),)
562 def __setstate__(
563 self, state: Union[tuple[SplitURLType], tuple[None, _InternalURLCache]]
564 ) -> None:
565 if state[0] is None and isinstance(state[1], dict):
566 # default style pickle
567 val = state[1]["_val"]
568 else:
569 unused: list[object]
570 val, *unused = state
571 self._scheme, self._netloc, self._path, self._query, self._fragment = val
572 self._cache = {}
574 def _cache_netloc(self) -> None:
575 """Cache the netloc parts of the URL."""
576 c = self._cache
577 split_loc = split_netloc(self._netloc)
578 c["raw_user"], c["raw_password"], c["raw_host"], c["explicit_port"] = split_loc
580 def is_absolute(self) -> bool:
581 """A check for absolute URLs.
583 Return True for absolute ones (having scheme or starting
584 with //), False otherwise.
586 Is is preferred to call the .absolute property instead
587 as it is cached.
588 """
589 return self.absolute
591 def is_default_port(self) -> bool:
592 """A check for default port.
594 Return True if port is default for specified scheme,
595 e.g. 'http://python.org' or 'http://python.org:80', False
596 otherwise.
598 Return False for relative URLs.
600 """
601 if (explicit := self.explicit_port) is None:
602 # If the explicit port is None, then the URL must be
603 # using the default port unless its a relative URL
604 # which does not have an implicit port / default port
605 return self._netloc != ""
606 return explicit == DEFAULT_PORTS.get(self._scheme)
608 def origin(self) -> "URL":
609 """Return an URL with scheme, host and port parts only.
611 user, password, path, query and fragment are removed.
613 """
614 # TODO: add a keyword-only option for keeping user/pass maybe?
615 return self._origin
617 @cached_property
618 def _val(self) -> SplitURLType:
619 return (self._scheme, self._netloc, self._path, self._query, self._fragment)
621 @cached_property
622 def _origin(self) -> "URL":
623 """Return an URL with scheme, host and port parts only.
625 user, password, path, query and fragment are removed.
626 """
627 if not (netloc := self._netloc):
628 raise ValueError("URL should be absolute")
629 if not (scheme := self._scheme):
630 raise ValueError("URL should have scheme")
631 if "@" in netloc:
632 encoded_host = self.host_subcomponent
633 netloc = make_netloc(None, None, encoded_host, self.explicit_port)
634 elif not self._path and not self._query and not self._fragment:
635 return self
636 return from_parts(scheme, netloc, "", "", "")
638 def relative(self) -> "URL":
639 """Return a relative part of the URL.
641 scheme, user, password, host and port are removed.
643 """
644 if not self._netloc:
645 raise ValueError("URL should be absolute")
646 return from_parts("", "", self._path, self._query, self._fragment)
648 @cached_property
649 def absolute(self) -> bool:
650 """A check for absolute URLs.
652 Return True for absolute ones (having scheme or starting
653 with //), False otherwise.
655 """
656 # `netloc`` is an empty string for relative URLs
657 # Checking `netloc` is faster than checking `hostname`
658 # because `hostname` is a property that does some extra work
659 # to parse the host from the `netloc`
660 return self._netloc != ""
662 @cached_property
663 def scheme(self) -> str:
664 """Scheme for absolute URLs.
666 Empty string for relative URLs or URLs starting with //
668 """
669 return self._scheme
671 @cached_property
672 def raw_authority(self) -> str:
673 """Encoded authority part of URL.
675 Empty string for relative URLs.
677 """
678 return self._netloc
680 @cached_property
681 def authority(self) -> str:
682 """Decoded authority part of URL.
684 Empty string for relative URLs.
686 """
687 return make_netloc(self.user, self.password, self.host, self.port)
689 @cached_property
690 def raw_user(self) -> Union[str, None]:
691 """Encoded user part of URL.
693 None if user is missing.
695 """
696 # not .username
697 self._cache_netloc()
698 return self._cache["raw_user"]
700 @cached_property
701 def user(self) -> Union[str, None]:
702 """Decoded user part of URL.
704 None if user is missing.
706 """
707 if (raw_user := self.raw_user) is None:
708 return None
709 return UNQUOTER(raw_user)
711 @cached_property
712 def raw_password(self) -> Union[str, None]:
713 """Encoded password part of URL.
715 None if password is missing.
717 """
718 self._cache_netloc()
719 return self._cache["raw_password"]
721 @cached_property
722 def password(self) -> Union[str, None]:
723 """Decoded password part of URL.
725 None if password is missing.
727 """
728 if (raw_password := self.raw_password) is None:
729 return None
730 return UNQUOTER(raw_password)
732 @cached_property
733 def raw_host(self) -> Union[str, None]:
734 """Encoded host part of URL.
736 None for relative URLs.
738 When working with IPv6 addresses, use the `host_subcomponent` property instead
739 as it will return the host subcomponent with brackets.
740 """
741 # Use host instead of hostname for sake of shortness
742 # May add .hostname prop later
743 self._cache_netloc()
744 return self._cache["raw_host"]
746 @cached_property
747 def host(self) -> Union[str, None]:
748 """Decoded host part of URL.
750 None for relative URLs.
752 """
753 if (raw := self.raw_host) is None:
754 return None
755 if raw and raw[-1].isdigit() or ":" in raw:
756 # IP addresses are never IDNA encoded
757 return raw
758 return _idna_decode(raw)
760 @cached_property
761 def host_subcomponent(self) -> Union[str, None]:
762 """Return the host subcomponent part of URL.
764 None for relative URLs.
766 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
768 `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
770 Examples:
771 - `http://example.com:8080` -> `example.com`
772 - `http://example.com:80` -> `example.com`
773 - `https://127.0.0.1:8443` -> `127.0.0.1`
774 - `https://[::1]:8443` -> `[::1]`
775 - `http://[::1]` -> `[::1]`
777 """
778 if (raw := self.raw_host) is None:
779 return None
780 return f"[{raw}]" if ":" in raw else raw
782 @cached_property
783 def host_port_subcomponent(self) -> Union[str, None]:
784 """Return the host and port subcomponent part of URL.
786 Trailing dots are removed from the host part.
788 This value is suitable for use in the Host header of an HTTP request.
790 None for relative URLs.
792 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
793 `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
794 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
795 port = *DIGIT
797 Examples:
798 - `http://example.com:8080` -> `example.com:8080`
799 - `http://example.com:80` -> `example.com`
800 - `http://example.com.:80` -> `example.com`
801 - `https://127.0.0.1:8443` -> `127.0.0.1:8443`
802 - `https://[::1]:8443` -> `[::1]:8443`
803 - `http://[::1]` -> `[::1]`
805 """
806 if (raw := self.raw_host) is None:
807 return None
808 if raw[-1] == ".":
809 # Remove all trailing dots from the netloc as while
810 # they are valid FQDNs in DNS, TLS validation fails.
811 # See https://github.com/aio-libs/aiohttp/issues/3636.
812 # To avoid string manipulation we only call rstrip if
813 # the last character is a dot.
814 raw = raw.rstrip(".")
815 port = self.explicit_port
816 if port is None or port == DEFAULT_PORTS.get(self._scheme):
817 return f"[{raw}]" if ":" in raw else raw
818 return f"[{raw}]:{port}" if ":" in raw else f"{raw}:{port}"
820 @cached_property
821 def port(self) -> Union[int, None]:
822 """Port part of URL, with scheme-based fallback.
824 None for relative URLs or URLs without explicit port and
825 scheme without default port substitution.
827 """
828 if (explicit_port := self.explicit_port) is not None:
829 return explicit_port
830 return DEFAULT_PORTS.get(self._scheme)
832 @cached_property
833 def explicit_port(self) -> Union[int, None]:
834 """Port part of URL, without scheme-based fallback.
836 None for relative URLs or URLs without explicit port.
838 """
839 self._cache_netloc()
840 return self._cache["explicit_port"]
842 @cached_property
843 def raw_path(self) -> str:
844 """Encoded path of URL.
846 / for absolute URLs without path part.
848 """
849 return self._path if self._path or not self._netloc else "/"
851 @cached_property
852 def path(self) -> str:
853 """Decoded path of URL.
855 / for absolute URLs without path part.
857 """
858 return PATH_UNQUOTER(self._path) if self._path else "/" if self._netloc else ""
860 @cached_property
861 def path_safe(self) -> str:
862 """Decoded path of URL.
864 / for absolute URLs without path part.
866 / (%2F) and % (%25) are not decoded
868 """
869 if self._path:
870 return PATH_SAFE_UNQUOTER(self._path)
871 return "/" if self._netloc else ""
873 @cached_property
874 def _parsed_query(self) -> list[tuple[str, str]]:
875 """Parse query part of URL."""
876 return query_to_pairs(self._query)
878 @cached_property
879 def query(self) -> "MultiDictProxy[str]":
880 """A MultiDictProxy representing parsed query parameters in decoded
881 representation.
883 Empty value if URL has no query part.
885 """
886 return MultiDictProxy(MultiDict(self._parsed_query))
888 @cached_property
889 def raw_query_string(self) -> str:
890 """Encoded query part of URL.
892 Empty string if query is missing.
894 """
895 return self._query
897 @cached_property
898 def query_string(self) -> str:
899 """Decoded query part of URL.
901 Empty string if query is missing.
903 """
904 return QS_UNQUOTER(self._query) if self._query else ""
906 @cached_property
907 def path_qs(self) -> str:
908 """Decoded path of URL with query."""
909 return self.path if not (q := self.query_string) else f"{self.path}?{q}"
911 @cached_property
912 def raw_path_qs(self) -> str:
913 """Encoded path of URL with query."""
914 if q := self._query:
915 return f"{self._path}?{q}" if self._path or not self._netloc else f"/?{q}"
916 return self._path if self._path or not self._netloc else "/"
918 @cached_property
919 def raw_fragment(self) -> str:
920 """Encoded fragment part of URL.
922 Empty string if fragment is missing.
924 """
925 return self._fragment
927 @cached_property
928 def fragment(self) -> str:
929 """Decoded fragment part of URL.
931 Empty string if fragment is missing.
933 """
934 return UNQUOTER(self._fragment) if self._fragment else ""
936 @cached_property
937 def raw_parts(self) -> tuple[str, ...]:
938 """A tuple containing encoded *path* parts.
940 ('/',) for absolute URLs if *path* is missing.
942 """
943 path = self._path
944 if self._netloc:
945 return ("/", *path[1:].split("/")) if path else ("/",)
946 if path and path[0] == "/":
947 return ("/", *path[1:].split("/"))
948 return tuple(path.split("/"))
950 @cached_property
951 def parts(self) -> tuple[str, ...]:
952 """A tuple containing decoded *path* parts.
954 ('/',) for absolute URLs if *path* is missing.
956 """
957 return tuple(UNQUOTER(part) for part in self.raw_parts)
959 @cached_property
960 def parent(self) -> "URL":
961 """A new URL with last part of path removed and cleaned up query and
962 fragment.
964 """
965 path = self._path
966 if not path or path == "/":
967 if self._fragment or self._query:
968 return from_parts(self._scheme, self._netloc, path, "", "")
969 return self
970 parts = path.split("/")
971 return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "")
973 @cached_property
974 def raw_name(self) -> str:
975 """The last part of raw_parts."""
976 parts = self.raw_parts
977 if not self._netloc:
978 return parts[-1]
979 parts = parts[1:]
980 return parts[-1] if parts else ""
982 @cached_property
983 def name(self) -> str:
984 """The last part of parts."""
985 return UNQUOTER(self.raw_name)
987 @cached_property
988 def raw_suffix(self) -> str:
989 name = self.raw_name
990 i = name.rfind(".")
991 return name[i:] if 0 < i < len(name) - 1 else ""
993 @cached_property
994 def suffix(self) -> str:
995 return UNQUOTER(self.raw_suffix)
997 @cached_property
998 def raw_suffixes(self) -> tuple[str, ...]:
999 name = self.raw_name
1000 if name.endswith("."):
1001 return ()
1002 name = name.lstrip(".")
1003 return tuple("." + suffix for suffix in name.split(".")[1:])
1005 @cached_property
1006 def suffixes(self) -> tuple[str, ...]:
1007 return tuple(UNQUOTER(suffix) for suffix in self.raw_suffixes)
    def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":
        """
        add paths to self._path, accounting for absolute vs relative paths,
        keep existing, but do not create new, empty segments

        ``paths`` are quoted with ``PATH_QUOTER`` unless ``encoded`` is true.
        The returned URL keeps scheme and netloc and has an empty query and
        fragment.

        Raises ``ValueError`` if any of ``paths`` starts with a slash.
        """
        # Segments are collected back to front and reversed once at the end.
        parsed: list[str] = []
        needs_normalize: bool = False
        for idx, path in enumerate(reversed(paths)):
            # empty segment of last is not removed
            last = idx == 0
            if path and path[0] == "/":
                raise ValueError(
                    f"Appending path {path!r} starting from slash is forbidden"
                )
            # We need to quote the path if it is not already encoded
            # This cannot be done at the end because the existing
            # path is already quoted and we do not want to double quote
            # the existing path.
            path = path if encoded else PATH_QUOTER(path)
            # Any dot in the new paths triggers normalization further down.
            needs_normalize |= "." in path
            segments = path.split("/")
            segments.reverse()
            # remove trailing empty segment for all but the last path
            parsed += segments[1:] if not last and segments[0] == "" else segments

        if (path := self._path) and (old_segments := path.split("/")):
            # If the old path ends with a slash, the last segment is an empty string
            # and should be removed before adding the new path segments.
            old = old_segments[:-1] if old_segments[-1] == "" else old_segments
            old.reverse()
            parsed += old

        # If the netloc is present, inject a leading slash when adding a
        # path to an absolute URL where there was none before.
        if (netloc := self._netloc) and parsed and parsed[-1] != "":
            parsed.append("")

        # Restore front-to-back order.
        parsed.reverse()
        if not netloc or not needs_normalize:
            return from_parts(self._scheme, netloc, "/".join(parsed), "", "")

        path = "/".join(normalize_path_segments(parsed))
        # If normalizing the path segments removed the leading slash, add it back.
        if path and path[0] != "/":
            path = f"/{path}"
        return from_parts(self._scheme, netloc, path, "", "")
1056 def with_scheme(self, scheme: str) -> "URL":
1057 """Return a new URL with scheme replaced."""
1058 # N.B. doesn't cleanup query/fragment
1059 if not isinstance(scheme, str):
1060 raise TypeError("Invalid scheme type")
1061 lower_scheme = scheme.lower()
1062 netloc = self._netloc
1063 if not netloc and lower_scheme in SCHEME_REQUIRES_HOST:
1064 msg = (
1065 "scheme replacement is not allowed for "
1066 f"relative URLs for the {lower_scheme} scheme"
1067 )
1068 raise ValueError(msg)
1069 return from_parts(lower_scheme, netloc, self._path, self._query, self._fragment)
def with_user(self, user: Union[str, None]) -> "URL":
    """Return a new URL with the user replaced.

    A str user is passed through QUOTER and the current raw password is
    kept. Passing None clears both user and password.

    Raises TypeError for any other argument type, and ValueError when
    the URL has no netloc.
    """
    # N.B. doesn't cleanup query/fragment
    if user is not None and not isinstance(user, str):
        raise TypeError("Invalid user type")
    if user is None:
        quoted_user = None
        kept_password = None
    else:
        quoted_user = QUOTER(user)
        kept_password = self.raw_password
    if not self._netloc:
        raise ValueError("user replacement is not allowed for relative URLs")
    host_part = self.host_subcomponent or ""
    new_netloc = make_netloc(
        quoted_user, kept_password, host_part, self.explicit_port
    )
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
def with_password(self, password: Union[str, None]) -> "URL":
    """Return a new URL with the password replaced.

    A str password is passed through QUOTER; None clears the password.
    The raw user, host and explicit port are kept.

    Raises TypeError for any other argument type, and ValueError when
    the URL has no netloc.
    """
    # N.B. doesn't cleanup query/fragment
    if password is not None:
        if not isinstance(password, str):
            raise TypeError("Invalid password type")
        password = QUOTER(password)
    if not self._netloc:
        raise ValueError("password replacement is not allowed for relative URLs")
    host_part = self.host_subcomponent or ""
    explicit_port = self.explicit_port
    new_netloc = make_netloc(self.raw_user, password, host_part, explicit_port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
def with_host(self, host: str) -> "URL":
    """Return a new URL with the host replaced.

    The host is encoded and validated via ``_encode_host``; the raw
    user, raw password and explicit port are kept.

    Changing host for relative URLs is not allowed, use .join()
    instead.

    Raises TypeError if ``host`` is not a str, and ValueError if the
    URL has no netloc or ``host`` is empty.
    """
    # N.B. doesn't cleanup query/fragment
    if not isinstance(host, str):
        raise TypeError("Invalid host type")
    if not self._netloc:
        raise ValueError("host replacement is not allowed for relative URLs")
    if not host:
        raise ValueError("host removing is not allowed")
    # host is guaranteed non-empty here, so no empty-string fallback is needed.
    encoded_host = _encode_host(host, validate_host=True)
    port = self.explicit_port
    netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
    return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)
def with_port(self, port: Union[int, None]) -> "URL":
    """Return a new URL with the port replaced.

    Passing None drops the explicit port from the netloc.

    Raises TypeError if ``port`` is a bool or not an int, and ValueError
    if it is outside 0..65535 or the URL has no netloc.
    """
    # N.B. doesn't cleanup query/fragment
    if port is not None:
        # bool is a subclass of int, so it has to be rejected explicitly.
        is_plain_int = isinstance(port, int) and not isinstance(port, bool)
        if not is_plain_int:
            raise TypeError(f"port should be int or None, got {type(port)}")
        if port < 0 or port > 65535:
            raise ValueError(f"port must be between 0 and 65535, got {port}")
    if not self._netloc:
        raise ValueError("port replacement is not allowed for relative URLs")
    host_part = self.host_subcomponent or ""
    new_netloc = make_netloc(self.raw_user, self.raw_password, host_part, port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
def with_path(
    self,
    path: str,
    *,
    encoded: bool = False,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with the path replaced.

    Unless ``encoded`` is true the path is passed through PATH_QUOTER
    and, for URLs with a netloc, normalized when it contains a dot.
    A non-empty path always gets a leading slash. Query and fragment
    are dropped unless ``keep_query`` / ``keep_fragment`` are set.
    """
    authority = self._netloc
    if not encoded:
        path = PATH_QUOTER(path)
        if authority and "." in path:
            path = normalize_path(path)
    if path and path[0] != "/":
        path = f"/{path}"
    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(self._scheme, authority, path, new_query, new_fragment)
@overload
def with_query(self, query: Query) -> "URL": ...

@overload
def with_query(self, **kwargs: QueryVariable) -> "URL": ...

def with_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with the query part replaced.

    Accepts any Mapping (e.g. dict, multidict.MultiDict instances)
    or str, autoencode the argument if needed.

    A sequence of (key, value) pairs is supported as well.

    It also can take an arbitrary number of keyword arguments.

    Clear query if None is passed.
    """
    # N.B. doesn't cleanup query/fragment
    encoded_query = get_str_query(*args, **kwargs)
    if not encoded_query:
        # A falsy result from get_str_query means "no query".
        encoded_query = ""
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, encoded_query, self._fragment
    )
@overload
def extend_query(self, query: Query) -> "URL": ...

@overload
def extend_query(self, **kwargs: QueryVariable) -> "URL": ...

def extend_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with the given query appended to the existing one.

    Existing query parameters are never removed. When the arguments
    produce no query string, ``self`` is returned unchanged.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.extend_query(a=3, c=4)
    URL('http://example.com/?a=1&b=2&a=3&c=4')
    """
    addition = get_str_query(*args, **kwargs)
    if not addition:
        return self
    current = self._query
    # Both strings are already encoded, so plain concatenation is enough;
    # only add a separator when the current query does not end with one.
    if not current:
        combined = addition
    elif current[-1] == "&":
        combined = current + addition
    else:
        combined = f"{current}&{addition}"
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, combined, self._fragment
    )
@overload
def update_query(self, query: Query) -> "URL": ...

@overload
def update_query(self, **kwargs: QueryVariable) -> "URL": ...

def update_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with the query part updated.

    Existing parameters with the same key are overwritten. Exactly one
    positional argument or only keyword arguments must be given,
    otherwise ValueError is raised. Passing None clears the query; any
    other falsy argument leaves the query as it is.

    Raises TypeError for bytes-like or otherwise unsupported arguments.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.update_query(a=3, c=4)
    URL('http://example.com/?a=3&b=2&c=4')
    """
    in_query: Union[
        str,
        Mapping[str, QueryVariable],
        Sequence[tuple[Union[str, istr], SimpleQuery]],
        None,
    ]
    if (kwargs and args) or (not kwargs and len(args) != 1):
        raise ValueError("Either kwargs or single query parameter must be present")
    in_query = kwargs if kwargs else args[0]

    if in_query is None:
        query = ""
    elif not in_query:
        query = self._query
    elif isinstance(in_query, Mapping):
        merged_map: MultiDict[QueryVariable] = MultiDict(self._parsed_query)
        merged_map.update(in_query)
        query = get_str_query_from_sequence_iterable(merged_map.items())
    elif isinstance(in_query, str):
        merged_str: MultiDict[str] = MultiDict(self._parsed_query)
        merged_str.update(query_to_pairs(in_query))
        query = get_str_query_from_iterable(merged_str.items())
    elif isinstance(in_query, (bytes, bytearray, memoryview)):
        raise TypeError(
            "Invalid query type: bytes, bytearray and memoryview are forbidden"
        )
    elif isinstance(in_query, Sequence):
        # A list of pairs is not expected to carry sequence values; only
        # mappings, which cannot repeat a key, go through the
        # sequence-aware serializer above.
        if TYPE_CHECKING:
            in_query = cast(
                Sequence[tuple[Union[str, istr], SimpleQuery]], in_query
            )
        merged_pairs: MultiDict[SimpleQuery] = MultiDict(self._parsed_query)
        merged_pairs.update(in_query)
        query = get_str_query_from_iterable(merged_pairs.items())
    else:
        raise TypeError(
            "Invalid query type: only str, mapping or "
            "sequence of (key, value) pairs is allowed"
        )
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, query, self._fragment
    )
def without_query_params(self, *query_params: str) -> "URL":
    """Return a new URL with the named keys removed from the query.

    Returns ``self`` when none of the given names occur in the query.
    """
    doomed = set(query_params) & self.query.keys()
    if not doomed:
        return self
    survivors = tuple(
        (key, value) for key, value in self.query.items() if key not in doomed
    )
    return self.with_query(survivors)
def with_fragment(self, fragment: Union[str, None]) -> "URL":
    """Return a new URL with the fragment replaced.

    A str fragment is passed through FRAGMENT_QUOTER; None clears the
    fragment. Returns ``self`` when the resulting fragment equals the
    current one.

    Raises TypeError for any other argument type.
    """
    # N.B. doesn't cleanup query/fragment
    if fragment is None:
        quoted = ""
    elif isinstance(fragment, str):
        quoted = FRAGMENT_QUOTER(fragment)
    else:
        raise TypeError("Invalid fragment type")
    if quoted == self._fragment:
        return self
    return from_parts(self._scheme, self._netloc, self._path, self._query, quoted)
def with_name(
    self,
    name: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with the name (last part of the path) replaced.

    The name is passed through PATH_QUOTER. Query and fragment are
    dropped unless ``keep_query`` / ``keep_fragment`` are set.

    Raises TypeError if ``name`` is not a str, and ValueError if it
    contains a slash or quotes to "." or "..".
    """
    # N.B. DOES cleanup query/fragment
    if not isinstance(name, str):
        raise TypeError("Invalid name type")
    if "/" in name:
        raise ValueError("Slash in name is not allowed")
    name = PATH_QUOTER(name)
    if name == "." or name == "..":
        raise ValueError(". and .. values are forbidden")
    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc and len(segments) == 1:
        # Only the root part exists, so the name becomes a new segment.
        segments.append(name)
    else:
        segments[-1] = name
    if netloc or segments[0] == "/":
        # Blank the leading '/' part; the join below re-creates the slash.
        segments[0] = ""

    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(
        self._scheme, netloc, "/".join(segments), new_query, new_fragment
    )
def with_suffix(
    self,
    suffix: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with the suffix (file extension of name) replaced.

    The suffix is passed through PATH_QUOTER and replaces the current
    raw suffix, or is appended when there is none. Query and fragment
    are dropped unless ``keep_query`` / ``keep_fragment`` are set.

    Raises TypeError if ``suffix`` is not a str, and ValueError if it is
    malformed, the URL has an empty name, or the resulting name is "."
    or "..".
    """
    if not isinstance(suffix, str):
        raise TypeError("Invalid suffix type")
    missing_dot = bool(suffix) and not suffix.startswith(".")
    if missing_dot or suffix == "." or "/" in suffix:
        raise ValueError(f"Invalid suffix {suffix!r}")
    name = self.raw_name
    if not name:
        raise ValueError(f"{self!r} has an empty name")
    old_suffix = self.raw_suffix
    suffix = PATH_QUOTER(suffix)
    stem = name[: -len(old_suffix)] if old_suffix else name
    name = stem + suffix
    if name == "." or name == "..":
        raise ValueError(". and .. values are forbidden")
    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc:
        if len(segments) == 1:
            segments.append(name)
        else:
            segments[-1] = name
        segments[0] = ""  # replace leading '/'
    else:
        segments[-1] = name
        if segments[0] == "/":
            segments[0] = ""  # replace leading '/'

    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(
        self._scheme, netloc, "/".join(segments), new_query, new_fragment
    )
def join(self, url: "URL") -> "URL":
    """Join URLs.

    Construct a full ("absolute") URL by combining a "base URL"
    (self) with another URL (url).

    Informally, this uses components of the base URL, in
    particular the addressing scheme, the network location and
    (part of) the path, to provide missing components in the
    relative URL.

    Raises TypeError if ``url`` is not exactly a URL instance.
    """
    if type(url) is not URL:
        raise TypeError("url should be URL")

    scheme = url._scheme or self._scheme
    if scheme != self._scheme or scheme not in USES_RELATIVE:
        return url

    # scheme is in uses_authority as uses_authority is a superset of uses_relative
    other_netloc = url._netloc
    if other_netloc and scheme in USES_AUTHORITY:
        return from_parts(scheme, other_netloc, url._path, url._query, url._fragment)

    base_path = self._path
    rel_path = url._path
    if not rel_path:
        path = base_path
    else:
        if rel_path[0] == "/":
            path = rel_path
        elif not base_path:
            path = f"/{rel_path}"
        elif base_path[-1] == "/":
            path = f"{base_path}{rel_path}"
        else:
            # Drop the last segment of the base path and append the
            # relative path. parts[0] is "/" for absolute URLs, so the
            # join produces a doubled leading slash that is stripped below.
            # NOTE(review): this uses ``parts`` rather than ``raw_parts``
            # while the joined path is the raw one; confirm that is intended.
            path = "/".join([*self.parts[:-1], ""]) + rel_path
            if base_path[0] == "/":
                path = path[1:]
        if "." in path:
            path = normalize_path(path)

    if rel_path:
        # A relative path brings its own query and fragment, even if empty.
        query, fragment = url._query, url._fragment
    else:
        query = url._query or self._query
        fragment = url._fragment or self._fragment
    return from_parts(scheme, self._netloc, path, query, fragment)
def joinpath(self, *other: str, encoded: bool = False) -> "URL":
    """Return a new URL with the elements of ``other`` appended to the path.

    Delegates to ``_make_child``, forwarding the ``encoded`` flag.
    """
    child = self._make_child(other, encoded=encoded)
    return child
def human_repr(self) -> str:
    """Return a decoded, human readable string representation of the URL."""
    userinfo_unsafe = "#/:?@[]"
    query_unsafe = "#&+;="
    user = human_quote(self.user, userinfo_unsafe)
    password = human_quote(self.password, userinfo_unsafe)
    host = self.host
    if host and ":" in host:
        # A colon in the host gets the host wrapped in brackets.
        host = f"[{host}]"
    path = human_quote(self.path, "#?")
    if TYPE_CHECKING:
        assert path is not None
    pairs = [
        f"{human_quote(key, query_unsafe)}={human_quote(value, query_unsafe)}"
        for key, value in self.query.items()
    ]
    query_string = "&".join(pairs)
    fragment = human_quote(self.fragment, "")
    if TYPE_CHECKING:
        assert fragment is not None
    netloc = make_netloc(user, password, host, self.explicit_port)
    return unsplit_result(self._scheme, netloc, path, query_string, fragment)
1484_DEFAULT_IDNA_SIZE = 256
1485_DEFAULT_ENCODE_SIZE = 512
@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_decode(raw: str) -> str:
    """Decode an ASCII IDNA host, falling back to the built-in idna codec."""
    try:
        decoded = idna.decode(raw.encode("ascii"))
    except UnicodeError:  # e.g. '::1'
        decoded = raw.encode("ascii").decode("idna")
    return decoded
@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_encode(host: str) -> str:
    """Encode a host with IDNA (UTS46), falling back to the built-in idna codec."""
    try:
        encoded = idna.encode(host, uts46=True).decode("ascii")
    except UnicodeError:
        encoded = host.encode("idna").decode("ascii")
    return encoded
@lru_cache(_DEFAULT_ENCODE_SIZE)
def _encode_host(host: str, validate_host: bool) -> str:
    """Encode the host part of a URL.

    IP literals are returned in compressed form (IPv6 wrapped in
    brackets, any ``%zone`` kept). ASCII hosts are lower-cased and,
    when ``validate_host`` is true, checked against NOT_REG_NAME.
    Anything else goes through ``_idna_encode``.

    Raises ValueError when validation finds a forbidden character.
    """
    # Parsing IP addresses is far slower than everything else here, so it
    # is only attempted when the host plausibly is one: it ends with a
    # digit (127.0.0.1) or contains a colon (2001:db8::ff00:42:8329,
    # optionally followed by %zone). Brackets are expected to have been
    # removed by the caller. Rare IP address formats are not supported, per
    # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
    looks_like_ip = bool(host) and (host[-1].isdigit() or ":" in host)
    if looks_like_ip:
        address_text, percent, zone_id = host.partition("%")
        try:
            parsed_ip = ip_address(address_text)
        except ValueError:
            # Not an IP address after all; fall through to host-name handling.
            parsed_ip = None
        if parsed_ip is not None:
            compact = parsed_ip.compressed
            if percent:
                compact = f"{compact}%{zone_id}"
            return f"[{compact}]" if parsed_ip.version == 6 else compact

    # IDNA encoding is slow, so only non-ASCII hosts take that path.
    if not host.isascii():
        return _idna_encode(host)

    # _idna_encode() rejects invalid characters for non-ASCII names; ASCII
    # names have to be checked explicitly.
    host = host.lower()
    if not validate_host:
        return host
    offending = NOT_REG_NAME.search(host)
    if not offending:
        return host
    value = offending.group()
    pos = offending.start()
    if value == "@" or (value == ":" and "@" in host[pos:]):
        # this looks like an authority string
        extra = (
            ", if the value includes a username or password, "
            "use 'authority' instead of 'host'"
        )
    else:
        extra = ""
    raise ValueError(
        f"Host {host!r} cannot contain {value!r} (at position {pos}){extra}"
    ) from None
@rewrite_module
def cache_clear() -> None:
    """Clear all LRU caches."""
    # The tuple is built at call time, so it sees the wrappers that
    # cache_configure() may have rebound.
    for cached in (_idna_encode, _idna_decode, _encode_host):
        cached.cache_clear()
@rewrite_module
def cache_info() -> CacheInfo:
    """Report cache statistics.

    The "ip_address", "host_validate" and "encode_host" entries all
    report the statistics of the ``_encode_host`` cache.
    """
    encode_host_stats = _encode_host.cache_info()
    return {
        "idna_encode": _idna_encode.cache_info(),
        "idna_decode": _idna_decode.cache_info(),
        "ip_address": encode_host_stats,
        "host_validate": encode_host_stats,
        "encode_host": encode_host_stats,
    }
@rewrite_module
def cache_configure(
    *,
    idna_encode_size: Union[int, None] = _DEFAULT_IDNA_SIZE,
    idna_decode_size: Union[int, None] = _DEFAULT_IDNA_SIZE,
    ip_address_size: Union[int, None, UndefinedType] = UNDEFINED,
    host_validate_size: Union[int, None, UndefinedType] = UNDEFINED,
    encode_host_size: Union[int, None, UndefinedType] = UNDEFINED,
) -> None:
    """Configure LRU cache sizes.

    Rebuilds the ``_encode_host``, ``_idna_decode`` and ``_idna_encode``
    caches with the requested sizes; a size of None is passed straight
    to ``lru_cache``.

    ``ip_address_size`` and ``host_validate_size`` are deprecated and
    emit a DeprecationWarning when given. They only influence the
    ``encode_host`` cache: if either of them or ``encode_host_size`` is
    None the size is None, otherwise the largest of the supplied values
    is used, defaulting to ``_DEFAULT_ENCODE_SIZE`` when none is given.
    """
    global _idna_decode, _idna_encode, _encode_host
    # ip_address_size, host_validate_size are no longer
    # used, but are kept for backwards compatibility.
    if ip_address_size is not UNDEFINED or host_validate_size is not UNDEFINED:
        warnings.warn(
            "cache_configure() no longer accepts the "
            "ip_address_size or host_validate_size arguments, "
            "they are used to set the encode_host_size instead "
            "and will be removed in the future",
            DeprecationWarning,
            stacklevel=2,
        )

    if encode_host_size is not None:
        legacy_sizes = (ip_address_size, host_validate_size)
        if any(size is None for size in legacy_sizes):
            # None wins over any integer. Deciding this up front avoids
            # comparing None with an int via max() further down.
            encode_host_size = None
        else:
            for size in legacy_sizes:
                if size is UNDEFINED:
                    continue
                if encode_host_size is UNDEFINED:
                    encode_host_size = size
                else:
                    if TYPE_CHECKING:
                        assert isinstance(size, int)
                        assert isinstance(encode_host_size, int)
                    encode_host_size = max(size, encode_host_size)
            if encode_host_size is UNDEFINED:
                encode_host_size = _DEFAULT_ENCODE_SIZE

    # Re-wrap the undecorated functions so the new sizes take effect.
    _encode_host = lru_cache(encode_host_size)(_encode_host.__wrapped__)
    _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
    _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__)