Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/_url.py: 39%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2import sys
3import warnings
4from collections.abc import Mapping, Sequence
5from enum import Enum
6from functools import _CacheInfo, lru_cache
7from importlib.util import find_spec
8from ipaddress import ip_address
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 NoReturn,
13 TypedDict,
14 TypeVar,
15 Union,
16 cast,
17 overload,
18)
19from urllib.parse import SplitResult, scheme_chars, uses_relative
21import idna
22from multidict import MultiDict, MultiDictProxy, istr
23from propcache.api import under_cached_property as cached_property
25from ._parse import (
26 USES_AUTHORITY,
27 SplitURLType,
28 make_netloc,
29 query_to_pairs,
30 split_netloc,
31 split_url,
32 unsplit_result,
33)
34from ._path import normalize_path, normalize_path_segments
35from ._query import (
36 Query,
37 QueryVariable,
38 SimpleQuery,
39 get_str_query,
40 get_str_query_from_iterable,
41 get_str_query_from_sequence_iterable,
42)
43from ._quoters import (
44 FRAGMENT_QUOTER,
45 FRAGMENT_REQUOTER,
46 PATH_QUOTER,
47 PATH_REQUOTER,
48 PATH_SAFE_UNQUOTER,
49 PATH_UNQUOTER,
50 QS_UNQUOTER,
51 QUERY_QUOTER,
52 QUERY_REQUOTER,
53 QUOTER,
54 REQUOTER,
55 UNQUOTER,
56 human_quote,
57)
# Pydantic is only probed for, never imported at runtime: importing it would
# increase yarl's import time by 3-7x. The names below exist purely for
# static type checkers.
HAS_PYDANTIC = find_spec("pydantic_core") is not None
if TYPE_CHECKING:
    from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler
    from pydantic.json_schema import JsonSchemaValue
    from pydantic_core import CoreSchema
# Port assumed for a scheme when the URL does not spell one out.
DEFAULT_PORTS = {
    "http": 80,
    "https": 443,
    "ws": 80,
    "wss": 443,
    "ftp": 21,
}
USES_RELATIVE = frozenset(uses_relative)
_SCHEME_CHARS = frozenset(scheme_chars)

# Special schemes (https://url.spec.whatwg.org/#special-scheme) must not have
# an empty host (https://url.spec.whatwg.org/#url-representation).
SCHEME_REQUIRES_HOST = frozenset({"http", "https", "ws", "wss", "ftp"})
# RFC 3986 reg-name = *( unreserved / pct-encoded / sub-delims ).
# NOT_REG_NAME finds the first thing that does *not* fit that grammar; it is
# only meant to be applied to lower-cased ASCII input.
NOT_REG_NAME = re.compile(
    r"""
        # any character not in the unreserved or sub-delims sets, plus %
        # (validated with the additional check for pct-encoded sequences below)
        [^a-z0-9\-._~!$&'()*+,;=%]
    |
        # % only allowed if it is part of a pct-encoded
        # sequence of 2 hex digits.
        %(?![0-9a-f]{2})
    """,
    re.VERBOSE,
)

# The RFCs define no format for zone IDs; they are OS-specific text:
# https://datatracker.ietf.org/doc/html/rfc4007#section-11.2
# RFC 9844 §6.3 recommends rejecting characters that are inappropriate for
# the environment, so yarl rejects the ASCII control characters (CTL):
# https://datatracker.ietf.org/doc/html/rfc9844#section-6-3
_ZONE_ID_UNSAFE_RE = re.compile(r"[\x00-\x1f\x7f]")
_T = TypeVar("_T")

# ``typing.Self`` only exists from Python 3.11; older interpreters get ``Any``.
if sys.version_info < (3, 11):
    Self = Any
else:
    from typing import Self
class UndefinedType(Enum):
    """Single-member enum whose member marks a value as "not set"."""

    _singleton = 0


# The one and only "not set" sentinel (looked up by its value).
UNDEFINED = UndefinedType(0)
class CacheInfo(TypedDict):
    """Host encoding cache."""

    # One ``functools`` cache-statistics record per entry.
    # NOTE(review): presumably each key names the cached helper it describes.
    idna_encode: _CacheInfo
    idna_decode: _CacheInfo
    ip_address: _CacheInfo
    host_validate: _CacheInfo
    encode_host: _CacheInfo
class _InternalURLCache(TypedDict, total=False):
    """Shape of the per-URL ``_cache`` dict; every key is optional."""

    # Whole-URL values.
    _val: SplitURLType
    _origin: "URL"
    absolute: bool
    hash: int
    scheme: str

    # Authority and its pieces.
    raw_authority: str
    authority: str
    raw_user: str | None
    user: str | None
    raw_password: str | None
    password: str | None
    raw_host: str | None
    host: str | None
    host_subcomponent: str | None
    host_port_subcomponent: str | None
    port: int | None
    explicit_port: int | None

    # Path, query and fragment.
    raw_path: str
    path: str
    _parsed_query: list[tuple[str, str]]
    query: "MultiDictProxy[str]"
    raw_query_string: str
    query_string: str
    path_qs: str
    raw_path_qs: str
    raw_fragment: str
    fragment: str

    # Values derived from the path.
    raw_parts: tuple[str, ...]
    parts: tuple[str, ...]
    parent: "URL"
    raw_name: str
    name: str
    raw_suffix: str
    suffix: str
    raw_suffixes: tuple[str, ...]
    suffixes: tuple[str, ...]
def rewrite_module(obj: _T) -> _T:
    """Set ``obj.__module__`` to ``"yarl"`` and hand the same object back."""
    setattr(obj, "__module__", "yarl")
    return obj
def _encode_relative_scheme_colon(path: str) -> str:
    """Re-encode a scheme-shaped leading ``:`` in a relative path to ``%3A``.

    Only the first colon is considered. It is rewritten when it is preceded
    by at least one character and every preceding character is a scheme
    character; in all other cases ``path`` is returned untouched.
    """
    prefix, colon, rest = path.partition(":")
    if not colon or not prefix:
        # No colon at all, or the colon is the very first character.
        return path
    if not _SCHEME_CHARS.issuperset(prefix):
        # Something before the colon could never be part of a scheme.
        return path
    return f"{prefix}%3A{rest}"
@lru_cache
def encode_url(url_str: str) -> "URL":
    """Parse an unencoded URL string into a ``URL``.

    Every component is requoted, and the raw values already known at this
    point are stored in the new URL's cache. Results are memoized with
    ``lru_cache``.

    Raises:
        ValueError: the scheme is in ``SCHEME_REQUIRES_HOST`` but the netloc
            carries no host.
    """
    scheme, netloc, path, query, fragment = split_url(url_str)
    cache: _InternalURLCache = {}

    if netloc:
        host: str | None
        if any(marker in netloc for marker in (":", "@", "[")):
            # Complex netloc
            username, password, host, port = split_netloc(netloc)
        else:
            username, password, host, port = None, None, netloc, None
        if host is None:
            if scheme in SCHEME_REQUIRES_HOST:
                msg = (
                    "Invalid URL: host is required for "
                    f"absolute urls with the {scheme} scheme"
                )
                raise ValueError(msg)
            host = ""
        host = _encode_host(host, validate_host=False)
        # The host encoder puts brackets around IPv6 addresses; the cached
        # raw host is stored without them.
        cache["raw_host"] = host[1:-1] if "[" in host else host
        cache["explicit_port"] = port
        if username is None and password is None:
            # Fast path for URLs without user, password
            raw_user = raw_password = None
            netloc = host if port is None else f"{host}:{port}"
        else:
            raw_user = REQUOTER(username) if username else username
            raw_password = REQUOTER(password) if password else password
            netloc = make_netloc(raw_user, raw_password, host, port)
        cache["raw_user"] = raw_user
        cache["raw_password"] = raw_password

    if path:
        path = PATH_REQUOTER(path)
        if netloc and "." in path:
            path = normalize_path(path)
        elif not scheme and not netloc:
            path = _encode_relative_scheme_colon(path)
    if query:
        query = QUERY_REQUOTER(query)
    if fragment:
        fragment = FRAGMENT_REQUOTER(fragment)

    cache["scheme"] = scheme
    cache["raw_path"] = "/" if netloc and not path else path
    cache["raw_query_string"] = query
    cache["raw_fragment"] = fragment

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query
    url._fragment = fragment
    url._cache = cache
    return url
@lru_cache
def pre_encoded_url(url_str: str) -> "URL":
    """Parse a pre-encoded URL string; the split parts are stored as-is."""
    scheme, netloc, path, query, fragment = split_url(url_str)
    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query
    url._fragment = fragment
    url._cache = {}
    return url
@lru_cache
def build_pre_encoded_url(
    scheme: str,
    authority: str,
    user: str | None,
    password: str | None,
    host: str,
    port: int | None,
    path: str,
    query_string: str,
    fragment: str,
) -> "URL":
    """Build a URL from parts that are already encoded.

    ``authority`` is used verbatim as the netloc when given. Otherwise the
    netloc is assembled from ``user``, ``password``, ``host`` and ``port``,
    leaving the port out when it is the default for ``scheme``. With neither
    an authority nor a host the netloc is empty.
    """
    if authority:
        netloc = authority
    elif not host:
        netloc = ""
    else:
        if port is not None and port == DEFAULT_PORTS.get(scheme):
            port = None
        if user is None and password is None:
            netloc = host if port is None else f"{host}:{port}"
        else:
            netloc = make_netloc(user, password, host, port)

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query_string
    url._fragment = fragment
    url._cache = {}
    return url
def from_parts_uncached(
    scheme: str, netloc: str, path: str, query: str, fragment: str
) -> "URL":
    """Create a new URL directly from its five parts, with an empty cache."""
    url = object.__new__(URL)
    url._cache = {}
    url._scheme, url._netloc = scheme, netloc
    url._path, url._query, url._fragment = path, query, fragment
    return url


# Memoized variant of ``from_parts_uncached``.
from_parts = lru_cache(from_parts_uncached)
307@rewrite_module
308class URL:
309 # Don't derive from str
310 # follow pathlib.Path design
311 # probably URL will not suffer from pathlib problems:
312 # it's intended for libraries like aiohttp,
313 # not to be passed into standard library functions like os.open etc.
315 # URL grammar (RFC 3986)
316 # pct-encoded = "%" HEXDIG HEXDIG
317 # reserved = gen-delims / sub-delims
318 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
319 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
320 # / "*" / "+" / "," / ";" / "="
321 # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
322 # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
323 # hier-part = "//" authority path-abempty
324 # / path-absolute
325 # / path-rootless
326 # / path-empty
327 # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
328 # authority = [ userinfo "@" ] host [ ":" port ]
329 # userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
330 # host = IP-literal / IPv4address / reg-name
331 # IP-literal = "[" ( IPv6address / IPvFuture ) "]"
332 # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
333 # IPv6address = 6( h16 ":" ) ls32
334 # / "::" 5( h16 ":" ) ls32
335 # / [ h16 ] "::" 4( h16 ":" ) ls32
336 # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
337 # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
338 # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
339 # / [ *4( h16 ":" ) h16 ] "::" ls32
340 # / [ *5( h16 ":" ) h16 ] "::" h16
341 # / [ *6( h16 ":" ) h16 ] "::"
342 # ls32 = ( h16 ":" h16 ) / IPv4address
343 # ; least-significant 32 bits of address
344 # h16 = 1*4HEXDIG
345 # ; 16 bits of address represented in hexadecimal
346 # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
347 # dec-octet = DIGIT ; 0-9
348 # / %x31-39 DIGIT ; 10-99
349 # / "1" 2DIGIT ; 100-199
350 # / "2" %x30-34 DIGIT ; 200-249
351 # / "25" %x30-35 ; 250-255
352 # reg-name = *( unreserved / pct-encoded / sub-delims )
353 # port = *DIGIT
354 # path = path-abempty ; begins with "/" or is empty
355 # / path-absolute ; begins with "/" but not "//"
356 # / path-noscheme ; begins with a non-colon segment
357 # / path-rootless ; begins with a segment
358 # / path-empty ; zero characters
359 # path-abempty = *( "/" segment )
360 # path-absolute = "/" [ segment-nz *( "/" segment ) ]
361 # path-noscheme = segment-nz-nc *( "/" segment )
362 # path-rootless = segment-nz *( "/" segment )
363 # path-empty = 0<pchar>
364 # segment = *pchar
365 # segment-nz = 1*pchar
366 # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
367 # ; non-zero-length segment without any colon ":"
368 # pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
369 # query = *( pchar / "/" / "?" )
370 # fragment = *( pchar / "/" / "?" )
371 # URI-reference = URI / relative-ref
372 # relative-ref = relative-part [ "?" query ] [ "#" fragment ]
373 # relative-part = "//" authority path-abempty
374 # / path-absolute
375 # / path-noscheme
376 # / path-empty
377 # absolute-URI = scheme ":" hier-part [ "?" query ]
378 __slots__ = ("_cache", "_scheme", "_netloc", "_path", "_query", "_fragment")
380 _cache: _InternalURLCache
381 _scheme: str
382 _netloc: str
383 _path: str
384 _query: str
385 _fragment: str
387 def __new__(
388 cls,
389 val: Union[str, SplitResult, "URL", UndefinedType] = UNDEFINED,
390 *,
391 encoded: bool = False,
392 strict: bool | None = None,
393 ) -> "URL":
394 if strict is not None: # pragma: no cover
395 warnings.warn("strict parameter is ignored")
396 if type(val) is str:
397 return pre_encoded_url(val) if encoded else encode_url(val)
398 if type(val) is cls:
399 return val
400 if type(val) is SplitResult:
401 if not encoded:
402 raise ValueError("Cannot apply decoding to SplitResult")
403 return from_parts(*val)
404 if isinstance(val, str):
405 return pre_encoded_url(str(val)) if encoded else encode_url(str(val))
406 if val is UNDEFINED:
407 # Special case for UNDEFINED since it might be unpickling and we do
408 # not want to cache as the `__set_state__` call would mutate the URL
409 # object in the `pre_encoded_url` or `encoded_url` caches.
410 self = object.__new__(URL)
411 self._scheme = self._netloc = self._path = self._query = self._fragment = ""
412 self._cache = {}
413 return self
414 raise TypeError("Constructor parameter should be str")
416 @classmethod
417 def build(
418 cls,
419 *,
420 scheme: str = "",
421 authority: str = "",
422 user: str | None = None,
423 password: str | None = None,
424 host: str = "",
425 port: int | None = None,
426 path: str = "",
427 query: Query | None = None,
428 query_string: str = "",
429 fragment: str = "",
430 encoded: bool = False,
431 ) -> "URL":
432 """Creates and returns a new URL"""
434 if authority and (user or password or host or port):
435 raise ValueError(
436 'Can\'t mix "authority" with "user", "password", "host" or "port".'
437 )
438 if port is not None and not isinstance(port, int):
439 raise TypeError(f"The port is required to be int, got {type(port)!r}.")
440 if port and not host:
441 raise ValueError('Can\'t build URL with "port" but without "host".')
442 if query and query_string:
443 raise ValueError('Only one of "query" or "query_string" should be passed')
444 if (
445 scheme is None # type: ignore[redundant-expr]
446 or authority is None # type: ignore[redundant-expr]
447 or host is None # type: ignore[redundant-expr]
448 or path is None # type: ignore[redundant-expr]
449 or query_string is None # type: ignore[redundant-expr]
450 or fragment is None
451 ):
452 raise TypeError(
453 'NoneType is illegal for "scheme", "authority", "host", "path", '
454 '"query_string", and "fragment" args, use empty string instead.'
455 )
457 if query:
458 query_string = get_str_query(query) or ""
460 if encoded:
461 return build_pre_encoded_url(
462 scheme,
463 authority,
464 user,
465 password,
466 host,
467 port,
468 path,
469 query_string,
470 fragment,
471 )
473 self = object.__new__(URL)
474 self._scheme = scheme
475 _host: str | None = None
476 if authority:
477 user, password, _host, port = split_netloc(authority)
478 _host = _encode_host(_host, validate_host=False) if _host else ""
479 elif host:
480 _host = _encode_host(host, validate_host=True)
481 else:
482 self._netloc = ""
484 if _host is not None:
485 if port is not None:
486 port = None if port == DEFAULT_PORTS.get(scheme) else port
487 if user is None and password is None:
488 self._netloc = _host if port is None else f"{_host}:{port}"
489 else:
490 self._netloc = make_netloc(user, password, _host, port, True)
492 path = PATH_QUOTER(path) if path else path
493 if path and self._netloc:
494 if "." in path:
495 path = normalize_path(path)
496 if path[0] != "/":
497 msg = (
498 "Path in a URL with authority should "
499 "start with a slash ('/') if set"
500 )
501 raise ValueError(msg)
503 self._path = path
504 if not query and query_string:
505 query_string = QUERY_QUOTER(query_string)
506 self._query = query_string
507 self._fragment = FRAGMENT_QUOTER(fragment) if fragment else fragment
508 self._cache = {}
509 return self
511 def __init_subclass__(cls) -> NoReturn:
512 raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden")
514 def __str__(self) -> str:
515 if not self._path and self._netloc and (self._query or self._fragment):
516 path = "/"
517 else:
518 path = self._path
519 if (port := self.explicit_port) is not None and port == DEFAULT_PORTS.get(
520 self._scheme
521 ):
522 # port normalization - using None for default ports to remove from rendering
523 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3
524 host = self.host_subcomponent
525 netloc = make_netloc(self.raw_user, self.raw_password, host, None)
526 else:
527 netloc = self._netloc
528 return unsplit_result(self._scheme, netloc, path, self._query, self._fragment)
530 def __repr__(self) -> str:
531 return f"{self.__class__.__name__}('{str(self)}')"
533 def __bytes__(self) -> bytes:
534 return str(self).encode("ascii")
536 def __eq__(self, other: object) -> bool:
537 if type(other) is not URL:
538 return NotImplemented
540 path1 = "/" if not self._path and self._netloc else self._path
541 path2 = "/" if not other._path and other._netloc else other._path
542 return (
543 self._scheme == other._scheme
544 and self._netloc == other._netloc
545 and path1 == path2
546 and self._query == other._query
547 and self._fragment == other._fragment
548 )
550 def __hash__(self) -> int:
551 if (ret := self._cache.get("hash")) is None:
552 path = "/" if not self._path and self._netloc else self._path
553 ret = self._cache["hash"] = hash(
554 (self._scheme, self._netloc, path, self._query, self._fragment)
555 )
556 return ret
558 def __le__(self, other: object) -> bool:
559 if type(other) is not URL:
560 return NotImplemented
561 return self._val <= other._val
563 def __lt__(self, other: object) -> bool:
564 if type(other) is not URL:
565 return NotImplemented
566 return self._val < other._val
568 def __ge__(self, other: object) -> bool:
569 if type(other) is not URL:
570 return NotImplemented
571 return self._val >= other._val
573 def __gt__(self, other: object) -> bool:
574 if type(other) is not URL:
575 return NotImplemented
576 return self._val > other._val
578 def __truediv__(self, name: str) -> "URL":
579 if not isinstance(name, str):
580 return NotImplemented
581 return self._make_child((str(name),))
583 def __mod__(self, query: Query) -> "URL":
584 return self.update_query(query)
586 def __bool__(self) -> bool:
587 return bool(self._netloc or self._path or self._query or self._fragment)
589 def __getstate__(self) -> tuple[SplitURLType]:
590 # Return a plain tuple rather than a ``SplitResult``. Constructing a
591 # ``SplitResult`` via ``tuple.__new__`` skips its ``__init__`` and on
592 # Python 3.15+ leaves ``_keep_empty`` unset, which breaks pickling: the
593 # new ``SplitResult.__getstate__`` indexes a state that ends up as
594 # ``None`` (gh-1632). ``__setstate__`` already unpacks both shapes, so
595 # pickles produced by older yarl releases (which embed a real
596 # ``SplitResult``) still load correctly.
597 return (self._val,)
599 def __setstate__(
600 self, state: tuple[SplitURLType] | tuple[None, _InternalURLCache]
601 ) -> None:
602 if state[0] is None and isinstance(state[1], dict):
603 # default style pickle
604 val = state[1]["_val"]
605 else:
606 unused: list[object]
607 val, *unused = state
608 self._scheme, self._netloc, self._path, self._query, self._fragment = val
609 self._cache = {}
611 def _cache_netloc(self) -> None:
612 """Cache the netloc parts of the URL."""
613 c = self._cache
614 split_loc = split_netloc(self._netloc)
615 c["raw_user"], c["raw_password"], c["raw_host"], c["explicit_port"] = split_loc
617 def is_absolute(self) -> bool:
618 """A check for absolute URLs.
620 Return True for absolute ones (having scheme or starting
621 with //), False otherwise.
623 Is is preferred to call the .absolute property instead
624 as it is cached.
625 """
626 return self.absolute
628 def is_default_port(self) -> bool:
629 """A check for default port.
631 Return True if port is default for specified scheme,
632 e.g. 'http://python.org' or 'http://python.org:80', False
633 otherwise.
635 Return False for relative URLs.
637 """
638 if (explicit := self.explicit_port) is None:
639 # If the explicit port is None, then the URL must be
640 # using the default port unless its a relative URL
641 # which does not have an implicit port / default port
642 return self._netloc != ""
643 return explicit == DEFAULT_PORTS.get(self._scheme)
645 def origin(self) -> "URL":
646 """Return an URL with scheme, host and port parts only.
648 user, password, path, query and fragment are removed.
650 """
651 # TODO: add a keyword-only option for keeping user/pass maybe?
652 return self._origin
654 @cached_property
655 def _val(self) -> SplitURLType:
656 return (self._scheme, self._netloc, self._path, self._query, self._fragment)
658 @cached_property
659 def _origin(self) -> "URL":
660 """Return an URL with scheme, host and port parts only.
662 user, password, path, query and fragment are removed.
663 """
664 if not (netloc := self._netloc):
665 raise ValueError("URL should be absolute")
666 if not (scheme := self._scheme):
667 raise ValueError("URL should have scheme")
668 if "@" in netloc:
669 encoded_host = self.host_subcomponent
670 netloc = make_netloc(None, None, encoded_host, self.explicit_port)
671 elif not self._path and not self._query and not self._fragment:
672 return self
673 return from_parts(scheme, netloc, "", "", "")
675 def relative(self) -> "URL":
676 """Return a relative part of the URL.
678 scheme, user, password, host and port are removed.
680 """
681 if not self._netloc:
682 raise ValueError("URL should be absolute")
683 return from_parts("", "", self._path, self._query, self._fragment)
685 @cached_property
686 def absolute(self) -> bool:
687 """A check for absolute URLs.
689 Return True for absolute ones (having scheme or starting
690 with //), False otherwise.
692 """
693 # `netloc`` is an empty string for relative URLs
694 # Checking `netloc` is faster than checking `hostname`
695 # because `hostname` is a property that does some extra work
696 # to parse the host from the `netloc`
697 return self._netloc != ""
699 @cached_property
700 def scheme(self) -> str:
701 """Scheme for absolute URLs.
703 Empty string for relative URLs or URLs starting with //
705 """
706 return self._scheme
708 @cached_property
709 def raw_authority(self) -> str:
710 """Encoded authority part of URL.
712 Empty string for relative URLs.
714 """
715 return self._netloc
717 @cached_property
718 def authority(self) -> str:
719 """Decoded authority part of URL.
721 Empty string for relative URLs.
723 """
724 return make_netloc(self.user, self.password, self.host, self.port)
726 @cached_property
727 def raw_user(self) -> str | None:
728 """Encoded user part of URL.
730 None if user is missing.
732 """
733 # not .username
734 self._cache_netloc()
735 return self._cache["raw_user"]
737 @cached_property
738 def user(self) -> str | None:
739 """Decoded user part of URL.
741 None if user is missing.
743 """
744 if (raw_user := self.raw_user) is None:
745 return None
746 return UNQUOTER(raw_user)
748 @cached_property
749 def raw_password(self) -> str | None:
750 """Encoded password part of URL.
752 None if password is missing.
754 """
755 self._cache_netloc()
756 return self._cache["raw_password"]
758 @cached_property
759 def password(self) -> str | None:
760 """Decoded password part of URL.
762 None if password is missing.
764 """
765 if (raw_password := self.raw_password) is None:
766 return None
767 return UNQUOTER(raw_password)
769 @cached_property
770 def raw_host(self) -> str | None:
771 """Encoded host part of URL.
773 None for relative URLs.
775 When working with IPv6 addresses, use the `host_subcomponent` property instead
776 as it will return the host subcomponent with brackets.
777 """
778 # Use host instead of hostname for sake of shortness
779 # May add .hostname prop later
780 self._cache_netloc()
781 return self._cache["raw_host"]
783 @cached_property
784 def host(self) -> str | None:
785 """Decoded host part of URL.
787 None for relative URLs.
789 For IPv6 hosts that carry an RFC 6874 zone identifier, the
790 ``%25`` zone separator is decoded back to ``%``; the encoded
791 form is still available via :attr:`raw_host` and
792 :attr:`host_subcomponent`.
794 """
795 if (raw := self.raw_host) is None:
796 return None
797 if raw and raw[-1].isdigit() or ":" in raw:
798 # IP addresses are never IDNA encoded; only the RFC 6874
799 # zone separator needs to be decoded.
800 if "%25" in raw:
801 return raw.replace("%25", "%")
802 return raw
803 return _idna_decode(raw)
805 @cached_property
806 def host_subcomponent(self) -> str | None:
807 """Return the host subcomponent part of URL.
809 None for relative URLs.
811 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
813 `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
815 Examples:
816 - `http://example.com:8080` -> `example.com`
817 - `http://example.com:80` -> `example.com`
818 - `https://127.0.0.1:8443` -> `127.0.0.1`
819 - `https://[::1]:8443` -> `[::1]`
820 - `http://[::1]` -> `[::1]`
822 """
823 if (raw := self.raw_host) is None:
824 return None
825 return f"[{raw}]" if ":" in raw else raw
827 @cached_property
828 def host_port_subcomponent(self) -> str | None:
829 """Return the host and port subcomponent part of URL.
831 Trailing dots are removed from the host part.
833 This value is suitable for use in the Host header of an HTTP request.
835 None for relative URLs.
837 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
838 `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
839 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
840 port = *DIGIT
842 Examples:
843 - `http://example.com:8080` -> `example.com:8080`
844 - `http://example.com:80` -> `example.com`
845 - `http://example.com.:80` -> `example.com`
846 - `https://127.0.0.1:8443` -> `127.0.0.1:8443`
847 - `https://[::1]:8443` -> `[::1]:8443`
848 - `http://[::1]` -> `[::1]`
850 """
851 if (raw := self.raw_host) is None:
852 return None
853 if raw[-1] == ".":
854 # Remove all trailing dots from the netloc as while
855 # they are valid FQDNs in DNS, TLS validation fails.
856 # See https://github.com/aio-libs/aiohttp/issues/3636.
857 # To avoid string manipulation we only call rstrip if
858 # the last character is a dot.
859 raw = raw.rstrip(".")
860 port = self.explicit_port
861 if port is None or port == DEFAULT_PORTS.get(self._scheme):
862 return f"[{raw}]" if ":" in raw else raw
863 return f"[{raw}]:{port}" if ":" in raw else f"{raw}:{port}"
865 @cached_property
866 def port(self) -> int | None:
867 """Port part of URL, with scheme-based fallback.
869 None for relative URLs or URLs without explicit port and
870 scheme without default port substitution.
872 """
873 if (explicit_port := self.explicit_port) is not None:
874 return explicit_port
875 return DEFAULT_PORTS.get(self._scheme)
877 @cached_property
878 def explicit_port(self) -> int | None:
879 """Port part of URL, without scheme-based fallback.
881 None for relative URLs or URLs without explicit port.
883 """
884 self._cache_netloc()
885 return self._cache["explicit_port"]
887 @cached_property
888 def raw_path(self) -> str:
889 """Encoded path of URL.
891 / for absolute URLs without path part.
893 """
894 return self._path if self._path or not self._netloc else "/"
896 @cached_property
897 def path(self) -> str:
898 """Decoded path of URL.
900 / for absolute URLs without path part.
902 """
903 return PATH_UNQUOTER(self._path) if self._path else "/" if self._netloc else ""
905 @cached_property
906 def path_safe(self) -> str:
907 """Decoded path of URL.
909 / for absolute URLs without path part.
911 / (%2F) and % (%25) are not decoded
913 """
914 if self._path:
915 return PATH_SAFE_UNQUOTER(self._path)
916 return "/" if self._netloc else ""
918 @cached_property
919 def _parsed_query(self) -> list[tuple[str, str]]:
920 """Parse query part of URL."""
921 return query_to_pairs(self._query)
923 @cached_property
924 def query(self) -> "MultiDictProxy[str]":
925 """A MultiDictProxy representing parsed query parameters in decoded
926 representation.
928 Empty value if URL has no query part.
930 """
931 return MultiDictProxy(MultiDict(self._parsed_query))
933 @cached_property
934 def raw_query_string(self) -> str:
935 """Encoded query part of URL.
937 Empty string if query is missing.
939 """
940 return self._query
942 @cached_property
943 def query_string(self) -> str:
944 """Decoded query part of URL.
946 Empty string if query is missing.
948 """
949 return QS_UNQUOTER(self._query) if self._query else ""
951 @cached_property
952 def path_qs(self) -> str:
953 """Decoded path of URL with query."""
954 return self.path if not (q := self.query_string) else f"{self.path}?{q}"
956 @cached_property
957 def raw_path_qs(self) -> str:
958 """Encoded path of URL with query."""
959 if q := self._query:
960 return f"{self._path}?{q}" if self._path or not self._netloc else f"/?{q}"
961 return self._path if self._path or not self._netloc else "/"
963 @cached_property
964 def raw_fragment(self) -> str:
965 """Encoded fragment part of URL.
967 Empty string if fragment is missing.
969 """
970 return self._fragment
972 @cached_property
973 def fragment(self) -> str:
974 """Decoded fragment part of URL.
976 Empty string if fragment is missing.
978 """
979 return UNQUOTER(self._fragment) if self._fragment else ""
981 @cached_property
982 def raw_parts(self) -> tuple[str, ...]:
983 """A tuple containing encoded *path* parts.
985 ('/',) for absolute URLs if *path* is missing.
987 """
988 path = self._path
989 if self._netloc:
990 return ("/", *path[1:].split("/")) if path else ("/",)
991 if path and path[0] == "/":
992 return ("/", *path[1:].split("/"))
993 return tuple(path.split("/"))
995 @cached_property
996 def parts(self) -> tuple[str, ...]:
997 """A tuple containing decoded *path* parts.
999 ('/',) for absolute URLs if *path* is missing.
1001 """
1002 return tuple(UNQUOTER(part) for part in self.raw_parts)
1004 @cached_property
1005 def parent(self) -> "URL":
1006 """A new URL with last part of path removed and cleaned up query and
1007 fragment.
1009 """
1010 path = self._path
1011 if not path or path == "/":
1012 if self._fragment or self._query:
1013 return from_parts(self._scheme, self._netloc, path, "", "")
1014 return self
1015 parts = path.split("/")
1016 return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "")
1018 @cached_property
1019 def raw_name(self) -> str:
1020 """The last part of raw_parts."""
1021 parts = self.raw_parts
1022 if not self._netloc:
1023 return parts[-1]
1024 parts = parts[1:]
1025 return parts[-1] if parts else ""
1027 @cached_property
1028 def name(self) -> str:
1029 """The last part of parts."""
1030 return UNQUOTER(self.raw_name)
1032 @cached_property
1033 def raw_suffix(self) -> str:
1034 name = self.raw_name
1035 i = name.rfind(".")
1036 return name[i:] if 0 < i < len(name) - 1 else ""
1038 @cached_property
1039 def suffix(self) -> str:
1040 return UNQUOTER(self.raw_suffix)
1042 @cached_property
1043 def raw_suffixes(self) -> tuple[str, ...]:
1044 name = self.raw_name
1045 if name.endswith("."):
1046 return ()
1047 name = name.lstrip(".")
1048 return tuple("." + suffix for suffix in name.split(".")[1:])
1050 @cached_property
1051 def suffixes(self) -> tuple[str, ...]:
1052 return tuple(UNQUOTER(suffix) for suffix in self.raw_suffixes)
def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":
    """
    Append ``paths`` to ``self._path`` and return the resulting URL.

    Existing empty segments are kept, but no new ones are created.
    The new URL has an empty query and fragment.

    Raises ValueError if any item of ``paths`` starts with a slash.
    """
    # Segments are collected back-to-front and flipped once at the end.
    collected: list[str] = []
    saw_dot = False
    for position, piece in enumerate(reversed(paths)):
        if piece and piece[0] == "/":
            raise ValueError(
                f"Appending path {piece!r} starting from slash is forbidden"
            )
        # Quote each new piece on its own: the existing path is already
        # quoted and must not be quoted a second time.
        if not encoded:
            piece = PATH_QUOTER(piece)
        if "." in piece:
            saw_dot = True
        pieces = piece.split("/")
        pieces.reverse()
        # Only the final path (position 0 here) keeps a trailing empty segment.
        if position != 0 and pieces[0] == "":
            collected += pieces[1:]
        else:
            collected += pieces

    existing = self._path
    if existing:
        base = existing.split("/")
        # A trailing slash on the old path shows up as a final empty
        # segment; drop it before the new segments are attached.
        if base[-1] == "":
            base = base[:-1]
        base.reverse()
        collected += base

    # With a netloc the path must start with a slash, i.e. the
    # (reversed) segment list must end with an empty segment.
    netloc = self._netloc
    if netloc and collected and collected[-1] != "":
        collected.append("")

    collected.reverse()
    if not netloc or not saw_dot:
        return from_parts(self._scheme, netloc, "/".join(collected), "", "")

    normalized = "/".join(normalize_path_segments(collected))
    # Normalization may have dropped the leading slash; restore it.
    if normalized and normalized[0] != "/":
        normalized = f"/{normalized}"
    return from_parts(self._scheme, netloc, normalized, "", "")
def with_scheme(self, scheme: str) -> "URL":
    """Return a new URL with the scheme replaced by lower-cased *scheme*.

    Netloc, path, query and fragment are carried over unchanged.

    Raises TypeError if *scheme* is not a str, and ValueError if the URL
    has no netloc and the new scheme is in ``SCHEME_REQUIRES_HOST``.
    """
    if not isinstance(scheme, str):
        raise TypeError("Invalid scheme type")
    new_scheme = scheme.lower()
    netloc = self._netloc
    if not netloc:
        if new_scheme in SCHEME_REQUIRES_HOST:
            raise ValueError(
                "scheme replacement is not allowed for "
                f"relative URLs for the {new_scheme} scheme"
            )
    return from_parts(new_scheme, netloc, self._path, self._query, self._fragment)
1116 def with_user(self, user: str | None) -> "URL":
1117 """Return a new URL with user replaced.
1119 Autoencode user if needed.
1121 Clear user/password if user is None.
1123 """
1124 # N.B. doesn't cleanup query/fragment
1125 if user is None:
1126 password = None
1127 elif isinstance(user, str):
1128 user = QUOTER(user)
1129 password = self.raw_password
1130 else:
1131 raise TypeError("Invalid user type")
1132 if not (netloc := self._netloc):
1133 raise ValueError("user replacement is not allowed for relative URLs")
1134 encoded_host = self.host_subcomponent or ""
1135 netloc = make_netloc(user, password, encoded_host, self.explicit_port)
1136 return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)
1138 def with_password(self, password: str | None) -> "URL":
1139 """Return a new URL with password replaced.
1141 Autoencode password if needed.
1143 Clear password if argument is None.
1145 """
1146 # N.B. doesn't cleanup query/fragment
1147 if password is None:
1148 pass
1149 elif isinstance(password, str):
1150 password = QUOTER(password)
1151 else:
1152 raise TypeError("Invalid password type")
1153 if not (netloc := self._netloc):
1154 raise ValueError("password replacement is not allowed for relative URLs")
1155 encoded_host = self.host_subcomponent or ""
1156 port = self.explicit_port
1157 netloc = make_netloc(self.raw_user, password, encoded_host, port)
1158 return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)
def with_host(self, host: str) -> "URL":
    """Return a new URL with host replaced.

    *host* is encoded and validated with ``_encode_host``.  User,
    password, explicit port, path, query and fragment are kept.

    Changing host for relative URLs is not allowed, use .join()
    instead.

    Raises TypeError if *host* is not a str, and ValueError if the URL
    has no netloc or *host* is empty.
    """
    if not isinstance(host, str):
        raise TypeError("Invalid host type")
    if not self._netloc:
        raise ValueError("host replacement is not allowed for relative URLs")
    if not host:
        raise ValueError("host removing is not allowed")
    # host is guaranteed non-empty here, so it is always encoded.
    encoded_host = _encode_host(host, validate_host=True)
    explicit_port = self.explicit_port
    new_netloc = make_netloc(
        self.raw_user, self.raw_password, encoded_host, explicit_port
    )
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
1181 def with_port(self, port: int | None) -> "URL":
1182 """Return a new URL with port replaced.
1184 Clear port to default if None is passed.
1186 """
1187 # N.B. doesn't cleanup query/fragment
1188 if port is not None:
1189 if isinstance(port, bool) or not isinstance(port, int):
1190 raise TypeError(f"port should be int or None, got {type(port)}")
1191 if not (0 <= port <= 65535):
1192 raise ValueError(f"port must be between 0 and 65535, got {port}")
1193 if not (netloc := self._netloc):
1194 raise ValueError("port replacement is not allowed for relative URLs")
1195 encoded_host = self.host_subcomponent or ""
1196 netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
1197 return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)
def with_path(
    self,
    path: str,
    *,
    encoded: bool = False,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with path replaced.

    Unless *encoded* is true the path is passed through ``PATH_QUOTER``
    and, when the URL has a netloc and the path contains a dot,
    normalized.  A non-empty path always gets a leading slash.  Query
    and fragment are dropped unless *keep_query* / *keep_fragment* are
    set.
    """
    netloc = self._netloc
    if not encoded:
        path = PATH_QUOTER(path)
        if netloc and "." in path:
            path = normalize_path(path)
    if path and path[0] != "/":
        path = f"/{path}"
    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(self._scheme, netloc, path, new_query, new_fragment)
@overload
def with_query(self, query: Query) -> "URL": ...

@overload
def with_query(self, **kwargs: QueryVariable) -> "URL": ...

def with_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with query part replaced.

    Accepts any Mapping (e.g. dict, multidict.MultiDict instances)
    or str, autoencode the argument if needed.

    A sequence of (key, value) pairs is supported as well.

    It also can take an arbitrary number of keyword arguments.

    Clear query if None is passed.

    Scheme, netloc, path and fragment are carried over unchanged.
    """
    encoded_query = get_str_query(*args, **kwargs)
    if not encoded_query:
        # Any falsy result from get_str_query() is stored as "".
        encoded_query = ""
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, encoded_query, self._fragment
    )
@overload
def extend_query(self, query: Query) -> "URL": ...

@overload
def extend_query(self, **kwargs: QueryVariable) -> "URL": ...

def extend_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with the given query appended to the existing one.

    Existing query parameters are never removed.  If the new query is
    empty, ``self`` is returned unchanged.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.extend_query(a=3, c=4)
    URL('http://example.com/?a=1&b=2&a=3&c=4')
    """
    addition = get_str_query(*args, **kwargs)
    if not addition:
        return self
    current = self._query
    # Both strings are already encoded, so plain concatenation is enough;
    # a separator is inserted unless the old query already ends with one.
    if not current:
        combined = addition
    elif current[-1] == "&":
        combined = current + addition
    else:
        combined = f"{current}&{addition}"
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, combined, self._fragment
    )
@overload
def update_query(self, query: Query) -> "URL": ...

@overload
def update_query(self, **kwargs: QueryVariable) -> "URL": ...

def update_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with query part updated.

    Existing query parameters are overwritten by the given ones.
    Passing None clears the query; any other falsy argument leaves it
    unchanged.

    Raises ValueError unless exactly one positional argument or only
    keyword arguments are given, and TypeError for unsupported types.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.update_query(a=3, c=4)
    URL('http://example.com/?a=3&b=2&c=4')
    """
    in_query: (
        str
        | Mapping[str, QueryVariable]
        | Sequence[tuple[str | istr, SimpleQuery]]
        | None
    )
    usage_error = "Either kwargs or single query parameter must be present"
    if kwargs:
        if args:
            raise ValueError(usage_error)
        in_query = kwargs
    elif len(args) == 1:
        (in_query,) = args
    else:
        raise ValueError(usage_error)

    # The order of the checks below matters: None and other falsy
    # values are handled before any type dispatch.
    if in_query is None:
        query = ""
    elif not in_query:
        query = self._query
    elif isinstance(in_query, Mapping):
        from_mapping: MultiDict[QueryVariable] = MultiDict(self._parsed_query)
        from_mapping.update(in_query)
        query = get_str_query_from_sequence_iterable(from_mapping.items())
    elif isinstance(in_query, str):
        from_string: MultiDict[str] = MultiDict(self._parsed_query)
        from_string.update(query_to_pairs(in_query))
        query = get_str_query_from_iterable(from_string.items())
    elif isinstance(in_query, (bytes, bytearray, memoryview)):
        raise TypeError(
            "Invalid query type: bytes, bytearray and memoryview are forbidden"
        )
    elif isinstance(in_query, Sequence):
        # A list of pairs is not expected to carry sequence values; only
        # mappings go through the sequence-aware encoder above.
        if TYPE_CHECKING:
            in_query = cast(
                Sequence[tuple[Union[str, istr], SimpleQuery]], in_query
            )
        from_pairs: MultiDict[SimpleQuery] = MultiDict(self._parsed_query)
        from_pairs.update(in_query)
        query = get_str_query_from_iterable(from_pairs.items())
    else:
        raise TypeError(
            "Invalid query type: only str, mapping or "
            "sequence of (key, value) pairs is allowed"
        )
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, query, self._fragment
    )
def without_query_params(self, *query_params: str) -> "URL":
    """Return a URL whose query lacks the given keys.

    ``self`` is returned unchanged when none of *query_params* occurs
    in the current query.
    """
    doomed = set(query_params) & self.query.keys()
    if not doomed:
        return self
    kept = tuple(
        (key, value) for key, value in self.query.items() if key not in doomed
    )
    return self.with_query(kept)
1353 def with_fragment(self, fragment: str | None) -> "URL":
1354 """Return a new URL with fragment replaced.
1356 Autoencode fragment if needed.
1358 Clear fragment to default if None is passed.
1360 """
1361 # N.B. doesn't cleanup query/fragment
1362 if fragment is None:
1363 raw_fragment = ""
1364 elif not isinstance(fragment, str):
1365 raise TypeError("Invalid fragment type")
1366 else:
1367 raw_fragment = FRAGMENT_QUOTER(fragment)
1368 if self._fragment == raw_fragment:
1369 return self
1370 return from_parts(
1371 self._scheme, self._netloc, self._path, self._query, raw_fragment
1372 )
def with_name(
    self,
    name: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with name (last part of path) replaced.

    *name* is passed through ``PATH_QUOTER``.  Query and fragment are
    dropped unless *keep_query* / *keep_fragment* are set.

    Raises TypeError if *name* is not a str, and ValueError if it
    contains a slash or quotes to ``.`` or ``..``.
    """
    if not isinstance(name, str):
        raise TypeError("Invalid name type")
    if "/" in name:
        raise ValueError("Slash in name is not allowed")
    name = PATH_QUOTER(name)
    if name in (".", ".."):
        raise ValueError(". and .. values are forbidden")

    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc and len(segments) == 1:
        # Only one part so far: add the name as a new segment.
        segments.append(name)
    else:
        segments[-1] = name
    # Blank the leading "/" part so the join below yields a single slash.
    if netloc or segments[0] == "/":
        segments[0] = ""

    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(
        self._scheme, netloc, "/".join(segments), new_query, new_fragment
    )
def with_suffix(
    self,
    suffix: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with suffix (file extension of name) replaced.

    *suffix* is passed through ``PATH_QUOTER``; an empty suffix removes
    the current one.  Query and fragment are dropped unless
    *keep_query* / *keep_fragment* are set.

    Raises TypeError if *suffix* is not a str, and ValueError if it is
    malformed, the URL has an empty name, or the resulting name is
    ``.`` or ``..``.
    """
    if not isinstance(suffix, str):
        raise TypeError("Invalid suffix type")
    # A non-empty suffix must start with a dot, must not be a lone dot
    # and must not contain a slash.
    lacks_dot = bool(suffix) and suffix[0] != "."
    if lacks_dot or suffix == "." or "/" in suffix:
        raise ValueError(f"Invalid suffix {suffix!r}")
    name = self.raw_name
    if not name:
        raise ValueError(f"{self!r} has an empty name")
    current_suffix = self.raw_suffix
    suffix = PATH_QUOTER(suffix)
    stem = name[: -len(current_suffix)] if current_suffix else name
    name = stem + suffix
    if name in (".", ".."):
        raise ValueError(". and .. values are forbidden")

    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc and len(segments) == 1:
        segments.append(name)
    else:
        segments[-1] = name
    # Blank the leading "/" part so the join below yields a single slash.
    if netloc or segments[0] == "/":
        segments[0] = ""

    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(
        self._scheme, netloc, "/".join(segments), new_query, new_fragment
    )
def join(self, url: "URL") -> "URL":
    """Join URLs

    Resolve *url* against this URL (the base) and return the result.

    Scheme, netloc and (part of) the path of the base fill in whatever
    *url* is missing.  *url* is returned as-is when its scheme differs
    from the base scheme or the scheme is not in ``USES_RELATIVE``.

    Raises TypeError if *url* is not exactly a ``URL`` instance.
    """
    if type(url) is not URL:
        raise TypeError("url should be URL")

    base_scheme = self._scheme
    scheme = url._scheme or base_scheme
    if scheme != base_scheme or scheme not in USES_RELATIVE:
        return url

    # A reference carrying its own netloc replaces everything after the scheme.
    ref_netloc = url._netloc
    if ref_netloc and scheme in USES_AUTHORITY:
        return from_parts(scheme, ref_netloc, url._path, url._query, url._fragment)

    base_path = self._path
    ref_path = url._path
    if not ref_path:
        path = base_path
    else:
        if ref_path[0] == "/":
            path = ref_path
        elif not base_path:
            path = f"/{ref_path}"
        elif base_path[-1] == "/":
            path = f"{base_path}{ref_path}"
        else:
            # Replace the last part of the base path with the reference.
            # parts[0] is "/" for absolute paths, so this join produces
            # a double leading slash ...
            path = "/".join([*self.parts[:-1], ""]) + ref_path
            # ... which has to be removed again.
            if base_path[0] == "/":
                path = path[1:]
        if "." in path:
            path = normalize_path(path)

    # The base query/fragment survive only when the reference has
    # neither a path nor its own value for that component.
    if ref_path:
        query, fragment = url._query, url._fragment
    else:
        query = url._query or self._query
        fragment = url._fragment or self._fragment
    return from_parts(scheme, self._netloc, path, query, fragment)
def joinpath(self, *other: str, encoded: bool = False) -> "URL":
    """Return a new URL with the elements in other appended to the path.

    Delegates to ``_make_child``, forwarding *encoded* unchanged.
    """
    child = self._make_child(other, encoded=encoded)
    return child
def human_repr(self) -> str:
    """Return decoded human readable string for URL representation.

    Each decoded component is passed through ``human_quote`` together
    with the characters given for that component, and the pieces are
    reassembled with ``make_netloc`` and ``unsplit_result``.
    """
    userinfo_chars = "#/:?@[]\\"
    query_chars = "#&+;="

    user = human_quote(self.user, userinfo_chars)
    password = human_quote(self.password, userinfo_chars)
    host = self.host
    if host and ":" in host:
        # A colon in the host means it needs brackets in the netloc.
        host = f"[{host}]"
    path = human_quote(self.path, "#?")
    if TYPE_CHECKING:
        assert path is not None
    if not self._scheme and not self._netloc:
        path = _encode_relative_scheme_colon(path)
    query_string = "&".join(
        f"{human_quote(key, query_chars)}={human_quote(value, query_chars)}"
        for key, value in self.query.items()
    )
    fragment = human_quote(self.fragment, "")
    if TYPE_CHECKING:
        assert fragment is not None
    netloc = make_netloc(user, password, host, self.explicit_port)
    return unsplit_result(self._scheme, netloc, path, query_string, fragment)
if HAS_PYDANTIC:
    # Borrowed from https://docs.pydantic.dev/latest/concepts/types/#handling-third-party-types
    @classmethod
    def __get_pydantic_json_schema__(
        cls,
        core_schema: "CoreSchema",
        handler: "GetJsonSchemaHandler",
    ) -> "JsonSchemaValue":
        """Return the JSON schema: a string with the ``uri`` format."""
        field_schema: dict[str, Any] = {"type": "string", "format": "uri"}
        return field_schema

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        source_type: type[Self] | type[str],
        handler: "GetCoreSchemaHandler",
    ) -> "CoreSchema":
        """Return the core schema used to validate and serialize URLs.

        JSON input must be a string that is then passed to ``URL``;
        Python input may also be an existing ``URL`` instance.  Values
        are serialized with ``str``.
        """
        # Lazy import: pulling in pydantic_core at module load time
        # increases yarl's import cost 3-7x for users who don't use
        # pydantic. Keep this import function-scoped.
        from pydantic_core import core_schema  # noqa: PLC0415

        str_to_url = core_schema.chain_schema(
            [
                core_schema.str_schema(),
                core_schema.no_info_plain_validator_function(URL),
            ]
        )
        # The instance check comes first so existing URL objects skip
        # the string validation chain.
        python_schema = core_schema.union_schema(
            [
                core_schema.is_instance_schema(URL),
                str_to_url,
            ]
        )
        serializer = core_schema.plain_serializer_function_ser_schema(str)
        return core_schema.json_or_python_schema(
            json_schema=str_to_url,
            python_schema=python_schema,
            serialization=serializer,
        )
# Default maxsize of the LRU caches wrapping _idna_decode() and _idna_encode().
_DEFAULT_IDNA_SIZE = 256
# Default maxsize of the LRU cache wrapping _encode_host().
_DEFAULT_ENCODE_SIZE = 512
@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_decode(raw: str) -> str:
    """Decode an ASCII host with ``idna``, falling back to the stdlib codec."""
    try:
        decoded = idna.decode(raw.encode("ascii"))
    except UnicodeError:  # e.g. '::1'
        decoded = raw.encode("ascii").decode("idna")
    return decoded
@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_encode(host: str) -> str:
    """Encode a host with ``idna`` (UTS 46), falling back to the stdlib codec."""
    try:
        encoded = idna.encode(host, uts46=True).decode("ascii")
    except UnicodeError:
        encoded = host.encode("idna").decode("ascii")
    return encoded
@lru_cache(_DEFAULT_ENCODE_SIZE)
def _encode_host(host: str, validate_host: bool) -> str:
    """Encode host part of URL.

    IP addresses are returned in compressed form (IPv6 in brackets, any
    zone identifier kept).  Other ASCII hosts are lower-cased and, if
    *validate_host* is true, checked against ``NOT_REG_NAME``.
    Non-ASCII hosts are passed to ``_idna_encode``.

    Raises ValueError for an invalid zone identifier or an invalid
    character in an ASCII host when *validate_host* is true.
    """
    # Parsing an IP address is far slower than anything else done here,
    # so only try it when the host looks like one:
    # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
    # - 127.0.0.1                     (last character is a digit)
    # - 2001:db8::ff00:42:8329        (contains a colon)
    # - 2001:db8::ff00:42:8329%eth0   (contains a colon)
    # - [2001:db8::ff00:42:8329]      (contains a colon -- brackets should
    #                                  have been removed before it gets here)
    # Rare IP address formats are not supported per:
    # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
    if host and (host[-1].isdigit() or ":" in host):
        # RFC 6874 spells the IPv6 zone separator as the percent-encoded
        # ``%25``; bare ``%`` is still accepted so that hosts constructed
        # programmatically (e.g. ``with_host("fe80::1%1")``) keep working.
        zone_separator = "%25" if "%25" in host else "%"
        address, sep, zone = host.partition(zone_separator)
        try:
            ip = ip_address(address)
        except ValueError:
            # Not an IP address after all; fall through to the name handling.
            pass
        else:
            zone_is_bad = not zone or _ZONE_ID_UNSAFE_RE.search(zone)
            if sep and validate_host and zone_is_bad:
                raise ValueError("Invalid characters in zone identifier")
            # When no separator was found, sep and zone are both empty,
            # so this is just the compressed address.
            scoped = f"{ip.compressed}{sep}{zone}"
            return f"[{scoped}]" if ip.version == 6 else scoped

    # IDNA encoding is slow, so only non-ASCII hosts go through it.
    if not host.isascii():
        return _idna_encode(host)

    # Invalid characters are checked explicitly here; _idna_encode()
    # does this for non-ASCII host names.
    lowered = host.lower()
    if validate_host:
        invalid = NOT_REG_NAME.search(lowered)
        if invalid:
            value, pos = invalid.group(), invalid.start()
            hint = ""
            if value == "@" or (value == ":" and "@" in lowered[pos:]):
                # this looks like an authority string
                hint = (
                    ", if the value includes a username or password, "
                    "use 'authority' instead of 'host'"
                )
            raise ValueError(
                f"Host {lowered!r} cannot contain {value!r} (at position {pos}){hint}"
            ) from None
    return lowered
@rewrite_module
def cache_clear() -> None:
    """Clear all LRU caches."""
    for cached in (_idna_encode, _idna_decode, _encode_host):
        cached.cache_clear()
@rewrite_module
def cache_info() -> CacheInfo:
    """Report cache statistics.

    The ``ip_address``, ``host_validate`` and ``encode_host`` entries
    all report the statistics of the ``_encode_host`` cache.
    """
    encode_stats = _idna_encode.cache_info()
    decode_stats = _idna_decode.cache_info()
    host_stats = _encode_host.cache_info()
    return {
        "idna_encode": encode_stats,
        "idna_decode": decode_stats,
        "ip_address": host_stats,
        "host_validate": host_stats,
        "encode_host": host_stats,
    }
@rewrite_module
def cache_configure(
    *,
    idna_encode_size: int | None = _DEFAULT_IDNA_SIZE,
    idna_decode_size: int | None = _DEFAULT_IDNA_SIZE,
    ip_address_size: int | None | UndefinedType = UNDEFINED,
    host_validate_size: int | None | UndefinedType = UNDEFINED,
    encode_host_size: int | None | UndefinedType = UNDEFINED,
) -> None:
    """Configure LRU cache sizes.

    Each size is passed to ``functools.lru_cache`` as its maxsize, so
    ``None`` means an unbounded cache.  The three caches are rebuilt
    around the undecorated functions, which discards cached entries.

    ``ip_address_size`` and ``host_validate_size`` are deprecated.  When
    given, they are folded into ``encode_host_size``: ``None`` from any
    of the three wins, otherwise the largest value is used.  Passing
    either one emits a ``DeprecationWarning``.
    """
    global _idna_decode, _idna_encode, _encode_host
    # ip_address_size, host_validate_size are no longer
    # used, but are kept for backwards compatibility.
    if ip_address_size is not UNDEFINED or host_validate_size is not UNDEFINED:
        warnings.warn(
            "cache_configure() no longer accepts the "
            "ip_address_size or host_validate_size arguments, "
            "they are used to set the encode_host_size instead "
            "and will be removed in the future",
            DeprecationWarning,
            stacklevel=2,
        )

    if encode_host_size is not None:
        for size in (ip_address_size, host_validate_size):
            if size is None:
                # Unbounded wins over any bounded size.  Stop here:
                # carrying None into the next iteration would reach
                # max(size, None) and raise TypeError.
                encode_host_size = None
                break
            if encode_host_size is UNDEFINED:
                if size is not UNDEFINED:
                    encode_host_size = size
            elif size is not UNDEFINED:
                if TYPE_CHECKING:
                    assert isinstance(size, int)
                    assert isinstance(encode_host_size, int)
                encode_host_size = max(size, encode_host_size)
        if encode_host_size is UNDEFINED:
            encode_host_size = _DEFAULT_ENCODE_SIZE

    _encode_host = lru_cache(encode_host_size)(_encode_host.__wrapped__)
    _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
    _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__)