Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/_url.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

807 statements  

1import re 

2import sys 

3import warnings 

4from collections.abc import Mapping, Sequence 

5from enum import Enum 

6from functools import _CacheInfo, lru_cache 

7from importlib.util import find_spec 

8from ipaddress import ip_address 

9from typing import ( 

10 TYPE_CHECKING, 

11 Any, 

12 NoReturn, 

13 TypedDict, 

14 TypeVar, 

15 Union, 

16 cast, 

17 overload, 

18) 

19from urllib.parse import SplitResult, scheme_chars, uses_relative 

20 

21import idna 

22from multidict import MultiDict, MultiDictProxy, istr 

23from propcache.api import under_cached_property as cached_property 

24 

25from ._parse import ( 

26 USES_AUTHORITY, 

27 SplitURLType, 

28 make_netloc, 

29 query_to_pairs, 

30 split_netloc, 

31 split_url, 

32 unsplit_result, 

33) 

34from ._path import normalize_path, normalize_path_segments 

35from ._query import ( 

36 Query, 

37 QueryVariable, 

38 SimpleQuery, 

39 get_str_query, 

40 get_str_query_from_iterable, 

41 get_str_query_from_sequence_iterable, 

42) 

43from ._quoters import ( 

44 FRAGMENT_QUOTER, 

45 FRAGMENT_REQUOTER, 

46 PATH_QUOTER, 

47 PATH_REQUOTER, 

48 PATH_SAFE_UNQUOTER, 

49 PATH_UNQUOTER, 

50 QS_UNQUOTER, 

51 QUERY_QUOTER, 

52 QUERY_REQUOTER, 

53 QUOTER, 

54 REQUOTER, 

55 UNQUOTER, 

56 human_quote, 

57) 

58 

# Detect Pydantic support by looking up the spec of ``pydantic_core`` instead
# of importing Pydantic (importing it increases yarl's import time by 3-7x).
HAS_PYDANTIC: bool = find_spec("pydantic_core") is not None

61if TYPE_CHECKING: 

62 from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler 

63 from pydantic.json_schema import JsonSchemaValue 

64 from pydantic_core import CoreSchema 

65 

66 

# Default port for each scheme that has one; used to drop or fill in ports.
DEFAULT_PORTS = dict(http=80, https=443, ws=80, wss=443, ftp=21)
# Immutable snapshots of the urllib.parse tables, cheap to test membership on.
USES_RELATIVE = frozenset(uses_relative)
_SCHEME_CHARS = frozenset(scheme_chars)

# Special schemes (https://url.spec.whatwg.org/#special-scheme) are not
# allowed to have an empty host
# (https://url.spec.whatwg.org/#url-representation).
SCHEME_REQUIRES_HOST = frozenset({"http", "https", "ws", "wss", "ftp"})

74 

75 

# reg-name: unreserved / pct-encoded / sub-delims
# This pattern matches anything that is *not* in those classes, and is only
# used on lower-cased ASCII values (hence only ``a-z`` and ``a-f`` below).
NOT_REG_NAME = re.compile(
    r"""
        # any character not in the unreserved or sub-delims sets, plus %
        # (validated with the additional check for pct-encoded sequences below)
        [^a-z0-9\-._~!$&'()*+,;=%]
        |
        # % only allowed if it is part of a pct-encoded
        # sequence of 2 hex digits.
        %(?![0-9a-f]{2})
    """,
    re.VERBOSE,
)

# Zone IDs are OS-specific text strings with no format defined by the RFCs:
# https://datatracker.ietf.org/doc/html/rfc4007#section-11.2
# RFC 9844 §6.3 recommends rejecting characters inappropriate for the
# environment; for yarl we reject ASCII control characters (CTL), i.e.
# U+0000-U+001F and U+007F:
# https://datatracker.ietf.org/doc/html/rfc9844#section-6-3
_ZONE_ID_UNSAFE_RE = re.compile(r"[\x00-\x1f\x7f]")

98 

# Generic type variable used by helpers that return their argument unchanged.
_T = TypeVar("_T")

# ``typing.Self`` is only imported on Python 3.11+; on older interpreters the
# name falls back to ``Any`` so annotations that mention it still resolve.
if sys.version_info >= (3, 11):
    from typing import Self
else:
    Self = Any

105 

106 

class UndefinedType(Enum):
    """Single-member enum whose only value marks an argument as "not set"."""

    _singleton = 0


# The sentinel instance itself; ``URL.__new__`` uses it as the default value.
UNDEFINED = UndefinedType._singleton

114 

115 

class CacheInfo(TypedDict):
    """Mapping of host-handling cache names to their ``lru_cache`` statistics."""

    # Each value is the ``functools._CacheInfo`` tuple of one cached helper.
    idna_encode: _CacheInfo
    idna_decode: _CacheInfo
    ip_address: _CacheInfo
    host_validate: _CacheInfo
    encode_host: _CacheInfo

124 

125 

class _InternalURLCache(TypedDict, total=False):
    """Shape of the per-URL ``_cache`` dict.

    ``total=False`` because entries are filled in lazily: a key is only
    present once the corresponding value has been computed or pre-seeded.
    """

    # Whole-URL values.
    _val: SplitURLType
    _origin: "URL"
    absolute: bool
    hash: int
    scheme: str
    # Authority and its sub-components (raw = still percent-encoded).
    raw_authority: str
    authority: str
    raw_user: str | None
    user: str | None
    raw_password: str | None
    password: str | None
    raw_host: str | None
    host: str | None
    host_subcomponent: str | None
    host_port_subcomponent: str | None
    port: int | None
    explicit_port: int | None
    # Path, query and fragment.
    raw_path: str
    path: str
    _parsed_query: list[tuple[str, str]]
    query: "MultiDictProxy[str]"
    raw_query_string: str
    query_string: str
    path_qs: str
    raw_path_qs: str
    raw_fragment: str
    fragment: str
    # Values derived from the path segments.
    raw_parts: tuple[str, ...]
    parts: tuple[str, ...]
    parent: "URL"
    raw_name: str
    name: str
    raw_suffix: str
    suffix: str
    raw_suffixes: tuple[str, ...]
    suffixes: tuple[str, ...]

163 

164 

def rewrite_module(obj: _T) -> _T:
    """Set ``obj.__module__`` to ``"yarl"`` and return the same object.

    Written to be usable as a decorator.
    """
    setattr(obj, "__module__", "yarl")
    return obj

168 

169 

def _encode_relative_scheme_colon(path: str) -> str:
    """Percent-encode the first ``:`` of *path* when its prefix looks like a scheme.

    The path is returned unchanged when it has no colon, when the colon is
    the very first character, or when any character before the first colon
    is not a scheme character. Otherwise that first colon becomes ``%3A``.
    """
    prefix, colon, rest = path.partition(":")
    if not colon or not prefix:
        # No colon at all, or nothing in front of it.
        return path
    if any(ch not in _SCHEME_CHARS for ch in prefix):
        return path
    return f"{prefix}%3A{rest}"

179 

180 

@lru_cache
def encode_url(url_str: str) -> "URL":
    """Parse an unencoded URL string into a ``URL``.

    The string is split with ``split_url``; the host goes through
    ``_encode_host`` and user, password, path, query and fragment go through
    their requoters. Values already known at parse time are pre-seeded into
    the new URL's cache. Results are memoized by ``lru_cache``.

    Raises:
        ValueError: if the authority has no host and the scheme is one of
            ``SCHEME_REQUIRES_HOST``.
    """
    cache: _InternalURLCache = {}
    host: str | None
    scheme, netloc, path, query, fragment = split_url(url_str)
    if not netloc:  # no authority: nothing to split or encode
        host = ""
    else:
        if ":" in netloc or "@" in netloc or "[" in netloc:
            # Complex netloc
            username, password, host, port = split_netloc(netloc)
        else:
            # Plain host: the whole netloc is the host.
            username = password = port = None
            host = netloc
        if host is None:
            if scheme in SCHEME_REQUIRES_HOST:
                msg = (
                    "Invalid URL: host is required for "
                    f"absolute urls with the {scheme} scheme"
                )
                raise ValueError(msg)
            else:
                host = ""
        host = _encode_host(host, validate_host=False)
        # Remove brackets as host encoder adds back brackets for IPv6 addresses
        cache["raw_host"] = host[1:-1] if "[" in host else host
        cache["explicit_port"] = port
        if password is None and username is None:
            # Fast path for URLs without user, password
            netloc = host if port is None else f"{host}:{port}"
            cache["raw_user"] = None
            cache["raw_password"] = None
        else:
            # Empty strings and None are passed through without requoting.
            raw_user = REQUOTER(username) if username else username
            raw_password = REQUOTER(password) if password else password
            netloc = make_netloc(raw_user, raw_password, host, port)
            cache["raw_user"] = raw_user
            cache["raw_password"] = raw_password

    if path:
        path = PATH_REQUOTER(path)
        if netloc and "." in path:
            # Only paths of URLs with an authority are normalized here.
            path = normalize_path(path)
        elif not scheme and not netloc:
            # Relative reference: a scheme-shaped leading "name:" is re-encoded.
            path = _encode_relative_scheme_colon(path)
    if query:
        query = QUERY_REQUOTER(query)
    if fragment:
        fragment = FRAGMENT_REQUOTER(fragment)

    cache["scheme"] = scheme
    # An empty path next to a non-empty netloc is reported as "/".
    cache["raw_path"] = "/" if not path and netloc else path
    cache["raw_query_string"] = query
    cache["raw_fragment"] = fragment

    # Build the instance directly, bypassing URL.__new__.
    self = object.__new__(URL)
    self._scheme = scheme
    self._netloc = netloc
    self._path = path
    self._query = query
    self._fragment = fragment
    self._cache = cache
    return self

245 

246 

@lru_cache
def pre_encoded_url(url_str: str) -> "URL":
    """Build a ``URL`` from a string that is taken to be encoded already.

    The string is only split into its five components; nothing is requoted.
    Results are memoized by ``lru_cache``.
    """
    scheme, netloc, path, query, fragment = split_url(url_str)
    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query
    url._fragment = fragment
    url._cache = {}
    return url

255 

256 

@lru_cache
def build_pre_encoded_url(
    scheme: str,
    authority: str,
    user: str | None,
    password: str | None,
    host: str,
    port: int | None,
    path: str,
    query_string: str,
    fragment: str,
) -> "URL":
    """Assemble a ``URL`` from parts that are stored without any quoting.

    ``authority`` wins over ``host``; when only ``host`` is given, a port
    equal to the scheme's default is dropped before the netloc is composed.
    Results are memoized by ``lru_cache``.
    """
    if authority:
        netloc = authority
    elif not host:
        netloc = ""
    else:
        if port is not None and port == DEFAULT_PORTS.get(scheme):
            # Default port for this scheme: leave it out of the netloc.
            port = None
        if user is None and password is None:
            netloc = host if port is None else f"{host}:{port}"
        else:
            netloc = make_netloc(user, password, host, port)

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query_string
    url._fragment = fragment
    url._cache = {}
    return url

288 

289 

def from_parts_uncached(
    scheme: str, netloc: str, path: str, query: str, fragment: str
) -> "URL":
    """Wrap five ready-made components in a fresh ``URL`` with an empty cache."""
    url = object.__new__(URL)
    url._cache = {}
    url._scheme, url._netloc = scheme, netloc
    url._path, url._query, url._fragment = path, query, fragment
    return url


# Memoized variant of ``from_parts_uncached``.
from_parts = lru_cache(from_parts_uncached)

305 

306 

307@rewrite_module 

308class URL: 

309 # Don't derive from str 

310 # follow pathlib.Path design 

311 # probably URL will not suffer from pathlib problems: 

312 # it's intended for libraries like aiohttp, 

313 # not to be passed into standard library functions like os.open etc. 

314 

315 # URL grammar (RFC 3986) 

316 # pct-encoded = "%" HEXDIG HEXDIG 

317 # reserved = gen-delims / sub-delims 

318 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 

319 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 

320 # / "*" / "+" / "," / ";" / "=" 

321 # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 

322 # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] 

323 # hier-part = "//" authority path-abempty 

324 # / path-absolute 

325 # / path-rootless 

326 # / path-empty 

327 # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 

328 # authority = [ userinfo "@" ] host [ ":" port ] 

329 # userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 

330 # host = IP-literal / IPv4address / reg-name 

331 # IP-literal = "[" ( IPv6address / IPvFuture ) "]" 

332 # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) 

333 # IPv6address = 6( h16 ":" ) ls32 

334 # / "::" 5( h16 ":" ) ls32 

335 # / [ h16 ] "::" 4( h16 ":" ) ls32 

336 # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 

337 # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 

338 # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 

339 # / [ *4( h16 ":" ) h16 ] "::" ls32 

340 # / [ *5( h16 ":" ) h16 ] "::" h16 

341 # / [ *6( h16 ":" ) h16 ] "::" 

342 # ls32 = ( h16 ":" h16 ) / IPv4address 

343 # ; least-significant 32 bits of address 

344 # h16 = 1*4HEXDIG 

345 # ; 16 bits of address represented in hexadecimal 

346 # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 

347 # dec-octet = DIGIT ; 0-9 

348 # / %x31-39 DIGIT ; 10-99 

349 # / "1" 2DIGIT ; 100-199 

350 # / "2" %x30-34 DIGIT ; 200-249 

351 # / "25" %x30-35 ; 250-255 

352 # reg-name = *( unreserved / pct-encoded / sub-delims ) 

353 # port = *DIGIT 

354 # path = path-abempty ; begins with "/" or is empty 

355 # / path-absolute ; begins with "/" but not "//" 

356 # / path-noscheme ; begins with a non-colon segment 

357 # / path-rootless ; begins with a segment 

358 # / path-empty ; zero characters 

359 # path-abempty = *( "/" segment ) 

360 # path-absolute = "/" [ segment-nz *( "/" segment ) ] 

361 # path-noscheme = segment-nz-nc *( "/" segment ) 

362 # path-rootless = segment-nz *( "/" segment ) 

363 # path-empty = 0<pchar> 

364 # segment = *pchar 

365 # segment-nz = 1*pchar 

366 # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 

367 # ; non-zero-length segment without any colon ":" 

368 # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 

369 # query = *( pchar / "/" / "?" ) 

370 # fragment = *( pchar / "/" / "?" ) 

371 # URI-reference = URI / relative-ref 

372 # relative-ref = relative-part [ "?" query ] [ "#" fragment ] 

373 # relative-part = "//" authority path-abempty 

374 # / path-absolute 

375 # / path-noscheme 

376 # / path-empty 

377 # absolute-URI = scheme ":" hier-part [ "?" query ] 

378 __slots__ = ("_cache", "_scheme", "_netloc", "_path", "_query", "_fragment") 

379 

380 _cache: _InternalURLCache 

381 _scheme: str 

382 _netloc: str 

383 _path: str 

384 _query: str 

385 _fragment: str 

386 

def __new__(
    cls,
    val: Union[str, SplitResult, "URL", UndefinedType] = UNDEFINED,
    *,
    encoded: bool = False,
    strict: bool | None = None,
) -> "URL":
    """Create a URL from a string, a ``SplitResult`` or another URL.

    ``encoded=True`` skips requoting. A ``SplitResult`` is only accepted
    with ``encoded=True``. ``strict`` is ignored apart from a warning.

    Raises:
        ValueError: for a ``SplitResult`` with ``encoded=False``.
        TypeError: for any other unsupported argument type.
    """
    if strict is not None:  # pragma: no cover
        warnings.warn("strict parameter is ignored")
    val_type = type(val)
    if val_type is str:
        parse = pre_encoded_url if encoded else encode_url
        return parse(val)
    if val_type is cls:
        return val
    if val_type is SplitResult:
        if encoded:
            return from_parts(*val)
        raise ValueError("Cannot apply decoding to SplitResult")
    if isinstance(val, str):
        # str subclass: convert to a plain str before parsing.
        text = str(val)
        return pre_encoded_url(text) if encoded else encode_url(text)
    if val is not UNDEFINED:
        raise TypeError("Constructor parameter should be str")
    # UNDEFINED may mean the object is being unpickled. Do not go through
    # the cached parsers: the following ``__setstate__`` call would mutate
    # an object stored in the ``pre_encoded_url`` or ``encode_url`` caches.
    blank = object.__new__(URL)
    blank._scheme = blank._netloc = blank._path = blank._query = blank._fragment = ""
    blank._cache = {}
    return blank

415 

@classmethod
def build(
    cls,
    *,
    scheme: str = "",
    authority: str = "",
    user: str | None = None,
    password: str | None = None,
    host: str = "",
    port: int | None = None,
    path: str = "",
    query: Query | None = None,
    query_string: str = "",
    fragment: str = "",
    encoded: bool = False,
) -> "URL":
    """Create and return a new URL from individual components.

    ``authority`` is mutually exclusive with ``user``/``password``/``host``/
    ``port``, and ``query`` is mutually exclusive with ``query_string``.
    With ``encoded=True`` the parts are handed to ``build_pre_encoded_url``
    as they are; otherwise host, path, query string and fragment are quoted.

    Raises:
        ValueError: for conflicting arguments, a port without a host, or a
            path that does not start with ``/`` while an authority is set.
        TypeError: for a non-int port or ``None`` passed for a string part.
    """

    if authority and (user or password or host or port):
        raise ValueError(
            'Can\'t mix "authority" with "user", "password", "host" or "port".'
        )
    if port is not None and not isinstance(port, int):
        raise TypeError(f"The port is required to be int, got {type(port)!r}.")
    if port and not host:
        raise ValueError('Can\'t build URL with "port" but without "host".')
    if query and query_string:
        raise ValueError('Only one of "query" or "query_string" should be passed')
    if (
        scheme is None  # type: ignore[redundant-expr]
        or authority is None  # type: ignore[redundant-expr]
        or host is None  # type: ignore[redundant-expr]
        or path is None  # type: ignore[redundant-expr]
        or query_string is None  # type: ignore[redundant-expr]
        or fragment is None
    ):
        raise TypeError(
            'NoneType is illegal for "scheme", "authority", "host", "path", '
            '"query_string", and "fragment" args, use empty string instead.'
        )

    if query:
        # Serialize the structured query; it replaces query_string.
        query_string = get_str_query(query) or ""

    if encoded:
        return build_pre_encoded_url(
            scheme,
            authority,
            user,
            password,
            host,
            port,
            path,
            query_string,
            fragment,
        )

    self = object.__new__(URL)
    self._scheme = scheme
    # _host stays None only when neither authority nor host was given.
    _host: str | None = None
    if authority:
        user, password, _host, port = split_netloc(authority)
        _host = _encode_host(_host, validate_host=False) if _host else ""
    elif host:
        _host = _encode_host(host, validate_host=True)
    else:
        self._netloc = ""

    if _host is not None:
        if port is not None:
            # Drop the port when it equals the scheme's default.
            port = None if port == DEFAULT_PORTS.get(scheme) else port
        if user is None and password is None:
            self._netloc = _host if port is None else f"{_host}:{port}"
        else:
            self._netloc = make_netloc(user, password, _host, port, True)

    path = PATH_QUOTER(path) if path else path
    if path and self._netloc:
        if "." in path:
            path = normalize_path(path)
        if path[0] != "/":
            msg = (
                "Path in a URL with authority should "
                "start with a slash ('/') if set"
            )
            raise ValueError(msg)

    self._path = path
    if not query and query_string:
        # Quote only a caller-supplied query_string, not one produced from
        # ``query`` above.
        query_string = QUERY_QUOTER(query_string)
    self._query = query_string
    self._fragment = FRAGMENT_QUOTER(fragment) if fragment else fragment
    self._cache = {}
    return self

510 

def __init_subclass__(cls) -> NoReturn:
    """Refuse subclassing: always raises ``TypeError`` naming the subclass."""
    message = f"Inheriting a class {cls!r} from URL is forbidden"
    raise TypeError(message)

513 

def __str__(self) -> str:
    """Render the URL as a string.

    An empty path is written as ``/`` when a netloc is followed by a query
    or fragment, and an explicit port equal to the scheme default is left out.
    """
    path = self._path
    if not path and self._netloc and (self._query or self._fragment):
        path = "/"
    netloc = self._netloc
    explicit = self.explicit_port
    if explicit is not None and explicit == DEFAULT_PORTS.get(self._scheme):
        # port normalization - using None for default ports to remove from rendering
        # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3
        host = self.host_subcomponent
        netloc = make_netloc(self.raw_user, self.raw_password, host, None)
    return unsplit_result(self._scheme, netloc, path, self._query, self._fragment)

529 

def __repr__(self) -> str:
    """Return ``ClassName('<url>')`` using the string form of the URL."""
    class_name = self.__class__.__name__
    rendered = str(self)
    return class_name + "('" + rendered + "')"

532 

def __bytes__(self) -> bytes:
    """Return the string form of the URL encoded as ASCII bytes."""
    text = str(self)
    return text.encode("ascii")

535 

def __eq__(self, other: object) -> bool:
    """Compare component-wise with another URL.

    Returns ``NotImplemented`` for anything that is not exactly a ``URL``.
    An empty path next to a netloc compares equal to ``/``.
    """
    if type(other) is not URL:
        return NotImplemented

    own_path = self._path if self._path or not self._netloc else "/"
    their_path = other._path if other._path or not other._netloc else "/"
    mine = (self._scheme, self._netloc, own_path, self._query, self._fragment)
    theirs = (other._scheme, other._netloc, their_path, other._query, other._fragment)
    return mine == theirs

549 

def __hash__(self) -> int:
    """Hash the five components and remember the result in the cache.

    An empty path next to a netloc is hashed as ``/``, matching ``__eq__``.
    """
    cached = self._cache.get("hash")
    if cached is not None:
        return cached
    path = self._path if self._path or not self._netloc else "/"
    digest = hash((self._scheme, self._netloc, path, self._query, self._fragment))
    self._cache["hash"] = digest
    return digest

557 

def __le__(self, other: object) -> bool:
    """Order by the component tuple; ``NotImplemented`` unless *other* is a URL."""
    if type(other) is URL:
        return self._val <= other._val
    return NotImplemented

562 

def __lt__(self, other: object) -> bool:
    """Order by the component tuple; ``NotImplemented`` unless *other* is a URL."""
    if type(other) is URL:
        return self._val < other._val
    return NotImplemented

567 

def __ge__(self, other: object) -> bool:
    """Order by the component tuple; ``NotImplemented`` unless *other* is a URL."""
    if type(other) is URL:
        return self._val >= other._val
    return NotImplemented

572 

def __gt__(self, other: object) -> bool:
    """Order by the component tuple; ``NotImplemented`` unless *other* is a URL."""
    if type(other) is URL:
        return self._val > other._val
    return NotImplemented

577 

def __truediv__(self, name: str) -> "URL":
    """Append one path segment with ``url / "name"``.

    Returns ``NotImplemented`` when *name* is not a string.
    """
    if isinstance(name, str):
        segment = str(name)
        return self._make_child((segment,))
    return NotImplemented

582 

def __mod__(self, query: Query) -> "URL":
    """Support ``url % query`` as shorthand for ``url.update_query(query)``."""
    updated = self.update_query(query)
    return updated

585 

def __bool__(self) -> bool:
    """Return True when netloc, path, query or fragment is non-empty.

    The scheme is not taken into account.
    """
    components = (self._netloc, self._path, self._query, self._fragment)
    return any(components)

588 

def __getstate__(self) -> tuple[SplitURLType]:
    """Return the pickle state: a 1-tuple holding the component tuple."""
    # A plain tuple is used on purpose instead of a ``SplitResult``.
    # Building a ``SplitResult`` through ``tuple.__new__`` bypasses its
    # ``__init__``; on Python 3.15+ that leaves ``_keep_empty`` unset and
    # pickling fails because the new ``SplitResult.__getstate__`` indexes a
    # state that ends up as ``None`` (gh-1632). ``__setstate__`` unpacks
    # both shapes, so pickles from older yarl releases (which embed a real
    # ``SplitResult``) still load correctly.
    components = self._val
    return (components,)

598 

def __setstate__(
    self, state: tuple[SplitURLType] | tuple[None, _InternalURLCache]
) -> None:
    """Restore the five components from a pickle state and reset the cache.

    Accepts the ``(components,)`` form as well as the default-style
    ``(None, {"_val": components, ...})`` form.
    """
    first = state[0]
    if first is None and isinstance(state[1], dict):
        # default style pickle
        components = state[1]["_val"]
    else:
        components = first
    scheme, netloc, path, query, fragment = components
    self._scheme = scheme
    self._netloc = netloc
    self._path = path
    self._query = query
    self._fragment = fragment
    self._cache = {}

610 

def _cache_netloc(self) -> None:
    """Split the netloc once and store its four parts in the cache."""
    user, password, host, port = split_netloc(self._netloc)
    cache = self._cache
    cache["raw_user"] = user
    cache["raw_password"] = password
    cache["raw_host"] = host
    cache["explicit_port"] = port

616 

def is_absolute(self) -> bool:
    """A check for absolute URLs.

    Returns the value of the ``absolute`` property.

    It is preferred to use the ``.absolute`` property directly,
    as it is cached.
    """
    result = self.absolute
    return result

627 

def is_default_port(self) -> bool:
    """A check for default port.

    True when the port is the default one for the scheme, e.g. for
    'http://python.org' or 'http://python.org:80'; False otherwise.

    False for relative URLs.
    """
    explicit = self.explicit_port
    if explicit is not None:
        return explicit == DEFAULT_PORTS.get(self._scheme)
    # No explicit port: the default port is in use unless the URL is
    # relative, which has no implicit / default port at all.
    return self._netloc != ""

644 

def origin(self) -> "URL":
    """Return a URL made of the scheme, host and port parts only.

    user, password, path, query and fragment are removed.
    """
    # TODO: add a keyword-only option for keeping user/pass maybe?
    result = self._origin
    return result

653 

@cached_property
def _val(self) -> SplitURLType:
    """The five URL components as one tuple, in split order."""
    components = (
        self._scheme,
        self._netloc,
        self._path,
        self._query,
        self._fragment,
    )
    return components

657 

@cached_property
def _origin(self) -> "URL":
    """Return a URL made of the scheme, host and port parts only.

    user, password, path, query and fragment are removed.

    Raises:
        ValueError: if the URL has no netloc or no scheme.
    """
    authority = self._netloc
    if not authority:
        raise ValueError("URL should be absolute")
    scheme = self._scheme
    if not scheme:
        raise ValueError("URL should have scheme")
    if "@" in authority:
        # Credentials present: rebuild the netloc from host and port only.
        bare_host = self.host_subcomponent
        authority = make_netloc(None, None, bare_host, self.explicit_port)
    elif not (self._path or self._query or self._fragment):
        # Nothing to strip: this URL already is its own origin.
        return self
    return from_parts(scheme, authority, "", "", "")

674 

def relative(self) -> "URL":
    """Return the relative part of the URL.

    scheme, user, password, host and port are removed.

    Raises:
        ValueError: if the URL has no netloc.
    """
    if self._netloc:
        return from_parts("", "", self._path, self._query, self._fragment)
    raise ValueError("URL should be absolute")

684 

@cached_property
def absolute(self) -> bool:
    """A check for absolute URLs.

    True when the URL has a non-empty netloc, False otherwise.
    """
    # `netloc` is an empty string for relative URLs. Looking at `netloc`
    # is faster than looking at `hostname`, because `hostname` is a
    # property that does extra work to parse the host out of `netloc`.
    has_netloc = self._netloc != ""
    return has_netloc

698 

@cached_property
def scheme(self) -> str:
    """Scheme for absolute URLs.

    Empty string for relative URLs or URLs starting with //
    """
    value = self._scheme
    return value

707 

@cached_property
def raw_authority(self) -> str:
    """Encoded authority part of URL.

    Empty string for relative URLs.
    """
    value = self._netloc
    return value

716 

@cached_property
def authority(self) -> str:
    """Decoded authority part of URL.

    Composed from the decoded user, password and host plus the port.

    Empty string for relative URLs.
    """
    user, password = self.user, self.password
    host, port = self.host, self.port
    return make_netloc(user, password, host, port)

725 

726 @cached_property 

727 def raw_user(self) -> str | None: 

728 """Encoded user part of URL. 

729 

730 None if user is missing. 

731 

732 """ 

733 # not .username 

734 self._cache_netloc() 

735 return self._cache["raw_user"] 

736 

737 @cached_property 

738 def user(self) -> str | None: 

739 """Decoded user part of URL. 

740 

741 None if user is missing. 

742 

743 """ 

744 if (raw_user := self.raw_user) is None: 

745 return None 

746 return UNQUOTER(raw_user) 

747 

748 @cached_property 

749 def raw_password(self) -> str | None: 

750 """Encoded password part of URL. 

751 

752 None if password is missing. 

753 

754 """ 

755 self._cache_netloc() 

756 return self._cache["raw_password"] 

757 

758 @cached_property 

759 def password(self) -> str | None: 

760 """Decoded password part of URL. 

761 

762 None if password is missing. 

763 

764 """ 

765 if (raw_password := self.raw_password) is None: 

766 return None 

767 return UNQUOTER(raw_password) 

768 

769 @cached_property 

770 def raw_host(self) -> str | None: 

771 """Encoded host part of URL. 

772 

773 None for relative URLs. 

774 

775 When working with IPv6 addresses, use the `host_subcomponent` property instead 

776 as it will return the host subcomponent with brackets. 

777 """ 

778 # Use host instead of hostname for sake of shortness 

779 # May add .hostname prop later 

780 self._cache_netloc() 

781 return self._cache["raw_host"] 

782 

783 @cached_property 

784 def host(self) -> str | None: 

785 """Decoded host part of URL. 

786 

787 None for relative URLs. 

788 

789 For IPv6 hosts that carry an RFC 6874 zone identifier, the 

790 ``%25`` zone separator is decoded back to ``%``; the encoded 

791 form is still available via :attr:`raw_host` and 

792 :attr:`host_subcomponent`. 

793 

794 """ 

795 if (raw := self.raw_host) is None: 

796 return None 

797 if raw and raw[-1].isdigit() or ":" in raw: 

798 # IP addresses are never IDNA encoded; only the RFC 6874 

799 # zone separator needs to be decoded. 

800 if "%25" in raw: 

801 return raw.replace("%25", "%") 

802 return raw 

803 return _idna_decode(raw) 

804 

805 @cached_property 

806 def host_subcomponent(self) -> str | None: 

807 """Return the host subcomponent part of URL. 

808 

809 None for relative URLs. 

810 

811 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 

812 

813 `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` 

814 

815 Examples: 

816 - `http://example.com:8080` -> `example.com` 

817 - `http://example.com:80` -> `example.com` 

818 - `https://127.0.0.1:8443` -> `127.0.0.1` 

819 - `https://[::1]:8443` -> `[::1]` 

820 - `http://[::1]` -> `[::1]` 

821 

822 """ 

823 if (raw := self.raw_host) is None: 

824 return None 

825 return f"[{raw}]" if ":" in raw else raw 

826 

827 @cached_property 

828 def host_port_subcomponent(self) -> str | None: 

829 """Return the host and port subcomponent part of URL. 

830 

831 Trailing dots are removed from the host part. 

832 

833 This value is suitable for use in the Host header of an HTTP request. 

834 

835 None for relative URLs. 

836 

837 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 

838 `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` 

839 https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3 

840 port = *DIGIT 

841 

842 Examples: 

843 - `http://example.com:8080` -> `example.com:8080` 

844 - `http://example.com:80` -> `example.com` 

845 - `http://example.com.:80` -> `example.com` 

846 - `https://127.0.0.1:8443` -> `127.0.0.1:8443` 

847 - `https://[::1]:8443` -> `[::1]:8443` 

848 - `http://[::1]` -> `[::1]` 

849 

850 """ 

851 if (raw := self.raw_host) is None: 

852 return None 

853 if raw[-1] == ".": 

854 # Remove all trailing dots from the netloc as while 

855 # they are valid FQDNs in DNS, TLS validation fails. 

856 # See https://github.com/aio-libs/aiohttp/issues/3636. 

857 # To avoid string manipulation we only call rstrip if 

858 # the last character is a dot. 

859 raw = raw.rstrip(".") 

860 port = self.explicit_port 

861 if port is None or port == DEFAULT_PORTS.get(self._scheme): 

862 return f"[{raw}]" if ":" in raw else raw 

863 return f"[{raw}]:{port}" if ":" in raw else f"{raw}:{port}" 

864 

865 @cached_property 

866 def port(self) -> int | None: 

867 """Port part of URL, with scheme-based fallback. 

868 

869 None for relative URLs or URLs without explicit port and 

870 scheme without default port substitution. 

871 

872 """ 

873 if (explicit_port := self.explicit_port) is not None: 

874 return explicit_port 

875 return DEFAULT_PORTS.get(self._scheme) 

876 

877 @cached_property 

878 def explicit_port(self) -> int | None: 

879 """Port part of URL, without scheme-based fallback. 

880 

881 None for relative URLs or URLs without explicit port. 

882 

883 """ 

884 self._cache_netloc() 

885 return self._cache["explicit_port"] 

886 

@cached_property
def raw_path(self) -> str:
    """Encoded path of URL.

    / for absolute URLs without path part.
    """
    if not self._path and self._netloc:
        return "/"
    return self._path

895 

@cached_property
def path(self) -> str:
    """Decoded path of URL.

    / for absolute URLs without path part.
    """
    if self._path:
        return PATH_UNQUOTER(self._path)
    if self._netloc:
        return "/"
    return ""

904 

@cached_property
def path_safe(self) -> str:
    """Decoded path of URL.

    / for absolute URLs without path part.

    / (%2F) and % (%25) are not decoded
    """
    encoded = self._path
    if not encoded:
        return "/" if self._netloc else ""
    return PATH_SAFE_UNQUOTER(encoded)

917 

@cached_property
def _parsed_query(self) -> list[tuple[str, str]]:
    """Query string run through ``query_to_pairs``."""
    pairs = query_to_pairs(self._query)
    return pairs

922 

@cached_property
def query(self) -> "MultiDictProxy[str]":
    """A MultiDictProxy over the parsed query parameters.

    Built from the pairs produced by ``_parsed_query``.
    """
    params = MultiDict(self._parsed_query)
    return MultiDictProxy(params)

932 

@cached_property
def raw_query_string(self) -> str:
    """Encoded query part of URL.

    Empty string if query is missing.
    """
    value = self._query
    return value

941 

@cached_property
def query_string(self) -> str:
    """Decoded query part of URL.

    Empty string if query is missing.
    """
    encoded = self._query
    if not encoded:
        return ""
    return QS_UNQUOTER(encoded)

950 

@cached_property
def path_qs(self) -> str:
    """Decoded path of URL with query."""
    decoded_query = self.query_string
    if decoded_query:
        return f"{self.path}?{decoded_query}"
    return self.path

955 

@cached_property
def raw_path_qs(self) -> str:
    """Encoded path of URL with query."""
    # An empty path next to a netloc is rendered as "/".
    encoded_path = self._path if self._path or not self._netloc else "/"
    encoded_query = self._query
    if encoded_query:
        return f"{encoded_path}?{encoded_query}"
    return encoded_path

962 

@cached_property
def raw_fragment(self) -> str:
    """Encoded fragment part of URL.

    Empty string if fragment is missing.
    """
    value = self._fragment
    return value

971 

@cached_property
def fragment(self) -> str:
    """Decoded fragment part of URL.

    Empty string if fragment is missing.
    """
    encoded = self._fragment
    if not encoded:
        return ""
    return UNQUOTER(encoded)

980 

@cached_property
def raw_parts(self) -> tuple[str, ...]:
    """A tuple containing encoded *path* parts.

    ('/',) for absolute URLs if *path* is missing.
    """
    path = self._path
    has_netloc = bool(self._netloc)
    if has_netloc and not path:
        return ("/",)
    if has_netloc or path[:1] == "/":
        # Root marker first, then the segments after the first character.
        return ("/",) + tuple(path[1:].split("/"))
    return tuple(path.split("/"))

994 

@cached_property
def parts(self) -> tuple[str, ...]:
    """A tuple containing decoded *path* parts.

    ('/',) for absolute URLs if *path* is missing.
    """
    decoded = [UNQUOTER(segment) for segment in self.raw_parts]
    return tuple(decoded)

1003 

@cached_property
def parent(self) -> "URL":
    """Return a URL with the last path segment, query and fragment removed.

    For an empty or root path the path is kept; ``self`` is returned
    unchanged when there is no query or fragment to strip either.

    """
    encoded_path = self._path
    if encoded_path and encoded_path != "/":
        # Everything before the final "/" is the parent path.
        parent_path = encoded_path.rpartition("/")[0]
        return from_parts(self._scheme, self._netloc, parent_path, "", "")
    if self._fragment or self._query:
        return from_parts(self._scheme, self._netloc, encoded_path, "", "")
    return self

1017 

@cached_property
def raw_name(self) -> str:
    """Return the last element of ``raw_parts``.

    For a URL with a network location the leading root element is not
    counted, so a bare root path yields an empty string.

    """
    segments = self.raw_parts
    if self._netloc:
        return segments[-1] if len(segments) > 1 else ""
    return segments[-1]

1026 

@cached_property
def name(self) -> str:
    """Return ``raw_name`` after percent-decoding."""
    encoded_name = self.raw_name
    return UNQUOTER(encoded_name)

1031 

@cached_property
def raw_suffix(self) -> str:
    """Return the encoded final extension of ``raw_name``, including the dot.

    An empty string is returned when the name has no dot, starts with its
    only dot, or ends with a dot.

    """
    filename = self.raw_name
    dot_index = filename.rfind(".")
    if dot_index <= 0 or dot_index >= len(filename) - 1:
        return ""
    return filename[dot_index:]

1037 

@cached_property
def suffix(self) -> str:
    """Return ``raw_suffix`` after percent-decoding."""
    encoded_suffix = self.raw_suffix
    return UNQUOTER(encoded_suffix)

1041 

@cached_property
def raw_suffixes(self) -> tuple[str, ...]:
    """Return every encoded extension of ``raw_name``, each with its dot.

    A name ending in a dot has no suffixes.  Leading dots are ignored, so
    a name such as ``.hidden`` has none either.

    """
    filename = self.raw_name
    if filename[-1:] == ".":
        return ()
    _stem, *extensions = filename.lstrip(".").split(".")
    return tuple(f".{extension}" for extension in extensions)

1049 

@cached_property
def suffixes(self) -> tuple[str, ...]:
    """Return the elements of ``raw_suffixes`` after percent-decoding."""
    return tuple(map(UNQUOTER, self.raw_suffixes))

1053 

def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":
    """
    Build a new URL whose path is self._path extended by *paths*.

    Existing empty segments are kept but no new ones are created; the
    query and fragment of the result are empty.  Each element of *paths*
    is quoted with PATH_QUOTER unless *encoded* is true.

    Raises ValueError when an element of *paths* starts with a slash.
    """
    # Segments are collected back-to-front and flipped once at the end.
    collected: list[str] = []
    has_dot = False
    is_final = True
    for piece in reversed(paths):
        if piece and piece[0] == "/":
            raise ValueError(
                f"Appending path {piece!r} starting from slash is forbidden"
            )
        # Quote each new piece on its own: the existing path is already
        # quoted and must not be quoted a second time.
        quoted = piece if encoded else PATH_QUOTER(piece)
        if "." in quoted:
            has_dot = True
        backwards = quoted.split("/")[::-1]
        # A trailing empty segment survives only on the very last piece.
        if not is_final and backwards[0] == "":
            backwards = backwards[1:]
        collected.extend(backwards)
        is_final = False

    current_path = self._path
    if current_path:
        current_segments = current_path.split("/")
        # A current path ending in "/" leaves an empty last segment, which
        # is dropped before the new segments are attached.
        if current_segments[-1] == "":
            current_segments = current_segments[:-1]
        collected.extend(reversed(current_segments))

    # With a netloc, make sure the resulting path starts with a slash even
    # when there was no path before.
    netloc = self._netloc
    if netloc and collected and collected[-1] != "":
        collected.append("")

    ordered = collected[::-1]
    if netloc and has_dot:
        new_path = "/".join(normalize_path_segments(ordered))
        # Normalization may have removed the leading slash; restore it.
        if new_path and new_path[0] != "/":
            new_path = f"/{new_path}"
    else:
        new_path = "/".join(ordered)
    return from_parts(self._scheme, netloc, new_path, "", "")

1100 

def with_scheme(self, scheme: str) -> "URL":
    """Return a new URL with the scheme replaced by lower-cased *scheme*.

    Raises TypeError when *scheme* is not a str, and ValueError when the
    URL has no network location and the new scheme is listed in
    SCHEME_REQUIRES_HOST.

    """
    # N.B. query and fragment are carried over untouched
    if not isinstance(scheme, str):
        raise TypeError("Invalid scheme type")
    new_scheme = scheme.lower()
    authority = self._netloc
    if new_scheme in SCHEME_REQUIRES_HOST and not authority:
        raise ValueError(
            "scheme replacement is not allowed for "
            f"relative URLs for the {new_scheme} scheme"
        )
    return from_parts(new_scheme, authority, self._path, self._query, self._fragment)

1115 

def with_user(self, user: str | None) -> "URL":
    """Return a new URL with the user replaced.

    The user is quoted with QUOTER.  Passing None clears both the user
    and the password.

    Raises TypeError when *user* is neither str nor None, and ValueError
    when the URL has no network location.

    """
    # N.B. query and fragment are carried over untouched
    if user is not None and not isinstance(user, str):
        raise TypeError("Invalid user type")
    if user is None:
        new_user, new_password = None, None
    else:
        new_user = QUOTER(user)
        new_password = self.raw_password
    if not self._netloc:
        raise ValueError("user replacement is not allowed for relative URLs")
    host = self.host_subcomponent or ""
    new_netloc = make_netloc(new_user, new_password, host, self.explicit_port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )

1137 

def with_password(self, password: str | None) -> "URL":
    """Return a new URL with the password replaced.

    The password is quoted with QUOTER.  Passing None clears it.

    Raises TypeError when *password* is neither str nor None, and
    ValueError when the URL has no network location.

    """
    # N.B. query and fragment are carried over untouched
    if password is not None:
        if not isinstance(password, str):
            raise TypeError("Invalid password type")
        password = QUOTER(password)
    if not self._netloc:
        raise ValueError("password replacement is not allowed for relative URLs")
    host = self.host_subcomponent or ""
    explicit_port = self.explicit_port
    new_netloc = make_netloc(self.raw_user, password, host, explicit_port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )

1159 

def with_host(self, host: str) -> "URL":
    """Return a new URL with host replaced.

    The host is encoded and validated with ``_encode_host``.  User,
    password, explicit port, path, query and fragment are carried over.

    Changing host for relative URLs is not allowed, use .join()
    instead.

    Raises TypeError when *host* is not a str.  Raises ValueError when
    the URL has no network location, when *host* is empty, or when
    ``_encode_host`` rejects the value.

    """
    # N.B. doesn't cleanup query/fragment
    if not isinstance(host, str):
        raise TypeError("Invalid host type")
    if not self._netloc:
        raise ValueError("host replacement is not allowed for relative URLs")
    if not host:
        raise ValueError("host removing is not allowed")
    # host is known to be non-empty here, so it is always encoded.
    encoded_host = _encode_host(host, validate_host=True)
    port = self.explicit_port
    netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
    return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

1180 

def with_port(self, port: int | None) -> "URL":
    """Return a new URL with the port replaced.

    Passing None removes the explicit port.

    Raises TypeError when *port* is a bool or not an int, and ValueError
    when it is outside 0..65535 or the URL has no network location.

    """
    # N.B. query and fragment are carried over untouched
    if port is not None:
        # bool is an int subclass, so it has to be rejected explicitly.
        if isinstance(port, bool) or not isinstance(port, int):
            raise TypeError(f"port should be int or None, got {type(port)}")
        if port < 0 or port > 65535:
            raise ValueError(f"port must be between 0 and 65535, got {port}")
    if not self._netloc:
        raise ValueError("port replacement is not allowed for relative URLs")
    host = self.host_subcomponent or ""
    new_netloc = make_netloc(self.raw_user, self.raw_password, host, port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )

1198 

def with_path(
    self,
    path: str,
    *,
    encoded: bool = False,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with the path replaced.

    Unless *encoded* is true the path is quoted with PATH_QUOTER and, for
    URLs with a network location, normalized when it contains a dot.  A
    non-empty path always gets a leading slash.  Query and fragment are
    dropped unless *keep_query* / *keep_fragment* are set.

    """
    authority = self._netloc
    if not encoded:
        path = PATH_QUOTER(path)
        if authority and "." in path:
            path = normalize_path(path)
    if path and path[0] != "/":
        path = f"/{path}"
    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(self._scheme, authority, path, new_query, new_fragment)

1218 

@overload
def with_query(self, query: Query) -> "URL": ...

@overload
def with_query(self, **kwargs: QueryVariable) -> "URL": ...

def with_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with the query part replaced.

    The argument may be any Mapping (e.g. dict, multidict.MultiDict
    instances), a str, or a sequence of (key, value) pairs; it is encoded
    if needed.

    Alternatively the query can be given as keyword arguments.

    Clear query if None is passed.

    """
    # N.B. the fragment is carried over untouched
    encoded_query = get_str_query(*args, **kwargs)
    if not encoded_query:
        encoded_query = ""
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, encoded_query, self._fragment
    )

1243 

@overload
def extend_query(self, query: Query) -> "URL": ...

@overload
def extend_query(self, **kwargs: QueryVariable) -> "URL": ...

def extend_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with extra parameters appended to the query.

    Existing query parameters are kept, even when a key is repeated.
    ``self`` is returned when there is nothing to append.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.extend_query(a=3, c=4)
    URL('http://example.com/?a=1&b=2&a=3&c=4')
    """
    addition = get_str_query(*args, **kwargs)
    if not addition:
        return self
    existing = self._query
    # Both pieces are already encoded, so plain concatenation is enough.
    if not existing:
        combined = addition
    elif existing[-1] == "&":
        combined = existing + addition
    else:
        combined = f"{existing}&{addition}"
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, combined, self._fragment
    )

1271 

@overload
def update_query(self, query: Query) -> "URL": ...

@overload
def update_query(self, **kwargs: QueryVariable) -> "URL": ...

def update_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a new URL with the query part updated.

    Parameters whose key already exists in the query are overwritten.
    Passing None clears the query; any other empty value leaves it as is.

    Raises ValueError unless exactly one positional argument or only
    keyword arguments are given, and TypeError for an unsupported
    argument type.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.update_query(a=3, c=4)
    URL('http://example.com/?a=3&b=2&c=4')
    """
    in_query: (
        str
        | Mapping[str, QueryVariable]
        | Sequence[tuple[str | istr, SimpleQuery]]
        | None
    )
    usage_error = "Either kwargs or single query parameter must be present"
    if kwargs and args:
        raise ValueError(usage_error)
    if kwargs:
        in_query = kwargs
    elif len(args) == 1:
        in_query = args[0]
    else:
        raise ValueError(usage_error)

    # The emptiness check deliberately precedes the type checks.
    if in_query is None:
        new_query = ""
    elif not in_query:
        new_query = self._query
    elif isinstance(in_query, Mapping):
        from_mapping: MultiDict[QueryVariable] = MultiDict(self._parsed_query)
        from_mapping.update(in_query)
        new_query = get_str_query_from_sequence_iterable(from_mapping.items())
    elif isinstance(in_query, str):
        from_string: MultiDict[str] = MultiDict(self._parsed_query)
        from_string.update(query_to_pairs(in_query))
        new_query = get_str_query_from_iterable(from_string.items())
    elif isinstance(in_query, (bytes, bytearray, memoryview)):
        raise TypeError(
            "Invalid query type: bytes, bytearray and memoryview are forbidden"
        )
    elif isinstance(in_query, Sequence):
        # A list of pairs is not expected to carry sequence values; only
        # mappings such as the builtin `dict`, which cannot map one key to
        # several values, go through the sequence-aware encoder.
        if TYPE_CHECKING:
            in_query = cast(
                Sequence[tuple[Union[str, istr], SimpleQuery]], in_query
            )
        from_pairs: MultiDict[SimpleQuery] = MultiDict(self._parsed_query)
        from_pairs.update(in_query)
        new_query = get_str_query_from_iterable(from_pairs.items())
    else:
        raise TypeError(
            "Invalid query type: only str, mapping or "
            "sequence of (key, value) pairs is allowed"
        )
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, new_query, self._fragment
    )

1339 

def without_query_params(self, *query_params: str) -> "URL":
    """Return a URL without the given query keys.

    ``self`` is returned when none of the keys is present in the query.

    """
    current = self.query
    doomed = set(query_params) & current.keys()
    if not doomed:
        return self
    surviving = tuple(
        (key, value) for key, value in current.items() if key not in doomed
    )
    return self.with_query(surviving)

1352 

def with_fragment(self, fragment: str | None) -> "URL":
    """Return a new URL with the fragment replaced.

    The fragment is quoted with FRAGMENT_QUOTER.  Passing None clears it.
    ``self`` is returned when the encoded fragment is unchanged.

    Raises TypeError when *fragment* is neither str nor None.

    """
    # N.B. the query is carried over untouched
    if fragment is not None and not isinstance(fragment, str):
        raise TypeError("Invalid fragment type")
    encoded_fragment = "" if fragment is None else FRAGMENT_QUOTER(fragment)
    if encoded_fragment == self._fragment:
        return self
    return from_parts(
        self._scheme, self._netloc, self._path, self._query, encoded_fragment
    )

1373 

def with_name(
    self,
    name: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with name (last part of path) replaced.

    Query and fragment are dropped unless *keep_query* / *keep_fragment*
    are set.

    Name is quoted with PATH_QUOTER.

    Raises TypeError when *name* is not a str, and ValueError when it
    contains a slash or is ``.`` or ``..`` after quoting.

    """
    if not isinstance(name, str):
        raise TypeError("Invalid name type")
    if "/" in name:
        raise ValueError("Slash in name is not allowed")
    name = PATH_QUOTER(name)
    if name in (".", ".."):
        raise ValueError(". and .. values are forbidden")

    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc and len(segments) == 1:
        # Only the root element is present: add the name after it.
        segments.append(name)
    else:
        segments[-1] = name
    if netloc or segments[0] == "/":
        segments[0] = ""  # the join below re-creates the leading '/'

    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(
        self._scheme, netloc, "/".join(segments), new_query, new_fragment
    )

1411 

def with_suffix(
    self,
    suffix: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a new URL with suffix (file extension of name) replaced.

    Query and fragment are dropped unless *keep_query* / *keep_fragment*
    are set.

    suffix is quoted with PATH_QUOTER.  An empty suffix removes the
    current one.

    Raises TypeError when *suffix* is not a str.  Raises ValueError when
    it is malformed, when the URL has an empty name, or when the
    resulting name would be ``.`` or ``..``.
    """
    if not isinstance(suffix, str):
        raise TypeError("Invalid suffix type")
    lacks_leading_dot = suffix != "" and suffix[0] != "."
    if lacks_leading_dot or suffix == "." or "/" in suffix:
        raise ValueError(f"Invalid suffix {suffix!r}")
    current_name = self.raw_name
    if not current_name:
        raise ValueError(f"{self!r} has an empty name")
    current_suffix = self.raw_suffix
    suffix = PATH_QUOTER(suffix)
    if current_suffix:
        stem = current_name[: -len(current_suffix)]
    else:
        stem = current_name
    new_name = stem + suffix
    if new_name in (".", ".."):
        raise ValueError(". and .. values are forbidden")

    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc and len(segments) == 1:
        # Only the root element is present: add the name after it.
        segments.append(new_name)
    else:
        segments[-1] = new_name
    if netloc or segments[0] == "/":
        segments[0] = ""  # the join below re-creates the leading '/'

    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(
        self._scheme, netloc, "/".join(segments), new_query, new_fragment
    )

1452 

def join(self, url: "URL") -> "URL":
    """Join URLs

    Resolve *url* against this URL (the "base URL") and return the
    resulting URL.

    Components missing from *url* (scheme, network location and part
    of the path) are taken from the base URL.  *url* is returned as is
    when its scheme differs from the base scheme or is not in
    USES_RELATIVE.

    Raises TypeError when *url* is not exactly a URL instance.

    """
    if type(url) is not URL:
        raise TypeError("url should be URL")

    base_scheme = self._scheme
    scheme = url._scheme or base_scheme
    if scheme != base_scheme or scheme not in USES_RELATIVE:
        return url

    # scheme is in uses_authority as uses_authority is a superset of uses_relative
    other_netloc = url._netloc
    if other_netloc and scheme in USES_AUTHORITY:
        return from_parts(scheme, other_netloc, url._path, url._query, url._fragment)

    base_path = self._path
    rel_path = url._path
    if not rel_path:
        # No path on the reference: keep the base path and fall back to
        # the base query/fragment when the reference has none.
        merged_path = base_path
        new_query = url._query or self._query
        new_fragment = url._fragment or self._fragment
    else:
        if rel_path[0] == "/":
            merged_path = rel_path
        elif not base_path:
            merged_path = f"/{rel_path}"
        elif base_path[-1] == "/":
            merged_path = base_path + rel_path
        else:
            # Replace the last segment of the base path.
            # parts[0] is / for absolute urls, so this join produces a
            # double slash at the start ...
            # NOTE(review): ``self.parts`` holds decoded segments while the
            # result is used as a raw path; ``raw_parts`` may be the
            # intended source -- confirm before changing.
            merged_path = "/".join([*self.parts[:-1], ""]) + rel_path
            # ... which has to be removed
            if base_path[0] == "/":
                merged_path = merged_path[1:]
        if "." in merged_path:
            merged_path = normalize_path(merged_path)
        new_query = url._query
        new_fragment = url._fragment

    return from_parts(scheme, self._netloc, merged_path, new_query, new_fragment)

1504 

def joinpath(self, *other: str, encoded: bool = False) -> "URL":
    """Return a new URL with every element of *other* appended to the path."""
    extended = self._make_child(other, encoded=encoded)
    return extended

1508 

def human_repr(self) -> str:
    """Return the URL as a decoded, human readable string."""
    userinfo_unsafe = "#/:?@[]\\"
    query_unsafe = "#&+;="

    user = human_quote(self.user, userinfo_unsafe)
    password = human_quote(self.password, userinfo_unsafe)

    host = self.host
    if host and ":" in host:
        # A host containing a colon is wrapped in brackets.
        host = f"[{host}]"

    path = human_quote(self.path, "#?")
    if TYPE_CHECKING:
        assert path is not None
    if not (self._scheme or self._netloc):
        path = _encode_relative_scheme_colon(path)

    pairs = (
        f"{human_quote(key, query_unsafe)}={human_quote(value, query_unsafe)}"
        for key, value in self.query.items()
    )
    query_string = "&".join(pairs)

    fragment = human_quote(self.fragment, "")
    if TYPE_CHECKING:
        assert fragment is not None

    netloc = make_netloc(user, password, host, self.explicit_port)
    return unsplit_result(self._scheme, netloc, path, query_string, fragment)

1529 

if HAS_PYDANTIC:
    # Borrowed from https://docs.pydantic.dev/latest/concepts/types/#handling-third-party-types
    @classmethod
    def __get_pydantic_json_schema__(
        cls,
        core_schema: "CoreSchema",
        handler: "GetJsonSchemaHandler",
    ) -> "JsonSchemaValue":
        """Return the JSON schema for this type: a string with ``uri`` format."""
        schema: dict[str, Any] = {"type": "string", "format": "uri"}
        return schema

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        source_type: type[Self] | type[str],
        handler: "GetCoreSchemaHandler",
    ) -> "CoreSchema":
        """Return the pydantic core schema for this type.

        JSON input goes through a str schema followed by ``URL``; Python
        input may also be an existing ``URL`` instance.  Serialization
        uses ``str``.
        """
        # Lazy import: pulling in pydantic_core at module load time
        # increases yarl's import cost 3-7x for users who don't use
        # pydantic. Keep this import function-scoped.
        from pydantic_core import core_schema  # noqa: PLC0415

        str_then_url = core_schema.chain_schema(
            [
                core_schema.str_schema(),
                core_schema.no_info_plain_validator_function(URL),
            ]
        )
        python_side = core_schema.union_schema(
            [
                # try the instance check first, before any further work
                core_schema.is_instance_schema(URL),
                str_then_url,
            ]
        )
        to_string = core_schema.plain_serializer_function_ser_schema(str)
        return core_schema.json_or_python_schema(
            json_schema=str_then_url,
            python_schema=python_side,
            serialization=to_string,
        )

1571 

1572 

# Default LRU cache size for _idna_decode() and _idna_encode(); also the
# default of the idna_*_size arguments of cache_configure().
_DEFAULT_IDNA_SIZE = 256
# Default LRU cache size for _encode_host(); cache_configure() falls back
# to it when no encode-host size is supplied.
_DEFAULT_ENCODE_SIZE = 512

1575 

1576 

@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_decode(raw: str) -> str:
    """Decode an ASCII IDNA host name, with the ``idna`` codec as fallback."""
    try:
        decoded = idna.decode(raw.encode("ascii"))
    except UnicodeError:  # e.g. '::1'
        decoded = raw.encode("ascii").decode("idna")
    return decoded

1583 

1584 

@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_encode(host: str) -> str:
    """Encode a host name to ASCII IDNA, with the ``idna`` codec as fallback."""
    try:
        encoded = idna.encode(host, uts46=True).decode("ascii")
    except UnicodeError:
        encoded = host.encode("idna").decode("ascii")
    return encoded

1591 

1592 

@lru_cache(_DEFAULT_ENCODE_SIZE)
def _encode_host(host: str, validate_host: bool) -> str:
    """Encode host part of URL.

    IP literals are returned in compressed form (IPv6 in brackets, with
    any zone identifier kept).  ASCII host names are lower-cased, and
    other host names are IDNA-encoded.

    Raises ValueError when *validate_host* is true and the zone
    identifier or the ASCII host name contains a forbidden character.
    """
    # A trailing digit or a colon means the host is likely an IP address:
    # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
    # - 127.0.0.1 (last character is a digit)
    # - 2001:db8::ff00:42:8329 (contains a colon)
    # - 2001:db8::ff00:42:8329%eth0 (contains a colon)
    # - [2001:db8::ff00:42:8329] (contains a colon -- brackets should
    #   have been removed before it gets here)
    # Rare IP address formats are not supported per:
    # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
    #
    # IP parsing is far slower than anything else done here, so it is
    # attempted only for hosts that pass this cheap check.
    looks_like_ip = bool(host) and (host[-1].isdigit() or ":" in host)
    if looks_like_ip:
        # RFC 6874 spells the IPv6 zone separator as the percent-encoded
        # ``%25``; bare ``%`` is still accepted so that hosts constructed
        # programmatically (e.g. ``with_host("fe80::1%1")``) keep working.
        separator = "%25" if "%25" in host else "%"
        address_text, sep, zone = host.partition(separator)
        try:
            ip = ip_address(address_text)
        except ValueError:
            # Not an IP address after all: treat it as a host name below.
            pass
        else:
            if sep and validate_host:
                if not zone or _ZONE_ID_UNSAFE_RE.search(zone):
                    raise ValueError("Invalid characters in zone identifier")
            # Without a separator both ``sep`` and ``zone`` are empty, so
            # this is just the compressed address.
            literal = f"{ip.compressed}{sep}{zone}"
            if ip.version == 6:
                return f"[{literal}]"
            return literal

    # IDNA encoding is slow and only needed for non-ASCII host names.
    if not host.isascii():
        return _idna_encode(host)

    # Check for invalid characters explicitly; _idna_encode() does this
    # for non-ascii host names.
    host = host.lower()
    if validate_host:
        invalid = NOT_REG_NAME.search(host)
        if invalid:
            value = invalid.group()
            pos = invalid.start()
            hint = ""
            if value == "@" or (value == ":" and "@" in host[pos:]):
                # this looks like an authority string
                hint = (
                    ", if the value includes a username or password, "
                    "use 'authority' instead of 'host'"
                )
            raise ValueError(
                f"Host {host!r} cannot contain {value!r} (at position {pos}){hint}"
            ) from None
    return host

1655 

1656 

@rewrite_module
def cache_clear() -> None:
    """Clear all LRU caches."""
    for cached_function in (_idna_encode, _idna_decode, _encode_host):
        cached_function.cache_clear()

1663 

1664 

@rewrite_module
def cache_info() -> CacheInfo:
    """Report cache statistics.

    The ``ip_address`` and ``host_validate`` entries report the statistics
    of the ``_encode_host`` cache, the same as ``encode_host``.
    """
    idna_encode_stats = _idna_encode.cache_info()
    idna_decode_stats = _idna_decode.cache_info()
    encode_host_stats = _encode_host.cache_info()
    return {
        "idna_encode": idna_encode_stats,
        "idna_decode": idna_decode_stats,
        "ip_address": encode_host_stats,
        "host_validate": encode_host_stats,
        "encode_host": encode_host_stats,
    }

1675 

1676 

@rewrite_module
def cache_configure(
    *,
    idna_encode_size: int | None = _DEFAULT_IDNA_SIZE,
    idna_decode_size: int | None = _DEFAULT_IDNA_SIZE,
    ip_address_size: int | None | UndefinedType = UNDEFINED,
    host_validate_size: int | None | UndefinedType = UNDEFINED,
    encode_host_size: int | None | UndefinedType = UNDEFINED,
) -> None:
    """Configure LRU cache sizes.

    Each size is passed to ``lru_cache`` as its ``maxsize``; None requests
    an unbounded cache.  The three caches are rebuilt, so their current
    contents are discarded.

    ``ip_address_size`` and ``host_validate_size`` are deprecated and only
    influence ``encode_host_size``: if any of the three is None the
    encode-host cache is unbounded, otherwise the largest supplied value
    is used, falling back to ``_DEFAULT_ENCODE_SIZE`` when none is given.
    """
    global _idna_decode, _idna_encode, _encode_host
    # ip_address_size, host_validate_size are no longer
    # used, but are kept for backwards compatibility.
    if ip_address_size is not UNDEFINED or host_validate_size is not UNDEFINED:
        warnings.warn(
            "cache_configure() no longer accepts the "
            "ip_address_size or host_validate_size arguments, "
            "they are used to set the encode_host_size instead "
            "and will be removed in the future",
            DeprecationWarning,
            stacklevel=2,
        )

    if encode_host_size is not None:
        for size in (ip_address_size, host_validate_size):
            if size is None:
                # Unbounded wins.  Stop here so that a later integer size
                # is never compared against None (max() would raise
                # TypeError).
                encode_host_size = None
                break
            if size is UNDEFINED:
                continue
            if encode_host_size is UNDEFINED:
                encode_host_size = size
            else:
                if TYPE_CHECKING:
                    assert isinstance(size, int)
                    assert isinstance(encode_host_size, int)
                encode_host_size = max(size, encode_host_size)
        if encode_host_size is UNDEFINED:
            encode_host_size = _DEFAULT_ENCODE_SIZE

    _encode_host = lru_cache(encode_host_size)(_encode_host.__wrapped__)
    _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
    _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__)