Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 72%

532 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-09 06:08 +0000

1"""Functions for working with URLs. 

2 

3Contains implementations of functions from :mod:`urllib.parse` that 

4handle bytes and strings. 

5""" 

6from __future__ import annotations 

7 

8import codecs 

9import os 

10import re 

11import typing as t 

12import warnings 

13from urllib.parse import quote 

14from urllib.parse import unquote 

15from urllib.parse import urlencode 

16from urllib.parse import urlsplit 

17from urllib.parse import urlunsplit 

18 

19from ._internal import _check_str_tuple 

20from ._internal import _decode_idna 

21from ._internal import _make_encode_wrapper 

22from ._internal import _to_str 

23from .datastructures import iter_multi_items 

24 

25if t.TYPE_CHECKING: 

26 from . import datastructures as ds 

27 

# A regular expression for what a valid scheme looks like. Per RFC 3986 a
# scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ). The "-" is placed
# last in the character class so it is literal; the previous "+-." spelling
# formed a character range that unintentionally also matched ",".
_scheme_re = re.compile(r"^[a-zA-Z0-9+.-]+$")

# Characters that are safe in any part of an URL.
_always_safe_chars = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "-._~"
    "$!'()*+,;"  # RFC3986 sub-delims set, not including query string delimiters &=
)
_always_safe = frozenset(_always_safe_chars.encode("ascii"))

# Lookup tables for percent decoding/encoding of single bytes.
# _hextobyte maps every two-hex-digit bytes pair (both cases) to its value;
# _bytetohex maps each byte value to its "%XX" escape.
_hexdigits = "0123456789ABCDEFabcdef"
_hextobyte = {
    f"{a}{b}".encode("ascii"): int(f"{a}{b}", 16)
    for a in _hexdigits
    for b in _hexdigits
}
_bytetohex = [f"%{char:02X}".encode("ascii") for char in range(256)]

48 

49 

class _URLTuple(t.NamedTuple):
    """The five components of a split URL, mirroring ``urllib.parse.urlsplit``."""

    scheme: str
    netloc: str
    path: str
    query: str
    fragment: str

56 

57 

class BaseURL(_URLTuple):
    """Superclass of :py:class:`URL` and :py:class:`BytesURL`.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Separator constants; subclasses supply ``str`` or ``bytes`` versions so
    # the parsing helpers below work for both text and bytes tuples.
    _at: str
    _colon: str
    _lbracket: str
    _rbracket: str

    def __new__(cls, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        warnings.warn(
            f"'werkzeug.urls.{cls.__name__}' is deprecated and will be removed in"
            " Werkzeug 3.0. Use the 'urllib.parse' library instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return super().__new__(cls, *args, **kwargs)

    def __str__(self) -> str:
        return self.to_url()

    def replace(self, **kwargs: t.Any) -> BaseURL:
        """Return an URL with the same values, except for those parameters
        given new values by whichever keyword arguments are specified."""
        return self._replace(**kwargs)

    @property
    def host(self) -> str | None:
        """The host part of the URL if available, otherwise `None`. The
        host is either the hostname or the IP address mentioned in the
        URL. It will not contain the port.
        """
        return self._split_host()[0]

    @property
    def ascii_host(self) -> str | None:
        """Works exactly like :attr:`host` but will return a result that
        is restricted to ASCII. If it finds a netloc that is not ASCII
        it will attempt to idna decode it. This is useful for socket
        operations when the URL might include internationalized characters.
        """
        host = self.host

        if host is None or not isinstance(host, str):
            return host

        try:
            return host.encode("idna").decode("ascii")
        except UnicodeError:
            # Not IDNA-encodable; fall back to the unencoded host.
            return host

    @property
    def port(self) -> int | None:
        """The port in the URL as an integer if it was present, `None`
        otherwise. This does not fill in default ports.
        """
        try:
            port = int(_to_str(self._split_host()[1]))
        except (ValueError, TypeError):
            # No port present, or it is not numeric.
            return None

        if 0 <= port <= 65535:
            return port

        return None

    @property
    def auth(self) -> str | None:
        """The authentication part in the URL if available, `None`
        otherwise.
        """
        return self._split_netloc()[0]

    @property
    def username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        raw = self._split_auth()[0]

        if raw is None:
            return None

        return _url_unquote_legacy(raw)

    @property
    def raw_username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        Unlike :attr:`username` this one is not being decoded.
        """
        return self._split_auth()[0]

    @property
    def password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        raw = self._split_auth()[1]

        if raw is None:
            return None

        return _url_unquote_legacy(raw)

    @property
    def raw_password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        Unlike :attr:`password` this one is not being decoded.
        """
        return self._split_auth()[1]

    def decode_query(self, *args: t.Any, **kwargs: t.Any) -> ds.MultiDict[str, str]:
        """Decodes the query part of the URL. This is a shortcut for
        calling :func:`url_decode` on the query argument. The arguments and
        keyword arguments are forwarded to :func:`url_decode` unchanged.
        """
        return url_decode(self.query, *args, **kwargs)

    def join(self, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        """Joins this URL with another one. This is just a convenience
        function for calling into :meth:`url_join` and then parsing the
        return value again.
        """
        return url_parse(url_join(self, *args, **kwargs))

    def to_url(self) -> str:
        """Returns a URL string or bytes depending on the type of the
        information stored. This is just a convenience function
        for calling :meth:`url_unparse` for this URL.
        """
        return url_unparse(self)

    def encode_netloc(self) -> str:
        """Encodes the netloc part to an ASCII safe URL as bytes."""
        rv = self.ascii_host or ""

        # IPv6 literals must be bracketed before a port can be appended.
        if ":" in rv:
            rv = f"[{rv}]"

        port = self.port

        if port is not None:
            rv = f"{rv}:{port}"

        credentials = [
            url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
            url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
        ]
        auth = ":".join(part for part in credentials if part)

        if auth:
            rv = f"{auth}@{rv}"

        return rv

    def decode_netloc(self) -> str:
        """Decodes the netloc part into a string."""
        host = self.host or ""

        if isinstance(host, bytes):
            host = host.decode()

        rv = _decode_idna(host)

        # IPv6 literals must be bracketed before a port can be appended.
        if ":" in rv:
            rv = f"[{rv}]"

        port = self.port

        if port is not None:
            rv = f"{rv}:{port}"

        credentials = [
            _url_unquote_legacy(self.raw_username or "", "/:%@"),
            _url_unquote_legacy(self.raw_password or "", "/:%@"),
        ]
        auth = ":".join(part for part in credentials if part)

        if auth:
            rv = f"{auth}@{rv}"

        return rv

    def to_uri_tuple(self) -> BaseURL:
        """Returns a :class:`BytesURL` tuple that holds a URI. This will
        encode all the information in the URL properly to ASCII using the
        rules a web browser would follow.

        It's usually more interesting to directly call :meth:`iri_to_uri` which
        will return a string.
        """
        return url_parse(iri_to_uri(self))

    def to_iri_tuple(self) -> BaseURL:
        """Returns a :class:`URL` tuple that holds a IRI. This will try
        to decode as much information as possible in the URL without
        losing information similar to how a web browser does it for the
        URL bar.

        It's usually more interesting to directly call :meth:`uri_to_iri` which
        will return a string.
        """
        return url_parse(uri_to_iri(self))

    def get_file_location(
        self, pathformat: str | None = None
    ) -> tuple[str | None, str | None]:
        """Returns a tuple with the location of the file in the form
        ``(server, location)``.  If the netloc is empty in the URL or
        points to localhost, it's represented as ``None``.

        The `pathformat` by default is autodetection but needs to be set
        when working with URLs of a specific system.  The supported values
        are ``'windows'`` when working with Windows or DOS paths and
        ``'posix'`` when working with posix paths.

        If the URL does not point to a local file, the server and location
        are both represented as ``None``.

        :param pathformat: The expected format of the path component.
                           Currently ``'windows'`` and ``'posix'`` are
                           supported.  Defaults to ``None`` which is
                           autodetect.
        """
        if self.scheme != "file":
            return None, None

        path = url_unquote(self.path)
        host = self.netloc or None

        if pathformat is None:
            pathformat = "windows" if os.name == "nt" else "posix"

        if pathformat == "windows":
            # "/C|/x" and "/C:/x" both mean drive C.
            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:":
                path = f"{path[1:2]}:{path[3:]}"

            windows_share = path[:3] in ("\\" * 3, "/" * 3)
            import ntpath

            path = ntpath.normpath(path)
            # Windows shared drives are represented as ``\\host\\directory``.
            # That results in a URL like ``file://///host/directory``, and a
            # path like ``///host/directory``. We need to special-case this
            # because the path contains the hostname.
            if windows_share and host is None:
                parts = path.lstrip("\\").split("\\", 1)

                if len(parts) == 2:
                    host, path = parts
                else:
                    host = parts[0]
                    path = ""
        elif pathformat == "posix":
            import posixpath

            path = posixpath.normpath(path)
        else:
            raise TypeError(f"Invalid path format {pathformat!r}")

        if host in ("127.0.0.1", "::1", "localhost"):
            host = None

        return host, path

    def _split_netloc(self) -> tuple[str | None, str]:
        # Split "auth@host:port" on the first "@" if present.
        if self._at not in self.netloc:
            return None, self.netloc

        auth, _, rest = self.netloc.partition(self._at)
        return auth, rest

    def _split_auth(self) -> tuple[str | None, str | None]:
        # Split the auth part into (username, password).
        auth = self._split_netloc()[0]

        if not auth:
            return None, None

        if self._colon not in auth:
            return auth, None

        username, _, password = auth.partition(self._colon)
        return username, password

    def _split_host(self) -> tuple[str | None, str | None]:
        # Split the host part into (host, port), handling "[ipv6]" brackets.
        host_port = self._split_netloc()[1]

        if not host_port:
            return None, None

        if not host_port.startswith(self._lbracket):
            # Plain "host" or "host:port".
            if self._colon in host_port:
                host, _, port = host_port.partition(self._colon)
                return host, port

            return host_port, None

        # "[ipv6]" or "[ipv6]:port".
        end = host_port.find(self._rbracket)

        if end < 0:
            return host_port, None

        host = host_port[1:end]
        rest = host_port[end + 1 :]

        if rest.startswith(self._colon):
            return host, rest[1:]

        return host, None

353 

354 

class URL(BaseURL):
    """Represents a parsed URL. This behaves like a regular tuple but
    also has some extra attributes that give further insight into the
    URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Text versions of the separators used by the BaseURL helpers.
    _at = "@"
    _colon = ":"
    _lbracket = "["
    _rbracket = "]"

    def encode(self, charset: str = "utf-8", errors: str = "replace") -> BytesURL:
        """Encodes the URL to a tuple made out of bytes. The charset is
        only being used for the path, query and fragment.
        """
        components = (
            self.scheme.encode("ascii"),
            self.encode_netloc(),
            self.path.encode(charset, errors),
            self.query.encode(charset, errors),
            self.fragment.encode(charset, errors),
        )
        return BytesURL(*components)

381 

382 

class BytesURL(BaseURL):
    """Represents a parsed URL in bytes.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Bytes versions of the separators used by the BaseURL helpers.
    _at = b"@"  # type: ignore
    _colon = b":"  # type: ignore
    _lbracket = b"["  # type: ignore
    _rbracket = b"]"  # type: ignore

    def __str__(self) -> str:
        return self.to_url().decode("utf-8", "replace")  # type: ignore

    def encode_netloc(self) -> bytes:  # type: ignore
        """Returns the netloc unchanged as bytes."""
        return self.netloc  # type: ignore

    def decode(self, charset: str = "utf-8", errors: str = "replace") -> URL:
        """Decodes the URL to a tuple made out of strings. The charset is
        only being used for the path, query and fragment.
        """
        components = (
            self.scheme.decode("ascii"),  # type: ignore
            self.decode_netloc(),
            self.path.decode(charset, errors),  # type: ignore
            self.query.decode(charset, errors),  # type: ignore
            self.fragment.decode(charset, errors),  # type: ignore
        )
        return URL(*components)

414 

415 

# Cache of hex-decode tables keyed by the set of "unsafe" byte values that
# should stay percent-encoded.
_unquote_maps: dict[frozenset[int], dict[bytes, int]] = {frozenset(): _hextobyte}


def _unquote_to_bytes(string: str | bytes, unsafe: str | bytes = "") -> bytes:
    """Percent-decode *string* to bytes, leaving *unsafe* bytes encoded."""
    # Normalize both arguments to bytes before scanning.
    if isinstance(string, str):
        string = string.encode("utf-8")

    if isinstance(unsafe, str):
        unsafe = unsafe.encode("utf-8")

    unsafe_set = frozenset(bytearray(unsafe))

    if unsafe_set not in _unquote_maps:
        # Build and cache a decode table that omits the unsafe bytes so
        # their escapes are passed through untouched.
        _unquote_maps[unsafe_set] = {
            code: value for code, value in _hextobyte.items() if value not in unsafe_set
        }

    hex_to_byte = _unquote_maps[unsafe_set]
    pieces = iter(string.split(b"%"))
    out = bytearray(next(pieces, b""))

    for piece in pieces:
        code = piece[:2]

        if code in hex_to_byte:
            out.append(hex_to_byte[code])
            out.extend(piece[2:])
        else:
            # Not a decodable escape; keep the "%" literally.
            out.append(37)  # %
            out.extend(piece)

    return bytes(out)

448 

449 

def _url_encode_impl(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str,
    sort: bool,
    key: t.Callable[[tuple[str, str]], t.Any] | None,
) -> t.Iterator[str]:
    """Yield ``key=value`` strings for :func:`url_encode`.

    Pairs whose value is ``None`` are skipped. Keys and values that are
    not already bytes are stringified and encoded with *charset* before
    form-quoting.

    :param obj: A mapping or an iterable of ``(key, value)`` pairs.
    :param charset: Encoding used for non-bytes keys and values.
    :param sort: Sort the pairs before encoding.
    :param key: Optional sort key function, forwarded to :func:`sorted`.
    """
    # Uses the module-level ``iter_multi_items`` import; the previous
    # function-local re-import of the same name was redundant.
    iterable: t.Iterable[tuple[str, str]] = iter_multi_items(obj)

    if sort:
        iterable = sorted(iterable, key=key)

    for key_str, value_str in iterable:
        if value_str is None:
            continue

        if not isinstance(key_str, bytes):
            key_bytes = str(key_str).encode(charset)
        else:
            key_bytes = key_str

        if not isinstance(value_str, bytes):
            value_bytes = str(value_str).encode(charset)
        else:
            value_bytes = value_str

        yield f"{_fast_url_quote_plus(key_bytes)}={_fast_url_quote_plus(value_bytes)}"

478 

479 

def _url_unquote_legacy(value: str, unsafe: str = "") -> str:
    """Unquote *value* as strict UTF-8, falling back to latin1 on failure."""
    try:
        return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
    except UnicodeError:
        return url_unquote(value, charset="latin1", unsafe=unsafe)

485 

486 

def url_parse(
    url: str, scheme: str | None = None, allow_fragments: bool = True
) -> BaseURL:
    """Parses a URL from a string into a :class:`URL` tuple.  If the URL
    is lacking a scheme it can be provided as second argument. Otherwise,
    it is ignored.  Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default schema to use if the URL is schemaless.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_parse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # ``s`` converts literals to str or bytes to match the input type.
    s = _make_encode_wrapper(url)
    is_text_based = isinstance(url, str)

    if scheme is None:
        scheme = s("")

    netloc = query = fragment = s("")
    i = url.find(s(":"))

    if i > 0 and _scheme_re.match(_to_str(url[:i], errors="replace")):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1 :]

        if not rest or any(c not in s("0123456789") for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s("//"):
        # The netloc ends at the first "/", "?" or "#", whichever is first.
        delim = len(url)

        for c in s("/?#"):
            wdelim = url.find(c, 2)

            if wdelim >= 0:
                delim = min(delim, wdelim)

        netloc, url = url[2:delim], url[delim:]

        # An unmatched bracket means a malformed IPv6 literal.
        if (s("[") in netloc) != (s("]") in netloc):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, fragment = url.split(s("#"), 1)

    if s("?") in url:
        url, query = url.split(s("?"), 1)

    result_type = URL if is_text_based else BytesURL
    return result_type(scheme, netloc, url, query, fragment)

546 

547 

def _make_fast_url_quote(
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> t.Callable[[bytes], str]:
    """Precompile the translation table for a URL encoding function.

    Unlike :func:`url_quote`, the generated function only takes the
    string to quote.

    :param charset: The charset to encode the result with.
    :param errors: How to handle encoding errors.
    :param safe: An optional sequence of safe characters to never encode.
    :param unsafe: An optional sequence of unsafe characters to always encode.
    """
    if isinstance(safe, str):
        safe = safe.encode(charset, errors)

    if isinstance(unsafe, str):
        unsafe = unsafe.encode(charset, errors)

    safe_bytes = (frozenset(bytearray(safe)) | _always_safe) - frozenset(
        bytearray(unsafe)
    )
    # One table entry per possible byte: the literal character when safe,
    # otherwise its percent-encoded form.
    table = [chr(c) if c in safe_bytes else f"%{c:02X}" for c in range(256)]

    def quote(string: bytes) -> str:
        return "".join([table[c] for c in string])

    return quote

577 

578 

# Precompiled quoters used by the URL-encoding helpers in this module.
_fast_url_quote = _make_fast_url_quote()
_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")


def _fast_url_quote_plus(string: bytes) -> str:
    """Quote *string* for form encoding, turning spaces into ``+``."""
    return _fast_quote_plus(string).replace(" ", "+")

585 

586 

def url_quote(
    string: str | bytes,
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> str:
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote`` instead.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_quote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.quote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Coerce everything to bytes before quoting byte-by-byte.
    if not isinstance(string, (str, bytes, bytearray)):
        string = str(string)

    if isinstance(string, str):
        string = string.encode(charset, errors)

    if isinstance(safe, str):
        safe = safe.encode(charset, errors)

    if isinstance(unsafe, str):
        unsafe = unsafe.encode(charset, errors)

    safe_bytes = (frozenset(bytearray(safe)) | _always_safe) - frozenset(
        bytearray(unsafe)
    )
    out = bytearray()

    for byte in bytearray(string):
        if byte in safe_bytes:
            out.append(byte)
        else:
            out.extend(_bytetohex[byte])

    return bytes(out).decode(charset)

630 

631 

def url_quote_plus(
    string: str, charset: str = "utf-8", errors: str = "strict", safe: str = ""
) -> str:
    """URL encode a single string with the given encoding and convert
    whitespace to "+".

    :param s: The string to quote.
    :param charset: The charset to be used.
    :param safe: An optional sequence of safe characters.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote_plus`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_quote_plus' is deprecated and will be removed in Werkzeug"
        # The removal version previously read "2.4"; fixed to match the
        # docstring and the other deprecation messages in this module.
        " 3.0. Use 'urllib.parse.quote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Spaces are kept "safe" during quoting, then converted to "+".
    return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+")

653 

654 

def url_unparse(components: tuple[str, str, str, str, str]) -> str:
    """The reverse operation to :meth:`url_parse`.  This accepts arbitrary
    as well as :class:`URL` tuples and returns a URL as a string.

    :param components: the parsed URL as tuple which should be converted
                       into a URL string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlunsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unparse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlunsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    _check_str_tuple(components)
    scheme, netloc, path, query, fragment = components
    # ``s`` converts literals to str or bytes to match the input type.
    s = _make_encode_wrapper(scheme)

    # We generally treat file:///x and file:/x the same which is also
    # what browsers seem to do. This also allows us to ignore a schema
    # register for netloc utilization or having to differentiate between
    # empty and missing netloc.
    if netloc or (scheme and path.startswith(s("/"))):
        if path and path[:1] != s("/"):
            path = s("/") + path

        url = s("//") + (netloc or s("")) + path
    elif path:
        url = path
    else:
        url = s("")

    if scheme:
        url = scheme + s(":") + url

    if query:
        url = url + s("?") + query

    if fragment:
        url = url + s("#") + fragment

    return url

693 

694 

def url_unquote(
    s: str | bytes,
    charset: str = "utf-8",
    errors: str = "replace",
    unsafe: str = "",
) -> str:
    """URL decode a single string with a given encoding.  If the charset
    is set to `None` no decoding is performed and raw bytes are
    returned.

    :param s: the string to unquote.
    :param charset: the charset of the query string.  If set to `None`
                    no decoding will take place.
    :param errors: the error handling for the charset decoding.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unquote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.unquote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    raw = _unquote_to_bytes(s, unsafe)

    if charset is None:
        return raw

    return raw.decode(charset, errors)

723 

724 

def url_unquote_plus(
    s: str | bytes, charset: str = "utf-8", errors: str = "replace"
) -> str:
    """URL decode a single string with the given `charset` and decode "+" to
    whitespace.

    Per default encoding errors are ignored.  If you want a different behavior
    you can set `errors` to ``'replace'`` or ``'strict'``.

    :param s: The string to unquote.
    :param charset: the charset of the query string.  If set to `None`
                    no decoding will take place.
    :param errors: The error handling for the `charset` decoding.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote_plus`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unquote_plus' is deprecated and will be removed in Werkzeug"
        # The removal version previously read "2.4"; fixed to match the
        # docstring and the other deprecation messages in this module.
        " 3.0. Use 'urllib.parse.unquote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Convert form-encoded "+" to spaces before percent-decoding.
    if isinstance(s, str):
        s = s.replace("+", " ")
    else:
        s = s.replace(b"+", b" ")

    return url_unquote(s, charset, errors)

755 

756 

def url_fix(s: str, charset: str = "utf-8") -> str:
    r"""Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on. This function can fix
    some of the problems in a similar way browsers handle data entered by the
    user:

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was given
                    as a string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0.
    """
    warnings.warn(
        "'werkzeug.urls.url_fix' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )
    # First step is to switch to text processing and to convert
    # backslashes (which are invalid in URLs anyways) to slashes. This is
    # consistent with what Chrome does.
    s = _to_str(s, charset, "replace").replace("\\", "/")

    # For the specific case that we look like a malformed windows URL
    # we want to fix this up manually:
    if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"):
        s = f"file:///{s[7:]}"

    parts = url_parse(s)
    path = url_quote(parts.path, charset, safe="/%+$!*'(),")
    qs = url_quote_plus(parts.query, charset, safe=":&%=+$!*'(),")
    anchor = url_quote_plus(parts.fragment, charset, safe=":&%=+$!*'(),")
    return url_unparse((parts.scheme, parts.encode_netloc(), path, qs, anchor))

793 

794 

def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.
    """
    # the docs state that UnicodeError does have these attributes,
    # but mypy isn't picking them up
    bad = e.object[e.start : e.end]  # type: ignore
    return quote(bad, safe=""), e.end  # type: ignore


codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)

806 

807 

def _make_unquote_part(name: str, chars: str) -> t.Callable[[str, str, str], str]:
    """Create a function that unquotes all percent encoded characters except those
    given. This allows working with unquoted characters if possible while not changing
    the meaning of a given part of a URL.
    """
    # Runs of escapes for the protected characters are captured by the
    # pattern (case-insensitively) and passed through untouched; everything
    # between them is fully unquoted.
    choices = "|".join(f"{ord(c):02X}" for c in sorted(chars))
    pattern = re.compile(f"((?:%(?:{choices}))+)", re.I)

    def _unquote_partial(value: str, encoding: str, errors: str) -> str:
        # ``split`` with a capturing group alternates plain text and
        # protected escape runs; consume them pairwise.
        pieces = iter(pattern.split(value))
        out = []

        for plain in pieces:
            out.append(unquote(plain, encoding, errors))
            out.append(next(pieces, ""))

        return "".join(out)

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial

828 

829 

# characters that should remain quoted in URL parts
# based on https://url.spec.whatwg.org/#percent-encoded-bytes
# always keep all controls, space, and % quoted
_always_unsafe = bytes((*range(0x21), 0x25, 0x7F)).decode()

# Part-specific unquoters: each additionally keeps the delimiters that would
# change the meaning of that URL part quoted.
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")
_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")
_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")

838 

839 

def uri_to_iri(
    uri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to encode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.encode``. By
        default, invalid bytes are left quoted.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    # Normalize deprecated input forms first.
    if isinstance(uri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = urlunsplit(uri)

    if isinstance(uri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = uri.decode()

    if charset is None:
        charset = "utf-8"
    else:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    if errors is None:
        # Custom handler registered above: re-quotes undecodable bytes.
        errors = "werkzeug.url_quote"
    else:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    parts = urlsplit(uri)
    path = _unquote_path(parts.path, charset, errors)
    query = _unquote_query(parts.query, charset, errors)
    fragment = _unquote_fragment(parts.fragment, charset, errors)

    netloc = _decode_idna(parts.hostname) if parts.hostname else ""

    # IPv6 literals must be bracketed before a port can be appended.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    if parts.port:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = _unquote_user(parts.username, charset, errors)

        if parts.password:
            password = _unquote_user(parts.password, charset, errors)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))

931 

932 

933def iri_to_uri( 

934 iri: str | tuple[str, str, str, str, str], 

935 charset: str | None = None, 

936 errors: str | None = None, 

937 safe_conversion: bool | None = None, 

938) -> str: 

939 """Convert an IRI to a URI. All non-ASCII and unsafe characters are 

940 quoted. If the URL has a domain, it is encoded to Punycode. 

941 

942 >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF') 

943 'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF' 

944 

945 :param iri: The IRI to convert. 

946 :param charset: The encoding of the IRI. 

947 :param errors: Error handler to use during ``bytes.encode``. 

948 

949 .. versionchanged:: 2.3 

950 Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are 

951 deprecated and will be removed in Werkzeug 3.0. 

952 

953 .. versionchanged:: 2.3 

954 Which characters remain unquoted is specific to each part of the URL. 

955 

956 .. versionchanged:: 2.3 

957 The ``safe_conversion`` parameter is deprecated and will be removed in Werkzeug 

958 2.4. 

959 

960 .. versionchanged:: 0.15 

961 All reserved characters remain unquoted. Previously, only some reserved 

962 characters were left unquoted. 

963 

964 .. versionchanged:: 0.9.6 

965 The ``safe_conversion`` parameter was added. 

966 

967 .. versionadded:: 0.6 

968 """ 

969 if charset is not None: 

970 warnings.warn( 

971 "The 'charset' parameter is deprecated and will be removed" 

972 " in Werkzeug 3.0.", 

973 DeprecationWarning, 

974 stacklevel=2, 

975 ) 

976 else: 

977 charset = "utf-8" 

978 

979 if isinstance(iri, tuple): 

980 warnings.warn( 

981 "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.", 

982 DeprecationWarning, 

983 stacklevel=2, 

984 ) 

985 iri = urlunsplit(iri) 

986 

987 if isinstance(iri, bytes): 

988 warnings.warn( 

989 "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.", 

990 DeprecationWarning, 

991 stacklevel=2, 

992 ) 

993 iri = iri.decode(charset) 

994 

995 if errors is not None: 

996 warnings.warn( 

997 "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.", 

998 DeprecationWarning, 

999 stacklevel=2, 

1000 ) 

1001 else: 

1002 errors = "strict" 

1003 

1004 if safe_conversion is not None: 

1005 warnings.warn( 

1006 "The 'safe_conversion' parameter is deprecated and will be removed in" 

1007 " Werkzeug 3.0.", 

1008 DeprecationWarning, 

1009 stacklevel=2, 

1010 ) 

1011 

1012 if safe_conversion: 

1013 # If we're not sure if it's safe to normalize the URL, and it only contains 

1014 # ASCII characters, return it as-is. 

1015 try: 

1016 ascii_iri = iri.encode("ascii") 

1017 

1018 # Only return if it doesn't have whitespace. (Why?) 

1019 if len(ascii_iri.split()) == 1: 

1020 return iri 

1021 except UnicodeError: 

1022 pass 

1023 

1024 parts = urlsplit(iri) 

1025 # safe = https://url.spec.whatwg.org/#url-path-segment-string 

1026 # as well as percent for things that are already quoted 

1027 path = quote(parts.path, safe="%!$&'()*+,/:;=@", encoding=charset, errors=errors) 

1028 query = quote(parts.query, safe="%!$&'()*+,/:;=?@", encoding=charset, errors=errors) 

1029 fragment = quote( 

1030 parts.fragment, safe="%!#$&'()*+,/:;=?@", encoding=charset, errors=errors 

1031 ) 

1032 

1033 if parts.hostname: 

1034 netloc = parts.hostname.encode("idna").decode("ascii") 

1035 else: 

1036 netloc = "" 

1037 

1038 if ":" in netloc: 

1039 netloc = f"[{netloc}]" 

1040 

1041 if parts.port: 

1042 netloc = f"{netloc}:{parts.port}" 

1043 

1044 if parts.username: 

1045 auth = quote(parts.username, safe="%!$&'()*+,;=") 

1046 

1047 if parts.password: 

1048 pass_quoted = quote(parts.password, safe="%!$&'()*+,;=") 

1049 auth = f"{auth}:{pass_quoted}" 

1050 

1051 netloc = f"{auth}@{netloc}" 

1052 

1053 return urlunsplit((parts.scheme, netloc, path, query, fragment)) 

1054 

1055 

1056def _invalid_iri_to_uri(iri: str) -> str: 

1057 """The URL scheme ``itms-services://`` must contain the ``//`` even though it does 

1058 not have a host component. There may be other invalid schemes as well. Currently, 

1059 responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which 

1060 removes the ``//``. For now, if the IRI only contains ASCII and does not contain 

1061 spaces, pass it on as-is. In Werkzeug 3.0, this should become a 

1062 ``response.process_location`` flag. 

1063 

1064 :meta private: 

1065 """ 

1066 try: 

1067 iri.encode("ascii") 

1068 except UnicodeError: 

1069 pass 

1070 else: 

1071 if len(iri.split(None, 1)) == 1: 

1072 return iri 

1073 

1074 return iri_to_uri(iri) 

1075 

1076 

def url_decode(
    s: t.AnyStr,
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: str = "&",
    cls: type[ds.MultiDict] | None = None,
) -> ds.MultiDict[str, str]:
    """Parse a query string and return it as a :class:`MultiDict`.

    :param s: The query string to parse, as ``str`` or ``bytes``.
    :param charset: Decode bytes to string with this charset. If not
        given, bytes are returned as-is.
    :param include_empty: Include keys with empty values in the dict.
    :param errors: Error handling behavior when decoding bytes.
    :param separator: Separator character between pairs.
    :param cls: Container to hold result instead of :class:`MultiDict`.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        In previous versions ";" and "&" could be used for url decoding.
        Now only "&" is supported. If you want to use ";", a different
        ``separator`` can be provided.

    .. versionchanged:: 0.5
        The ``cls`` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_decode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # Coerce the separator to the same type as ``s`` so ``split`` works for
    # both str and bytes input.
    if isinstance(s, str):
        if not isinstance(separator, str):
            separator = separator.decode(charset or "ascii")
    elif isinstance(s, bytes) and not isinstance(separator, bytes):
        separator = separator.encode(charset or "ascii")  # type: ignore

    pairs = s.split(separator)  # type: ignore
    return cls(_url_decode_impl(pairs, charset, include_empty, errors))

1129 

1130 

def url_decode_stream(
    stream: t.IO[bytes],
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: bytes = b"&",
    cls: type[ds.MultiDict] | None = None,
    limit: int | None = None,
) -> ds.MultiDict[str, str]:
    """Works like :func:`url_decode` but decodes a stream. The behavior
    of stream and limit follows functions like
    :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is
    directly fed to the `cls` so you can consume the data while it's
    parsed.

    :param stream: a stream with the encoded querystring
    :param charset: the charset of the query string. If set to `None`
        no decoding will take place.
    :param include_empty: Set to `False` if you don't want empty values to
        appear in the dict.
    :param errors: the decoding error behavior.
    :param separator: the pair separator to be used, defaults to ``&``
    :param cls: an optional dict class to use. If this is not specified
        or `None` the default :class:`MultiDict` is used.
    :param limit: the content length of the URL data. Not necessary if
        a limited stream is provided.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` and ``return_iterator`` parameters were removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_decode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Imported here to avoid a circular import at module load time.
    from .wsgi import make_chunk_iter

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # Chunks are split on ``separator`` and decoded lazily; ``cls`` drives
    # consumption of the generator.
    chunks = make_chunk_iter(stream, separator, limit)
    return cls(_url_decode_impl(chunks, charset, include_empty, errors))

1184 

1185 

def _url_decode_impl(
    pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str
) -> t.Iterator[tuple[str, str]]:
    """Decode an iterable of ``key=value`` pairs, yielding unquoted
    ``(key, value)`` string tuples. Empty pairs are skipped; pairs without
    ``=`` get an empty value, or are dropped if ``include_empty`` is false.
    """
    for pair in pair_iter:
        if not pair:
            continue

        # ``coerce`` produces literals ("=", "") in the same type as ``pair``
        # (str or bytes).
        coerce = _make_encode_wrapper(pair)
        eq = coerce("=")

        if eq not in pair:
            if not include_empty:
                continue

            key, value = pair, coerce("")
        else:
            key, value = pair.split(eq, 1)

        yield (
            url_unquote_plus(key, charset, errors),
            url_unquote_plus(value, charset, errors),
        )

1205 

1206 

def url_encode(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> str:
    """URL encode a dict/`MultiDict`. If a value is `None` it will not appear
    in the result string. Per default only values are encoded into the target
    charset strings.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        Added the ``sort``, ``key``, and ``separator`` parameters.
    """
    warnings.warn(
        "'werkzeug.urls.url_encode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    joiner = _to_str(separator, "ascii")
    return joiner.join(_url_encode_impl(obj, charset, sort, key))

1242 

1243 

def url_encode_stream(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    stream: t.IO[str] | None = None,
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> None:
    """Like :meth:`url_encode` but writes the results to a stream
    object. If the stream is `None` a generator over all encoded
    pairs is returned.

    :param obj: the object to encode into a query string.
    :param stream: a stream to write the encoded object into or `None` if
        an iterator over the encoded pairs should be returned. In
        that case the separator argument is ignored.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_encode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    sep = _to_str(separator, "ascii")
    pairs = _url_encode_impl(obj, charset, sort, key)

    if stream is None:
        # Documented quirk: with no stream, the generator itself is returned
        # (the declared return type predates this behavior).
        return pairs  # type: ignore

    # Interleave the separator between pairs while writing.
    first = True

    for chunk in pairs:
        if not first:
            stream.write(sep)

        stream.write(chunk)
        first = False

    return None

1289 

1290 

def url_join(
    base: str | tuple[str, str, str, str, str],
    url: str | tuple[str, str, str, str, str],
    allow_fragments: bool = True,
) -> str:
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    :param base: the base URL for the join operation.
    :param url: the URL to join.
    :param allow_fragments: indicates whether fragments should be allowed.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urljoin`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_join' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urljoin' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Accept 5-tuples (as produced by ``url_parse``) as well as strings.
    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    # Both arguments must be all-str or all-bytes; ``s`` converts the
    # literals used below to the matching type.
    _check_str_tuple((base, url))
    s = _make_encode_wrapper(base)

    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
        base, allow_fragments=allow_fragments
    )
    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
    # A different scheme means ``url`` stands on its own.
    if scheme != bscheme:
        return url
    # An explicit netloc also makes ``url`` absolute; use it as-is.
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s("/"):
        # Absolute path: the base path is ignored entirely.
        segments = path.split(s("/"))
    elif not path:
        # No path at all: keep the base path, and the base query if none given.
        segments = bpath.split(s("/"))
        if not query:
            query = bquery
    else:
        # Relative path: resolve against the base path's directory.
        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s("."):
        segments[-1] = s("")

    # Resolve ".." and ".". Each outer pass removes one "<segment>/.." pair
    # and restarts the scan, since removal shifts the indices.
    segments = [segment for segment in segments if segment != s(".")]
    while True:
        i = 1
        n = len(segments) - 1
        while i < n:
            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
                del segments[i - 1 : i + 1]
                break
            i += 1
        else:
            break

    # Drop ".." segments that would climb above the root of an absolute URL
    # (a leading empty segment marks the root).
    unwanted_marker = [s(""), s("..")]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s("/").join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))

1370 

1371 

def _urlencode(
    query: t.Mapping[str, str] | t.Iterable[tuple[str, str]], encoding: str = "utf-8"
) -> str:
    """Encode a mapping or iterable of pairs as a query string, skipping
    items whose value is ``None``.
    """
    items = []

    for item in iter_multi_items(query):
        if item[1] is not None:
            items.append(item)

    # Safe characters per https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(items, safe="!$'()*,/:;?@", encoding=encoding)