Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 25%

434 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:35 +0000

1"""Functions for working with URLs. 

2 

3Contains implementations of functions from :mod:`urllib.parse` that 

4handle bytes and strings. 

5""" 

6import codecs 

7import os 

8import re 

9import typing as t 

10 

11from ._internal import _check_str_tuple 

12from ._internal import _decode_idna 

13from ._internal import _encode_idna 

14from ._internal import _make_encode_wrapper 

15from ._internal import _to_str 

16 

17if t.TYPE_CHECKING: 

18 from . import datastructures as ds 

19 

20# A regular expression for what a valid schema looks like 

21_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$") 

22 

23# Characters that are safe in any part of an URL. 

24_always_safe = frozenset( 

25 bytearray( 

26 b"abcdefghijklmnopqrstuvwxyz" 

27 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" 

28 b"0123456789" 

29 b"-._~" 

30 b"$!'()*+,;" # RFC3986 sub-delims set, not including query string delimiters &= 

31 ) 

32) 

33 

34_hexdigits = "0123456789ABCDEFabcdef" 

35_hextobyte = { 

36 f"{a}{b}".encode("ascii"): int(f"{a}{b}", 16) 

37 for a in _hexdigits 

38 for b in _hexdigits 

39} 

40_bytetohex = [f"%{char:02X}".encode("ascii") for char in range(256)] 

41 

42 

class _URLTuple(t.NamedTuple):
    """The five components of a split URL, in the same order as
    :func:`urllib.parse.urlsplit`.  Fields are annotated as ``str`` but
    :class:`BytesURL` stores ``bytes`` values in them.
    """

    scheme: str
    netloc: str
    path: str
    query: str
    fragment: str

49 

50 

class BaseURL(_URLTuple):
    """Superclass of :py:class:`URL` and :py:class:`BytesURL`."""

    __slots__ = ()
    # Separator characters used by the ``_split_*`` helpers below.
    # :class:`URL` sets these to ``str`` values and :class:`BytesURL`
    # to ``bytes`` so the same splitting code works on both.
    _at: str
    _colon: str
    _lbracket: str
    _rbracket: str

    def __str__(self) -> str:
        return self.to_url()

    def replace(self, **kwargs: t.Any) -> "BaseURL":
        """Return an URL with the same values, except for those parameters
        given new values by whichever keyword arguments are specified."""
        return self._replace(**kwargs)

    @property
    def host(self) -> t.Optional[str]:
        """The host part of the URL if available, otherwise `None`. The
        host is either the hostname or the IP address mentioned in the
        URL. It will not contain the port.
        """
        return self._split_host()[0]

    @property
    def ascii_host(self) -> t.Optional[str]:
        """Works exactly like :attr:`host` but will return a result that
        is restricted to ASCII. If it finds a netloc that is not ASCII
        it will attempt to idna decode it. This is useful for socket
        operations when the URL might include internationalized characters.
        """
        rv = self.host
        if rv is not None and isinstance(rv, str):
            try:
                rv = _encode_idna(rv)  # type: ignore
            except UnicodeError:
                # Host is not IDNA-encodable; fall back to dropping any
                # non-ASCII characters.
                rv = rv.encode("ascii", "ignore")  # type: ignore
        return _to_str(rv, "ascii", "ignore")

    @property
    def port(self) -> t.Optional[int]:
        """The port in the URL as an integer if it was present, `None`
        otherwise. This does not fill in default ports.
        """
        try:
            rv = int(_to_str(self._split_host()[1]))
            if 0 <= rv <= 65535:
                return rv
        except (ValueError, TypeError):
            # TypeError: no port present; ValueError: port not numeric.
            pass
        return None

    @property
    def auth(self) -> t.Optional[str]:
        """The authentication part in the URL if available, `None`
        otherwise.
        """
        return self._split_netloc()[0]

    @property
    def username(self) -> t.Optional[str]:
        """The username if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[0]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_username(self) -> t.Optional[str]:
        """The username if it was part of the URL, `None` otherwise.
        Unlike :attr:`username` this one is not being decoded.
        """
        return self._split_auth()[0]

    @property
    def password(self) -> t.Optional[str]:
        """The password if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[1]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_password(self) -> t.Optional[str]:
        """The password if it was part of the URL, `None` otherwise.
        Unlike :attr:`password` this one is not being decoded.
        """
        return self._split_auth()[1]

    def decode_query(self, *args: t.Any, **kwargs: t.Any) -> "ds.MultiDict[str, str]":
        """Decodes the query part of the URL. This is a shortcut for
        calling :func:`url_decode` on the query argument. The arguments and
        keyword arguments are forwarded to :func:`url_decode` unchanged.
        """
        return url_decode(self.query, *args, **kwargs)

    def join(self, *args: t.Any, **kwargs: t.Any) -> "BaseURL":
        """Joins this URL with another one. This is just a convenience
        function for calling into :meth:`url_join` and then parsing the
        return value again.
        """
        return url_parse(url_join(self, *args, **kwargs))

    def to_url(self) -> str:
        """Returns a URL string or bytes depending on the type of the
        information stored. This is just a convenience function
        for calling :meth:`url_unparse` for this URL.
        """
        return url_unparse(self)

    def encode_netloc(self) -> str:
        """Encodes the netloc part to an ASCII safe URL as bytes."""
        rv = self.ascii_host or ""
        if ":" in rv:
            # Bracket IPv6 literals so the port separator stays unambiguous.
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        auth = ":".join(
            filter(
                None,
                [
                    url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
                    url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def decode_netloc(self) -> str:
        """Decodes the netloc part into a string."""
        rv = _decode_idna(self.host or "")

        if ":" in rv:
            # Bracket IPv6 literals so the port separator stays unambiguous.
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        auth = ":".join(
            filter(
                None,
                [
                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def to_uri_tuple(self) -> "BaseURL":
        """Returns a :class:`BytesURL` tuple that holds a URI. This will
        encode all the information in the URL properly to ASCII using the
        rules a web browser would follow.

        It's usually more interesting to directly call :meth:`iri_to_uri` which
        will return a string.
        """
        return url_parse(iri_to_uri(self))

    def to_iri_tuple(self) -> "BaseURL":
        """Returns a :class:`URL` tuple that holds a IRI. This will try
        to decode as much information as possible in the URL without
        losing information similar to how a web browser does it for the
        URL bar.

        It's usually more interesting to directly call :meth:`uri_to_iri` which
        will return a string.
        """
        return url_parse(uri_to_iri(self))

    def get_file_location(
        self, pathformat: t.Optional[str] = None
    ) -> t.Tuple[t.Optional[str], t.Optional[str]]:
        """Returns a tuple with the location of the file in the form
        ``(server, location)``. If the netloc is empty in the URL or
        points to localhost, it's represented as ``None``.

        The `pathformat` by default is autodetection but needs to be set
        when working with URLs of a specific system. The supported values
        are ``'windows'`` when working with Windows or DOS paths and
        ``'posix'`` when working with posix paths.

        If the URL does not point to a local file, the server and location
        are both represented as ``None``.

        :param pathformat: The expected format of the path component.
                           Currently ``'windows'`` and ``'posix'`` are
                           supported. Defaults to ``None`` which is
                           autodetect.
        """
        if self.scheme != "file":
            return None, None

        path = url_unquote(self.path)
        host = self.netloc or None

        if pathformat is None:
            # Autodetect based on the OS this code is running on.
            if os.name == "nt":
                pathformat = "windows"
            else:
                pathformat = "posix"

        if pathformat == "windows":
            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:":
                # Turn "/C:/..." (or the old "/C|/..." form) into "C:/...".
                path = f"{path[1:2]}:{path[3:]}"
            windows_share = path[:3] in ("\\" * 3, "/" * 3)
            import ntpath

            path = ntpath.normpath(path)
            # Windows shared drives are represented as ``\\host\\directory``.
            # That results in a URL like ``file://///host/directory``, and a
            # path like ``///host/directory``. We need to special-case this
            # because the path contains the hostname.
            if windows_share and host is None:
                parts = path.lstrip("\\").split("\\", 1)
                if len(parts) == 2:
                    host, path = parts
                else:
                    host = parts[0]
                    path = ""
        elif pathformat == "posix":
            import posixpath

            path = posixpath.normpath(path)
        else:
            raise TypeError(f"Invalid path format {pathformat!r}")

        if host in ("127.0.0.1", "::1", "localhost"):
            host = None

        return host, path

    def _split_netloc(self) -> t.Tuple[t.Optional[str], str]:
        """Split the netloc into ``(auth, host_and_port)``.  ``auth`` is
        ``None`` when no ``@`` is present."""
        if self._at in self.netloc:
            auth, _, netloc = self.netloc.partition(self._at)
            return auth, netloc
        return None, self.netloc

    def _split_auth(self) -> t.Tuple[t.Optional[str], t.Optional[str]]:
        """Split the auth part into ``(username, password)``; either may
        be ``None`` when absent."""
        auth = self._split_netloc()[0]
        if not auth:
            return None, None
        if self._colon not in auth:
            return auth, None

        username, _, password = auth.partition(self._colon)
        return username, password

    def _split_host(self) -> t.Tuple[t.Optional[str], t.Optional[str]]:
        """Split the host part into ``(host, port)``; either may be
        ``None``.  Handles bracketed IPv6 literals like ``[::1]:80``."""
        rv = self._split_netloc()[1]
        if not rv:
            return None, None

        if not rv.startswith(self._lbracket):
            if self._colon in rv:
                host, _, port = rv.partition(self._colon)
                return host, port
            return rv, None

        # Bracketed form: find the closing bracket, then an optional port.
        idx = rv.find(self._rbracket)
        if idx < 0:
            return rv, None

        host = rv[1:idx]
        rest = rv[idx + 1 :]
        if rest.startswith(self._colon):
            return host, rest[1:]
        return host, None

328 

329 

class URL(BaseURL):
    """Represents a parsed URL. This behaves like a regular tuple but
    also has some extra attributes that give further insight into the
    URL.
    """

    __slots__ = ()
    _at = "@"
    _colon = ":"
    _lbracket = "["
    _rbracket = "]"

    def encode(self, charset: str = "utf-8", errors: str = "replace") -> "BytesURL":
        """Encodes the URL to a tuple made out of bytes. The charset is
        only being used for the path, query and fragment.
        """
        # Scheme and netloc have their own ASCII-only encodings; the
        # remaining components share *charset*/*errors*.
        encoded_parts = (
            part.encode(charset, errors)  # type: ignore
            for part in (self.path, self.query, self.fragment)
        )
        return BytesURL(
            self.scheme.encode("ascii"),  # type: ignore
            self.encode_netloc(),
            *encoded_parts,
        )

353 

354 

class BytesURL(BaseURL):
    """Represents a parsed URL in bytes."""

    __slots__ = ()
    # Byte-valued separators so :class:`BaseURL`'s splitting helpers
    # operate on ``bytes`` components.
    _at = b"@"  # type: ignore
    _colon = b":"  # type: ignore
    _lbracket = b"["  # type: ignore
    _rbracket = b"]"  # type: ignore

    def __str__(self) -> str:
        # ``to_url`` returns bytes for this class; decode for display.
        return self.to_url().decode("utf-8", "replace")  # type: ignore

    def encode_netloc(self) -> bytes:  # type: ignore
        """Returns the netloc unchanged as bytes."""
        return self.netloc  # type: ignore

    def decode(self, charset: str = "utf-8", errors: str = "replace") -> "URL":
        """Decodes the URL to a tuple made out of strings. The charset is
        only being used for the path, query and fragment.
        """
        return URL(
            self.scheme.decode("ascii"),  # type: ignore
            self.decode_netloc(),
            self.path.decode(charset, errors),  # type: ignore
            self.query.decode(charset, errors),  # type: ignore
            self.fragment.decode(charset, errors),  # type: ignore
        )

382 

383 

# Cache of percent-decoding tables keyed by the set of byte values that
# must stay encoded ("unsafe").  The empty set maps to the full table.
_unquote_maps: t.Dict[t.FrozenSet[int], t.Dict[bytes, int]] = {frozenset(): _hextobyte}

385 

386 

def _unquote_to_bytes(
    string: t.Union[str, bytes], unsafe: t.Union[str, bytes] = ""
) -> bytes:
    """Percent-decode *string* to raw bytes, leaving any escape whose
    value is listed in *unsafe* encoded.  Decoding tables are cached per
    unsafe set in ``_unquote_maps``.
    """
    if isinstance(string, str):
        string = string.encode("utf-8")

    if isinstance(unsafe, str):
        unsafe = unsafe.encode("utf-8")

    unsafe_set = frozenset(bytearray(unsafe))
    table = _unquote_maps.get(unsafe_set)

    if table is None:
        # Build and memoize a table without the unsafe byte values.
        table = _unquote_maps[unsafe_set] = {
            code: value
            for code, value in _hextobyte.items()
            if value not in unsafe_set
        }

    pieces = iter(string.split(b"%"))
    # Everything before the first "%" is copied verbatim.
    out = bytearray(next(pieces, b""))

    for piece in pieces:
        hex_code = piece[:2]

        if hex_code in table:
            out.append(table[hex_code])
            out.extend(piece[2:])
        else:
            # Not a decodable escape for this table; keep the "%" literal.
            out.append(37)  # ord("%")
            out.extend(piece)

    return bytes(out)

418 

419 

def _url_encode_impl(
    obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]],
    charset: str,
    sort: bool,
    key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]],
) -> t.Iterator[str]:
    """Yield quoted ``key=value`` strings for :func:`url_encode`,
    skipping pairs whose value is ``None``.
    """
    from .datastructures import iter_multi_items

    pairs: t.Iterable[t.Tuple[str, str]] = iter_multi_items(obj)

    if sort:
        pairs = sorted(pairs, key=key)

    for k, v in pairs:
        if v is None:
            continue

        # Coerce non-bytes keys/values to text first, then encode.
        k_bytes = k if isinstance(k, bytes) else str(k).encode(charset)
        v_bytes = v if isinstance(v, bytes) else str(v).encode(charset)
        yield f"{_fast_url_quote_plus(k_bytes)}={_fast_url_quote_plus(v_bytes)}"

448 

449 

def _url_unquote_legacy(value: str, unsafe: str = "") -> str:
    """Unquote *value* as strict UTF-8, falling back to latin1 (with the
    default ``'replace'`` error handling) when the percent-decoded bytes
    are not valid UTF-8.
    """
    try:
        return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
    except UnicodeError:
        return url_unquote(value, charset="latin1", unsafe=unsafe)

455 

456 

def url_parse(
    url: str, scheme: t.Optional[str] = None, allow_fragments: bool = True
) -> BaseURL:
    """Parses a URL from a string into a :class:`URL` tuple. If the URL
    is lacking a scheme it can be provided as second argument. Otherwise,
    it is ignored. Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default schema to use if the URL is schemaless.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.
    """
    # ``s`` converts literals to the input's type (str or bytes) so the
    # same parsing code handles both representations.
    s = _make_encode_wrapper(url)
    is_text_based = isinstance(url, str)

    if scheme is None:
        scheme = s("")
    netloc = query = fragment = s("")
    i = url.find(s(":"))
    if i > 0 and _scheme_re.match(_to_str(url[:i], errors="replace")):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1 :]
        if not rest or any(c not in s("0123456789") for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s("//"):
        # The netloc ends at the first "/", "?", or "#", whichever
        # appears first (or the end of the string).
        delim = len(url)
        for c in s("/?#"):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        # Unbalanced brackets indicate a malformed IPv6 literal.
        if (s("[") in netloc and s("]") not in netloc) or (
            s("]") in netloc and s("[") not in netloc
        ):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, fragment = url.split(s("#"), 1)
    if s("?") in url:
        url, query = url.split(s("?"), 1)

    result_type = URL if is_text_based else BytesURL
    return result_type(scheme, netloc, url, query, fragment)

506 

507 

def _make_fast_url_quote(
    charset: str = "utf-8",
    errors: str = "strict",
    safe: t.Union[str, bytes] = "/:",
    unsafe: t.Union[str, bytes] = "",
) -> t.Callable[[bytes], str]:
    """Precompile the translation table for a URL encoding function.

    Unlike :func:`url_quote`, the generated function only takes the
    string to quote.

    :param charset: The charset to encode the result with.
    :param errors: How to handle encoding errors.
    :param safe: An optional sequence of safe characters to never encode.
    :param unsafe: An optional sequence of unsafe characters to always encode.
    """
    safe_bytes = safe.encode(charset, errors) if isinstance(safe, str) else safe
    unsafe_bytes = unsafe.encode(charset, errors) if isinstance(unsafe, str) else unsafe

    keep = (frozenset(bytearray(safe_bytes)) | _always_safe) - frozenset(
        bytearray(unsafe_bytes)
    )
    # One entry per possible byte value: the literal character when safe,
    # otherwise its "%XX" escape.
    table = [chr(value) if value in keep else f"%{value:02X}" for value in range(256)]

    def quote(string: bytes) -> str:
        return "".join([table[byte] for byte in string])

    return quote

537 

538 

# Module-level quoters precompiled with the common settings: one for
# plain URL quoting, and one (space safe, "+" unsafe) used by
# ``_fast_url_quote_plus`` for query strings.
_fast_url_quote = _make_fast_url_quote()
_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")

541 

542 

def _fast_url_quote_plus(string: bytes) -> str:
    """Quote *string* for a query string: "+" is percent-encoded and
    spaces become "+"."""
    return _fast_quote_plus(string).replace(" ", "+")

545 

546 

def url_quote(
    string: t.Union[str, bytes],
    charset: str = "utf-8",
    errors: str = "strict",
    safe: t.Union[str, bytes] = "/:",
    unsafe: t.Union[str, bytes] = "",
) -> str:
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. versionadded:: 0.9.2
        The `unsafe` parameter was added.
    """
    # Coerce arbitrary objects to text, then everything to bytes.
    if not isinstance(string, (str, bytes, bytearray)):
        string = str(string)

    if isinstance(string, str):
        string = string.encode(charset, errors)

    if isinstance(safe, str):
        safe = safe.encode(charset, errors)

    if isinstance(unsafe, str):
        unsafe = unsafe.encode(charset, errors)

    # Bytes to emit literally: always-safe plus caller-safe, minus
    # anything explicitly marked unsafe.
    keep = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
    out = bytearray()

    for byte in bytearray(string):
        if byte in keep:
            out.append(byte)
        else:
            out.extend(_bytetohex[byte])

    return bytes(out).decode(charset)

580 

581 

def url_quote_plus(
    string: str, charset: str = "utf-8", errors: str = "strict", safe: str = ""
) -> str:
    """URL encode a single string with the given encoding and convert
    whitespace to "+".

    :param s: The string to quote.
    :param charset: The charset to be used.
    :param safe: An optional sequence of safe characters.
    """
    # Keep spaces unquoted, force literal "+" to be quoted, then turn
    # the remaining spaces into "+".
    quoted = url_quote(string, charset, errors, safe + " ", "+")
    return quoted.replace(" ", "+")

593 

594 

def url_unparse(components: t.Tuple[str, str, str, str, str]) -> str:
    """The reverse operation to :meth:`url_parse`. This accepts arbitrary
    as well as :class:`URL` tuples and returns a URL as a string.

    :param components: the parsed URL as tuple which should be converted
                       into a URL string.
    """
    _check_str_tuple(components)
    scheme, netloc, path, query, fragment = components
    # ``s`` converts literals to the components' type (str or bytes).
    s = _make_encode_wrapper(scheme)
    url = s("")

    # We generally treat file:///x and file:/x the same which is also
    # what browsers seem to do. This also allows us to ignore a schema
    # register for netloc utilization or having to differentiate between
    # empty and missing netloc.
    if netloc or (scheme and path.startswith(s("/"))):
        if path and path[:1] != s("/"):
            path = s("/") + path
        url = s("//") + (netloc or s("")) + path
    elif path:
        url += path
    if scheme:
        url = scheme + s(":") + url
    if query:
        url = url + s("?") + query
    if fragment:
        url = url + s("#") + fragment
    return url

624 

625 

def url_unquote(
    s: t.Union[str, bytes],
    charset: str = "utf-8",
    errors: str = "replace",
    unsafe: str = "",
) -> str:
    """URL decode a single string with a given encoding. If the charset
    is set to `None` no decoding is performed and raw bytes are
    returned.

    :param s: the string to unquote.
    :param charset: the charset of the query string. If set to `None`
                    no decoding will take place.
    :param errors: the error handling for the charset decoding.
    """
    rv = _unquote_to_bytes(s, unsafe)
    if charset is None:
        # NOTE: despite the ``-> str`` annotation, this path returns
        # ``bytes`` as documented above.
        return rv
    return rv.decode(charset, errors)

645 

646 

def url_unquote_plus(
    s: t.Union[str, bytes], charset: str = "utf-8", errors: str = "replace"
) -> str:
    """URL decode a single string with the given `charset` and decode "+" to
    whitespace.

    Per default encoding errors are replaced. If you want a different
    behavior you can set `errors` to ``'ignore'`` or ``'strict'``.

    :param s: The string to unquote.
    :param charset: the charset of the query string. If set to `None`
                    no decoding will take place.
    :param errors: The error handling for the `charset` decoding.
    """
    # "+" is replaced before percent-decoding so "%2B" still decodes to
    # a literal plus sign.
    if isinstance(s, str):
        s = s.replace("+", " ")
    else:
        s = s.replace(b"+", b" ")
    return url_unquote(s, charset, errors)

666 

667 

def url_fix(s: str, charset: str = "utf-8") -> str:
    r"""Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on. This function can fix
    some of the problems in a similar way browsers handle data entered by the
    user:

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was given
                    as a string.
    """
    # Switch to text processing and convert backslashes (invalid in URLs
    # anyway) to forward slashes, which matches Chrome's behavior.
    s = _to_str(s, charset, "replace").replace("\\", "/")

    # Repair malformed Windows file URLs like "file://C:/..." manually.
    if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"):
        s = f"file:///{s[7:]}"

    parts = url_parse(s)
    query_safe = ":&%=+$!*'(),"
    fixed_path = url_quote(parts.path, charset, safe="/%+$!*'(),")
    fixed_query = url_quote_plus(parts.query, charset, safe=query_safe)
    fixed_fragment = url_quote_plus(parts.fragment, charset, safe=query_safe)
    return url_unparse(
        (parts.scheme, parts.encode_netloc(), fixed_path, fixed_query, fixed_fragment)
    )

696 

697 

# not-unreserved characters remain quoted when unquoting to IRI:
# every ASCII character that is not in ``_always_safe``.
_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe])

700 

701 

def _codec_error_url_quote(e: UnicodeError) -> t.Tuple[str, int]:
    """Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.
    """
    # The codecs docs state UnicodeError carries ``object``, ``start``
    # and ``end``, but mypy doesn't model those attributes.
    invalid = e.object[e.start : e.end]  # type: ignore
    return _fast_url_quote(invalid), e.end  # type: ignore

710 

711 

# Register the handler so str.encode/bytes.decode can use it by name.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)

713 

714 

def uri_to_iri(
    uri: t.Union[str, t.Tuple[str, str, str, str, str]],
    charset: str = "utf-8",
    errors: str = "werkzeug.url_quote",
) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to encode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.encode``. By
        default, invalid bytes are left quoted.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    # Accept a pre-split tuple by joining it back into a string first.
    if isinstance(uri, tuple):
        uri = url_unparse(uri)

    parsed = url_parse(_to_str(uri, charset))
    path, query, fragment = (
        url_unquote(part, charset, errors, _to_iri_unsafe)
        for part in (parsed.path, parsed.query, parsed.fragment)
    )
    return url_unparse((parsed.scheme, parsed.decode_netloc(), path, query, fragment))

747 

748 

# reserved characters remain unquoted when quoting to URI
# (RFC 3986 gen-delims + sub-delims, plus "%" so existing escapes survive).
_to_uri_safe = ":/?#[]@!$&'()*+,;=%"

751 

752 

def iri_to_uri(
    iri: t.Union[str, t.Tuple[str, str, str, str, str]],
    charset: str = "utf-8",
    errors: str = "strict",
    safe_conversion: bool = False,
) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :param charset: The encoding of the IRI.
    :param errors: Error handler to use during ``bytes.encode``.
    :param safe_conversion: Return the URL unchanged if it only contains
        ASCII characters and no whitespace. See the explanation below.

    There is a general problem with IRI conversion with some protocols
    that are in violation of the URI specification. Consider the
    following two IRIs::

        magnet:?xt=uri:whatever
        itms-services://?action=download-manifest

    After parsing, we don't know if the scheme requires the ``//``,
    which is dropped if empty, but conveys different meanings in the
    final URL if it's present or not. In this case, you can use
    ``safe_conversion``, which will return the URL unchanged if it only
    contains ASCII characters and no whitespace. This can result in a
    URI with unquoted characters if it was not already quoted correctly,
    but preserves the URL's semantics. Werkzeug uses this for the
    ``Location`` header for redirects.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some
        reserved characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    # Accept a pre-split tuple by joining it back into a string first.
    if isinstance(iri, tuple):
        iri = url_unparse(iri)

    if safe_conversion:
        # If we're not sure if it's safe to convert the URL, and it only
        # contains ASCII characters, return it unconverted.
        try:
            native_iri = _to_str(iri)
            ascii_iri = native_iri.encode("ascii")

            # Only return if it doesn't have whitespace. (Why?)
            if len(ascii_iri.split()) == 1:
                return native_iri
        except UnicodeError:
            # Contains non-ASCII characters; fall through to conversion.
            pass

    iri = url_parse(_to_str(iri, charset, errors))
    path = url_quote(iri.path, charset, errors, _to_uri_safe)
    query = url_quote(iri.query, charset, errors, _to_uri_safe)
    fragment = url_quote(iri.fragment, charset, errors, _to_uri_safe)
    return url_unparse((iri.scheme, iri.encode_netloc(), path, query, fragment))

817 

818 

def url_decode(
    s: t.AnyStr,
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: str = "&",
    cls: t.Optional[t.Type["ds.MultiDict"]] = None,
) -> "ds.MultiDict[str, str]":
    """Parse a query string and return it as a :class:`MultiDict`.

    :param s: The query string to parse.
    :param charset: Decode bytes to string with this charset. If not
        given, bytes are returned as-is.
    :param include_empty: Include keys with empty values in the dict.
    :param errors: Error handling behavior when decoding bytes.
    :param separator: Separator character between pairs.
    :param cls: Container to hold result instead of :class:`MultiDict`.

    .. versionchanged:: 2.0
        The ``decode_keys`` parameter is deprecated and will be removed
        in Werkzeug 2.1.

    .. versionchanged:: 0.5
        In previous versions ";" and "&" could be used for url decoding.
        Now only "&" is supported. If you want to use ";", a different
        ``separator`` can be provided.

    .. versionchanged:: 0.5
        The ``cls`` parameter was added.
    """
    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # Coerce the separator to the input's type so ``split`` works.
    if isinstance(s, str):
        if not isinstance(separator, str):
            separator = separator.decode(charset or "ascii")
    elif isinstance(s, bytes) and not isinstance(separator, bytes):
        separator = separator.encode(charset or "ascii")  # type: ignore

    raw_pairs = s.split(separator)  # type: ignore
    return cls(_url_decode_impl(raw_pairs, charset, include_empty, errors))

862 

863 

def url_decode_stream(
    stream: t.IO[bytes],
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: bytes = b"&",
    cls: t.Optional[t.Type["ds.MultiDict"]] = None,
    limit: t.Optional[int] = None,
) -> "ds.MultiDict[str, str]":
    """Works like :func:`url_decode` but decodes a stream. The behavior
    of stream and limit follows functions like
    :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is
    directly fed to the `cls` so you can consume the data while it's
    parsed.

    :param stream: a stream with the encoded querystring
    :param charset: the charset of the query string. If set to `None`
        no decoding will take place.
    :param include_empty: Set to `False` if you don't want empty values to
        appear in the dict.
    :param errors: the decoding error behavior.
    :param separator: the pair separator to be used, defaults to ``&``
    :param cls: an optional dict class to use. If this is not specified
        or `None` the default :class:`MultiDict` is used.
    :param limit: the content length of the URL data. Not necessary if
        a limited stream is provided.

    .. versionchanged:: 2.0
        The ``decode_keys`` and ``return_iterator`` parameters are
        deprecated and will be removed in Werkzeug 2.1.

    .. versionadded:: 0.8
    """
    from .wsgi import make_chunk_iter

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # Feed the lazy pair decoder straight into the container so data is
    # consumed as it is parsed.
    chunks = make_chunk_iter(stream, separator, limit)
    return cls(_url_decode_impl(chunks, charset, include_empty, errors))

909 

def _url_decode_impl(
    pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str
) -> t.Iterator[t.Tuple[str, str]]:
    """Yield decoded ``(key, value)`` tuples from raw querystring pairs,
    optionally skipping pairs that have no ``=``."""
    for pair in pair_iter:
        if not pair:
            continue

        # ``s`` converts literals to the pair's type (str or bytes).
        s = _make_encode_wrapper(pair)
        eq = s("=")

        if eq in pair:
            key, value = pair.split(eq, 1)
        elif include_empty:
            key, value = pair, s("")
        else:
            continue

        yield (
            url_unquote_plus(key, charset, errors),
            url_unquote_plus(value, charset, errors),
        )

929 

930 

def url_encode(
    obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]],
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]] = None,
    separator: str = "&",
) -> str:
    """URL encode a dict/`MultiDict`. If a value is `None` it will not appear
    in the result string. Per default only values are encoded into the target
    charset strings.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. versionchanged:: 2.0
        The ``encode_keys`` parameter is deprecated and will be removed
        in Werkzeug 2.1.

    .. versionchanged:: 0.5
        Added the ``sort``, ``key``, and ``separator`` parameters.
    """
    # The separator may arrive as bytes; normalize to str before joining.
    sep = _to_str(separator, "ascii")
    encoded_pairs = _url_encode_impl(obj, charset, sort, key)
    return sep.join(encoded_pairs)

958 

959 

def url_encode_stream(
    obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]],
    stream: t.Optional[t.IO[str]] = None,
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]] = None,
    separator: str = "&",
) -> None:
    """Like :meth:`url_encode` but writes the results to a stream
    object. If the stream is `None` a generator over all encoded
    pairs is returned.

    :param obj: the object to encode into a query string.
    :param stream: a stream to write the encoded object into or `None` if
        an iterator over the encoded pairs should be returned. In
        that case the separator argument is ignored.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. versionchanged:: 2.0
        The ``encode_keys`` parameter is deprecated and will be removed
        in Werkzeug 2.1.

    .. versionadded:: 0.8
    """
    sep = _to_str(separator, "ascii")
    pairs = _url_encode_impl(obj, charset, sort, key)

    # Despite the ``-> None`` annotation, the historical contract is to
    # hand back the generator itself when no stream is given.
    if stream is None:
        return pairs  # type: ignore

    # Write the separator *between* pairs only — never before the first.
    first = True
    for chunk in pairs:
        if not first:
            stream.write(sep)
        stream.write(chunk)
        first = False

    return None

997 

998 

def url_join(
    base: t.Union[str, t.Tuple[str, str, str, str, str]],
    url: t.Union[str, t.Tuple[str, str, str, str, str]],
    allow_fragments: bool = True,
) -> str:
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    :param base: the base URL for the join operation.
    :param url: the URL to join.
    :param allow_fragments: indicates whether fragments should be allowed.
    """
    # Either argument may be a pre-parsed 5-tuple; flatten to a string first.
    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    # Both must be str or both bytes; mixing is an error.
    _check_str_tuple((base, url))
    # Wrapper so the string literals below work for str and bytes alike.
    s = _make_encode_wrapper(base)

    # Trivial cases: an empty side yields the other unchanged.
    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
        base, allow_fragments=allow_fragments
    )
    # Parsing with ``bscheme`` as default makes a scheme-less ``url``
    # inherit the base's scheme.
    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
    # A different scheme means ``url`` is already absolute in its own scheme.
    if scheme != bscheme:
        return url
    # An explicit netloc also makes ``url`` authoritative on its own.
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s("/"):
        # Absolute path: ignore the base path entirely.
        segments = path.split(s("/"))
    elif not path:
        # No path at all: keep the base path, and the base query too
        # unless ``url`` supplied its own.
        segments = bpath.split(s("/"))
        if not query:
            query = bquery
    else:
        # Relative path: resolve against the base path's directory
        # (everything up to, but not including, its last segment).
        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s("."):
        segments[-1] = s("")

    # Resolve ".." and "."
    segments = [segment for segment in segments if segment != s(".")]
    # Repeatedly collapse one "<segment>/.." pair per pass; the inner loop
    # restarts after each deletion, and the ``while``'s ``else`` fires when
    # a full pass finds nothing left to collapse.
    while True:
        i = 1
        n = len(segments) - 1
        while i < n:
            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
                del segments[i - 1 : i + 1]
                break
            i += 1
        else:
            break

    # Remove trailing ".." if the URL is absolute
    unwanted_marker = [s(""), s("..")]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s("/").join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))