Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 25%
434 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:35 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:35 +0000
1"""Functions for working with URLs.
3Contains implementations of functions from :mod:`urllib.parse` that
4handle bytes and strings.
5"""
6import codecs
7import os
8import re
9import typing as t
11from ._internal import _check_str_tuple
12from ._internal import _decode_idna
13from ._internal import _encode_idna
14from ._internal import _make_encode_wrapper
15from ._internal import _to_str
17if t.TYPE_CHECKING:
18 from . import datastructures as ds
# A regular expression for what a valid scheme looks like.  Note the
# ``-`` is placed last in the character class: the previous form
# ``[a-zA-Z0-9+-.]`` contained the accidental range ``+``-``.`` which
# also matched ``,`` (an invalid scheme character per RFC 3986).
_scheme_re = re.compile(r"^[a-zA-Z0-9+.-]+$")

# Characters that are safe in any part of an URL.
_always_safe = frozenset(
    bytearray(
        b"abcdefghijklmnopqrstuvwxyz"
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        b"0123456789"
        b"-._~"
        b"$!'()*+,;"  # RFC3986 sub-delims set, not including query string delimiters &=
    )
)

_hexdigits = "0123456789ABCDEFabcdef"
# Maps every two-character hex digit pair (all case combinations, as
# ASCII bytes) to the integer byte value it encodes, e.g. b"2F" -> 47.
_hextobyte = {
    f"{a}{b}".encode("ascii"): int(f"{a}{b}", 16)
    for a in _hexdigits
    for b in _hexdigits
}
# Maps a byte value to its percent-encoded form, e.g. 32 -> b"%20".
_bytetohex = [f"%{char:02X}".encode("ascii") for char in range(256)]
class _URLTuple(t.NamedTuple):
    # Shared field layout for :class:`URL` and :class:`BytesURL`.  Mirrors
    # the 5-tuple produced by ``urllib.parse.urlsplit``:
    # (scheme, netloc, path, query, fragment).  Fields are annotated as
    # ``str`` but hold ``bytes`` in the BytesURL subclass.
    scheme: str
    netloc: str
    path: str
    query: str
    fragment: str
class BaseURL(_URLTuple):
    """Superclass of :py:class:`URL` and :py:class:`BytesURL`.

    Provides all the shared accessors; the subclasses only supply the
    separator constants below (as ``str`` or ``bytes``) so the parsing
    helpers work on either representation.
    """

    __slots__ = ()
    # Separator characters, overridden per subclass so comparisons and
    # ``partition`` calls match the tuple's element type.
    _at: str
    _colon: str
    _lbracket: str
    _rbracket: str

    def __str__(self) -> str:
        return self.to_url()

    def replace(self, **kwargs: t.Any) -> "BaseURL":
        """Return an URL with the same values, except for those parameters
        given new values by whichever keyword arguments are specified."""
        return self._replace(**kwargs)

    @property
    def host(self) -> t.Optional[str]:
        """The host part of the URL if available, otherwise `None`. The
        host is either the hostname or the IP address mentioned in the
        URL. It will not contain the port.
        """
        return self._split_host()[0]

    @property
    def ascii_host(self) -> t.Optional[str]:
        """Works exactly like :attr:`host` but will return a result that
        is restricted to ASCII. If it finds a netloc that is not ASCII
        it will attempt to idna decode it. This is useful for socket
        operations when the URL might include internationalized characters.
        """
        rv = self.host
        if rv is not None and isinstance(rv, str):
            try:
                rv = _encode_idna(rv)  # type: ignore
            except UnicodeError:
                # Host is not IDNA-encodable; drop the offending characters.
                rv = rv.encode("ascii", "ignore")  # type: ignore
        return _to_str(rv, "ascii", "ignore")

    @property
    def port(self) -> t.Optional[int]:
        """The port in the URL as an integer if it was present, `None`
        otherwise. This does not fill in default ports.
        """
        try:
            rv = int(_to_str(self._split_host()[1]))
            if 0 <= rv <= 65535:
                return rv
        except (ValueError, TypeError):
            # No port present (TypeError) or not numeric (ValueError).
            pass
        return None

    @property
    def auth(self) -> t.Optional[str]:
        """The authentication part in the URL if available, `None`
        otherwise.
        """
        return self._split_netloc()[0]

    @property
    def username(self) -> t.Optional[str]:
        """The username if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[0]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_username(self) -> t.Optional[str]:
        """The username if it was part of the URL, `None` otherwise.
        Unlike :attr:`username` this one is not being decoded.
        """
        return self._split_auth()[0]

    @property
    def password(self) -> t.Optional[str]:
        """The password if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[1]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_password(self) -> t.Optional[str]:
        """The password if it was part of the URL, `None` otherwise.
        Unlike :attr:`password` this one is not being decoded.
        """
        return self._split_auth()[1]

    def decode_query(self, *args: t.Any, **kwargs: t.Any) -> "ds.MultiDict[str, str]":
        """Decodes the query part of the URL. Ths is a shortcut for
        calling :func:`url_decode` on the query argument. The arguments and
        keyword arguments are forwarded to :func:`url_decode` unchanged.
        """
        return url_decode(self.query, *args, **kwargs)

    def join(self, *args: t.Any, **kwargs: t.Any) -> "BaseURL":
        """Joins this URL with another one. This is just a convenience
        function for calling into :meth:`url_join` and then parsing the
        return value again.
        """
        return url_parse(url_join(self, *args, **kwargs))

    def to_url(self) -> str:
        """Returns a URL string or bytes depending on the type of the
        information stored. This is just a convenience function
        for calling :meth:`url_unparse` for this URL.
        """
        return url_unparse(self)

    def encode_netloc(self) -> str:
        """Encodes the netloc part to an ASCII safe URL as bytes."""
        rv = self.ascii_host or ""
        if ":" in rv:
            # Bare IPv6 addresses must be bracketed in the netloc.
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # Re-quote credentials; empty components are dropped by filter().
        auth = ":".join(
            filter(
                None,
                [
                    url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
                    url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def decode_netloc(self) -> str:
        """Decodes the netloc part into a string."""
        rv = _decode_idna(self.host or "")

        if ":" in rv:
            # Bare IPv6 addresses must be bracketed in the netloc.
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # Unquote credentials; empty components are dropped by filter().
        auth = ":".join(
            filter(
                None,
                [
                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def to_uri_tuple(self) -> "BaseURL":
        """Returns a :class:`BytesURL` tuple that holds a URI. This will
        encode all the information in the URL properly to ASCII using the
        rules a web browser would follow.

        It's usually more interesting to directly call :meth:`iri_to_uri` which
        will return a string.
        """
        return url_parse(iri_to_uri(self))

    def to_iri_tuple(self) -> "BaseURL":
        """Returns a :class:`URL` tuple that holds a IRI. This will try
        to decode as much information as possible in the URL without
        losing information similar to how a web browser does it for the
        URL bar.

        It's usually more interesting to directly call :meth:`uri_to_iri` which
        will return a string.
        """
        return url_parse(uri_to_iri(self))

    def get_file_location(
        self, pathformat: t.Optional[str] = None
    ) -> t.Tuple[t.Optional[str], t.Optional[str]]:
        """Returns a tuple with the location of the file in the form
        ``(server, location)``. If the netloc is empty in the URL or
        points to localhost, it's represented as ``None``.

        The `pathformat` by default is autodetection but needs to be set
        when working with URLs of a specific system. The supported values
        are ``'windows'`` when working with Windows or DOS paths and
        ``'posix'`` when working with posix paths.

        If the URL does not point to a local file, the server and location
        are both represented as ``None``.

        :param pathformat: The expected format of the path component.
                           Currently ``'windows'`` and ``'posix'`` are
                           supported. Defaults to ``None`` which is
                           autodetect.
        """
        if self.scheme != "file":
            return None, None

        path = url_unquote(self.path)
        host = self.netloc or None

        if pathformat is None:
            # Autodetect based on the running OS.
            if os.name == "nt":
                pathformat = "windows"
            else:
                pathformat = "posix"

        if pathformat == "windows":
            # Turn ``/C:/...`` (or the old ``/C|/...`` form) into ``C:/...``.
            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:":
                path = f"{path[1:2]}:{path[3:]}"
            windows_share = path[:3] in ("\\" * 3, "/" * 3)
            import ntpath

            path = ntpath.normpath(path)
            # Windows shared drives are represented as ``\\host\\directory``.
            # That results in a URL like ``file://///host/directory``, and a
            # path like ``///host/directory``. We need to special-case this
            # because the path contains the hostname.
            if windows_share and host is None:
                parts = path.lstrip("\\").split("\\", 1)
                if len(parts) == 2:
                    host, path = parts
                else:
                    host = parts[0]
                    path = ""
        elif pathformat == "posix":
            import posixpath

            path = posixpath.normpath(path)
        else:
            raise TypeError(f"Invalid path format {pathformat!r}")

        # Loopback addresses are treated the same as an empty netloc.
        if host in ("127.0.0.1", "::1", "localhost"):
            host = None

        return host, path

    def _split_netloc(self) -> t.Tuple[t.Optional[str], str]:
        # Split ``user:pass@host:port`` into (auth, hostport).
        if self._at in self.netloc:
            auth, _, netloc = self.netloc.partition(self._at)
            return auth, netloc
        return None, self.netloc

    def _split_auth(self) -> t.Tuple[t.Optional[str], t.Optional[str]]:
        # Split the auth part into (username, password); both still quoted.
        auth = self._split_netloc()[0]
        if not auth:
            return None, None
        if self._colon not in auth:
            return auth, None

        username, _, password = auth.partition(self._colon)
        return username, password

    def _split_host(self) -> t.Tuple[t.Optional[str], t.Optional[str]]:
        # Split the hostport part into (host, port); port stays a string.
        rv = self._split_netloc()[1]
        if not rv:
            return None, None

        if not rv.startswith(self._lbracket):
            if self._colon in rv:
                host, _, port = rv.partition(self._colon)
                return host, port
            return rv, None

        # Bracketed IPv6 literal: ``[host]`` optionally followed by ``:port``.
        idx = rv.find(self._rbracket)
        if idx < 0:
            return rv, None

        host = rv[1:idx]
        rest = rv[idx + 1 :]
        if rest.startswith(self._colon):
            return host, rest[1:]
        return host, None
class URL(BaseURL):
    """A parsed URL whose components are strings. Behaves like a plain
    tuple while exposing extra attributes that give further insight
    into the URL.
    """

    __slots__ = ()
    _at = "@"
    _colon = ":"
    _lbracket = "["
    _rbracket = "]"

    def encode(self, charset: str = "utf-8", errors: str = "replace") -> "BytesURL":
        """Return this URL as a :class:`BytesURL`. The scheme is always
        ASCII-encoded; ``charset`` applies only to the path, query and
        fragment.
        """
        scheme = self.scheme.encode("ascii")  # type: ignore
        netloc = self.encode_netloc()
        path = self.path.encode(charset, errors)  # type: ignore
        query = self.query.encode(charset, errors)  # type: ignore
        fragment = self.fragment.encode(charset, errors)  # type: ignore
        return BytesURL(scheme, netloc, path, query, fragment)
class BytesURL(BaseURL):
    """A parsed URL whose components are bytes."""

    __slots__ = ()
    _at = b"@"  # type: ignore
    _colon = b":"  # type: ignore
    _lbracket = b"["  # type: ignore
    _rbracket = b"]"  # type: ignore

    def __str__(self) -> str:
        return self.to_url().decode("utf-8", "replace")  # type: ignore

    def encode_netloc(self) -> bytes:  # type: ignore
        """Return the netloc unchanged as bytes."""
        return self.netloc  # type: ignore

    def decode(self, charset: str = "utf-8", errors: str = "replace") -> "URL":
        """Return this URL as a :class:`URL` of strings. The scheme is
        always ASCII-decoded; ``charset`` applies only to the path,
        query and fragment.
        """
        parts = (
            self.scheme.decode("ascii"),  # type: ignore
            self.decode_netloc(),
            self.path.decode(charset, errors),  # type: ignore
            self.query.decode(charset, errors),  # type: ignore
            self.fragment.decode(charset, errors),  # type: ignore
        )
        return URL(*parts)
# Cache of hex-pair lookup tables keyed by the set of byte values that
# must stay percent-encoded.  The empty set maps to the full table.
_unquote_maps: t.Dict[t.FrozenSet[int], t.Dict[bytes, int]] = {frozenset(): _hextobyte}


def _unquote_to_bytes(
    string: t.Union[str, bytes], unsafe: t.Union[str, bytes] = ""
) -> bytes:
    """Percent-decode *string* to raw bytes, leaving any escape whose
    decoded byte appears in *unsafe* (and any malformed escape) intact.
    """
    if isinstance(string, str):
        string = string.encode("utf-8")

    if isinstance(unsafe, str):
        unsafe = unsafe.encode("utf-8")

    unsafe = frozenset(bytearray(unsafe))
    pieces = iter(string.split(b"%"))
    # Everything before the first "%" is copied verbatim.
    out = bytearray(next(pieces, b""))

    table = _unquote_maps.get(unsafe)
    if table is None:
        # Build (and memoize) a table that omits the unsafe byte values.
        table = _unquote_maps[unsafe] = {
            h: b for h, b in _hextobyte.items() if b not in unsafe
        }

    for piece in pieces:
        pair = piece[:2]

        if pair in table:
            out.append(table[pair])
            out.extend(piece[2:])
        else:
            # Not a decodable escape: restore the "%" and keep the text.
            out.append(37)  # %
            out.extend(piece)

    return bytes(out)
def _url_encode_impl(
    obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]],
    charset: str,
    sort: bool,
    key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]],
) -> t.Iterator[str]:
    """Yield ``quoted_key=quoted_value`` strings for each pair in *obj*.

    Pairs whose value is ``None`` are skipped.  Non-bytes keys and values
    are stringified and encoded with *charset* first.
    """
    from .datastructures import iter_multi_items

    items: t.Iterable[t.Tuple[str, str]] = iter_multi_items(obj)

    if sort:
        items = sorted(items, key=key)

    for item_key, item_value in items:
        if item_value is None:
            continue

        kb = item_key if isinstance(item_key, bytes) else str(item_key).encode(charset)
        vb = (
            item_value
            if isinstance(item_value, bytes)
            else str(item_value).encode(charset)
        )
        yield f"{_fast_url_quote_plus(kb)}={_fast_url_quote_plus(vb)}"
def _url_unquote_legacy(value: str, unsafe: str = "") -> str:
    """Unquote *value*, preferring strict UTF-8 and falling back to
    latin1 with lenient error handling when the bytes are not UTF-8.
    """
    try:
        return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
    except UnicodeError:
        # latin1 maps every byte, so this second attempt cannot fail.
        return url_unquote(value, charset="latin1", errors="replace", unsafe=unsafe)
def url_parse(
    url: str, scheme: t.Optional[str] = None, allow_fragments: bool = True
) -> BaseURL:
    """Parses a URL from a string into a :class:`URL` tuple. If the URL
    is lacking a scheme it can be provided as second argument. Otherwise,
    it is ignored. Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default schema to use if the URL is schemaless.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.
    """
    # ``s`` converts literal strings to str or bytes to match ``url``.
    s = _make_encode_wrapper(url)
    is_text_based = isinstance(url, str)

    if scheme is None:
        scheme = s("")
    netloc = query = fragment = s("")
    i = url.find(s(":"))
    if i > 0 and _scheme_re.match(_to_str(url[:i], errors="replace")):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1 :]
        if not rest or any(c not in s("0123456789") for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s("//"):
        # The netloc extends to the first of "/", "?" or "#" (or the end).
        delim = len(url)
        for c in s("/?#"):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        # Reject an unbalanced IPv6 bracket pair.
        if (s("[") in netloc and s("]") not in netloc) or (
            s("]") in netloc and s("[") not in netloc
        ):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, fragment = url.split(s("#"), 1)
    if s("?") in url:
        url, query = url.split(s("?"), 1)

    # Mirror the input type: str input yields URL, bytes yields BytesURL.
    result_type = URL if is_text_based else BytesURL
    return result_type(scheme, netloc, url, query, fragment)
def _make_fast_url_quote(
    charset: str = "utf-8",
    errors: str = "strict",
    safe: t.Union[str, bytes] = "/:",
    unsafe: t.Union[str, bytes] = "",
) -> t.Callable[[bytes], str]:
    """Precompile the translation table for a URL encoding function.

    Unlike :func:`url_quote`, the generated function only takes the
    string to quote.

    :param charset: The charset to encode the result with.
    :param errors: How to handle encoding errors.
    :param safe: An optional sequence of safe characters to never encode.
    :param unsafe: An optional sequence of unsafe characters to always encode.
    """
    if isinstance(safe, str):
        safe = safe.encode(charset, errors)

    if isinstance(unsafe, str):
        unsafe = unsafe.encode(charset, errors)

    # Unsafe wins over safe; _always_safe supplies the RFC 3986 baseline.
    allowed = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
    table = [chr(b) if b in allowed else f"%{b:02X}" for b in range(256)]

    def quote(string: bytes) -> str:
        # One table lookup per input byte, joined at C speed.
        return "".join(map(table.__getitem__, string))

    return quote
# Pre-compiled quoter with the default safe set ("/:").
_fast_url_quote = _make_fast_url_quote()
# Quoter for query strings: leaves " " unquoted (turned into "+" below)
# and always percent-encodes a literal "+".
_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")


def _fast_url_quote_plus(string: bytes) -> str:
    """Quote *string* for a query component, encoding spaces as ``+``."""
    return _fast_quote_plus(string).replace(" ", "+")
def url_quote(
    string: t.Union[str, bytes],
    charset: str = "utf-8",
    errors: str = "strict",
    safe: t.Union[str, bytes] = "/:",
    unsafe: t.Union[str, bytes] = "",
) -> str:
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    # Normalize everything to bytes before quoting.
    if not isinstance(string, (str, bytes, bytearray)):
        string = str(string)
    if isinstance(string, str):
        string = string.encode(charset, errors)
    if isinstance(safe, str):
        safe = safe.encode(charset, errors)
    if isinstance(unsafe, str):
        unsafe = unsafe.encode(charset, errors)

    # Unsafe wins over safe; _always_safe supplies the RFC 3986 baseline.
    allowed = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
    out = bytearray()

    for byte in bytearray(string):
        if byte in allowed:
            out.append(byte)
        else:
            out.extend(_bytetohex[byte])

    return bytes(out).decode(charset)
def url_quote_plus(
    string: str, charset: str = "utf-8", errors: str = "strict", safe: str = ""
) -> str:
    """URL encode a single string with the given encoding and convert
    whitespace to "+".

    :param s: The string to quote.
    :param charset: The charset to be used.
    :param safe: An optional sequence of safe characters.
    """
    # Treat " " as safe during quoting, mark "+" unsafe, then swap the
    # remaining literal spaces for "+".
    quoted = url_quote(string, charset, errors, safe + " ", "+")
    return quoted.replace(" ", "+")
def url_unparse(components: t.Tuple[str, str, str, str, str]) -> str:
    """The reverse operation to :meth:`url_parse`. This accepts arbitrary
    as well as :class:`URL` tuples and returns a URL as a string.

    :param components: the parsed URL as tuple which should be converted
                       into a URL string.
    """
    # All five components must be uniformly str or uniformly bytes.
    _check_str_tuple(components)
    scheme, netloc, path, query, fragment = components
    # ``s`` converts literal strings to match the components' type.
    s = _make_encode_wrapper(scheme)
    url = s("")

    # We generally treat file:///x and file:/x the same which is also
    # what browsers seem to do. This also allows us to ignore a schema
    # register for netloc utilization or having to differentiate between
    # empty and missing netloc.
    if netloc or (scheme and path.startswith(s("/"))):
        if path and path[:1] != s("/"):
            path = s("/") + path
        url = s("//") + (netloc or s("")) + path
    elif path:
        url += path
    if scheme:
        url = scheme + s(":") + url
    if query:
        url = url + s("?") + query
    if fragment:
        url = url + s("#") + fragment
    return url
def url_unquote(
    s: t.Union[str, bytes],
    charset: t.Optional[str] = "utf-8",
    errors: str = "replace",
    unsafe: str = "",
) -> str:
    """URL decode a single string with a given encoding. If the charset
    is set to `None` no decoding is performed and raw bytes are
    returned.

    :param s: the string to unquote.
    :param charset: the charset of the query string. If set to `None`
                    no decoding will take place.
    :param errors: the error handling for the charset decoding.
    :param unsafe: characters that must remain percent-encoded.
    """
    rv = _unquote_to_bytes(s, unsafe)
    if charset is None:
        # NOTE: returns raw ``bytes`` here despite the ``str`` return
        # annotation, which is kept for backwards compatibility.
        return rv
    return rv.decode(charset, errors)
def url_unquote_plus(
    s: t.Union[str, bytes], charset: str = "utf-8", errors: str = "replace"
) -> str:
    """URL decode a single string with the given `charset` and decode "+" to
    whitespace.

    Per default encoding errors are ignored. If you want a different behavior
    you can set `errors` to ``'replace'`` or ``'strict'``.

    :param s: The string to unquote.
    :param charset: the charset of the query string. If set to `None`
                    no decoding will take place.
    :param errors: The error handling for the `charset` decoding.
    """
    # Pick replacement literals matching the input type (str vs bytes).
    plus, space = ("+", " ") if isinstance(s, str) else (b"+", b" ")
    return url_unquote(s.replace(plus, space), charset, errors)
def url_fix(s: str, charset: str = "utf-8") -> str:
    r"""Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on. This function can fix
    some of the problems in a similar way browsers handle data entered by the
    user:

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was given
                    as a string.
    """
    # First step is to switch to text processing and to convert
    # backslashes (which are invalid in URLs anyways) to slashes. This is
    # consistent with what Chrome does.
    s = _to_str(s, charset, "replace").replace("\\", "/")

    # For the specific case that we look like a malformed windows URL
    # we want to fix this up manually:
    if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"):
        s = f"file:///{s[7:]}"

    url = url_parse(s)
    # Re-quote each component; "%" stays safe so existing escapes survive.
    path = url_quote(url.path, charset, safe="/%+$!*'(),")
    qs = url_quote_plus(url.query, charset, safe=":&%=+$!*'(),")
    anchor = url_quote_plus(url.fragment, charset, safe=":&%=+$!*'(),")
    return url_unparse((url.scheme, url.encode_netloc(), path, qs, anchor))
# not-unreserved characters remain quoted when unquoting to IRI
# (every ASCII byte that is not in ``_always_safe``).
_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe])
def _codec_error_url_quote(e: UnicodeError) -> t.Tuple[str, int]:
    """Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.
    """
    # the docs state that UnicodeError does have these attributes,
    # but mypy isn't picking them up
    out = _fast_url_quote(e.object[e.start : e.end])  # type: ignore
    # Resume decoding right after the re-quoted span.
    return out, e.end  # type: ignore


# Module-level side effect: makes the handler available to any
# ``bytes.decode(..., errors="werkzeug.url_quote")`` call.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
def uri_to_iri(
    uri: t.Union[str, t.Tuple[str, str, str, str, str]],
    charset: str = "utf-8",
    errors: str = "werkzeug.url_quote",
) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to encode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.encode``. By
        default, invalid bytes are left quoted.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    if isinstance(uri, tuple):
        uri = url_unparse(uri)

    uri = url_parse(_to_str(uri, charset))
    # Unquote only characters outside ``_always_safe``; the default error
    # handler (registered above) re-quotes any bytes that aren't valid
    # in ``charset`` instead of replacing them.
    path = url_unquote(uri.path, charset, errors, _to_iri_unsafe)
    query = url_unquote(uri.query, charset, errors, _to_iri_unsafe)
    fragment = url_unquote(uri.fragment, charset, errors, _to_iri_unsafe)
    return url_unparse((uri.scheme, uri.decode_netloc(), path, query, fragment))
# reserved characters remain unquoted when quoting to URI
# (the RFC 3986 gen-delims and sub-delims, plus "%" to keep escapes).
_to_uri_safe = ":/?#[]@!$&'()*+,;=%"
def iri_to_uri(
    iri: t.Union[str, t.Tuple[str, str, str, str, str]],
    charset: str = "utf-8",
    errors: str = "strict",
    safe_conversion: bool = False,
) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :param charset: The encoding of the IRI.
    :param errors: Error handler to use during ``bytes.encode``.
    :param safe_conversion: Return the URL unchanged if it only contains
        ASCII characters and no whitespace. See the explanation below.

    There is a general problem with IRI conversion with some protocols
    that are in violation of the URI specification. Consider the
    following two IRIs::

        magnet:?xt=uri:whatever
        itms-services://?action=download-manifest

    After parsing, we don't know if the scheme requires the ``//``,
    which is dropped if empty, but conveys different meanings in the
    final URL if it's present or not. In this case, you can use
    ``safe_conversion``, which will return the URL unchanged if it only
    contains ASCII characters and no whitespace. This can result in a
    URI with unquoted characters if it was not already quoted correctly,
    but preserves the URL's semantics. Werkzeug uses this for the
    ``Location`` header for redirects.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some
        reserved characters were left unquoted.

    .. versionchanged:: 0.9.6
       The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    if isinstance(iri, tuple):
        iri = url_unparse(iri)

    if safe_conversion:
        # If we're not sure if it's safe to convert the URL, and it only
        # contains ASCII characters, return it unconverted.
        try:
            native_iri = _to_str(iri)
            ascii_iri = native_iri.encode("ascii")

            # Only return if it doesn't have whitespace. (Why?)
            if len(ascii_iri.split()) == 1:
                return native_iri
        except UnicodeError:
            # Non-ASCII characters present; fall through to full conversion.
            pass

    iri = url_parse(_to_str(iri, charset, errors))
    # Quote each component, leaving reserved characters and "%" intact.
    path = url_quote(iri.path, charset, errors, _to_uri_safe)
    query = url_quote(iri.query, charset, errors, _to_uri_safe)
    fragment = url_quote(iri.fragment, charset, errors, _to_uri_safe)
    return url_unparse((iri.scheme, iri.encode_netloc(), path, query, fragment))
def url_decode(
    s: t.AnyStr,
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: str = "&",
    cls: t.Optional[t.Type["ds.MultiDict"]] = None,
) -> "ds.MultiDict[str, str]":
    """Parse a query string and return it as a :class:`MultiDict`.

    :param s: The query string to parse.
    :param charset: Decode bytes to string with this charset. If not
        given, bytes are returned as-is.
    :param include_empty: Include keys with empty values in the dict.
    :param errors: Error handling behavior when decoding bytes.
    :param separator: Separator character between pairs.
    :param cls: Container to hold result instead of :class:`MultiDict`.

    .. versionchanged:: 2.0
        The ``decode_keys`` parameter is deprecated and will be removed
        in Werkzeug 2.1.

    .. versionchanged:: 0.5
        In previous versions ";" and "&" could be used for url decoding.
        Now only "&" is supported. If you want to use ";", a different
        ``separator`` can be provided.

    .. versionchanged:: 0.5
       The ``cls`` parameter was added.
    """
    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict
    # Coerce the separator to the same type as ``s`` so split() works
    # for both str and bytes input.
    if isinstance(s, str) and not isinstance(separator, str):
        separator = separator.decode(charset or "ascii")
    elif isinstance(s, bytes) and not isinstance(separator, bytes):
        separator = separator.encode(charset or "ascii")  # type: ignore
    return cls(
        _url_decode_impl(
            s.split(separator), charset, include_empty, errors  # type: ignore
        )
    )
def url_decode_stream(
    stream: t.IO[bytes],
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: bytes = b"&",
    cls: t.Optional[t.Type["ds.MultiDict"]] = None,
    limit: t.Optional[int] = None,
) -> "ds.MultiDict[str, str]":
    """Works like :func:`url_decode` but decodes a stream. The behavior
    of stream and limit follows functions like
    :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is
    directly fed to the `cls` so you can consume the data while it's
    parsed.

    :param stream: a stream with the encoded querystring
    :param charset: the charset of the query string. If set to `None`
        no decoding will take place.
    :param include_empty: Set to `False` if you don't want empty values to
                          appear in the dict.
    :param errors: the decoding error behavior.
    :param separator: the pair separator to be used, defaults to ``&``
    :param cls: an optional dict class to use. If this is not specified
                       or `None` the default :class:`MultiDict` is used.
    :param limit: the content length of the URL data.  Not necessary if
                  a limited stream is provided.

    .. versionchanged:: 2.0
        The ``decode_keys`` and ``return_iterator`` parameters are
        deprecated and will be removed in Werkzeug 2.1.

    .. versionadded:: 0.8
    """
    from .wsgi import make_chunk_iter

    # Stream chunks split on the separator; decoding happens lazily so
    # ``cls`` can consume pairs as they are parsed.
    pair_iter = make_chunk_iter(stream, separator, limit)
    decoder = _url_decode_impl(pair_iter, charset, include_empty, errors)

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    return cls(decoder)
def _url_decode_impl(
    pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str
) -> t.Iterator[t.Tuple[str, str]]:
    """Yield unquoted ``(key, value)`` pairs from an iterable of raw
    ``key=value`` chunks (str or bytes).
    """
    for raw in pair_iter:
        if not raw:
            continue

        # ``s`` converts literal strings to match the chunk's type.
        s = _make_encode_wrapper(raw)
        equal = s("=")

        if equal in raw:
            key, value = raw.split(equal, 1)
        elif include_empty:
            # Bare key with no "=": treat the value as empty.
            key, value = raw, s("")
        else:
            continue

        yield (
            url_unquote_plus(key, charset, errors),
            url_unquote_plus(value, charset, errors),
        )
def url_encode(
    obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]],
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]] = None,
    separator: str = "&",
) -> str:
    """URL encode a dict/`MultiDict`. If a value is `None` it will not appear
    in the result string. Per default only values are encoded into the target
    charset strings.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting.  For more details
                check out the :func:`sorted` documentation.

    .. versionchanged:: 2.0
        The ``encode_keys`` parameter is deprecated and will be removed
        in Werkzeug 2.1.

    .. versionchanged:: 0.5
        Added the ``sort``, ``key``, and ``separator`` parameters.
    """
    sep = _to_str(separator, "ascii")
    return sep.join(_url_encode_impl(obj, charset, sort, key))
def url_encode_stream(
    obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]],
    stream: t.Optional[t.IO[str]] = None,
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]] = None,
    separator: str = "&",
) -> None:
    """Like :meth:`url_encode` but writes the results to a stream
    object.  If the stream is `None` a generator over all encoded
    pairs is returned.

    :param obj: the object to encode into a query string.
    :param stream: a stream to write the encoded object into or `None` if
                   an iterator over the encoded pairs should be returned.  In
                   that case the separator argument is ignored.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting.  For more details
                check out the :func:`sorted` documentation.

    .. versionchanged:: 2.0
        The ``encode_keys`` parameter is deprecated and will be removed
        in Werkzeug 2.1.

    .. versionadded:: 0.8
    """
    separator = _to_str(separator, "ascii")
    gen = _url_encode_impl(obj, charset, sort, key)
    if stream is None:
        # Despite the ``None`` return annotation, the generator is
        # returned when no stream is given (backwards compatibility).
        return gen  # type: ignore
    for idx, chunk in enumerate(gen):
        if idx:
            # Separator goes between pairs, never after the last one.
            stream.write(separator)
        stream.write(chunk)
    return None
def url_join(
    base: t.Union[str, t.Tuple[str, str, str, str, str]],
    url: t.Union[str, t.Tuple[str, str, str, str, str]],
    allow_fragments: bool = True,
) -> str:
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    :param base: the base URL for the join operation.
    :param url: the URL to join.
    :param allow_fragments: indicates whether fragments should be allowed.
    """
    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    # Both URLs must be uniformly str or uniformly bytes.
    _check_str_tuple((base, url))
    s = _make_encode_wrapper(base)

    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
        base, allow_fragments=allow_fragments
    )
    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
    if scheme != bscheme:
        # Different scheme: the second URL stands on its own.
        return url
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s("/"):
        # Absolute path: replaces the base path entirely.
        segments = path.split(s("/"))
    elif not path:
        # Empty path: keep the base path (and base query if none given).
        segments = bpath.split(s("/"))
        if not query:
            query = bquery
    else:
        # Relative path: resolve against the base path's directory.
        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s("."):
        segments[-1] = s("")

    # Resolve ".." and "."
    segments = [segment for segment in segments if segment != s(".")]
    while True:
        i = 1
        n = len(segments) - 1
        while i < n:
            # Collapse a "<segment>/.." pair, then restart the scan.
            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
                del segments[i - 1 : i + 1]
                break
            i += 1
        else:
            break

    # Remove trailing ".." if the URL is absolute
    unwanted_marker = [s(""), s("..")]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s("/").join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))