Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 72%
532 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-09 06:08 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-09 06:08 +0000
1"""Functions for working with URLs.
3Contains implementations of functions from :mod:`urllib.parse` that
4handle bytes and strings.
5"""
6from __future__ import annotations
8import codecs
9import os
10import re
11import typing as t
12import warnings
13from urllib.parse import quote
14from urllib.parse import unquote
15from urllib.parse import urlencode
16from urllib.parse import urlsplit
17from urllib.parse import urlunsplit
19from ._internal import _check_str_tuple
20from ._internal import _decode_idna
21from ._internal import _make_encode_wrapper
22from ._internal import _to_str
23from .datastructures import iter_multi_items
25if t.TYPE_CHECKING:
26 from . import datastructures as ds
28# A regular expression for what a valid schema looks like
29_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$")
31# Characters that are safe in any part of an URL.
32_always_safe_chars = (
33 "abcdefghijklmnopqrstuvwxyz"
34 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
35 "0123456789"
36 "-._~"
37 "$!'()*+,;" # RFC3986 sub-delims set, not including query string delimiters &=
38)
39_always_safe = frozenset(_always_safe_chars.encode("ascii"))
41_hexdigits = "0123456789ABCDEFabcdef"
42_hextobyte = {
43 f"{a}{b}".encode("ascii"): int(f"{a}{b}", 16)
44 for a in _hexdigits
45 for b in _hexdigits
46}
47_bytetohex = [f"%{char:02X}".encode("ascii") for char in range(256)]
class _URLTuple(t.NamedTuple):
    # Field order matches the result of urllib.parse.urlsplit:
    # (scheme, netloc, path, query, fragment).
    scheme: str
    netloc: str
    path: str
    query: str
    fragment: str
class BaseURL(_URLTuple):
    """Superclass of :py:class:`URL` and :py:class:`BytesURL`.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # Delimiters used by the ``_split_*`` helpers below. Subclasses set
    # them to str (URL) or bytes (BytesURL) so the same parsing logic
    # works for both representations.
    _at: str
    _colon: str
    _lbracket: str
    _rbracket: str

    def __new__(cls, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        # Warn on every construction: the whole class hierarchy is
        # deprecated in favor of urllib.parse.
        warnings.warn(
            f"'werkzeug.urls.{cls.__name__}' is deprecated and will be removed in"
            " Werkzeug 3.0. Use the 'urllib.parse' library instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return super().__new__(cls, *args, **kwargs)

    def __str__(self) -> str:
        return self.to_url()

    def replace(self, **kwargs: t.Any) -> BaseURL:
        """Return an URL with the same values, except for those parameters
        given new values by whichever keyword arguments are specified."""
        return self._replace(**kwargs)

    @property
    def host(self) -> str | None:
        """The host part of the URL if available, otherwise `None`. The
        host is either the hostname or the IP address mentioned in the
        URL. It will not contain the port.
        """
        return self._split_host()[0]

    @property
    def ascii_host(self) -> str | None:
        """Works exactly like :attr:`host` but will return a result that
        is restricted to ASCII. If it finds a netloc that is not ASCII
        it will attempt to idna decode it. This is useful for socket
        operations when the URL might include internationalized characters.
        """
        rv = self.host
        if rv is not None and isinstance(rv, str):
            try:
                rv = rv.encode("idna").decode("ascii")
            except UnicodeError:
                # Host is not IDNA-encodable; return it unchanged.
                pass
        return rv

    @property
    def port(self) -> int | None:
        """The port in the URL as an integer if it was present, `None`
        otherwise. This does not fill in default ports.
        """
        try:
            rv = int(_to_str(self._split_host()[1]))
            # Out-of-range values fall through and yield None.
            if 0 <= rv <= 65535:
                return rv
        except (ValueError, TypeError):
            # No port present, or it is not numeric.
            pass
        return None

    @property
    def auth(self) -> str | None:
        """The authentication part in the URL if available, `None`
        otherwise.
        """
        return self._split_netloc()[0]

    @property
    def username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[0]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_username(self) -> str | None:
        """The username if it was part of the URL, `None` otherwise.
        Unlike :attr:`username` this one is not being decoded.
        """
        return self._split_auth()[0]

    @property
    def password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        This undergoes URL decoding and will always be a string.
        """
        rv = self._split_auth()[1]
        if rv is not None:
            return _url_unquote_legacy(rv)
        return None

    @property
    def raw_password(self) -> str | None:
        """The password if it was part of the URL, `None` otherwise.
        Unlike :attr:`password` this one is not being decoded.
        """
        return self._split_auth()[1]

    def decode_query(self, *args: t.Any, **kwargs: t.Any) -> ds.MultiDict[str, str]:
        """Decodes the query part of the URL. This is a shortcut for
        calling :func:`url_decode` on the query argument. The arguments and
        keyword arguments are forwarded to :func:`url_decode` unchanged.
        """
        return url_decode(self.query, *args, **kwargs)

    def join(self, *args: t.Any, **kwargs: t.Any) -> BaseURL:
        """Joins this URL with another one. This is just a convenience
        function for calling into :meth:`url_join` and then parsing the
        return value again.
        """
        return url_parse(url_join(self, *args, **kwargs))

    def to_url(self) -> str:
        """Returns a URL string or bytes depending on the type of the
        information stored. This is just a convenience function
        for calling :meth:`url_unparse` for this URL.
        """
        return url_unparse(self)

    def encode_netloc(self) -> str:
        """Encodes the netloc part to an ASCII safe URL as bytes."""
        rv = self.ascii_host or ""
        if ":" in rv:
            # Bare IPv6 addresses must be bracketed in the netloc.
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        # filter(None, ...) drops empty user/password parts so the ":"
        # separator only appears when both are present.
        auth = ":".join(
            filter(
                None,
                [
                    url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
                    url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def decode_netloc(self) -> str:
        """Decodes the netloc part into a string."""
        host = self.host or ""

        if isinstance(host, bytes):
            host = host.decode()

        # Punycode -> Unicode for internationalized domain names.
        rv = _decode_idna(host)

        if ":" in rv:
            # Re-bracket IPv6 addresses.
            rv = f"[{rv}]"
        port = self.port
        if port is not None:
            rv = f"{rv}:{port}"
        auth = ":".join(
            filter(
                None,
                [
                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
                ],
            )
        )
        if auth:
            rv = f"{auth}@{rv}"
        return rv

    def to_uri_tuple(self) -> BaseURL:
        """Returns a :class:`BytesURL` tuple that holds a URI. This will
        encode all the information in the URL properly to ASCII using the
        rules a web browser would follow.

        It's usually more interesting to directly call :meth:`iri_to_uri` which
        will return a string.
        """
        return url_parse(iri_to_uri(self))

    def to_iri_tuple(self) -> BaseURL:
        """Returns a :class:`URL` tuple that holds a IRI. This will try
        to decode as much information as possible in the URL without
        losing information similar to how a web browser does it for the
        URL bar.

        It's usually more interesting to directly call :meth:`uri_to_iri` which
        will return a string.
        """
        return url_parse(uri_to_iri(self))

    def get_file_location(
        self, pathformat: str | None = None
    ) -> tuple[str | None, str | None]:
        """Returns a tuple with the location of the file in the form
        ``(server, location)``. If the netloc is empty in the URL or
        points to localhost, it's represented as ``None``.

        The `pathformat` by default is autodetection but needs to be set
        when working with URLs of a specific system. The supported values
        are ``'windows'`` when working with Windows or DOS paths and
        ``'posix'`` when working with posix paths.

        If the URL does not point to a local file, the server and location
        are both represented as ``None``.

        :param pathformat: The expected format of the path component.
                           Currently ``'windows'`` and ``'posix'`` are
                           supported. Defaults to ``None`` which is
                           autodetect.
        """
        if self.scheme != "file":
            return None, None

        path = url_unquote(self.path)
        host = self.netloc or None

        if pathformat is None:
            # Autodetect the path flavor from the running OS.
            if os.name == "nt":
                pathformat = "windows"
            else:
                pathformat = "posix"

        if pathformat == "windows":
            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:":
                # Turn ``/C:/...`` (or the legacy ``/C|/...``) into ``C:/...``.
                path = f"{path[1:2]}:{path[3:]}"
            windows_share = path[:3] in ("\\" * 3, "/" * 3)
            import ntpath

            path = ntpath.normpath(path)
            # Windows shared drives are represented as ``\\host\\directory``.
            # That results in a URL like ``file://///host/directory``, and a
            # path like ``///host/directory``. We need to special-case this
            # because the path contains the hostname.
            if windows_share and host is None:
                parts = path.lstrip("\\").split("\\", 1)
                if len(parts) == 2:
                    host, path = parts
                else:
                    host = parts[0]
                    path = ""
        elif pathformat == "posix":
            import posixpath

            path = posixpath.normpath(path)
        else:
            raise TypeError(f"Invalid path format {pathformat!r}")

        if host in ("127.0.0.1", "::1", "localhost"):
            # Loopback hosts are normalized to "no host".
            host = None

        return host, path

    def _split_netloc(self) -> tuple[str | None, str]:
        # Split "auth@host:port" into (auth, "host:port").
        if self._at in self.netloc:
            auth, _, netloc = self.netloc.partition(self._at)
            return auth, netloc
        return None, self.netloc

    def _split_auth(self) -> tuple[str | None, str | None]:
        # Split the auth part into (username, password).
        auth = self._split_netloc()[0]
        if not auth:
            return None, None
        if self._colon not in auth:
            return auth, None

        username, _, password = auth.partition(self._colon)
        return username, password

    def _split_host(self) -> tuple[str | None, str | None]:
        # Split "host:port" into (host, port), handling bracketed IPv6.
        rv = self._split_netloc()[1]
        if not rv:
            return None, None

        if not rv.startswith(self._lbracket):
            if self._colon in rv:
                host, _, port = rv.partition(self._colon)
                return host, port
            return rv, None

        idx = rv.find(self._rbracket)
        if idx < 0:
            # Unterminated bracket: treat the whole thing as the host.
            return rv, None

        host = rv[1:idx]
        rest = rv[idx + 1 :]
        if rest.startswith(self._colon):
            return host, rest[1:]
        return host, None
class URL(BaseURL):
    """Represents a parsed URL. This behaves like a regular tuple but
    also has some extra attributes that give further insight into the
    URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # str delimiters for the BaseURL splitting helpers.
    _at = "@"
    _colon = ":"
    _lbracket = "["
    _rbracket = "]"

    def encode(self, charset: str = "utf-8", errors: str = "replace") -> BytesURL:
        """Encodes the URL to a tuple made out of bytes. The charset is
        only being used for the path, query and fragment.
        """
        # Scheme must be plain ASCII; the netloc performs its own IDNA
        # handling; the remaining fields use the caller's charset.
        encoded = (
            self.scheme.encode("ascii"),
            self.encode_netloc(),
        ) + tuple(part.encode(charset, errors) for part in self[2:])
        return BytesURL(*encoded)
class BytesURL(BaseURL):
    """Represents a parsed URL in bytes.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use the ``urllib.parse`` library instead.
    """

    __slots__ = ()
    # bytes delimiters for the BaseURL splitting helpers.
    _at = b"@"  # type: ignore
    _colon = b":"  # type: ignore
    _lbracket = b"["  # type: ignore
    _rbracket = b"]"  # type: ignore

    def __str__(self) -> str:
        # Undecodable bytes are replaced rather than raising.
        return self.to_url().decode("utf-8", "replace")  # type: ignore

    def encode_netloc(self) -> bytes:  # type: ignore
        """Returns the netloc unchanged as bytes."""
        return self.netloc  # type: ignore

    def decode(self, charset: str = "utf-8", errors: str = "replace") -> URL:
        """Decodes the URL to a tuple made out of strings. The charset is
        only being used for the path, query and fragment.
        """
        # Mirror of URL.encode: ASCII scheme, IDNA-aware netloc, and
        # charset-decoded path/query/fragment.
        decoded = (
            self.scheme.decode("ascii"),  # type: ignore
            self.decode_netloc(),
        ) + tuple(
            part.decode(charset, errors)  # type: ignore
            for part in (self.path, self.query, self.fragment)
        )
        return URL(*decoded)
# Cache of hex->byte lookup tables, keyed by the set of byte values that
# must stay percent-encoded. The empty set maps to the full table.
_unquote_maps: dict[frozenset[int], dict[bytes, int]] = {frozenset(): _hextobyte}


def _unquote_to_bytes(string: str | bytes, unsafe: str | bytes = "") -> bytes:
    """Percent-decode ``string`` to bytes, leaving any escape that decodes
    to a byte listed in ``unsafe`` in its quoted form.
    """
    if isinstance(string, str):
        string = string.encode("utf-8")

    if isinstance(unsafe, str):
        unsafe = unsafe.encode("utf-8")

    unsafe_key = frozenset(bytearray(unsafe))
    # Build (and memoize) a table that simply omits the unsafe bytes, so
    # their escapes fail the lookup below and are kept verbatim.
    hex_map = _unquote_maps.get(unsafe_key)

    if hex_map is None:
        hex_map = _unquote_maps[unsafe_key] = {
            h: b for h, b in _hextobyte.items() if b not in unsafe_key
        }

    pieces = iter(string.split(b"%"))
    decoded = bytearray(next(pieces, b""))

    for piece in pieces:
        hex_code = piece[:2]

        if hex_code in hex_map:
            decoded.append(hex_map[hex_code])
            decoded.extend(piece[2:])
        else:
            # Not a decodable (or allowed) escape: keep the "%" literally.
            decoded.append(37)  # %
            decoded.extend(piece)

    return bytes(decoded)
def _url_encode_impl(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str,
    sort: bool,
    key: t.Callable[[tuple[str, str]], t.Any] | None,
) -> t.Iterator[str]:
    """Yield ``key=value`` pairs, form-quoted, for a query string.

    :param obj: A mapping or iterable of ``(key, value)`` pairs.
    :param charset: Charset used to encode non-bytes keys and values.
    :param sort: Sort the pairs before yielding.
    :param key: Sort key function, forwarded to :func:`sorted`.
    """
    # ``iter_multi_items`` is already imported at module level; the
    # previous function-local re-import was redundant and shadowing.
    iterable: t.Iterable[tuple[str, str]] = iter_multi_items(obj)

    if sort:
        iterable = sorted(iterable, key=key)

    for key_str, value_str in iterable:
        # A None value means "no value at all"; skip the pair entirely.
        if value_str is None:
            continue

        if not isinstance(key_str, bytes):
            key_bytes = str(key_str).encode(charset)
        else:
            key_bytes = key_str

        if not isinstance(value_str, bytes):
            value_bytes = str(value_str).encode(charset)
        else:
            value_bytes = value_str

        yield f"{_fast_url_quote_plus(key_bytes)}={_fast_url_quote_plus(value_bytes)}"
def _url_unquote_legacy(value: str, unsafe: str = "") -> str:
    # Unquote as strict UTF-8 first; if the percent-decoded bytes are not
    # valid UTF-8, fall back to latin1, which can decode any byte.
    try:
        return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
    except UnicodeError:
        return url_unquote(value, charset="latin1", unsafe=unsafe)
def url_parse(
    url: str, scheme: str | None = None, allow_fragments: bool = True
) -> BaseURL:
    """Parses a URL from a string into a :class:`URL` tuple. If the URL
    is lacking a scheme it can be provided as second argument. Otherwise,
    it is ignored. Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default schema to use if the URL is schemaless.
    :param allow_fragments: if set to `False` a fragment will be removed
        from the URL.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_parse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # ``s`` converts literals to str or bytes to match the input type so
    # the same code path handles both.
    s = _make_encode_wrapper(url)
    is_text_based = isinstance(url, str)

    if scheme is None:
        scheme = s("")
    netloc = query = fragment = s("")
    i = url.find(s(":"))
    if i > 0 and _scheme_re.match(_to_str(url[:i], errors="replace")):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1 :]
        if not rest or any(c not in s("0123456789") for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s("//"):
        # The netloc runs from after "//" to the first "/", "?" or "#".
        delim = len(url)
        for c in s("/?#"):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        # An IPv6 netloc must have balanced brackets.
        if (s("[") in netloc and s("]") not in netloc) or (
            s("]") in netloc and s("[") not in netloc
        ):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, fragment = url.split(s("#"), 1)
    if s("?") in url:
        url, query = url.split(s("?"), 1)

    # Return the tuple flavor matching the input type.
    result_type = URL if is_text_based else BytesURL

    return result_type(scheme, netloc, url, query, fragment)
def _make_fast_url_quote(
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> t.Callable[[bytes], str]:
    """Precompile the translation table for a URL encoding function.

    Unlike :func:`url_quote`, the generated function only takes the
    string to quote.

    :param charset: The charset to encode the result with.
    :param errors: How to handle encoding errors.
    :param safe: An optional sequence of safe characters to never encode.
    :param unsafe: An optional sequence of unsafe characters to always encode.
    """
    safe_bytes = safe.encode(charset, errors) if isinstance(safe, str) else safe
    unsafe_bytes = unsafe.encode(charset, errors) if isinstance(unsafe, str) else unsafe
    # Unsafe bytes win over both the caller's safe set and the default
    # always-safe characters.
    keep = (frozenset(bytearray(safe_bytes)) | _always_safe) - frozenset(
        bytearray(unsafe_bytes)
    )
    # One table entry per possible byte value: either the literal
    # character or its percent-encoded form.
    table = [chr(b) if b in keep else f"%{b:02X}" for b in range(256)]

    def quote(string: bytes) -> str:
        return "".join([table[b] for b in string])

    return quote
# Default quoter, precompiled once at import time.
_fast_url_quote = _make_fast_url_quote()
# Form-data quoter: " " is kept safe here so it can be converted to "+"
# below, while a literal "+" is forced to be percent-encoded.
_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")


def _fast_url_quote_plus(string: bytes) -> str:
    # Quote with spaces preserved, then apply the form-encoding
    # space -> "+" rule.
    return _fast_quote_plus(string).replace(" ", "+")
def url_quote(
    string: str | bytes,
    charset: str = "utf-8",
    errors: str = "strict",
    safe: str | bytes = "/:",
    unsafe: str | bytes = "",
) -> str:
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote`` instead.

    .. versionadded:: 0.9.2
        The `unsafe` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_quote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.quote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Normalize the input and both character sets down to bytes.
    if not isinstance(string, (str, bytes, bytearray)):
        string = str(string)
    if isinstance(string, str):
        string = string.encode(charset, errors)
    if isinstance(safe, str):
        safe = safe.encode(charset, errors)
    if isinstance(unsafe, str):
        unsafe = unsafe.encode(charset, errors)
    # Unsafe bytes override both the caller's safe set and the default
    # always-safe characters.
    keep = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
    out = bytearray()
    for byte in bytearray(string):
        if byte in keep:
            out.append(byte)
        else:
            out.extend(_bytetohex[byte])
    return bytes(out).decode(charset)
def url_quote_plus(
    string: str, charset: str = "utf-8", errors: str = "strict", safe: str = ""
) -> str:
    """URL encode a single string with the given encoding and convert
    whitespace to "+".

    :param s: The string to quote.
    :param charset: The charset to be used.
    :param safe: An optional sequence of safe characters.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.quote_plus`` instead.
    """
    # The warning previously said "removed in Werkzeug 2.4", contradicting
    # the docstring and every other deprecation message in this module.
    warnings.warn(
        "'werkzeug.urls.url_quote_plus' is deprecated and will be removed in Werkzeug"
        " 3.0. Use 'urllib.parse.quote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Mark " " as safe so url_quote leaves it alone, then convert it to
    # "+"; a literal "+" is passed as unsafe so it gets percent-encoded.
    return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+")
def url_unparse(components: tuple[str, str, str, str, str]) -> str:
    """The reverse operation to :meth:`url_parse`. This accepts arbitrary
    as well as :class:`URL` tuples and returns a URL as a string.

    :param components: the parsed URL as tuple which should be converted
        into a URL string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.urlunsplit`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unparse' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.urlunsplit' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    # All five components must be uniformly str or uniformly bytes.
    _check_str_tuple(components)
    scheme, netloc, path, query, fragment = components
    # ``s`` converts literals to str or bytes to match the input type.
    s = _make_encode_wrapper(scheme)
    url = s("")

    # We generally treat file:///x and file:/x the same which is also
    # what browsers seem to do. This also allows us to ignore a schema
    # register for netloc utilization or having to differentiate between
    # empty and missing netloc.
    if netloc or (scheme and path.startswith(s("/"))):
        if path and path[:1] != s("/"):
            path = s("/") + path
        url = s("//") + (netloc or s("")) + path
    elif path:
        url += path
    if scheme:
        url = scheme + s(":") + url
    if query:
        url = url + s("?") + query
    if fragment:
        url = url + s("#") + fragment
    return url
def url_unquote(
    s: str | bytes,
    charset: str = "utf-8",
    errors: str = "replace",
    unsafe: str = "",
) -> str:
    """URL decode a single string with a given encoding. If the charset
    is set to `None` no decoding is performed and raw bytes are
    returned.

    :param s: the string to unquote.
    :param charset: the charset of the query string. If set to `None`
        no decoding will take place.
    :param errors: the error handling for the charset decoding.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_unquote' is deprecated and will be removed in Werkzeug 3.0."
        " Use 'urllib.parse.unquote' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    raw = _unquote_to_bytes(s, unsafe)
    # charset=None means the caller wants the raw bytes back undecoded.
    return raw if charset is None else raw.decode(charset, errors)
def url_unquote_plus(
    s: str | bytes, charset: str = "utf-8", errors: str = "replace"
) -> str:
    """URL decode a single string with the given `charset` and decode "+" to
    whitespace.

    Per default encoding errors are ignored. If you want a different behavior
    you can set `errors` to ``'replace'`` or ``'strict'``.

    :param s: The string to unquote.
    :param charset: the charset of the query string. If set to `None`
        no decoding will take place.
    :param errors: The error handling for the `charset` decoding.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.unquote_plus`` instead.
    """
    # The warning previously said "removed in Werkzeug 2.4", contradicting
    # the docstring and every other deprecation message in this module.
    warnings.warn(
        "'werkzeug.urls.url_unquote_plus' is deprecated and will be removed in Werkzeug"
        " 3.0. Use 'urllib.parse.unquote_plus' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # Apply the form-encoding "+" -> " " rule before percent-decoding,
    # for both str and bytes input.
    if isinstance(s, str):
        s = s.replace("+", " ")
    else:
        s = s.replace(b"+", b" ")

    return url_unquote(s, charset, errors)
def url_fix(s: str, charset: str = "utf-8") -> str:
    r"""Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on. This function can fix
    some of the problems in a similar way browsers handle data entered by the
    user:

    >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was given
        as a string.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0.
    """
    warnings.warn(
        "'werkzeug.urls.url_fix' is deprecated and will be removed in Werkzeug 3.0.",
        DeprecationWarning,
        stacklevel=2,
    )
    # Switch to text processing and convert backslashes (invalid in URLs
    # anyway) to forward slashes, consistent with what Chrome does.
    s = _to_str(s, charset, "replace").replace("\\", "/")

    # Repair malformed Windows file URLs such as ``file://c:/...`` by
    # inserting the missing third slash.
    if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"):
        s = f"file:///{s[7:]}"

    parsed = url_parse(s)
    # Query and fragment share the same safe set.
    qs_safe = ":&%=+$!*'(),"
    fixed_path = url_quote(parsed.path, charset, safe="/%+$!*'(),")
    fixed_query = url_quote_plus(parsed.query, charset, safe=qs_safe)
    fixed_anchor = url_quote_plus(parsed.fragment, charset, safe=qs_safe)
    return url_unparse(
        (parsed.scheme, parsed.encode_netloc(), fixed_path, fixed_query, fixed_anchor)
    )
def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler used in :func:`uri_to_iri` after unquoting:
    instead of replacing or dropping undecodable bytes, percent-quote
    them back.
    """
    # UnicodeError does expose .object/.start/.end at runtime even though
    # the static type stubs don't declare them, hence the ignores.
    invalid = e.object[e.start : e.end]  # type: ignore
    return quote(invalid, safe=""), e.end  # type: ignore


codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
808def _make_unquote_part(name: str, chars: str) -> t.Callable[[str, str, str], str]:
809 """Create a function that unquotes all percent encoded characters except those
810 given. This allows working with unquoted characters if possible while not changing
811 the meaning of a given part of a URL.
812 """
813 choices = "|".join(f"{ord(c):02X}" for c in sorted(chars))
814 pattern = re.compile(f"((?:%(?:{choices}))+)", re.I)
816 def _unquote_partial(value: str, encoding: str, errors: str) -> str:
817 parts = iter(pattern.split(value))
818 out = []
820 for part in parts:
821 out.append(unquote(part, encoding, errors))
822 out.append(next(parts, ""))
824 return "".join(out)
826 _unquote_partial.__name__ = f"_unquote_{name}"
827 return _unquote_partial
830# characters that should remain quoted in URL parts
831# based on https://url.spec.whatwg.org/#percent-encoded-bytes
832# always keep all controls, space, and % quoted
833_always_unsafe = bytes((*range(0x21), 0x25, 0x7F)).decode()
834_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
835_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")
836_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")
837_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")
def uri_to_iri(
    uri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to encode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.encode``. By
        default, invalid bytes are left quoted.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    if isinstance(uri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = urlunsplit(uri)

    if isinstance(uri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        uri = uri.decode()

    if charset is not None:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        charset = "utf-8"

    if errors is not None:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        # The default handler (registered above) re-quotes invalid bytes
        # instead of replacing them.
        errors = "werkzeug.url_quote"

    parts = urlsplit(uri)
    # Each URL part keeps its own set of characters percent-encoded.
    path = _unquote_path(parts.path, charset, errors)
    query = _unquote_query(parts.query, charset, errors)
    fragment = _unquote_fragment(parts.fragment, charset, errors)

    if parts.hostname:
        # Punycode -> Unicode for internationalized domain names.
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    if ":" in netloc:
        # Re-bracket bare IPv6 addresses.
        netloc = f"[{netloc}]"

    if parts.port:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = _unquote_user(parts.username, charset, errors)

        if parts.password:
            auth = f"{auth}:{_unquote_user(parts.password, charset, errors)}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
def iri_to_uri(
    iri: str | tuple[str, str, str, str, str],
    charset: str | None = None,
    errors: str | None = None,
    safe_conversion: bool | None = None,
) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :param charset: The encoding of the IRI.
    :param errors: Error handler to use during ``bytes.encode``.

    .. versionchanged:: 2.3
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters, are
        deprecated and will be removed in Werkzeug 3.0.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 2.3
        The ``safe_conversion`` parameter is deprecated and will be removed in Werkzeug
        2.4.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    if charset is not None:
        warnings.warn(
            "The 'charset' parameter is deprecated and will be removed"
            " in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        charset = "utf-8"

    if isinstance(iri, tuple):
        warnings.warn(
            "Passing a tuple is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        iri = urlunsplit(iri)

    if isinstance(iri, bytes):
        warnings.warn(
            "Passing bytes is deprecated and will not be supported in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
        iri = iri.decode(charset)

    if errors is not None:
        warnings.warn(
            "The 'errors' parameter is deprecated and will be removed in Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )
    else:
        errors = "strict"

    if safe_conversion is not None:
        warnings.warn(
            "The 'safe_conversion' parameter is deprecated and will be removed in"
            " Werkzeug 3.0.",
            DeprecationWarning,
            stacklevel=2,
        )

        if safe_conversion:
            # If we're not sure if it's safe to normalize the URL, and it only contains
            # ASCII characters, return it as-is.
            try:
                ascii_iri = iri.encode("ascii")

                # Only return if it doesn't have whitespace. (Why?)
                if len(ascii_iri.split()) == 1:
                    return iri
            except UnicodeError:
                pass

    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@", encoding=charset, errors=errors)
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@", encoding=charset, errors=errors)
    fragment = quote(
        parts.fragment, safe="%!#$&'()*+,/:;=?@", encoding=charset, errors=errors
    )

    if parts.hostname:
        # Unicode -> Punycode so the netloc is pure ASCII.
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    if ":" in netloc:
        # Re-bracket bare IPv6 addresses.
        netloc = f"[{netloc}]"

    if parts.port:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            pass_quoted = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{pass_quoted}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
1056def _invalid_iri_to_uri(iri: str) -> str:
1057 """The URL scheme ``itms-services://`` must contain the ``//`` even though it does
1058 not have a host component. There may be other invalid schemes as well. Currently,
1059 responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which
1060 removes the ``//``. For now, if the IRI only contains ASCII and does not contain
1061 spaces, pass it on as-is. In Werkzeug 3.0, this should become a
1062 ``response.process_location`` flag.
1064 :meta private:
1065 """
1066 try:
1067 iri.encode("ascii")
1068 except UnicodeError:
1069 pass
1070 else:
1071 if len(iri.split(None, 1)) == 1:
1072 return iri
1074 return iri_to_uri(iri)
def url_decode(
    s: t.AnyStr,
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: str = "&",
    cls: type[ds.MultiDict] | None = None,
) -> ds.MultiDict[str, str]:
    """Parse a query string into a :class:`MultiDict`.

    :param s: The query string to parse.
    :param charset: Decode bytes to string with this charset. If not
        given, bytes are returned as-is.
    :param include_empty: Include keys with empty values in the dict.
    :param errors: Error handling behavior when decoding bytes.
    :param separator: Separator character between pairs.
    :param cls: Container to hold result instead of :class:`MultiDict`.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 3.0. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        In previous versions ";" and "&" could be used for url decoding.
        Now only "&" is supported. If you want to use ";", a different
        ``separator`` can be provided.

    .. versionchanged:: 0.5
        The ``cls`` parameter was added.
    """
    warnings.warn(
        "'werkzeug.urls.url_decode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    # The separator's type (str vs bytes) must match the input's before
    # splitting.
    if isinstance(s, str):
        if not isinstance(separator, str):
            separator = separator.decode(charset or "ascii")
    elif isinstance(s, bytes) and not isinstance(separator, bytes):
        separator = separator.encode(charset or "ascii")  # type: ignore

    pairs = s.split(separator)  # type: ignore
    return cls(_url_decode_impl(pairs, charset, include_empty, errors))
def url_decode_stream(
    stream: t.IO[bytes],
    charset: str = "utf-8",
    include_empty: bool = True,
    errors: str = "replace",
    separator: bytes = b"&",
    cls: type[ds.MultiDict] | None = None,
    limit: int | None = None,
) -> ds.MultiDict[str, str]:
    """Like :func:`url_decode`, but reads the query string from a stream.
    Stream and limit behave as in functions such as
    :func:`~werkzeug.wsgi.make_line_iter`. Pairs are generated lazily and
    fed directly to `cls`, so data can be consumed while it is parsed.

    :param stream: a stream with the encoded querystring
    :param charset: the charset of the query string. If set to `None`
        no decoding will take place.
    :param include_empty: Set to `False` if you don't want empty values to
        appear in the dict.
    :param errors: the decoding error behavior.
    :param separator: the pair separator to be used, defaults to ``&``
    :param cls: an optional dict class to use. If this is not specified
        or `None` the default :class:`MultiDict` is used.
    :param limit: the content length of the URL data. Not necessary if
        a limited stream is provided.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.parse_qs`` instead.

    .. versionchanged:: 2.1
        The ``decode_keys`` and ``return_iterator`` parameters were removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_decode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.parse_qs' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if cls is None:
        from .datastructures import MultiDict  # noqa: F811

        cls = MultiDict

    from .wsgi import make_chunk_iter

    chunks = make_chunk_iter(stream, separator, limit)
    return cls(_url_decode_impl(chunks, charset, include_empty, errors))
def _url_decode_impl(
    pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str
) -> t.Iterator[tuple[str, str]]:
    """Yield unquoted ``(key, value)`` pairs from an iterable of raw
    ``key=value`` chunks, skipping empty chunks. A chunk without ``=``
    becomes a pair with an empty value (or is dropped when
    ``include_empty`` is false).
    """
    for raw in pair_iter:
        if not raw:
            continue

        # Wrapper produces literals of the same type (str/bytes) as the chunk.
        wrap = _make_encode_wrapper(raw)
        eq = wrap("=")

        if eq not in raw:
            # No "=": the whole chunk is the key with an empty value.
            if not include_empty:
                continue
            name, val = raw, wrap("")
        else:
            name, val = raw.split(eq, 1)

        yield (
            url_unquote_plus(name, charset, errors),
            url_unquote_plus(val, charset, errors),
        )
def url_encode(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> str:
    """URL encode a dict/`MultiDict`. Pairs whose value is `None` are
    omitted from the result. By default only values are encoded into the
    target charset.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionchanged:: 0.5
        Added the ``sort``, ``key``, and ``separator`` parameters.
    """
    warnings.warn(
        "'werkzeug.urls.url_encode' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    sep = _to_str(separator, "ascii")
    return sep.join(_url_encode_impl(obj, charset, sort, key))
def url_encode_stream(
    obj: t.Mapping[str, str] | t.Iterable[tuple[str, str]],
    stream: t.IO[str] | None = None,
    charset: str = "utf-8",
    sort: bool = False,
    key: t.Callable[[tuple[str, str]], t.Any] | None = None,
    separator: str = "&",
) -> None:
    """Like :meth:`url_encode` but writes the result to a stream object.
    If the stream is `None`, a generator over all encoded pairs is
    returned instead.

    :param obj: the object to encode into a query string.
    :param stream: a stream to write the encoded object into or `None` if
        an iterator over the encoded pairs should be returned. In
        that case the separator argument is ignored.
    :param charset: the charset of the query string.
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting. For more details
        check out the :func:`sorted` documentation.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urlencode`` instead.

    .. versionchanged:: 2.1
        The ``encode_keys`` parameter was removed.

    .. versionadded:: 0.8
    """
    warnings.warn(
        "'werkzeug.urls.url_encode_stream' is deprecated and will be removed in"
        " Werkzeug 2.4. Use 'urllib.parse.urlencode' instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    sep = _to_str(separator, "ascii")
    chunks = _url_encode_impl(obj, charset, sort, key)

    if stream is None:
        return chunks  # type: ignore

    # Write the separator before every chunk except the first.
    first = True

    for chunk in chunks:
        if not first:
            stream.write(sep)

        stream.write(chunk)
        first = False

    return None
def url_join(
    base: str | tuple[str, str, str, str, str],
    url: str | tuple[str, str, str, str, str],
    allow_fragments: bool = True,
) -> str:
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    Tuples are first serialized with :func:`url_unparse`. Both arguments
    must then be the same string type (checked by ``_check_str_tuple``).
    The relative reference is resolved against the base similarly to
    ``urllib.parse.urljoin``, including collapsing ``.`` and ``..`` path
    segments.

    :param base: the base URL for the join operation.
    :param url: the URL to join.
    :param allow_fragments: indicates whether fragments should be allowed.

    .. deprecated:: 2.3
        Will be removed in Werkzeug 2.4. Use ``urllib.parse.urljoin`` instead.
    """
    warnings.warn(
        "'werkzeug.urls.url_join' is deprecated and will be removed in Werkzeug 2.4."
        " Use 'urllib.parse.urljoin' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    _check_str_tuple((base, url))
    # ``s`` makes literals of the same type (str/bytes) as ``base``.
    s = _make_encode_wrapper(base)

    # An empty base or url short-circuits to the other argument.
    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
        base, allow_fragments=allow_fragments
    )
    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
    # A different scheme means ``url`` is already absolute and unrelated.
    if scheme != bscheme:
        return url
    # An explicit netloc also makes ``url`` authoritative for everything
    # after the scheme.
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s("/"):
        # Absolute path: ignore the base path entirely.
        segments = path.split(s("/"))
    elif not path:
        # No path: keep the base path, and the base query if none given.
        segments = bpath.split(s("/"))
        if not query:
            query = bquery
    else:
        # Relative path: resolve against the base path's directory.
        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s("."):
        segments[-1] = s("")

    # Resolve ".." and "." — drop "." segments, then repeatedly collapse
    # each "parent/.." pair until none remain.
    segments = [segment for segment in segments if segment != s(".")]
    while True:
        i = 1
        n = len(segments) - 1
        while i < n:
            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
                del segments[i - 1 : i + 1]
                break
            i += 1
        else:
            break

    # Remove ".." segments left at the start of an absolute path; they
    # cannot be resolved any further up.
    unwanted_marker = [s(""), s("..")]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s("/").join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))
def _urlencode(
    query: t.Mapping[str, str] | t.Iterable[tuple[str, str]], encoding: str = "utf-8"
) -> str:
    """Encode a mapping or pair iterable as a query string, dropping
    pairs whose value is ``None``.
    """
    filtered = [(k, v) for k, v in iter_multi_items(query) if v is not None]
    # safe chars per https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(filtered, safe="!$'()*,/:;?@", encoding=encoding)