Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 33%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import codecs
4import re
5import typing as t
6import urllib.parse
7from urllib.parse import quote
8from urllib.parse import unquote
9from urllib.parse import urlencode
10from urllib.parse import urlsplit
11from urllib.parse import urlunsplit
13from .datastructures import iter_multi_items
def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler that percent-encodes the bytes that failed.

    Used in :func:`uri_to_iri` after unquoting, so that any bytes which
    are not valid in the target encoding are quoted again instead of
    raising or being replaced.

    :param e: The Unicode error raised by the codec.
    :return: The replacement text and the position to resume from.
    """
    # UnicodeError is documented to carry ``object``, ``start`` and ``end``,
    # but mypy does not see those attributes.
    invalid = e.object[e.start : e.end]  # type: ignore
    return quote(invalid, safe=""), e.end  # type: ignore


# Make the handler available by name to ``decode``/``unquote`` calls.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:
    """Build a function that unquotes percent-encoded text except for
    the characters in ``chars``, which stay quoted.

    Keeping those characters quoted lets the rest of a URL part be shown
    unquoted without changing what that part means.

    :param name: Suffix used to name the returned function
        (``_unquote_{name}``).
    :param chars: Characters whose percent-encoded form must be preserved.
    :return: A function mapping a quoted string to its partially
        unquoted form.
    """
    protected = "|".join(f"{ord(c):02X}" for c in sorted(chars))
    # One capturing group around each run of protected escapes, so that
    # ``split`` returns the runs interleaved with the text between them.
    splitter = re.compile(f"((?:%(?:{protected}))+)", re.I)

    def _unquote_partial(value: str) -> str:
        # Even positions hold free text to unquote; odd positions hold
        # the protected runs, which are copied through untouched.
        return "".join(
            piece if index % 2 else unquote(piece, "utf-8", "werkzeug.url_quote")
            for index, piece in enumerate(splitter.split(value))
        )

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial
# Characters that must stay percent-encoded, per URL part.
# Based on https://url.spec.whatwg.org/#percent-encoded-bytes
# Code points 0x00-0x20 (controls and space), "%" (0x25) and 0x7F are
# kept quoted in every part.
_always_unsafe = "".join(map(chr, [*range(0x21), 0x25, 0x7F]))
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", f"{_always_unsafe}&=+#")
_unquote_path = _make_unquote_part("path", f"{_always_unsafe}/?#")
_unquote_user = _make_unquote_part("user", f"{_always_unsafe}:@/?#")
def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    An explicit port is always kept, including port ``0``.

    :param uri: The URI to convert.
    :return: The equivalent IRI.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    parts = urlsplit(uri)
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # ``hostname`` has the brackets of an IPv6 literal removed; restore them.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than using truthiness so that an
    # explicitly written port 0 is not silently dropped.
    port = parts.port

    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        auth = _unquote_user(parts.username)

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    An explicit port is always kept, including port ``0``.

    :param iri: The IRI to convert.
    :return: The equivalent ASCII-only URI.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # ``hostname`` has the brackets of an IPv6 literal removed; restore them.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than using truthiness so that an
    # explicitly written port 0 is not silently dropped.
    port = parts.port

    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            password = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))
# Python < 3.12
# Earlier iri_to_uri implementations special-cased itms-services. Instead,
# register the scheme with urllib.parse so that it preserves the // itself.
if urllib.parse.uses_netloc.count("itms-services") == 0:
    urllib.parse.uses_netloc.append("itms-services")
def _decode_idna(domain: str) -> str:
    """Decode a Punycode (IDNA) domain to Unicode, tolerating bad labels.

    A domain that is not pure ASCII is returned unchanged. If the domain
    cannot be decoded in one go, each dot-separated label is decoded on
    its own and any label that fails is kept in its ASCII form.

    :param domain: The host name to decode.
    :return: The domain with every decodable label converted to Unicode.
    """
    try:
        data = domain.encode("ascii")
    except UnicodeEncodeError:
        # If the domain is not ASCII, it's decoded already.
        return domain

    try:
        # Try decoding in one shot.
        return data.decode("idna")
    except UnicodeError:
        # Catch the base class: depending on the Python version, the
        # idna/punycode codecs report malformed labels with a plain
        # UnicodeError rather than UnicodeDecodeError.
        pass

    # Decode each part separately, leaving invalid parts as punycode.
    parts = []

    for part in data.split(b"."):
        try:
            parts.append(part.decode("idna"))
        except UnicodeError:
            parts.append(part.decode("ascii"))

    return ".".join(parts)
def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """Encode ``query`` as a URL query string, skipping ``None`` values.

    :param query: Mapping or iterable of pairs, expanded with
        ``iter_multi_items`` before encoding.
    :return: The encoded query string.
    """
    pairs = []

    for pair in iter_multi_items(query):
        if pair[1] is None:
            continue

        pairs.append(pair)

    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(pairs, safe="!$'()*,/:;?@")