Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 33%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

91 statements  

1from __future__ import annotations 

2 

3import codecs 

4import re 

5import typing as t 

6import urllib.parse 

7from urllib.parse import quote 

8from urllib.parse import unquote 

9from urllib.parse import urlencode 

10from urllib.parse import urlsplit 

11from urllib.parse import urlunsplit 

12 

13from .datastructures import iter_multi_items 

14 

15 

16def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]: 

17 """Used in :func:`uri_to_iri` after unquoting to re-quote any 

18 invalid bytes. 

19 """ 

20 # the docs state that UnicodeError does have these attributes, 

21 # but mypy isn't picking them up 

22 out = quote(e.object[e.start : e.end], safe="") # type: ignore 

23 return out, e.end # type: ignore 

24 

25 

26codecs.register_error("werkzeug.url_quote", _codec_error_url_quote) 

27 

28 

29def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]: 

30 """Create a function that unquotes all percent encoded characters except those 

31 given. This allows working with unquoted characters if possible while not changing 

32 the meaning of a given part of a URL. 

33 """ 

34 choices = "|".join(f"{ord(c):02X}" for c in sorted(chars)) 

35 pattern = re.compile(f"((?:%(?:{choices}))+)", re.I) 

36 

37 def _unquote_partial(value: str) -> str: 

38 parts = iter(pattern.split(value)) 

39 out = [] 

40 

41 for part in parts: 

42 out.append(unquote(part, "utf-8", "werkzeug.url_quote")) 

43 out.append(next(parts, "")) 

44 

45 return "".join(out) 

46 

47 _unquote_partial.__name__ = f"_unquote_{name}" 

48 return _unquote_partial 

49 

50 

# Characters whose percent-escapes are kept quoted in each URL part.
# Based on https://url.spec.whatwg.org/#percent-encoded-bytes
# Controls (0x00-0x1F, 0x7F), space (0x20), and "%" (0x25) always stay quoted.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", f"{_always_unsafe}&=+#")
_unquote_path = _make_unquote_part("path", f"{_always_unsafe}/?#")
_unquote_user = _make_unquote_part("user", f"{_always_unsafe}:@/?#")

59 

60 

def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :return: The IRI form of ``uri``, reassembled from its unquoted parts.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    parts = urlsplit(uri)
    # Each part has its own set of characters that must stay quoted.
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # ``hostname`` has the brackets of an IPv6 literal stripped; restore them.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than truthiness so that an explicit
    # port of 0 is not silently dropped from the result.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = _unquote_user(parts.username)

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))

111 

112 

def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :return: The URI form of ``iri``, reassembled from its quoted parts.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # ``hostname`` has the brackets of an IPv6 literal stripped; restore them.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    # Compare against None rather than truthiness so that an explicit
    # port of 0 is not silently dropped from the result.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            password = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))

166 

167 

# Python < 3.12
# Earlier iri_to_uri implementations special-cased itms-services; instead,
# register the scheme with urllib so that it keeps the "//" on its own.
if urllib.parse.uses_netloc.count("itms-services") == 0:
    urllib.parse.uses_netloc.append("itms-services")

173 

174 

175def _decode_idna(domain: str) -> str: 

176 try: 

177 data = domain.encode("ascii") 

178 except UnicodeEncodeError: 

179 # If the domain is not ASCII, it's decoded already. 

180 return domain 

181 

182 try: 

183 # Try decoding in one shot. 

184 return data.decode("idna") 

185 except UnicodeDecodeError: 

186 pass 

187 

188 # Decode each part separately, leaving invalid parts as punycode. 

189 parts = [] 

190 

191 for part in data.split(b"."): 

192 try: 

193 parts.append(part.decode("idna")) 

194 except UnicodeDecodeError: 

195 parts.append(part.decode("ascii")) 

196 

197 return ".".join(parts) 

198 

199 

def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """Encode ``query`` as a URL query string, skipping ``None`` values.

    :param query: A mapping or an iterable of pairs; it is expanded with
        ``iter_multi_items`` before encoding.
    :return: The encoded query string.
    """
    pairs = []

    for item in iter_multi_items(query):
        # Drop entries whose value is None instead of encoding them.
        if item[1] is not None:
            pairs.append(item)

    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(pairs, safe="!$'()*,/:;?@")