Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/werkzeug/urls.py: 64%

96 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-09 07:17 +0000

1from __future__ import annotations 

2 

3import codecs 

4import re 

5import typing as t 

6from urllib.parse import quote 

7from urllib.parse import unquote 

8from urllib.parse import urlencode 

9from urllib.parse import urlsplit 

10from urllib.parse import urlunsplit 

11 

12from .datastructures import iter_multi_items 

13 

14 

def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler that percent-encodes the offending slice.

    Used in :func:`uri_to_iri` after unquoting, so that anything which
    cannot be decoded is put back in its quoted form.

    :param e: The codec error describing the failing range.
    :return: The quoted replacement text and the position at which to
        resume.
    """
    # ``object``, ``start`` and ``end`` are documented on UnicodeError,
    # but mypy does not know about them.
    resume_at = e.end  # type: ignore
    offending = e.object[e.start : resume_at]  # type: ignore
    return quote(offending, safe=""), resume_at


# Make the handler available by name to ``decode``/``unquote`` calls.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)

26 

27 

def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:
    """Build an unquoting function for one part of a URL.

    The returned function decodes every percent-encoded sequence except
    the encodings of the characters in ``chars``, so the text becomes as
    readable as possible without changing what that URL part means.

    :param name: Suffix for the generated function's ``__name__``.
    :param chars: Characters whose percent-encoded form must be kept.
    """
    hex_codes = [format(ord(ch), "02X") for ch in sorted(chars)]
    # One capture group, so ``split`` returns the protected runs as well.
    keep_quoted = re.compile("((?:%(?:" + "|".join(hex_codes) + "))+)", re.I)

    def _unquote_partial(value: str) -> str:
        # ``split`` alternates: even indexes hold free text to unquote,
        # odd indexes hold protected runs that are copied through as-is.
        return "".join(
            piece if index % 2 else unquote(piece, "utf-8", "werkzeug.url_quote")
            for index, piece in enumerate(keep_quoted.split(value))
        )

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial

48 

49 

# Characters whose percent-encoded form is kept in each part of a URL,
# following https://url.spec.whatwg.org/#percent-encoded-bytes
# Control characters, space, and "%" itself always stay quoted.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", f"{_always_unsafe}&=+#")
_unquote_path = _make_unquote_part("path", f"{_always_unsafe}/?#")
_unquote_user = _make_unquote_part("user", f"{_always_unsafe}:@/?#")

58 

59 

def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    parts = urlsplit(uri)
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    # A colon in the host means an IPv6 literal; restore the brackets
    # that ``hostname`` strips.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    port = parts.port

    # Compare against None rather than truthiness so that an explicit
    # port ``0`` is kept instead of being silently dropped.
    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        auth = _unquote_user(parts.username)

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))

110 

111 

def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
        The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    # A colon in the host means an IPv6 literal; restore the brackets
    # that ``hostname`` strips.
    if ":" in netloc:
        netloc = f"[{netloc}]"

    port = parts.port

    # Compare against None rather than truthiness so that an explicit
    # port ``0`` is kept instead of being silently dropped.
    if port is not None:
        netloc = f"{netloc}:{port}"

    if parts.username:
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            password = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))

165 

166 

def _invalid_iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI, passing some ASCII-only values through untouched.

    The URL scheme ``itms-services://`` must keep its ``//`` even though it
    has no host component, and there may be other invalid schemes like it.
    Responses currently always call ``iri_to_uri`` on the redirect
    ``Location`` header, which removes the ``//``. For now, an IRI that is
    pure ASCII and does not contain spaces is returned as-is. In
    Werkzeug 3.0, this should become a ``response.process_location`` flag.

    :meta private:
    """
    # ``split(None, 1)`` gives exactly one item only when no whitespace
    # separates two non-whitespace chunks of the value.
    if iri.isascii() and len(iri.split(None, 1)) == 1:
        return iri

    return iri_to_uri(iri)

186 

187 

def _decode_idna(domain: str) -> str:
    """Decode a Punycode (IDNA) domain to Unicode as far as possible.

    A domain that is not pure ASCII is treated as already decoded and
    returned unchanged. Otherwise the whole domain is decoded at once; if
    that fails, each dot-separated label is decoded on its own and any
    label that cannot be decoded is left in its ASCII form.

    :param domain: The host name to decode.
    :return: The decoded host name. Invalid input never raises.
    """
    try:
        data = domain.encode("ascii")
    except UnicodeEncodeError:
        # If the domain is not ASCII, it's decoded already.
        return domain

    try:
        # Try decoding in one shot.
        return data.decode("idna")
    except UnicodeError:
        # The idna codec reports some failures (empty or too long label,
        # failed round-trip) with a plain UnicodeError rather than
        # UnicodeDecodeError, so catch the common base class.
        pass

    # Decode each part separately, leaving invalid parts as punycode.
    parts = []

    for part in data.split(b"."):
        try:
            parts.append(part.decode("idna"))
        except UnicodeError:
            # ``data`` is known to be ASCII, so this cannot fail.
            parts.append(part.decode("ascii"))

    return ".".join(parts)

211 

212 

def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """Encode ``query`` as a URL query string, leaving out ``None`` values.

    :param query: A mapping or an iterable of pairs; it is flattened with
        ``iter_multi_items`` before encoding.
    """

    def _has_value(item: t.Any) -> bool:
        return item[1] is not None

    pairs = list(filter(_has_value, iter_multi_items(query)))
    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(pairs, safe="!$'()*,/:;?@")