Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/werkzeug/sansio/utils.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

70 statements  

1from __future__ import annotations 

2 

3import re 

4import typing as t 

5from urllib.parse import quote 

6 

7from .._internal import _plain_int 

8from ..datastructures import HeaderSet 

9from ..exceptions import SecurityError 

10from ..urls import uri_to_iri 

11 

12_host_re = re.compile( 

13 r""" 

14 (?P<hostname> 

15 [a-z0-9.-]+ # domain or ipv4 

16 | 

17 \[[a-f0-9]*:[a-f0-9.:]+] # ipv6 

18 ) 

19 (?P<port>:[0-9]+)? # optional port 

20 """, 

21 flags=re.ASCII | re.IGNORECASE | re.VERBOSE, 

22) 

23 

24 

25def host_is_trusted( 

26 hostname: str | None, trusted_list: t.Collection[str] | None = None 

27) -> bool: 

28 """Perform some checks on a ``Host`` header ``host:port``. The host must be 

29 made up of valid characters, but this does not check validity beyond that. 

30 If a list of trusted domains is given, the domain must match one. 

31 

32 :param hostname: The ``Host`` header ``host:port`` to check. 

33 :param trusted_list: A list of trusted domains to match. These should 

34 already be IDNA encoded, but will be encoded if needed. If a name starts 

35 with a dot it will match as a suffix, accepting all subdomains. If empty 

36 or ``None``, all domains are allowed. 

37 

38 .. versionchanged:: 3.2 

39 The value's characters are validated. 

40 

41 .. versionchanged:: 3.2 

42 ``trusted_list`` defaults to ``None``. 

43 

44 .. versionadded:: 0.9 

45 """ 

46 if not hostname: 

47 return False 

48 

49 if (match := _host_re.fullmatch(hostname)) is None: 

50 return False 

51 

52 hostname = match.group("hostname") 

53 

54 if not trusted_list: 

55 return True 

56 

57 if isinstance(trusted_list, str): 

58 trusted_list = [trusted_list] 

59 

60 for ref in trusted_list: 

61 if ref.startswith("."): 

62 ref = ref[1:] 

63 suffix_match = True 

64 else: 

65 suffix_match = False 

66 

67 try: 

68 ref = ref.encode("idna").decode("ascii") 

69 except UnicodeEncodeError: 

70 return False 

71 

72 if ref == hostname or (suffix_match and hostname.endswith(f".{ref}")): 

73 return True 

74 

75 return False 

76 

77 

78def get_host( 

79 scheme: str, 

80 host_header: str | None, 

81 server: tuple[str, int | None] | None = None, 

82 trusted_hosts: t.Collection[str] | None = None, 

83) -> str: 

84 """Get and validate a request's ``host:port`` based on the given values. 

85 

86 The ``Host`` header sent by the client is preferred. Otherwise, the server's 

87 configured address is used. The port is omitted if it matches the standard 

88 HTTP or HTTPS ports. 

89 

90 The value is passed through :func:`host_is_trusted`. The host must be made 

91 up of valid characters, but this does not check validity beyond that. If a 

92 list of trusted domains is given, the domain must match one. 

93 

94 If the host header is not available, such as for HTTP/0.9 and 1.0, or it has 

95 invalid characters, the empty string is returned. Subdomain and host 

96 routing, and external URL building, will not work in these cases. 

97 

98 :param scheme: The protocol of the request. Used to omit the standard ports 

99 80 and 443. 

100 :param host_header: The ``Host`` header value. 

101 :param server: The server's configured address ``(host, port)``. The server 

102 may be using a Unix socket and give ``(path, None)``; this is ignored as 

103 it would not produce a useful host value. 

104 :param trusted_hosts: A list of trusted domains to match. These should 

105 already be IDNA encoded, but will be encoded if needed. The port is 

106 ignored for this check. If a name starts with a dot it will match as a 

107 suffix, accepting all subdomains. If empty or ``None``, all domains are 

108 allowed. 

109 

110 :return: Host, with port if necessary. 

111 :raise .SecurityError: If the host is not trusted. 

112 

113 .. versionchanged:: 3.1.8 

114 The empty string is again returned if no host header value is available, 

115 or if the characters are invalid. 

116 

117 .. versionchanged:: 3.1.7 

118 The characters of the host value are validated. The empty string is no 

119 longer allowed if no header value is available. 

120 

121 .. versionchanged:: 3.2 

122 When using the server address, Unix sockets are ignored. 

123 

124 .. versionchanged:: 3.1.3 

125 If ``SERVER_NAME`` is IPv6, it is wrapped in ``[]``. 

126 """ 

127 if host_header is not None: 

128 host = host_header 

129 # The port server[1] will be None for a Unix socket. Ignore in that case. 

130 elif server is not None and server[1] is not None: 

131 host = server[0] 

132 

133 # If SERVER_NAME is IPv6, wrap it in [] to match Host header. 

134 # Check for : because domain or IPv4 can't have that. 

135 if ":" in host and host[0] != "[": 

136 host = f"[{host}]" 

137 

138 host = f"{host}:{server[1]}" 

139 else: 

140 # Pass through empty host from HTTP/0.9 and 1.0. 

141 return "" 

142 

143 if scheme in {"http", "ws"}: 

144 host = host.removesuffix(":80") 

145 elif scheme in {"https", "wss"}: 

146 host = host.removesuffix(":443") 

147 

148 if not host_is_trusted(host, trusted_hosts): 

149 if trusted_hosts: 

150 raise SecurityError(f"Host {host!r} is not trusted.") 

151 

152 # Invalid characters, treat as empty. 

153 return "" 

154 

155 return host 

156 

157 

def get_current_url(
    scheme: str,
    host: str,
    root_path: str | None = None,
    path: str | None = None,
    query_string: bytes | None = None,
) -> str:
    """Rebuild the URL of a request from its parts. The optional parts are
    positional in meaning: once one is missing, it and every part after it
    are left out of the URL.

    The result is an IRI, not a URI, so it may contain Unicode characters.
    Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII.

    :param scheme: The protocol the request used, like ``"https"``.
    :param host: The host the request was made to. See :func:`get_host`.
    :param root_path: Prefix that the application is mounted under. This
        is prepended to ``path``.
    :param path: The path part of the URL after ``root_path``.
    :param query_string: The portion of the URL after the "?".
    """
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path_safe = "!$&'()*+,/:;=@%"
    # The query string additionally keeps "?" unquoted.
    query_safe = "!$&'()*+,/:;=?@%"

    result = scheme + "://" + host

    if root_path is None:
        return uri_to_iri(result + "/")

    # Exactly one slash separates the root path from what follows it.
    result += quote(root_path.rstrip("/"), safe=path_safe) + "/"

    if path is not None:
        result += quote(path.lstrip("/"), safe=path_safe)

        if query_string:
            result += "?" + quote(query_string, safe=query_safe)

    return uri_to_iri(result)

199 

200 

201def get_content_length( 

202 http_content_length: str | None = None, 

203 http_transfer_encoding: str | None = None, 

204) -> int | None: 

205 """Return the ``Content-Length`` header value as an int. If the header is not given 

206 or the ``Transfer-Encoding`` header is ``chunked``, ``None`` is returned to indicate 

207 a streaming request. If the value is not an integer, or negative, 0 is returned. 

208 

209 :param http_content_length: The Content-Length HTTP header. 

210 :param http_transfer_encoding: The Transfer-Encoding HTTP header. 

211 

212 .. versionadded:: 2.2 

213 """ 

214 if ( 

215 http_transfer_encoding is not None 

216 and "chunked" in HeaderSet.from_header(http_transfer_encoding) 

217 ) or http_content_length is None: 

218 return None 

219 

220 try: 

221 return max(0, _plain_int(http_content_length)) 

222 except ValueError: 

223 return 0