Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/werkzeug/sansio/utils.py: 34%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

70 statements  

1from __future__ import annotations 

2 

3import re 

4import typing as t 

5from urllib.parse import quote 

6 

7from .._internal import _plain_int 

8from ..exceptions import SecurityError 

9from ..http import parse_set_header 

10from ..urls import uri_to_iri 

11 

12_host_re = re.compile( 

13 r""" 

14 ( 

15 [a-z0-9.-]+ # domain or ipv4 

16 | 

17 \[[a-f0-9]*:[a-f0-9.:]+] # ipv6 

18 ) 

19 (?::[0-9]+)? # optional port 

20 """, 

21 flags=re.ASCII | re.IGNORECASE | re.VERBOSE, 

22) 

23 

24 

25def host_is_trusted( 

26 hostname: str | None, trusted_list: t.Collection[str] | None = None 

27) -> bool: 

28 """Perform some checks on a ``Host`` header ``host:port``. The host must be 

29 made up of valid characters, but this does not check validity beyond that. 

30 If a list of trusted domains is given, the domain must match one. 

31 

32 :param hostname: The ``Host`` header ``host:port`` to check. 

33 :param trusted_list: A list of trusted domains to match. These should 

34 already be IDNA encoded, but will be encoded if needed. The port is 

35 ignored for this check. If a name starts with a dot it will match as a 

36 suffix, accepting all subdomains. If empty or ``None``, all domains are 

37 allowed. 

38 

39 .. versionchanged:: 3.2 

40 The value's characters are validated. 

41 

42 .. versionchanged:: 3.2 

43 ``trusted_list`` defaults to ``None``. 

44 

45 .. versionadded:: 0.9 

46 """ 

47 if not hostname: 

48 return False 

49 

50 if _host_re.fullmatch(hostname) is None: 

51 return False 

52 

53 hostname = hostname.partition(":")[0] 

54 

55 if not trusted_list: 

56 return True 

57 

58 if isinstance(trusted_list, str): 

59 trusted_list = [trusted_list] 

60 

61 for ref in trusted_list: 

62 if ref.startswith("."): 

63 ref = ref[1:] 

64 suffix_match = True 

65 else: 

66 suffix_match = False 

67 

68 try: 

69 ref = ref.partition(":")[0].encode("idna").decode("ascii") 

70 except UnicodeEncodeError: 

71 return False 

72 

73 if ref == hostname or (suffix_match and hostname.endswith(f".{ref}")): 

74 return True 

75 

76 return False 

77 

78 

def get_host(
    scheme: str,
    host_header: str | None,
    server: tuple[str, int | None] | None = None,
    trusted_hosts: t.Collection[str] | None = None,
) -> str:
    """Work out and validate the ``host:port`` a request was made to.

    The client's ``Host`` header wins when it is present. Without it, the
    server's configured address is used instead. A port equal to the scheme's
    standard port (80 for ``http``/``ws``, 443 for ``https``/``wss``) is
    dropped from the result.

    The result is checked with :func:`host_is_trusted`: it must consist of
    valid characters, and if trusted domains are given it must match one of
    them. No further validation is done.

    When there is no host header (HTTP/0.9 and 1.0) and no usable server
    address, or the value has invalid characters, the empty string is
    returned. Subdomain and host routing, and external URL building, will not
    work in these cases.

    :param scheme: The protocol of the request. Used to omit the standard ports
        80 and 443.
    :param host_header: The ``Host`` header value.
    :param server: The server's configured address ``(host, port)``. The server
        may be using a Unix socket and give ``(path, None)``; this is ignored as
        it would not produce a useful host value.
    :param trusted_hosts: A list of trusted domains to match. These should
        already be IDNA encoded, but will be encoded if needed. The port is
        ignored for this check. If a name starts with a dot it will match as a
        suffix, accepting all subdomains. If empty or ``None``, all domains are
        allowed.

    :return: Host, with port if necessary.
    :raise .SecurityError: If the host is not trusted.

    .. versionchanged:: 3.1.8
        The empty string is again returned if no host header value is available,
        or if the characters are invalid.

    .. versionchanged:: 3.1.7
        The characters of the host value are validated. The empty string is no
        longer allowed if no header value is available.

    .. versionchanged:: 3.2
        When using the server address, Unix sockets are ignored.

    .. versionchanged:: 3.1.3
        If ``SERVER_NAME`` is IPv6, it is wrapped in ``[]``.
    """
    if host_header is not None:
        candidate = host_header
    elif server is None or server[1] is None:
        # Nothing to build a host from: no header (HTTP/0.9 and 1.0), and
        # either no server address or a Unix socket, whose port is None.
        return ""
    else:
        address = server[0]

        # A bare IPv6 SERVER_NAME is bracketed so it looks like a Host header.
        # Only IPv6 can contain ":", a domain or IPv4 address cannot.
        if ":" in address and address[0] != "[":
            address = f"[{address}]"

        candidate = f"{address}:{server[1]}"

    # Port suffix that is implied by the scheme and therefore omitted.
    standard_port = {
        "http": ":80",
        "ws": ":80",
        "https": ":443",
        "wss": ":443",
    }.get(scheme)

    if standard_port is not None:
        candidate = candidate.removesuffix(standard_port)

    if host_is_trusted(candidate, trusted_hosts):
        return candidate

    if trusted_hosts:
        raise SecurityError(f"Host {candidate!r} is not trusted.")

    # No trusted list was given, so the failure was invalid characters.
    # Treat the host as empty.
    return ""

157 

158 

def get_current_url(
    scheme: str,
    host: str,
    root_path: str | None = None,
    path: str | None = None,
    query_string: bytes | None = None,
) -> str:
    """Rebuild the URL of a request from its parts.

    The parts are added in order: scheme and host, then ``root_path``, then
    ``path``, then ``query_string``. If ``root_path`` or ``path`` is ``None``,
    the URL ends at that point and later parts are left out. An empty or
    ``None`` query string is left out.

    The URL is an IRI, not a URI, so it may contain Unicode characters.
    Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII.

    :param scheme: The protocol the request used, like ``"https"``.
    :param host: The host the request was made to. See :func:`get_host`.
    :param root_path: Prefix that the application is mounted under. This
        is prepended to ``path``.
    :param path: The path part of the URL after ``root_path``.
    :param query_string: The portion of the URL after the "?".
    """
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path_safe = "!$&'()*+,/:;=@%"
    result = scheme + "://" + host

    if root_path is None:
        return uri_to_iri(result + "/")

    # Exactly one slash separates the root path from whatever follows it.
    result += quote(root_path.rstrip("/"), safe=path_safe) + "/"

    if path is None:
        return uri_to_iri(result)

    result += quote(path.lstrip("/"), safe=path_safe)

    if query_string:
        # The query additionally keeps "?" unquoted.
        result += "?" + quote(query_string, safe="!$&'()*+,/:;=?@%")

    return uri_to_iri(result)

200 

201 

def get_content_length(
    http_content_length: str | None = None,
    http_transfer_encoding: str | None = None,
) -> int | None:
    """Convert the ``Content-Length`` header value to an int.

    ``None`` is returned to indicate a streaming request, which is the case
    when the ``Transfer-Encoding`` header contains ``chunked`` or when no
    ``Content-Length`` header is given. A value that is not an integer gives
    0, and a negative value is clamped to 0.

    :param http_content_length: The Content-Length HTTP header.
    :param http_transfer_encoding: The Transfer-Encoding HTTP header.

    .. versionadded:: 2.2
    """
    is_chunked = (
        http_transfer_encoding is not None
        and "chunked" in parse_set_header(http_transfer_encoding)
    )

    if is_chunked or http_content_length is None:
        return None

    try:
        length = _plain_int(http_content_length)
    except ValueError:
        # Not an integer.
        return 0

    # Negative lengths are reported as 0.
    return max(0, length)