1from __future__ import annotations
2
3import re
4import typing as t
5from urllib.parse import quote
6
7from .._internal import _plain_int
8from ..exceptions import SecurityError
9from ..http import parse_set_header
10from ..urls import uri_to_iri
11
# Matches a ``Host`` header value. Group 1 captures the host (domain, IPv4, or
# bracketed IPv6 literal) without the optional ``:port`` suffix.
_host_re = re.compile(
    r"""
    (
        [a-z0-9.-]+  # domain or ipv4
        |
        \[[a-f0-9]*:[a-f0-9.:]+]  # ipv6
    )
    (?::[0-9]+)?  # optional port
    """,
    flags=re.ASCII | re.IGNORECASE | re.VERBOSE,
)


def _strip_port(value: str) -> str:
    """Return ``value`` without a trailing ``:port``.

    A bracketed IPv6 literal is kept whole, brackets included. Splitting on
    the first ``:`` would cut it inside the address.
    """
    if value.startswith("["):
        head, bracket, _ = value.partition("]")
        return f"{head}{bracket}"

    return value.partition(":")[0]


def host_is_trusted(
    hostname: str | None, trusted_list: t.Collection[str] | None = None
) -> bool:
    """Perform some checks on a ``Host`` header ``host:port``. The host must be
    made up of valid characters, but this does not check validity beyond that.
    If a list of trusted domains is given, the domain must match one.

    IPv6 literals are compared in their bracketed form, so ``[::1]:8080``
    matches the trusted entry ``[::1]`` but no other IPv6 address does.

    :param hostname: The ``Host`` header ``host:port`` to check.
    :param trusted_list: A list of trusted domains to match. These should
        already be IDNA encoded, but will be encoded if needed. The port is
        ignored for this check. If a name starts with a dot it will match as a
        suffix, accepting all subdomains. If empty or ``None``, all domains are
        allowed. A single string is treated as a one-item list.
    :return: ``True`` if the host is well formed and, when a trusted list is
        given, matches one of its entries. ``False`` otherwise, including when
        a trusted entry cannot be IDNA encoded.

    .. versionchanged:: 3.2
        The value's characters are validated.

    .. versionchanged:: 3.2
        ``trusted_list`` defaults to ``None``.

    .. versionadded:: 0.9
    """
    if not hostname:
        return False

    match = _host_re.fullmatch(hostname)

    if match is None:
        return False

    # Group 1 is the host with the port already removed. Unlike splitting on
    # ":", this keeps a bracketed IPv6 literal intact.
    hostname = match.group(1)

    if not trusted_list:
        return True

    if isinstance(trusted_list, str):
        trusted_list = [trusted_list]

    for ref in trusted_list:
        suffix_match = ref.startswith(".")

        if suffix_match:
            ref = ref[1:]

        ref = _strip_port(ref)

        # IP literals are not domain names, so only non-bracketed entries are
        # IDNA encoded.
        if not ref.startswith("["):
            try:
                ref = ref.encode("idna").decode("ascii")
            except UnicodeError:
                # The idna codec raises plain UnicodeError for empty or
                # over-long labels on some Python versions. Fail closed.
                return False

        if ref == hostname or (suffix_match and hostname.endswith(f".{ref}")):
            return True

    return False
77
78
def get_host(
    scheme: str,
    host_header: str | None,
    server: tuple[str, int | None] | None = None,
    trusted_hosts: t.Collection[str] | None = None,
) -> str:
    """Determine and validate the ``host:port`` a request was made to.

    The client's ``Host`` header wins when present. Without it, the server's
    configured address is used instead. A port equal to the scheme's standard
    port (80 for ``http``/``ws``, 443 for ``https``/``wss``) is dropped.

    The result is checked with :func:`host_is_trusted`. The host must be made
    up of valid characters, but this does not check validity beyond that. If a
    list of trusted domains is given, the domain must match one.

    :param scheme: The protocol of the request. Used to omit the standard ports
        80 and 443.
    :param host_header: The ``Host`` header value.
    :param server: The server's configured address ``(host, port)``. The server
        may be using a Unix socket and give ``(path, None)``; this is ignored as
        it would not produce a useful host value.
    :param trusted_hosts: A list of trusted domains to match. These should
        already be IDNA encoded, but will be encoded if needed. The port is
        ignored for this check. If a name starts with a dot it will match as a
        suffix, accepting all subdomains. If empty or ``None``, all domains are
        allowed.

    :return: Host, with port if necessary.
    :raise .SecurityError: If the host is not trusted.

    .. versionchanged:: 3.2
        The characters of the host value are validated. The empty string is no
        longer allowed if no header value is available.

    .. versionchanged:: 3.2
        When using the server address, Unix sockets are ignored.

    .. versionchanged:: 3.1.3
        If ``SERVER_NAME`` is IPv6, it is wrapped in ``[]``.
    """
    if host_header is not None:
        host = host_header
    elif server is None or server[1] is None:
        # No header, and either no server address or a Unix socket (port is
        # None). Nothing useful to build a host from.
        host = ""
    else:
        name = server[0]

        # Only an IPv6 address can contain ":". Bracket it so it looks the
        # same as it would in a Host header.
        if ":" in name and name[0] != "[":
            name = f"[{name}]"

        host = f"{name}:{server[1]}"

    # Drop the port when it is the default for the scheme.
    default_port_suffix = {
        "http": ":80",
        "ws": ":80",
        "https": ":443",
        "wss": ":443",
    }.get(scheme)

    if default_port_suffix is not None:
        host = host.removesuffix(default_port_suffix)

    if host_is_trusted(host, trusted_hosts):
        return host

    raise SecurityError(f"Host {host!r} is not trusted.")
144
145
def get_current_url(
    scheme: str,
    host: str,
    root_path: str | None = None,
    path: str | None = None,
    query_string: bytes | None = None,
) -> str:
    """Rebuild the URL of a request from its parts. Parts are added in order
    (root path, path, query string); once an optional part is missing, it and
    everything after it are left out.

    The result is an IRI rather than a URI, so it may contain Unicode
    characters. Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII.

    :param scheme: The protocol the request used, like ``"https"``.
    :param host: The host the request was made to. See :func:`get_host`.
    :param root_path: Prefix that the application is mounted under. This
        is prepended to ``path``.
    :param path: The path part of the URL after ``root_path``.
    :param query_string: The portion of the URL after the "?".
    """
    # Characters left unquoted in path segments, per
    # https://url.spec.whatwg.org/#url-path-segment-string
    # plus "%" so that already-quoted sequences are not quoted again.
    path_safe = "!$&'()*+,/:;=@%"
    # The query string additionally keeps "?" unquoted.
    query_safe = "!$&'()*+,/:;=?@%"

    url = scheme + "://" + host

    if root_path is None:
        return uri_to_iri(url + "/")

    # Exactly one slash separates the root path from what follows.
    url += quote(root_path.rstrip("/"), safe=path_safe) + "/"

    if path is not None:
        url += quote(path.lstrip("/"), safe=path_safe)

        if query_string:
            url += "?" + quote(query_string, safe=query_safe)

    return uri_to_iri(url)
187
188
def get_content_length(
    http_content_length: str | None = None,
    http_transfer_encoding: str | None = None,
) -> int | None:
    """Parse the ``Content-Length`` header into an int.

    ``None`` is returned, indicating a streaming request, when the header is
    missing or when ``Transfer-Encoding`` includes ``chunked``. A value that is
    not an integer, or is negative, gives 0.

    :param http_content_length: The Content-Length HTTP header.
    :param http_transfer_encoding: The Transfer-Encoding HTTP header.

    .. versionadded:: 2.2
    """
    is_chunked = (
        http_transfer_encoding is not None
        and "chunked" in parse_set_header(http_transfer_encoding)
    )

    if is_chunked or http_content_length is None:
        return None

    try:
        length = _plain_int(http_content_length)
    except ValueError:
        return 0

    # Clamp negative values to zero.
    return length if length > 0 else 0