Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/websockets/http11.py: 87%
129 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:20 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:20 +0000
1from __future__ import annotations
3import dataclasses
4import re
5import warnings
6from typing import Callable, Generator, Optional
8from . import datastructures, exceptions
# Maximum total size of headers is around 128 * 8 KiB = 1 MiB.
MAX_HEADERS = 128

# Limit request line and header lines. 8KiB is the most common default
# configuration of popular HTTP servers.
MAX_LINE = 8192

# Support for HTTP response bodies is intended to read an error message
# returned by a server. It isn't designed to perform large file transfers.
MAX_BODY = 2**20  # 1 MiB
def d(value: bytes) -> str:
    """
    Decode a bytestring for interpolating into an error message.

    Bytes that cannot be decoded are rendered as backslash escapes instead
    of raising, so arbitrary input can always be shown in a message.

    """
    return value.decode(errors="backslashreplace")
# See https://www.rfc-editor.org/rfc/rfc7230.html#appendix-B.

# Regex for validating header names.

_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values.

# We don't attempt to support obsolete line folding.

# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).

# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.

# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189

_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.
    """

    path: str
    headers: datastructures.Headers
    # body isn't useful in the context of this library.

    _exception: Optional[Exception] = None

    @property
    def exception(self) -> Optional[Exception]:  # pragma: no cover
        warnings.warn(
            "Request.exception is deprecated; "
            "use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code accessing the property.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way.

        The request path and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        :meth:`parse` doesn't attempt to read the request body because
        WebSocket handshake requests don't have one. If the request contains a
        body, it may be read from the data stream after :meth:`parse` returns.

        Args:
            read_line: generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data

        Raises:
            EOFError: if the connection is closed without a full HTTP request.
            SecurityError: if the request exceeds a security limit.
            ValueError: if the request isn't well formatted.
            NotImplementedError: if the request has a Transfer-Encoding header.

        """
        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.1

        # Parsing is simple because fixed values are expected for method and
        # version and because path isn't checked. Since WebSocket software tends
        # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            method, raw_path, version = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None

        if method != b"GET":
            raise ValueError(f"unsupported HTTP method: {d(method)}")
        if version != b"HTTP/1.1":
            raise ValueError(f"unsupported HTTP version: {d(version)}")
        path = raw_path.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        """
        # Since the request line and headers only contain ASCII characters,
        # we can keep this simple.
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request
@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body, if any.

    """

    status_code: int
    reason_phrase: str
    headers: datastructures.Headers
    body: Optional[bytes] = None

    _exception: Optional[Exception] = None

    @property
    def exception(self) -> Optional[Exception]:  # pragma: no cover
        warnings.warn(
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code accessing the property.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
        read_exact: Callable[[int], Generator[None, None, bytes]],
        read_to_eof: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        Args:
            read_line: generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: generator-based coroutine that reads until the end
                of the stream.

        Raises:
            EOFError: if the connection is closed without a full HTTP response.
            SecurityError: if the response exceeds a security limit.
            LookupError: if the response isn't well formatted.
            ValueError: if the response isn't well formatted.
            NotImplementedError: if the response has a Transfer-Encoding header.

        """
        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            version, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None

        if version != b"HTTP/1.1":
            raise ValueError(f"unsupported HTTP version: {d(version)}")
        try:
            status_code = int(raw_status_code)
        except ValueError:  # invalid literal for int() with base 10
            raise ValueError(
                f"invalid HTTP status code: {d(raw_status_code)}"
            ) from None
        if not 100 <= status_code < 1000:
            raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
        # _value_re admits obs-text (\x80-\xff), which isn't necessarily valid
        # UTF-8. Decode like header values so such bytes become surrogate
        # escapes, as documented, instead of raising UnicodeDecodeError.
        reason = raw_reason.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        # Since websockets only does GET requests (no HEAD, no CONNECT), all
        # responses except 1xx, 204, and 304 include a message body.
        if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
            body = None
        else:
            content_length: Optional[int]
            try:
                # MultipleValuesError is sufficiently unlikely that we don't
                # attempt to handle it. Instead we document that its parent
                # class, LookupError, may be raised.
                raw_content_length = headers["Content-Length"]
            except KeyError:
                content_length = None
            else:
                # Content-Length is 1*DIGIT. int() alone would also accept a
                # sign, underscores, or surrounding whitespace, and a negative
                # value must never reach read_exact().
                if not (raw_content_length.isascii() and raw_content_length.isdigit()):
                    raise ValueError(
                        f"invalid Content-Length: {raw_content_length!r}"
                    )
                content_length = int(raw_content_length)

            if content_length is None:
                try:
                    body = yield from read_to_eof(MAX_BODY)
                except RuntimeError as exc:
                    raise exceptions.SecurityError(
                        f"body too large: over {MAX_BODY} bytes"
                    ) from exc
            elif content_length > MAX_BODY:
                raise exceptions.SecurityError(
                    f"body too large: {content_length} bytes"
                )
            else:
                body = yield from read_exact(content_length)

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        """
        # Since the status line and headers only contain ASCII characters,
        # we can keep this simple.
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        if self.body is not None:
            response += self.body
        return response
def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, datastructures.Headers]:
    """
    Parse HTTP headers.

    Non-ASCII characters are represented with surrogate escapes.

    Args:
        read_line: generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: if the connection is closed without complete headers.
        SecurityError: if the request exceeds a security limit.
        ValueError: if the request isn't well formatted.

    """
    # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2

    # We don't attempt to support obsolete line folding.

    headers = datastructures.Headers()
    # One extra iteration leaves room for the empty line that terminates
    # the headers after MAX_HEADERS header lines.
    for _ in range(MAX_HEADERS + 1):
        try:
            line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if line == b"":
            break

        try:
            raw_name, raw_value = line.split(b":", 1)
        except ValueError:  # not enough values to unpack (expected 2, got 1)
            raise ValueError(f"invalid HTTP header line: {d(line)}") from None
        if not _token_re.fullmatch(raw_name):
            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
        raw_value = raw_value.strip(b" \t")
        if not _value_re.fullmatch(raw_value):
            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")

        name = raw_name.decode("ascii")  # guaranteed to be ASCII at this point
        value = raw_value.decode("ascii", "surrogateescape")
        headers[name] = value

    else:
        # The loop ran out without reaching the empty line.
        raise exceptions.SecurityError("too many HTTP headers")

    return headers
def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Parse a single line.

    CRLF is stripped from the return value.

    Args:
        read_line: generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: if the connection is closed without a CRLF.
        SecurityError: if the response exceeds a security limit.

    """
    try:
        line = yield from read_line(MAX_LINE)
    except RuntimeError as exc:
        # Chain explicitly, like the other re-raises in this module.
        raise exceptions.SecurityError("line too long") from exc
    # Not mandatory but safe - https://www.rfc-editor.org/rfc/rfc7230.html#section-3.5
    if not line.endswith(b"\r\n"):
        raise EOFError("line without CRLF")
    return line[:-2]