Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/websockets/http11.py: 75%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import dataclasses
4import os
5import re
6import sys
7import warnings
8from collections.abc import Generator
9from typing import Callable
11from .datastructures import Headers
12from .exceptions import SecurityError
13from .version import version as websockets_version
__all__ = [
    "SERVER",
    "USER_AGENT",
    "Request",
    "Response",
]


# "major.minor" version of the running interpreter, used in default banners.
PYTHON_VERSION = f"{sys.version_info[0]}.{sys.version_info[1]}"

# Value of the User-Agent header for HTTP requests. The environment variable
# WEBSOCKETS_USER_AGENT overrides the default.
USER_AGENT = os.getenv(
    "WEBSOCKETS_USER_AGENT",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Value of the Server header for HTTP responses. The environment variable
# WEBSOCKETS_SERVER overrides the default.
SERVER = os.getenv(
    "WEBSOCKETS_SERVER",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Upper bound on the number of header lines. Combined with the line length
# limit below, headers total around 128 * 8 KiB = 1 MiB at most by default.
MAX_NUM_HEADERS = int(os.getenv("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Upper bound on the request line, status line, and each header line. 8 KiB is
# the most common default configuration of popular HTTP servers.
MAX_LINE_LENGTH = int(os.getenv("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))

# Upper bound on HTTP response bodies. Bodies are only read to surface an error
# message returned by a server, not to perform large file transfers.
MAX_BODY_SIZE = int(os.getenv("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576"))  # 1 MiB
def d(value: bytes | bytearray) -> str:
    """
    Turn raw bytes into text that can be embedded in an error message.

    The bytes are decoded as UTF-8. Sequences that aren't valid UTF-8 are
    rendered as backslash escapes instead of raising an exception.

    """
    return str(value, encoding="utf-8", errors="backslashreplace")
# Grammar reference: https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.

# Header names: one or more "token" characters.

_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Header values: any number of HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), and
# obs-text (\x80-\xff) bytes.

# Obsolete line folding is deliberately unsupported.

# The ABNF is more involved because it also expresses that optional leading
# and trailing whitespace is ignored. Callers strip that whitespace before
# matching, and the stripped value isn't validated a second time.

# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189

_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.

    """

    path: str
    headers: Headers
    # body isn't useful in the context of this library.

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """
        Deprecated accessor for the exception stored on this request.

        Emits a :exc:`DeprecationWarning` on every access.

        """
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code reading the property rather
            # than to this module, so that it points at the line to fix.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way, except
        that it must contain only ASCII characters. Decoding a non-ASCII path
        fails with :exc:`UnicodeDecodeError`, a subclass of :exc:`ValueError`.

        :meth:`parse` doesn't read a request body because WebSocket handshake
        requests don't have one. A request with a ``Transfer-Encoding`` header
        or a non-zero ``Content-Length`` is rejected.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: If the connection is closed without a full HTTP request.
            SecurityError: If the request exceeds a security limit.
            ValueError: If the request isn't well formatted.
            NotImplementedError: If the request has a ``Transfer-Encoding``
                header.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1

        # Parsing is simple because fixed values are expected for method and
        # version and because path isn't checked. Since WebSocket software tends
        # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            method, raw_path, protocol = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(request_line)}"
            )
        if method != b"GET":
            raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}")

        # RFC 9110 defers the definition of URIs to RFC 3986, which allows only
        # a subset of ASCII. Non-ASCII IRIs must be UTF-8 then percent-encoded.
        path = raw_path.decode("ascii")

        headers = yield from parse_headers(read_line)

        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            # Some devices send a Content-Length header with a value of 0.
            # This raises ValueError if Content-Length isn't an integer too.
            if int(headers["Content-Length"]) != 0:
                raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        Returns:
            The request line followed by the serialized headers.

        """
        # Since the request line and headers only contain ASCII characters,
        # we can keep this simple.
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request
@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body.

    """

    status_code: int
    reason_phrase: str
    headers: Headers
    body: bytes | bytearray = b""

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """
        Deprecated accessor for the exception stored on this response.

        Emits a :exc:`DeprecationWarning` on every access.

        """
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code reading the property rather
            # than to this module, so that it points at the line to fix.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
        read_exact: Callable[[int], Generator[None, None, bytes | bytearray]],
        read_to_eof: Callable[[int], Generator[None, None, bytes | bytearray]],
        proxy: bool = False,
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other bytes are decoded as ISO-8859-1.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: Generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: Generator-based coroutine that reads until the end
                of the stream.
            proxy: When true, accept HTTP/1.0 in addition to HTTP/1.1 in the
                status line and don't read a response body; ``body`` is then
                empty.

        Raises:
            EOFError: If the connection is closed without a full HTTP response.
            SecurityError: If the response exceeds a security limit.
            LookupError: If the response isn't well formatted.
            ValueError: If the response isn't well formatted.
            NotImplementedError: If the response uses a transfer coding other
                than ``chunked``.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            protocol, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
        if proxy:  # some proxies still use HTTP/1.0
            if protocol not in [b"HTTP/1.1", b"HTTP/1.0"]:
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1 or HTTP/1.0: "
                    f"{d(status_line)}"
                )
        else:
            if protocol != b"HTTP/1.1":
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
                )
        try:
            status_code = int(raw_status_code)
        except ValueError:  # invalid literal for int() with base 10
            raise ValueError(
                f"invalid status code; expected integer; got {d(raw_status_code)}"
            ) from None
        if not 100 <= status_code < 600:
            raise ValueError(
                f"invalid status code; expected 100–599; got {d(raw_status_code)}"
            )
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")

        # RFC 2616 implies ISO-8859-1. It's easy to reverse and cannot crash.
        # Non-ASCII never worked reliably and the reason isn't useful anyway.
        reason = raw_reason.decode("iso-8859-1")

        headers = yield from parse_headers(read_line)

        body: bytes | bytearray
        if proxy:
            body = b""
        else:
            body = yield from read_body(
                status_code, headers, read_line, read_exact, read_to_eof
            )

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        Returns:
            The status line followed by the serialized headers and the body.

        """
        # Since the status line and headers only contain ASCII characters,
        # we can keep this simple.
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        response += self.body
        return response
def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, bytes | bytearray]:
    """
    Read one CRLF-terminated line and return it without the CRLF.

    This is a generator-based coroutine.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data. It is called
            with ``MAX_LINE_LENGTH``.

    Raises:
        EOFError: If the line that was read doesn't end with CRLF.
        SecurityError: If ``read_line`` raises :exc:`RuntimeError`, which is
            reported as the line being too long.

    """
    try:
        raw = yield from read_line(MAX_LINE_LENGTH)
    except RuntimeError:
        raise SecurityError("line too long")
    # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
    if raw[-2:] != b"\r\n":
        raise EOFError("line without CRLF")
    return raw[:-2]
def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, Headers]:
    """
    Read header lines up to the blank line that terminates them.

    This is a generator-based coroutine.

    Header names must be valid tokens. Header values are stripped of
    surrounding spaces and tabs, validated, then decoded as ISO-8859-1.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without complete headers.
        SecurityError: If there are more than ``MAX_NUM_HEADERS`` headers or
            if a line exceeds a security limit.
        ValueError: If a header line isn't well formatted.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2

    # We don't attempt to support obsolete line folding.

    headers = Headers()
    # Budget one extra line for the blank line that ends the headers.
    lines_left = MAX_NUM_HEADERS + 1
    while lines_left > 0:
        lines_left -= 1

        try:
            line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if not line:
            # Blank line: end of headers.
            return headers

        try:
            raw_name, raw_value = line.split(b":", 1)
        except ValueError:  # not enough values to unpack (expected 2, got 1)
            raise ValueError(f"invalid HTTP header line: {d(line)}") from None
        if _token_re.fullmatch(raw_name) is None:
            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
        raw_value = raw_value.strip(b" \t")
        if _value_re.fullmatch(raw_value) is None:
            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")

        # The name matched the token regex, so it's guaranteed to be ASCII.
        name = raw_name.decode("ascii")
        # Headers should be ASCII. Section 5.5 of RFC 9110 says: "Historically,
        # HTTP allowed field content with text in the ISO-8859-1 charset."
        # It's easy to reverse and cannot crash, making it a decent choice.
        value = raw_value.decode("iso-8859-1")

        # raw_value was validated just above; skip validation on insertion.
        headers.set_insecure(name, value)

    # The budget ran out before the terminating blank line was seen.
    raise SecurityError("too many HTTP headers")
def read_body(
    status_code: int,
    headers: Headers,
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
    read_exact: Callable[[int], Generator[None, None, bytes | bytearray]],
    read_to_eof: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, bytes | bytearray]:
    """
    Read the body of an HTTP response.

    This is a generator-based coroutine.

    The body is empty for 1xx, 204, and 304 responses. Otherwise it's read
    according to ``Transfer-Encoding: chunked`` if that header is present,
    else according to ``Content-Length`` if present, else until the end of
    the stream.

    Args:
        status_code: Status code of the response.
        headers: Headers of the response.
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.
        read_exact: Generator-based coroutine that reads the requested
            bytes or raises an exception if there isn't enough data.
        read_to_eof: Generator-based coroutine that reads until the end
            of the stream.

    Raises:
        EOFError: If a line of the chunked encoding doesn't end with CRLF.
        SecurityError: If the body exceeds ``MAX_BODY_SIZE`` or a declared
            size is implausibly large.
        LookupError: If a header needed to read the body has multiple values.
        ValueError: If a chunk size or Content-Length isn't a non-negative
            integer or if a chunk isn't followed by CRLF.
        NotImplementedError: If the transfer coding isn't ``chunked``.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

    # Since websockets only does GET requests (no HEAD, no CONNECT), all
    # responses except 1xx, 204, and 304 include a message body.
    if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
        return b""

    # MultipleValuesError is sufficiently unlikely that we don't attempt to
    # handle it when accessing headers. Instead we document that its parent
    # class, LookupError, may be raised.
    # Conversions from str to int are protected by sys.set_int_max_str_digits.

    elif (coding := headers.get("Transfer-Encoding")) is not None:
        if coding != "chunked":
            raise NotImplementedError(f"transfer coding {coding} isn't supported")

        # Accumulate in a bytearray: appending is amortized O(1), whereas
        # `bytes += chunk` copies the whole body for every chunk, which is
        # quadratic when a server sends many tiny chunks.
        body = bytearray()
        while True:
            chunk_size_line = yield from parse_line(read_line)
            # Ignore chunk extensions, if any.
            raw_chunk_size = chunk_size_line.split(b";", 1)[0]
            # Set a lower limit than default_max_str_digits; 15 hexadecimal
            # digits allow sizes below 1 EiB, which is plenty.
            if len(raw_chunk_size) > 15:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
            chunk_size = int(raw_chunk_size, 16)
            # int() accepts a leading minus sign; a negative size is invalid
            # and must never reach read_exact.
            if chunk_size < 0:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise ValueError(f"invalid chunk size: {str_chunk_size}")
            if chunk_size == 0:
                break
            if len(body) + chunk_size > MAX_BODY_SIZE:
                raise SecurityError(
                    f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
                )
            body += yield from read_exact(chunk_size)
            if (yield from read_exact(2)) != b"\r\n":
                raise ValueError("chunk without CRLF")
        # Read the trailer.
        yield from parse_headers(read_line)
        return bytes(body)

    elif (raw_content_length := headers.get("Content-Length")) is not None:
        # Set a lower limit than default_max_str_digits; 18 decimal digits
        # allow sizes below 1 EB, which is plenty.
        if len(raw_content_length) > 18:
            raise SecurityError(f"body too large: {raw_content_length} bytes")
        content_length = int(raw_content_length)
        # int() accepts a leading minus sign; a negative length is invalid
        # and must never reach read_exact.
        if content_length < 0:
            raise ValueError(f"invalid Content-Length: {raw_content_length}")
        if content_length > MAX_BODY_SIZE:
            raise SecurityError(f"body too large: {content_length} bytes")
        return (yield from read_exact(content_length))

    else:
        try:
            return (yield from read_to_eof(MAX_BODY_SIZE))
        except RuntimeError:
            raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")