Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/websockets/http11.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

161 statements  

1from __future__ import annotations 

2 

3import dataclasses 

4import os 

5import re 

6import sys 

7import warnings 

8from collections.abc import Generator 

9from typing import Callable 

10 

11from .datastructures import Headers 

12from .exceptions import SecurityError 

13from .version import version as websockets_version 

14 

15 

__all__ = ["SERVER", "USER_AGENT", "Request", "Response"]


# "major.minor" of the running interpreter, advertised in the default
# User-Agent and Server values below.
PYTHON_VERSION = f"{sys.version_info[0]}.{sys.version_info[1]}"

# Value of the User-Agent header in HTTP requests. It may be overridden with
# the WEBSOCKETS_USER_AGENT environment variable.
USER_AGENT = os.getenv(
    "WEBSOCKETS_USER_AGENT",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Value of the Server header in HTTP responses. It may be overridden with
# the WEBSOCKETS_SERVER environment variable.
SERVER = os.getenv(
    "WEBSOCKETS_SERVER",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Upper bound on the number of header lines. Combined with the line length
# limit, the total size of headers is capped around 128 * 8 KiB = 1 MiB.
MAX_NUM_HEADERS = int(os.getenv("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Upper bound on the request line, the status line, and each header line.
# 8 KiB is the most common default configuration of popular HTTP servers.
MAX_LINE_LENGTH = int(os.getenv("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))

# Upper bound on HTTP response bodies (1 MiB by default). Bodies are only
# read to surface an error message returned by a server; large transfers
# aren't a goal.
MAX_BODY_SIZE = int(os.getenv("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576"))

48 

49 

def d(value: bytes | bytearray) -> str:
    """
    Render a bytestring as text that can be embedded in an error message.

    The bytes are decoded as UTF-8. Bytes that cannot be decoded are shown
    as backslash escapes, so this function never raises on bad input.

    """
    return str(value, encoding="utf-8", errors="backslashreplace")

56 

57 

# See https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.

# Regex for validating header names.
# It matches one or more "token" characters. The parsers in this module
# apply it with fullmatch(), so the whole name must consist of them.

_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values.
# It is also applied to the reason phrase of HTTP responses.

# We don't attempt to support obsolete line folding.

# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
# Every other byte, including CR, LF, NUL, and DEL (\x7f), is rejected.
# The empty value is accepted because the pattern uses "*".

# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.

# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189

_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")

76 

77 

@dataclasses.dataclass
class Request:
    """
    HTTP request opening a WebSocket handshake.

    Attributes:
        path: Request target from the request line; it may include a query.
        headers: Headers of the request.

    """

    path: str
    headers: Headers
    # There is no body attribute: a request body isn't useful in the context
    # of this library.

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """Deprecated accessor; emits a DeprecationWarning on every read."""
        # deprecated in 10.3 - 2022-04-17
        warnings.warn(
            "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
            category=DeprecationWarning,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
    ) -> Generator[None, None, Request]:
        """
        Read and parse a WebSocket handshake request.

        This is a generator-based coroutine.

        Only ``GET`` requests using ``HTTP/1.1`` are accepted. The request
        path is neither URL-decoded nor validated, except that it must be
        pure ASCII; decoding a non-ASCII path raises :exc:`UnicodeDecodeError`,
        which is a subclass of :exc:`ValueError`.

        No request body is read because WebSocket handshake requests don't
        carry one. A ``Content-Length`` header is tolerated only when its
        value is zero. If the peer sends a body anyway, it is left in the
        data stream after :meth:`parse` returns.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: If the connection is closed without a full HTTP request.
            SecurityError: If the request exceeds a security limit.
            ValueError: If the request isn't well formatted.
            NotImplementedError: If the request has a Transfer-Encoding header.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1

        # Strict parsing is sufficient here: method and version have fixed
        # expected values and the path isn't checked. WebSocket software
        # tends to implement HTTP/1.1 strictly, so leniency buys little.

        try:
            start_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            verb, target, version = start_line.split(b" ", 2)
        except ValueError:  # fewer than three space-separated fields
            raise ValueError(f"invalid HTTP request line: {d(start_line)}") from None

        # The protocol version is checked before the method, so a line that
        # fails both checks is reported as an unsupported protocol.
        if version != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(start_line)}"
            )
        if verb != b"GET":
            raise ValueError(f"unsupported HTTP method; expected GET; got {d(verb)}")

        # RFC 9110 defers the definition of URIs to RFC 3986, which allows only
        # a subset of ASCII. Non-ASCII IRIs must be UTF-8 then percent-encoded.
        path = target.decode("ascii")

        headers = yield from parse_headers(read_line)

        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        # Some devices send a Content-Length header with a value of 0, which
        # is harmless. int() also raises ValueError when the value isn't an
        # integer, which rejects malformed headers.
        if "Content-Length" in headers and int(headers["Content-Length"]) != 0:
            raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Build the bytes of this WebSocket handshake request.

        The result is the request line followed by the serialized headers.

        """
        # The request line and headers only contain ASCII characters, so a
        # plain encode() is enough.
        request_line = f"GET {self.path} HTTP/1.1\r\n".encode()
        return request_line + self.headers.serialize()

182 

183 

@dataclasses.dataclass
class Response:
    """
    HTTP response completing (or rejecting) a WebSocket handshake.

    Attributes:
        status_code: Numeric status code from the status line.
        reason_phrase: Reason phrase from the status line.
        headers: Headers of the response.
        body: Body of the response; empty by default.

    """

    status_code: int
    reason_phrase: str
    headers: Headers
    body: bytes | bytearray = b""

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """Deprecated accessor; emits a DeprecationWarning on every read."""
        # deprecated in 10.3 - 2022-04-17
        warnings.warn(
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            category=DeprecationWarning,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
        read_exact: Callable[[int], Generator[None, None, bytes | bytearray]],
        read_to_eof: Callable[[int], Generator[None, None, bytes | bytearray]],
        proxy: bool = False,
    ) -> Generator[None, None, Response]:
        """
        Read and parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase is decoded as ISO-8859-1, which cannot fail.
        Headers are parsed by :func:`parse_headers`.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: Generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: Generator-based coroutine that reads until the end
                of the stream.
            proxy: When true, ``HTTP/1.0`` is accepted in addition to
                ``HTTP/1.1`` and the body isn't read; ``body`` is left empty.

        Raises:
            EOFError: If the connection is closed without a full HTTP response.
            SecurityError: If the response exceeds a security limit.
            LookupError: If the response isn't well formatted.
            ValueError: If the response isn't well formatted.
            NotImplementedError: If the body uses an unsupported transfer
                coding.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            version, raw_code, raw_phrase = status_line.split(b" ", 2)
        except ValueError:  # fewer than three space-separated fields
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None

        if proxy:  # some proxies still use HTTP/1.0
            if version != b"HTTP/1.1" and version != b"HTTP/1.0":
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1 or HTTP/1.0: "
                    f"{d(status_line)}"
                )
        elif version != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
            )

        try:
            status_code = int(raw_code)
        except ValueError:  # int() rejected the literal
            raise ValueError(
                f"invalid status code; expected integer; got {d(raw_code)}"
            ) from None
        if status_code < 100 or status_code >= 600:
            raise ValueError(
                f"invalid status code; expected 100–599; got {d(raw_code)}"
            )

        # The reason phrase obeys the same character rules as header values.
        if _value_re.fullmatch(raw_phrase) is None:
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_phrase)}")

        # RFC 2616 implies ISO-8859-1. It's easy to reverse and cannot crash.
        # Non-ASCII never worked reliably and the reason isn't useful anyway.
        reason = raw_phrase.decode("iso-8859-1")

        headers = yield from parse_headers(read_line)

        body: bytes | bytearray
        if not proxy:
            body = yield from read_body(
                status_code, headers, read_line, read_exact, read_to_eof
            )
        else:
            body = b""

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Build the bytes of this WebSocket handshake response.

        The result is the status line, the serialized headers, then the body.

        """
        # The status line and headers only contain ASCII characters, so a
        # plain encode() is enough.
        status_line = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        return status_line + self.headers.serialize() + self.body

306 

307 

def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, bytes | bytearray]:
    """
    Read one line and return it without its trailing CRLF.

    This is a generator-based coroutine.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data. It is called
            with ``MAX_LINE_LENGTH`` as its argument.

    Raises:
        EOFError: If the line doesn't end with CRLF.
        SecurityError: If ``read_line`` raises :exc:`RuntimeError`, which is
            reported as the line being too long.

    """
    try:
        raw = yield from read_line(MAX_LINE_LENGTH)
    except RuntimeError:
        raise SecurityError("line too long")
    content, terminator = raw[:-2], raw[-2:]
    # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
    # A bare LF, or a line shorter than two bytes, fails this comparison.
    if terminator != b"\r\n":
        raise EOFError("line without CRLF")
    return content

333 

334 

def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, Headers]:
    """
    Read header lines until the empty line that terminates them.

    This is a generator-based coroutine.

    Header names are validated as tokens and decoded as ASCII. Header values
    are stripped of surrounding spaces and tabs, validated, and decoded as
    ISO-8859-1, so non-ASCII bytes map to the code points U+0080 to U+00FF.

    At most ``MAX_NUM_HEADERS`` header lines are accepted.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without complete headers.
        SecurityError: If the headers exceed a security limit.
        ValueError: If a header line isn't well formatted.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2

    # We don't attempt to support obsolete line folding.

    headers = Headers()
    # One extra read is allowed so that the terminating empty line can still
    # be consumed after exactly MAX_NUM_HEADERS headers.
    reads_left = MAX_NUM_HEADERS + 1
    while reads_left > 0:
        reads_left -= 1

        try:
            line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if not line:
            # The empty line marks the end of the headers.
            return headers

        try:
            raw_name, raw_value = line.split(b":", 1)
        except ValueError:  # the line contains no colon
            raise ValueError(f"invalid HTTP header line: {d(line)}") from None
        if _token_re.fullmatch(raw_name) is None:
            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
        raw_value = raw_value.strip(b" \t")
        if _value_re.fullmatch(raw_value) is None:
            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")

        # The name matched the token regex, so it is guaranteed to be ASCII.
        name = raw_name.decode("ascii")
        # Headers should be ASCII. Section 5.5 of RFC 9110 says: "Historically,
        # HTTP allowed field content with text in the ISO-8859-1 charset."
        # It's easy to reverse and cannot crash, making it a decent choice.
        value = raw_value.decode("iso-8859-1")

        # raw_value was validated just above; there's no need to revalidate.
        headers.set_insecure(name, value)

    # Every allowed read returned a non-empty line.
    raise SecurityError("too many HTTP headers")

389 

390 

def read_body(
    status_code: int,
    headers: Headers,
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
    read_exact: Callable[[int], Generator[None, None, bytes | bytearray]],
    read_to_eof: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, bytes | bytearray]:
    """
    Read the body of an HTTP response.

    This is a generator-based coroutine.

    The framing is chosen in this order: no body for 1xx, 204, and 304
    responses; chunked transfer coding when a Transfer-Encoding header is
    present; a fixed size when a Content-Length header is present; otherwise
    everything until the end of the stream.

    Args:
        status_code: Status code of the response.
        headers: Headers of the response.
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.
        read_exact: Generator-based coroutine that reads the requested
            bytes or raises an exception if there isn't enough data.
        read_to_eof: Generator-based coroutine that reads until the end
            of the stream.

    Returns:
        The body; empty for responses that don't include one.

    Raises:
        EOFError: If a chunk size line or trailer line lacks its CRLF.
        LookupError: If a framing header has multiple values.
        NotImplementedError: If the transfer coding isn't ``chunked``.
        SecurityError: If the body exceeds ``MAX_BODY_SIZE`` or a size is
            implausibly long.
        ValueError: If a chunk size or Content-Length isn't a valid
            non-negative integer, or a chunk isn't followed by CRLF.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

    # Since websockets only does GET requests (no HEAD, no CONNECT), all
    # responses except 1xx, 204, and 304 include a message body.
    if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
        return b""

    # MultipleValuesError is sufficiently unlikely that we don't attempt to
    # handle it when accessing headers. Instead we document that its parent
    # class, LookupError, may be raised.
    # Conversions from str to int are protected by sys.set_int_max_str_digits.

    elif (coding := headers.get("Transfer-Encoding")) is not None:
        if coding != "chunked":
            raise NotImplementedError(f"transfer coding {coding} isn't supported")

        # Accumulate in a bytearray: appending to immutable bytes would copy
        # the whole body again for every chunk received.
        body = bytearray()
        while True:
            chunk_size_line = yield from parse_line(read_line)
            # Chunk extensions, if any, follow a semicolon; ignore them.
            raw_chunk_size = chunk_size_line.split(b";", 1)[0]
            # Set a lower limit than default_max_str_digits; 1 EB is plenty.
            if len(raw_chunk_size) > 15:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
            chunk_size = int(raw_chunk_size, 16)
            # int() accepts a leading minus sign. A negative size would pass
            # the upper bound check below and reach read_exact().
            if chunk_size < 0:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise ValueError(f"invalid chunk size: {str_chunk_size}")
            if chunk_size == 0:
                break
            if len(body) + chunk_size > MAX_BODY_SIZE:
                raise SecurityError(
                    f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
                )
            body += yield from read_exact(chunk_size)
            if (yield from read_exact(2)) != b"\r\n":
                raise ValueError("chunk without CRLF")
        # Read the trailer.
        yield from parse_headers(read_line)
        # Hand back immutable bytes, as concatenating onto b"" used to.
        return bytes(body)

    elif (raw_content_length := headers.get("Content-Length")) is not None:
        # Set a lower limit than default_max_str_digits; 1 EiB is plenty.
        if len(raw_content_length) > 18:
            raise SecurityError(f"body too large: {raw_content_length} bytes")
        content_length = int(raw_content_length)
        # int() accepts a leading minus sign. A negative length would pass
        # the upper bound check below and reach read_exact().
        if content_length < 0:
            raise ValueError(
                f"invalid Content-Length; expected non-negative integer; "
                f"got {raw_content_length}"
            )
        if content_length > MAX_BODY_SIZE:
            raise SecurityError(f"body too large: {content_length} bytes")
        return (yield from read_exact(content_length))

    else:
        try:
            return (yield from read_to_eof(MAX_BODY_SIZE))
        except RuntimeError:
            raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")