Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/websockets/http11.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

161 statements  

1from __future__ import annotations 

2 

3import dataclasses 

4import os 

5import re 

6import sys 

7import warnings 

8from collections.abc import Generator 

9from typing import Callable 

10 

11from .datastructures import Headers 

12from .exceptions import SecurityError 

13from .version import version as websockets_version 

14 

15 

__all__ = ["SERVER", "USER_AGENT", "Request", "Response"]


# "major.minor" version of the running interpreter.
PYTHON_VERSION = f"{sys.version_info[0]}.{sys.version_info[1]}"

# Value of the User-Agent header sent with HTTP requests; the
# WEBSOCKETS_USER_AGENT environment variable takes precedence when set.
USER_AGENT = os.getenv(
    "WEBSOCKETS_USER_AGENT",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Value of the Server header sent with HTTP responses; the
# WEBSOCKETS_SERVER environment variable takes precedence when set.
SERVER = os.getenv(
    "WEBSOCKETS_SERVER",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Cap on the number of headers. Combined with the line length limit below,
# the maximum total size of headers is around 128 * 8 KiB = 1 MiB.
MAX_NUM_HEADERS = int(os.getenv("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Cap on the length of the request line and of each header line. 8 KiB is
# the most common default configuration of popular HTTP servers.
MAX_LINE_LENGTH = int(os.getenv("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))

# Cap on the size of HTTP response bodies. Bodies are only read to retrieve
# an error message returned by a server, not to perform large file transfers.
MAX_BODY_SIZE = int(os.getenv("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576"))  # 1 MiB

48 

49 

def d(value: bytes | bytearray) -> str:
    """
    Turn a bytestring into text that can be embedded in an error message.

    Bytes that cannot be decoded as UTF-8 are rendered as backslash escapes
    rather than raising an exception.

    """
    return str(value, encoding="utf-8", errors="backslashreplace")

56 

57 

# See https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.

# Regex for validating header names: one or more characters from the set
# below. It is applied with fullmatch() so the entire name must conform.

_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values. It is also applied, with fullmatch(),
# to the reason phrase of a status line. The empty string is accepted.

# We don't attempt to support obsolete line folding.

# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).

# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.

# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189

_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")

76 

77 

@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.
    """

    path: str
    headers: Headers
    # body isn't useful in the context of this library.

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """
        Deprecated accessor for the exception stored on this request.

        Emits a :exc:`DeprecationWarning` on every access.

        """
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code reading the property rather
            # than to this module, so that it points at the line to fix.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way.

        The request path and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        :meth:`parse` doesn't attempt to read the request body because
        WebSocket handshake requests don't have one. If the request contains a
        body, it may be read from the data stream after :meth:`parse` returns.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: If the connection is closed without a full HTTP request.
            SecurityError: If the request exceeds a security limit.
            ValueError: If the request isn't well formatted.
            NotImplementedError: If the request has a Transfer-Encoding header.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1

        # Parsing is simple because fixed values are expected for method and
        # version and because path isn't checked. Since WebSocket software tends
        # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            method, raw_path, protocol = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(request_line)}"
            )
        if method != b"GET":
            raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}")
        path = raw_path.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            # Some devices send a Content-Length header with a value of 0.
            # This raises ValueError if Content-Length isn't an integer too.
            if int(headers["Content-Length"]) != 0:
                raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        Returns:
            The request line followed by the serialized headers.

        """
        # Since the request line and headers only contain ASCII characters,
        # we can keep this simple.
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request

179 

180 

@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body.

    """

    status_code: int
    reason_phrase: str
    headers: Headers
    body: bytes | bytearray = b""

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        """
        Deprecated accessor for the exception stored on this response.

        Emits a :exc:`DeprecationWarning` on every access.

        """
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            # Attribute the warning to the code reading the property rather
            # than to this module, so that it points at the line to fix.
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
        read_exact: Callable[[int], Generator[None, None, bytes | bytearray]],
        read_to_eof: Callable[[int], Generator[None, None, bytes | bytearray]],
        proxy: bool = False,
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: Generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: Generator-based coroutine that reads until the end
                of the stream.
            proxy: When true, HTTP/1.0 is accepted in addition to HTTP/1.1
                and the body isn't read; it is left empty.

        Raises:
            EOFError: If the connection is closed without a full HTTP response.
            SecurityError: If the response exceeds a security limit.
            LookupError: If the response isn't well formatted.
            ValueError: If the response isn't well formatted.
            NotImplementedError: If the response uses a transfer coding
                other than chunked.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            protocol, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
        if proxy:  # some proxies still use HTTP/1.0
            if protocol not in [b"HTTP/1.1", b"HTTP/1.0"]:
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1 or HTTP/1.0: "
                    f"{d(status_line)}"
                )
        else:
            if protocol != b"HTTP/1.1":
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
                )
        try:
            status_code = int(raw_status_code)
        except ValueError:  # invalid literal for int() with base 10
            raise ValueError(
                f"invalid status code; expected integer; got {d(raw_status_code)}"
            ) from None
        if not 100 <= status_code < 600:
            raise ValueError(
                f"invalid status code; expected 100–599; got {d(raw_status_code)}"
            )
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
        reason = raw_reason.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        body: bytes | bytearray
        if proxy:
            # The body of a proxy response is deliberately left unread.
            body = b""
        else:
            body = yield from read_body(
                status_code, headers, read_line, read_exact, read_to_eof
            )

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        Returns:
            The status line, followed by the serialized headers and the body.

        """
        # Since the status line and headers only contain ASCII characters,
        # we can keep this simple.
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        response += self.body
        return response

300 

301 

def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, bytes | bytearray]:
    """
    Read one line and return it without its trailing CRLF.

    This is a generator-based coroutine.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data. It is called
            with ``MAX_LINE_LENGTH``; a :exc:`RuntimeError` coming out of it
            is reported as a line that is too long.

    Raises:
        EOFError: If the line doesn't end with CRLF.
        SecurityError: If the line exceeds the length limit.

    """
    try:
        raw = yield from read_line(MAX_LINE_LENGTH)
    except RuntimeError:
        raise SecurityError("line too long")
    content, terminator = raw[:-2], raw[-2:]
    # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
    if terminator == b"\r\n":
        return content
    raise EOFError("line without CRLF")

327 

328 

def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, Headers]:
    """
    Read header lines up to the first empty line and collect them.

    This is a generator-based coroutine.

    Header names must be ASCII; non-ASCII characters in header values are
    represented with surrogate escapes.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without complete headers.
        SecurityError: If the request exceeds a security limit.
        ValueError: If the request isn't well formatted.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2

    # We don't attempt to support obsolete line folding.

    collected = Headers()
    # One extra line is allowed for the empty line that ends the headers.
    lines_left = MAX_NUM_HEADERS + 1
    while lines_left > 0:
        lines_left -= 1

        try:
            line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if not line:
            return collected

        raw_name, colon, raw_value = line.partition(b":")
        if not colon:  # no separator between name and value
            raise ValueError(f"invalid HTTP header line: {d(line)}")
        if _token_re.fullmatch(raw_name) is None:
            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
        raw_value = raw_value.strip(b" \t")
        if _value_re.fullmatch(raw_value) is None:
            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")

        name = raw_name.decode("ascii")  # guaranteed to be ASCII at this point
        value = raw_value.decode("ascii", "surrogateescape")
        collected[name] = value

    # The line budget ran out before the terminating empty line was seen.
    raise SecurityError("too many HTTP headers")

378 

379 

def read_body(
    status_code: int,
    headers: Headers,
    read_line: Callable[[int], Generator[None, None, bytes | bytearray]],
    read_exact: Callable[[int], Generator[None, None, bytes | bytearray]],
    read_to_eof: Callable[[int], Generator[None, None, bytes | bytearray]],
) -> Generator[None, None, bytes | bytearray]:
    """
    Read the body of an HTTP response.

    This is a generator-based coroutine.

    Responses with a 1xx, 204, or 304 status code have no body. Otherwise,
    the body is read according to the Transfer-Encoding header when present
    (only ``chunked`` is supported), else according to the Content-Length
    header when present, else until the end of the stream.

    Args:
        status_code: Status code of the response.
        headers: Headers of the response.
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.
        read_exact: Generator-based coroutine that reads the requested
            bytes or raises an exception if there isn't enough data.
        read_to_eof: Generator-based coroutine that reads until the end
            of the stream.

    Returns:
        The response body; empty when the response has none.

    Raises:
        EOFError: If a chunk size line or a trailer line lacks its CRLF.
        SecurityError: If the body or a chunk exceeds a security limit.
        LookupError: May be raised when accessing headers; see below.
        NotImplementedError: If the transfer coding isn't ``chunked``.
        ValueError: If a chunk size or the content length isn't a valid
            non-negative integer, or if a chunk isn't followed by CRLF.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

    # Since websockets only does GET requests (no HEAD, no CONNECT), all
    # responses except 1xx, 204, and 304 include a message body.
    if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
        return b""

    # MultipleValuesError is sufficiently unlikely that we don't attempt to
    # handle it when accessing headers. Instead we document that its parent
    # class, LookupError, may be raised.
    # Conversions from str to int are protected by sys.set_int_max_str_digits.

    elif (coding := headers.get("Transfer-Encoding")) is not None:
        if coding != "chunked":
            raise NotImplementedError(f"transfer coding {coding} isn't supported")

        body = b""
        while True:
            chunk_size_line = yield from parse_line(read_line)
            # Drop chunk extensions, if any.
            raw_chunk_size = chunk_size_line.split(b";", 1)[0]
            # Set a lower limit than default_max_str_digits; 1 EB is plenty.
            if len(raw_chunk_size) > 15:
                raise SecurityError(f"chunk too large: 0x{d(raw_chunk_size)} bytes")
            chunk_size = int(raw_chunk_size, 16)
            # int() accepts a sign; a negative size would slip through the
            # limit check below and reach read_exact with a negative count.
            if chunk_size < 0:
                raise ValueError(f"invalid chunk size: {d(raw_chunk_size)}")
            if chunk_size == 0:
                break
            if len(body) + chunk_size > MAX_BODY_SIZE:
                raise SecurityError(
                    f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
                )
            body += yield from read_exact(chunk_size)
            if (yield from read_exact(2)) != b"\r\n":
                raise ValueError("chunk without CRLF")
        # Read the trailer.
        yield from parse_headers(read_line)
        return body

    elif (raw_content_length := headers.get("Content-Length")) is not None:
        # Set a lower limit than default_max_str_digits; 1 EiB is plenty.
        if len(raw_content_length) > 18:
            raise SecurityError(f"body too large: {raw_content_length} bytes")
        content_length = int(raw_content_length)
        # int() accepts a sign; a negative length would slip through the
        # limit check below and reach read_exact with a negative count.
        if content_length < 0:
            raise ValueError(f"invalid Content-Length: {raw_content_length}")
        if content_length > MAX_BODY_SIZE:
            raise SecurityError(f"body too large: {content_length} bytes")
        return (yield from read_exact(content_length))

    else:
        try:
            return (yield from read_to_eof(MAX_BODY_SIZE))
        except RuntimeError:
            # read_to_eof signals that the limit was exceeded with RuntimeError.
            raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")