Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/websockets/http11.py: 75%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

161 statements  

from __future__ import annotations

import dataclasses
import os
import re
import sys
import warnings
from collections.abc import Generator
from typing import Callable

from .datastructures import Headers
from .exceptions import SecurityError
from .version import version as websockets_version

14 

15 

__all__ = [
    "SERVER",
    "USER_AGENT",
    "Request",
    "Response",
]


# "major.minor" of the running interpreter. str.format() ignores the
# positional arguments beyond the two that the template consumes.
PYTHON_VERSION = "{}.{}".format(*sys.version_info)

# User-Agent header for HTTP requests.
USER_AGENT = os.environ.get(
    "WEBSOCKETS_USER_AGENT",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Server header for HTTP responses.
SERVER = os.environ.get(
    "WEBSOCKETS_SERVER",
    f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
)

# Maximum total size of headers is around 128 * 8 KiB = 1 MiB.
MAX_NUM_HEADERS = int(os.environ.get("WEBSOCKETS_MAX_NUM_HEADERS", "128"))

# Limit request line and header lines. 8KiB is the most common default
# configuration of popular HTTP servers.
MAX_LINE_LENGTH = int(os.environ.get("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))

# Support for HTTP response bodies is intended to read an error message
# returned by a server. It isn't designed to perform large file transfers.
MAX_BODY_SIZE = int(os.environ.get("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576"))  # 1 MiB

48 

49 

def d(value: bytes) -> str:
    """
    Decode a bytestring for interpolating into an error message.

    Decoding never fails: bytes that aren't valid UTF-8 are rendered as
    backslash escapes (for example ``\\xff``) instead of raising
    :exc:`UnicodeDecodeError`.

    """
    return value.decode(errors="backslashreplace")

56 

57 

# See https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.

# Regex for validating header names.

_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")

# Regex for validating header values.

# We don't attempt to support obsolete line folding.

# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).

# The ABNF is complicated because it attempts to express that optional
# whitespace is ignored. We strip whitespace and don't revalidate that.

# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189

_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")

76 

77 

@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.

    """

    path: str
    headers: Headers
    # body isn't useful in the context of this library.

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        # stacklevel=2 attributes the warning to the code reading the
        # attribute rather than to this property.
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way.

        The request path and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        :meth:`parse` doesn't attempt to read the request body because
        WebSocket handshake requests don't have one. If the request contains a
        body, it may be read from the data stream after :meth:`parse` returns.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: If the connection is closed without a full HTTP request.
            SecurityError: If the request exceeds a security limit.
            ValueError: If the request isn't well formatted.
            NotImplementedError: If the request has a Transfer-Encoding header.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1

        # Parsing is simple because fixed values are expected for method and
        # version and because path isn't checked. Since WebSocket software tends
        # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            method, raw_path, protocol = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
        if protocol != b"HTTP/1.1":
            raise ValueError(
                f"unsupported protocol; expected HTTP/1.1: {d(request_line)}"
            )
        if method != b"GET":
            raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}")
        path = raw_path.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            # Some devices send a Content-Length header with a value of 0.
            # This raises ValueError if Content-Length isn't an integer too.
            if int(headers["Content-Length"]) != 0:
                raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        """
        # Since the request line and headers only contain ASCII characters,
        # we can keep this simple.
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request

179 

180 

@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body.

    """

    status_code: int
    reason_phrase: str
    headers: Headers
    body: bytes = b""

    _exception: Exception | None = None

    @property
    def exception(self) -> Exception | None:  # pragma: no cover
        # stacklevel=2 attributes the warning to the code reading the
        # attribute rather than to this property.
        warnings.warn(  # deprecated in 10.3 - 2022-04-17
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
        read_exact: Callable[[int], Generator[None, None, bytes]],
        read_to_eof: Callable[[int], Generator[None, None, bytes]],
        proxy: bool = False,
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        Args:
            read_line: Generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: Generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: Generator-based coroutine that reads until the end
                of the stream.
            proxy: When true, accept HTTP/1.0 in addition to HTTP/1.1 in the
                status line and don't read a response body.

        Raises:
            EOFError: If the connection is closed without a full HTTP response.
            SecurityError: If the response exceeds a security limit.
            LookupError: If the response isn't well formatted.
            ValueError: If the response isn't well formatted.
            NotImplementedError: If the response uses a transfer coding
                other than chunked.

        """
        # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            protocol, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
        if proxy:  # some proxies still use HTTP/1.0
            if protocol not in [b"HTTP/1.1", b"HTTP/1.0"]:
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1 or HTTP/1.0: "
                    f"{d(status_line)}"
                )
        else:
            if protocol != b"HTTP/1.1":
                raise ValueError(
                    f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
                )
        try:
            status_code = int(raw_status_code)
        except ValueError:  # invalid literal for int() with base 10
            raise ValueError(
                f"invalid status code; expected integer; got {d(raw_status_code)}"
            ) from None
        if not 100 <= status_code < 600:
            raise ValueError(
                f"invalid status code; expected 100–599; got {d(raw_status_code)}"
            )
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
        reason = raw_reason.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        if proxy:
            body = b""
        else:
            body = yield from read_body(
                status_code, headers, read_line, read_exact, read_to_eof
            )

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        """
        # Since the status line and headers only contain ASCII characters,
        # we can keep this simple.
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        response += self.body
        return response

299 

300 

def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Parse a single line.

    CRLF is stripped from the return value.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without a CRLF.
        SecurityError: If the response exceeds a security limit.

    """
    try:
        line = yield from read_line(MAX_LINE_LENGTH)
    except RuntimeError as exc:
        # Chain explicitly so the traceback shows the RuntimeError as the
        # direct cause of the SecurityError.
        raise SecurityError("line too long") from exc
    # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
    if not line.endswith(b"\r\n"):
        raise EOFError("line without CRLF")
    return line[:-2]

326 

327 

def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, Headers]:
    """
    Parse HTTP headers.

    Lines are read until an empty line is found. Each line is split on the
    first colon, then its name and value are validated.

    Non-ASCII characters are represented with surrogate escapes.

    Args:
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: If the connection is closed without complete headers.
        SecurityError: If the headers exceed a security limit.
        ValueError: If the headers aren't well formatted.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2

    # We don't attempt to support obsolete line folding.

    headers = Headers()
    # One extra iteration is needed to read the empty line that terminates
    # a block of exactly MAX_NUM_HEADERS headers.
    for _ in range(MAX_NUM_HEADERS + 1):
        try:
            line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if line == b"":
            break

        try:
            raw_name, raw_value = line.split(b":", 1)
        except ValueError:  # not enough values to unpack (expected 2, got 1)
            raise ValueError(f"invalid HTTP header line: {d(line)}") from None
        if not _token_re.fullmatch(raw_name):
            raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
        raw_value = raw_value.strip(b" \t")
        if not _value_re.fullmatch(raw_value):
            raise ValueError(f"invalid HTTP header value: {d(raw_value)}")

        name = raw_name.decode("ascii")  # guaranteed to be ASCII at this point
        value = raw_value.decode("ascii", "surrogateescape")
        headers[name] = value

    else:
        # The loop ended without reaching the empty line.
        raise SecurityError("too many HTTP headers")

    return headers

377 

378 

def read_body(
    status_code: int,
    headers: Headers,
    read_line: Callable[[int], Generator[None, None, bytes]],
    read_exact: Callable[[int], Generator[None, None, bytes]],
    read_to_eof: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Read the body of an HTTP response.

    This is a generator-based coroutine.

    The body is delimited, in order of precedence, by chunked transfer
    coding, by the Content-Length header, or by the end of the stream.
    Responses with a 1xx, 204, or 304 status code have no body.

    Args:
        status_code: Status code of the response.
        headers: Headers of the response.
        read_line: Generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.
        read_exact: Generator-based coroutine that reads the requested
            bytes or raises an exception if there isn't enough data.
        read_to_eof: Generator-based coroutine that reads until the end
            of the stream.

    Raises:
        EOFError: If a chunk size line or trailer line lacks a CRLF.
        SecurityError: If the body exceeds ``MAX_BODY_SIZE``.
        LookupError: If the response isn't well formatted.
        ValueError: If a chunk size or the Content-Length isn't a
            non-negative integer, or if a chunk isn't followed by CRLF.
        NotImplementedError: If the transfer coding isn't ``chunked``.

    """
    # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3

    # Since websockets only does GET requests (no HEAD, no CONNECT), all
    # responses except 1xx, 204, and 304 include a message body.
    if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
        return b""

    # MultipleValuesError is sufficiently unlikely that we don't attempt to
    # handle it when accessing headers. Instead we document that its parent
    # class, LookupError, may be raised.
    # Conversions from str to int are protected by sys.set_int_max_str_digits.

    if (coding := headers.get("Transfer-Encoding")) is not None:
        if coding != "chunked":
            raise NotImplementedError(f"transfer coding {coding} isn't supported")

        body = b""
        while True:
            chunk_size_line = yield from parse_line(read_line)
            # Drop chunk extensions, if any.
            raw_chunk_size = chunk_size_line.split(b";", 1)[0]
            # Set a lower limit than default_max_str_digits; 1 EiB is plenty.
            if len(raw_chunk_size) > 15:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
            chunk_size = int(raw_chunk_size, 16)
            # int() accepts a leading minus sign; a negative size must not
            # slip through the size limit and reach read_exact().
            if chunk_size < 0:
                str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
                raise ValueError(f"invalid chunk size: {str_chunk_size}")
            if chunk_size == 0:
                break
            if len(body) + chunk_size > MAX_BODY_SIZE:
                raise SecurityError(
                    f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
                )
            body += yield from read_exact(chunk_size)
            if (yield from read_exact(2)) != b"\r\n":
                raise ValueError("chunk without CRLF")
        # Read the trailer.
        yield from parse_headers(read_line)
        return body

    if (raw_content_length := headers.get("Content-Length")) is not None:
        # Set a lower limit than default_max_str_digits; 1 EB is plenty.
        if len(raw_content_length) > 18:
            raise SecurityError(f"body too large: {raw_content_length} bytes")
        content_length = int(raw_content_length)
        # int() accepts a leading minus sign; a negative length must not
        # slip through the size limit and reach read_exact().
        if content_length < 0:
            raise ValueError(f"invalid Content-Length: {raw_content_length}")
        if content_length > MAX_BODY_SIZE:
            raise SecurityError(f"body too large: {content_length} bytes")
        return (yield from read_exact(content_length))

    try:
        return (yield from read_to_eof(MAX_BODY_SIZE))
    except RuntimeError as exc:
        raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes") from exc