Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/websockets/http11.py: 87%

129 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:20 +0000

1from __future__ import annotations 

2 

3import dataclasses 

4import re 

5import warnings 

6from typing import Callable, Generator, Optional 

7 

8from . import datastructures, exceptions 

9 

10 

# Cap on the number of header lines. With each line limited to 8 KiB, the
# maximum total size of headers is around 128 * 8 KiB = 1 MiB.
MAX_HEADERS = 128

# Cap on the length of the request line and of each header line. 8 KiB is the
# most common default configuration of popular HTTP servers.
MAX_LINE = 8 * 1024

# Cap on the size of an HTTP response body. Support for response bodies is
# intended to read an error message returned by a server. It isn't designed
# to perform large file transfers.
MAX_BODY = 1 << 20  # 1 MiB

21 

22 

def d(value: bytes) -> str:
    """
    Turn a bytestring into text that is safe to embed in an error message.

    Bytes that aren't valid UTF-8 are rendered as backslash escapes instead
    of raising an exception.

    """
    text = value.decode("utf-8", "backslashreplace")
    return text

29 

30 

31# See https://www.rfc-editor.org/rfc/rfc7230.html#appendix-B. 

32 

33# Regex for validating header names. 

34 

35_token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+") 

36 

37# Regex for validating header values. 

38 

39# We don't attempt to support obsolete line folding. 

40 

41# Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff). 

42 

43# The ABNF is complicated because it attempts to express that optional 

44# whitespace is ignored. We strip whitespace and don't revalidate that. 

45 

46# See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 

47 

48_value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*") 

49 

50 

@dataclasses.dataclass
class Request:
    """
    WebSocket handshake request.

    Attributes:
        path: Request path, including optional query.
        headers: Request headers.
    """

    path: str
    headers: datastructures.Headers
    # body isn't useful in the context of this library.

    _exception: Optional[Exception] = None

    @property
    def exception(self) -> Optional[Exception]:  # pragma: no cover
        """
        Deprecated accessor for the exception stored on this request.

        """
        # stacklevel=2 attributes the warning to the code that accessed the
        # property rather than to this module, so that it is reported at the
        # caller's location.
        warnings.warn(
            "Request.exception is deprecated; "
            "use ServerProtocol.handshake_exc instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Request]:
        """
        Parse a WebSocket handshake request.

        This is a generator-based coroutine.

        The request path isn't URL-decoded or validated in any way.

        The request path and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        :meth:`parse` doesn't attempt to read the request body because
        WebSocket handshake requests don't have one. If the request contains a
        body, it may be read from the data stream after :meth:`parse` returns.

        Args:
            read_line: generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.

        Raises:
            EOFError: if the connection is closed without a full HTTP request.
            SecurityError: if the request exceeds a security limit.
            ValueError: if the request isn't well formatted.
            NotImplementedError: if the request has a Transfer-Encoding header.

        """
        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.1

        # Parsing is simple because fixed values are expected for method and
        # version and because path isn't checked. Since WebSocket software tends
        # to implement HTTP/1.1 strictly, there's little need for lenient parsing.

        try:
            request_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP request line") from exc

        try:
            method, raw_path, version = request_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None

        if method != b"GET":
            raise ValueError(f"unsupported HTTP method: {d(method)}")
        if version != b"HTTP/1.1":
            raise ValueError(f"unsupported HTTP version: {d(version)}")
        path = raw_path.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        if "Content-Length" in headers:
            raise ValueError("unsupported request body")

        return cls(path, headers)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake request.

        """
        # Since the request line and headers only contain ASCII characters,
        # we can keep this simple.
        request = f"GET {self.path} HTTP/1.1\r\n".encode()
        request += self.headers.serialize()
        return request

149 

150 

@dataclasses.dataclass
class Response:
    """
    WebSocket handshake response.

    Attributes:
        status_code: Response code.
        reason_phrase: Response reason.
        headers: Response headers.
        body: Response body, if any.

    """

    status_code: int
    reason_phrase: str
    headers: datastructures.Headers
    body: Optional[bytes] = None

    _exception: Optional[Exception] = None

    @property
    def exception(self) -> Optional[Exception]:  # pragma: no cover
        """
        Deprecated accessor for the exception stored on this response.

        """
        # stacklevel=2 attributes the warning to the code that accessed the
        # property rather than to this module, so that it is reported at the
        # caller's location.
        warnings.warn(
            "Response.exception is deprecated; "
            "use ClientProtocol.handshake_exc instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._exception

    @classmethod
    def parse(
        cls,
        read_line: Callable[[int], Generator[None, None, bytes]],
        read_exact: Callable[[int], Generator[None, None, bytes]],
        read_to_eof: Callable[[int], Generator[None, None, bytes]],
    ) -> Generator[None, None, Response]:
        """
        Parse a WebSocket handshake response.

        This is a generator-based coroutine.

        The reason phrase and headers are expected to contain only ASCII
        characters. Other characters are represented with surrogate escapes.

        Args:
            read_line: generator-based coroutine that reads a LF-terminated
                line or raises an exception if there isn't enough data.
            read_exact: generator-based coroutine that reads the requested
                bytes or raises an exception if there isn't enough data.
            read_to_eof: generator-based coroutine that reads until the end
                of the stream.

        Raises:
            EOFError: if the connection is closed without a full HTTP response.
            SecurityError: if the response exceeds a security limit.
            LookupError: if the response isn't well formatted.
            ValueError: if the response isn't well formatted.
            NotImplementedError: if the response has a Transfer-Encoding header.

        """
        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.1.2

        try:
            status_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP status line") from exc

        try:
            version, raw_status_code, raw_reason = status_line.split(b" ", 2)
        except ValueError:  # not enough values to unpack (expected 3, got 1-2)
            raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None

        if version != b"HTTP/1.1":
            raise ValueError(f"unsupported HTTP version: {d(version)}")
        try:
            status_code = int(raw_status_code)
        except ValueError:  # invalid literal for int() with base 10
            raise ValueError(
                f"invalid HTTP status code: {d(raw_status_code)}"
            ) from None
        if not 100 <= status_code < 1000:
            raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
        if not _value_re.fullmatch(raw_reason):
            raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
        # _value_re accepts obs-text bytes (\x80-\xff), which a strict UTF-8
        # decode may reject. Decode like header values so that such bytes
        # become surrogate escapes, as documented above.
        reason = raw_reason.decode("ascii", "surrogateescape")

        headers = yield from parse_headers(read_line)

        # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3

        if "Transfer-Encoding" in headers:
            raise NotImplementedError("transfer codings aren't supported")

        # Since websockets only does GET requests (no HEAD, no CONNECT), all
        # responses except 1xx, 204, and 304 include a message body.
        if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
            body = None
        else:
            content_length: Optional[int]
            try:
                # MultipleValuesError is sufficiently unlikely that we don't
                # attempt to handle it. Instead we document that its parent
                # class, LookupError, may be raised.
                raw_content_length = headers["Content-Length"]
            except KeyError:
                content_length = None
            else:
                # Content-Length is 1*DIGIT per RFC 7230. int() alone is more
                # lenient: it accepts signs, surrounding whitespace, and
                # underscores, so a negative length would slip past the
                # MAX_BODY check below and reach read_exact().
                if not (
                    raw_content_length.isascii() and raw_content_length.isdigit()
                ):
                    raise ValueError(
                        f"invalid Content-Length header: {raw_content_length}"
                    )
                content_length = int(raw_content_length)

            if content_length is None:
                try:
                    body = yield from read_to_eof(MAX_BODY)
                except RuntimeError:
                    raise exceptions.SecurityError(
                        f"body too large: over {MAX_BODY} bytes"
                    )
            elif content_length > MAX_BODY:
                raise exceptions.SecurityError(
                    f"body too large: {content_length} bytes"
                )
            else:
                body = yield from read_exact(content_length)

        return cls(status_code, reason, headers, body)

    def serialize(self) -> bytes:
        """
        Serialize a WebSocket handshake response.

        """
        # Since the status line and headers only contain ASCII characters,
        # we can keep this simple.
        response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
        response += self.headers.serialize()
        if self.body is not None:
            response += self.body
        return response

287 

288 

def parse_headers(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, datastructures.Headers]:
    """
    Read header lines until the empty line that terminates them.

    This is a generator-based coroutine.

    Non-ASCII characters in header values are represented with surrogate
    escapes.

    Args:
        read_line: generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: if the connection is closed without complete headers.
        SecurityError: if the request exceeds a security limit.
        ValueError: if the request isn't well formatted.

    """
    # https://www.rfc-editor.org/rfc/rfc7230.html#section-3.2

    # We don't attempt to support obsolete line folding.

    parsed = datastructures.Headers()

    # MAX_HEADERS header lines plus one more read for the terminating empty
    # line. Running out of reads without seeing it means too many headers.
    reads_left = MAX_HEADERS + 1
    while reads_left > 0:
        reads_left -= 1

        try:
            raw_line = yield from parse_line(read_line)
        except EOFError as exc:
            raise EOFError("connection closed while reading HTTP headers") from exc
        if not raw_line:
            break

        name_bytes, colon, value_bytes = raw_line.partition(b":")
        if not colon:  # no separator between name and value
            raise ValueError(f"invalid HTTP header line: {d(raw_line)}") from None
        if _token_re.fullmatch(name_bytes) is None:
            raise ValueError(f"invalid HTTP header name: {d(name_bytes)}")
        value_bytes = value_bytes.strip(b" \t")
        if _value_re.fullmatch(value_bytes) is None:
            raise ValueError(f"invalid HTTP header value: {d(value_bytes)}")

        # The name is guaranteed to be ASCII at this point; the value may
        # contain obs-text bytes, hence the surrogate escapes.
        header_name = name_bytes.decode("ascii")
        header_value = value_bytes.decode("ascii", "surrogateescape")
        parsed[header_name] = header_value

    else:
        raise exceptions.SecurityError("too many HTTP headers")

    return parsed

338 

339 

def parse_line(
    read_line: Callable[[int], Generator[None, None, bytes]],
) -> Generator[None, None, bytes]:
    """
    Read one line and return it without its trailing CRLF.

    This is a generator-based coroutine.

    Args:
        read_line: generator-based coroutine that reads a LF-terminated line
            or raises an exception if there isn't enough data.

    Raises:
        EOFError: if the connection is closed without a CRLF.
        SecurityError: if the response exceeds a security limit.

    """
    try:
        raw = yield from read_line(MAX_LINE)
    except RuntimeError:
        raise exceptions.SecurityError("line too long")

    # Not mandatory but safe - https://www.rfc-editor.org/rfc/rfc7230.html#section-3.5
    if raw[-2:] != b"\r\n":
        raise EOFError("line without CRLF")

    return raw[:-2]