#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""HTTP utility code shared by clients and servers.

This module also defines the `HTTPServerRequest` class which is exposed
via `tornado.web.RequestHandler.request`.
"""

import calendar
import collections.abc
import copy
import datetime
import email.utils
from functools import lru_cache
from http.client import responses
import http.cookies
import re
from ssl import SSLError
import time
import unicodedata
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
from tornado.util import ObjectDict, unicode_type


# responses is unused in this file, but we re-export it to other files.
# Reference it so pyflakes doesn't complain.
responses

import typing
from typing import (
    Tuple,
    Iterable,
    List,
    Mapping,
    Iterator,
    Dict,
    Union,
    Optional,
    Awaitable,
    Generator,
    AnyStr,
)

if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

    # This can be done unconditionally in the base class of HTTPHeaders
    # after we drop support for Python 3.8.
    StrMutableMapping = collections.abc.MutableMapping[str, str]
else:
    StrMutableMapping = collections.abc.MutableMapping

# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"

# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
# chars greater than \xFF (which may appear after decoding utf8).
_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")


class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    field_name = token
    method = token
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
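

# A usage sketch (illustrative only; _demo_abnf_validation is not part of
# Tornado's API): the _ABNF patterns are applied with fullmatch(), as in
# HTTPHeaders.add below.
def _demo_abnf_validation() -> None:
    # A printable value with internal spaces is a valid field-value...
    assert _ABNF.field_value.fullmatch("text/html; charset=utf-8")
    # ...but control characters such as NUL are rejected.
    assert not _ABNF.field_value.fullmatch("bad\x00value")
    # Header names must be RFC 9110 tokens, so spaces are not allowed.
    assert _ABNF.field_name.fullmatch("Content-Type")
    assert not _ABNF.field_name.fullmatch("Bad Header")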


@lru_cache(1000)
def _normalize_header(name: str) -> str:
    """Map a header name to Http-Header-Case.

    >>> _normalize_header("coNtent-TYPE")
    'Content-Type'
    """
    return "-".join([w.capitalize() for w in name.split("-")])


class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key."""
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-length')
        '42'

        .. versionchanged:: 6.5
           Now supports lines with or without the trailing CRLF, making it possible
           to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (without CR) as a line
           separator will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError` and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append(f"{name}: {value}\n")
        return "".join(lines)

    __unicode__ = __str__
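

# A usage sketch (illustrative only; _demo_parse_headers is not part of
# Tornado's API): HTTP header bytes are decoded as latin1 before parsing,
# as the comment in HTTPHeaders.parse describes.
def _demo_parse_headers() -> None:
    raw = b"Content-Type: text/html\r\nSet-Cookie: a=b\r\nSet-Cookie: c=d\r\n"
    h = HTTPHeaders.parse(raw.decode("latin1"))
    # The dict interface joins repeated headers with commas...
    assert h["set-cookie"] == "a=b,c=d"
    # ...while get_list() preserves them individually.
    assert h.get_list("Set-Cookie") == ["a=b", "c=d"]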


class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers. Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string. If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

       .. versionchanged:: 3.1
          The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https". If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps argument names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings. Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.

    .. deprecated:: 6.5.2
       The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
       ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
       temporarily restored in 6.5.2.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = host or self.headers["Host"]
        except KeyError:
            if version == "HTTP/1.0":
                # HTTP/1.0 does not require the Host header.
                self.host = "127.0.0.1"
            else:
                raise HTTPInputError("Missing Host header")
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present). If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead. See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
        return f"{self.__class__.__name__}({args})"
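

# A construction sketch (illustrative only; _demo_server_request is not part
# of Tornado's API): HTTPServer normally builds these objects, but the
# constructor can be called directly, e.g. in tests.
def _demo_server_request() -> None:
    request = HTTPServerRequest(
        method="GET",
        uri="/item?id=1&id=2",
        headers=HTTPHeaders({"Host": "example.com:8080"}),
    )
    # The uri is split into path and query, and the query string is parsed
    # into byte-string arguments.
    assert request.path == "/item"
    assert request.arguments == {"id": [b"1", b"2"]}
    # host comes from the Host header; host_name drops the port.
    assert request.host_name == "example.com"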


class HTTPInputError(Exception):
    """Exception class for malformed HTTP requests or responses
    from remote sources.

    .. versionadded:: 4.0
    """

    pass


class HTTPOutputError(Exception):
    """Exception class for errors in HTTP output.

    .. versionadded:: 4.0
    """

    pass


class HTTPServerConnectionDelegate:
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        pass


class HTTPMessageDelegate:
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass
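

# A minimal delegate sketch (illustrative only; _DemoBufferingDelegate is not
# part of Tornado's API): a concrete HTTPMessageDelegate that buffers body
# chunks, in the style an HTTPServerConnectionDelegate.start_request
# implementation might return.
class _DemoBufferingDelegate(HTTPMessageDelegate):
    def __init__(self) -> None:
        self.chunks = []  # type: List[bytes]

    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> None:
        # Stash the parsed start line and headers for later use.
        self.start_line = start_line
        self.headers = headers

    def data_received(self, chunk: bytes) -> None:
        # Collect chunks as they arrive; a real delegate could return a
        # Future here for flow control.
        self.chunks.append(chunk)

    def finish(self) -> None:
        self.body = b"".join(self.chunks)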


class HTTPConnection:
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data. This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        raise NotImplementedError()


def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key).

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    if isinstance(args, dict):
        parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
        parsed_query.extend(args.items())
    elif isinstance(args, list) or isinstance(args, tuple):
        parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
        parsed_query.extend(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    final_query = urlencode(parsed_query)
    url = urlunparse(
        (
            parsed_url[0],
            parsed_url[1],
            parsed_url[2],
            parsed_url[3],
            final_query,
            parsed_url[5],
        )
    )
    return url


class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    filename: str
    body: bytes
    content_type: str


def _parse_request_range(
    range_header: str,
) -> Optional[Tuple[Optional[int], Optional[int]]]:
    """Parses a Range header.

    Returns either ``None`` or tuple ``(start, end)``.
    Note that while the HTTP headers use inclusive byte positions,
    this method returns indexes suitable for use in slices.

    >>> start, end = _parse_request_range("bytes=1-2")
    >>> start, end
    (1, 3)
    >>> [0, 1, 2, 3, 4][start:end]
    [1, 2]
    >>> _parse_request_range("bytes=6-")
    (6, None)
    >>> _parse_request_range("bytes=-6")
    (-6, None)
    >>> _parse_request_range("bytes=-0")
    (None, 0)
    >>> _parse_request_range("bytes=")
    (None, None)
    >>> _parse_request_range("foo=42")
    >>> _parse_request_range("bytes=1-2,6-10")

    Note: only supports one range (e.g., ``bytes=1-2,6-10`` is not allowed).

    See [0] for the details of the range header.

    [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
    """
    unit, _, value = range_header.partition("=")
    unit, value = unit.strip(), value.strip()
    if unit != "bytes":
        return None
    start_b, _, end_b = value.partition("-")
    try:
        start = _int_or_none(start_b)
        end = _int_or_none(end_b)
    except ValueError:
        return None
    if end is not None:
        if start is None:
            if end != 0:
                start = -end
                end = None
        else:
            end += 1
    return (start, end)


def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
    """Returns a suitable Content-Range header:

    >>> print(_get_content_range(None, 1, 4))
    bytes 0-0/4
    >>> print(_get_content_range(1, 3, 4))
    bytes 1-2/4
    >>> print(_get_content_range(None, None, 4))
    bytes 0-3/4
    """
    start = start or 0
    end = (end or total) - 1
    return f"bytes {start}-{end}/{total}"


def _int_or_none(val: str) -> Optional[int]:
    val = val.strip()
    if val == "":
        return None
    return int(val)


def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``. The ``content_type`` parameter should be
    a string and ``body`` should be a byte string. The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                raise HTTPInputError("multipart boundary not found")
        except Exception as e:
            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
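

# A urlencoded sketch (illustrative only; _demo_body_arguments is not part of
# Tornado's API): the arguments and files dicts are updated in place, as in
# HTTPServerRequest._parse_body.
def _demo_body_arguments() -> None:
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    parse_body_arguments(
        "application/x-www-form-urlencoded", b"a=1&a=2&b=hello", arguments, files
    )
    # Values stay as byte strings; charset decoding happens later.
    assert arguments == {"a": [b"1", b"2"], "b": [b"hello"]}
    assert files == {}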


def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp). I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        if disp_params.get("filename"):
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
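

# A multipart sketch (illustrative only; _demo_multipart is not part of
# Tornado's API): a hand-built body with one plain field and one file
# upload, using the CRLF framing the parser requires.
def _demo_multipart() -> None:
    boundary = b"xyz"
    data = (
        b"--xyz\r\n"
        b'Content-Disposition: form-data; name="title"\r\n'
        b"\r\n"
        b"hello\r\n"
        b"--xyz\r\n"
        b'Content-Disposition: form-data; name="upload"; filename="a.txt"\r\n'
        b"Content-Type: text/plain\r\n"
        b"\r\n"
        b"file body\r\n"
        b"--xyz--\r\n"
    )
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    parse_multipart_form_data(boundary, data, arguments, files)
    assert arguments["title"] == [b"hello"]
    assert files["upload"][0].filename == "a.txt"
    assert files["upload"][0].body == b"file body"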


def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, (int, float)):
        time_num = ts
    elif isinstance(ts, (tuple, time.struct_time)):
        time_num = calendar.timegm(ts)
    elif isinstance(ts, datetime.datetime):
        time_num = calendar.timegm(ts.utctimetuple())
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(time_num, usegmt=True)


class RequestStartLine(typing.NamedTuple):
    method: str
    path: str
    version: str


def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `typing.NamedTuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    match = _ABNF.request_line.fullmatch(line)
    if not match:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    r = RequestStartLine(match.group(1), match.group(2), match.group(3))
    if not r.version.startswith("HTTP/1"):
        # HTTP/2 and above don't use parse_request_start_line.
        # This could be folded into the regex but we don't want to deviate
        # from the ABNF in the RFCs.
        raise HTTPInputError("Unexpected HTTP version %r" % r.version)
    return r


class ResponseStartLine(typing.NamedTuple):
    version: str
    code: int
    reason: str


def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `typing.NamedTuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    match = _ABNF.status_line.fullmatch(line)
    if not match:
        raise HTTPInputError("Error parsing response start line")
    r = ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
    if not r.version.startswith("HTTP/1"):
        # HTTP/2 and above don't use parse_response_start_line.
        raise HTTPInputError("Unexpected HTTP version %r" % r.version)
    return r


# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
# The original 2.7 version of this code did not correctly support some
# combinations of semicolons and double quotes.
# It has also been modified to support valueless parameters as seen in
# websocket extension negotiations, and to support non-ascii values in
# RFC 2231/5987 format.


def _parseparam(s: str) -> Generator[str, None, None]:
    while s[:1] == ";":
        s = s[1:]
        end = s.find(";")
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(";", end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        yield f.strip()
        s = s[end:]


def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    parts = _parseparam(";" + line)
    key = next(parts)
    # decode_params treats first argument special, but we already stripped key
    params = [("Dummy", "value")]
    for p in parts:
        i = p.find("=")
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1 :].strip()
            params.append((name, native_str(value)))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # get rid of the dummy again
    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict


def _encode_header(key: str, pdict: Dict[str, str]) -> str:
    """Inverse of _parse_header.

    >>> _encode_header('permessage-deflate',
    ...     {'client_max_window_bits': 15, 'client_no_context_takeover': None})
    'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
    """
    if not pdict:
        return key
    out = [key]
    # Sort the parameters just to make it easy to test.
    for k, v in sorted(pdict.items()):
        if v is None:
            out.append(k)
        else:
            # TODO: quote if necessary.
            out.append(f"{k}={v}")
    return "; ".join(out)


def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """
    if isinstance(username, unicode_type):
        username = unicodedata.normalize("NFC", username)
    if isinstance(password, unicode_type):
        password = unicodedata.normalize("NFC", password)
    return utf8(username) + b":" + utf8(password)
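

# A quick sketch (illustrative only; _demo_basic_auth is not part of
# Tornado's API): producing the payload that is base64-encoded into a
# Basic Authorization header.
def _demo_basic_auth() -> None:
    import base64

    credentials = encode_username_password("user", "pa55")
    assert credentials == b"user:pa55"
    header_value = "Basic " + base64.b64encode(credentials).decode("ascii")
    assert header_value == "Basic dXNlcjpwYTU1"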


def doctests():
    # type: () -> unittest.TestSuite
    import doctest

    return doctest.DocTestSuite()


_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    match = _netloc_re.match(netloc)
    if match:
        host = match.group(1)
        port = int(match.group(2))  # type: Optional[int]
    else:
        host = netloc
        port = None
    return (host, port)
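

# A quick sketch (illustrative only; _demo_split_host is not part of
# Tornado's API): the port is optional, and bracketed IPv6 literals keep
# their brackets in the host part.
def _demo_split_host() -> None:
    assert split_host_and_port("example.com:80") == ("example.com", 80)
    assert split_host_and_port("example.com") == ("example.com", None)
    assert split_host_and_port("[::1]:8888") == ("[::1]", 8888)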


def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for k, vs in qs.items():
        for v in vs:
            yield (k, v)
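

# A quick sketch (illustrative only; _demo_qs_to_qsl is not part of
# Tornado's API): flattening a parse_qs-style dict back into pairs
# suitable for urlencode.
def _demo_qs_to_qsl() -> None:
    qs = {"a": ["1", "2"], "b": ["3"]}
    assert list(qs_to_qsl(qs)) == [("a", "1"), ("a", "2"), ("b", "3")]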


_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match) -> str:
    if m[1]:
        return chr(int(m[1], 8))
    else:
        return m[2]


def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    return _unquote_sub(_unquote_replace, s)


def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    for chunk in cookie.split(";"):
        if "=" in chunk:
            key, val = chunk.split("=", 1)
        else:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = "", chunk
        key, val = key.strip(), val.strip()
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict
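

# A quick sketch (illustrative only; _demo_parse_cookie is not part of
# Tornado's API): quoted values are unquoted, and a chunk without "=" is
# treated as a value with an empty name, browser-style.
def _demo_parse_cookie() -> None:
    cookies = parse_cookie('a=b; c="d\\"e"; empty')
    assert cookies["a"] == "b"
    assert cookies["c"] == 'd"e'
    assert cookies[""] == "empty"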