Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tornado/httputil.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# Copyright 2009 Facebook
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain
6# a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
16"""HTTP utility code shared by clients and servers.
18This module also defines the `HTTPServerRequest` class which is exposed
19via `tornado.web.RequestHandler.request`.
20"""
22import calendar
23import collections.abc
24import copy
25import datetime
26import email.utils
27from functools import lru_cache
28from http.client import responses
29import http.cookies
30import re
31from ssl import SSLError
32import time
33import unicodedata
34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
36from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37from tornado.util import ObjectDict, unicode_type
40# responses is unused in this file, but we re-export it to other files.
41# Reference it so pyflakes doesn't complain.
42responses
44import typing
45from typing import (
46 Tuple,
47 Iterable,
48 List,
49 Mapping,
50 Iterator,
51 Dict,
52 Union,
53 Optional,
54 Awaitable,
55 Generator,
56 AnyStr,
57)
59if typing.TYPE_CHECKING:
60 from typing import Deque # noqa: F401
61 from asyncio import Future # noqa: F401
62 import unittest # noqa: F401
64 # This can be done unconditionally in the base class of HTTPHeaders
65 # after we drop support for Python 3.8.
66 StrMutableMapping = collections.abc.MutableMapping[str, str]
67else:
68 StrMutableMapping = collections.abc.MutableMapping
70# To be used with str.strip() and related methods.
71HTTP_WHITESPACE = " \t"
73# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
74# chars greater than \xFF (which may appear after decoding utf8).
75_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # The three alternatives match: the empty string, a single field_vchar, or a
    # run that begins and ends with a field_vchar (interior spaces/tabs allowed).
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    field_name = token
    method = token
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
134@lru_cache(1000)
135def _normalize_header(name: str) -> str:
136 """Map a header name to Http-Header-Case.
138 >>> _normalize_header("coNtent-TYPE")
139 'Content-Type'
140 """
141 return "-".join([w.capitalize() for w in name.split("-")])
class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Formally, HTTP headers are a mapping from a field name to a "combined field value",
        # which may be constructed from multiple field lines by joining them with commas.
        # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
        # so we maintain a mapping from field name to a list of field lines in self._as_list.
        # self._combined_cache is a cache of the combined field values derived from self._as_list
        # on demand (and cleared whenever the list is modified).
        self._as_list: dict[str, list[str]] = {}
        self._combined_cache: dict[str, str] = {}
        # Most recently added normalized header name; parse_line uses it to
        # attach obs-fold continuation lines to the correct header.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key."""
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Appending a second value invalidates the cached combined value.
            self._combined_cache.pop(norm_name, None)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-type')
        'text/html'

        .. versionchanged:: 6.5
           Now supports lines with or without the trailing CRLF, making it possible
           to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (with CR) as a line separator
           will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._combined_cache.pop(self._last_key, None)
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._combined_cache[norm_name] = value
        self._as_list[norm_name] = [value]

    def __contains__(self, name: object) -> bool:
        # This is an important optimization to avoid the expensive concatenation
        # in __getitem__ when it's not needed.
        if not isinstance(name, str):
            return False
        return name in self._as_list

    def __getitem__(self, name: str) -> str:
        header = _normalize_header(name)
        if header not in self._combined_cache:
            # Lazily compute and cache the comma-joined combined field value.
            self._combined_cache[header] = ",".join(self._as_list[header])
        return self._combined_cache[header]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        # Bug fix: the cache entry may be absent (add() and parse_line() pop it
        # when a header gains a second value or a folded continuation), so a
        # plain `del` would raise KeyError for a header that does exist.
        self._combined_cache.pop(norm_name, None)
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._as_list)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._as_list)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append(f"{name}: {value}\n")
        return "".join(lines)

    __unicode__ = __str__
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.

    .. deprecated:: 6.5.2
       The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
       ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
       temporarily restored in 6.5.2.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line takes precedence over the individually
            # supplied method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = host or self.headers["Host"]
        except KeyError:
            if version == "HTTP/1.0":
                # HTTP/1.0 does not require the Host header.
                self.host = "127.0.0.1"
            else:
                raise HTTPInputError("Missing Host header")
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None  # type: Optional[float]

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        # Note: query arguments are parsed even when uri is None (self.query
        # stays at its class-level default in that case).
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # A malformed Cookie header is treated as no cookies.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Still in flight: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Populates self.body_arguments and self.files from the request body,
        # then merges body arguments into the combined self.arguments map.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
        return f"{self.__class__.__name__}({args})"
class HTTPInputError(Exception):
    """Raised for malformed HTTP requests or responses from remote sources.

    .. versionadded:: 4.0
    """
class HTTPOutputError(Exception):
    """Raised for errors encountered while producing HTTP output.

    .. versionadded:: 4.0
    """
class HTTPServerConnectionDelegate:
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        # Abstract: subclasses must override.
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        # Optional hook: the default implementation does nothing.
        pass
class HTTPMessageDelegate:
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        # Optional hook: the default implementation does nothing.
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        # Optional hook: the default implementation does nothing.
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        # Optional hook: the default implementation does nothing.
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        # Optional hook: the default implementation does nothing.
        pass
class HTTPConnection:
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connections (e.g. HTTP/1.x) must override.
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connections must override.
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        # Abstract: concrete connections must override.
        raise NotImplementedError()
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key).

    Raises `TypeError` if ``args`` is not a dict, list, tuple, or None.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    # Existing query parameters are preserved; the new ones are appended.
    parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
    if isinstance(args, dict):
        parsed_query.extend(args.items())
    elif isinstance(args, (list, tuple)):
        parsed_query.extend(args)
    else:
        raise TypeError(
            f"'args' parameter should be dict, list or tuple. Not {type(args)}"
        )
    final_query = urlencode(parsed_query)
    # ParseResult is a namedtuple, so _replace is its public update API.
    return urlunparse(parsed_url._replace(query=final_query))
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Annotations for type checkers only; the values live in the underlying
    # dict (ObjectDict exposes dict keys as attributes).
    filename: str
    body: bytes
    content_type: str
842def _parse_request_range(
843 range_header: str,
844) -> Optional[Tuple[Optional[int], Optional[int]]]:
845 """Parses a Range header.
847 Returns either ``None`` or tuple ``(start, end)``.
848 Note that while the HTTP headers use inclusive byte positions,
849 this method returns indexes suitable for use in slices.
851 >>> start, end = _parse_request_range("bytes=1-2")
852 >>> start, end
853 (1, 3)
854 >>> [0, 1, 2, 3, 4][start:end]
855 [1, 2]
856 >>> _parse_request_range("bytes=6-")
857 (6, None)
858 >>> _parse_request_range("bytes=-6")
859 (-6, None)
860 >>> _parse_request_range("bytes=-0")
861 (None, 0)
862 >>> _parse_request_range("bytes=")
863 (None, None)
864 >>> _parse_request_range("foo=42")
865 >>> _parse_request_range("bytes=1-2,6-10")
867 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
869 See [0] for the details of the range header.
871 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
872 """
873 unit, _, value = range_header.partition("=")
874 unit, value = unit.strip(), value.strip()
875 if unit != "bytes":
876 return None
877 start_b, _, end_b = value.partition("-")
878 try:
879 start = _int_or_none(start_b)
880 end = _int_or_none(end_b)
881 except ValueError:
882 return None
883 if end is not None:
884 if start is None:
885 if end != 0:
886 start = -end
887 end = None
888 else:
889 end += 1
890 return (start, end)
893def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
894 """Returns a suitable Content-Range header:
896 >>> print(_get_content_range(None, 1, 4))
897 bytes 0-0/4
898 >>> print(_get_content_range(1, 3, 4))
899 bytes 1-2/4
900 >>> print(_get_content_range(None, None, 4))
901 bytes 0-3/4
902 """
903 start = start or 0
904 end = (end or total) - 1
905 return f"bytes {start}-{end}/{total}"
908def _int_or_none(val: str) -> Optional[int]:
909 val = val.strip()
910 if val == "":
911 return None
912 return int(val)
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.

    Raises `HTTPInputError` on malformed input or an unsupported
    Content-Encoding; bodies with an unrecognized content type are ignored.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            # A compressed/encoded body cannot be parsed as a form; reject it.
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # Locate the boundary parameter among the Content-Type's
            # semicolon-separated fields.
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                # for/else: the loop found no boundary parameter.
                raise HTTPInputError("multipart boundary not found")
        except Exception as e:
            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    Raises `HTTPInputError` on malformed input (missing final boundary,
    missing part headers, or a part that is not well-formed form-data).

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # A part's headers are separated from its body by a blank line.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        # _chars_are_bytes=False: part headers are utf-8 text (e.g. non-ASCII
        # filenames), unlike latin1-smuggled top-level HTTP headers.
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        # Skip the header/body separator (4 bytes) and drop the trailing CRLF.
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, (int, float)):
        epoch_seconds = ts
    else:
        if isinstance(ts, datetime.datetime):
            # utctimetuple() converts aware datetimes to UTC and treats
            # naive ones as already UTC.
            ts = ts.utctimetuple()
        if not isinstance(ts, (tuple, time.struct_time)):
            raise TypeError("unknown timestamp type: %r" % ts)
        epoch_seconds = calendar.timegm(ts)
    return email.utils.formatdate(epoch_seconds, usegmt=True)
class RequestStartLine(typing.NamedTuple):
    """The three components of an HTTP/1.x request line, e.g. ``GET /foo HTTP/1.1``."""

    # HTTP method, e.g. "GET".
    method: str
    # Request target (path and optional query string), e.g. "/foo".
    path: str
    # Protocol version string, e.g. "HTTP/1.1".
    version: str
def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `typing.NamedTuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    match = _ABNF.request_line.fullmatch(line)
    if match is None:
        # Per RFC 7230 section 3.1.1, an invalid request-line SHOULD be
        # answered with a 400 (Bad Request).
        raise HTTPInputError("Malformed HTTP request line")
    start_line = RequestStartLine(*match.group(1, 2, 3))
    # HTTP/2 and above never go through this parser. The version check
    # is kept out of the regex so the regex stays aligned with the ABNF
    # in the RFCs.
    if not start_line.version.startswith("HTTP/1"):
        raise HTTPInputError("Unexpected HTTP version %r" % start_line.version)
    return start_line
class ResponseStartLine(typing.NamedTuple):
    """The three components of an HTTP/1.x status line, e.g. ``HTTP/1.1 200 OK``."""

    # Protocol version string, e.g. "HTTP/1.1".
    version: str
    # Numeric status code, e.g. 200.
    code: int
    # Reason phrase accompanying the status code, e.g. "OK".
    reason: str
def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `typing.NamedTuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    match = _ABNF.status_line.fullmatch(line)
    if match is None:
        raise HTTPInputError("Error parsing response start line")
    version, code, reason = match.group(1), match.group(2), match.group(3)
    start_line = ResponseStartLine(version, int(code), reason)
    # HTTP/2 and above use different framing and never reach this parser.
    if not start_line.version.startswith("HTTP/1"):
        raise HTTPInputError("Unexpected HTTP version %r" % start_line.version)
    return start_line
1091# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
1092# The original 2.7 version of this code did not correctly support some
1093# combinations of semicolons and double quotes.
1094# It has also been modified to support valueless parameters as seen in
1095# websocket extension negotiations, and to support non-ascii values in
1096# RFC 2231/5987 format.
1097#
1098# _parseparam has been further modified with the logic from
1099# https://github.com/python/cpython/pull/136072/files
1100# to avoid quadratic behavior when parsing semicolons in quoted strings.
1101#
1102# TODO: See if we can switch to email.message.Message for this functionality.
1103# This is the suggested replacement for the cgi.py module now that cgi has
1104# been removed from recent versions of Python. We need to verify that
1105# the email module is consistent with our existing behavior (and all relevant
1106# RFCs for multipart/form-data) before making this change.
def _parseparam(s: str) -> Generator[str, None, None]:
    """Yield the ``;``-separated parameters of ``s``, ignoring semicolons
    that fall inside double-quoted strings (quotes escaped as ``\\"`` do
    not count toward the quote balance).

    The input is expected to start with a ``;`` (the caller in
    `_parse_header` prepends one); each yielded parameter is stripped of
    surrounding whitespace.
    """
    start = 0
    while s.find(";", start) == start:
        start += 1
        # Candidate end of this parameter: the next semicolon. It is only
        # a real boundary if the number of unescaped quotes seen since
        # `start` is even (i.e. we are not inside a quoted string).
        end = s.find(";", start)
        ind, diff = start, 0
        while end > 0:
            # Accumulate the unescaped-quote count one segment at a time
            # instead of recounting from `start` on every candidate; this
            # keeps the scan linear (see the CPython fix referenced in the
            # comment block above this function).
            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
            if diff % 2 == 0:
                break  # quotes balanced: `end` is a true boundary
            # Leapfrog to the next candidate semicolon. The swap visits
            # the intervening segments out of order, but each segment's
            # quotes are counted exactly once.
            end, ind = ind, s.find(";", end + 1)
        if end < 0:
            # No further semicolon: this parameter runs to end of string.
            end = len(s)
        f = s[start:end]
        yield f.strip()
        start = end
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    parts = _parseparam(";" + line)
    key = next(parts)
    # email.utils.decode_params treats its first element specially, but we
    # already consumed the key above, so feed it a placeholder and discard
    # it afterwards.
    params = [("Dummy", "value")]
    for part in parts:
        name, sep, value = part.partition("=")
        if sep:
            params.append((name.strip().lower(), native_str(value.strip())))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # drop the placeholder
    pdict = {}
    for name, raw_value in decoded_params:
        # Collapse RFC 2231/5987 encoded values (e.g. filename*=) into
        # plain strings, then strip surrounding double quotes if present.
        value = email.utils.collapse_rfc2231_value(raw_value)
        if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        pdict[name] = value
    return key, pdict
1162def _encode_header(key: str, pdict: Dict[str, str]) -> str:
1163 """Inverse of _parse_header.
1165 >>> _encode_header('permessage-deflate',
1166 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
1167 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
1168 """
1169 if not pdict:
1170 return key
1171 out = [key]
1172 # Sort the parameters just to make it easy to test.
1173 for k, v in sorted(pdict.items()):
1174 if v is None:
1175 out.append(k)
1176 else:
1177 # TODO: quote if necessary.
1178 out.append(f"{k}={v}")
1179 return "; ".join(out)
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """

    def _nfc_utf8(value: Union[str, bytes]) -> bytes:
        # Normalize unicode text to NFC before UTF-8 encoding; byte
        # strings are passed through to utf8() unchanged.
        if isinstance(value, unicode_type):
            value = unicodedata.normalize("NFC", value)
        return utf8(value)

    return _nfc_utf8(username) + b":" + _nfc_utf8(password)
def doctests():
    # type: () -> unittest.TestSuite
    """Return a `unittest.TestSuite` running this module's doctest examples."""
    import doctest

    return doctest.DocTestSuite()
# host:port with a strictly numeric port; the greedy host group means the
# split happens at the last colon.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    match = _netloc_re.match(netloc)
    if match is None:
        # No numeric port suffix: the whole string is the host.
        return (netloc, None)
    return (match.group(1), int(match.group(2)))
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        yield from ((name, value) for value in values)
1235_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
1238def _unquote_replace(m: re.Match) -> str:
1239 if m[1]:
1240 return chr(int(m[1], 8))
1241 else:
1242 return m[2]
def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.  Keep it in sync with CPython when updating.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    # NOTE(review): callers in this file always pass a str, but the
    # CPython original also tolerates None, so that check is kept.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    return _unquote_sub(_unquote_replace, s)
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    result = {}  # type: Dict[str, str]
    for chunk in cookie.split(";"):
        name, sep, value = chunk.partition("=")
        if not sep:
            # No '=' at all: treat the whole chunk as a value with an
            # empty name, per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", chunk
        name = name.strip()
        value = value.strip()
        if name or value:
            # unquote using Python's algorithm.
            result[name] = _unquote_cookie(value)
    return result