Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tornado/httputil.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# Copyright 2009 Facebook
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain
6# a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
16"""HTTP utility code shared by clients and servers.
18This module also defines the `HTTPServerRequest` class which is exposed
19via `tornado.web.RequestHandler.request`.
20"""
22import calendar
23import collections.abc
24import copy
25import datetime
26import email.utils
27from functools import lru_cache
28from http.client import responses
29import http.cookies
30import re
31from ssl import SSLError
32import time
33import unicodedata
34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
36from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37from tornado.util import ObjectDict, unicode_type
40# responses is unused in this file, but we re-export it to other files.
41# Reference it so pyflakes doesn't complain.
42responses
44import typing
45from typing import (
46 Tuple,
47 Iterable,
48 List,
49 Mapping,
50 Iterator,
51 Dict,
52 Union,
53 Optional,
54 Awaitable,
55 Generator,
56 AnyStr,
57)
if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

    # Subscripting MutableMapping at runtime requires Python 3.9+, so the
    # parameterized form is only used for type checking.
    # This can be done unconditionally in the base class of HTTPHeaders
    # after we drop support for Python 3.8.
    StrMutableMapping = collections.abc.MutableMapping[str, str]
else:
    StrMutableMapping = collections.abc.MutableMapping

# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"

# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
# chars greater than \xFF (which may appear after decoding utf8).
_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    # VCHAR: any visible (printing) ASCII character.
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # Matches: the empty string, a single field_vchar, or a run that both starts
    # and ends with a field_vchar (interior spaces/tabs allowed).
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    field_name = token
    method = token
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
134@lru_cache(1000)
135def _normalize_header(name: str) -> str:
136 """Map a header name to Http-Header-Case.
138 >>> _normalize_header("coNtent-TYPE")
139 'Content-Type'
140 """
141 return "-".join([w.capitalize() for w in name.split("-")])
class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Formally, HTTP headers are a mapping from a field name to a "combined field value",
        # which may be constructed from multiple field lines by joining them with commas.
        # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
        # so we maintain a mapping from field name to a list of field lines in self._as_list.
        # self._combined_cache is a cache of the combined field values derived from self._as_list
        # on demand (and cleared whenever the list is modified).
        self._as_list: dict[str, list[str]] = {}
        self._combined_cache: dict[str, str] = {}
        # Most recently added (normalized) header name; used by parse_line to
        # attach obs-fold continuation lines to the right header.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key."""
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Appending a field line invalidates any cached combined value.
            self._combined_cache.pop(norm_name, None)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-type')
        'text/html'

        .. versionchanged:: 6.5
           Now supports lines with or without the trailing CRLF, making it possible
           to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (with CR) as a line separator
           will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._combined_cache.pop(self._last_key, None)
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._combined_cache[norm_name] = value
        self._as_list[norm_name] = [value]

    def __contains__(self, name: object) -> bool:
        # This is an important optimization to avoid the expensive concatenation
        # in __getitem__ when it's not needed.
        if not isinstance(name, str):
            return False
        norm_name = _normalize_header(name)
        return norm_name in self._as_list

    def __getitem__(self, name: str) -> str:
        header = _normalize_header(name)
        if header not in self._combined_cache:
            # Combine multiple field lines with commas per RFC 9110.
            self._combined_cache[header] = ",".join(self._as_list[header])
        return self._combined_cache[header]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        # The cache entry may legitimately be absent: add() and parse_line()
        # pop it when a header gains a second field line, and it is only
        # repopulated on __getitem__. Unconditionally deleting it here raised
        # KeyError for headers that exist in _as_list but have a cold cache
        # (which also broke MutableMapping's pop()/clear() mixins). Use pop()
        # for the cache and let the _as_list delete provide the KeyError for
        # genuinely missing headers.
        self._combined_cache.pop(norm_name, None)
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._as_list)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._as_list)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append(f"{name}: {value}\n")
        return "".join(lines)

    __unicode__ = __str__
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.

    .. deprecated:: 6.5.2
       The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
       ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
       temporarily restored in 6.5.2.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line takes precedence over the individual
            # method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = host or self.headers["Host"]
        except KeyError:
            if version == "HTTP/1.0":
                # HTTP/1.0 does not require the Host header.
                self.host = "127.0.0.1"
            else:
                raise HTTPInputError("Missing Host header")
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            # Lazily parsed on first access and cached on the instance.
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # A malformed Cookie header yields an empty cookie dict
                    # rather than failing the request.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Request still in progress: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Populates self.body_arguments and self.files from the request body,
        # then merges body arguments into the combined self.arguments dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
        return f"{self.__class__.__name__}({args})"
class HTTPInputError(Exception):
    """Raised for malformed HTTP requests or responses received from a
    remote source (e.g. invalid start lines, header fields, or bodies).

    .. versionadded:: 4.0
    """

    pass
class HTTPOutputError(Exception):
    """Raised when locally-generated HTTP output is invalid or cannot be
    written.

    .. versionadded:: 4.0
    """

    pass
class HTTPServerConnectionDelegate:
    """Interface implemented by applications to receive requests from
    `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """Invoked by the server each time a new request begins.

        :arg server_conn: an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: a `.HTTPConnection` object for a single
            request/response exchange.

        Must return the `.HTTPMessageDelegate` that will handle this request.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """Invoked when a connection has been closed.

        :arg server_conn: a server connection that has previously been
            passed to ``start_request``.
        """
        pass
class HTTPMessageDelegate:
    """Interface for handling a single HTTP request or response message.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Invoked once the complete header block has been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called while
        ``headers_received`` is running.

        May return a `.Future`; if it does, the body will not be read
        until that future completes.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Invoked for each chunk of body data as it arrives.

        May return a `.Future` to apply flow control.
        """
        pass

    def finish(self) -> None:
        """Invoked after the final chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Invoked if the connection closes before the request finishes.

        If ``headers_received`` is called, exactly one of ``finish`` or
        ``on_connection_close`` will follow — never both.
        """
        pass
class HTTPConnection:
    """Interface through which applications write their HTTP responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: an optional first chunk of body data, allowing small
            responses to be sent in a single call with their headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Write one chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Signal that the last chunk of body data has been written."""
        raise NotImplementedError()
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Append query arguments to ``url``, whether or not it already has a
    query string.

    ``args`` may be a dictionary or a list/tuple of key-value pairs (the
    latter allows repeated keys).

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed = urlparse(url)
    # Normalize args to a list of (key, value) pairs, rejecting other types.
    if isinstance(args, dict):
        extra_pairs = list(args.items())
    elif isinstance(args, (list, tuple)):
        extra_pairs = list(args)
    else:
        raise TypeError(
            "'args' parameter should be dict, list or tuple. Not {0}".format(type(args))
        )
    # Keep any existing query parameters (including blanks) and append.
    query_pairs = parse_qsl(parsed.query, keep_blank_values=True)
    query_pairs.extend(extra_pairs)
    return urlunparse(
        (
            parsed.scheme,
            parsed.netloc,
            parsed.path,
            parsed.params,
            urlencode(query_pairs),
            parsed.fragment,
        )
    )
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Filename of the upload as reported by the client (untrusted input).
    filename: str
    # Raw contents of the uploaded file.
    body: bytes
    # Content-Type from the multipart part's headers.
    content_type: str
843def _parse_request_range(
844 range_header: str,
845) -> Optional[Tuple[Optional[int], Optional[int]]]:
846 """Parses a Range header.
848 Returns either ``None`` or tuple ``(start, end)``.
849 Note that while the HTTP headers use inclusive byte positions,
850 this method returns indexes suitable for use in slices.
852 >>> start, end = _parse_request_range("bytes=1-2")
853 >>> start, end
854 (1, 3)
855 >>> [0, 1, 2, 3, 4][start:end]
856 [1, 2]
857 >>> _parse_request_range("bytes=6-")
858 (6, None)
859 >>> _parse_request_range("bytes=-6")
860 (-6, None)
861 >>> _parse_request_range("bytes=-0")
862 (None, 0)
863 >>> _parse_request_range("bytes=")
864 (None, None)
865 >>> _parse_request_range("foo=42")
866 >>> _parse_request_range("bytes=1-2,6-10")
868 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
870 See [0] for the details of the range header.
872 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
873 """
874 unit, _, value = range_header.partition("=")
875 unit, value = unit.strip(), value.strip()
876 if unit != "bytes":
877 return None
878 start_b, _, end_b = value.partition("-")
879 try:
880 start = _int_or_none(start_b)
881 end = _int_or_none(end_b)
882 except ValueError:
883 return None
884 if end is not None:
885 if start is None:
886 if end != 0:
887 start = -end
888 end = None
889 else:
890 end += 1
891 return (start, end)
894def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
895 """Returns a suitable Content-Range header:
897 >>> print(_get_content_range(None, 1, 4))
898 bytes 0-0/4
899 >>> print(_get_content_range(1, 3, 4))
900 bytes 1-2/4
901 >>> print(_get_content_range(None, None, 4))
902 bytes 0-3/4
903 """
904 start = start or 0
905 end = (end or total) - 1
906 return f"bytes {start}-{end}/{total}"
909def _int_or_none(val: str) -> Optional[int]:
910 val = val.strip()
911 if val == "":
912 return None
913 return int(val)
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.  Bodies with any other content type are
    ignored.  Raises `HTTPInputError` on malformed input or an
    unsupported ``Content-Encoding``.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        # A compressed/encoded body cannot be parsed as form data; reject
        # explicitly instead of silently producing no arguments.
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # Locate the boundary parameter among the Content-Type parameters.
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                # for/else: no boundary parameter found.  Raised inside the
                # try block so it is caught and re-wrapped below like any
                # other multipart parse failure.
                raise HTTPInputError("multipart boundary not found")
        except Exception as e:
            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Each part has the shape "<headers>\r\n\r\n<body>\r\n".
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        # _chars_are_bytes=False: part headers are decoded as utf-8 text
        # here, unlike HTTP headers proper (latin1-smuggled bytes).
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        # Slice off the header block (+4 for the blank line) and the
        # trailing CRLF that precedes the next boundary (-2).
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename marks this part as a file upload rather than a
            # plain form field.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Reduce every accepted input type to seconds since the epoch, then
    # let the email module render the HTTP-style GMT date string.
    if isinstance(ts, datetime.datetime):
        # utctimetuple() converts aware datetimes to UTC and treats
        # naive ones as already being in UTC.
        seconds = calendar.timegm(ts.utctimetuple())  # type: Union[int, float]
    elif isinstance(ts, (tuple, time.struct_time)):
        seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(seconds, usegmt=True)
class RequestStartLine(typing.NamedTuple):
    """The parsed (method, path, version) triple of an HTTP/1.x request line."""

    method: str  # e.g. "GET"
    path: str  # the request target, e.g. "/foo"
    version: str  # e.g. "HTTP/1.1"
def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `typing.NamedTuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    parsed = _ABNF.request_line.fullmatch(line)
    if parsed is None:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = parsed.group(1, 2, 3)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above never reach this parser. The version check is
        # kept out of the regex so the pattern stays faithful to the
        # ABNF in the RFCs.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return RequestStartLine(method, path, version)
class ResponseStartLine(typing.NamedTuple):
    """The parsed (version, code, reason) triple of an HTTP/1.x status line."""

    version: str  # e.g. "HTTP/1.1"
    code: int  # numeric status code, e.g. 200
    reason: str  # reason phrase, e.g. "OK"
def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `typing.NamedTuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    m = _ABNF.status_line.fullmatch(line)
    if m is None:
        raise HTTPInputError("Error parsing response start line")
    version, code, reason = m.group(1), int(m.group(2)), m.group(3)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above use different framing and never hit this parser.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return ResponseStartLine(version, code, reason)
1092# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
1093# The original 2.7 version of this code did not correctly support some
1094# combinations of semicolons and double quotes.
1095# It has also been modified to support valueless parameters as seen in
1096# websocket extension negotiations, and to support non-ascii values in
1097# RFC 2231/5987 format.
1098#
1099# _parseparam has been further modified with the logic from
1100# https://github.com/python/cpython/pull/136072/files
1101# to avoid quadratic behavior when parsing semicolons in quoted strings.
1102#
1103# TODO: See if we can switch to email.message.Message for this functionality.
1104# This is the suggested replacement for the cgi.py module now that cgi has
1105# been removed from recent versions of Python. We need to verify that
1106# the email module is consistent with our existing behavior (and all relevant
1107# RFCs for multipart/form-data) before making this change.
def _parseparam(s: str) -> Generator[str, None, None]:
    """Split *s* on ``;``, ignoring semicolons inside double-quoted strings.

    The caller (`_parse_header`) prepends a ``;`` so that every parameter,
    including the first, is preceded by a separator. Each yielded piece is
    whitespace-stripped; quotes are left in place for the caller to decode.
    """
    start = 0
    while s.find(";", start) == start:
        # Step over the ";" separator (and any empty ";;" runs).
        start += 1
        end = s.find(";", start)
        ind, diff = start, 0
        while end > 0:
            # Net effect: advance `end` to the first ";" preceded (counting
            # from `start`) by an even number of unescaped double quotes,
            # i.e. a ";" that lies outside any quoted string. The running
            # `diff` counter accumulates quote counts incrementally instead
            # of rescanning from `start` for each candidate, avoiding the
            # quadratic behavior noted in the comment block above.
            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
            if diff % 2 == 0:
                break
            end, ind = ind, s.find(";", end + 1)
        if end < 0:
            # No further ";": this parameter runs to the end of the string.
            end = len(s)
        f = s[start:end]
        yield f.strip()
        start = end
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    tokens = _parseparam(";" + line)
    key = next(tokens)
    # email.utils.decode_params treats its first element specially (as the
    # main value rather than a parameter), but we already consumed the key
    # above, so feed it a throwaway placeholder instead.
    params = [("Dummy", "value")]
    for token in tokens:
        eq = token.find("=")
        if eq >= 0:
            pname = token[:eq].strip().lower()
            pvalue = token[eq + 1 :].strip()
            params.append((pname, native_str(pvalue)))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # discard the placeholder again
    pdict = {}
    for pname, raw_value in decoded_params:
        # Collapse RFC 2231/5987 continuations and charset encodings into a
        # single string, then strip one level of surrounding double quotes.
        collapsed = email.utils.collapse_rfc2231_value(raw_value)
        if (
            len(collapsed) >= 2
            and collapsed.startswith('"')
            and collapsed.endswith('"')
        ):
            collapsed = collapsed[1:-1]
        pdict[pname] = collapsed
    return key, pdict
1163def _encode_header(key: str, pdict: Dict[str, str]) -> str:
1164 """Inverse of _parse_header.
1166 >>> _encode_header('permessage-deflate',
1167 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
1168 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
1169 """
1170 if not pdict:
1171 return key
1172 out = [key]
1173 # Sort the parameters just to make it easy to test.
1174 for k, v in sorted(pdict.items()):
1175 if v is None:
1176 out.append(k)
1177 else:
1178 # TODO: quote if necessary.
1179 out.append(f"{k}={v}")
1180 return "; ".join(out)
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """
    # NFC-normalize unicode input so that visually identical credentials
    # produce identical byte strings.
    if isinstance(username, unicode_type):
        username = unicodedata.normalize("NFC", username)
    if isinstance(password, unicode_type):
        password = unicodedata.normalize("NFC", password)
    return b":".join([utf8(username), utf8(password)])
def doctests():
    # type: () -> unittest.TestSuite
    """Return a `unittest.TestSuite` running this module's doctests."""
    import doctest

    return doctest.DocTestSuite()
# "host:port" — group 1 is the host (greedy, so only the final ":" that is
# followed by digits splits), group 2 is the decimal port.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    m = _netloc_re.match(netloc)
    if m is None:
        # No ":port" suffix: the whole netloc is the host.
        return (netloc, None)
    return (m.group(1), int(m.group(2)))
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        # Flatten each name's value list into individual pairs.
        yield from ((name, value) for value in values)
# Matches one backslash escape in a cookie value: either an octal escape
# like \012 (group 1) or a backslash followed by any single char (group 2).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match) -> str:
    # Substitution callback for _unquote_sub: decode an octal escape, or
    # return the escaped character itself.
    if m[1]:
        return chr(int(m[1], 8))
    else:
        return m[2]


def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    return _unquote_sub(_unquote_replace, s)
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    result = {}  # type: Dict[str, str]
    for chunk in cookie.split(";"):
        name, sep, value = chunk.partition("=")
        if not sep:
            # Chunk has no "=": assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", chunk
        name = name.strip()
        value = value.strip()
        if name or value:
            # unquote using Python's algorithm; later duplicates win.
            result[name] = _unquote_cookie(value)
    return result