#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""HTTP utility code shared by clients and servers.

This module also defines the `HTTPServerRequest` class which is exposed
via `tornado.web.RequestHandler.request`.
"""

import calendar
import collections.abc
import copy
import datetime
import email.utils
from functools import lru_cache
from http.client import responses
import http.cookies
import re
from ssl import SSLError
import time
import unicodedata
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

from tornado.escape import native_str, parse_qs_bytes, utf8
from tornado.log import gen_log
from tornado.util import ObjectDict, unicode_type


# responses is unused in this file, but we re-export it to other files.
# Reference it so pyflakes doesn't complain.
responses

import typing
from typing import (
    Tuple,
    Iterable,
    List,
    Mapping,
    Iterator,
    Dict,
    Union,
    Optional,
    Awaitable,
    Generator,
    AnyStr,
)

if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"


@lru_cache(1000)
def _normalize_header(name: str) -> str:
    """Map a header name to Http-Header-Case.

    >>> _normalize_header("coNtent-TYPE")
    'Content-Type'
    """
    return "-".join([w.capitalize() for w in name.split("-")])


class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k, v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k, v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # continuation of a multi-line header
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip(HTTP_WHITESPACE)
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip(HTTP_WHITESPACE))

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError` and `ValueError`.

        """
        h = cls()
        # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
        # terminator and ignore any preceding CR.
        for line in headers.split("\n"):
            if line.endswith("\r"):
                line = line[:-1]
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append("%s: %s\n" % (name, value))
        return "".join(lines)

    __unicode__ = __str__


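# Illustrative usage sketch (not part of the upstream module): how the
# parsing and multi-value behavior documented above fit together. The helper
# name below is hypothetical.
def _example_httpheaders_usage() -> "HTTPHeaders":
    # A folded (continuation) line is joined onto the previous header with a
    # single space, per parse_line() above.
    h = HTTPHeaders.parse("User-Agent: curl/7.68.0\r\n (Linux)\r\nAccept: */*\r\n")
    assert h["User-Agent"] == "curl/7.68.0 (Linux)"
    # Repeated headers keep every value in get_list(), while the plain
    # mapping interface joins them with commas.
    h.add("Set-Cookie", "A=B")
    h.add("Set-Cookie", "C=D")
    assert h.get_list("Set-Cookie") == ["A=B", "C=D"]
    assert h["set-cookie"] == "A=B,C=D"
    return h

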
class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers. Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string. If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https". If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps argument names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings. Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present). If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead. See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s)" % (self.__class__.__name__, args)


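# Illustrative usage sketch (not part of the upstream module): constructing a
# bare HTTPServerRequest by hand, as a test might, to show how ``uri`` is
# split into ``path``/``query`` and how ``arguments`` maps names to byte
# values. The helper name is hypothetical; real requests are built by the
# HTTP server, not by application code.
def _example_httpserverrequest() -> "HTTPServerRequest":
    req = HTTPServerRequest(method="GET", uri="/search?q=tornado&q=web")
    assert req.path == "/search"
    assert req.query == "q=tornado&q=web"
    # Argument names are str, values are lists of byte strings.
    assert req.arguments == {"q": [b"tornado", b"web"]}
    # With no connection and no Host header, defaults apply.
    assert req.full_url() == "http://127.0.0.1/search?q=tornado&q=web"
    return req

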
class HTTPInputError(Exception):
    """Exception class for malformed HTTP requests or responses
    from remote sources.

    .. versionadded:: 4.0
    """

    pass


class HTTPOutputError(Exception):
    """Exception class for errors in HTTP output.

    .. versionadded:: 4.0
    """

    pass


class HTTPServerConnectionDelegate(object):
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        pass


class HTTPMessageDelegate(object):
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass


class HTTPConnection(object):
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data. This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        raise NotImplementedError()


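# Illustrative usage sketch (not part of the upstream module): a minimal
# HTTPMessageDelegate that buffers the request body and echoes it back
# through the HTTPConnection interface above. The class name is hypothetical;
# a real server-side delegate is normally created by
# HTTPServerConnectionDelegate.start_request.
class _EchoDelegateExample(HTTPMessageDelegate):
    def __init__(self, connection: HTTPConnection) -> None:
        self.connection = connection
        self.chunks = []  # type: List[bytes]

    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> None:
        self.chunks = []

    def data_received(self, chunk: bytes) -> None:
        # Returning a Future here instead would pause reading (flow control).
        self.chunks.append(chunk)

    def finish(self) -> None:
        body = b"".join(self.chunks)
        headers = HTTPHeaders({"Content-Length": str(len(body))})
        # ResponseStartLine is defined later in this module; its version
        # field is ignored by write_headers().
        self.connection.write_headers(
            ResponseStartLine("HTTP/1.1", 200, "OK"), headers, body
        )
        self.connection.finish()

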
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key).

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    if isinstance(args, dict):
        parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
        parsed_query.extend(args.items())
    elif isinstance(args, list) or isinstance(args, tuple):
        parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
        parsed_query.extend(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    final_query = urlencode(parsed_query)
    url = urlunparse(
        (
            parsed_url[0],
            parsed_url[1],
            parsed_url[2],
            parsed_url[3],
            final_query,
            parsed_url[5],
        )
    )
    return url


class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    filename: str
    body: bytes
    content_type: str


def _parse_request_range(
    range_header: str,
) -> Optional[Tuple[Optional[int], Optional[int]]]:
    """Parses a Range header.

    Returns either ``None`` or tuple ``(start, end)``.
    Note that while the HTTP headers use inclusive byte positions,
    this method returns indexes suitable for use in slices.

    >>> start, end = _parse_request_range("bytes=1-2")
    >>> start, end
    (1, 3)
    >>> [0, 1, 2, 3, 4][start:end]
    [1, 2]
    >>> _parse_request_range("bytes=6-")
    (6, None)
    >>> _parse_request_range("bytes=-6")
    (-6, None)
    >>> _parse_request_range("bytes=-0")
    (None, 0)
    >>> _parse_request_range("bytes=")
    (None, None)
    >>> _parse_request_range("foo=42")
    >>> _parse_request_range("bytes=1-2,6-10")

    Note: only supports one range (e.g. ``bytes=1-2,6-10`` is not allowed).

    See [0] for the details of the range header.

    [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
    """
    unit, _, value = range_header.partition("=")
    unit, value = unit.strip(), value.strip()
    if unit != "bytes":
        return None
    start_b, _, end_b = value.partition("-")
    try:
        start = _int_or_none(start_b)
        end = _int_or_none(end_b)
    except ValueError:
        return None
    if end is not None:
        if start is None:
            if end != 0:
                start = -end
                end = None
        else:
            end += 1
    return (start, end)


def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
    """Returns a suitable Content-Range header:

    >>> print(_get_content_range(None, 1, 4))
    bytes 0-0/4
    >>> print(_get_content_range(1, 3, 4))
    bytes 1-2/4
    >>> print(_get_content_range(None, None, 4))
    bytes 0-3/4
    """
    start = start or 0
    end = (end or total) - 1
    return "bytes %s-%s/%s" % (start, end, total)


def _int_or_none(val: str) -> Optional[int]:
    val = val.strip()
    if val == "":
        return None
    return int(val)


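# Illustrative usage sketch (not part of the upstream module): combining the
# two helpers above to answer a single-range request, roughly the way a
# static file handler would. The function name is hypothetical and edge cases
# (suffix ranges on empty data, out-of-range starts) are only lightly handled.
def _example_apply_range(data: bytes, range_header: str) -> Tuple[bytes, str]:
    size = len(data)
    parsed = _parse_request_range(range_header)
    if parsed is None:
        # Unsupported or malformed Range header; serve the whole entity.
        return data, _get_content_range(0, size, size)
    start, end = parsed
    if start is not None and start < 0:
        # A suffix range such as "bytes=-500" comes back as a negative start.
        start += size
    if end is None or end > size:
        end = size
    # (start, end) are already slice-friendly, unlike the inclusive byte
    # positions used on the wire.
    return data[start:end], _get_content_range(start, end, size)

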
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``. The ``content_type`` parameter should be
    a string and ``body`` should be a byte string. The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            gen_log.warning(
                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
            )
            return
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
            uri_arguments = {}
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            gen_log.warning(
                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
            )
            return
        try:
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                raise ValueError("multipart boundary not found")
        except Exception as e:
            gen_log.warning("Invalid multipart/form-data: %s", e)


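# Illustrative usage sketch (not part of the upstream module): feeding a
# urlencoded body through parse_body_arguments. The dictionaries are updated
# in place; names are str and values are lists of byte strings. The helper
# name is hypothetical.
def _example_parse_urlencoded() -> Dict[str, List[bytes]]:
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    parse_body_arguments(
        "application/x-www-form-urlencoded; charset=UTF-8",
        b"name=Tornado&tag=web&tag=python",
        arguments,
        files,
    )
    assert arguments == {"name": [b"Tornado"], "tag": [b"web", b"python"]}
    assert files == {}
    return arguments

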
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp). I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)


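# Illustrative usage sketch (not part of the upstream module): parsing a
# hand-built multipart/form-data body with one plain field and one file part.
# The helper name and the boundary value are made up for the example.
def _example_parse_multipart() -> None:
    boundary = b"boundary123"
    body = (
        b"--boundary123\r\n"
        b'Content-Disposition: form-data; name="title"\r\n'
        b"\r\n"
        b"hello\r\n"
        b"--boundary123\r\n"
        b'Content-Disposition: form-data; name="upload"; filename="a.txt"\r\n'
        b"Content-Type: text/plain\r\n"
        b"\r\n"
        b"file contents\r\n"
        b"--boundary123--\r\n"
    )
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    parse_multipart_form_data(boundary, body, arguments, files)
    # Plain fields land in ``arguments``; parts with a filename land in
    # ``files`` as HTTPFile objects.
    assert arguments == {"title": [b"hello"]}
    assert files["upload"][0].filename == "a.txt"
    assert files["upload"][0].body == b"file contents"
    assert files["upload"][0].content_type == "text/plain"

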
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, (int, float)):
        time_num = ts
    elif isinstance(ts, (tuple, time.struct_time)):
        time_num = calendar.timegm(ts)
    elif isinstance(ts, datetime.datetime):
        time_num = calendar.timegm(ts.utctimetuple())
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(time_num, usegmt=True)


RequestStartLine = collections.namedtuple(
    "RequestStartLine", ["method", "path", "version"]
)


_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")


def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `collections.namedtuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    try:
        method, path, version = line.split(" ")
    except ValueError:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    if not _http_version_re.match(version):
        raise HTTPInputError(
            "Malformed HTTP version in HTTP Request-Line: %r" % version
        )
    return RequestStartLine(method, path, version)


ResponseStartLine = collections.namedtuple(
    "ResponseStartLine", ["version", "code", "reason"]
)


_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)")


def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `collections.namedtuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    line = native_str(line)
    match = _http_response_line_re.match(line)
    if not match:
        raise HTTPInputError("Error parsing response start line")
    return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))


# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
# The original 2.7 version of this code did not correctly support some
# combinations of semicolons and double quotes.
# It has also been modified to support valueless parameters as seen in
# websocket extension negotiations, and to support non-ascii values in
# RFC 2231/5987 format.


def _parseparam(s: str) -> Generator[str, None, None]:
    while s[:1] == ";":
        s = s[1:]
        end = s.find(";")
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(";", end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        yield f.strip()
        s = s[end:]


def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    parts = _parseparam(";" + line)
    key = next(parts)
    # decode_params treats first argument special, but we already stripped key
    params = [("Dummy", "value")]
    for p in parts:
        i = p.find("=")
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1 :].strip()
            params.append((name, native_str(value)))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # get rid of the dummy again
    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict


def _encode_header(key: str, pdict: Dict[str, str]) -> str:
    """Inverse of _parse_header.

    >>> _encode_header('permessage-deflate',
    ...     {'client_max_window_bits': 15, 'client_no_context_takeover': None})
    'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
    """
    if not pdict:
        return key
    out = [key]
    # Sort the parameters just to make it easy to test.
    for k, v in sorted(pdict.items()):
        if v is None:
            out.append(k)
        else:
            # TODO: quote if necessary.
            out.append("%s=%s" % (k, v))
    return "; ".join(out)


def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """
    if isinstance(username, unicode_type):
        username = unicodedata.normalize("NFC", username)
    if isinstance(password, unicode_type):
        password = unicodedata.normalize("NFC", password)
    return utf8(username) + b":" + utf8(password)


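# Illustrative usage sketch (not part of the upstream module): turning a
# username/password pair into an HTTP Basic ``Authorization`` header value.
# The helper name is hypothetical; only encode_username_password comes from
# this module.
def _example_basic_auth_header(username: str, password: str) -> str:
    import base64

    credentials = encode_username_password(username, password)  # b"user:pass"
    return "Basic " + base64.b64encode(credentials).decode("ascii")

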
def doctests():
    # type: () -> unittest.TestSuite
    import doctest

    return doctest.DocTestSuite()


_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    match = _netloc_re.match(netloc)
    if match:
        host = match.group(1)
        port = int(match.group(2))  # type: Optional[int]
    else:
        host = netloc
        port = None
    return (host, port)


def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for k, vs in qs.items():
        for v in vs:
            yield (k, v)


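# Illustrative usage sketch (not part of the upstream module): qs_to_qsl
# flattens the dict-of-lists produced by ``parse_qs``-style parsers back into
# the name/value pairs that ``urlencode`` expects. The helper name is
# hypothetical.
def _example_reencode_query(qs: Dict[str, List[str]]) -> str:
    # e.g. {"a": ["1", "2"], "b": ["3"]} -> "a=1&a=2&b=3"
    return urlencode(list(qs_to_qsl(qs)))

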
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = "".join


def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.5 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    i = 0
    n = len(s)
    res = []
    while 0 <= i < n:
        o_match = _OctalPatt.search(s, i)
        q_match = _QuotePatt.search(s, i)
        if not o_match and not q_match:  # Neither matched
            res.append(s[i:])
            break
        # else:
        j = k = -1
        if o_match:
            j = o_match.start(0)
        if q_match:
            k = q_match.start(0)
        if q_match and (not o_match or k < j):  # QuotePatt matched
            res.append(s[i:k])
            res.append(s[k + 1])
            i = k + 2
        else:  # OctalPatt matched
            res.append(s[i:j])
            res.append(chr(int(s[j + 1 : j + 4], 8)))
            i = j + 4
    return _nulljoin(res)


def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    for chunk in cookie.split(str(";")):
        if str("=") in chunk:
            key, val = chunk.split(str("="), 1)
        else:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = str(""), chunk
        key, val = key.strip(), val.strip()
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict


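# Illustrative usage sketch (not part of the upstream module): parse_cookie
# is intentionally lenient, keeping whatever it can recover from a messy
# Cookie header. The helper name is hypothetical.
def _example_parse_cookie() -> Dict[str, str]:
    parsed = parse_cookie('session=abc123; theme="dark mode"; ; =orphan')
    # Quoted values are unquoted, empty chunks are dropped, and a chunk with
    # no "=" is treated as a value with an empty name.
    assert parsed == {"session": "abc123", "theme": "dark mode", "": "orphan"}
    return parsed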