Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tornado/httputil.py: 25%

426 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1# 

2# Copyright 2009 Facebook 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); you may 

5# not use this file except in compliance with the License. You may obtain 

6# a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

13# License for the specific language governing permissions and limitations 

14# under the License. 

15 

16"""HTTP utility code shared by clients and servers. 

17 

18This module also defines the `HTTPServerRequest` class which is exposed 

19via `tornado.web.RequestHandler.request`. 

20""" 

21 

22import calendar 

23import collections.abc 

24import copy 

25import datetime 

26import email.utils 

27from functools import lru_cache 

28from http.client import responses 

29import http.cookies 

30import re 

31from ssl import SSLError 

32import time 

33import unicodedata 

34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl 

35 

36from tornado.escape import native_str, parse_qs_bytes, utf8 

37from tornado.log import gen_log 

38from tornado.util import ObjectDict, unicode_type 

39 

40 

# responses (imported from http.client above) is unused in this file, but we
# re-export it to other files. Reference it here so pyflakes doesn't complain
# about an unused import.
responses

44 

45import typing 

46from typing import ( 

47 Tuple, 

48 Iterable, 

49 List, 

50 Mapping, 

51 Iterator, 

52 Dict, 

53 Union, 

54 Optional, 

55 Awaitable, 

56 Generator, 

57 AnyStr, 

58) 

59 

60if typing.TYPE_CHECKING: 

61 from typing import Deque # noqa: F401 

62 from asyncio import Future # noqa: F401 

63 import unittest # noqa: F401 

64 

65 

66@lru_cache(1000) 

67def _normalize_header(name: str) -> str: 

68 """Map a header name to Http-Header-Case. 

69 

70 >>> _normalize_header("coNtent-TYPE") 

71 'Content-Type' 

72 """ 

73 return "-".join([w.capitalize() for w in name.split("-")]) 

74 

75 

class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Two parallel representations: _dict exposes the comma-joined value
        # through the plain mapping interface, while _as_list keeps each
        # repeated value separately for get_list()/get_all().
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        # Most recently added header name; parse_line uses it to attach
        # obsolete folded (leading-whitespace) continuation lines.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Repeated header: mapping view joins values with a comma,
            # while get_list() still sees each value individually.
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        :raises HTTPInputError: if the line has no colon, or is a
            continuation line with no preceding header.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # continuation of a multi-line header
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip()
            # Append the continuation to both representations of the
            # previously-parsed header.
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip())

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        h = cls()
        # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
        # terminator and ignore any preceding CR.
        for line in headers.split("\n"):
            if line.endswith("\r"):
                line = line[:-1]
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        # Assignment replaces any previously-added values for this header.
        norm_name = _normalize_header(name)
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append("%s: %s\n" % (name, value))
        return "".join(lines)

    __unicode__ = __str__

245 

246 

class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    # Class-level defaults; overwritten per-instance in __init__ when a
    # uri is supplied.
    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A start_line, when given, overrides the individual
            # method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        # _start_time/_finish_time feed request_time(); _finish_time is
        # presumably set by the server when the request completes.
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Keep a deep copy so that later body-argument merging in
        # _parse_body does not alter query_arguments.
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        # Lazily parsed on first access and cached in self._cookies.
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # Malformed Cookie header: treat as no cookies.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not.  Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Request still in flight: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Parse form-encoded or multipart body content into
        # body_arguments/files, then merge into the combined arguments dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s)" % (self.__class__.__name__, args)

467 

468 

class HTTPInputError(Exception):
    """Exception class for malformed HTTP requests or responses
    from remote sources.

    .. versionadded:: 4.0
    """

477 

478 

class HTTPOutputError(Exception):
    """Exception class for errors in HTTP output.

    .. versionadded:: 4.0
    """

486 

487 

class HTTPServerConnectionDelegate(object):
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        # Abstract: subclasses must override.
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        # Optional hook: the default implementation does nothing.
        pass

515 

516 

class HTTPMessageDelegate(object):
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        # Optional hook: default does nothing and applies no flow control.
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        # Optional hook: default discards the chunk.
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass

561 

562 

class HTTPConnection(object):
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connection implementations must override.
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connection implementations must override.
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        # Abstract: concrete connection implementations must override.
        raise NotImplementedError()

607 

608 

def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    # Normalize the new arguments to a list of (key, value) pairs.
    if isinstance(args, dict):
        extra_params = list(args.items())
    elif isinstance(args, (list, tuple)):
        extra_params = list(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    # Append the new pairs after any parameters already present in the url.
    merged = parse_qsl(parsed_url.query, keep_blank_values=True) + extra_params
    return urlunparse(parsed_url._replace(query=urlencode(merged)))

654 

655 

class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Attributes are populated as keyword arguments when instances are
    # constructed (see parse_multipart_form_data in this module).
    filename: str
    body: bytes
    content_type: str

670 

671 

672def _parse_request_range( 

673 range_header: str, 

674) -> Optional[Tuple[Optional[int], Optional[int]]]: 

675 """Parses a Range header. 

676 

677 Returns either ``None`` or tuple ``(start, end)``. 

678 Note that while the HTTP headers use inclusive byte positions, 

679 this method returns indexes suitable for use in slices. 

680 

681 >>> start, end = _parse_request_range("bytes=1-2") 

682 >>> start, end 

683 (1, 3) 

684 >>> [0, 1, 2, 3, 4][start:end] 

685 [1, 2] 

686 >>> _parse_request_range("bytes=6-") 

687 (6, None) 

688 >>> _parse_request_range("bytes=-6") 

689 (-6, None) 

690 >>> _parse_request_range("bytes=-0") 

691 (None, 0) 

692 >>> _parse_request_range("bytes=") 

693 (None, None) 

694 >>> _parse_request_range("foo=42") 

695 >>> _parse_request_range("bytes=1-2,6-10") 

696 

697 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed). 

698 

699 See [0] for the details of the range header. 

700 

701 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges 

702 """ 

703 unit, _, value = range_header.partition("=") 

704 unit, value = unit.strip(), value.strip() 

705 if unit != "bytes": 

706 return None 

707 start_b, _, end_b = value.partition("-") 

708 try: 

709 start = _int_or_none(start_b) 

710 end = _int_or_none(end_b) 

711 except ValueError: 

712 return None 

713 if end is not None: 

714 if start is None: 

715 if end != 0: 

716 start = -end 

717 end = None 

718 else: 

719 end += 1 

720 return (start, end) 

721 

722 

723def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str: 

724 """Returns a suitable Content-Range header: 

725 

726 >>> print(_get_content_range(None, 1, 4)) 

727 bytes 0-0/4 

728 >>> print(_get_content_range(1, 3, 4)) 

729 bytes 1-2/4 

730 >>> print(_get_content_range(None, None, 4)) 

731 bytes 0-3/4 

732 """ 

733 start = start or 0 

734 end = (end or total) - 1 

735 return "bytes %s-%s/%s" % (start, end, total) 

736 

737 

738def _int_or_none(val: str) -> Optional[int]: 

739 val = val.strip() 

740 if val == "": 

741 return None 

742 return int(val) 

743 

744 

def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.
    """
    is_urlencoded = content_type.startswith("application/x-www-form-urlencoded")
    is_multipart = content_type.startswith("multipart/form-data")
    if not (is_urlencoded or is_multipart):
        # Unrecognized content types are ignored.
        return
    # Compressed bodies are not supported for either form encoding.
    if headers and "Content-Encoding" in headers:
        gen_log.warning(
            "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
        )
        return
    if is_urlencoded:
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            parsed = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
            parsed = {}
        for name, values in parsed.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    else:
        try:
            # Locate the boundary parameter in the Content-Type header.
            for field in content_type.split(";"):
                key, _, boundary = field.strip().partition("=")
                if key == "boundary" and boundary:
                    parse_multipart_form_data(utf8(boundary), body, arguments, files)
                    break
            else:
                raise ValueError("multipart boundary not found")
        except Exception as e:
            gen_log.warning("Invalid multipart/form-data: %s", e)

792 

793 

def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Each part looks like "headers\r\n\r\nbody\r\n"; locate the blank
        # line separating headers from the body.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Slice off the header separator (4 bytes) and the trailing "\r\n".
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)

850 

851 

def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Reduce every accepted input type to seconds since the epoch, then
    # let email.utils produce the RFC-compliant GMT date string.
    if isinstance(ts, datetime.datetime):
        seconds = calendar.timegm(ts.utctimetuple())  # type: Union[int, float]
    elif isinstance(ts, (tuple, time.struct_time)):
        seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(seconds, usegmt=True)

873 

874 

RequestStartLine = collections.namedtuple(
    "RequestStartLine", ["method", "path", "version"]
)


# Only plain HTTP/1.x versions are accepted by this module.
_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")


def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `collections.namedtuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    pieces = line.split(" ")
    if len(pieces) != 3:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = pieces
    if _http_version_re.match(version) is None:
        raise HTTPInputError(
            "Malformed HTTP version in HTTP Request-Line: %r" % version
        )
    return RequestStartLine(method, path, version)

902 

903 

904ResponseStartLine = collections.namedtuple( 

905 "ResponseStartLine", ["version", "code", "reason"] 

906) 

907 

908 

909_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)") 

910 

911 

912def parse_response_start_line(line: str) -> ResponseStartLine: 

913 """Returns a (version, code, reason) tuple for an HTTP 1.x response line. 

914 

915 The response is a `collections.namedtuple`. 

916 

917 >>> parse_response_start_line("HTTP/1.1 200 OK") 

918 ResponseStartLine(version='HTTP/1.1', code=200, reason='OK') 

919 """ 

920 line = native_str(line) 

921 match = _http_response_line_re.match(line) 

922 if not match: 

923 raise HTTPInputError("Error parsing response start line") 

924 return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3)) 

925 

926 

927# _parseparam and _parse_header are copied and modified from python2.7's cgi.py 

928# The original 2.7 version of this code did not correctly support some 

929# combinations of semicolons and double quotes. 

930# It has also been modified to support valueless parameters as seen in 

931# websocket extension negotiations, and to support non-ascii values in 

932# RFC 2231/5987 format. 

933 

934 

935def _parseparam(s: str) -> Generator[str, None, None]: 

936 while s[:1] == ";": 

937 s = s[1:] 

938 end = s.find(";") 

939 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 

940 end = s.find(";", end + 1) 

941 if end < 0: 

942 end = len(s) 

943 f = s[:end] 

944 yield f.strip() 

945 s = s[end:] 

946 

947 

def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    # Prepend ";" so _parseparam yields the main type as the first piece.
    parts = _parseparam(";" + line)
    key = next(parts)
    # decode_params treats first argument special, but we already stripped key
    params = [("Dummy", "value")]
    for p in parts:
        i = p.find("=")
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1 :].strip()
            params.append((name, native_str(value)))
    # email.utils.decode_params decodes RFC 2231/5987 extended parameters
    # (the "name*=charset''value" form) into plain values.
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # get rid of the dummy again
    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        # Strip surrounding double quotes from quoted-string values.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict

981 

982 

983def _encode_header(key: str, pdict: Dict[str, str]) -> str: 

984 """Inverse of _parse_header. 

985 

986 >>> _encode_header('permessage-deflate', 

987 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None}) 

988 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover' 

989 """ 

990 if not pdict: 

991 return key 

992 out = [key] 

993 # Sort the parameters just to make it easy to test. 

994 for k, v in sorted(pdict.items()): 

995 if v is None: 

996 out.append(k) 

997 else: 

998 # TODO: quote if necessary. 

999 out.append("%s=%s" % (k, v)) 

1000 return "; ".join(out) 

1001 

1002 

def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """

    def _normalize(value: Union[str, bytes]) -> Union[str, bytes]:
        # NFC-normalize text so equivalent unicode spellings encode the
        # same; bytes input is passed through untouched.
        if isinstance(value, unicode_type):
            return unicodedata.normalize("NFC", value)
        return value

    return utf8(_normalize(username)) + b":" + utf8(_normalize(password))

1017 

1018 

def doctests():
    # type: () -> unittest.TestSuite
    """Return a ``unittest.TestSuite`` running this module's doctests."""
    from doctest import DocTestSuite

    return DocTestSuite()

1024 

1025 

_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    m = _netloc_re.match(netloc)
    if m is None:
        # No trailing ":<digits>" — the whole string is the host.
        return (netloc, None)
    return (m.group(1), int(m.group(2)))

1044 

1045 

def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    # One (name, value) pair per repeated value, in dict order.
    for name, values in qs.items():
        yield from ((name, value) for value in values)

1054 

1055 

1056_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") 

1057_QuotePatt = re.compile(r"[\\].") 

1058_nulljoin = "".join 

1059 

1060 

1061def _unquote_cookie(s: str) -> str: 

1062 """Handle double quotes and escaping in cookie values. 

1063 

1064 This method is copied verbatim from the Python 3.5 standard 

1065 library (http.cookies._unquote) so we don't have to depend on 

1066 non-public interfaces. 

1067 """ 

1068 # If there aren't any doublequotes, 

1069 # then there can't be any special characters. See RFC 2109. 

1070 if s is None or len(s) < 2: 

1071 return s 

1072 if s[0] != '"' or s[-1] != '"': 

1073 return s 

1074 

1075 # We have to assume that we must decode this string. 

1076 # Down to work. 

1077 

1078 # Remove the "s 

1079 s = s[1:-1] 

1080 

1081 # Check for special sequences. Examples: 

1082 # \012 --> \n 

1083 # \" --> " 

1084 # 

1085 i = 0 

1086 n = len(s) 

1087 res = [] 

1088 while 0 <= i < n: 

1089 o_match = _OctalPatt.search(s, i) 

1090 q_match = _QuotePatt.search(s, i) 

1091 if not o_match and not q_match: # Neither matched 

1092 res.append(s[i:]) 

1093 break 

1094 # else: 

1095 j = k = -1 

1096 if o_match: 

1097 j = o_match.start(0) 

1098 if q_match: 

1099 k = q_match.start(0) 

1100 if q_match and (not o_match or k < j): # QuotePatt matched 

1101 res.append(s[i:k]) 

1102 res.append(s[k + 1]) 

1103 i = k + 2 

1104 else: # OctalPatt matched 

1105 res.append(s[i:j]) 

1106 res.append(chr(int(s[j + 1 : j + 4], 8))) 

1107 i = j + 4 

1108 return _nulljoin(res) 

1109 

1110 

def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    # NOTE: the str(";")/str("=") wrappers in the original were Python 2
    # compatibility shims (no-ops on Python 3) and have been removed.
    for chunk in cookie.split(";"):
        if "=" in chunk:
            key, val = chunk.split("=", 1)
        else:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = "", chunk
        key, val = key.strip(), val.strip()
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict