Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tornado/httputil.py: 25%

426 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1# 

2# Copyright 2009 Facebook 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); you may 

5# not use this file except in compliance with the License. You may obtain 

6# a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

13# License for the specific language governing permissions and limitations 

14# under the License. 

15 

16"""HTTP utility code shared by clients and servers. 

17 

18This module also defines the `HTTPServerRequest` class which is exposed 

19via `tornado.web.RequestHandler.request`. 

20""" 

21 

22import calendar 

23import collections.abc 

24import copy 

25import datetime 

26import email.utils 

27from functools import lru_cache 

28from http.client import responses 

29import http.cookies 

30import re 

31from ssl import SSLError 

32import time 

33import unicodedata 

34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl 

35 

36from tornado.escape import native_str, parse_qs_bytes, utf8 

37from tornado.log import gen_log 

38from tornado.util import ObjectDict, unicode_type 

39 

40 

# responses (imported from http.client above) is unused in this file, but we
# re-export it to other files. Reference it here so pyflakes doesn't complain
# about an unused import.
responses

44 

45import typing 

46from typing import ( 

47 Tuple, 

48 Iterable, 

49 List, 

50 Mapping, 

51 Iterator, 

52 Dict, 

53 Union, 

54 Optional, 

55 Awaitable, 

56 Generator, 

57 AnyStr, 

58) 

59 

60if typing.TYPE_CHECKING: 

61 from typing import Deque # noqa: F401 

62 from asyncio import Future # noqa: F401 

63 import unittest # noqa: F401 

64 

65 

66@lru_cache(1000) 

67def _normalize_header(name: str) -> str: 

68 """Map a header name to Http-Header-Case. 

69 

70 >>> _normalize_header("coNtent-TYPE") 

71 'Content-Type' 

72 """ 

73 return "-".join([w.capitalize() for w in name.split("-")]) 

74 

75 

class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Two parallel representations: _dict exposes the comma-joined value
        # through the plain mapping interface, while _as_list keeps each
        # repeated value separately for get_list()/get_all().
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        # Most recently added header name; parse_line uses it to attach
        # obsolete folded (leading-whitespace) continuation lines.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Repeated header: mapping view joins values with a comma,
            # while get_list() still sees each value individually.
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        :raises HTTPInputError: if the line has no colon, or is a
            continuation line with no preceding header.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # continuation of a multi-line header
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip()
            # Append the continuation to both representations of the
            # previously-parsed header.
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip())

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        h = cls()
        # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
        # terminator and ignore any preceding CR.
        for line in headers.split("\n"):
            if line.endswith("\r"):
                line = line[:-1]
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        # Assignment replaces any previously-added values for this header.
        norm_name = _normalize_header(name)
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append("%s: %s\n" % (name, value))
        return "".join(lines)

    __unicode__ = __str__

245 

246 

class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    # Class-level defaults; overwritten per-instance in __init__ when a
    # uri is supplied.
    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A start_line, when given, overrides the individual
            # method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        # _start_time/_finish_time feed request_time(); _finish_time is
        # presumably set by the server when the request completes.
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Keep a deep copy so that later body-argument merging in
        # _parse_body does not alter query_arguments.
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        # Lazily parsed on first access and cached in self._cookies.
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # Malformed Cookie header: treat as no cookies.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not.  Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Request still in flight: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Parse form-encoded or multipart body content into
        # body_arguments/files, then merge into the combined arguments dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s)" % (self.__class__.__name__, args)

467 

468 

class HTTPInputError(Exception):
    """Exception class for malformed HTTP requests or responses
    from remote sources.

    .. versionadded:: 4.0
    """

477 

478 

class HTTPOutputError(Exception):
    """Exception class for errors in HTTP output.

    .. versionadded:: 4.0
    """

486 

487 

class HTTPServerConnectionDelegate(object):
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        # Abstract: subclasses must override.
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        # Optional hook: the default implementation does nothing.
        pass

515 

516 

class HTTPMessageDelegate(object):
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        # Optional hook: default does nothing and applies no flow control.
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        # Optional hook: default discards the chunk.
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass

561 

562 

class HTTPConnection(object):
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connection implementations must override.
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connection implementations must override.
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        # Abstract: concrete connection implementations must override.
        raise NotImplementedError()

607 

608 

def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    # Normalize the new arguments to a list of (key, value) pairs.
    if isinstance(args, dict):
        extra_params = list(args.items())
    elif isinstance(args, (list, tuple)):
        extra_params = list(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    # Append the new pairs after any parameters already present in the url.
    merged = parse_qsl(parsed_url.query, keep_blank_values=True) + extra_params
    return urlunparse(parsed_url._replace(query=urlencode(merged)))

654 

655 

class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Attributes are populated as keyword arguments when instances are
    # constructed (see parse_multipart_form_data in this module).
    filename: str
    body: bytes
    content_type: str

670 

671 

672def _parse_request_range( 

673 range_header: str, 

674) -> Optional[Tuple[Optional[int], Optional[int]]]: 

675 """Parses a Range header. 

676 

677 Returns either ``None`` or tuple ``(start, end)``. 

678 Note that while the HTTP headers use inclusive byte positions, 

679 this method returns indexes suitable for use in slices. 

680 

681 >>> start, end = _parse_request_range("bytes=1-2") 

682 >>> start, end 

683 (1, 3) 

684 >>> [0, 1, 2, 3, 4][start:end] 

685 [1, 2] 

686 >>> _parse_request_range("bytes=6-") 

687 (6, None) 

688 >>> _parse_request_range("bytes=-6") 

689 (-6, None) 

690 >>> _parse_request_range("bytes=-0") 

691 (None, 0) 

692 >>> _parse_request_range("bytes=") 

693 (None, None) 

694 >>> _parse_request_range("foo=42") 

695 >>> _parse_request_range("bytes=1-2,6-10") 

696 

697 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed). 

698 

699 See [0] for the details of the range header. 

700 

701 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges 

702 """ 

703 unit, _, value = range_header.partition("=") 

704 unit, value = unit.strip(), value.strip() 

705 if unit != "bytes": 

706 return None 

707 start_b, _, end_b = value.partition("-") 

708 try: 

709 start = _int_or_none(start_b) 

710 end = _int_or_none(end_b) 

711 except ValueError: 

712 return None 

713 if end is not None: 

714 if start is None: 

715 if end != 0: 

716 start = -end 

717 end = None 

718 else: 

719 end += 1 

720 return (start, end) 

721 

722 

723def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str: 

724 """Returns a suitable Content-Range header: 

725 

726 >>> print(_get_content_range(None, 1, 4)) 

727 bytes 0-0/4 

728 >>> print(_get_content_range(1, 3, 4)) 

729 bytes 1-2/4 

730 >>> print(_get_content_range(None, None, 4)) 

731 bytes 0-3/4 

732 """ 

733 start = start or 0 

734 end = (end or total) - 1 

735 return "bytes %s-%s/%s" % (start, end, total) 

736 

737 

738def _int_or_none(val: str) -> Optional[int]: 

739 val = val.strip() 

740 if val == "": 

741 return None 

742 return int(val) 

743 

744 

def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.
    """
    is_urlencoded = content_type.startswith("application/x-www-form-urlencoded")
    is_multipart = content_type.startswith("multipart/form-data")
    if not (is_urlencoded or is_multipart):
        # Unrecognized content types are ignored.
        return
    # Compressed bodies are not supported for either form encoding.
    if headers and "Content-Encoding" in headers:
        gen_log.warning(
            "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
        )
        return
    if is_urlencoded:
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            parsed = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
            parsed = {}
        for name, values in parsed.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    else:
        try:
            # Locate the boundary parameter in the Content-Type header.
            for field in content_type.split(";"):
                key, _, boundary = field.strip().partition("=")
                if key == "boundary" and boundary:
                    parse_multipart_form_data(utf8(boundary), body, arguments, files)
                    break
            else:
                raise ValueError("multipart boundary not found")
        except Exception as e:
            gen_log.warning("Invalid multipart/form-data: %s", e)

792 

793 

def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Each part looks like "headers\r\n\r\nbody\r\n"; locate the blank
        # line separating headers from the body.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Slice off the header separator (4 bytes) and the trailing "\r\n".
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)

850 

851 

def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Reduce every accepted input type to seconds since the epoch, then
    # let email.utils produce the RFC-compliant GMT date string.
    if isinstance(ts, datetime.datetime):
        seconds = calendar.timegm(ts.utctimetuple())  # type: Union[int, float]
    elif isinstance(ts, (tuple, time.struct_time)):
        seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(seconds, usegmt=True)

873 

874 

RequestStartLine = collections.namedtuple(
    "RequestStartLine", ["method", "path", "version"]
)


# Only plain HTTP/1.x versions are accepted by this module.
_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")


def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `collections.namedtuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    pieces = line.split(" ")
    if len(pieces) != 3:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = pieces
    if _http_version_re.match(version) is None:
        raise HTTPInputError(
            "Malformed HTTP version in HTTP Request-Line: %r" % version
        )
    return RequestStartLine(method, path, version)

902 

903 

904ResponseStartLine = collections.namedtuple( 

905 "ResponseStartLine", ["version", "code", "reason"] 

906) 

907 

908 

909_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)") 

910 

911 

912def parse_response_start_line(line: str) -> ResponseStartLine: 

913 """Returns a (version, code, reason) tuple for an HTTP 1.x response line. 

914 

915 The response is a `collections.namedtuple`. 

916 

917 >>> parse_response_start_line("HTTP/1.1 200 OK") 

918 ResponseStartLine(version='HTTP/1.1', code=200, reason='OK') 

919 """ 

920 line = native_str(line) 

921 match = _http_response_line_re.match(line) 

922 if not match: 

923 raise HTTPInputError("Error parsing response start line") 

924 return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3)) 

925 

926 

927# _parseparam and _parse_header are copied and modified from python2.7's cgi.py 

928# The original 2.7 version of this code did not correctly support some 

929# combinations of semicolons and double quotes. 

930# It has also been modified to support valueless parameters as seen in 

931# websocket extension negotiations, and to support non-ascii values in 

932# RFC 2231/5987 format. 

933 

934 

935def _parseparam(s: str) -> Generator[str, None, None]: 

936 while s[:1] == ";": 

937 s = s[1:] 

938 end = s.find(";") 

939 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 

940 end = s.find(";", end + 1) 

941 if end < 0: 

942 end = len(s) 

943 f = s[:end] 

944 yield f.strip() 

945 s = s[end:] 

946 

947 

def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    # Prepend ";" so _parseparam yields the main type as the first piece.
    parts = _parseparam(";" + line)
    key = next(parts)
    # decode_params treats first argument special, but we already stripped key
    params = [("Dummy", "value")]
    for p in parts:
        i = p.find("=")
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1 :].strip()
            params.append((name, native_str(value)))
    # email.utils.decode_params decodes RFC 2231/5987 extended parameters
    # (the "name*=charset''value" form) into plain values.
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # get rid of the dummy again
    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        # Strip surrounding double quotes from quoted-string values.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict

981 

982 

983def _encode_header(key: str, pdict: Dict[str, str]) -> str: 

984 """Inverse of _parse_header. 

985 

986 >>> _encode_header('permessage-deflate', 

987 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None}) 

988 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover' 

989 """ 

990 if not pdict: 

991 return key 

992 out = [key] 

993 # Sort the parameters just to make it easy to test. 

994 for k, v in sorted(pdict.items()): 

995 if v is None: 

996 out.append(k) 

997 else: 

998 # TODO: quote if necessary. 

999 out.append("%s=%s" % (k, v)) 

1000 return "; ".join(out) 

1001 

1002 

def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """

    def _normalize(value: Union[str, bytes]) -> Union[str, bytes]:
        # NFC-normalize text so equivalent unicode spellings encode the
        # same; bytes input is passed through untouched.
        if isinstance(value, unicode_type):
            return unicodedata.normalize("NFC", value)
        return value

    return utf8(_normalize(username)) + b":" + utf8(_normalize(password))

1017 

1018 

def doctests():
    # type: () -> unittest.TestSuite
    """Return a ``unittest.TestSuite`` running this module's doctests."""
    from doctest import DocTestSuite

    return DocTestSuite()

1024 

1025 

_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    m = _netloc_re.match(netloc)
    if m is None:
        # No trailing ":<digits>" — the whole string is the host.
        return (netloc, None)
    return (m.group(1), int(m.group(2)))

1044 

1045 

def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    # One (name, value) pair per repeated value, in dict order.
    for name, values in qs.items():
        yield from ((name, value) for value in values)

1054 

1055 

1056_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") 

1057_QuotePatt = re.compile(r"[\\].") 

1058_nulljoin = "".join 

1059 

1060 

1061def _unquote_cookie(s: str) -> str: 

1062 """Handle double quotes and escaping in cookie values. 

1063 

1064 This method is copied verbatim from the Python 3.5 standard 

1065 library (http.cookies._unquote) so we don't have to depend on 

1066 non-public interfaces. 

1067 """ 

1068 # If there aren't any doublequotes, 

1069 # then there can't be any special characters. See RFC 2109. 

1070 if s is None or len(s) < 2: 

1071 return s 

1072 if s[0] != '"' or s[-1] != '"': 

1073 return s 

1074 

1075 # We have to assume that we must decode this string. 

1076 # Down to work. 

1077 

1078 # Remove the "s 

1079 s = s[1:-1] 

1080 

1081 # Check for special sequences. Examples: 

1082 # \012 --> \n 

1083 # \" --> " 

1084 # 

1085 i = 0 

1086 n = len(s) 

1087 res = [] 

1088 while 0 <= i < n: 

1089 o_match = _OctalPatt.search(s, i) 

1090 q_match = _QuotePatt.search(s, i) 

1091 if not o_match and not q_match: # Neither matched 

1092 res.append(s[i:]) 

1093 break 

1094 # else: 

1095 j = k = -1 

1096 if o_match: 

1097 j = o_match.start(0) 

1098 if q_match: 

1099 k = q_match.start(0) 

1100 if q_match and (not o_match or k < j): # QuotePatt matched 

1101 res.append(s[i:k]) 

1102 res.append(s[k + 1]) 

1103 i = k + 2 

1104 else: # OctalPatt matched 

1105 res.append(s[i:j]) 

1106 res.append(chr(int(s[j + 1 : j + 4], 8))) 

1107 i = j + 4 

1108 return _nulljoin(res) 

1109 

1110 

def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    # NOTE: the str(";")/str("=") wrappers in the original were Python 2
    # compatibility shims (no-ops on Python 3) and have been removed.
    for chunk in cookie.split(";"):
        if "=" in chunk:
            key, val = chunk.split("=", 1)
        else:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = "", chunk
        key, val = key.strip(), val.strip()
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict