Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tornado/httputil.py: 25%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

427 statements  

1# 

2# Copyright 2009 Facebook 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); you may 

5# not use this file except in compliance with the License. You may obtain 

6# a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

13# License for the specific language governing permissions and limitations 

14# under the License. 

15 

16"""HTTP utility code shared by clients and servers. 

17 

18This module also defines the `HTTPServerRequest` class which is exposed 

19via `tornado.web.RequestHandler.request`. 

20""" 

21 

22import calendar 

23import collections.abc 

24import copy 

25import datetime 

26import email.utils 

27from functools import lru_cache 

28from http.client import responses 

29import http.cookies 

30import re 

31from ssl import SSLError 

32import time 

33import unicodedata 

34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl 

35 

36from tornado.escape import native_str, parse_qs_bytes, utf8 

37from tornado.log import gen_log 

38from tornado.util import ObjectDict, unicode_type 

39 

40 

# responses is unused in this file, but we re-export it to other files.
# (http.client.responses maps int status codes to reason phrases.)
# Reference it so pyflakes doesn't complain.
responses

44 

45import typing 

46from typing import ( 

47 Tuple, 

48 Iterable, 

49 List, 

50 Mapping, 

51 Iterator, 

52 Dict, 

53 Union, 

54 Optional, 

55 Awaitable, 

56 Generator, 

57 AnyStr, 

58) 

59 

60if typing.TYPE_CHECKING: 

61 from typing import Deque # noqa: F401 

62 from asyncio import Future # noqa: F401 

63 import unittest # noqa: F401 

64 

# To be used with str.strip() and related methods.
# Only space and horizontal tab count as HTTP optional whitespace;
# deliberately narrower than str.strip()'s default (no \r\n\f\v).
HTTP_WHITESPACE = " \t"

67 

68 

69@lru_cache(1000) 

70def _normalize_header(name: str) -> str: 

71 """Map a header name to Http-Header-Case. 

72 

73 >>> _normalize_header("coNtent-TYPE") 

74 'Content-Type' 

75 """ 

76 return "-".join([w.capitalize() for w in name.split("-")]) 

77 

78 

class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Two parallel views of the same headers:
        # _dict maps name -> comma-joined value (the plain-dict view);
        # _as_list maps name -> list of individual values.
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        # Name of the most recently added header, used by parse_line()
        # to attach multi-line (folded) continuations.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Repeated header: comma-join in the dict view, append in
            # the list view.
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        Assumes ``line`` is non-empty (``parse()`` filters out empty
        lines before calling this).

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # continuation of a multi-line header
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip(HTTP_WHITESPACE)
            # Extend both views of the previous header's last value.
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip(HTTP_WHITESPACE))

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        h = cls()
        # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
        # terminator and ignore any preceding CR.
        for line in headers.split("\n"):
            if line.endswith("\r"):
                line = line[:-1]
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        # Replaces all existing values for this header.
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append("%s: %s\n" % (name, value))
        return "".join(lines)

    __unicode__ = __str__

248 

249 

class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    # Class-level defaults so the attributes exist even when no uri is given.
    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line overrides the individual
            # method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        # Snapshot of the query-only arguments; `arguments` is later
        # extended with body arguments by _parse_body().
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        # Lazily parsed and cached on first access.
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # Malformed Cookie header: expose an empty cookie jar
                    # rather than failing the request.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Request still in flight: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Parse the request body into body_arguments/files, then merge
        # the body arguments into the combined `arguments` dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s)" % (self.__class__.__name__, args)

470 

471 

class HTTPInputError(Exception):
    """Exception class for malformed HTTP requests or responses
    from remote sources.

    .. versionadded:: 4.0
    """

480 

481 

class HTTPOutputError(Exception):
    """Exception class for errors in HTTP output.

    .. versionadded:: 4.0
    """

489 

490 

class HTTPServerConnectionDelegate(object):
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        # Abstract: subclasses must override.
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        # Optional hook: default is a no-op.
        pass

518 

519 

class HTTPMessageDelegate(object):
    """Implement this interface to handle an HTTP request or response.

    All methods have no-op default implementations; subclasses override
    the ones they care about.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass

564 

565 

class HTTPConnection(object):
    """Applications use this interface to write their responses.

    All methods are abstract; concrete connections must override them.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        raise NotImplementedError()

610 

611 

def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    # Keep any pre-existing query parameters (including blank values)
    # and append the new ones after them.
    parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
    if isinstance(args, dict):
        parsed_query.extend(args.items())
    elif isinstance(args, (list, tuple)):
        parsed_query.extend(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    final_query = urlencode(parsed_query)
    return urlunparse(parsed_url._replace(query=final_query))

657 

658 

class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # ObjectDict stores these as dict keys exposed as attributes; the
    # annotations document the expected types without assigning values.
    filename: str
    body: bytes
    content_type: str

673 

674 

675def _parse_request_range( 

676 range_header: str, 

677) -> Optional[Tuple[Optional[int], Optional[int]]]: 

678 """Parses a Range header. 

679 

680 Returns either ``None`` or tuple ``(start, end)``. 

681 Note that while the HTTP headers use inclusive byte positions, 

682 this method returns indexes suitable for use in slices. 

683 

684 >>> start, end = _parse_request_range("bytes=1-2") 

685 >>> start, end 

686 (1, 3) 

687 >>> [0, 1, 2, 3, 4][start:end] 

688 [1, 2] 

689 >>> _parse_request_range("bytes=6-") 

690 (6, None) 

691 >>> _parse_request_range("bytes=-6") 

692 (-6, None) 

693 >>> _parse_request_range("bytes=-0") 

694 (None, 0) 

695 >>> _parse_request_range("bytes=") 

696 (None, None) 

697 >>> _parse_request_range("foo=42") 

698 >>> _parse_request_range("bytes=1-2,6-10") 

699 

700 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed). 

701 

702 See [0] for the details of the range header. 

703 

704 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges 

705 """ 

706 unit, _, value = range_header.partition("=") 

707 unit, value = unit.strip(), value.strip() 

708 if unit != "bytes": 

709 return None 

710 start_b, _, end_b = value.partition("-") 

711 try: 

712 start = _int_or_none(start_b) 

713 end = _int_or_none(end_b) 

714 except ValueError: 

715 return None 

716 if end is not None: 

717 if start is None: 

718 if end != 0: 

719 start = -end 

720 end = None 

721 else: 

722 end += 1 

723 return (start, end) 

724 

725 

726def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str: 

727 """Returns a suitable Content-Range header: 

728 

729 >>> print(_get_content_range(None, 1, 4)) 

730 bytes 0-0/4 

731 >>> print(_get_content_range(1, 3, 4)) 

732 bytes 1-2/4 

733 >>> print(_get_content_range(None, None, 4)) 

734 bytes 0-3/4 

735 """ 

736 start = start or 0 

737 end = (end or total) - 1 

738 return "bytes %s-%s/%s" % (start, end, total) 

739 

740 

741def _int_or_none(val: str) -> Optional[int]: 

742 val = val.strip() 

743 if val == "": 

744 return None 

745 return int(val) 

746 

747 

def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.

    Bodies with any other content type are left untouched; malformed
    bodies and unsupported ``Content-Encoding`` values are skipped with
    a warning rather than raising.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            # Compressed form bodies are not supported; skip parsing.
            gen_log.warning(
                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
            )
            return
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
            uri_arguments = {}
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            gen_log.warning(
                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
            )
            return
        try:
            # Find the required "boundary" parameter among the
            # semicolon-separated fields of the Content-Type value.
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                raise ValueError("multipart boundary not found")
        except Exception as e:
            gen_log.warning("Invalid multipart/form-data: %s", e)

795 

796 

def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    Malformed parts are skipped with a warning rather than raising.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    # Everything before the closing boundary, split on each opening
    # boundary delimiter; each element is one part's headers + body.
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Part headers end at the first blank line (CRLF CRLF).
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        # Strip the header block and the trailing CRLF from the value.
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)

853 

854 

def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Normalize every accepted input type to seconds since the epoch.
    if isinstance(ts, datetime.datetime):
        # utctimetuple() converts aware datetimes to UTC and treats
        # naive ones as already being UTC.
        epoch_seconds = calendar.timegm(ts.utctimetuple())
    elif isinstance(ts, (tuple, time.struct_time)):
        epoch_seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        epoch_seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(epoch_seconds, usegmt=True)

877 

878 

# (method, path, version) triple for an HTTP/1.x request line,
# as produced by parse_request_start_line().
RequestStartLine = collections.namedtuple(
    "RequestStartLine", ["method", "path", "version"]
)

882 

883 

# Matches exactly "HTTP/1.<digit>" version strings in request lines.
_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")

885 

886 

def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `collections.namedtuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    fields = line.split(" ")
    if len(fields) != 3:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = fields
    if _http_version_re.match(version) is None:
        raise HTTPInputError(
            "Malformed HTTP version in HTTP Request-Line: %r" % version
        )
    return RequestStartLine(method, path, version)

906 

907 

# (version, code, reason) triple for an HTTP/1.x response status line,
# as produced by parse_response_start_line(); ``code`` is an int.
ResponseStartLine = collections.namedtuple(
    "ResponseStartLine", ["version", "code", "reason"]
)

911 

912 

# Groups: version, numeric status code, reason phrase (may be empty).
# NOTE(review): the '.' after "HTTP/1" is unescaped, so it matches any
# character — presumably it was meant to be r"HTTP/1\."; confirm before
# tightening, since changing it would reject inputs currently accepted.
_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)")

914 

915 

def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `collections.namedtuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    match = _http_response_line_re.match(native_str(line))
    if match is None:
        raise HTTPInputError("Error parsing response start line")
    version, code, reason = match.groups()
    return ResponseStartLine(version, int(code), reason)

929 

930 

931# _parseparam and _parse_header are copied and modified from python2.7's cgi.py 

932# The original 2.7 version of this code did not correctly support some 

933# combinations of semicolons and double quotes. 

934# It has also been modified to support valueless parameters as seen in 

935# websocket extension negotiations, and to support non-ascii values in 

936# RFC 2231/5987 format. 

937 

938 

939def _parseparam(s: str) -> Generator[str, None, None]: 

940 while s[:1] == ";": 

941 s = s[1:] 

942 end = s.find(";") 

943 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 

944 end = s.find(";", end + 1) 

945 if end < 0: 

946 end = len(s) 

947 f = s[:end] 

948 yield f.strip() 

949 s = s[end:] 

950 

951 

def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    fragments = _parseparam(";" + line)
    key = next(fragments)
    # email.utils.decode_params treats its first element specially, but the
    # main value was already consumed as ``key`` above, so feed a dummy.
    params = [("Dummy", "value")]
    for fragment in fragments:
        eq = fragment.find("=")
        if eq >= 0:
            param_name = fragment[:eq].strip().lower()
            param_value = native_str(fragment[eq + 1 :].strip())
            params.append((param_name, param_value))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # get rid of the dummy again
    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        # Strip surrounding double quotes from quoted-string values.
        if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        pdict[name] = value
    return key, pdict

985 

986 

987def _encode_header(key: str, pdict: Dict[str, str]) -> str: 

988 """Inverse of _parse_header. 

989 

990 >>> _encode_header('permessage-deflate', 

991 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None}) 

992 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover' 

993 """ 

994 if not pdict: 

995 return key 

996 out = [key] 

997 # Sort the parameters just to make it easy to test. 

998 for k, v in sorted(pdict.items()): 

999 if v is None: 

1000 out.append(k) 

1001 else: 

1002 # TODO: quote if necessary. 

1003 out.append("%s=%s" % (k, v)) 

1004 return "; ".join(out) 

1005 

1006 

def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """

    def _prep(value: Union[str, bytes]) -> bytes:
        # bytes pass through untouched; text is NFC-normalized (so visually
        # identical strings produce identical credentials) and UTF-8 encoded.
        if isinstance(value, bytes):
            return value
        if not isinstance(value, str):
            raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
        return unicodedata.normalize("NFC", value).encode("utf-8")

    return _prep(username) + b":" + _prep(password)

1021 

1022 

1023def doctests(): 

1024 # type: () -> unittest.TestSuite 

1025 import doctest 

1026 

1027 return doctest.DocTestSuite() 

1028 

1029 

# Matches "host:port" where port is all digits; host may itself contain
# colons (e.g. a bracketed IPv6 literal) because the first group is greedy.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    parsed = _netloc_re.match(netloc)
    if parsed is None:
        # No trailing ":<digits>": the whole netloc is the host.
        return (netloc, None)
    return (parsed.group(1), int(parsed.group(2)))

1048 

1049 

def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        yield from ((name, value) for value in values)

1058 

1059 

# Helpers for _unquote_cookie, taken from http.cookies:
# _OctalPatt matches a backslash followed by a three-digit octal escape
# (e.g. r"\012"); _QuotePatt matches a backslash-quoted single character
# (e.g. r'\"').
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = "".join


def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.5 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    # (Despite the ``str`` annotation, None is tolerated and returned as-is.)
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    # Scan left to right, copying literal runs and decoding whichever
    # escape sequence (octal or quoted-pair) occurs earliest each time.
    i = 0
    n = len(s)
    res = []
    while 0 <= i < n:
        o_match = _OctalPatt.search(s, i)
        q_match = _QuotePatt.search(s, i)
        if not o_match and not q_match:  # Neither matched
            res.append(s[i:])
            break
        # else:
        # j/k: start offsets of the earliest octal / quoted-pair match
        # from position i (-1 means that pattern did not match at all).
        j = k = -1
        if o_match:
            j = o_match.start(0)
        if q_match:
            k = q_match.start(0)
        if q_match and (not o_match or k < j):  # QuotePatt matched
            res.append(s[i:k])  # literal prefix before the escape
            res.append(s[k + 1])  # the escaped character itself
            i = k + 2
        else:  # OctalPatt matched
            res.append(s[i:j])
            res.append(chr(int(s[j + 1 : j + 4], 8)))  # decode \ooo octal
            i = j + 4
    return _nulljoin(res)

1113 

1114 

def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    result = {}  # type: Dict[str, str]
    for chunk in cookie.split(";"):
        name, eq, value = chunk.partition("=")
        if not eq:
            # No "=" present: treat the whole chunk as a value with an
            # empty name, per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", name
        name = name.strip()
        value = value.strip()
        if name or value:
            # unquote using Python's algorithm.
            result[name] = _unquote_cookie(value)
    return result