#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""HTTP utility code shared by clients and servers.

This module also defines the `HTTPServerRequest` class which is exposed
via `tornado.web.RequestHandler.request`.
"""

import calendar
import collections.abc
import copy
import datetime
import email.utils
from functools import lru_cache
from http.client import responses
import http.cookies
import re
from ssl import SSLError
import time
import unicodedata
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

from tornado.escape import native_str, parse_qs_bytes, utf8
from tornado.log import gen_log
from tornado.util import ObjectDict, unicode_type


# responses is unused in this file, but we re-export it to other files.
# Reference it so pyflakes doesn't complain.
responses

import typing
from typing import (
    Tuple,
    Iterable,
    List,
    Mapping,
    Iterator,
    Dict,
    Union,
    Optional,
    Awaitable,
    Generator,
    AnyStr,
)

if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"


@lru_cache(1000)
def _normalize_header(name: str) -> str:
    """Map a header name to Http-Header-Case.

    >>> _normalize_header("coNtent-TYPE")
    'Content-Type'
    """
    return "-".join([w.capitalize() for w in name.split("-")])


class HTTPHeaders(collections.abc.MutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k, v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k, v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str) -> None:
        """Adds a new value for the given key."""
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str) -> None:
        """Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # continuation of a multi-line header
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.lstrip(HTTP_WHITESPACE)
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(name, value.strip(HTTP_WHITESPACE))

    @classmethod
    def parse(cls, headers: str) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError` and `ValueError`.

        """
        h = cls()
        # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
        # terminator and ignore any preceding CR.
        for line in headers.split("\n"):
            if line.endswith("\r"):
                line = line[:-1]
            if line:
                h.parse_line(line)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append("%s: %s\n" % (name, value))
        return "".join(lines)

    __unicode__ = __str__


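# Illustrative usage sketch (not part of the upstream module): how the
# parsing and multi-value behavior documented above fit together. The helper
# name below is hypothetical.
def _example_httpheaders_usage() -> "HTTPHeaders":
    # A folded (continuation) line is joined onto the previous header with a
    # single space, per parse_line() above.
    h = HTTPHeaders.parse("User-Agent: curl/7.68.0\r\n (Linux)\r\nAccept: */*\r\n")
    assert h["User-Agent"] == "curl/7.68.0 (Linux)"
    # Repeated headers keep every value in get_list(), while the plain
    # mapping interface joins them with commas.
    h.add("Set-Cookie", "A=B")
    h.add("Set-Cookie", "C=D")
    assert h.get_list("Set-Cookie") == ["A=B", "C=D"]
    assert h["set-cookie"] == "A=B,C=D"
    return h

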
class HTTPServerRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers. Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string. If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https". If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps argument names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings. Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present). If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead. See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s)" % (self.__class__.__name__, args)


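# Illustrative usage sketch (not part of the upstream module): constructing a
# bare HTTPServerRequest by hand, as a test might, to show how ``uri`` is
# split into ``path``/``query`` and how ``arguments`` maps names to byte
# values. The helper name is hypothetical; real requests are built by the
# HTTP server, not by application code.
def _example_httpserverrequest() -> "HTTPServerRequest":
    req = HTTPServerRequest(method="GET", uri="/search?q=tornado&q=web")
    assert req.path == "/search"
    assert req.query == "q=tornado&q=web"
    # Argument names are str, values are lists of byte strings.
    assert req.arguments == {"q": [b"tornado", b"web"]}
    # With no connection and no Host header, defaults apply.
    assert req.full_url() == "http://127.0.0.1/search?q=tornado&q=web"
    return req

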
class HTTPInputError(Exception):
    """Exception class for malformed HTTP requests or responses
    from remote sources.

    .. versionadded:: 4.0
    """

    pass


class HTTPOutputError(Exception):
    """Exception class for errors in HTTP output.

    .. versionadded:: 4.0
    """

    pass


class HTTPServerConnectionDelegate(object):
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        pass


class HTTPMessageDelegate(object):
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        pass


class HTTPConnection(object):
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data. This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        raise NotImplementedError()


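# Illustrative usage sketch (not part of the upstream module): a minimal
# HTTPMessageDelegate that buffers the request body and echoes it back
# through the HTTPConnection interface above. The class name is hypothetical;
# a real server-side delegate is normally created by
# HTTPServerConnectionDelegate.start_request.
class _EchoDelegateExample(HTTPMessageDelegate):
    def __init__(self, connection: HTTPConnection) -> None:
        self.connection = connection
        self.chunks = []  # type: List[bytes]

    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> None:
        self.chunks = []

    def data_received(self, chunk: bytes) -> None:
        # Returning a Future here instead would pause reading (flow control).
        self.chunks.append(chunk)

    def finish(self) -> None:
        body = b"".join(self.chunks)
        headers = HTTPHeaders({"Content-Length": str(len(body))})
        # ResponseStartLine is defined later in this module; its version
        # field is ignored by write_headers().
        self.connection.write_headers(
            ResponseStartLine("HTTP/1.1", 200, "OK"), headers, body
        )
        self.connection.finish()

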
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key).

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    if isinstance(args, dict):
        parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
        parsed_query.extend(args.items())
    elif isinstance(args, list) or isinstance(args, tuple):
        parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
        parsed_query.extend(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    final_query = urlencode(parsed_query)
    url = urlunparse(
        (
            parsed_url[0],
            parsed_url[1],
            parsed_url[2],
            parsed_url[3],
            final_query,
            parsed_url[5],
        )
    )
    return url


class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    filename: str
    body: bytes
    content_type: str


def _parse_request_range(
    range_header: str,
) -> Optional[Tuple[Optional[int], Optional[int]]]:
    """Parses a Range header.

    Returns either ``None`` or tuple ``(start, end)``.
    Note that while the HTTP headers use inclusive byte positions,
    this method returns indexes suitable for use in slices.

    >>> start, end = _parse_request_range("bytes=1-2")
    >>> start, end
    (1, 3)
    >>> [0, 1, 2, 3, 4][start:end]
    [1, 2]
    >>> _parse_request_range("bytes=6-")
    (6, None)
    >>> _parse_request_range("bytes=-6")
    (-6, None)
    >>> _parse_request_range("bytes=-0")
    (None, 0)
    >>> _parse_request_range("bytes=")
    (None, None)
    >>> _parse_request_range("foo=42")
    >>> _parse_request_range("bytes=1-2,6-10")

    Note: only supports one range (e.g. ``bytes=1-2,6-10`` is not allowed).

    See [0] for the details of the range header.

    [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
    """
    unit, _, value = range_header.partition("=")
    unit, value = unit.strip(), value.strip()
    if unit != "bytes":
        return None
    start_b, _, end_b = value.partition("-")
    try:
        start = _int_or_none(start_b)
        end = _int_or_none(end_b)
    except ValueError:
        return None
    if end is not None:
        if start is None:
            if end != 0:
                start = -end
                end = None
        else:
            end += 1
    return (start, end)


def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
    """Returns a suitable Content-Range header:

    >>> print(_get_content_range(None, 1, 4))
    bytes 0-0/4
    >>> print(_get_content_range(1, 3, 4))
    bytes 1-2/4
    >>> print(_get_content_range(None, None, 4))
    bytes 0-3/4
    """
    start = start or 0
    end = (end or total) - 1
    return "bytes %s-%s/%s" % (start, end, total)


def _int_or_none(val: str) -> Optional[int]:
    val = val.strip()
    if val == "":
        return None
    return int(val)


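# Illustrative usage sketch (not part of the upstream module): combining the
# two helpers above to answer a single-range request, roughly the way a
# static file handler would. The function name is hypothetical and edge cases
# (suffix ranges on empty data, out-of-range starts) are only lightly handled.
def _example_apply_range(data: bytes, range_header: str) -> Tuple[bytes, str]:
    size = len(data)
    parsed = _parse_request_range(range_header)
    if parsed is None:
        # Unsupported or malformed Range header; serve the whole entity.
        return data, _get_content_range(0, size, size)
    start, end = parsed
    if start is not None and start < 0:
        # A suffix range such as "bytes=-500" comes back as a negative start.
        start += size
    if end is None or end > size:
        end = size
    # (start, end) are already slice-friendly, unlike the inclusive byte
    # positions used on the wire.
    return data[start:end], _get_content_range(start, end, size)

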
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``. The ``content_type`` parameter should be
    a string and ``body`` should be a byte string. The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            gen_log.warning(
                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
            )
            return
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
            uri_arguments = {}
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            gen_log.warning(
                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
            )
            return
        try:
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                raise ValueError("multipart boundary not found")
        except Exception as e:
            gen_log.warning("Invalid multipart/form-data: %s", e)


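# Illustrative usage sketch (not part of the upstream module): feeding a
# urlencoded body through parse_body_arguments. The dictionaries are updated
# in place; names are str and values are lists of byte strings. The helper
# name is hypothetical.
def _example_parse_urlencoded() -> Dict[str, List[bytes]]:
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    parse_body_arguments(
        "application/x-www-form-urlencoded; charset=UTF-8",
        b"name=Tornado&tag=web&tag=python",
        arguments,
        files,
    )
    assert arguments == {"name": [b"Tornado"], "tag": [b"web", b"python"]}
    assert files == {}
    return arguments

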
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp). I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        gen_log.warning("Invalid multipart/form-data: no final boundary")
        return
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            gen_log.warning("multipart/form-data missing headers")
            continue
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            gen_log.warning("Invalid multipart/form-data")
            continue
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            gen_log.warning("multipart/form-data value missing name")
            continue
        name = disp_params["name"]
        if disp_params.get("filename"):
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)


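# Illustrative usage sketch (not part of the upstream module): parsing a
# hand-built multipart/form-data body with one plain field and one file part.
# The helper name and the boundary value are made up for the example.
def _example_parse_multipart() -> None:
    boundary = b"boundary123"
    body = (
        b"--boundary123\r\n"
        b'Content-Disposition: form-data; name="title"\r\n'
        b"\r\n"
        b"hello\r\n"
        b"--boundary123\r\n"
        b'Content-Disposition: form-data; name="upload"; filename="a.txt"\r\n'
        b"Content-Type: text/plain\r\n"
        b"\r\n"
        b"file contents\r\n"
        b"--boundary123--\r\n"
    )
    arguments = {}  # type: Dict[str, List[bytes]]
    files = {}  # type: Dict[str, List[HTTPFile]]
    parse_multipart_form_data(boundary, body, arguments, files)
    # Plain fields land in ``arguments``; parts with a filename land in
    # ``files`` as HTTPFile objects.
    assert arguments == {"title": [b"hello"]}
    assert files["upload"][0].filename == "a.txt"
    assert files["upload"][0].body == b"file contents"
    assert files["upload"][0].content_type == "text/plain"

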
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime]
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, (int, float)):
        time_num = ts
    elif isinstance(ts, (tuple, time.struct_time)):
        time_num = calendar.timegm(ts)
    elif isinstance(ts, datetime.datetime):
        time_num = calendar.timegm(ts.utctimetuple())
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(time_num, usegmt=True)


RequestStartLine = collections.namedtuple(
    "RequestStartLine", ["method", "path", "version"]
)


_http_version_re = re.compile(r"^HTTP/1\.[0-9]$")


def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `collections.namedtuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    try:
        method, path, version = line.split(" ")
    except ValueError:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    if not _http_version_re.match(version):
        raise HTTPInputError(
            "Malformed HTTP version in HTTP Request-Line: %r" % version
        )
    return RequestStartLine(method, path, version)


ResponseStartLine = collections.namedtuple(
    "ResponseStartLine", ["version", "code", "reason"]
)


_http_response_line_re = re.compile(r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)")


def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `collections.namedtuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    line = native_str(line)
    match = _http_response_line_re.match(line)
    if not match:
        raise HTTPInputError("Error parsing response start line")
    return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))


# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
# The original 2.7 version of this code did not correctly support some
# combinations of semicolons and double quotes.
# It has also been modified to support valueless parameters as seen in
# websocket extension negotiations, and to support non-ascii values in
# RFC 2231/5987 format.


def _parseparam(s: str) -> Generator[str, None, None]:
    while s[:1] == ";":
        s = s[1:]
        end = s.find(";")
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(";", end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        yield f.strip()
        s = s[end:]


def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    parts = _parseparam(";" + line)
    key = next(parts)
    # decode_params treats first argument special, but we already stripped key
    params = [("Dummy", "value")]
    for p in parts:
        i = p.find("=")
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i + 1 :].strip()
            params.append((name, native_str(value)))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # get rid of the dummy again
    pdict = {}
    for name, decoded_value in decoded_params:
        value = email.utils.collapse_rfc2231_value(decoded_value)
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        pdict[name] = value
    return key, pdict


def _encode_header(key: str, pdict: Dict[str, str]) -> str:
    """Inverse of _parse_header.

    >>> _encode_header('permessage-deflate',
    ...     {'client_max_window_bits': 15, 'client_no_context_takeover': None})
    'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
    """
    if not pdict:
        return key
    out = [key]
    # Sort the parameters just to make it easy to test.
    for k, v in sorted(pdict.items()):
        if v is None:
            out.append(k)
        else:
            # TODO: quote if necessary.
            out.append("%s=%s" % (k, v))
    return "; ".join(out)


def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """
    if isinstance(username, unicode_type):
        username = unicodedata.normalize("NFC", username)
    if isinstance(password, unicode_type):
        password = unicodedata.normalize("NFC", password)
    return utf8(username) + b":" + utf8(password)


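# Illustrative usage sketch (not part of the upstream module): turning a
# username/password pair into an HTTP Basic ``Authorization`` header value.
# The helper name is hypothetical; only encode_username_password comes from
# this module.
def _example_basic_auth_header(username: str, password: str) -> str:
    import base64

    credentials = encode_username_password(username, password)  # b"user:pass"
    return "Basic " + base64.b64encode(credentials).decode("ascii")

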
def doctests():
    # type: () -> unittest.TestSuite
    import doctest

    return doctest.DocTestSuite()


_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    match = _netloc_re.match(netloc)
    if match:
        host = match.group(1)
        port = int(match.group(2))  # type: Optional[int]
    else:
        host = netloc
        port = None
    return (host, port)


def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for k, vs in qs.items():
        for v in vs:
            yield (k, v)


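# Illustrative usage sketch (not part of the upstream module): qs_to_qsl
# flattens the dict-of-lists produced by ``parse_qs``-style parsers back into
# the name/value pairs that ``urlencode`` expects. The helper name is
# hypothetical.
def _example_reencode_query(qs: Dict[str, List[str]]) -> str:
    # e.g. {"a": ["1", "2"], "b": ["3"]} -> "a=1&a=2&b=3"
    return urlencode(list(qs_to_qsl(qs)))

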
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
_nulljoin = "".join


def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.5 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    i = 0
    n = len(s)
    res = []
    while 0 <= i < n:
        o_match = _OctalPatt.search(s, i)
        q_match = _QuotePatt.search(s, i)
        if not o_match and not q_match:  # Neither matched
            res.append(s[i:])
            break
        # else:
        j = k = -1
        if o_match:
            j = o_match.start(0)
        if q_match:
            k = q_match.start(0)
        if q_match and (not o_match or k < j):  # QuotePatt matched
            res.append(s[i:k])
            res.append(s[k + 1])
            i = k + 2
        else:  # OctalPatt matched
            res.append(s[i:j])
            res.append(chr(int(s[j + 1 : j + 4], 8)))
            i = j + 4
    return _nulljoin(res)


def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    cookiedict = {}
    for chunk in cookie.split(str(";")):
        if str("=") in chunk:
            key, val = chunk.split(str("="), 1)
        else:
            # Assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            key, val = str(""), chunk
        key, val = key.strip(), val.strip()
        if key or val:
            # unquote using Python's algorithm.
            cookiedict[key] = _unquote_cookie(val)
    return cookiedict


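# Illustrative usage sketch (not part of the upstream module): parse_cookie
# is intentionally lenient, keeping whatever it can recover from a messy
# Cookie header. The helper name is hypothetical.
def _example_parse_cookie() -> Dict[str, str]:
    parsed = parse_cookie('session=abc123; theme="dark mode"; ; =orphan')
    # Quoted values are unquoted, empty chunks are dropped, and a chunk with
    # no "=" is treated as a value with an empty name.
    assert parsed == {"session": "abc123", "theme": "dark mode", "": "orphan"}
    return parsed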