Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tornado/httputil.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# Copyright 2009 Facebook
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain
6# a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
16"""HTTP utility code shared by clients and servers.
18This module also defines the `HTTPServerRequest` class which is exposed
19via `tornado.web.RequestHandler.request`.
20"""
22import calendar
23import collections.abc
24import copy
25import datetime
26import email.utils
27from functools import lru_cache
28from http.client import responses
29import http.cookies
30import re
31from ssl import SSLError
32import time
33import unicodedata
34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
36from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37from tornado.util import ObjectDict, unicode_type
40# responses is unused in this file, but we re-export it to other files.
41# Reference it so pyflakes doesn't complain.
42responses
44import typing
45from typing import (
46 Tuple,
47 Iterable,
48 List,
49 Mapping,
50 Iterator,
51 Dict,
52 Union,
53 Optional,
54 Awaitable,
55 Generator,
56 AnyStr,
57)
if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

    # Subscripting MutableMapping at runtime requires Python 3.9+, so the
    # parameterized form is only used for type checking.
    # This can be done unconditionally in the base class of HTTPHeaders
    # after we drop support for Python 3.8.
    StrMutableMapping = collections.abc.MutableMapping[str, str]
else:
    StrMutableMapping = collections.abc.MutableMapping

# To be used with str.strip() and related methods.
HTTP_WHITESPACE = " \t"

# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
# chars greater than \xFF (which may appear after decoding utf8).
_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    # VCHAR: any visible (printing) ASCII character.
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # Matches: the empty string, a single field_vchar, or a run that both starts
    # and ends with a field_vchar (interior spaces/tabs allowed).
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    field_name = token
    method = token
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
134@lru_cache(1000)
135def _normalize_header(name: str) -> str:
136 """Map a header name to Http-Header-Case.
138 >>> _normalize_header("coNtent-TYPE")
139 'Content-Type'
140 """
141 return "-".join([w.capitalize() for w in name.split("-")])
class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Formally, HTTP headers are a mapping from a field name to a "combined field value",
        # which may be constructed from multiple field lines by joining them with commas.
        # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
        # so we maintain a mapping from field name to a list of field lines in self._as_list.
        # self._combined_cache is a cache of the combined field values derived from self._as_list
        # on demand (and cleared whenever the list is modified).
        self._as_list: dict[str, list[str]] = {}
        self._combined_cache: dict[str, str] = {}
        # Most recently added (normalized) header name; used by parse_line to
        # attach obs-fold continuation lines to the right header.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key."""
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Appending a field line invalidates any cached combined value.
            self._combined_cache.pop(norm_name, None)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-type')
        'text/html'

        .. versionchanged:: 6.5
           Now supports lines with or without the trailing CRLF, making it possible
           to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (with CR) as a line separator
           will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._combined_cache.pop(self._last_key, None)
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._combined_cache[norm_name] = value
        self._as_list[norm_name] = [value]

    def __contains__(self, name: object) -> bool:
        # This is an important optimization to avoid the expensive concatenation
        # in __getitem__ when it's not needed.
        if not isinstance(name, str):
            return False
        norm_name = _normalize_header(name)
        return norm_name in self._as_list

    def __getitem__(self, name: str) -> str:
        header = _normalize_header(name)
        if header not in self._combined_cache:
            # Combine multiple field lines with commas per RFC 9110.
            self._combined_cache[header] = ",".join(self._as_list[header])
        return self._combined_cache[header]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        # The cache entry may legitimately be absent: add() and parse_line()
        # pop it when a header gains a second field line, and it is only
        # repopulated on __getitem__. Unconditionally deleting it here raised
        # KeyError for headers that exist in _as_list but have a cold cache
        # (which also broke MutableMapping's pop()/clear() mixins). Use pop()
        # for the cache and let the _as_list delete provide the KeyError for
        # genuinely missing headers.
        self._combined_cache.pop(norm_name, None)
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._as_list)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._as_list)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append(f"{name}: {value}\n")
        return "".join(lines)

    __unicode__ = __str__
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.

    .. deprecated:: 6.5.2
       The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
       ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
       temporarily restored in 6.5.2.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line takes precedence over the individual
            # method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = host or self.headers["Host"]
        except KeyError:
            if version == "HTTP/1.0":
                # HTTP/1.0 does not require the Host header.
                self.host = "127.0.0.1"
            else:
                raise HTTPInputError("Missing Host header")
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            # Lazily parsed on first access and cached on the instance.
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # A malformed Cookie header yields an empty cookie dict
                    # rather than failing the request.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Request still in progress: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Populates self.body_arguments and self.files from the request body,
        # then merges body arguments into the combined self.arguments dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
        return f"{self.__class__.__name__}({args})"
class HTTPInputError(Exception):
    """Raised for malformed HTTP requests or responses received from a
    remote source (e.g. invalid start lines, header fields, or bodies).

    .. versionadded:: 4.0
    """

    pass
class HTTPOutputError(Exception):
    """Raised when locally-generated HTTP output is invalid or cannot be
    written.

    .. versionadded:: 4.0
    """

    pass
class HTTPServerConnectionDelegate:
    """Interface implemented by applications to receive requests from
    `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """Invoked by the server each time a new request begins.

        :arg server_conn: an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: a `.HTTPConnection` object for a single
            request/response exchange.

        Must return the `.HTTPMessageDelegate` that will handle this request.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """Invoked when a connection has been closed.

        :arg server_conn: a server connection that has previously been
            passed to ``start_request``.
        """
        pass
class HTTPMessageDelegate:
    """Interface for handling a single HTTP request or response message.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Invoked once the complete header block has been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called while
        ``headers_received`` is running.

        May return a `.Future`; if it does, the body will not be read
        until that future completes.
        """
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Invoked for each chunk of body data as it arrives.

        May return a `.Future` to apply flow control.
        """
        pass

    def finish(self) -> None:
        """Invoked after the final chunk of data has been received."""
        pass

    def on_connection_close(self) -> None:
        """Invoked if the connection closes before the request finishes.

        If ``headers_received`` is called, exactly one of ``finish`` or
        ``on_connection_close`` will follow — never both.
        """
        pass
class HTTPConnection:
    """Interface through which applications write their HTTP responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: an optional first chunk of body data, allowing small
            responses to be sent in a single call with their headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Write one chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Signal that the last chunk of body data has been written."""
        raise NotImplementedError()
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Append query arguments to ``url``, whether or not it already has a
    query string.

    ``args`` may be a dictionary or a list/tuple of key-value pairs (the
    latter allows repeated keys).

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed = urlparse(url)
    # Normalize args to a list of (key, value) pairs, rejecting other types.
    if isinstance(args, dict):
        extra_pairs = list(args.items())
    elif isinstance(args, (list, tuple)):
        extra_pairs = list(args)
    else:
        raise TypeError(
            "'args' parameter should be dict, list or tuple. Not {0}".format(type(args))
        )
    # Keep any existing query parameters (including blanks) and append.
    query_pairs = parse_qsl(parsed.query, keep_blank_values=True)
    query_pairs.extend(extra_pairs)
    return urlunparse(
        (
            parsed.scheme,
            parsed.netloc,
            parsed.path,
            parsed.params,
            urlencode(query_pairs),
            parsed.fragment,
        )
    )
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Filename of the upload as reported by the client (untrusted input).
    filename: str
    # Raw contents of the uploaded file.
    body: bytes
    # Content-Type from the multipart part's headers.
    content_type: str
843def _parse_request_range(
844 range_header: str,
845) -> Optional[Tuple[Optional[int], Optional[int]]]:
846 """Parses a Range header.
848 Returns either ``None`` or tuple ``(start, end)``.
849 Note that while the HTTP headers use inclusive byte positions,
850 this method returns indexes suitable for use in slices.
852 >>> start, end = _parse_request_range("bytes=1-2")
853 >>> start, end
854 (1, 3)
855 >>> [0, 1, 2, 3, 4][start:end]
856 [1, 2]
857 >>> _parse_request_range("bytes=6-")
858 (6, None)
859 >>> _parse_request_range("bytes=-6")
860 (-6, None)
861 >>> _parse_request_range("bytes=-0")
862 (None, 0)
863 >>> _parse_request_range("bytes=")
864 (None, None)
865 >>> _parse_request_range("foo=42")
866 >>> _parse_request_range("bytes=1-2,6-10")
868 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
870 See [0] for the details of the range header.
872 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
873 """
874 unit, _, value = range_header.partition("=")
875 unit, value = unit.strip(), value.strip()
876 if unit != "bytes":
877 return None
878 start_b, _, end_b = value.partition("-")
879 try:
880 start = _int_or_none(start_b)
881 end = _int_or_none(end_b)
882 except ValueError:
883 return None
884 if end is not None:
885 if start is None:
886 if end != 0:
887 start = -end
888 end = None
889 else:
890 end += 1
891 return (start, end)
894def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
895 """Returns a suitable Content-Range header:
897 >>> print(_get_content_range(None, 1, 4))
898 bytes 0-0/4
899 >>> print(_get_content_range(1, 3, 4))
900 bytes 1-2/4
901 >>> print(_get_content_range(None, None, 4))
902 bytes 0-3/4
903 """
904 start = start or 0
905 end = (end or total) - 1
906 return f"bytes {start}-{end}/{total}"
909def _int_or_none(val: str) -> Optional[int]:
910 val = val.strip()
911 if val == "":
912 return None
913 return int(val)
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.  Bodies with any other content type are
    ignored.  Raises `HTTPInputError` on malformed input or an
    unsupported ``Content-Encoding``.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        # A compressed/encoded body cannot be parsed as form data; reject
        # explicitly instead of silently producing no arguments.
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # Locate the boundary parameter among the Content-Type parameters.
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                # for/else: no boundary parameter found.  Raised inside the
                # try block so it is caught and re-wrapped below like any
                # other multipart parse failure.
                raise HTTPInputError("multipart boundary not found")
        except Exception as e:
            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Each part has the shape "<headers>\r\n\r\n<body>\r\n".
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        # _chars_are_bytes=False: part headers are decoded as utf-8 text
        # here, unlike HTTP headers proper (latin1-smuggled bytes).
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        # Slice off the header block (+4 for the blank line) and the
        # trailing CRLF that precedes the next boundary (-2).
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename marks this part as a file upload rather than a
            # plain form field.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Reduce every accepted input type to seconds since the epoch, then
    # let the email module render the HTTP-style GMT date string.
    if isinstance(ts, datetime.datetime):
        # utctimetuple() converts aware datetimes to UTC and treats
        # naive ones as already being in UTC.
        seconds = calendar.timegm(ts.utctimetuple())  # type: Union[int, float]
    elif isinstance(ts, (tuple, time.struct_time)):
        seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(seconds, usegmt=True)
class RequestStartLine(typing.NamedTuple):
    """The parsed (method, path, version) triple of an HTTP/1.x request line."""

    method: str  # e.g. "GET"
    path: str  # the request target, e.g. "/foo"
    version: str  # e.g. "HTTP/1.1"
def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `typing.NamedTuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    parsed = _ABNF.request_line.fullmatch(line)
    if parsed is None:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = parsed.group(1, 2, 3)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above never reach this parser. The version check is
        # kept out of the regex so the pattern stays faithful to the
        # ABNF in the RFCs.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return RequestStartLine(method, path, version)
class ResponseStartLine(typing.NamedTuple):
    """The parsed (version, code, reason) triple of an HTTP/1.x status line."""

    version: str  # e.g. "HTTP/1.1"
    code: int  # numeric status code, e.g. 200
    reason: str  # reason phrase, e.g. "OK"
def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `typing.NamedTuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    m = _ABNF.status_line.fullmatch(line)
    if m is None:
        raise HTTPInputError("Error parsing response start line")
    version, code, reason = m.group(1), int(m.group(2)), m.group(3)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above use different framing and never hit this parser.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return ResponseStartLine(version, code, reason)
1092# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
1093# The original 2.7 version of this code did not correctly support some
1094# combinations of semicolons and double quotes.
1095# It has also been modified to support valueless parameters as seen in
1096# websocket extension negotiations, and to support non-ascii values in
1097# RFC 2231/5987 format.
1098#
1099# _parseparam has been further modified with the logic from
1100# https://github.com/python/cpython/pull/136072/files
1101# to avoid quadratic behavior when parsing semicolons in quoted strings.
1102#
1103# TODO: See if we can switch to email.message.Message for this functionality.
1104# This is the suggested replacement for the cgi.py module now that cgi has
1105# been removed from recent versions of Python. We need to verify that
1106# the email module is consistent with our existing behavior (and all relevant
1107# RFCs for multipart/form-data) before making this change.
def _parseparam(s: str) -> Generator[str, None, None]:
    """Split *s* on ``;``, ignoring semicolons inside double-quoted strings.

    The caller (`_parse_header`) prepends a ``;`` so that every parameter,
    including the first, is preceded by a separator. Each yielded piece is
    whitespace-stripped; quotes are left in place for the caller to decode.
    """
    start = 0
    while s.find(";", start) == start:
        # Step over the ";" separator (and any empty ";;" runs).
        start += 1
        end = s.find(";", start)
        ind, diff = start, 0
        while end > 0:
            # Net effect: advance `end` to the first ";" preceded (counting
            # from `start`) by an even number of unescaped double quotes,
            # i.e. a ";" that lies outside any quoted string. The running
            # `diff` counter accumulates quote counts incrementally instead
            # of rescanning from `start` for each candidate, avoiding the
            # quadratic behavior noted in the comment block above.
            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
            if diff % 2 == 0:
                break
            end, ind = ind, s.find(";", end + 1)
        if end < 0:
            # No further ";": this parameter runs to the end of the string.
            end = len(s)
        f = s[start:end]
        yield f.strip()
        start = end
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    tokens = _parseparam(";" + line)
    key = next(tokens)
    # email.utils.decode_params treats its first element specially (as the
    # main value rather than a parameter), but we already consumed the key
    # above, so feed it a throwaway placeholder instead.
    params = [("Dummy", "value")]
    for token in tokens:
        eq = token.find("=")
        if eq >= 0:
            pname = token[:eq].strip().lower()
            pvalue = token[eq + 1 :].strip()
            params.append((pname, native_str(pvalue)))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # discard the placeholder again
    pdict = {}
    for pname, raw_value in decoded_params:
        # Collapse RFC 2231/5987 continuations and charset encodings into a
        # single string, then strip one level of surrounding double quotes.
        collapsed = email.utils.collapse_rfc2231_value(raw_value)
        if (
            len(collapsed) >= 2
            and collapsed.startswith('"')
            and collapsed.endswith('"')
        ):
            collapsed = collapsed[1:-1]
        pdict[pname] = collapsed
    return key, pdict
1163def _encode_header(key: str, pdict: Dict[str, str]) -> str:
1164 """Inverse of _parse_header.
1166 >>> _encode_header('permessage-deflate',
1167 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
1168 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
1169 """
1170 if not pdict:
1171 return key
1172 out = [key]
1173 # Sort the parameters just to make it easy to test.
1174 for k, v in sorted(pdict.items()):
1175 if v is None:
1176 out.append(k)
1177 else:
1178 # TODO: quote if necessary.
1179 out.append(f"{k}={v}")
1180 return "; ".join(out)
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """
    # NFC-normalize unicode input so that visually identical credentials
    # produce identical byte strings.
    if isinstance(username, unicode_type):
        username = unicodedata.normalize("NFC", username)
    if isinstance(password, unicode_type):
        password = unicodedata.normalize("NFC", password)
    return b":".join([utf8(username), utf8(password)])
def doctests():
    # type: () -> unittest.TestSuite
    """Return a `unittest.TestSuite` running this module's doctests."""
    import doctest

    return doctest.DocTestSuite()
# "host:port" — group 1 is the host (greedy, so only the final ":" that is
# followed by digits splits), group 2 is the decimal port.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    m = _netloc_re.match(netloc)
    if m is None:
        # No ":port" suffix: the whole netloc is the host.
        return (netloc, None)
    return (m.group(1), int(m.group(2)))
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        # Flatten each name's value list into individual pairs.
        yield from ((name, value) for value in values)
# Matches one backslash escape in a cookie value: either an octal escape
# like \012 (group 1) or a backslash followed by any single char (group 2).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match) -> str:
    # Substitution callback for _unquote_sub: decode an octal escape, or
    # return the escaped character itself.
    if m[1]:
        return chr(int(m[1], 8))
    else:
        return m[2]


def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    return _unquote_sub(_unquote_replace, s)
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    result = {}  # type: Dict[str, str]
    for chunk in cookie.split(";"):
        name, sep, value = chunk.partition("=")
        if not sep:
            # Chunk has no "=": assume an empty name per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", chunk
        name = name.strip()
        value = value.strip()
        if name or value:
            # unquote using Python's algorithm; later duplicates win.
            result[name] = _unquote_cookie(value)
    return result