Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tornado/httputil.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# Copyright 2009 Facebook
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain
6# a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
16"""HTTP utility code shared by clients and servers.
18This module also defines the `HTTPServerRequest` class which is exposed
19via `tornado.web.RequestHandler.request`.
20"""
22import calendar
23import collections.abc
24import copy
25import datetime
26import email.utils
27from functools import lru_cache
28from http.client import responses
29import http.cookies
30import re
31from ssl import SSLError
32import time
33import unicodedata
34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
36from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37from tornado.util import ObjectDict, unicode_type
40# responses is unused in this file, but we re-export it to other files.
41# Reference it so pyflakes doesn't complain.
42responses
44import typing
45from typing import (
46 Tuple,
47 Iterable,
48 List,
49 Mapping,
50 Iterator,
51 Dict,
52 Union,
53 Optional,
54 Awaitable,
55 Generator,
56 AnyStr,
57)
59if typing.TYPE_CHECKING:
60 from typing import Deque # noqa: F401
61 from asyncio import Future # noqa: F401
62 import unittest # noqa: F401
64 # This can be done unconditionally in the base class of HTTPHeaders
65 # after we drop support for Python 3.8.
66 StrMutableMapping = collections.abc.MutableMapping[str, str]
67else:
68 StrMutableMapping = collections.abc.MutableMapping
70# To be used with str.strip() and related methods.
71HTTP_WHITESPACE = " \t"
73# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
74# chars greater than \xFF (which may appear after decoding utf8).
75_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # The three alternatives match: the empty string, a single field_vchar, or a
    # run that begins and ends with a field_vchar (interior spaces/tabs allowed).
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    field_name = token
    method = token
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
134@lru_cache(1000)
135def _normalize_header(name: str) -> str:
136 """Map a header name to Http-Header-Case.
138 >>> _normalize_header("coNtent-TYPE")
139 'Content-Type'
140 """
141 return "-".join([w.capitalize() for w in name.split("-")])
class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`.  The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Formally, HTTP headers are a mapping from a field name to a "combined field value",
        # which may be constructed from multiple field lines by joining them with commas.
        # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
        # so we maintain a mapping from field name to a list of field lines in self._as_list.
        # self._combined_cache is a cache of the combined field values derived from self._as_list
        # on demand (and cleared whenever the list is modified).
        self._as_list: dict[str, list[str]] = {}
        self._combined_cache: dict[str, str] = {}
        # Most recently added normalized header name; parse_line uses it to
        # attach obs-fold continuation lines to the correct header.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key."""
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Appending a second value invalidates the cached combined value.
            self._combined_cache.pop(norm_name, None)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-type')
        'text/html'

        .. versionchanged:: 6.5
           Now supports lines with or without the trailing CRLF, making it possible
           to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (with CR) as a line separator
           will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._combined_cache.pop(self._last_key, None)
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        norm_name = _normalize_header(name)
        self._combined_cache[norm_name] = value
        self._as_list[norm_name] = [value]

    def __contains__(self, name: object) -> bool:
        # This is an important optimization to avoid the expensive concatenation
        # in __getitem__ when it's not needed.
        if not isinstance(name, str):
            return False
        return name in self._as_list

    def __getitem__(self, name: str) -> str:
        header = _normalize_header(name)
        if header not in self._combined_cache:
            # Lazily compute and cache the comma-joined combined field value.
            self._combined_cache[header] = ",".join(self._as_list[header])
        return self._combined_cache[header]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        # Bug fix: the cache entry may be absent (add() and parse_line() pop it
        # when a header gains a second value or a folded continuation), so a
        # plain `del` would raise KeyError for a header that does exist.
        self._combined_cache.pop(norm_name, None)
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._as_list)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._as_list)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append(f"{name}: {value}\n")
        return "".join(lines)

    __unicode__ = __str__
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.

    .. deprecated:: 6.5.2
       The ``host`` argument to the ``HTTPServerRequest`` constructor is deprecated. Use
       ``headers["Host"]`` instead. This argument was mistakenly removed in Tornado 6.5.0 and
       temporarily restored in 6.5.2.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        host: Optional[str] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line takes precedence over the individually
            # supplied method/uri/version arguments.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = host or self.headers["Host"]
        except KeyError:
            if version == "HTTP/1.0":
                # HTTP/1.0 does not require the Host header.
                self.host = "127.0.0.1"
            else:
                raise HTTPInputError("Missing Host header")
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None  # type: Optional[float]

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        # Note: query arguments are parsed even when uri is None (self.query
        # stays at its class-level default in that case).
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # A malformed Cookie header is treated as no cookies.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            # Still in flight: report elapsed time so far.
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Populates self.body_arguments and self.files from the request body,
        # then merges body arguments into the combined self.arguments map.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
        return f"{self.__class__.__name__}({args})"
class HTTPInputError(Exception):
    """Raised for malformed HTTP requests or responses from remote sources.

    .. versionadded:: 4.0
    """
class HTTPOutputError(Exception):
    """Raised for errors encountered while producing HTTP output.

    .. versionadded:: 4.0
    """
class HTTPServerConnectionDelegate:
    """Implement this interface to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """This method is called by the server when a new request has started.

        :arg server_conn: is an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: is a `.HTTPConnection` object for a single
            request/response exchange.

        This method should return a `.HTTPMessageDelegate`.
        """
        # Abstract: subclasses must override.
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """This method is called when a connection has been closed.

        :arg server_conn: is a server connection that has previously been
            passed to ``start_request``.
        """
        # Optional hook: the default implementation does nothing.
        pass
class HTTPMessageDelegate:
    """Implement this interface to handle an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called when the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does the body will not be read
        until it is done.
        """
        # Optional hook: the default implementation does nothing.
        pass

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called when a chunk of data has been received.

        May return a `.Future` for flow control.
        """
        # Optional hook: the default implementation does nothing.
        pass

    def finish(self) -> None:
        """Called after the last chunk of data has been received."""
        # Optional hook: the default implementation does nothing.
        pass

    def on_connection_close(self) -> None:
        """Called if the connection is closed without finishing the request.

        If ``headers_received`` is called, either ``finish`` or
        ``on_connection_close`` will be called, but not both.
        """
        # Optional hook: the default implementation does nothing.
        pass
class HTTPConnection:
    """Applications use this interface to write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: the first (optional) chunk of data.  This is an optimization
            so that small responses can be written in the same call as their
            headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connections (e.g. HTTP/1.x) must override.
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Writes a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        # Abstract: concrete connections must override.
        raise NotImplementedError()

    def finish(self) -> None:
        """Indicates that the last body data has been written."""
        # Abstract: concrete connections must override.
        raise NotImplementedError()
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Concatenate url and arguments regardless of whether
    url has existing query parameters.

    ``args`` may be either a dictionary or a list of key-value pairs
    (the latter allows for multiple values with the same key).

    Raises `TypeError` if ``args`` is not a dict, list, tuple, or None.

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    parsed_url = urlparse(url)
    # Existing query parameters are preserved; the new ones are appended.
    parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
    if isinstance(args, dict):
        parsed_query.extend(args.items())
    elif isinstance(args, (list, tuple)):
        parsed_query.extend(args)
    else:
        raise TypeError(
            f"'args' parameter should be dict, list or tuple. Not {type(args)}"
        )
    final_query = urlencode(parsed_query)
    # ParseResult is a namedtuple, so _replace is its public update API.
    return urlunparse(parsed_url._replace(query=final_query))
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Annotations for type checkers only; the values live in the underlying
    # dict (ObjectDict exposes dict keys as attributes).
    filename: str
    body: bytes
    content_type: str
842def _parse_request_range(
843 range_header: str,
844) -> Optional[Tuple[Optional[int], Optional[int]]]:
845 """Parses a Range header.
847 Returns either ``None`` or tuple ``(start, end)``.
848 Note that while the HTTP headers use inclusive byte positions,
849 this method returns indexes suitable for use in slices.
851 >>> start, end = _parse_request_range("bytes=1-2")
852 >>> start, end
853 (1, 3)
854 >>> [0, 1, 2, 3, 4][start:end]
855 [1, 2]
856 >>> _parse_request_range("bytes=6-")
857 (6, None)
858 >>> _parse_request_range("bytes=-6")
859 (-6, None)
860 >>> _parse_request_range("bytes=-0")
861 (None, 0)
862 >>> _parse_request_range("bytes=")
863 (None, None)
864 >>> _parse_request_range("foo=42")
865 >>> _parse_request_range("bytes=1-2,6-10")
867 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
869 See [0] for the details of the range header.
871 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
872 """
873 unit, _, value = range_header.partition("=")
874 unit, value = unit.strip(), value.strip()
875 if unit != "bytes":
876 return None
877 start_b, _, end_b = value.partition("-")
878 try:
879 start = _int_or_none(start_b)
880 end = _int_or_none(end_b)
881 except ValueError:
882 return None
883 if end is not None:
884 if start is None:
885 if end != 0:
886 start = -end
887 end = None
888 else:
889 end += 1
890 return (start, end)
893def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
894 """Returns a suitable Content-Range header:
896 >>> print(_get_content_range(None, 1, 4))
897 bytes 0-0/4
898 >>> print(_get_content_range(1, 3, 4))
899 bytes 1-2/4
900 >>> print(_get_content_range(None, None, 4))
901 bytes 0-3/4
902 """
903 start = start or 0
904 end = (end or total) - 1
905 return f"bytes {start}-{end}/{total}"
908def _int_or_none(val: str) -> Optional[int]:
909 val = val.strip()
910 if val == "":
911 return None
912 return int(val)
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.

    Raises `HTTPInputError` on malformed input or an unsupported
    Content-Encoding; bodies with an unrecognized content type are ignored.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            # A compressed/encoded body cannot be parsed as a form; reject it.
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # Locate the boundary parameter among the Content-Type's
            # semicolon-separated fields.
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                # for/else: the loop found no boundary parameter.
                raise HTTPInputError("multipart boundary not found")
        except Exception as e:
            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    Raises `HTTPInputError` on malformed input (missing final boundary,
    missing part headers, or a part that is not well-formed form-data).

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # A part's headers are separated from its body by a blank line.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        # _chars_are_bytes=False: part headers are utf-8 text (e.g. non-ASCII
        # filenames), unlike latin1-smuggled top-level HTTP headers.
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        # Skip the header/body separator (4 bytes) and drop the trailing CRLF.
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    if isinstance(ts, (int, float)):
        epoch_seconds = ts
    else:
        if isinstance(ts, datetime.datetime):
            # utctimetuple() converts aware datetimes to UTC and treats
            # naive ones as already UTC.
            ts = ts.utctimetuple()
        if not isinstance(ts, (tuple, time.struct_time)):
            raise TypeError("unknown timestamp type: %r" % ts)
        epoch_seconds = calendar.timegm(ts)
    return email.utils.formatdate(epoch_seconds, usegmt=True)
class RequestStartLine(typing.NamedTuple):
    """The three components of an HTTP/1.x request line, e.g. ``GET /foo HTTP/1.1``."""

    # HTTP method, e.g. "GET".
    method: str
    # Request target (path and optional query string), e.g. "/foo".
    path: str
    # Protocol version string, e.g. "HTTP/1.1".
    version: str
def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `typing.NamedTuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    match = _ABNF.request_line.fullmatch(line)
    if match is None:
        # Per RFC 7230 section 3.1.1, an invalid request-line SHOULD be
        # answered with a 400 (Bad Request).
        raise HTTPInputError("Malformed HTTP request line")
    start_line = RequestStartLine(*match.group(1, 2, 3))
    # HTTP/2 and above never go through this parser. The version check
    # is kept out of the regex so the regex stays aligned with the ABNF
    # in the RFCs.
    if not start_line.version.startswith("HTTP/1"):
        raise HTTPInputError("Unexpected HTTP version %r" % start_line.version)
    return start_line
class ResponseStartLine(typing.NamedTuple):
    """The three components of an HTTP/1.x status line, e.g. ``HTTP/1.1 200 OK``."""

    # Protocol version string, e.g. "HTTP/1.1".
    version: str
    # Numeric status code, e.g. 200.
    code: int
    # Reason phrase accompanying the status code, e.g. "OK".
    reason: str
def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `typing.NamedTuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    match = _ABNF.status_line.fullmatch(line)
    if match is None:
        raise HTTPInputError("Error parsing response start line")
    version, code, reason = match.group(1), match.group(2), match.group(3)
    start_line = ResponseStartLine(version, int(code), reason)
    # HTTP/2 and above use different framing and never reach this parser.
    if not start_line.version.startswith("HTTP/1"):
        raise HTTPInputError("Unexpected HTTP version %r" % start_line.version)
    return start_line
1091# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
1092# The original 2.7 version of this code did not correctly support some
1093# combinations of semicolons and double quotes.
1094# It has also been modified to support valueless parameters as seen in
1095# websocket extension negotiations, and to support non-ascii values in
1096# RFC 2231/5987 format.
1097#
1098# _parseparam has been further modified with the logic from
1099# https://github.com/python/cpython/pull/136072/files
1100# to avoid quadratic behavior when parsing semicolons in quoted strings.
1101#
1102# TODO: See if we can switch to email.message.Message for this functionality.
1103# This is the suggested replacement for the cgi.py module now that cgi has
1104# been removed from recent versions of Python. We need to verify that
1105# the email module is consistent with our existing behavior (and all relevant
1106# RFCs for multipart/form-data) before making this change.
def _parseparam(s: str) -> Generator[str, None, None]:
    """Yield the ``;``-separated parameters of ``s``, ignoring semicolons
    that fall inside double-quoted strings (quotes escaped as ``\\"`` do
    not count toward the quote balance).

    The input is expected to start with a ``;`` (the caller in
    `_parse_header` prepends one); each yielded parameter is stripped of
    surrounding whitespace.
    """
    start = 0
    while s.find(";", start) == start:
        start += 1
        # Candidate end of this parameter: the next semicolon. It is only
        # a real boundary if the number of unescaped quotes seen since
        # `start` is even (i.e. we are not inside a quoted string).
        end = s.find(";", start)
        ind, diff = start, 0
        while end > 0:
            # Accumulate the unescaped-quote count one segment at a time
            # instead of recounting from `start` on every candidate; this
            # keeps the scan linear (see the CPython fix referenced in the
            # comment block above this function).
            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
            if diff % 2 == 0:
                break  # quotes balanced: `end` is a true boundary
            # Leapfrog to the next candidate semicolon. The swap visits
            # the intervening segments out of order, but each segment's
            # quotes are counted exactly once.
            end, ind = ind, s.find(";", end + 1)
        if end < 0:
            # No further semicolon: this parameter runs to end of string.
            end = len(s)
        f = s[start:end]
        yield f.strip()
        start = end
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    parts = _parseparam(";" + line)
    key = next(parts)
    # email.utils.decode_params treats its first element specially, but we
    # already consumed the key above, so feed it a placeholder and discard
    # it afterwards.
    params = [("Dummy", "value")]
    for part in parts:
        name, sep, value = part.partition("=")
        if sep:
            params.append((name.strip().lower(), native_str(value.strip())))
    decoded_params = email.utils.decode_params(params)
    decoded_params.pop(0)  # drop the placeholder
    pdict = {}
    for name, raw_value in decoded_params:
        # Collapse RFC 2231/5987 encoded values (e.g. filename*=) into
        # plain strings, then strip surrounding double quotes if present.
        value = email.utils.collapse_rfc2231_value(raw_value)
        if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        pdict[name] = value
    return key, pdict
1162def _encode_header(key: str, pdict: Dict[str, str]) -> str:
1163 """Inverse of _parse_header.
1165 >>> _encode_header('permessage-deflate',
1166 ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
1167 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
1168 """
1169 if not pdict:
1170 return key
1171 out = [key]
1172 # Sort the parameters just to make it easy to test.
1173 for k, v in sorted(pdict.items()):
1174 if v is None:
1175 out.append(k)
1176 else:
1177 # TODO: quote if necessary.
1178 out.append(f"{k}={v}")
1179 return "; ".join(out)
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """

    def _nfc_utf8(value: Union[str, bytes]) -> bytes:
        # Normalize unicode text to NFC before UTF-8 encoding; byte
        # strings are passed through to utf8() unchanged.
        if isinstance(value, unicode_type):
            value = unicodedata.normalize("NFC", value)
        return utf8(value)

    return _nfc_utf8(username) + b":" + _nfc_utf8(password)
def doctests():
    # type: () -> unittest.TestSuite
    """Return a `unittest.TestSuite` running this module's doctest examples."""
    import doctest

    return doctest.DocTestSuite()
# host:port with a strictly numeric port; the greedy host group means the
# split happens at the last colon.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    match = _netloc_re.match(netloc)
    if match is None:
        # No numeric port suffix: the whole string is the host.
        return (netloc, None)
    return (match.group(1), int(match.group(2)))
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    for name, values in qs.items():
        yield from ((name, value) for value in values)
1235_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
1238def _unquote_replace(m: re.Match) -> str:
1239 if m[1]:
1240 return chr(int(m[1], 8))
1241 else:
1242 return m[2]
def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.  Keep it in sync with CPython when updating.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    # NOTE(review): callers in this file always pass a str, but the
    # CPython original also tolerates None, so that check is kept.
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    return _unquote_sub(_unquote_replace, s)
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    result = {}  # type: Dict[str, str]
    for chunk in cookie.split(";"):
        name, sep, value = chunk.partition("=")
        if not sep:
            # No '=' at all: treat the whole chunk as a value with an
            # empty name, per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", chunk
        name = name.strip()
        value = value.strip()
        if name or value:
            # unquote using Python's algorithm.
            result[name] = _unquote_cookie(value)
    return result