1#
2# Copyright 2009 Facebook
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain
6# a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
15
16"""HTTP utility code shared by clients and servers.
17
18This module also defines the `HTTPServerRequest` class which is exposed
19via `tornado.web.RequestHandler.request`.
20"""
21
22import calendar
23import collections.abc
24import copy
25import datetime
26import email.utils
27from functools import lru_cache
28from http.client import responses
29import http.cookies
30import re
31from ssl import SSLError
32import time
33import unicodedata
34from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
35
36from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37from tornado.util import ObjectDict, unicode_type
38
39
# responses is unused in this file, but we re-export it to other files.
# Reference it here so pyflakes doesn't complain about an unused import.
responses
43
44import typing
45from typing import (
46 Tuple,
47 Iterable,
48 List,
49 Mapping,
50 Iterator,
51 Dict,
52 Union,
53 Optional,
54 Awaitable,
55 Generator,
56 AnyStr,
57)
58
if typing.TYPE_CHECKING:
    from typing import Deque  # noqa: F401
    from asyncio import Future  # noqa: F401
    import unittest  # noqa: F401

    # This can be done unconditionally in the base class of HTTPHeaders
    # after we drop support for Python 3.8.
    StrMutableMapping = collections.abc.MutableMapping[str, str]
else:
    # At runtime on older Pythons the ABC cannot be subscripted, so fall
    # back to the unparameterized form; the alias above is for type checkers.
    StrMutableMapping = collections.abc.MutableMapping

# To be used with str.strip() and related methods.
# Deliberately limited to SP and HTAB: HTTP optional whitespace (OWS) does
# not include other unicode whitespace characters.
HTTP_WHITESPACE = " \t"

# Roughly the inverse of RequestHandler._VALID_HEADER_CHARS, but permits
# chars greater than \xFF (which may appear after decoding utf8).
_FORBIDDEN_HEADER_CHARS_RE = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")
76
77
class _ABNF:
    """Class that holds a subset of ABNF rules from RFC 9110 and friends.

    Class attributes are re.Pattern objects, with the same name as in the RFC
    (with hyphens changed to underscores). Currently contains only the subset
    we use (which is why this class is not public). Unfortunately the fields
    cannot be alphabetized as they are in the RFCs because of dependencies.
    """

    # RFC 3986 (URI)
    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
    # position, not only for their use in IPv6 literals.
    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
    uri_host = re.compile(
        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
    )
    uri_port = re.compile(r"[0-9]*")

    # RFC 5234 (ABNF)
    # VCHAR: any visible (printing) ASCII character.
    VCHAR = re.compile(r"[\x21-\x7E]")

    # RFC 9110 (HTTP Semantics)
    obs_text = re.compile(r"[\x80-\xFF]")
    field_vchar = re.compile(rf"(?:{VCHAR.pattern}|{obs_text.pattern})")
    # Not exactly from the RFC to simplify and combine field-content and field-value.
    # The leading empty alternative permits empty header values; interior SP/HTAB
    # are allowed in the third alternative but leading/trailing whitespace is not.
    field_value = re.compile(
        rf"|"
        rf"{field_vchar.pattern}|"
        rf"{field_vchar.pattern}(?:{field_vchar.pattern}| |\t)*{field_vchar.pattern}"
    )
    tchar = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]")
    token = re.compile(rf"{tchar.pattern}+")
    field_name = token
    method = token
    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")

    # RFC 9112 (HTTP/1.1)
    HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
    reason_phrase = re.compile(rf"(?:[\t ]|{VCHAR.pattern}|{obs_text.pattern})+")
    # request_target delegates to the URI RFC 3986, which is complex and may be
    # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
    # characters). Instead, we allow everything but control chars and whitespace.
    request_target = re.compile(rf"{field_vchar.pattern}+")
    request_line = re.compile(
        rf"({method.pattern}) ({request_target.pattern}) ({HTTP_version.pattern})"
    )
    status_code = re.compile(r"[0-9]{3}")
    status_line = re.compile(
        rf"({HTTP_version.pattern}) ({status_code.pattern}) ({reason_phrase.pattern})?"
    )
132
133
134@lru_cache(1000)
135def _normalize_header(name: str) -> str:
136 """Map a header name to Http-Header-Case.
137
138 >>> _normalize_header("coNtent-TYPE")
139 'Content-Type'
140 """
141 return "-".join([w.capitalize() for w in name.split("-")])
142
143
class HTTPHeaders(StrMutableMapping):
    """A dictionary that maintains ``Http-Header-Case`` for all keys.

    Supports multiple values per key via a pair of new methods,
    `add()` and `get_list()`. The regular dictionary interface
    returns a single value per key, with multiple values joined by a
    comma.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """

    @typing.overload
    def __init__(self, __arg: Mapping[str, List[str]]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, __arg: Mapping[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, *args: Tuple[str, str]) -> None:
        pass

    @typing.overload  # noqa: F811
    def __init__(self, **kwargs: str) -> None:
        pass

    def __init__(self, *args: typing.Any, **kwargs: str) -> None:  # noqa: F811
        # Invariant: _dict holds the comma-joined value for each normalized
        # header name, while _as_list holds every individual value, so
        # repeated headers (e.g. Set-Cookie) are not lost.
        self._dict = {}  # type: typing.Dict[str, str]
        self._as_list = {}  # type: typing.Dict[str, typing.List[str]]
        # Name of the most recently added header; needed by parse_line to
        # support obsolete multi-line (folded) headers.
        self._last_key = None  # type: Optional[str]
        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name: str, value: str, *, _chars_are_bytes: bool = True) -> None:
        """Adds a new value for the given key.

        Raises `HTTPInputError` if the name or value fails validation.
        With ``_chars_are_bytes=True`` the value is validated against the
        HTTP field-value grammar; otherwise only control characters are
        rejected (used for multipart/form-data headers; see `parse`).
        """
        if not _ABNF.field_name.fullmatch(name):
            raise HTTPInputError("Invalid header name %r" % name)
        if _chars_are_bytes:
            if not _ABNF.field_value.fullmatch(to_unicode(value)):
                # TODO: the fact we still support bytes here (contrary to type annotations)
                # and still test for it should probably be changed.
                raise HTTPInputError("Invalid header value %r" % value)
        else:
            if _FORBIDDEN_HEADER_CHARS_RE.search(value):
                raise HTTPInputError("Invalid header value %r" % value)
        norm_name = _normalize_header(name)
        self._last_key = norm_name
        if norm_name in self:
            # Repeated header: extend both the joined and list representations.
            self._dict[norm_name] = (
                native_str(self[norm_name]) + "," + native_str(value)
            )
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name: str) -> List[str]:
        """Returns all values for the given header as a list."""
        norm_name = _normalize_header(name)
        return self._as_list.get(norm_name, [])

    def get_all(self) -> Iterable[Tuple[str, str]]:
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line: str, *, _chars_are_bytes: bool = True) -> None:
        r"""Updates the dictionary with a single header line.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        >>> h.parse_line("Content-Length: 42\r\n")
        >>> h.get('content-type')
        'text/html'

        .. versionchanged:: 6.5
           Now supports lines with or without the trailing CRLF, making it possible
           to pass lines from AsyncHTTPClient's header_callback directly to this method.

        .. deprecated:: 6.5
           In Tornado 7.0, certain deprecated features of HTTP will become errors.
           Specifically, line folding and the use of LF (without CR) as a line separator
           will be removed.
        """
        if m := re.search(r"\r?\n$", line):
            # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line
            # terminator and ignore any preceding CR.
            # TODO(7.0): Remove this support for LF-only line endings.
            line = line[: m.start()]
        if not line:
            # Empty line, or the final CRLF of a header block.
            return
        if line[0] in HTTP_WHITESPACE:
            # continuation of a multi-line header
            # TODO(7.0): Remove support for line folding.
            if self._last_key is None:
                raise HTTPInputError("first header line cannot start with whitespace")
            # Folded continuations are joined to the previous value with a
            # single space, per the obsolete line-folding rules.
            new_part = " " + line.strip(HTTP_WHITESPACE)
            if _chars_are_bytes:
                if not _ABNF.field_value.fullmatch(new_part[1:]):
                    raise HTTPInputError("Invalid header continuation %r" % new_part)
            else:
                if _FORBIDDEN_HEADER_CHARS_RE.search(new_part):
                    raise HTTPInputError("Invalid header value %r" % new_part)
            self._as_list[self._last_key][-1] += new_part
            self._dict[self._last_key] += new_part
        else:
            try:
                name, value = line.split(":", 1)
            except ValueError:
                raise HTTPInputError("no colon in header line")
            self.add(
                name, value.strip(HTTP_WHITESPACE), _chars_are_bytes=_chars_are_bytes
            )

    @classmethod
    def parse(cls, headers: str, *, _chars_are_bytes: bool = True) -> "HTTPHeaders":
        """Returns a dictionary from HTTP header text.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]

        .. versionchanged:: 5.1

           Raises `HTTPInputError` on malformed headers instead of a
           mix of `KeyError`, and `ValueError`.

        """
        # _chars_are_bytes is a hack. This method is used in two places, HTTP headers (in which
        # non-ascii characters are to be interpreted as latin-1) and multipart/form-data (in which
        # they are to be interpreted as utf-8). For historical reasons, this method handled this by
        # expecting both callers to decode the headers to strings before parsing them. This wasn't a
        # problem until we started doing stricter validation of the characters allowed in HTTP
        # headers (using ABNF rules defined in terms of byte values), which inadvertently started
        # disallowing non-latin1 characters in multipart/form-data filenames.
        #
        # This method should have accepted bytes and a desired encoding, but this change is being
        # introduced in a patch release that shouldn't change the API. Instead, the _chars_are_bytes
        # flag decides whether to use HTTP-style ABNF validation (treating the string as bytes
        # smuggled through the latin1 encoding) or to accept any non-control unicode characters
        # as required by multipart/form-data. This method will change to accept bytes in a future
        # release.
        h = cls()

        # Split on "\n" by hand rather than with str.splitlines so that
        # other unicode line-break characters are passed through to
        # parse_line (and validated there) instead of acting as separators.
        start = 0
        while True:
            lf = headers.find("\n", start)
            if lf == -1:
                h.parse_line(headers[start:], _chars_are_bytes=_chars_are_bytes)
                break
            line = headers[start : lf + 1]
            start = lf + 1
            h.parse_line(line, _chars_are_bytes=_chars_are_bytes)
        return h

    # MutableMapping abstract method implementations.

    def __setitem__(self, name: str, value: str) -> None:
        # Replaces any existing values for this header (both representations).
        norm_name = _normalize_header(name)
        self._dict[norm_name] = value
        self._as_list[norm_name] = [value]

    def __getitem__(self, name: str) -> str:
        return self._dict[_normalize_header(name)]

    def __delitem__(self, name: str) -> None:
        norm_name = _normalize_header(name)
        del self._dict[norm_name]
        del self._as_list[norm_name]

    def __len__(self) -> int:
        return len(self._dict)

    def __iter__(self) -> Iterator[typing.Any]:
        return iter(self._dict)

    def copy(self) -> "HTTPHeaders":
        # defined in dict but not in MutableMapping.
        return HTTPHeaders(self)

    # Use our overridden copy method for the copy.copy module.
    # This makes shallow copies one level deeper, but preserves
    # the appearance that HTTPHeaders is a single container.
    __copy__ = copy

    def __str__(self) -> str:
        lines = []
        for name, value in self.get_all():
            lines.append(f"{name}: {value}\n")
        return "".join(lines)

    __unicode__ = __str__
369
370
class HTTPServerRequest:
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `.HTTPHeaders` dictionary-like object for request headers.  Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string.  If ``HTTPServer.xheaders`` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.

    .. versionchanged:: 3.1
       The list format of ``X-Forwarded-For`` is now supported.

    .. attribute:: protocol

       The protocol used, either "http" or "https".  If ``HTTPServer.xheaders``
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings.  Note that this is different from
       `.RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: query_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the query string.

       .. versionadded:: 3.2

    .. attribute:: body_arguments

       Same format as ``arguments``, but contains only arguments extracted
       from the request body.

       .. versionadded:: 3.2

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of `.HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.

    .. versionchanged:: 4.0
       Moved from ``tornado.httpserver.HTTPRequest``.
    """

    path = None  # type: str
    query = None  # type: str

    # HACK: Used for stream_request_body
    _body_future = None  # type: Future[None]

    def __init__(
        self,
        method: Optional[str] = None,
        uri: Optional[str] = None,
        version: str = "HTTP/1.0",
        headers: Optional[HTTPHeaders] = None,
        body: Optional[bytes] = None,
        files: Optional[Dict[str, List["HTTPFile"]]] = None,
        connection: Optional["HTTPConnection"] = None,
        start_line: Optional["RequestStartLine"] = None,
        server_connection: Optional[object] = None,
    ) -> None:
        if start_line is not None:
            # A parsed start line overrides the separately-passed components.
            method, uri, version = start_line
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or HTTPHeaders()
        self.body = body or b""

        # set remote IP and protocol
        context = getattr(connection, "context", None)
        self.remote_ip = getattr(context, "remote_ip", None)
        self.protocol = getattr(context, "protocol", "http")

        try:
            self.host = self.headers["Host"]
        except KeyError:
            if version == "HTTP/1.0":
                # HTTP/1.0 does not require the Host header.
                self.host = "127.0.0.1"
            else:
                raise HTTPInputError("Missing Host header")
        if not _ABNF.host.fullmatch(self.host):
            raise HTTPInputError("Invalid Host header: %r" % self.host)
        if "," in self.host:
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
            # Server MUST respond with 400 Bad Request if multiple
            # Host headers are present.
            #
            # We test for the presence of a comma instead of the number of
            # headers received because a proxy may have converted
            # multiple headers into a single comma-separated value
            # (per RFC 9110 section 5.3).
            #
            # This is technically a departure from the RFC since the ABNF
            # does not forbid commas in the host header. However, since
            # commas are not allowed in DNS names, it is appropriate to
            # disallow them. (The same argument could be made for other special
            # characters, but commas are the most problematic since they could
            # be used to exploit differences between proxies when multiple headers
            # are supplied).
            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
        self.host_name = split_host_and_port(self.host.lower())[0]
        self.files = files or {}
        self.connection = connection
        self.server_connection = server_connection
        self._start_time = time.time()
        self._finish_time = None  # type: Optional[float]

        if uri is not None:
            self.path, sep, self.query = uri.partition("?")
        self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
        self.query_arguments = copy.deepcopy(self.arguments)
        self.body_arguments = {}  # type: Dict[str, List[bytes]]

    @property
    def cookies(self) -> Dict[str, http.cookies.Morsel]:
        """A dictionary of ``http.cookies.Morsel`` objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = (
                http.cookies.SimpleCookie()
            )  # type: http.cookies.SimpleCookie
            if "Cookie" in self.headers:
                try:
                    parsed = parse_cookie(self.headers["Cookie"])
                except Exception:
                    # An unparseable Cookie header yields an empty cookie dict
                    # rather than failing the whole request.
                    pass
                else:
                    for k, v in parsed.items():
                        try:
                            self._cookies[k] = v
                        except Exception:
                            # SimpleCookie imposes some restrictions on keys;
                            # parse_cookie does not. Discard any cookies
                            # with disallowed keys.
                            pass
        return self._cookies

    def full_url(self) -> str:
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri  # type: ignore[operator]

    def request_time(self) -> float:
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(
        self, binary_form: bool = False
    ) -> Union[None, Dict, bytes]:
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer's
        `ssl.SSLContext.verify_mode` field must be set, e.g.::

            ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
            ssl_ctx.load_cert_chain("foo.crt", "foo.key")
            ssl_ctx.load_verify_locations("cacerts.pem")
            ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            server = HTTPServer(app, ssl_options=ssl_ctx)

        By default, the return value is a dictionary (or None, if no
        client certificate is present).  If ``binary_form`` is true, a
        DER-encoded form of the certificate is returned instead.  See
        SSLSocket.getpeercert() in the standard library for more
        details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            if self.connection is None:
                return None
            # TODO: add a method to HTTPConnection for this so it can work with HTTP/2
            return self.connection.stream.socket.getpeercert(  # type: ignore
                binary_form=binary_form
            )
        except SSLError:
            return None

    def _parse_body(self) -> None:
        # Parse the request body into body_arguments/files, then merge the
        # body arguments into the combined `arguments` dict.
        parse_body_arguments(
            self.headers.get("Content-Type", ""),
            self.body,
            self.body_arguments,
            self.files,
            self.headers,
        )

        for k, v in self.body_arguments.items():
            self.arguments.setdefault(k, []).extend(v)

    def __repr__(self) -> str:
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join([f"{n}={getattr(self, n)!r}" for n in attrs])
        return f"{self.__class__.__name__}({args})"
619
620
class HTTPInputError(Exception):
    """Raised when malformed HTTP data is received from a remote source
    (a request or a response).

    .. versionadded:: 4.0
    """
629
630
class HTTPOutputError(Exception):
    """Raised when an error occurs while producing HTTP output.

    .. versionadded:: 4.0
    """
638
639
class HTTPServerConnectionDelegate:
    """Interface implemented by applications to handle requests from `.HTTPServer`.

    .. versionadded:: 4.0
    """

    def start_request(
        self, server_conn: object, request_conn: "HTTPConnection"
    ) -> "HTTPMessageDelegate":
        """Called by the server each time a new request begins.

        :arg server_conn: an opaque object representing the long-lived
            (e.g. tcp-level) connection.
        :arg request_conn: a `.HTTPConnection` object for a single
            request/response exchange.

        Subclasses must override this and return a `.HTTPMessageDelegate`
        that will handle the request.
        """
        raise NotImplementedError()

    def on_close(self, server_conn: object) -> None:
        """Called once when a connection has been closed.

        :arg server_conn: a server connection that has previously been
            passed to ``start_request``.
        """
        return None
667
668
class HTTPMessageDelegate:
    """Interface for handling an HTTP request or response.

    .. versionadded:: 4.0
    """

    # TODO: genericize this class to avoid exposing the Union.
    def headers_received(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
    ) -> Optional[Awaitable[None]]:
        """Called once the HTTP headers have been received and parsed.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
            depending on whether this is a client or server message.
        :arg headers: a `.HTTPHeaders` instance.

        Some `.HTTPConnection` methods can only be called during
        ``headers_received``.

        May return a `.Future`; if it does, the body will not be read
        until that future is done.
        """
        return None

    def data_received(self, chunk: bytes) -> Optional[Awaitable[None]]:
        """Called for each chunk of body data as it arrives.

        May return a `.Future` for flow control.
        """
        return None

    def finish(self) -> None:
        """Called once after the final chunk of data has been received."""
        return None

    def on_connection_close(self) -> None:
        """Called if the connection closes without finishing the request.

        If ``headers_received`` is called, exactly one of ``finish`` or
        ``on_connection_close`` will be called, but never both.
        """
        return None
713
714
class HTTPConnection:
    """Interface through which applications write their responses.

    .. versionadded:: 4.0
    """

    def write_headers(
        self,
        start_line: Union["RequestStartLine", "ResponseStartLine"],
        headers: HTTPHeaders,
        chunk: Optional[bytes] = None,
    ) -> "Future[None]":
        """Write an HTTP header block.

        :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
        :arg headers: a `.HTTPHeaders` instance.
        :arg chunk: an optional first chunk of body data, allowing small
            responses to be written in the same call as their headers.

        The ``version`` field of ``start_line`` is ignored.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def write(self, chunk: bytes) -> "Future[None]":
        """Write a chunk of body data.

        Returns a future for flow control.

        .. versionchanged:: 6.0

           The ``callback`` argument was removed.
        """
        raise NotImplementedError()

    def finish(self) -> None:
        """Signal that the last chunk of body data has been written."""
        raise NotImplementedError()
759
760
def url_concat(
    url: str,
    args: Union[
        None, Dict[str, str], List[Tuple[str, str]], Tuple[Tuple[str, str], ...]
    ],
) -> str:
    """Append query arguments to ``url``, merging with any existing query string.

    ``args`` may be either a dictionary or a sequence of key-value pairs
    (the latter allows multiple values for the same key).

    >>> url_concat("http://example.com/foo", dict(c="d"))
    'http://example.com/foo?c=d'
    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
    'http://example.com/foo?a=b&c=d&c=d2'
    """
    if args is None:
        return url
    # Normalize args to a list of (key, value) pairs, rejecting other types.
    if isinstance(args, dict):
        extra_pairs = list(args.items())
    elif isinstance(args, (list, tuple)):
        extra_pairs = list(args)
    else:
        err = "'args' parameter should be dict, list or tuple. Not {0}".format(
            type(args)
        )
        raise TypeError(err)
    parsed = urlparse(url)
    # Preserve existing query parameters (including blank values), then
    # append the new ones and re-encode.
    pairs = parse_qsl(parsed.query, keep_blank_values=True)
    pairs.extend(extra_pairs)
    return urlunparse(
        (
            parsed.scheme,
            parsed.netloc,
            parsed.path,
            parsed.params,
            urlencode(pairs),
            parsed.fragment,
        )
    )
806
807
class HTTPFile(ObjectDict):
    """Represents a file uploaded via a form.

    For backwards compatibility, its instance attributes are also
    accessible as dictionary keys.

    * ``filename``
    * ``body``
    * ``content_type``
    """

    # Declared for type checkers; values are stored in the underlying
    # ObjectDict, so each is also readable as a dictionary key.
    filename: str  # filename as supplied in the form data (client-provided)
    body: bytes  # raw contents of the uploaded file
    content_type: str  # Content-Type reported for the file part
822
823
824def _parse_request_range(
825 range_header: str,
826) -> Optional[Tuple[Optional[int], Optional[int]]]:
827 """Parses a Range header.
828
829 Returns either ``None`` or tuple ``(start, end)``.
830 Note that while the HTTP headers use inclusive byte positions,
831 this method returns indexes suitable for use in slices.
832
833 >>> start, end = _parse_request_range("bytes=1-2")
834 >>> start, end
835 (1, 3)
836 >>> [0, 1, 2, 3, 4][start:end]
837 [1, 2]
838 >>> _parse_request_range("bytes=6-")
839 (6, None)
840 >>> _parse_request_range("bytes=-6")
841 (-6, None)
842 >>> _parse_request_range("bytes=-0")
843 (None, 0)
844 >>> _parse_request_range("bytes=")
845 (None, None)
846 >>> _parse_request_range("foo=42")
847 >>> _parse_request_range("bytes=1-2,6-10")
848
849 Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
850
851 See [0] for the details of the range header.
852
853 [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
854 """
855 unit, _, value = range_header.partition("=")
856 unit, value = unit.strip(), value.strip()
857 if unit != "bytes":
858 return None
859 start_b, _, end_b = value.partition("-")
860 try:
861 start = _int_or_none(start_b)
862 end = _int_or_none(end_b)
863 except ValueError:
864 return None
865 if end is not None:
866 if start is None:
867 if end != 0:
868 start = -end
869 end = None
870 else:
871 end += 1
872 return (start, end)
873
874
875def _get_content_range(start: Optional[int], end: Optional[int], total: int) -> str:
876 """Returns a suitable Content-Range header:
877
878 >>> print(_get_content_range(None, 1, 4))
879 bytes 0-0/4
880 >>> print(_get_content_range(1, 3, 4))
881 bytes 1-2/4
882 >>> print(_get_content_range(None, None, 4))
883 bytes 0-3/4
884 """
885 start = start or 0
886 end = (end or total) - 1
887 return f"bytes {start}-{end}/{total}"
888
889
890def _int_or_none(val: str) -> Optional[int]:
891 val = val.strip()
892 if val == "":
893 return None
894 return int(val)
895
896
def parse_body_arguments(
    content_type: str,
    body: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
    headers: Optional[HTTPHeaders] = None,
) -> None:
    """Parses a form request body.

    Supports ``application/x-www-form-urlencoded`` and
    ``multipart/form-data``.  The ``content_type`` parameter should be
    a string and ``body`` should be a byte string.  The ``arguments``
    and ``files`` parameters are dictionaries that will be updated
    with the parsed contents.

    Raises `HTTPInputError` for malformed bodies or unsupported
    ``Content-Encoding``; bodies with other content types are ignored.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        if headers and "Content-Encoding" in headers:
            # A compressed/encoded body cannot be parsed as-is; reject it.
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # real charset decoding will happen in RequestHandler.decode_argument()
            uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
        except Exception as e:
            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
        for name, values in uri_arguments.items():
            if values:
                arguments.setdefault(name, []).extend(values)
    elif content_type.startswith("multipart/form-data"):
        if headers and "Content-Encoding" in headers:
            raise HTTPInputError(
                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
            )
        try:
            # Locate the boundary parameter among the Content-Type fields.
            fields = content_type.split(";")
            for field in fields:
                k, sep, v = field.strip().partition("=")
                if k == "boundary" and v:
                    parse_multipart_form_data(utf8(v), body, arguments, files)
                    break
            else:
                # No boundary parameter was found. This raise is deliberately
                # inside the try block: the except clause below re-wraps it
                # with the "Invalid multipart/form-data" prefix.
                raise HTTPInputError("multipart boundary not found")
        except Exception as e:
            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
941
942
def parse_multipart_form_data(
    boundary: bytes,
    data: bytes,
    arguments: Dict[str, List[bytes]],
    files: Dict[str, List[HTTPFile]],
) -> None:
    """Parses a ``multipart/form-data`` body.

    The ``boundary`` and ``data`` parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body.

    Raises `HTTPInputError` if the body is malformed.

    .. versionchanged:: 5.1

       Now recognizes non-ASCII filenames in RFC 2231/5987
       (``filename*=``) format.
    """
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  I think we're also supposed to handle backslash-escapes
    # here but I'll save that until we see a client that uses them
    # in the wild.
    if boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    final_boundary_index = data.rfind(b"--" + boundary + b"--")
    if final_boundary_index == -1:
        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
    # Everything before the closing boundary, split on the opening boundary
    # delimiter, yields one chunk per part (plus an empty/preamble chunk).
    parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
    for part in parts:
        if not part:
            continue
        # Part headers are separated from the part body by a blank line.
        eoh = part.find(b"\r\n\r\n")
        if eoh == -1:
            raise HTTPInputError("multipart/form-data missing headers")
        # _chars_are_bytes=False: part headers are decoded as utf-8 text,
        # unlike top-level HTTP headers which are latin1-smuggled bytes.
        headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"), _chars_are_bytes=False)
        disp_header = headers.get("Content-Disposition", "")
        disposition, disp_params = _parse_header(disp_header)
        if disposition != "form-data" or not part.endswith(b"\r\n"):
            raise HTTPInputError("Invalid multipart/form-data")
        # Skip the header separator (4 bytes) and drop the trailing CRLF
        # that precedes the next boundary.
        value = part[eoh + 4 : -2]
        if not disp_params.get("name"):
            raise HTTPInputError("multipart/form-data missing name")
        name = disp_params["name"]
        if disp_params.get("filename"):
            # A filename parameter marks this part as a file upload.
            ctype = headers.get("Content-Type", "application/unknown")
            files.setdefault(name, []).append(
                HTTPFile(
                    filename=disp_params["filename"], body=value, content_type=ctype
                )
            )
        else:
            arguments.setdefault(name, []).append(value)
995
996
def format_timestamp(
    ts: Union[int, float, tuple, time.struct_time, datetime.datetime],
) -> str:
    """Formats a timestamp in the format used by HTTP.

    The argument may be a numeric timestamp as returned by `time.time`,
    a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
    object. Naive `datetime.datetime` objects are assumed to represent
    UTC; aware objects are converted to UTC before formatting.

    >>> format_timestamp(1359312200)
    'Sun, 27 Jan 2013 18:43:20 GMT'
    """
    # Normalize every accepted input type to seconds since the epoch,
    # then let the email module render the GMT date string.
    if isinstance(ts, datetime.datetime):
        epoch_seconds: Union[int, float] = calendar.timegm(ts.utctimetuple())
    elif isinstance(ts, (tuple, time.struct_time)):
        epoch_seconds = calendar.timegm(ts)
    elif isinstance(ts, (int, float)):
        epoch_seconds = ts
    else:
        raise TypeError("unknown timestamp type: %r" % ts)
    return email.utils.formatdate(epoch_seconds, usegmt=True)
1019
1020
class RequestStartLine(typing.NamedTuple):
    """The parsed components of an HTTP/1.x request line.

    Returned by `parse_request_start_line`.
    """

    # The HTTP method, e.g. "GET".
    method: str
    # The request target, e.g. "/foo".
    path: str
    # The protocol version string, e.g. "HTTP/1.1".
    version: str
1025
1026
def parse_request_start_line(line: str) -> RequestStartLine:
    """Returns a (method, path, version) tuple for an HTTP 1.x request line.

    The response is a `typing.NamedTuple`.

    >>> parse_request_start_line("GET /foo HTTP/1.1")
    RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
    """
    m = _ABNF.request_line.fullmatch(line)
    if m is None:
        # https://tools.ietf.org/html/rfc7230#section-3.1.1
        # invalid request-line SHOULD respond with a 400 (Bad Request)
        raise HTTPInputError("Malformed HTTP request line")
    method, path, version = m.group(1, 2, 3)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above doesn't use parse_request_start_line.
        # This could be folded into the regex but we don't want to deviate
        # from the ABNF in the RFCs.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return RequestStartLine(method, path, version)
1047
1048
class ResponseStartLine(typing.NamedTuple):
    """The parsed components of an HTTP/1.x response status line.

    Returned by `parse_response_start_line`.
    """

    # The protocol version string, e.g. "HTTP/1.1".
    version: str
    # The numeric status code, e.g. 200.
    code: int
    # The reason phrase, e.g. "OK".
    reason: str
1053
1054
def parse_response_start_line(line: str) -> ResponseStartLine:
    """Returns a (version, code, reason) tuple for an HTTP 1.x response line.

    The response is a `typing.NamedTuple`.

    >>> parse_response_start_line("HTTP/1.1 200 OK")
    ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
    """
    m = _ABNF.status_line.fullmatch(line)
    if m is None:
        raise HTTPInputError("Error parsing response start line")
    version = m.group(1)
    if not version.startswith("HTTP/1"):
        # HTTP/2 and above doesn't use parse_response_start_line.
        raise HTTPInputError("Unexpected HTTP version %r" % version)
    return ResponseStartLine(version, int(m.group(2)), m.group(3))
1071
1072
1073# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
1074# The original 2.7 version of this code did not correctly support some
1075# combinations of semicolons and double quotes.
1076# It has also been modified to support valueless parameters as seen in
1077# websocket extension negotiations, and to support non-ascii values in
1078# RFC 2231/5987 format.
1079
1080
def _parseparam(s: str) -> Generator[str, None, None]:
    """Yield the ``;``-separated pieces of a header parameter string.

    Semicolons inside (possibly backslash-escaped) double-quoted strings
    do not split pieces.  Each yielded piece is stripped of surrounding
    whitespace.  The input must start with ``;``.
    """
    while s[:1] == ";":
        s = s[1:]
        # Advance past any ";" that falls inside an open quoted string
        # (an odd number of unescaped double quotes precedes it).
        split_at = s.find(";")
        while split_at > 0 and (
            s.count('"', 0, split_at) - s.count('\\"', 0, split_at)
        ) % 2:
            split_at = s.find(";", split_at + 1)
        if split_at < 0:
            split_at = len(s)
        yield s[:split_at].strip()
        s = s[split_at:]
1092
1093
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
    r"""Parse a Content-type like header into its main value and options.

    Returns the main content-type and a dictionary of options.

    >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
    >>> ct, d = _parse_header(d)
    >>> ct
    'form-data'
    >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
    True
    >>> d['foo']
    'b\\a"r'
    """
    pieces = _parseparam(";" + line)
    main_value = next(pieces)
    # email.utils.decode_params treats its first element specially, but the
    # main value was already consumed above, so feed it a placeholder.
    raw_params = [("Dummy", "value")]
    for piece in pieces:
        name, eq, value = piece.partition("=")
        if eq:
            raw_params.append((name.strip().lower(), native_str(value.strip())))
    decoded = email.utils.decode_params(raw_params)
    decoded.pop(0)  # get rid of the placeholder again
    options = {}
    for name, raw_value in decoded:
        collapsed = email.utils.collapse_rfc2231_value(raw_value)
        # Strip one level of surrounding double quotes, if present.
        if len(collapsed) >= 2 and collapsed.startswith('"') and collapsed.endswith('"'):
            collapsed = collapsed[1:-1]
        options[name] = collapsed
    return main_value, options
1127
1128
def _encode_header(key: str, pdict: Dict[str, str]) -> str:
    """Inverse of _parse_header.

    >>> _encode_header('permessage-deflate',
    ...     {'client_max_window_bits': 15, 'client_no_context_takeover': None})
    'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
    """
    if not pdict:
        return key
    # Parameters are sorted to keep the output deterministic (and easy to
    # test).  A value of None emits a valueless parameter, as used in
    # websocket extension negotiation.
    # TODO: quote values if necessary.
    encoded = [key]
    encoded.extend(
        k if v is None else f"{k}={v}" for k, v in sorted(pdict.items())
    )
    return "; ".join(encoded)
1147
1148
def encode_username_password(
    username: Union[str, bytes], password: Union[str, bytes]
) -> bytes:
    """Encodes a username/password pair in the format used by HTTP auth.

    The return value is a byte string in the form ``username:password``.

    .. versionadded:: 5.1
    """

    def prepare(field: Union[str, bytes]) -> bytes:
        # str input is NFC-normalized before UTF-8 encoding; bytes input
        # passes through utf8() unchanged.
        if isinstance(field, unicode_type):
            field = unicodedata.normalize("NFC", field)
        return utf8(field)

    return prepare(username) + b":" + prepare(password)
1163
1164
def doctests():
    # type: () -> unittest.TestSuite
    """Returns a `unittest.TestSuite` running this module's doctests."""
    import doctest

    return doctest.DocTestSuite()
1170
1171
# Matches "host:port" where port is all digits; host is matched greedily.
_netloc_re = re.compile(r"^(.+):(\d+)$")


def split_host_and_port(netloc: str) -> Tuple[str, Optional[int]]:
    """Returns ``(host, port)`` tuple from ``netloc``.

    Returned ``port`` will be ``None`` if not present.

    .. versionadded:: 4.1
    """
    m = _netloc_re.match(netloc)
    if m is None:
        # No ":port" suffix: the whole netloc is the host.
        return (netloc, None)
    return (m.group(1), int(m.group(2)))
1190
1191
def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
    """Generator converting a result of ``parse_qs`` back to name-value pairs.

    .. versionadded:: 5.0
    """
    # Flatten each name -> [value, ...] entry into (name, value) pairs,
    # preserving the dict's insertion order.
    for name, values in qs.items():
        yield from ((name, value) for value in values)
1200
1201
# Substitution over backslash escapes: group 1 captures a three-digit
# octal escape, group 2 any other single escaped character.
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match) -> str:
    """Decode one backslash escape matched by ``_unquote_sub``."""
    return chr(int(m[1], 8)) if m[1] else m[2]
1210
1211
def _unquote_cookie(s: str) -> str:
    """Handle double quotes and escaping in cookie values.

    This method is copied verbatim from the Python 3.13 standard
    library (http.cookies._unquote) so we don't have to depend on
    non-public interfaces.  Keep it in sync with the stdlib rather
    than restyling it.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    # (Despite the annotation, a None input is tolerated and returned
    # unchanged.)
    if s is None or len(s) < 2:
        return s
    if s[0] != '"' or s[-1] != '"':
        return s

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    s = s[1:-1]

    # Check for special sequences. Examples:
    # \012 --> \n
    # \" --> "
    #
    return _unquote_sub(_unquote_replace, s)
1237
1238
def parse_cookie(cookie: str) -> Dict[str, str]:
    """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.

    This function attempts to mimic browser cookie parsing behavior;
    it specifically does not follow any of the cookie-related RFCs
    (because browsers don't either).

    The algorithm used is identical to that used by Django version 1.9.10.

    .. versionadded:: 4.4.2
    """
    parsed = {}  # type: Dict[str, str]
    for chunk in cookie.split(";"):
        name, eq, value = chunk.partition("=")
        if not eq:
            # No "=" present: treat the whole chunk as a value with an
            # empty name, per
            # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
            name, value = "", chunk
        name = name.strip()
        value = value.strip()
        if name or value:
            # unquote using Python's algorithm.
            parsed[name] = _unquote_cookie(value)
    return parsed