Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/http/client.py: 18%
741 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:05 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:05 +0000
r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
71import email.parser
72import email.message
73import http
74import io
75import re
76import socket
77import collections.abc
78from urllib.parse import urlsplit
# Public API of this module.  HTTPMessage, parse_headers(), and the HTTP
# status code constants are intentionally omitted for simplicity.
__all__ = [
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "LineTooLong",
    "RemoteDisconnected",
    "error",
    "responses",
]
# Default TCP ports for plain and TLS-wrapped HTTP.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder for response attributes that have not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# Connection states (see the state table in the module docstring).
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# Backwards-compatibility hack: expose every HTTPStatus member (OK,
# NOT_FOUND, ...) as a module-level name.
globals().update(http.HTTPStatus.__members__)

# Another backwards-compatibility hack: map each status code to its
# official W3C reason phrase.
responses = {
    status: status.phrase
    for status in http.HTTPStatus.__members__.values()
}

# Maximal line length accepted when calling readline().
_MAXLINE = 65536
# Maximal number of header lines accepted per message.
_MAXHEADERS = 100
112# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
113#
114# VCHAR = %x21-7E
115# obs-text = %x80-FF
116# header-field = field-name ":" OWS field-value OWS
117# field-name = token
118# field-value = *( field-content / obs-fold )
119# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
120# field-vchar = VCHAR / obs-text
121#
122# obs-fold = CRLF 1*( SP / HTAB )
123# ; obsolete line folding
124# ; see Section 3.2.4
126# token = 1*tchar
127#
128# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
129# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
130# / DIGIT / ALPHA
131# ; any VCHAR, except delimiters
132#
133# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
135# the patterns for both name and value are more lenient than RFC
136# definitions to allow for backwards compatibility
137_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
138_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
140# These characters are not allowed within HTTP URL paths.
141# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
142# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
143# Prevents CVE-2019-9740. Includes control characters such as \r\n.
144# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
145_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
146# Arguably only these _should_ allowed:
147# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
148# We are more lenient for assumed real world compatibility purposes.
150# These characters are not allowed within HTTP method names
151# to prevent http header injection.
152_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
154# We always set the Content-Length header for these methods because some
155# servers will otherwise respond with a 411
156_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
159def _encode(data, name='data'):
160 """Call data.encode("latin-1") but show a better error message."""
161 try:
162 return data.encode("latin-1")
163 except UnicodeEncodeError as err:
164 raise UnicodeEncodeError(
165 err.encoding,
166 err.object,
167 err.start,
168 err.end,
169 "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
170 "if you want to send it encoded in UTF-8." %
171 (name.title(), data[err.start:err.end], name)) from None
class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.

        Each occurrence contributes a "Name: value" line; a folded value
        additionally contributes its continuation lines.  Line terminators
        are not included.
        """
        # The previous implementation iterated over self.keys(), which
        # yields bare header names, and compared them against "name:"; no
        # key ever carries the colon, so it always returned an empty list.
        wanted = name.lower()
        lines = []
        for key, value in self.items():
            if key.lower() == wanted:
                # A folded value keeps its embedded line breaks, so
                # splitlines() recovers the continuation lines.
                lines.extend(f"{key}: {value}".splitlines())
        return lines
def _read_headers(fp):
    """Read raw header lines from *fp* up to and including the blank line.

    Returns the list of lines as read (bytes), terminator included.  Raises
    LineTooLong when a line exceeds _MAXLINE bytes and HTTPException when
    more than _MAXHEADERS lines are read.
    """
    # A blank line ends the header section; b'' means EOF.
    terminators = (b'\r\n', b'\n', b'')
    collected = []
    line = None
    while line not in terminators:
        # Ask for one byte more than allowed so an over-long line is
        # detectable.
        line = fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        collected.append(line)
        if len(collected) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
    return collected
def parse_headers(fp, _class=HTTPMessage):
    """Parse only RFC 2822 headers from the binary file object *fp*.

    The email parser wants str rather than bytes, but wrapping the stream
    in a TextIOWrapper would buffer bytes that must later be read as
    bytes.  So exactly the header bytes are read here, decoded as
    ISO-8859-1, and handed to the parser.  Returns an instance of *_class*.
    """
    raw_lines = _read_headers(fp)
    header_text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(header_text)
237class HTTPResponse(io.BufferedIOBase):
239 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
241 # The bytes from the socket object are iso-8859-1 strings.
242 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
243 # text following RFC 2047. The basic status line parsing only
244 # accepts iso-8859-1.
def __init__(self, sock, debuglevel=0, method=None, url=None):
    """Wrap *sock* in a binary file object and reset the parsed state.

    *method* is the request method this response answers.  *url* is
    accepted but not used by this constructor.
    """
    # If the response includes a content-length header, we need to make
    # sure that the client doesn't read more than the specified number of
    # bytes.  If it does, it will block until the server times out and
    # closes the connection.  This will happen if a self.fp.read() is
    # done (without a size) whether self.fp is buffered or not.  So, no
    # self.fp.read() by clients unless they know what they are doing.
    self.fp = sock.makefile("rb")
    self.debuglevel = debuglevel
    self._method = method

    # The HTTPResponse object is returned via urllib.  The clients of
    # http and urllib expect different attributes for the headers:
    # "headers" supports urllib, "msg" is the backwards-compatible name
    # for http clients.
    self.headers = self.msg = None

    # Status-Line fields: HTTP-Version, Status-Code, Reason-Phrase.
    self.version = self.status = self.reason = _UNKNOWN

    self.chunked = _UNKNOWN         # is "chunked" being used?
    self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
    self.length = _UNKNOWN          # number of bytes left in response
    self.will_close = _UNKNOWN      # conn will close at end of response
def _read_status(self):
    """Read and parse the status line; return (version, status, reason).

    *status* is returned as an int in the range 100-999.  Raises
    LineTooLong, RemoteDisconnected (nothing was received) or
    BadStatusLine (malformed line).
    """
    raw = self.fp.readline(_MAXLINE + 1)
    line = str(raw, "iso-8859-1")
    if len(line) > _MAXLINE:
        raise LineTooLong("status line")
    if self.debuglevel > 0:
        print("reply:", repr(line))
    if not line:
        # Presumably, the server closed the connection before
        # sending a valid response.
        raise RemoteDisconnected("Remote end closed connection without"
                                 " response")

    fields = line.split(None, 2)
    if len(fields) == 3:
        version, status, reason = fields
    elif len(fields) == 2:
        version, status = fields
        reason = ""
    else:
        # An empty version makes the check below fail.
        version = ""
    if not version.startswith("HTTP/"):
        self._close_conn()
        raise BadStatusLine(line)

    # The status code is a three-digit number.
    try:
        status = int(status)
    except ValueError:
        raise BadStatusLine(line)
    if not 100 <= status <= 999:
        raise BadStatusLine(line)
    return version, status, reason
def begin(self):
    """Read the status line and headers and set up body-reading state.

    Does nothing if the headers were already read.  Sets status/code,
    reason, version, headers/msg, chunked, chunk_left, will_close and
    length.  Raises UnknownProtocol for an unsupported HTTP version.
    """
    if self.headers is not None:
        # The response has already been started.
        return

    # Skip over any number of interim 100 (Continue) responses.
    while True:
        version, status, reason = self._read_status()
        if status != CONTINUE:
            break
        # Discard the header block that belongs to the 100 response.
        skipped_headers = _read_headers(self.fp)
        if self.debuglevel > 0:
            print("headers:", skipped_headers)
        del skipped_headers

    self.code = self.status = status
    self.reason = reason.strip()
    if version in ("HTTP/1.0", "HTTP/0.9"):
        # Some servers might still return "0.9", treat it as 1.0 anyway.
        self.version = 10
    elif version.startswith("HTTP/1."):
        # Use HTTP/1.1 code for HTTP/1.x where x>=1.
        self.version = 11
    else:
        raise UnknownProtocol(version)

    self.headers = self.msg = parse_headers(self.fp)

    if self.debuglevel > 0:
        for hdr, val in self.headers.items():
            print("header:", hdr + ":", val)

    # Are we using the chunked style of transfer encoding?
    transfer_encoding = self.headers.get("transfer-encoding")
    self.chunked = bool(transfer_encoding and
                        transfer_encoding.lower() == "chunked")
    if self.chunked:
        self.chunk_left = None

    # Will the connection close at the end of the response?
    self.will_close = self._check_close()

    # Do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked".
    self.length = None
    raw_length = self.headers.get("content-length")
    if raw_length and not self.chunked:
        try:
            parsed_length = int(raw_length)
        except ValueError:
            parsed_length = None
        # Unparseable and nonsensical negative lengths are both ignored.
        if parsed_length is not None and parsed_length >= 0:
            self.length = parsed_length

    # Does the body have a fixed length (of zero)?
    has_no_body = (status == NO_CONTENT or status == NOT_MODIFIED or
                   100 <= status < 200 or      # 1xx codes
                   self._method == "HEAD")
    if has_no_body:
        self.length = 0

    # If the connection remains open, and we aren't using chunked, and a
    # content-length was not provided, then assume that the connection
    # WILL close.
    if not (self.will_close or self.chunked) and self.length is None:
        self.will_close = True
381 def _check_close(self):
382 conn = self.headers.get("connection")
383 if self.version == 11:
384 # An HTTP/1.1 proxy is assumed to stay open unless
385 # explicitly closed.
386 if conn and "close" in conn.lower():
387 return True
388 return False
390 # Some HTTP/1.0 implementations have support for persistent
391 # connections, using rules different than HTTP/1.1.
393 # For older HTTP, Keep-Alive indicates persistent connection.
394 if self.headers.get("keep-alive"):
395 return False
397 # At least Akamai returns a "Connection: Keep-Alive" header,
398 # which was supposed to be sent by the client.
399 if conn and "keep-alive" in conn.lower():
400 return False
402 # Proxy-Connection is a netscape hack.
403 pconn = self.headers.get("proxy-connection")
404 if pconn and "keep-alive" in pconn.lower():
405 return False
407 # otherwise, assume it will close
408 return True
410 def _close_conn(self):
411 fp = self.fp
412 self.fp = None
413 fp.close()
def close(self):
    """Mark the response closed and release the underlying file object."""
    try:
        # Sets the "closed" flag on the IOBase side.
        super().close()
    finally:
        # Release the file object even if the base close() raised.
        if self.fp:
            self._close_conn()
422 # These implementations are for the benefit of io.BufferedReader.
424 # XXX This class should probably be revised to act more like
425 # the "raw stream" that BufferedReader expects.
def flush(self):
    """Flush the base stream and, while still open, the underlying file."""
    super().flush()
    if self.fp:
        self.fp.flush()
def readable(self):
    """Always returns True"""
    # Responses are read-only streams; there is no state to consult.
    return True
436 # End of "raw stream" methods
def isclosed(self):
    """True if the connection is closed."""
    # NOTE: it is possible that self.close() is never called.  That
    #       happens when will_close is TRUE, length is None, and the
    #       caller reads up to the last byte, but NOT past it.
    #
    # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    #          called, meaning self.isclosed() is meaningful.
    file_released = self.fp is None
    return file_released
def read(self, amt=None):
    """Read and return the response body, or up to *amt* bytes of it.

    Returns b"" once the response is closed and for HEAD responses.
    Re-raises IncompleteRead, after closing, when fewer bytes than the
    announced length could be read.
    """
    if self.fp is None:
        return b""

    if self._method == "HEAD":
        self._close_conn()
        return b""

    if amt is not None:
        # A bounded read is implemented on top of readinto().
        buf = bytearray(amt)
        count = self.readinto(buf)
        return memoryview(buf)[:count].tobytes()

    # Unbounded read: how much to read depends on self.chunked and
    # self.length.
    if self.chunked:
        return self._readall_chunked()

    if self.length is None:
        data = self.fp.read()
    else:
        try:
            data = self._safe_read(self.length)
        except IncompleteRead:
            self._close_conn()
            raise
        self.length = 0
    self._close_conn()        # we read everything
    return data
def readinto(self, b):
    """Read up to len(b) bytes into bytearray b and return the number
    of bytes read.
    """
    if self.fp is None:
        return 0

    if self._method == "HEAD":
        self._close_conn()
        return 0

    if self.chunked:
        return self._readinto_chunked(b)

    remaining = self.length
    if remaining is not None and len(b) > remaining:
        # Clip the read to the "end of response".
        b = memoryview(b)[0:remaining]

    # _safe_read() is not used here because this may be a .will_close
    # connection, and the user is reading more bytes than will be provided
    # (for example, reading in 1k chunks).
    count = self.fp.readinto(b)
    if not count and b:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= count
        if not self.length:
            self._close_conn()
    return count
def _read_next_chunk_size(self):
    """Read a chunk-size line and return the size as an int.

    Raises LineTooLong for an over-long line.  On an unparseable size
    the connection is closed and the ValueError is re-raised.
    """
    line = self.fp.readline(_MAXLINE + 1)
    if len(line) > _MAXLINE:
        raise LineTooLong("chunk size")
    # Anything after the first ";" is a chunk-extension; drop it.
    size_field = line.partition(b";")[0]
    try:
        return int(size_field, 16)
    except ValueError:
        # Close the connection as protocol synchronisation is probably
        # lost.
        self._close_conn()
        raise
def _read_and_discard_trailer(self):
    """Read and discard trailer lines up to the terminating blank line."""
    ### note: we shouldn't have any trailers!
    # b'' is EOF: a vanishingly small number of sites EOF without
    # sending the trailer.
    end_markers = (b'\r\n', b'\n', b'')
    while True:
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("trailer line")
        if line in end_markers:
            break
544 def _get_chunk_left(self):
545 # return self.chunk_left, reading a new chunk if necessary.
546 # chunk_left == 0: at the end of the current chunk, need to close it
547 # chunk_left == None: No current chunk, should read next.
548 # This function returns non-zero or None if the last chunk has
549 # been read.
550 chunk_left = self.chunk_left
551 if not chunk_left: # Can be 0 or None
552 if chunk_left is not None:
553 # We are at the end of chunk, discard chunk end
554 self._safe_read(2) # toss the CRLF at the end of the chunk
555 try:
556 chunk_left = self._read_next_chunk_size()
557 except ValueError:
558 raise IncompleteRead(b'')
559 if chunk_left == 0:
560 # last chunk: 1*("0") [ chunk-extension ] CRLF
561 self._read_and_discard_trailer()
562 # we read everything; close the "file"
563 self._close_conn()
564 chunk_left = None
565 self.chunk_left = chunk_left
566 return chunk_left
def _readall_chunked(self):
    """Read every remaining chunk and return the concatenated payload.

    If the stream ends early, IncompleteRead is raised carrying the data
    collected so far.
    """
    assert self.chunked != _UNKNOWN
    pieces = []
    try:
        while (chunk_left := self._get_chunk_left()) is not None:
            pieces.append(self._safe_read(chunk_left))
            # The chunk is consumed; only its trailing CRLF remains.
            self.chunk_left = 0
        return b''.join(pieces)
    except IncompleteRead:
        raise IncompleteRead(b''.join(pieces))
def _readinto_chunked(self, b):
    """Fill buffer *b* from the chunked body; return the bytes stored.

    Stops when *b* is full or the last chunk has been read.  If the
    stream ends early, IncompleteRead is raised carrying the bytes
    stored so far.
    """
    assert self.chunked != _UNKNOWN
    stored = 0
    view = memoryview(b)
    try:
        while (chunk_left := self._get_chunk_left()) is not None:
            if len(view) <= chunk_left:
                # The rest of the buffer fits inside the current chunk.
                count = self._safe_readinto(view)
                self.chunk_left = chunk_left - count
                return stored + count

            # The buffer outlasts this chunk: drain it and move on.
            count = self._safe_readinto(view[:chunk_left])
            view = view[count:]
            stored += count
            self.chunk_left = 0
        return stored
    except IncompleteRead:
        raise IncompleteRead(bytes(b[0:stored]))
606 def _safe_read(self, amt):
607 """Read the number of bytes requested.
609 This function should be used when <amt> bytes "should" be present for
610 reading. If the bytes are truly not available (due to EOF), then the
611 IncompleteRead exception can be used to detect the problem.
612 """
613 data = self.fp.read(amt)
614 if len(data) < amt:
615 raise IncompleteRead(data, amt-len(data))
616 return data
618 def _safe_readinto(self, b):
619 """Same as _safe_read, but for reading into a buffer."""
620 amt = len(b)
621 n = self.fp.readinto(b)
622 if n < amt:
623 raise IncompleteRead(bytes(b[:n]), amt-n)
624 return n
def read1(self, n=-1):
    """Read with at most one underlying system call.  If at least one
    byte is buffered, return that instead.

    Returns b"" once the response is closed and for HEAD responses.
    When the content length is known, the read is clipped to the bytes
    that remain and the response is closed as soon as the last byte has
    been delivered.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        return self._read1_chunked(n)
    if self.length is not None and (n < 0 or n > self.length):
        # Never read past the end of this response's body.
        n = self.length
    result = self.fp.read1(n)
    if not result and n:
        # EOF before the announced length was satisfied.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(result)
        if not self.length:
            # Body fully consumed: close now, as readinto() does, so that
            # isclosed() becomes true and the connection can be reused.
            self._close_conn()
    return result
def peek(self, n=-1):
    """Return buffered body bytes without consuming them.

    Having this enables IOBase.readline() to read more than one byte at
    a time.  Returns b"" once the response is closed and for HEAD
    responses.
    """
    has_no_body = self.fp is None or self._method == "HEAD"
    if has_no_body:
        return b""
    if self.chunked:
        return self._peek_chunked(n)
    return self.fp.peek(n)
def readline(self, limit=-1):
    """Read and return one line of the body, at most *limit* bytes.

    Returns b"" once the response is closed and for HEAD responses.
    When the content length is known, the read is clipped to the bytes
    that remain and the response is closed as soon as the last byte has
    been delivered.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        # Fallback to IOBase readline which uses peek() and read()
        return super().readline(limit)
    if self.length is not None and (limit < 0 or limit > self.length):
        # Never read past the end of this response's body.
        limit = self.length
    result = self.fp.readline(limit)
    if not result and limit:
        # EOF before the announced length was satisfied.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(result)
        if not self.length:
            # Body fully consumed: close now, as readinto() does, so that
            # isclosed() becomes true and the connection can be reused.
            self._close_conn()
    return result
667 def _read1_chunked(self, n):
668 # Strictly speaking, _get_chunk_left() may cause more than one read,
669 # but that is ok, since that is to satisfy the chunked protocol.
670 chunk_left = self._get_chunk_left()
671 if chunk_left is None or n == 0:
672 return b''
673 if not (0 <= n <= chunk_left):
674 n = chunk_left # if n is negative or larger than chunk_left
675 read = self.fp.read1(n)
676 self.chunk_left -= len(read)
677 if not read:
678 raise IncompleteRead(b"")
679 return read
681 def _peek_chunked(self, n):
682 # Strictly speaking, _get_chunk_left() may cause more than one read,
683 # but that is ok, since that is to satisfy the chunked protocol.
684 try:
685 chunk_left = self._get_chunk_left()
686 except IncompleteRead:
687 return b'' # peek doesn't worry about protocol
688 if chunk_left is None:
689 return b'' # eof
690 # peek is allowed to return more than requested. Just request the
691 # entire chunk, and truncate what we get.
692 return self.fp.peek(chunk_left)[:chunk_left]
def fileno(self):
    """Return the file descriptor reported by the underlying file object."""
    descriptor = self.fp.fileno()
    return descriptor
def getheader(self, name, default=None):
    '''Returns the value of the header matching *name*.

    If there are multiple matching headers, the values are
    combined into a single string separated by commas and spaces.

    If no matching header is found, returns *default* or None if
    the *default* is not specified.  An iterable, non-string *default*
    is joined the same way as header values.

    If the headers are unknown, raises http.client.ResponseNotReady.

    '''
    if self.headers is None:
        raise ResponseNotReady()
    values = self.headers.get_all(name) or default
    # Strings are iterable too, but must be returned as they are.
    if isinstance(values, str) or not hasattr(values, '__iter__'):
        return values
    return ', '.join(values)
def getheaders(self):
    """Return list of (header, value) tuples.

    Raises ResponseNotReady if the headers have not been read yet.
    """
    headers = self.headers
    if headers is None:
        raise ResponseNotReady()
    return list(headers.items())
723 # We override IOBase.__iter__ so that it doesn't check for closed-ness
725 def __iter__(self):
726 return self
728 # For compatibility with old-style urllib responses.
def info(self):
    '''Return the headers of this response (the ``headers`` attribute).

    Kept for compatibility with old-style urllib responses, where
    info() returns the meta-information associated with the URL.  For
    an HTTP response these are the headers returned by the server at
    the head of the retrieved page (including Content-Length and
    Content-Type).

    '''
    return self.headers
def geturl(self):
    '''Return the real URL of the page.

    In some cases, the HTTP server redirects a client to another
    URL.  The urlopen() function handles this transparently, but in
    some cases the caller needs to know which URL the client was
    redirected to.  The geturl() method can be used to get at this
    redirected URL.

    '''
    # NOTE(review): nothing visible in this class assigns self.url;
    # presumably the caller (e.g. urllib) sets it -- confirm.
    return self.url
def getcode(self):
    '''Return the HTTP status code that was sent with the response,
    or None if the URL is not an HTTP URL.

    '''
    status_code = self.status
    return status_code
772class HTTPConnection:
774 _http_vsn = 11
775 _http_vsn_str = 'HTTP/1.1'
777 response_class = HTTPResponse
778 default_port = HTTP_PORT
779 auto_open = 1
780 debuglevel = 0
782 @staticmethod
783 def _is_textIO(stream):
784 """Test whether a file-like object is a text or a binary stream.
785 """
786 return isinstance(stream, io.TextIOBase)
788 @staticmethod
789 def _get_content_length(body, method):
790 """Get the content-length based on the body.
792 If the body is None, we set Content-Length: 0 for methods that expect
793 a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
794 any method if the body is a str or bytes-like object and not a file.
795 """
796 if body is None:
797 # do an explicit check for not None here to distinguish
798 # between unset and set but empty
799 if method.upper() in _METHODS_EXPECTING_BODY:
800 return 0
801 else:
802 return None
804 if hasattr(body, 'read'):
805 # file-like object.
806 return None
808 try:
809 # does it implement the buffer protocol (bytes, bytearray, array)?
810 mv = memoryview(body)
811 return mv.nbytes
812 except TypeError:
813 pass
815 if isinstance(body, str):
816 return len(body)
818 return None
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             source_address=None, blocksize=8192):
    """Record the connection parameters; no socket is opened here.

    *host* may carry the port as "host:port" when *port* is None.
    *blocksize* is the number of bytes read at a time from file-like
    request bodies.
    """
    self.timeout = timeout
    self.source_address = source_address
    self.blocksize = blocksize

    # Connection and request state.
    self.sock = None
    self._buffer = []
    self.__response = None
    self.__state = _CS_IDLE
    self._method = None

    # HTTP CONNECT tunnel settings; see set_tunnel().
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    self.host, self.port = self._get_hostport(host, port)

    self._validate_host(self.host)

    # This is stored as an instance variable to allow unit
    # tests to replace it with a suitable mockup
    self._create_connection = socket.create_connection
def set_tunnel(self, host, port=None, headers=None):
    """Set up host and port for HTTP CONNECT tunnelling.

    In a connection that uses HTTP CONNECT tunneling, the host passed to the
    constructor is used as a proxy server that relays all communication to
    the endpoint passed to `set_tunnel`. This is done by sending an HTTP
    CONNECT request to the proxy server when the connection is established.

    This method must be called before the HTTP connection has been
    established; otherwise RuntimeError is raised.

    The headers argument should be a mapping of extra HTTP headers to send
    with the CONNECT request.  The mapping is copied, so it is never
    modified by this object.
    """
    if self.sock:
        raise RuntimeError("Can't set up tunnel for established connection")

    self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
    if headers:
        # Keep a private copy: storing the caller's mapping would let a
        # later call without headers clear() the caller's own dict.
        self._tunnel_headers = dict(headers)
    else:
        self._tunnel_headers.clear()
866 def _get_hostport(self, host, port):
867 if port is None:
868 i = host.rfind(':')
869 j = host.rfind(']') # ipv6 addresses have [...]
870 if i > j:
871 try:
872 port = int(host[i+1:])
873 except ValueError:
874 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
875 port = self.default_port
876 else:
877 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
878 host = host[:i]
879 else:
880 port = self.default_port
881 if host and host[0] == '[' and host[-1] == ']':
882 host = host[1:-1]
884 return (host, port)
def set_debuglevel(self, level):
    """Set the debug level; values above 0 enable debug printing."""
    self.debuglevel = level
889 def _tunnel(self):
890 connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
891 self._tunnel_host.encode("ascii"), self._tunnel_port)
892 headers = [connect]
893 for header, value in self._tunnel_headers.items():
894 headers.append(f"{header}: {value}\r\n".encode("latin-1"))
895 headers.append(b"\r\n")
896 # Making a single send() call instead of one per line encourages
897 # the host OS to use a more optimal packet size instead of
898 # potentially emitting a series of small packets.
899 self.send(b"".join(headers))
900 del headers
902 response = self.response_class(self.sock, method=self._method)
903 (version, code, message) = response._read_status()
905 if code != http.HTTPStatus.OK:
906 self.close()
907 raise OSError(f"Tunnel connection failed: {code} {message.strip()}")
908 while True:
909 line = response.fp.readline(_MAXLINE + 1)
910 if len(line) > _MAXLINE:
911 raise LineTooLong("header line")
912 if not line:
913 # for sites which EOF without sending a trailer
914 break
915 if line in (b'\r\n', b'\n', b''):
916 break
918 if self.debuglevel > 0:
919 print('header:', line.decode())
def connect(self):
    """Connect to the host and port specified in __init__."""
    address = (self.host, self.port)
    self.sock = self._create_connection(
        address, self.timeout, self.source_address)
    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

    # When a tunnel was configured, issue the CONNECT request right away.
    if self._tunnel_host:
        self._tunnel()
def close(self):
    """Close the connection to the HTTP server."""
    self.__state = _CS_IDLE
    try:
        current_sock = self.sock
        if current_sock:
            self.sock = None
            # Close it manually... there may be other refs.
            current_sock.close()
    finally:
        # The pending response is released even if closing the socket
        # raised.
        pending = self.__response
        if pending:
            self.__response = None
            pending.close()
def send(self, data):
    """Send `data' to the server.
    ``data`` can be a string object, a bytes object, an array object, a
    file-like object that supports a .read() method, or an iterable object.

    Connects first when there is no socket and auto_open is set;
    otherwise raises NotConnected.
    """
    if self.sock is None:
        if not self.auto_open:
            raise NotConnected()
        self.connect()

    debugging = self.debuglevel > 0
    if debugging:
        print("send:", repr(data))

    if hasattr(data, "read"):
        if debugging:
            print("sendIng a read()able")
        # Text streams yield str, which must be encoded before sending.
        encode = self._is_textIO(data)
        if encode and debugging:
            print("encoding file using iso-8859-1")
        while datablock := data.read(self.blocksize):
            if encode:
                datablock = datablock.encode("iso-8859-1")
            self.sock.sendall(datablock)
        return

    try:
        self.sock.sendall(data)
    except TypeError:
        # Not bytes-like: fall back to sending the items of an iterable.
        if isinstance(data, collections.abc.Iterable):
            for item in data:
                self.sock.sendall(item)
        else:
            raise TypeError("data should be a bytes-like object "
                            "or an iterable, got %r" % type(data))
982 def _output(self, s):
983 """Add a line of output to the current request buffer.
985 Assumes that the line does *not* end with \\r\\n.
986 """
987 self._buffer.append(s)
989 def _read_readable(self, readable):
990 if self.debuglevel > 0:
991 print("sendIng a read()able")
992 encode = self._is_textIO(readable)
993 if encode and self.debuglevel > 0:
994 print("encoding file using iso-8859-1")
995 while True:
996 datablock = readable.read(self.blocksize)
997 if not datablock:
998 break
999 if encode:
1000 datablock = datablock.encode("iso-8859-1")
1001 yield datablock
1003 def _send_output(self, message_body=None, encode_chunked=False):
1004 """Send the currently buffered request and clear the buffer.
1006 Appends an extra \\r\\n to the buffer.
1007 A message_body may be specified, to be appended to the request.
1008 """
1009 self._buffer.extend((b"", b""))
1010 msg = b"\r\n".join(self._buffer)
1011 del self._buffer[:]
1012 self.send(msg)
1014 if message_body is not None:
1016 # create a consistent interface to message_body
1017 if hasattr(message_body, 'read'):
1018 # Let file-like take precedence over byte-like. This
1019 # is needed to allow the current position of mmap'ed
1020 # files to be taken into account.
1021 chunks = self._read_readable(message_body)
1022 else:
1023 try:
1024 # this is solely to check to see if message_body
1025 # implements the buffer API. it /would/ be easier
1026 # to capture if PyObject_CheckBuffer was exposed
1027 # to Python.
1028 memoryview(message_body)
1029 except TypeError:
1030 try:
1031 chunks = iter(message_body)
1032 except TypeError:
1033 raise TypeError("message_body should be a bytes-like "
1034 "object or an iterable, got %r"
1035 % type(message_body))
1036 else:
1037 # the object implements the buffer interface and
1038 # can be passed directly into socket methods
1039 chunks = (message_body,)
1041 for chunk in chunks:
1042 if not chunk:
1043 if self.debuglevel > 0:
1044 print('Zero length chunk ignored')
1045 continue
1047 if encode_chunked and self._http_vsn == 11:
1048 # chunked encoding
1049 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
1050 + b'\r\n'
1051 self.send(chunk)
1053 if encode_chunked and self._http_vsn == 11:
1054 # end chunked transfer
1055 self.send(b'0\r\n\r\n')
def putrequest(self, method, url, skip_host=False,
               skip_accept_encoding=False):
    """Start a new request by buffering its request line.

    `method' is the HTTP request method, e.g. 'GET'.
    `url' is the object being requested, e.g. '/index.html'; an empty
    value is replaced by '/'.
    `skip_host', if true, suppresses the automatic 'Host:' header.
    `skip_accept_encoding', if true, suppresses the automatic
    'Accept-Encoding:' header.

    Raises CannotSendRequest when the connection is not idle.  Method and
    url are checked by _validate_method() and _validate_path() before
    anything is buffered.
    """
    # A prior response that has been fully consumed no longer matters.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # Another request cannot be issued on this connection when:
    #   1) a request is currently being sent (_CS_REQ_STARTED);
    #   2) a previous response has signalled that the connection will be
    #      closed once it completes;
    #   3) the headers of the previous response have not been read yet,
    #      so (2) cannot be decided (_CS_REQ_SENT).
    #
    # Without a prior response, requests may be made at will.
    #
    # When (2) holds, the socket has been handed over to the response
    # (effectively "there is no prior response") and a new socket is
    # opened when the next request is made.
    #
    # Note: while a prior response exists a new request *can* be started,
    # but fetching the response to that new request has to wait until the
    # prior response is complete.
    if self.__state != _CS_IDLE:
        raise CannotSendRequest(self.__state)
    self.__state = _CS_REQ_STARTED

    self._validate_method(method)

    # Remembered for the response phase.
    self._method = method

    url = url or '/'
    self._validate_path(url)

    request_line = '%s %s %s' % (method, url, self._http_vsn_str)
    self._output(self._encode_request(request_line))

    if self._http_vsn != 11:
        # For HTTP/1.0, the server will assume "not chunked"; no
        # standard headers are issued.
        return

    # From here on: standard headers for better HTTP/1.1 compliance.

    if not skip_host:
        # The Host header is issued *only* for HTTP/1.1 connections.
        # Backwards-compatible clients use HTTP/1.0 and may send the
        # header themselves; it must NOT be issued twice, since some web
        # servers (such as Apache) reject requests carrying two Host:
        # headers.
        #
        # A non-standard port is included in the header.  When the
        # request goes through a proxy, the header names the host of the
        # actual URL, not the host of the proxy.
        netloc = urlsplit(url).netloc if url.startswith('http') else ''

        if netloc:
            try:
                netloc_bytes = netloc.encode("ascii")
            except UnicodeEncodeError:
                netloc_bytes = netloc.encode("idna")
            self.putheader('Host', netloc_bytes)
        else:
            if self._tunnel_host:
                host, port = self._tunnel_host, self._tunnel_port
            else:
                host, port = self.host, self.port

            try:
                host_bytes = host.encode("ascii")
            except UnicodeEncodeError:
                host_bytes = host.encode("idna")

            # As per RFC 2732, an IPv6 address is wrapped in [] when
            # used in the Host header; a colon in the host marks one.
            if ':' in host:
                host_bytes = b'[' + host_bytes + b']'

            if port == self.default_port:
                self.putheader('Host', host_bytes)
            else:
                host_text = host_bytes.decode("ascii")
                self.putheader('Host', "%s:%s" % (host_text, port))

    # Note: clients are assumed not to set the following headers
    # themselves, since *this* library has to deal with the
    # consequences.  When the supporting libraries learn other encodings,
    # this code should be changed (removed or updated).

    # Only "identity" is requested because encodings such as x-gzip or
    # x-deflate are not supported.
    if not skip_accept_encoding:
        self.putheader('Accept-Encoding', 'identity')

    # "chunked" transfer encodings are acceptable, but no others.  No TE
    # header is sent: its absence implies *only* "chunked".  If a TE
    # header were ever supplied, it would also have to be named in a
    # Connection header.
1184 def _encode_request(self, request):
1185 # ASCII also helps prevent CVE-2019-9740.
1186 return request.encode('ascii')
def _validate_method(self, method):
    """Reject request methods that could be used for header injection.

    Raises ValueError when _contains_disallowed_method_pchar_re finds a
    match in `method'; otherwise returns None.
    """
    offending = _contains_disallowed_method_pchar_re.search(method)
    if offending is None:
        return
    raise ValueError(
            f"method can't contain control characters. {method!r} "
            f"(found at least {offending.group()!r})")
def _validate_path(self, url):
    """Reject request targets containing disallowed characters.

    Guards against CVE-2019-9740.  Raises InvalidURL when
    _contains_disallowed_url_pchar_re finds a match in `url'; otherwise
    returns None.
    """
    offending = _contains_disallowed_url_pchar_re.search(url)
    if offending is None:
        return
    raise InvalidURL(f"URL can't contain control characters. {url!r} "
                     f"(found at least {offending.group()!r})")
def _validate_host(self, host):
    """Reject host names containing disallowed characters.

    Guards against CVE-2019-18348.  The same pattern as for request
    targets is applied; a match raises InvalidURL, otherwise None is
    returned.
    """
    offending = _contains_disallowed_url_pchar_re.search(host)
    if offending is None:
        return
    raise InvalidURL(f"URL can't contain control characters. {host!r} "
                     f"(found at least {offending.group()!r})")
def putheader(self, header, *values):
    """Buffer one request header line.

    For example: h.putheader('Accept', 'text/html')

    `header' may be str (encoded as ASCII) or bytes.  Each value may be
    str (encoded as latin-1), int (rendered in decimal) or bytes.  Several
    values are emitted as one header whose value is folded over multiple
    lines (CRLF followed by a tab between the values).

    Raises CannotSendHeader unless a request has been started and its
    headers not yet ended, and ValueError for an illegal header name or
    value.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    if hasattr(header, 'encode'):
        header = header.encode('ascii')

    if not _is_legal_header_name(header):
        raise ValueError('Invalid header name %r' % (header,))

    # Convert and validate one value at a time so that the first bad value
    # is the one reported.
    encoded_values = []
    for item in values:
        if hasattr(item, 'encode'):
            item = item.encode('latin-1')
        elif isinstance(item, int):
            item = str(item).encode('ascii')

        if _is_illegal_header_value(item):
            raise ValueError('Invalid header value %r' % (item,))
        encoded_values.append(item)

    line = header + b': ' + b'\r\n\t'.join(encoded_values)
    self._output(line)
def endheaders(self, message_body=None, *, encode_chunked=False):
    """Finish the header section and transmit the request.

    Moves the connection from the request-started to the request-sent
    state and sends everything buffered so far.  The optional
    `message_body' is sent after the headers; `encode_chunked' is passed
    on to _send_output().

    Raises CannotSendHeader when no request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT
    self._send_output(message_body, encode_chunked=encode_chunked)
def request(self, method, url, body=None, headers={}, *,
            encode_chunked=False):
    """Send a complete request (request line, headers, body) to the server.

    All arguments are handed to _send_request() unchanged.  The default
    `headers' mapping is only read here, never modified.
    """
    forwarded = (method, url, body, headers, encode_chunked)
    self._send_request(*forwarded)
1259 def _send_request(self, method, url, body, headers, encode_chunked):
1260 # Honor explicitly requested Host: and Accept-Encoding: headers.
1261 header_names = frozenset(k.lower() for k in headers)
1262 skips = {}
1263 if 'host' in header_names:
1264 skips['skip_host'] = 1
1265 if 'accept-encoding' in header_names:
1266 skips['skip_accept_encoding'] = 1
1268 self.putrequest(method, url, **skips)
1270 # chunked encoding will happen if HTTP/1.1 is used and either
1271 # the caller passes encode_chunked=True or the following
1272 # conditions hold:
1273 # 1. content-length has not been explicitly set
1274 # 2. the body is a file or iterable, but not a str or bytes-like
1275 # 3. Transfer-Encoding has NOT been explicitly set by the caller
1277 if 'content-length' not in header_names:
1278 # only chunk body if not explicitly set for backwards
1279 # compatibility, assuming the client code is already handling the
1280 # chunking
1281 if 'transfer-encoding' not in header_names:
1282 # if content-length cannot be automatically determined, fall
1283 # back to chunked encoding
1284 encode_chunked = False
1285 content_length = self._get_content_length(body, method)
1286 if content_length is None:
1287 if body is not None:
1288 if self.debuglevel > 0:
1289 print('Unable to determine size of %r' % body)
1290 encode_chunked = True
1291 self.putheader('Transfer-Encoding', 'chunked')
1292 else:
1293 self.putheader('Content-Length', str(content_length))
1294 else:
1295 encode_chunked = False
1297 for hdr, value in headers.items():
1298 self.putheader(hdr, value)
1299 if isinstance(body, str):
1300 # RFC 2616 Section 3.7.1 says that text default has a
1301 # default charset of iso-8859-1.
1302 body = _encode(body, 'body')
1303 self.endheaders(body, encode_chunked=encode_chunked)
def getresponse(self):
    """Read the status line and headers and return the response object.

    If the HTTPConnection is in the correct state, returns an instance
    of HTTPResponse or of whatever object is returned by the
    response_class variable.

    If a request has not been sent or if a previous response has not
    been handled, ResponseNotReady is raised.  If the HTTP response
    indicates that the connection should be closed, then it will be
    closed before the response is returned.  When the connection is
    closed, the underlying socket is closed.
    """
    # A prior response that has been fully consumed no longer matters.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # A prior response that still exists must be completed first;
    # otherwise the header of this response cannot be read to determine
    # the connection-close behavior.
    #
    # Note: if a prior response existed but was connection-close, the
    # socket and response were made independent of this HTTPConnection
    # object, since a new request requires opening a whole new
    # connection.
    #
    # So the prior response was in one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #      response operate independently;
    #   2) persistent: the response was retained and we wait for its
    #      isclosed() status to become true.
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady(self.__state)

    # The debug level is passed on only when debugging is enabled.
    if self.debuglevel > 0:
        ctor_args = (self.sock, self.debuglevel)
    else:
        ctor_args = (self.sock,)
    response = self.response_class(*ctor_args, method=self._method)

    try:
        try:
            response.begin()
        except ConnectionError:
            # The connection is unusable; drop it before propagating.
            self.close()
            raise
        assert response.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remembered so completion can be detected later
            self.__response = response

        return response
    except BaseException:
        # Whatever went wrong, do not leave the response open.
        response.close()
        raise
try:
    import ssl
except ImportError:
    # Without the ssl module HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant whose socket is wrapped by an SSL context."""

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            """Set up the connection and the SSL context it will use.

            `context' is the SSL context used to wrap the socket; when it
            is None a default HTTPS context is created.  `key_file',
            `cert_file' and `check_hostname' are deprecated (a
            DeprecationWarning is issued when any of them is given) but
            still honoured.  The remaining arguments are passed to
            HTTPConnection.__init__().

            Raises ValueError when hostname checking is requested on a
            context that does not verify certificates.
            """
            super().__init__(host, port, timeout, source_address,
                             blocksize=blocksize)
            deprecated_args = (key_file, cert_file, check_hostname)
            if any(arg is not None for arg in deprecated_args):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file
            if context is None:
                context = ssl._create_default_https_context()
                # enable PHA for TLS 1.3 connections if available
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            verifies_certs = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                check_hostname = context.check_hostname
            if check_hostname and not verifies_certs:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
                # A cert and key file mean the user wants to authenticate,
                # so TLS 1.3 PHA is enabled implicitly even for custom
                # contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            """Connect to a host on a given (SSL) port.

            The plain connection is made by the base class; the socket is
            then wrapped by the SSL context, using the tunnel host as
            server_hostname when a tunnel is configured and self.host
            otherwise.
            """
            super().connect()

            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
class HTTPException(Exception):
    """Base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
class NotConnected(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
class InvalidURL(HTTPException):
    """Raised by the URL and host validators on disallowed characters."""
class UnknownProtocol(HTTPException):
    """HTTPException that records the protocol version it was given."""

    def __init__(self, version):
        # args is set by hand because Exception.__init__ is not called.
        self.version = version
        self.args = (version,)
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
class IncompleteRead(HTTPException):
    """HTTPException carrying the data read so far.

    `partial' holds what was read; `expected', when not None, is the
    number of bytes that were still expected.
    """

    def __init__(self, partial, expected=None):
        # args is set by hand because Exception.__init__ is not called.
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        """Summarise how much was read and, if known, how much is missing."""
        if self.expected is None:
            remainder = ''
        else:
            remainder = ', %i more expected' % self.expected
        name = self.__class__.__name__
        return '%s(%i bytes read%s)' % (name, len(self.partial), remainder)

    # str() goes through object.__str__, which falls back to __repr__,
    # rather than through the args-based Exception.__str__.
    __str__ = object.__str__
class ImproperConnectionState(HTTPException):
    """Common base of the errors raised for calls made in the wrong state."""
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is started."""
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet."""
class BadStatusLine(HTTPException):
    """HTTPException that records the offending status line.

    A falsy line is replaced by its repr() so that the stored value and
    the exception message are never empty.
    """

    def __init__(self, line):
        line = line or repr(line)
        # args is set by hand because Exception.__init__ is not called.
        self.args = (line,)
        self.line = line
class LineTooLong(HTTPException):
    """HTTPException whose message names the kind of line over _MAXLINE."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    """Both a ConnectionResetError and a BadStatusLine.

    The BadStatusLine part is initialised with an empty line; the given
    arguments go to ConnectionResetError.
    """

    def __init__(self, *pos, **kw):
        # Both bases are initialised explicitly, BadStatusLine first.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
# Backwards-compatibility alias: ``error`` is bound to the very same class
# object as HTTPException, so either name may be used to raise or catch it.
error = HTTPException