1"""HTTP/1.1 client library
2
3A backport of the Python 3.3 http/client.py module for python-future.
4
5<intro stuff goes here>
6<other stuff, too>
7
8HTTPConnection goes through a number of "states", which define when a client
9may legally make another request or fetch the response for a particular
10request. This diagram details these state transitions:
11
12 (null)
13 |
14 | HTTPConnection()
15 v
16 Idle
17 |
18 | putrequest()
19 v
20 Request-started
21 |
22 | ( putheader() )* endheaders()
23 v
24 Request-sent
25 |
26 | response = getresponse()
27 v
28 Unread-response [Response-headers-read]
29 |\____________________
30 | |
31 | response.read() | putrequest()
32 v v
33 Idle Req-started-unread-response
34 ______/|
35 / |
36 response.read() | | ( putheader() )* endheaders()
37 v v
38 Request-started Req-sent-unread-response
39 |
40 | response.read()
41 v
42 Request-sent
43
44This diagram presents the following rules:
45 -- a second request may not be started until {response-headers-read}
46 -- a response [object] cannot be retrieved until {request-sent}
47 -- there is no differentiation between an unread response body and a
48 partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51 HTTPResponse class does not enforce this state machine, which
52 implies sophisticated clients may accelerate the request/response
53 pipeline. Caution should be taken, though: accelerating the states
54 beyond the above pattern may imply knowledge of the server's
55 connection-close behavior for certain requests. For example, it
56 is impossible to tell whether the server will close the connection
57 UNTIL the response headers have been read; this means that further
58 requests cannot be placed into the pipeline until it is known that
59 the server will NOT be closing the connection.
60
61Logical State __state __response
62------------- ------- ----------
63Idle _CS_IDLE None
64Request-started _CS_REQ_STARTED None
65Request-sent _CS_REQ_SENT None
66Unread-response _CS_IDLE <response_class>
67Req-started-unread-response _CS_REQ_STARTED <response_class>
68Req-sent-unread-response _CS_REQ_SENT <response_class>
69"""
70
71from __future__ import (absolute_import, division,
72 print_function, unicode_literals)
73from future.builtins import bytes, int, str, super
74from future.utils import PY2
75
76from future.backports.email import parser as email_parser
77from future.backports.email import message as email_message
78from future.backports.misc import create_connection as socket_create_connection
79import io
80import os
81import socket
82from future.backports.urllib.parse import urlsplit
83import warnings
84from array import array
85
86if PY2:
87 from collections import Iterable
88else:
89 from collections.abc import Iterable
90
91__all__ = ["HTTPResponse", "HTTPConnection",
92 "HTTPException", "NotConnected", "UnknownProtocol",
93 "UnknownTransferEncoding", "UnimplementedFileMode",
94 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
95 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
96 "BadStatusLine", "error", "responses"]
97
98HTTP_PORT = 80
99HTTPS_PORT = 443
100
101_UNKNOWN = 'UNKNOWN'
102
103# connection states
104_CS_IDLE = 'Idle'
105_CS_REQ_STARTED = 'Request-started'
106_CS_REQ_SENT = 'Request-sent'
107
108# status codes
109# informational
110CONTINUE = 100
111SWITCHING_PROTOCOLS = 101
112PROCESSING = 102
113
114# successful
115OK = 200
116CREATED = 201
117ACCEPTED = 202
118NON_AUTHORITATIVE_INFORMATION = 203
119NO_CONTENT = 204
120RESET_CONTENT = 205
121PARTIAL_CONTENT = 206
122MULTI_STATUS = 207
123IM_USED = 226
124
125# redirection
126MULTIPLE_CHOICES = 300
127MOVED_PERMANENTLY = 301
128FOUND = 302
129SEE_OTHER = 303
130NOT_MODIFIED = 304
131USE_PROXY = 305
132TEMPORARY_REDIRECT = 307
133
134# client error
135BAD_REQUEST = 400
136UNAUTHORIZED = 401
137PAYMENT_REQUIRED = 402
138FORBIDDEN = 403
139NOT_FOUND = 404
140METHOD_NOT_ALLOWED = 405
141NOT_ACCEPTABLE = 406
142PROXY_AUTHENTICATION_REQUIRED = 407
143REQUEST_TIMEOUT = 408
144CONFLICT = 409
145GONE = 410
146LENGTH_REQUIRED = 411
147PRECONDITION_FAILED = 412
148REQUEST_ENTITY_TOO_LARGE = 413
149REQUEST_URI_TOO_LONG = 414
150UNSUPPORTED_MEDIA_TYPE = 415
151REQUESTED_RANGE_NOT_SATISFIABLE = 416
152EXPECTATION_FAILED = 417
153UNPROCESSABLE_ENTITY = 422
154LOCKED = 423
155FAILED_DEPENDENCY = 424
156UPGRADE_REQUIRED = 426
157PRECONDITION_REQUIRED = 428
158TOO_MANY_REQUESTS = 429
159REQUEST_HEADER_FIELDS_TOO_LARGE = 431
160
161# server error
162INTERNAL_SERVER_ERROR = 500
163NOT_IMPLEMENTED = 501
164BAD_GATEWAY = 502
165SERVICE_UNAVAILABLE = 503
166GATEWAY_TIMEOUT = 504
167HTTP_VERSION_NOT_SUPPORTED = 505
168INSUFFICIENT_STORAGE = 507
169NOT_EXTENDED = 510
170NETWORK_AUTHENTICATION_REQUIRED = 511
171
172# Mapping status codes to official W3C names
173responses = {
174 100: 'Continue',
175 101: 'Switching Protocols',
176
177 200: 'OK',
178 201: 'Created',
179 202: 'Accepted',
180 203: 'Non-Authoritative Information',
181 204: 'No Content',
182 205: 'Reset Content',
183 206: 'Partial Content',
184
185 300: 'Multiple Choices',
186 301: 'Moved Permanently',
187 302: 'Found',
188 303: 'See Other',
189 304: 'Not Modified',
190 305: 'Use Proxy',
191 306: '(Unused)',
192 307: 'Temporary Redirect',
193
194 400: 'Bad Request',
195 401: 'Unauthorized',
196 402: 'Payment Required',
197 403: 'Forbidden',
198 404: 'Not Found',
199 405: 'Method Not Allowed',
200 406: 'Not Acceptable',
201 407: 'Proxy Authentication Required',
202 408: 'Request Timeout',
203 409: 'Conflict',
204 410: 'Gone',
205 411: 'Length Required',
206 412: 'Precondition Failed',
207 413: 'Request Entity Too Large',
208 414: 'Request-URI Too Long',
209 415: 'Unsupported Media Type',
210 416: 'Requested Range Not Satisfiable',
211 417: 'Expectation Failed',
212 428: 'Precondition Required',
213 429: 'Too Many Requests',
214 431: 'Request Header Fields Too Large',
215
216 500: 'Internal Server Error',
217 501: 'Not Implemented',
218 502: 'Bad Gateway',
219 503: 'Service Unavailable',
220 504: 'Gateway Timeout',
221 505: 'HTTP Version Not Supported',
222 511: 'Network Authentication Required',
223}
224
225# maximal amount of data to read at one time in _safe_read
226MAXAMOUNT = 1048576
227
228# maximal line length when calling readline().
229_MAXLINE = 65536
230_MAXHEADERS = 100
231
232
233class HTTPMessage(email_message.Message):
234 # XXX The only usage of this method is in
235 # http.server.CGIHTTPRequestHandler. Maybe move the code there so
236 # that it doesn't need to be part of the public API. The API has
237 # never been defined so this could cause backwards compatibility
238 # issues.
239
240 def getallmatchingheaders(self, name):
241 """Find all header lines matching a given header name.
242
243 Look through the list of headers and find all lines matching a given
244 header name (and their continuation lines). A list of the lines is
245 returned, without interpretation. If the header does not occur, an
246 empty list is returned. If the header occurs multiple times, all
247 occurrences are returned. Case is not important in the header name.
248
249 """
250 name = name.lower() + ':'
251 n = len(name)
252 lst = []
253 hit = 0
254 for line in self.keys():
255 if line[:n].lower() == name:
256 hit = 1
257 elif not line[:1].isspace():
258 hit = 0
259 if hit:
260 lst.append(line)
261 return lst
262
263def parse_headers(fp, _class=HTTPMessage):
264 """Parses only RFC2822 headers from a file pointer.
265
266 email Parser wants to see strings rather than bytes.
267 But a TextIOWrapper around self.rfile would buffer too many bytes
268 from the stream, bytes which we later need to read as bytes.
269 So we read the correct bytes here, as bytes, for email Parser
270 to parse.
271
272 """
273 headers = []
274 while True:
275 line = fp.readline(_MAXLINE + 1)
276 if len(line) > _MAXLINE:
277 raise LineTooLong("header line")
278 headers.append(line)
279 if len(headers) > _MAXHEADERS:
280 raise HTTPException("got more than %d headers" % _MAXHEADERS)
281 if line in (b'\r\n', b'\n', b''):
282 break
283 hstring = bytes(b'').join(headers).decode('iso-8859-1')
284 return email_parser.Parser(_class=_class).parsestr(hstring)
285
286
287_strict_sentinel = object()
288
289class HTTPResponse(io.RawIOBase):
290
291 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
292
293 # The bytes from the socket object are iso-8859-1 strings.
294 # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
295 # text following RFC 2047. The basic status line parsing only
296 # accepts iso-8859-1.
297
298 def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None):
299 # If the response includes a content-length header, we need to
300 # make sure that the client doesn't read more than the
301 # specified number of bytes. If it does, it will block until
302 # the server times out and closes the connection. This will
303 # happen if a self.fp.read() is done (without a size) whether
304 # self.fp is buffered or not. So, no self.fp.read() by
305 # clients unless they know what they are doing.
306 self.fp = sock.makefile("rb")
307 self.debuglevel = debuglevel
308 if strict is not _strict_sentinel:
309 warnings.warn("the 'strict' argument isn't supported anymore; "
310 "http.client now always assumes HTTP/1.x compliant servers.",
311 DeprecationWarning, 2)
312 self._method = method
313
314 # The HTTPResponse object is returned via urllib. The clients
315 # of http and urllib expect different attributes for the
316 # headers. headers is used here and supports urllib. msg is
317 # provided as a backwards compatibility layer for http
318 # clients.
319
320 self.headers = self.msg = None
321
322 # from the Status-Line of the response
323 self.version = _UNKNOWN # HTTP-Version
324 self.status = _UNKNOWN # Status-Code
325 self.reason = _UNKNOWN # Reason-Phrase
326
327 self.chunked = _UNKNOWN # is "chunked" being used?
328 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
329 self.length = _UNKNOWN # number of bytes left in response
330 self.will_close = _UNKNOWN # conn will close at end of response
331
332 def _read_status(self):
333 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
334 if len(line) > _MAXLINE:
335 raise LineTooLong("status line")
336 if self.debuglevel > 0:
337 print("reply:", repr(line))
338 if not line:
339 # Presumably, the server closed the connection before
340 # sending a valid response.
341 raise BadStatusLine(line)
342 try:
343 version, status, reason = line.split(None, 2)
344 except ValueError:
345 try:
346 version, status = line.split(None, 1)
347 reason = ""
348 except ValueError:
349 # empty version will cause next test to fail.
350 version = ""
351 if not version.startswith("HTTP/"):
352 self._close_conn()
353 raise BadStatusLine(line)
354
355 # The status code is a three-digit number
356 try:
357 status = int(status)
358 if status < 100 or status > 999:
359 raise BadStatusLine(line)
360 except ValueError:
361 raise BadStatusLine(line)
362 return version, status, reason
363
364 def begin(self):
365 if self.headers is not None:
366 # we've already started reading the response
367 return
368
369 # read until we get a non-100 response
370 while True:
371 version, status, reason = self._read_status()
372 if status != CONTINUE:
373 break
374 # skip the header from the 100 response
375 while True:
376 skip = self.fp.readline(_MAXLINE + 1)
377 if len(skip) > _MAXLINE:
378 raise LineTooLong("header line")
379 skip = skip.strip()
380 if not skip:
381 break
382 if self.debuglevel > 0:
383 print("header:", skip)
384
385 self.code = self.status = status
386 self.reason = reason.strip()
387 if version in ("HTTP/1.0", "HTTP/0.9"):
388 # Some servers might still return "0.9", treat it as 1.0 anyway
389 self.version = 10
390 elif version.startswith("HTTP/1."):
391 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
392 else:
393 raise UnknownProtocol(version)
394
395 self.headers = self.msg = parse_headers(self.fp)
396
397 if self.debuglevel > 0:
398 for hdr in self.headers:
399 print("header:", hdr, end=" ")
400
401 # are we using the chunked-style of transfer encoding?
402 tr_enc = self.headers.get("transfer-encoding")
403 if tr_enc and tr_enc.lower() == "chunked":
404 self.chunked = True
405 self.chunk_left = None
406 else:
407 self.chunked = False
408
409 # will the connection close at the end of the response?
410 self.will_close = self._check_close()
411
412 # do we have a Content-Length?
413 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
414 self.length = None
415 length = self.headers.get("content-length")
416
417 # are we using the chunked-style of transfer encoding?
418 tr_enc = self.headers.get("transfer-encoding")
419 if length and not self.chunked:
420 try:
421 self.length = int(length)
422 except ValueError:
423 self.length = None
424 else:
425 if self.length < 0: # ignore nonsensical negative lengths
426 self.length = None
427 else:
428 self.length = None
429
430 # does the body have a fixed length? (of zero)
431 if (status == NO_CONTENT or status == NOT_MODIFIED or
432 100 <= status < 200 or # 1xx codes
433 self._method == "HEAD"):
434 self.length = 0
435
436 # if the connection remains open, and we aren't using chunked, and
437 # a content-length was not provided, then assume that the connection
438 # WILL close.
439 if (not self.will_close and
440 not self.chunked and
441 self.length is None):
442 self.will_close = True
443
444 def _check_close(self):
445 conn = self.headers.get("connection")
446 if self.version == 11:
447 # An HTTP/1.1 proxy is assumed to stay open unless
448 # explicitly closed.
449 conn = self.headers.get("connection")
450 if conn and "close" in conn.lower():
451 return True
452 return False
453
454 # Some HTTP/1.0 implementations have support for persistent
455 # connections, using rules different than HTTP/1.1.
456
457 # For older HTTP, Keep-Alive indicates persistent connection.
458 if self.headers.get("keep-alive"):
459 return False
460
461 # At least Akamai returns a "Connection: Keep-Alive" header,
462 # which was supposed to be sent by the client.
463 if conn and "keep-alive" in conn.lower():
464 return False
465
466 # Proxy-Connection is a netscape hack.
467 pconn = self.headers.get("proxy-connection")
468 if pconn and "keep-alive" in pconn.lower():
469 return False
470
471 # otherwise, assume it will close
472 return True
473
474 def _close_conn(self):
475 fp = self.fp
476 self.fp = None
477 fp.close()
478
479 def close(self):
480 super().close() # set "closed" flag
481 if self.fp:
482 self._close_conn()
483
484 # These implementations are for the benefit of io.BufferedReader.
485
486 # XXX This class should probably be revised to act more like
487 # the "raw stream" that BufferedReader expects.
488
489 def flush(self):
490 super().flush()
491 if self.fp:
492 self.fp.flush()
493
494 def readable(self):
495 return True
496
497 # End of "raw stream" methods
498
499 def isclosed(self):
500 """True if the connection is closed."""
501 # NOTE: it is possible that we will not ever call self.close(). This
502 # case occurs when will_close is TRUE, length is None, and we
503 # read up to the last byte, but NOT past it.
504 #
505 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
506 # called, meaning self.isclosed() is meaningful.
507 return self.fp is None
508
509 def read(self, amt=None):
510 if self.fp is None:
511 return bytes(b"")
512
513 if self._method == "HEAD":
514 self._close_conn()
515 return bytes(b"")
516
517 if amt is not None:
518 # Amount is given, so call base class version
519 # (which is implemented in terms of self.readinto)
520 return bytes(super(HTTPResponse, self).read(amt))
521 else:
522 # Amount is not given (unbounded read) so we must check self.length
523 # and self.chunked
524
525 if self.chunked:
526 return self._readall_chunked()
527
528 if self.length is None:
529 s = self.fp.read()
530 else:
531 try:
532 s = self._safe_read(self.length)
533 except IncompleteRead:
534 self._close_conn()
535 raise
536 self.length = 0
537 self._close_conn() # we read everything
538 return bytes(s)
539
540 def readinto(self, b):
541 if self.fp is None:
542 return 0
543
544 if self._method == "HEAD":
545 self._close_conn()
546 return 0
547
548 if self.chunked:
549 return self._readinto_chunked(b)
550
551 if self.length is not None:
552 if len(b) > self.length:
553 # clip the read to the "end of response"
554 b = memoryview(b)[0:self.length]
555
556 # we do not use _safe_read() here because this may be a .will_close
557 # connection, and the user is reading more bytes than will be provided
558 # (for example, reading in 1k chunks)
559
560 if PY2:
561 data = self.fp.read(len(b))
562 n = len(data)
563 b[:n] = data
564 else:
565 n = self.fp.readinto(b)
566
567 if not n and b:
568 # Ideally, we would raise IncompleteRead if the content-length
569 # wasn't satisfied, but it might break compatibility.
570 self._close_conn()
571 elif self.length is not None:
572 self.length -= n
573 if not self.length:
574 self._close_conn()
575 return n
576
577 def _read_next_chunk_size(self):
578 # Read the next chunk size from the file
579 line = self.fp.readline(_MAXLINE + 1)
580 if len(line) > _MAXLINE:
581 raise LineTooLong("chunk size")
582 i = line.find(b";")
583 if i >= 0:
584 line = line[:i] # strip chunk-extensions
585 try:
586 return int(line, 16)
587 except ValueError:
588 # close the connection as protocol synchronisation is
589 # probably lost
590 self._close_conn()
591 raise
592
593 def _read_and_discard_trailer(self):
594 # read and discard trailer up to the CRLF terminator
595 ### note: we shouldn't have any trailers!
596 while True:
597 line = self.fp.readline(_MAXLINE + 1)
598 if len(line) > _MAXLINE:
599 raise LineTooLong("trailer line")
600 if not line:
601 # a vanishingly small number of sites EOF without
602 # sending the trailer
603 break
604 if line in (b'\r\n', b'\n', b''):
605 break
606
607 def _readall_chunked(self):
608 assert self.chunked != _UNKNOWN
609 chunk_left = self.chunk_left
610 value = []
611 while True:
612 if chunk_left is None:
613 try:
614 chunk_left = self._read_next_chunk_size()
615 if chunk_left == 0:
616 break
617 except ValueError:
618 raise IncompleteRead(bytes(b'').join(value))
619 value.append(self._safe_read(chunk_left))
620
621 # we read the whole chunk, get another
622 self._safe_read(2) # toss the CRLF at the end of the chunk
623 chunk_left = None
624
625 self._read_and_discard_trailer()
626
627 # we read everything; close the "file"
628 self._close_conn()
629
630 return bytes(b'').join(value)
631
632 def _readinto_chunked(self, b):
633 assert self.chunked != _UNKNOWN
634 chunk_left = self.chunk_left
635
636 total_bytes = 0
637 mvb = memoryview(b)
638 while True:
639 if chunk_left is None:
640 try:
641 chunk_left = self._read_next_chunk_size()
642 if chunk_left == 0:
643 break
644 except ValueError:
645 raise IncompleteRead(bytes(b[0:total_bytes]))
646
647 if len(mvb) < chunk_left:
648 n = self._safe_readinto(mvb)
649 self.chunk_left = chunk_left - n
650 return total_bytes + n
651 elif len(mvb) == chunk_left:
652 n = self._safe_readinto(mvb)
653 self._safe_read(2) # toss the CRLF at the end of the chunk
654 self.chunk_left = None
655 return total_bytes + n
656 else:
657 temp_mvb = mvb[0:chunk_left]
658 n = self._safe_readinto(temp_mvb)
659 mvb = mvb[n:]
660 total_bytes += n
661
662 # we read the whole chunk, get another
663 self._safe_read(2) # toss the CRLF at the end of the chunk
664 chunk_left = None
665
666 self._read_and_discard_trailer()
667
668 # we read everything; close the "file"
669 self._close_conn()
670
671 return total_bytes
672
673 def _safe_read(self, amt):
674 """Read the number of bytes requested, compensating for partial reads.
675
676 Normally, we have a blocking socket, but a read() can be interrupted
677 by a signal (resulting in a partial read).
678
679 Note that we cannot distinguish between EOF and an interrupt when zero
680 bytes have been read. IncompleteRead() will be raised in this
681 situation.
682
683 This function should be used when <amt> bytes "should" be present for
684 reading. If the bytes are truly not available (due to EOF), then the
685 IncompleteRead exception can be used to detect the problem.
686 """
687 s = []
688 while amt > 0:
689 chunk = self.fp.read(min(amt, MAXAMOUNT))
690 if not chunk:
691 raise IncompleteRead(bytes(b'').join(s), amt)
692 s.append(chunk)
693 amt -= len(chunk)
694 return bytes(b"").join(s)
695
696 def _safe_readinto(self, b):
697 """Same as _safe_read, but for reading into a buffer."""
698 total_bytes = 0
699 mvb = memoryview(b)
700 while total_bytes < len(b):
701 if MAXAMOUNT < len(mvb):
702 temp_mvb = mvb[0:MAXAMOUNT]
703 if PY2:
704 data = self.fp.read(len(temp_mvb))
705 n = len(data)
706 temp_mvb[:n] = data
707 else:
708 n = self.fp.readinto(temp_mvb)
709 else:
710 if PY2:
711 data = self.fp.read(len(mvb))
712 n = len(data)
713 mvb[:n] = data
714 else:
715 n = self.fp.readinto(mvb)
716 if not n:
717 raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
718 mvb = mvb[n:]
719 total_bytes += n
720 return total_bytes
721
722 def fileno(self):
723 return self.fp.fileno()
724
725 def getheader(self, name, default=None):
726 if self.headers is None:
727 raise ResponseNotReady()
728 headers = self.headers.get_all(name) or default
729 if isinstance(headers, str) or not hasattr(headers, '__iter__'):
730 return headers
731 else:
732 return ', '.join(headers)
733
734 def getheaders(self):
735 """Return list of (header, value) tuples."""
736 if self.headers is None:
737 raise ResponseNotReady()
738 return list(self.headers.items())
739
740 # We override IOBase.__iter__ so that it doesn't check for closed-ness
741
742 def __iter__(self):
743 return self
744
745 # For compatibility with old-style urllib responses.
746
747 def info(self):
748 return self.headers
749
750 def geturl(self):
751 return self.url
752
753 def getcode(self):
754 return self.status
755
756class HTTPConnection(object):
757
758 _http_vsn = 11
759 _http_vsn_str = 'HTTP/1.1'
760
761 response_class = HTTPResponse
762 default_port = HTTP_PORT
763 auto_open = 1
764 debuglevel = 0
765
766 def __init__(self, host, port=None, strict=_strict_sentinel,
767 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
768 if strict is not _strict_sentinel:
769 warnings.warn("the 'strict' argument isn't supported anymore; "
770 "http.client now always assumes HTTP/1.x compliant servers.",
771 DeprecationWarning, 2)
772 self.timeout = timeout
773 self.source_address = source_address
774 self.sock = None
775 self._buffer = []
776 self.__response = None
777 self.__state = _CS_IDLE
778 self._method = None
779 self._tunnel_host = None
780 self._tunnel_port = None
781 self._tunnel_headers = {}
782
783 self._set_hostport(host, port)
784
785 def set_tunnel(self, host, port=None, headers=None):
786 """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
787
788 The headers argument should be a mapping of extra HTTP headers
789 to send with the CONNECT request.
790 """
791 self._tunnel_host = host
792 self._tunnel_port = port
793 if headers:
794 self._tunnel_headers = headers
795 else:
796 self._tunnel_headers.clear()
797
798 def _set_hostport(self, host, port):
799 if port is None:
800 i = host.rfind(':')
801 j = host.rfind(']') # ipv6 addresses have [...]
802 if i > j:
803 try:
804 port = int(host[i+1:])
805 except ValueError:
806 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
807 port = self.default_port
808 else:
809 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
810 host = host[:i]
811 else:
812 port = self.default_port
813 if host and host[0] == '[' and host[-1] == ']':
814 host = host[1:-1]
815 self.host = host
816 self.port = port
817
818 def set_debuglevel(self, level):
819 self.debuglevel = level
820
821 def _tunnel(self):
822 self._set_hostport(self._tunnel_host, self._tunnel_port)
823 connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)
824 connect_bytes = connect_str.encode("ascii")
825 self.send(connect_bytes)
826 for header, value in self._tunnel_headers.items():
827 header_str = "%s: %s\r\n" % (header, value)
828 header_bytes = header_str.encode("latin-1")
829 self.send(header_bytes)
830 self.send(bytes(b'\r\n'))
831
832 response = self.response_class(self.sock, method=self._method)
833 (version, code, message) = response._read_status()
834
835 if code != 200:
836 self.close()
837 raise socket.error("Tunnel connection failed: %d %s" % (code,
838 message.strip()))
839 while True:
840 line = response.fp.readline(_MAXLINE + 1)
841 if len(line) > _MAXLINE:
842 raise LineTooLong("header line")
843 if not line:
844 # for sites which EOF without sending a trailer
845 break
846 if line in (b'\r\n', b'\n', b''):
847 break
848
849 def connect(self):
850 """Connect to the host and port specified in __init__."""
851 self.sock = socket_create_connection((self.host,self.port),
852 self.timeout, self.source_address)
853 if self._tunnel_host:
854 self._tunnel()
855
856 def close(self):
857 """Close the connection to the HTTP server."""
858 if self.sock:
859 self.sock.close() # close it manually... there may be other refs
860 self.sock = None
861 if self.__response:
862 self.__response.close()
863 self.__response = None
864 self.__state = _CS_IDLE
865
866 def send(self, data):
867 """Send `data' to the server.
868 ``data`` can be a string object, a bytes object, an array object, a
869 file-like object that supports a .read() method, or an iterable object.
870 """
871
872 if self.sock is None:
873 if self.auto_open:
874 self.connect()
875 else:
876 raise NotConnected()
877
878 if self.debuglevel > 0:
879 print("send:", repr(data))
880 blocksize = 8192
881 # Python 2.7 array objects have a read method which is incompatible
882 # with the 2-arg calling syntax below.
883 if hasattr(data, "read") and not isinstance(data, array):
884 if self.debuglevel > 0:
885 print("sendIng a read()able")
886 encode = False
887 try:
888 mode = data.mode
889 except AttributeError:
890 # io.BytesIO and other file-like objects don't have a `mode`
891 # attribute.
892 pass
893 else:
894 if "b" not in mode:
895 encode = True
896 if self.debuglevel > 0:
897 print("encoding file using iso-8859-1")
898 while 1:
899 datablock = data.read(blocksize)
900 if not datablock:
901 break
902 if encode:
903 datablock = datablock.encode("iso-8859-1")
904 self.sock.sendall(datablock)
905 return
906 try:
907 self.sock.sendall(data)
908 except TypeError:
909 if isinstance(data, Iterable):
910 for d in data:
911 self.sock.sendall(d)
912 else:
913 raise TypeError("data should be a bytes-like object "
914 "or an iterable, got %r" % type(data))
915
916 def _output(self, s):
917 """Add a line of output to the current request buffer.
918
919 Assumes that the line does *not* end with \\r\\n.
920 """
921 self._buffer.append(s)
922
923 def _send_output(self, message_body=None):
924 """Send the currently buffered request and clear the buffer.
925
926 Appends an extra \\r\\n to the buffer.
927 A message_body may be specified, to be appended to the request.
928 """
929 self._buffer.extend((bytes(b""), bytes(b"")))
930 msg = bytes(b"\r\n").join(self._buffer)
931 del self._buffer[:]
932 # If msg and message_body are sent in a single send() call,
933 # it will avoid performance problems caused by the interaction
934 # between delayed ack and the Nagle algorithm.
935 if isinstance(message_body, bytes):
936 msg += message_body
937 message_body = None
938 self.send(msg)
939 if message_body is not None:
940 # message_body was not a string (i.e. it is a file), and
941 # we must run the risk of Nagle.
942 self.send(message_body)
943
944 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
945 """Send a request to the server.
946
947 `method' specifies an HTTP request method, e.g. 'GET'.
948 `url' specifies the object being requested, e.g. '/index.html'.
949 `skip_host' if True does not add automatically a 'Host:' header
950 `skip_accept_encoding' if True does not add automatically an
951 'Accept-Encoding:' header
952 """
953
954 # if a prior response has been completed, then forget about it.
955 if self.__response and self.__response.isclosed():
956 self.__response = None
957
958
959 # in certain cases, we cannot issue another request on this connection.
960 # this occurs when:
961 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
962 # 2) a response to a previous request has signalled that it is going
963 # to close the connection upon completion.
964 # 3) the headers for the previous response have not been read, thus
965 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
966 #
967 # if there is no prior response, then we can request at will.
968 #
969 # if point (2) is true, then we will have passed the socket to the
970 # response (effectively meaning, "there is no prior response"), and
971 # will open a new one when a new request is made.
972 #
973 # Note: if a prior response exists, then we *can* start a new request.
974 # We are not allowed to begin fetching the response to this new
975 # request, however, until that prior response is complete.
976 #
977 if self.__state == _CS_IDLE:
978 self.__state = _CS_REQ_STARTED
979 else:
980 raise CannotSendRequest(self.__state)
981
982 # Save the method we use, we need it later in the response phase
983 self._method = method
984 if not url:
985 url = '/'
986 request = '%s %s %s' % (method, url, self._http_vsn_str)
987
988 # Non-ASCII characters should have been eliminated earlier
989 self._output(request.encode('ascii'))
990
991 if self._http_vsn == 11:
992 # Issue some standard headers for better HTTP/1.1 compliance
993
994 if not skip_host:
995 # this header is issued *only* for HTTP/1.1
996 # connections. more specifically, this means it is
997 # only issued when the client uses the new
998 # HTTPConnection() class. backwards-compat clients
999 # will be using HTTP/1.0 and those clients may be
1000 # issuing this header themselves. we should NOT issue
1001 # it twice; some web servers (such as Apache) barf
1002 # when they see two Host: headers
1003
1004 # If we need a non-standard port,include it in the
1005 # header. If the request is going through a proxy,
1006 # but the host of the actual URL, not the host of the
1007 # proxy.
1008
1009 netloc = ''
1010 if url.startswith('http'):
1011 nil, netloc, nil, nil, nil = urlsplit(url)
1012
1013 if netloc:
1014 try:
1015 netloc_enc = netloc.encode("ascii")
1016 except UnicodeEncodeError:
1017 netloc_enc = netloc.encode("idna")
1018 self.putheader('Host', netloc_enc)
1019 else:
1020 try:
1021 host_enc = self.host.encode("ascii")
1022 except UnicodeEncodeError:
1023 host_enc = self.host.encode("idna")
1024
1025 # As per RFC 273, IPv6 address should be wrapped with []
1026 # when used as Host header
1027
1028 if self.host.find(':') >= 0:
1029 host_enc = bytes(b'[' + host_enc + b']')
1030
1031 if self.port == self.default_port:
1032 self.putheader('Host', host_enc)
1033 else:
1034 host_enc = host_enc.decode("ascii")
1035 self.putheader('Host', "%s:%s" % (host_enc, self.port))
1036
1037 # note: we are assuming that clients will not attempt to set these
1038 # headers since *this* library must deal with the
1039 # consequences. this also means that when the supporting
1040 # libraries are updated to recognize other forms, then this
1041 # code should be changed (removed or updated).
1042
1043 # we only want a Content-Encoding of "identity" since we don't
1044 # support encodings such as x-gzip or x-deflate.
1045 if not skip_accept_encoding:
1046 self.putheader('Accept-Encoding', 'identity')
1047
1048 # we can accept "chunked" Transfer-Encodings, but no others
1049 # NOTE: no TE header implies *only* "chunked"
1050 #self.putheader('TE', 'chunked')
1051
1052 # if TE is supplied in the header, then it must appear in a
1053 # Connection header.
1054 #self.putheader('Connection', 'TE')
1055
1056 else:
1057 # For HTTP/1.0, the server will assume "not chunked"
1058 pass
1059
1060 def putheader(self, header, *values):
1061 """Send a request header line to the server.
1062
1063 For example: h.putheader('Accept', 'text/html')
1064 """
1065 if self.__state != _CS_REQ_STARTED:
1066 raise CannotSendHeader()
1067
1068 if hasattr(header, 'encode'):
1069 header = header.encode('ascii')
1070 values = list(values)
1071 for i, one_value in enumerate(values):
1072 if hasattr(one_value, 'encode'):
1073 values[i] = one_value.encode('latin-1')
1074 elif isinstance(one_value, int):
1075 values[i] = str(one_value).encode('ascii')
1076 value = bytes(b'\r\n\t').join(values)
1077 header = header + bytes(b': ') + value
1078 self._output(header)
1079
1080 def endheaders(self, message_body=None):
1081 """Indicate that the last header line has been sent to the server.
1082
1083 This method sends the request to the server. The optional message_body
1084 argument can be used to pass a message body associated with the
1085 request. The message body will be sent in the same packet as the
1086 message headers if it is a string, otherwise it is sent as a separate
1087 packet.
1088 """
1089 if self.__state == _CS_REQ_STARTED:
1090 self.__state = _CS_REQ_SENT
1091 else:
1092 raise CannotSendHeader()
1093 self._send_output(message_body)
1094
1095 def request(self, method, url, body=None, headers={}):
1096 """Send a complete request to the server."""
1097 self._send_request(method, url, body, headers)
1098
1099 def _set_content_length(self, body):
1100 # Set the content-length based on the body.
1101 thelen = None
1102 try:
1103 thelen = str(len(body))
1104 except TypeError as te:
1105 # If this is a file-like object, try to
1106 # fstat its file descriptor
1107 try:
1108 thelen = str(os.fstat(body.fileno()).st_size)
1109 except (AttributeError, OSError):
1110 # Don't send a length if this failed
1111 if self.debuglevel > 0: print("Cannot stat!!")
1112
1113 if thelen is not None:
1114 self.putheader('Content-Length', thelen)
1115
1116 def _send_request(self, method, url, body, headers):
1117 # Honor explicitly requested Host: and Accept-Encoding: headers.
1118 header_names = dict.fromkeys([k.lower() for k in headers])
1119 skips = {}
1120 if 'host' in header_names:
1121 skips['skip_host'] = 1
1122 if 'accept-encoding' in header_names:
1123 skips['skip_accept_encoding'] = 1
1124
1125 self.putrequest(method, url, **skips)
1126
1127 if body is not None and ('content-length' not in header_names):
1128 self._set_content_length(body)
1129 for hdr, value in headers.items():
1130 self.putheader(hdr, value)
1131 if isinstance(body, str):
1132 # RFC 2616 Section 3.7.1 says that text default has a
1133 # default charset of iso-8859-1.
1134 body = body.encode('iso-8859-1')
1135 self.endheaders(body)
1136
1137 def getresponse(self):
1138 """Get the response from the server.
1139
1140 If the HTTPConnection is in the correct state, returns an
1141 instance of HTTPResponse or of whatever object is returned by
1142 class the response_class variable.
1143
1144 If a request has not been sent or if a previous response has
1145 not be handled, ResponseNotReady is raised. If the HTTP
1146 response indicates that the connection should be closed, then
1147 it will be closed before the response is returned. When the
1148 connection is closed, the underlying socket is closed.
1149 """
1150
1151 # if a prior response has been completed, then forget about it.
1152 if self.__response and self.__response.isclosed():
1153 self.__response = None
1154
1155 # if a prior response exists, then it must be completed (otherwise, we
1156 # cannot read this response's header to determine the connection-close
1157 # behavior)
1158 #
1159 # note: if a prior response existed, but was connection-close, then the
1160 # socket and response were made independent of this HTTPConnection
1161 # object since a new request requires that we open a whole new
1162 # connection
1163 #
1164 # this means the prior response had one of two states:
1165 # 1) will_close: this connection was reset and the prior socket and
1166 # response operate independently
1167 # 2) persistent: the response was retained and we await its
1168 # isclosed() status to become true.
1169 #
1170 if self.__state != _CS_REQ_SENT or self.__response:
1171 raise ResponseNotReady(self.__state)
1172
1173 if self.debuglevel > 0:
1174 response = self.response_class(self.sock, self.debuglevel,
1175 method=self._method)
1176 else:
1177 response = self.response_class(self.sock, method=self._method)
1178
1179 response.begin()
1180 assert response.will_close != _UNKNOWN
1181 self.__state = _CS_IDLE
1182
1183 if response.will_close:
1184 # this effectively passes the connection to the response
1185 self.close()
1186 else:
1187 # remember this, so we can tell when it is complete
1188 self.__response = response
1189
1190 return response
1191
1192try:
1193 import ssl
1194 from ssl import SSLContext
1195except ImportError:
1196 pass
1197else:
1198 class HTTPSConnection(HTTPConnection):
1199 "This class allows communication via SSL."
1200
1201 default_port = HTTPS_PORT
1202
1203 # XXX Should key_file and cert_file be deprecated in favour of context?
1204
1205 def __init__(self, host, port=None, key_file=None, cert_file=None,
1206 strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1207 source_address=None, **_3to2kwargs):
1208 if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname']
1209 else: check_hostname = None
1210 if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context']
1211 else: context = None
1212 super(HTTPSConnection, self).__init__(host, port, strict, timeout,
1213 source_address)
1214 self.key_file = key_file
1215 self.cert_file = cert_file
1216 if context is None:
1217 # Some reasonable defaults
1218 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
1219 context.options |= ssl.OP_NO_SSLv2
1220 will_verify = context.verify_mode != ssl.CERT_NONE
1221 if check_hostname is None:
1222 check_hostname = will_verify
1223 elif check_hostname and not will_verify:
1224 raise ValueError("check_hostname needs a SSL context with "
1225 "either CERT_OPTIONAL or CERT_REQUIRED")
1226 if key_file or cert_file:
1227 context.load_cert_chain(cert_file, key_file)
1228 self._context = context
1229 self._check_hostname = check_hostname
1230
1231 def connect(self):
1232 "Connect to a host on a given (SSL) port."
1233
1234 sock = socket_create_connection((self.host, self.port),
1235 self.timeout, self.source_address)
1236
1237 if self._tunnel_host:
1238 self.sock = sock
1239 self._tunnel()
1240
1241 server_hostname = self.host if ssl.HAS_SNI else None
1242 self.sock = self._context.wrap_socket(sock,
1243 server_hostname=server_hostname)
1244 try:
1245 if self._check_hostname:
1246 ssl.match_hostname(self.sock.getpeercert(), self.host)
1247 except Exception:
1248 self.sock.shutdown(socket.SHUT_RDWR)
1249 self.sock.close()
1250 raise
1251
1252 __all__.append("HTTPSConnection")
1253
1254
1255 # ######################################
1256 # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
1257 # # doesn't exist in the Py2.7 stdlib
1258 # class HTTPSConnection(HTTPConnection):
1259 # "This class allows communication via SSL."
1260
1261 # default_port = HTTPS_PORT
1262
1263 # def __init__(self, host, port=None, key_file=None, cert_file=None,
1264 # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1265 # source_address=None):
1266 # HTTPConnection.__init__(self, host, port, strict, timeout,
1267 # source_address)
1268 # self.key_file = key_file
1269 # self.cert_file = cert_file
1270
1271 # def connect(self):
1272 # "Connect to a host on a given (SSL) port."
1273
1274 # sock = socket_create_connection((self.host, self.port),
1275 # self.timeout, self.source_address)
1276 # if self._tunnel_host:
1277 # self.sock = sock
1278 # self._tunnel()
1279 # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1280
1281 # __all__.append("HTTPSConnection")
1282 # ######################################
1283
1284
1285class HTTPException(Exception):
1286 # Subclasses that define an __init__ must call Exception.__init__
1287 # or define self.args. Otherwise, str() will fail.
1288 pass
1289
1290class NotConnected(HTTPException):
1291 pass
1292
1293class InvalidURL(HTTPException):
1294 pass
1295
1296class UnknownProtocol(HTTPException):
1297 def __init__(self, version):
1298 self.args = version,
1299 self.version = version
1300
1301class UnknownTransferEncoding(HTTPException):
1302 pass
1303
1304class UnimplementedFileMode(HTTPException):
1305 pass
1306
1307class IncompleteRead(HTTPException):
1308 def __init__(self, partial, expected=None):
1309 self.args = partial,
1310 self.partial = partial
1311 self.expected = expected
1312 def __repr__(self):
1313 if self.expected is not None:
1314 e = ', %i more expected' % self.expected
1315 else:
1316 e = ''
1317 return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
1318 def __str__(self):
1319 return repr(self)
1320
1321class ImproperConnectionState(HTTPException):
1322 pass
1323
1324class CannotSendRequest(ImproperConnectionState):
1325 pass
1326
1327class CannotSendHeader(ImproperConnectionState):
1328 pass
1329
1330class ResponseNotReady(ImproperConnectionState):
1331 pass
1332
1333class BadStatusLine(HTTPException):
1334 def __init__(self, line):
1335 if not line:
1336 line = repr(line)
1337 self.args = line,
1338 self.line = line
1339
1340class LineTooLong(HTTPException):
1341 def __init__(self, line_type):
1342 HTTPException.__init__(self, "got more than %d bytes when reading %s"
1343 % (_MAXLINE, line_type))
1344
1345# for backwards compatibility
1346error = HTTPException