Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/httplib2/__init__.py: 19%
914 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
1# -*- coding: utf-8 -*-
2"""Small, fast HTTP client library for Python."""
4__author__ = "Joe Gregorio (joe@bitworking.org)"
5__copyright__ = "Copyright 2006, Joe Gregorio"
6__contributors__ = [
7 "Thomas Broyer (t.broyer@ltgt.net)",
8 "James Antill",
9 "Xavier Verges Farrero",
10 "Jonathan Feinberg",
11 "Blair Zajac",
12 "Sam Ruby",
13 "Louis Nyffenegger",
14 "Mark Pilgrim",
15 "Alex Yu",
16 "Lai Han",
17]
18__license__ = "MIT"
19__version__ = "0.22.0"
21import base64
22import calendar
23import copy
24import email
25import email.feedparser
26from email import header
27import email.message
28import email.utils
29import errno
30from gettext import gettext as _
31import gzip
32from hashlib import md5 as _md5
33from hashlib import sha1 as _sha
34import hmac
35import http.client
36import io
37import os
38import random
39import re
40import socket
41import ssl
42import sys
43import time
44import urllib.parse
45import zlib
47try:
48 import socks
49except ImportError:
50 # TODO: remove this fallback and copypasted socksipy module upon py2/3 merge,
51 # idea is to have soft-dependency on any compatible module called socks
52 from . import socks
53from . import auth
54from .error import *
55from .iri2uri import iri2uri
def has_timeout(timeout):
    """Return True when *timeout* is a real timeout value.

    Both ``None`` and the socket module's shared ``_GLOBAL_DEFAULT_TIMEOUT``
    sentinel (when the running Python defines it) mean "no explicit timeout".
    """
    if timeout is None:
        return False
    if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
        return timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
    return True
# Public names exported by ``from httplib2 import *``.
__all__ = [
    "debuglevel",
    "FailedToDecompressContent",
    "Http",
    "HttpLib2Error",
    "ProxyInfo",
    "RedirectLimit",
    "RedirectMissingLocation",
    "Response",
    "RETRIES",
    "UnimplementedDigestAuthOptionError",
    "UnimplementedHmacDigestAuthOptionError",
]

# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0

# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
RETRIES = 2

# Open Items:
# -----------

# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)

# Pluggable cache storage (supports storing the cache in
# flat files by default. We need a plug-in architecture
# that can support Berkeley DB and Squid)

# == Known Issues ==
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
# Does not handle Cache-Control: max-stale
# Does not use Age: headers when calculating cache freshness.

# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5

# Which headers are hop-by-hop headers by default
HOP_BY_HOP = [
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
]

# Methods defined as safe (no side effects) by
# https://tools.ietf.org/html/rfc7231#section-8.1.3
SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")

# Status codes treated as redirects.
# To change, assign to `Http().redirect_codes`
REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308))
124from httplib2 import certs
126CA_CERTS = certs.where()
128# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated.
129# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means:
130# > Selects the highest protocol version that both the client and server support.
131# > Despite the name, this option can select “TLS” protocols as well as “SSL”.
132# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23
134# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated.
135# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections.
136# > The protocol enables CERT_REQUIRED and check_hostname by default.
137# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS
139DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23")
def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Build an ssl.SSLContext for client-side HTTPS connections.

    Args:
        disable_ssl_certificate_validation: truthy disables both certificate
            verification and hostname checking.
        ca_certs: path to the CA bundle handed to load_verify_locations().
        cert_file, key_file, key_password: optional client certificate chain.
        maximum_version, minimum_version: TLS version bounds; either an
            ssl.TLSVersion member or its attribute name as a string.

    Raises:
        RuntimeError: when the running Python/OpenSSL lacks a required feature.
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation and hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation
    context.verify_mode = ssl.CERT_NONE if disable_ssl_certificate_validation else ssl.CERT_REQUIRED

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    if maximum_version is not None:
        if hasattr(context, "maximum_version"):
            if isinstance(maximum_version, str):
                # Accept the TLSVersion member name, e.g. "TLSv1_2".
                maximum_version = getattr(ssl.TLSVersion, maximum_version)
            context.maximum_version = maximum_version
        else:
            raise RuntimeError("setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    if minimum_version is not None:
        if hasattr(context, "minimum_version"):
            if isinstance(minimum_version, str):
                minimum_version = getattr(ssl.TLSVersion, minimum_version)
            context.minimum_version = minimum_version
        else:
            raise RuntimeError("setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer")

    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation

    context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context
def _get_end2end_headers(response):
    """Return the names of headers in *response* that are end-to-end.

    Hop-by-hop headers are the static HOP_BY_HOP list plus any header names
    the response itself lists in its Connection header.
    """
    # Local name chosen so the `email.header` module import is not shadowed.
    hop_names = list(HOP_BY_HOP)
    hop_names.extend(name.strip() for name in response.get("connection", "").split(","))
    return [name for name in response.keys() if name not in hop_names]
197_missing = object()
200def _errno_from_exception(e):
201 # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass
202 errno = getattr(e, "errno", _missing)
203 if errno is not _missing:
204 return errno
206 # socket.error and common wrap in .args
207 args = getattr(e, "args", None)
208 if args:
209 return _errno_from_exception(args[0])
211 # pysocks.ProxyError wraps in .socket_err
212 # https://github.com/httplib2/httplib2/pull/202
213 socket_err = getattr(e, "socket_err", None)
214 if socket_err:
215 return _errno_from_exception(socket_err)
217 return None
# Appendix B of RFC 3986: a permissive regex splitting any URI reference.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components that are absent come back as None.
    """
    pieces = URI.match(uri).groups()
    return pieces[1], pieces[3], pieces[4], pieces[6], pieces[8]
def urlnorm(uri):
    """Normalize an absolute URI for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri): scheme and
    authority lowercased (they are case-insensitive per RFC 3986), an empty
    path replaced by "/", and the fragment dropped from defrag_uri.

    Raises:
        RelativeURIError: when *uri* lacks a scheme or authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # FIX: the original lowercased `scheme` twice; once is enough.
    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r"^\w+://")
re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII)


def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    if isinstance(filename, bytes):
        raw_bytes = filename
        text = filename.decode("utf-8")
    else:
        raw_bytes = filename.encode("utf-8")
        text = filename
    # The digest is taken over the original (unstripped) name so distinct
    # URLs never collide even when their stripped forms match.
    digest = _md5(raw_bytes).hexdigest()
    stripped = re_unsafe.sub("", re_url_scheme.sub("", text))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return ",".join((stripped[:90], digest))
277NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+")
280def _normalize_headers(headers):
281 return dict(
282 [
283 (_convert_byte_str(key).lower(), NORMALIZE_SPACE.sub(_convert_byte_str(value), " ").strip(),)
284 for (key, value) in headers.items()
285 ]
286 )
289def _convert_byte_str(s):
290 if not isinstance(s, str):
291 return str(s, "utf-8")
292 return s
295def _parse_cache_control(headers):
296 retval = {}
297 if "cache-control" in headers:
298 parts = headers["cache-control"].split(",")
299 parts_with_args = [
300 tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")
301 ]
302 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
303 retval = dict(parts_with_args + parts_wo_args)
304 return retval
# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, useful for testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of "FRESH" (serve from cache), "STALE" (revalidate/refetch)
    or "TRANSPARENT" (bypass the cache).

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
        # HTTP/1.0-style Pragma: no-cache; mirror it into Cache-Control
        # (mutates the caller's request_headers) so upstream caches see it.
        retval = "TRANSPARENT"
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        retval = "FRESH"
    elif "date" in response_headers:
        date = calendar.timegm(email.utils.parsedate_tz(response_headers["date"]))
        now = time.time()
        current_age = max(0, now - date)
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.utils.parsedate_tz(response_headers["expires"])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides whatever the response allowed.
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh: the client wants the entry to remain fresh for at least
        # this many more seconds, so age the entry forward before comparing.
        if "min-fresh" in cc:
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
def _decompressContent(response, new_content):
    """Decompress *new_content* according to the Content-Encoding header.

    Supports 'gzip' and 'deflate'; any other encoding passes through
    unchanged.  On success, content-length is rewritten, the original
    encoding is preserved under '-content-encoding', and content-encoding
    is removed from *response* (mutated in place).

    Raises:
        FailedToDecompressContent: when decoding fails.
    """
    content = new_content
    try:
        encoding = response.get("content-encoding", None)
        if encoding in ["gzip", "deflate"]:
            if encoding == "gzip":
                content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
            if encoding == "deflate":
                try:
                    # RFC-compliant deflate is zlib-wrapped; some servers send
                    # raw deflate, so retry without the zlib header on failure.
                    content = zlib.decompress(content, zlib.MAX_WBITS)
                except (IOError, zlib.error):
                    content = zlib.decompress(content, -zlib.MAX_WBITS)
            response["content-length"] = str(len(content))
            # Record the historical presence of the encoding in a way that won't interfere.
            response["-content-encoding"] = response["content-encoding"]
            del response["content-encoding"]
    except (IOError, zlib.error):
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % response.get("content-encoding"),
            response,
            content,
        )
    return content
def _bind_write_headers(msg):
    """Return a replacement for email Generator._write_headers bound to *msg*.

    Emits each header through email.header.Header so non-ASCII values are
    RFC 2047-encoded instead of raising UnicodeEncodeError.  Used by
    _updateCache as a fallback serializer.
    """

    def _write_headers(self):
        # Self refers to the Generator object.
        for h, v in msg.items():
            print("%s:" % h, end=" ", file=self._fp)
            if isinstance(v, header.Header):
                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
            else:
                # email.Header got lots of smarts, so use it.
                headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h)
                print(headers.encode(), file=self._fp)
        # A blank line always separates headers from body.
        print(file=self._fp)

    return _write_headers
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Serialize the response (status line, headers, body) into *cache*.

    Honors 'no-store' in either direction's Cache-Control by deleting any
    existing entry instead of writing one.  Does nothing when *cachekey*
    is falsy.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if "no-store" in cc or "no-store" in cc_response:
            cache.delete(cachekey)
        else:
            info = email.message.Message()
            for key, value in response_headers.items():
                # Transport-specific headers are not part of the cached entity.
                if key not in ["status", "content-encoding", "transfer-encoding"]:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get("vary", None)
            if vary:
                vary_headers = vary.lower().replace(" ", "").split(",")
                # NOTE: the loop variable shadows the module-level
                # `email.header` import within this function body.
                for header in vary_headers:
                    key = "-varied-%s" % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            status = response_headers.status
            if status == 304:
                # A 304 revalidation refreshes the cached 200 entry.
                status = 200

            status_header = "status: %d\r\n" % status

            try:
                header_str = info.as_string()
            except UnicodeEncodeError:
                # Fall back to RFC 2047 encoding for non-ASCII header values.
                setattr(info, "_write_headers", _bind_write_headers(info))
                header_str = info.as_string()

            # Normalize any lone CR or LF to CRLF before joining with the body.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content])

            cache.set(cachekey, text)
474def _cnonce():
475 dig = _md5(
476 ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8")
477 ).hexdigest()
478 return dig[:16]
481def _wsse_username_token(cnonce, iso_now, password):
482 return (
483 base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8")
484 )
487# For credentials we need two things, first
488# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
489# Then we also need a list of URIs that have already demanded authentication
490# That list is tricky since sub-URIs can take the same auth, or the
491# auth scheme may change as you descend the tree.
492# So we also need each Auth instance to be able to tell us
493# how close to the 'top' it is.
class Authentication(object):
    """Base class for one authentication scheme bound to a (host, path) scope.

    Subclasses override request() to decorate outgoing headers and response()
    to react to a server's follow-up.  The comparison dunders give every
    instance a fixed ordering (never equal, always "less than"), keeping
    scheme-selection sorts deterministic.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        # Only the path component of the request URI defines our scope.
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """How many path levels below our scope root *request_uri* sits."""
        # The parse is retained for parity with the original implementation,
        # though only the raw request_uri feeds the calculation.
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path) :].count("/")

    def inscope(self, host, request_uri):
        """True when *request_uri* on *host* falls under this auth's scope."""
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        if host != self.host:
            return False
        return path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Over-rise this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Over-rise this in sub-classes if necessary.

        Return TRUE is the request is to be retried, for
        example Digest may return stale=true.
        """
        return False

    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True
class BasicAuthentication(Authentication):
    """HTTP Basic authentication: sends base64(user:password) preemptively."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        userpass = ("%s:%s" % self.credentials).encode("utf-8")
        token = base64.b64encode(userpass).strip().decode("utf-8")
        headers["authorization"] = "Basic " + token
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge out of the WWW-Authenticate header.

        Raises UnimplementedDigestAuthOptionError for any qop other than
        'auth' or any algorithm other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        # Keep 'auth' if the server offered it; anything else is unsupported.
        self.challenge["qop"] = ("auth" in [x.strip() for x in qop.split()]) and "auth" or None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        # A1 per RFC 2617: username:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1],])
        # nc (nonce count) starts at 1 and increments per request (see request()).
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers"""
        # H and KD are the hash helpers named in RFC 2617 section 3.2.1.
        H = lambda x: _md5(x.encode("utf-8")).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge["cnonce"] = cnonce or _cnonce()
        # request-digest per RFC 2617 section 3.2.2.1 (qop present).
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        if self.challenge.get("opaque"):
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        # Each request under the same nonce bumps the nonce count.
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Handle stale nonces / nextnonce; return True to retry the request."""
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                # Server declared the nonce stale: adopt the new one and retry.
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse and validate an 'hmacdigest' challenge.

        Raises UnimplementedHmacDigestAuthOptionError when the challenge
        lacks a server nonce or requests an unsupported algorithm.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge["pw-algorithm"])
            )
        if self.challenge["algorithm"] == "HMAC-MD5":
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge["pw-algorithm"] == "MD5":
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # NOTE(review): hashlib constructors expose no ``.new`` attribute and
        # hmac.new() below receives str objects; this looks like py2-era code
        # that would raise on Python 3 — confirm before relying on this class.
        self.key = "".join(
            [
                self.credentials[0],
                ":",
                self.pwhashmod.new("".join([self.credentials[1], self.challenge["salt"]])).hexdigest().lower(),
                ":",
                self.challenge["realm"],
            ]
        )
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        # The digest covers every end-to-end header currently on the request.
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val,)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Retry when the server reports an integrity failure or stale nonce."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add the WSSE profile marker and the X-WSSE UsernameToken header."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, created, self.credentials[1])
        headers["X-WSSE"] = ('UsernameToken Username="%s", PasswordDigest="%s", ' 'Nonce="%s", Created="%s"') % (
            self.credentials[0],
            digest,
            nonce,
            created,
        )
class GoogleLoginAuthentication(Authentication):
    """Legacy Google ClientLogin authentication.

    Exchanges the credentials for an Auth token by POSTing to the ClientLogin
    endpoint during construction, then sends "GoogleLogin Auth=..." headers.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #     service = "wise"

        # BUG FIX: this local was named ``auth``, shadowing the module-level
        # ``auth`` import and turning the ``auth._parse_www_authenticate``
        # call above into an UnboundLocalError.
        login_params = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],)
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(login_params),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        lines = content.split("\n")
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login rejected: fall back to an empty token rather than raising.
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth
# Registry mapping the lowercased scheme name from a WWW-Authenticate
# challenge to the Authentication subclass implementing it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication,
}

# Schemes are tried in this order when a server offers more than one.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Args:
            cache: path of the directory holding the cache files.
            safe: callable mapping a cache key to a filesystem-safe filename.
        """
        self.cache = cache
        self.safe = safe
        # exist_ok avoids the race between the old exists() check and makedirs().
        os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for *key*, or None if absent/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # with-statement guarantees the handle is closed even on error.
            with open(cacheFullPath, "rb") as f:
                retval = f.read()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store bytes *value* under *key*, overwriting any existing entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove *key* from the cache; a missing entry is not an error."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
class Credentials(object):
    """A pool of (name, password) pairs, each optionally scoped to a domain."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password*; an empty domain matches every domain."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every stored credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*, in add order."""
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

    def add(self, key, cert, domain, password):
        """Register a client key/cert (with optional key password) for *domain*."""
        self.credentials.append((domain.lower(), key, cert, password))

    def iter(self, domain):
        """Yield (key, cert, password) triples applicable to *domain*."""
        for stored_domain, key, cert, password in self.credentials:
            if stored_domain in ("", domain):
                yield (key, cert, password)
class AllHosts(object):
    # Sentinel assigned to ProxyInfo.bypass_hosts meaning "bypass the proxy
    # for every host" (see bypass_host() and proxy_info_from_url()).
    pass
class ProxyInfo(object):
    """Collect information required to use a proxy."""

    # Hosts to reach directly: a tuple of names, or the AllHosts sentinel.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

        proxy_type: The type of proxy server. This must be set to one of
        socks.PROXY_TYPE_XXX constants. For example: p =
        ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
        proxy_port=8000)
        proxy_host: The hostname or IP address of the proxy server.
        proxy_port: The port that the proxy server is running on.
        proxy_rdns: If True (default), DNS queries will not be performed
        locally, and instead, handed to the proxy to resolve. This is useful
        if the network does not allow resolution of non-local names. In
        httplib2 0.9 and earlier, this defaulted to False.
        proxy_user: The username used to authenticate with the proxy server.
        proxy_pass: The password used to authenticate with the proxy server.
        proxy_headers: Additional or modified headers for the proxy connect
        request.
        """
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        # Plain attribute assignments replace the original's hard-to-read
        # parallel tuple unpacking; behavior is identical.
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return the settings as a positional tuple (constructor order)."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """True when socks support is available and host/port are set."""
        # `is not None` is the idiomatic identity test (was `!= None`).
        return socks and (self.proxy_host is not None) and (self.proxy_port is not None)

    def applies_to(self, hostname):
        """True when requests to *hostname* should go through the proxy."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        hostname = "." + hostname.lstrip(".")
        for skip_name in self.bypass_hosts:
            # *.suffix
            if skip_name.startswith(".") and hostname.endswith(skip_name):
                return True
            # exact match
            if hostname == "." + skip_name:
                return True
        return False

    def __repr__(self):
        return (
            "<ProxyInfo type={p.proxy_type} "
            "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
            + " user={p.proxy_user} headers={p.proxy_headers}>"
        ).format(p=self)
def proxy_info_from_environment(method="http"):
    """Read proxy info from the environment variables."""
    if method not in ("http", "https"):
        return
    env_var = method + "_proxy"
    # Lowercase env var wins; fall back to the uppercase spelling only when
    # the lowercase key is entirely absent (dict.get default, not `or`).
    candidate = os.environ.get(env_var, os.environ.get(env_var.upper()))
    if not candidate:
        return
    return proxy_info_from_url(candidate, method, noproxy=None)
def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as http_proxy env var)"""
    parsed = urllib.parse.urlparse(url)

    default_ports = {"https": 443, "http": 80}
    pi = ProxyInfo(
        proxy_type=3,  # socks.PROXY_TYPE_HTTP
        proxy_host=parsed.hostname,
        proxy_port=parsed.port or default_ports[method],
        proxy_user=parsed.username or None,
        proxy_pass=parsed.password or None,
        proxy_headers=None,
    )

    bypass_hosts = []
    # Without an explicit noproxy argument, respect the env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
    if noproxy == "*":
        # Special case: a single '*' means every host bypasses the proxy.
        bypass_hosts = AllHosts
    elif noproxy.strip():
        entries = noproxy.split(",")
        bypass_hosts = tuple(entry for entry in entries if entry)  # drop empty strings

    pi.bypass_hosts = bypass_hosts
    return pi
class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts and optional proxying.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        self.proxy_info = proxy_info
        # proxy_info may be a factory callable taking the scheme; resolve it
        # immediately for plain HTTP.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # A usable socks module (or the bundled fallback) is required for proxying.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # Resolve/connect to the proxy address rather than the origin.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        socket_err = None

        # Try each resolved address in turn until one connects.
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                # Always connect() to the origin host/port; when proxying, the
                # socks socket routes this through the proxy configured above.
                # sa[2:] carries the IPv6 flowinfo/scopeid members when present.
                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                # Remember the failure and fall through to the next address.
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # NOTE(review): if getaddrinfo returned no usable address at all,
            # socket_err is still None here and this raise would fail — confirm
            # whether that path is reachable in practice.
            raise socket_err
class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        # Fall back to the module-level bundled CA certificates.
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        self.proxy_info = proxy_info
        # proxy_info may be a factory callable taking the scheme; resolve it
        # immediately for HTTPS.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        # Build the SSLContext up front; the parent class stores it and
        # wrap_socket() uses it in connect().
        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        # Kept for introspection/backward compatibility; the context above is
        # what actually carries the key/cert configuration.
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port."""
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # Resolve/connect to the proxy address rather than the origin.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        socket_err = None

        # Try each resolved address in turn until one connects and completes
        # the TLS handshake.
        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))

                # TLS handshake over the (possibly proxied) TCP socket; SNI and
                # hostname checking use the origin host, not the proxy.
                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError) as e:
                # TLS failures are fatal: close both raw and wrapped sockets
                # and propagate rather than trying the next address.
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                # Timeouts and resolution failures are not retried per-address.
                raise
            except socket.error as e:
                # Remember the failure and fall through to the next address.
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # NOTE(review): socket_err may be None if getaddrinfo returned no
            # addresses — confirm whether that path is reachable in practice.
            raise socket_err
# Maps a URL scheme to the default connection class used for that scheme.
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """

    def __init__(
        self,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
    ):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
        - a callable that takes the http scheme ('http' or 'https') and
          returns a ProxyInfo instance per request. By default, uses
          proxy_info_from_environment.
        - a ProxyInfo instance (static proxy config).
        - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.

        tls_maximum_version / tls_minimum_version require Python 3.7+ /
        OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.tls_maximum_version = tls_maximum_version
        self.tls_minimum_version = tls_minimum_version
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        self.redirect_codes = REDIRECT_CODES

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        self.safe_methods = list(SAFE_METHODS)

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

    def close(self):
        """Close persistent connections, clear sensitive data.
        Not thread-safe, requires external synchronization against concurrent requests.
        """
        # Swap the dict out first so a failure mid-loop still leaves
        # self.connections empty.
        existing, self.connections = self.connections, {}
        for _, c in existing.items():
            c.close()
        self.certificates.clear()
        self.clear_credentials()

    def __getstate__(self):
        """Support pickling by dropping unpicklable per-process state."""
        state_dict = copy.copy(self.__dict__)
        # In case request is augmented by some foreign object such as
        # credentials which handle auth
        if "request" in state_dict:
            del state_dict["request"]
        if "connections" in state_dict:
            del state_dict["connections"]
        return state_dict

    def __setstate__(self, state):
        """Restore pickled state; live connections are recreated on demand."""
        self.__dict__.update(state)
        self.connections = {}

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = auth._parse_www_authenticate(response, "www-authenticate")
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if scheme in challenges:
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain, password=None):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain, password)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Send one request on an open connection and read the response.

        Retries up to RETRIES times on stale/dropped connections and some
        transient socket errors.  Returns (Response, content bytes); a HEAD
        response body is never read.
        """
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                errno_ = _errno_from_exception(e)
                if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient errors
                raise
            except http.client.HTTPException:
                if conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except (http.client.BadStatusLine, http.client.ResponseNotReady):
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)

            break
        return (response, content)

    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the in-scope authorization whose (depth, auth) pair sorts first.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # Give the auth object a chance to re-issue the request (e.g. on a
            # stale digest challenge).
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try each authorization candidate until one is accepted.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        # Permanent redirects get remembered in the cache.
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # 302/303 are followed with GET and no body.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
            elif response.status in [200, 203] and method in self.safe_methods:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        """Normalize request headers via the module-level helper."""
        return _normalize_headers(headers)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST,
        DELETE, etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a
        string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        conn_key = ""

        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if "user-agent" not in headers:
                headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)
            # Prevent CWE-75 space injection to manipulate request via part of uri.
            # Prevent CWE-93 CRLF injection to modify headers via part of uri.
            uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # Reuse one connection per scheme+authority.
            conn_key = scheme + ":" + authority
            conn = self.connections.get(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            key_file=certs[0][0],
                            cert_file=certs[0][1],
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                            key_password=certs[0][2],
                        )
                    else:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                        )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority, timeout=self.timeout, proxy_info=self.proxy_info
                    )
                conn.set_debuglevel(debuglevel)

            if "range" not in headers and "accept-encoding" not in headers:
                headers["accept-encoding"] = "gzip, deflate"

            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        # Cached entries are stored as headers + blank line + body.
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            # Undo RFC 2047 encoded-word header values.
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        # Corrupt cache entry: discard it and continue uncached.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    # Only 307/308 preserve the original method on redirect.
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and not "if-none-match" in headers:
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and not "last-modified" in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    # RFC 7234: only-if-cached with no usable cache entry -> 504.
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            # A timeout leaves the connection in an unknown state; drop it so
            # the next request starts fresh.
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic error Response instead
                # of raising.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)
class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    # The bare string literals below are legacy "attribute docstrings"; they
    # are no-op statements at runtime and are kept for historical reasons.

    """Is this response from our local cache"""
    fromcache = False

    """HTTP protocol version used by server.

    10 for HTTP/1.0, 11 for HTTP/1.1.
    """
    version = 11

    "Status code returned by server. "
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    # Previous Response in a redirect chain, if any.
    previous = None
1772 def __init__(self, info):
1773 # info is either an email.message or
1774 # an httplib.HTTPResponse object.
1775 if isinstance(info, http.client.HTTPResponse):
1776 for key, value in info.getheaders():
1777 key = key.lower()
1778 prev = self.get(key)
1779 if prev is not None:
1780 value = ", ".join((prev, value))
1781 self[key] = value
1782 self.status = info.status
1783 self["status"] = str(self.status)
1784 self.reason = info.reason
1785 self.version = info.version
1786 elif isinstance(info, email.message.Message):
1787 for key, value in list(info.items()):
1788 self[key.lower()] = value
1789 self.status = int(self["status"])
1790 else:
1791 for key, value in info.items():
1792 self[key.lower()] = value
1793 self.status = int(self.get("status", self.status))
1795 def __getattr__(self, name):
1796 if name == "dict":
1797 return self
1798 else:
1799 raise AttributeError(name)