Coverage for /pythoncovmergedfiles/medio/medio/src/httplib2/httplib2/__init__.py: 19%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""Small, fast HTTP client library for Python."""
4__author__ = "Joe Gregorio (joe@bitworking.org)"
5__copyright__ = "Copyright 2006, Joe Gregorio"
6__contributors__ = [
7 "Thomas Broyer (t.broyer@ltgt.net)",
8 "James Antill",
9 "Xavier Verges Farrero",
10 "Jonathan Feinberg",
11 "Blair Zajac",
12 "Sam Ruby",
13 "Louis Nyffenegger",
14 "Mark Pilgrim",
15 "Alex Yu",
16 "Lai Han",
17]
18__license__ = "MIT"
19__version__ = "0.31.0"
21import base64
22import calendar
23import copy
24import email
25import email.feedparser
26from email import header
27import email.message
28import email.utils
29import errno
30from gettext import gettext as _
31import gzip
32from hashlib import md5 as _md5
33from hashlib import sha1 as _sha
34import hmac
35import http.client
36import io
37import os
38import random
39import re
40import socket
41import ssl
42import sys
43import time
44import urllib.parse
45import zlib
47try:
48 import socks
49except ImportError:
50 socks = None
51from . import auth
52from .error import *
53from .iri2uri import iri2uri
def has_timeout(timeout):
    """Return True when *timeout* is an explicit timeout value.

    Both None and the socket module's global-default sentinel (when the
    running Python provides one) count as "no explicit timeout".
    """
    if timeout is None:
        return False
    sentinel = getattr(socket, "_GLOBAL_DEFAULT_TIMEOUT", None)
    return sentinel is None or timeout is not sentinel
62__all__ = [
63 "debuglevel",
64 "FailedToDecompressContent",
65 "Http",
66 "HttpLib2Error",
67 "ProxyInfo",
68 "RedirectLimit",
69 "RedirectMissingLocation",
70 "Response",
71 "RETRIES",
72 "UnimplementedDigestAuthOptionError",
73 "UnimplementedHmacDigestAuthOptionError",
74]
76# The httplib debug level, set to a non-zero value to get debug output
77debuglevel = 0
79# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
80RETRIES = 2
83# Open Items:
84# -----------
86# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
88# Pluggable cache storage (supports storing the cache in
89# flat files by default. We need a plug-in architecture
90# that can support Berkeley DB and Squid)
92# == Known Issues ==
93# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
94# Does not handle Cache-Control: max-stale
95# Does not use Age: headers when calculating cache freshness.
97# The number of redirections to follow before giving up.
98# Note that only GET redirects are automatically followed.
99# Will also honor 301 requests by saving that info and never
100# requesting that URI again.
101DEFAULT_MAX_REDIRECTS = 5
103# Which headers are hop-by-hop headers by default
104HOP_BY_HOP = [
105 "connection",
106 "keep-alive",
107 "proxy-authenticate",
108 "proxy-authorization",
109 "te",
110 "trailers",
111 "transfer-encoding",
112 "upgrade",
113]
115# https://tools.ietf.org/html/rfc7231#section-8.1.3
116SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")
118# To change, assign to `Http().redirect_codes`
119REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308))
122from httplib2 import certs
124CA_CERTS = certs.where()
126# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated.
127# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means:
128# > Selects the highest protocol version that both the client and server support.
129# > Despite the name, this option can select “TLS” protocols as well as “SSL”.
130# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23
132# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated.
133# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections.
134# > The protocol enables CERT_REQUIRED and check_hostname by default.
135# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS
137DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23")
def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Build an ssl.SSLContext for client-side connections.

    Args:
        disable_ssl_certificate_validation: when True, turn off certificate
            verification and hostname checking.
        ca_certs: path to the CA bundle used when validation is enabled.
        cert_file, key_file, key_password: optional client certificate chain.
        maximum_version, minimum_version: TLS version bounds; either an
            ssl.TLSVersion member or its attribute name as a string
            (requires Python 3.7+).

    Raises:
        RuntimeError: if the running Python lacks a required ssl feature.
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation and hasattr(context, "check_hostname"):
        # Must disable check_hostname BEFORE setting verify_mode=CERT_NONE,
        # otherwise PROTOCOL_TLS_CLIENT contexts raise ValueError.
        context.check_hostname = not disable_ssl_certificate_validation
    context.verify_mode = ssl.CERT_NONE if disable_ssl_certificate_validation else ssl.CERT_REQUIRED

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    if maximum_version is not None:
        if hasattr(context, "maximum_version"):
            if isinstance(maximum_version, str):
                # Accept e.g. "TLSv1_2" and resolve it to the enum member.
                maximum_version = getattr(ssl.TLSVersion, maximum_version)
            context.maximum_version = maximum_version
        else:
            raise RuntimeError("setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    if minimum_version is not None:
        if hasattr(context, "minimum_version"):
            if isinstance(minimum_version, str):
                minimum_version = getattr(ssl.TLSVersion, minimum_version)
            context.minimum_version = minimum_version
        else:
            raise RuntimeError("setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer")

    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation

    if not disable_ssl_certificate_validation:
        context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context
def _get_end2end_headers(response):
    """Return the names of end-to-end headers present in *response*.

    Filters out hop-by-hop headers: the static HOP_BY_HOP list plus any
    header names listed in the Connection header itself.
    """
    excluded = set(HOP_BY_HOP)
    excluded.update(name.strip() for name in response.get("connection", "").split(","))
    return [key for key in response.keys() if key not in excluded]
196_missing = object()
199def _errno_from_exception(e):
200 # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass
201 errno = getattr(e, "errno", _missing)
202 if errno is not _missing:
203 return errno
205 # socket.error and common wrap in .args
206 args = getattr(e, "args", None)
207 if args:
208 return _errno_from_exception(args[0])
210 # pysocks.ProxyError wraps in .socket_err
211 # https://github.com/httplib2/httplib2/pull/202
212 socket_err = getattr(e, "socket_err", None)
213 if socket_err:
214 return _errno_from_exception(socket_err)
216 return None
# The URI-reference splitting regex from Appendix B of RFC 3986.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parse *uri* using the regex given in Appendix B of RFC 3986.

    Returns the tuple (scheme, authority, path, query, fragment); components
    missing from the URI come back as None.
    """
    match = URI.match(uri)
    _, scheme, _, authority, path, _, query, _, fragment = match.groups()
    return (scheme, authority, path, query, fragment)
def urlnorm(uri):
    """Normalize *uri* for use as a cache key.

    Lower-cases the scheme and authority, defaults an empty path to "/",
    and returns (scheme, authority, request_uri, defrag_uri) where
    defrag_uri is the absolute URI with any fragment dropped.

    Raises:
        RelativeURIError: if the URI lacks a scheme or authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # Scheme and host are case-insensitive (RFC 3986 section 6.2.2.1).
    # NOTE: the original lowered `scheme` twice; the duplicate was removed.
    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r"^\w+://")
re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII)


def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    if isinstance(filename, bytes):
        raw = filename
        filename = filename.decode("utf-8")
    else:
        raw = filename.encode("utf-8")
    digest = _md5(raw).hexdigest()
    cleaned = re_unsafe.sub("", re_url_scheme.sub("", filename))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return ",".join((cleaned[:90], digest))
276NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+")
279def _normalize_headers(headers):
280 return dict(
281 [
282 (_convert_byte_str(key).lower(), NORMALIZE_SPACE.sub(_convert_byte_str(value), " ").strip(),)
283 for (key, value) in headers.items()
284 ]
285 )
288def _convert_byte_str(s):
289 if not isinstance(s, str):
290 return str(s, "utf-8")
291 return s
294def _parse_cache_control(headers):
295 retval = {}
296 if "cache-control" in headers:
297 parts = headers["cache-control"].split(",")
298 parts_with_args = [
299 tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")
300 ]
301 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
302 retval = dict(parts_with_args + parts_wo_args)
303 return retval
306# Whether to use a strict mode to parse WWW-Authenticate headers
307# Might lead to bad results in case of ill-formed header value,
308# so disabled by default, falling back to relaxed parsing.
309# Set to true to turn on, useful for testing servers.
310USE_WWW_AUTH_STRICT_PARSING = 0
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of "FRESH" (serve from cache), "STALE" (revalidate), or
    "TRANSPARENT" (bypass the cache entirely).

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
        # HTTP/1.0-style Pragma: no-cache — bypass the cache and mutate the
        # request so downstream code sees the equivalent Cache-Control.
        retval = "TRANSPARENT"
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        # Client wants the cached copy regardless of freshness.
        retval = "FRESH"
    elif "date" in response_headers:
        # Age is computed relative to the server's Date header.
        date = calendar.timegm(email.utils.parsedate_tz(response_headers["date"]))
        now = time.time()
        current_age = max(0, now - date)
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.utils.parsedate_tz(response_headers["expires"])
            if None == expires:
                # Unparseable Expires means "already expired".
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's max-age overrides whatever the response allowed.
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        if "min-fresh" in cc:
            # Client requires the entry to stay fresh for min-fresh more
            # seconds, which is equivalent to aging it by that amount.
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
def _decompressContent(response, new_content):
    """Decode a gzip- or deflate-encoded response body.

    On success, updates *response* in place: content-length is corrected,
    content-encoding is removed and recorded under "-content-encoding".
    Returns the decoded bytes (or *new_content* untouched for other
    encodings).

    Raises:
        FailedToDecompressContent: when decoding fails.
    """
    content = new_content
    try:
        encoding = response.get("content-encoding", None)
        if encoding in ["gzip", "deflate"]:
            if encoding == "gzip":
                content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
            if encoding == "deflate":
                try:
                    # First try RFC 1950 zlib-wrapped data...
                    content = zlib.decompress(content, zlib.MAX_WBITS)
                except (IOError, zlib.error):
                    # ...then fall back to raw RFC 1951 deflate.
                    content = zlib.decompress(content, -zlib.MAX_WBITS)
            response["content-length"] = str(len(content))
            # Record the historical presence of the encoding in a way the won't interfere.
            response["-content-encoding"] = response["content-encoding"]
            del response["content-encoding"]
    except (IOError, zlib.error):
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % response.get("content-encoding"),
            response,
            content,
        )
    return content
def _bind_write_headers(msg):
    """Return a replacement _write_headers method bound to message *msg*.

    Used as a fallback when Message.as_string() raises UnicodeEncodeError:
    the replacement serializes each header through email.header.Header with
    a UTF-8 charset so non-ASCII values can be emitted.
    """

    def _write_headers(self):
        # Self refers to the Generator object.
        for h, v in msg.items():
            print("%s:" % h, end=" ", file=self._fp)
            if isinstance(v, header.Header):
                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
            else:
                # email.Header got lots of smarts, so use it.
                headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h)
                print(headers.encode(), file=self._fp)
        # A blank line always separates headers from body.
        print(file=self._fp)

    return _write_headers
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or purge) a response in *cache* under *cachekey*.

    Honors no-store from either side by deleting the entry. Otherwise
    serializes a synthetic "status:" line, the response headers (minus
    status/content-encoding/transfer-encoding), "-varied-*" annotations for
    each header named in Vary, and the body bytes.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if "no-store" in cc or "no-store" in cc_response:
            cache.delete(cachekey)
        else:
            info = email.message.Message()
            for key, value in response_headers.items():
                if key not in ["status", "content-encoding", "transfer-encoding"]:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get("vary", None)
            if vary:
                vary_headers = vary.lower().replace(" ", "").split(",")
                # NOTE(review): this loop variable shadows the module-level
                # `header` import (email.header); harmless here but fragile.
                for header in vary_headers:
                    key = "-varied-%s" % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # A 304 means the cached 200 entry is still valid, so store it as 200.
            status = response_headers.status
            if status == 304:
                status = 200

            status_header = "status: %d\r\n" % status

            try:
                header_str = info.as_string()
            except UnicodeEncodeError:
                # Fall back to a UTF-8-capable header writer (see _bind_write_headers).
                setattr(info, "_write_headers", _bind_write_headers(info))
                header_str = info.as_string()

            # Normalize every bare CR or LF to CRLF before encoding.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content])

            cache.set(cachekey, text)
473def _cnonce():
474 dig = _md5(
475 ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8")
476 ).hexdigest()
477 return dig[:16]
480def _wsse_username_token(cnonce, iso_now, password):
481 return (
482 base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8")
483 )
486# For credentials we need two things, first
487# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
488# Then we also need a list of URIs that have already demanded authentication
489# That list is tricky since sub-URIs can take the same auth, or the
490# auth scheme may change as you descend the tree.
491# So we also need each Auth instance to be able to tell us
492# how close to the 'top' it is.
class Authentication(object):
    """Base class for HTTP authentication schemes.

    Records the (host, path) scope a challenge applies to and holds the
    credentials used to answer it. Subclasses implement request()/response().
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        # Only the host and path define the scope of this authentication.
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Return how many path segments *request_uri* lies below this scope."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path) :].count("/")

    def inscope(self, host, request_uri):
        """Return True when *request_uri* on *host* falls inside this scope."""
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return TRUE if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False

    # Rich comparisons return fixed constants: instances compare unequal to
    # everything and always order as "less than" the other operand.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True
class BasicAuthentication(Authentication):
    """Implements the Basic scheme: base64("user:password") in Authorization."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        token = base64.b64encode(("%s:%s" % self.credentials).encode("utf-8"))
        headers["authorization"] = "Basic " + token.strip().decode("utf-8")
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge from the WWW-Authenticate header.

        Raises:
            UnimplementedDigestAuthOptionError: for qop other than "auth" or
                algorithm other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        # Keep "auth" if offered anywhere in the qop list, otherwise None.
        self.challenge["qop"] = ("auth" in [x.strip() for x in qop.split()]) and "auth" or None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        # A1 = user:realm:password (the secret half of the digest).
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1],])
        # nonce-count starts at 1 and increments with every request.
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers"""
        # H() and KD() as defined by the Digest spec (RFC 2617).
        H = lambda x: _md5(x.encode("utf-8")).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        # cnonce parameter exists for deterministic testing.
        self.challenge["cnonce"] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        if self.challenge.get("opaque"):
            # The server's opaque value must be echoed back verbatim.
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Pick up fresh nonces; return True when the request must be retried."""
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                # Stale nonce: adopt the new one, reset the count, and retry.
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above.

    BUG FIX: the original called ``self.pwhashmod.new(...)`` — the API of
    the long-removed ``md5``/``sha`` modules; the hashlib constructors
    stored in pwhashmod/hashmod have no ``.new`` and require bytes input.
    Likewise ``hmac.new`` was given str key/message, a TypeError on
    Python 3. Both are fixed with direct constructor calls and UTF-8
    encoding; the computed digests are unchanged for ASCII inputs.
    """

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the HMACDigest challenge and derive the signing key.

        Raises:
            UnimplementedHmacDigestAuthOptionError: for a missing snonce or
                unsupported algorithm / pw-algorithm values.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge["pw-algorithm"])
            )
        if self.challenge["algorithm"] == "HMAC-MD5":
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge["pw-algorithm"] == "MD5":
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # key = H(user ":" H(password + salt) ":" realm), then hashed again.
        pw_digest = self.pwhashmod("".join([self.credentials[1], self.challenge["salt"]]).encode("utf-8"))
        self.key = "".join(
            [
                self.credentials[0],
                ":",
                pw_digest.hexdigest().lower(),
                ":",
                self.challenge["realm"],
            ]
        )
        self.key = self.pwhashmod(self.key.encode("utf-8")).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val,)
        # hmac.new requires bytes for both the key and the message.
        request_digest = (
            hmac.new(self.key.encode("utf-8"), request_digest.encode("utf-8"), self.hashmod).hexdigest().lower()
        )
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Return True (retry) when the server reports integrity/stale failure."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, created, self.credentials[1])
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        headers["X-WSSE"] = ('UsernameToken Username="%s", PasswordDigest="%s", ' 'Nonce="%s", Created="%s"') % (
            self.credentials[0],
            digest,
            nonce,
            created,
        )
class GoogleLoginAuthentication(Authentication):
    """Implements the deprecated Google ClientLogin scheme.

    The constructor performs a live POST to the ClientLogin endpoint to
    exchange the credentials for an Auth token.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Bloggger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #   service = "wise"

        # NOTE(review): this local `auth` shadows the module-level `auth`
        # import; harmless here since the module was already used above.
        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],)
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(auth),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        # Response body is key=value lines; assumes content is str — TODO confirm.
        lines = content.split("\n")
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth
# Maps the lower-cased scheme token from a WWW-Authenticate challenge to the
# Authentication subclass that implements it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication,
}

# Preference order used when a server offers several schemes at once.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Args:
            cache: path of the directory holding the cache files.
            safe: callable mapping a cache key to a safe filename.
        """
        self.cache = cache
        self.safe = safe
        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for *key*, or None if absent or unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # Context manager closes the handle even if read() raises
            # (the original leaked the file object in that case).
            with open(cacheFullPath, "rb") as f:
                retval = f.read()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store bytes *value* under *key*, replacing any existing entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the entry for *key*; missing entries are ignored."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
class Credentials(object):
    """A pool of (domain, name, password) credentials."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password*, optionally restricted to *domain*."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every stored credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs valid for *domain*.

        Credentials registered without a domain match every host.
        """
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

    def add(self, key, cert, domain, password):
        """Register a client key/cert pair (plus key password) for *domain*."""
        self.credentials.append((domain.lower(), key, cert, password))

    def iter(self, domain):
        """Yield (key, cert, password) tuples valid for *domain*."""
        for stored_domain, key, cert, password in self.credentials:
            if stored_domain in ("", domain):
                yield (key, cert, password)
class AllHosts(object):
    # Sentinel assigned to ProxyInfo.bypass_hosts (when no_proxy is "*") to
    # mean "bypass the proxy for every host"; checked with `is AllHosts`.
    pass
class ProxyInfo(object):
    """Collect information required to use a proxy."""

    # Hosts for which the proxy is skipped; may be the AllHosts sentinel.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

        proxy_type: The type of proxy server. This must be set to one of
        socks.PROXY_TYPE_XXX constants. For example: p =
        ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
        proxy_port=8000)
        proxy_host: The hostname or IP address of the proxy server.
        proxy_port: The port that the proxy server is running on.
        proxy_rdns: If True (default), DNS queries will not be performed
        locally, and instead, handed to the proxy to resolve. This is useful
        if the network does not allow resolution of non-local names. In
        httplib2 0.9 and earlier, this defaulted to False.
        proxy_user: The username used to authenticate with the proxy server.
        proxy_pass: The password used to authenticate with the proxy server.
        proxy_headers: Additional or modified headers for the proxy connect
        request.
        """
        # Normalize credentials to str so they can be embedded in headers.
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return settings as (type, host, port, rdns, user, pass, headers)."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """True when proxying is usable: socks importable, host and port set."""
        # `is not None` instead of the original `!= None` comparisons.
        return socks is not None and self.proxy_host is not None and self.proxy_port is not None

    def applies_to(self, hostname):
        """True unless *hostname* is excluded by the bypass list."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        hostname = "." + hostname.lstrip(".")
        for skip_name in self.bypass_hosts:
            # *.suffix
            if skip_name.startswith(".") and hostname.endswith(skip_name):
                return True
            # exact match
            if hostname == "." + skip_name:
                return True
        return False

    def __repr__(self):
        return (
            "<ProxyInfo type={p.proxy_type} "
            "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
            + " user={p.proxy_user} headers={p.proxy_headers}>"
        ).format(p=self)
def proxy_info_from_environment(method="http"):
    """Construct a ProxyInfo from the http_proxy / https_proxy env variable.

    Returns None for unsupported methods or when no proxy URL is configured.
    """
    if method not in ("http", "https"):
        return None
    env_var = method + "_proxy"
    # Lower-case variable wins; fall back to the upper-case spelling.
    url = os.environ.get(env_var, os.environ.get(env_var.upper()))
    if not url:
        return None
    return proxy_info_from_url(url, method, noproxy=None)
def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as http_proxy env var)
    """
    parsed = urllib.parse.urlparse(url)

    # Scheme decides the proxy protocol; anything unknown is plain HTTP.
    scheme_to_type = {"socks4": 1, "socks5": 2, "socks": 2}  # socks.PROXY_TYPE_*
    proxy_type = scheme_to_type.get(parsed.scheme, 3)  # 3 == socks.PROXY_TYPE_HTTP
    pi = ProxyInfo(
        proxy_type=proxy_type,
        proxy_host=parsed.hostname,
        proxy_port=parsed.port or dict(https=443, http=80)[method],
        proxy_user=parsed.username or None,
        proxy_pass=parsed.password or None,
        proxy_headers=None,
    )

    bypass_hosts = []
    # If not given an explicit noproxy value, respect values in env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
    # Special case: A single '*' character means all hosts should be bypassed.
    if noproxy == "*":
        bypass_hosts = AllHosts
    elif noproxy.strip():
        # Split on commas, dropping empty entries from stray separators.
        bypass_hosts = tuple(host for host in noproxy.split(",") if host)

    pi.bypass_hosts = bypass_hosts
    return pi
class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts and socks proxies.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        self.proxy_info = proxy_info
        # proxy_info may also be a callable taking the scheme ('http' here)
        # and returning a ProxyInfo; resolve it once at construction time.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__."""
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # The TCP connection goes to the proxy, not the origin server.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        socket_err = None

        # Try each address returned by getaddrinfo (e.g. IPv4 and IPv6)
        # until one connects; keep the last socket error so it can be
        # re-raised if every address fails.
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    # Disable Nagle's algorithm: requests are written as
                    # small, latency-sensitive chunks.
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                # Always connect to (self.host, self.port): with a socks
                # socket the proxy was configured via setproxy() above.
                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every candidate address failed; surface the last error seen.
            raise socket_err
class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        # Fall back to the CA bundle shipped with httplib2 when no explicit
        # ca_certs path is given.
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        self.proxy_info = proxy_info
        # proxy_info may also be a callable taking the scheme ('https' here)
        # and returning a ProxyInfo; resolve it once at construction time.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port."""
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # The TCP connection goes to the proxy, not the origin server.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        socket_err = None

        # Try each address returned by getaddrinfo (e.g. IPv4 and IPv6)
        # until one connects; keep the last socket error for re-raising.
        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    # Disable Nagle's algorithm for latency-sensitive writes.
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))

                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError) as e:
                # TLS failures are fatal: close both the raw and wrapped
                # sockets and propagate instead of trying other addresses.
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every candidate address failed; surface the last error seen.
            raise socket_err
# Maps a URI scheme to the connection class used for that scheme.
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """

    def __init__(
        self,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
    ):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
          - a callable that takes the http scheme ('http' or 'https') and
            returns a ProxyInfo instance per request. By default, uses
            proxy_info_from_environment.
          - a ProxyInfo instance (static proxy config).
          - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.

        tls_maximum_version / tls_minimum_version require Python 3.7+ /
        OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.tls_maximum_version = tls_maximum_version
        self.tls_minimum_version = tls_minimum_version
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        self.redirect_codes = REDIRECT_CODES

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        self.safe_methods = list(SAFE_METHODS)

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False
1312 def close(self):
1313 """Close persistent connections, clear sensitive data.
1314 Not thread-safe, requires external synchronization against concurrent requests.
1315 """
1316 existing, self.connections = self.connections, {}
1317 for _, c in existing.items():
1318 c.close()
1319 self.certificates.clear()
1320 self.clear_credentials()
1322 def __getstate__(self):
1323 state_dict = copy.copy(self.__dict__)
1324 # In case request is augmented by some foreign object such as
1325 # credentials which handle auth
1326 if "request" in state_dict:
1327 del state_dict["request"]
1328 if "connections" in state_dict:
1329 del state_dict["connections"]
1330 return state_dict
1332 def __setstate__(self, state):
1333 self.__dict__.update(state)
1334 self.connections = {}
1336 def _auth_from_challenge(self, host, request_uri, headers, response, content):
1337 """A generator that creates Authorization objects
1338 that can be applied to requests.
1339 """
1340 challenges = auth._parse_www_authenticate(response, "www-authenticate")
1341 for cred in self.credentials.iter(host):
1342 for scheme in AUTH_SCHEME_ORDER:
1343 if scheme in challenges:
1344 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
1346 def add_credentials(self, name, password, domain=""):
1347 """Add a name and password that will be used
1348 any time a request requires authentication."""
1349 self.credentials.add(name, password, domain)
1351 def add_certificate(self, key, cert, domain, password=None):
1352 """Add a key and cert that will be used
1353 any time a request requires authentication."""
1354 self.certificates.add(key, cert, domain, password)
1356 def clear_credentials(self):
1357 """Remove all the names and passwords
1358 that are used for authentication"""
1359 self.credentials.clear()
1360 self.authorizations = []
    def _conn_request(self, conn, request_uri, method, body, headers):
        """Send one request over `conn`, retrying transient connection
        failures up to RETRIES times; return (Response, content bytes)."""
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                # DNS lookup failed; rebrand as a clearer httplib2 error.
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                errno_ = _errno_from_exception(e)
                if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient errors
                raise
            except http.client.HTTPException:
                if conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except (http.client.BadStatusLine, http.client.ResponseNotReady):
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    # HEAD responses carry no body; drop the connection so
                    # no stray payload can poison the next request.
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)

            break
        return (response, content)
    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-scoped) known authorization, if any.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # auth.response() returning truthy means "retry with refreshed
            # credentials" (e.g. a stale digest nonce).
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one is accepted.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    # Permanent redirects are cached so future requests can
                    # skip straight to the new URI.
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # Per RFC, 302/303 redirects are re-issued as GET.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
            elif response.status in [200, 203] and method in self.safe_methods:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)
    def _normalize_headers(self, headers):
        """Normalize header names/values via the module-level helper."""
        return _normalize_headers(headers)
    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST,
        DELETE, etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a
        string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        conn_key = ""

        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if "user-agent" not in headers:
                headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)
            # Prevent CWE-75 space injection to manipulate request via part of uri.
            # Prevent CWE-93 CRLF injection to modify headers via part of uri.
            uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # One pooled connection per scheme+authority.
            conn_key = scheme + ":" + authority
            conn = self.connections.get(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            key_file=certs[0][0],
                            cert_file=certs[0][1],
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                            key_password=certs[0][2],
                        )
                    else:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                        )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority, timeout=self.timeout, proxy_info=self.proxy_info
                    )
                conn.set_debuglevel(debuglevel)

            if "range" not in headers and "accept-encoding" not in headers:
                headers["accept-encoding"] = "gzip, deflate"

            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        # Cached entries are stored as raw header bytes,
                        # a blank line, then the body.
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        # Corrupt cache entry; discard and refetch.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    # Only 307/308 preserve the original method on redirect.
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and not "if-none-match" in headers:
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and not "last-modified" in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    # RFC 7234: only-if-cached with no usable cache entry
                    # yields 504 Gateway Timeout without hitting the network.
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                # A timed-out connection is unusable; evict it from the pool.
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)
class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    """Is this response from our local cache"""
    fromcache = False
    """HTTP protocol version used by server.

    10 for HTTP/1.0, 11 for HTTP/1.1.
    """
    version = 11

    "Status code returned by server. "
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    # Previous Response in a redirect chain, or None for a direct response.
    previous = None
1775 def __init__(self, info):
1776 # info is either an email.message or
1777 # an httplib.HTTPResponse object.
1778 if isinstance(info, http.client.HTTPResponse):
1779 for key, value in info.getheaders():
1780 key = key.lower()
1781 prev = self.get(key)
1782 if prev is not None:
1783 value = ", ".join((prev, value))
1784 self[key] = value
1785 self.status = info.status
1786 self["status"] = str(self.status)
1787 self.reason = info.reason
1788 self.version = info.version
1789 elif isinstance(info, email.message.Message):
1790 for key, value in list(info.items()):
1791 self[key.lower()] = value
1792 self.status = int(self["status"])
1793 else:
1794 for key, value in info.items():
1795 self[key.lower()] = value
1796 self.status = int(self.get("status", self.status))
1798 def __getattr__(self, name):
1799 if name == "dict":
1800 return self
1801 else:
1802 raise AttributeError(name)