Coverage for /pythoncovmergedfiles/medio/medio/src/httplib2/httplib2/__init__.py: 19%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""Small, fast HTTP client library for Python."""
4import functools
6from httplib2.decode import ZlibDecoder, DecoderProtocol, LimitDecoder, DeflateDecoder
8__author__ = "Joe Gregorio (joe@bitworking.org)"
9__copyright__ = "Copyright 2006, Joe Gregorio"
10__contributors__ = [
11 "Thomas Broyer (t.broyer@ltgt.net)",
12 "James Antill",
13 "Xavier Verges Farrero",
14 "Jonathan Feinberg",
15 "Blair Zajac",
16 "Sam Ruby",
17 "Louis Nyffenegger",
18 "Mark Pilgrim",
19 "Alex Yu",
20 "Lai Han",
21]
22__license__ = "MIT"
23__version__ = "0.32.0"
25import base64
26import calendar
27import copy
28import email
29import email.feedparser
30from email import header
31import email.message
32import email.utils
33import errno
34from gettext import gettext as _
35import gzip
36from hashlib import md5 as _md5
37from hashlib import sha1 as _sha
38import hmac
39import http.client
40import io
41import os
42import random
43import re
44import socket
45import ssl
46import sys
47import time
48import urllib.parse
49import zlib
51try:
52 import socks
53except ImportError:
54 socks = None
55from . import auth
56from .error import *
57from .iri2uri import iri2uri
def has_timeout(timeout):
    """Tell whether *timeout* is an explicit timeout value.

    ``None`` never counts as a timeout.  When the ``socket`` module exposes
    the ``_GLOBAL_DEFAULT_TIMEOUT`` sentinel, that sentinel does not count
    either; any other value does.
    """
    if timeout is None:
        return False
    if not hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
        return True
    return timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
66__all__ = [
67 "debuglevel",
68 "FailedToDecompressContent",
69 "Http",
70 "HttpLib2Error",
71 "ProxyInfo",
72 "RedirectLimit",
73 "RedirectMissingLocation",
74 "Response",
75 "RETRIES",
76 "UnimplementedDigestAuthOptionError",
77 "UnimplementedHmacDigestAuthOptionError",
78]
80# The httplib debug level, set to a non-zero value to get debug output
81debuglevel = 0
83# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
84RETRIES = 2
87# Open Items:
88# -----------
90# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
92# Pluggable cache storage (supports storing the cache in
93# flat files by default. We need a plug-in architecture
94# that can support Berkeley DB and Squid)
96# == Known Issues ==
97# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
98# Does not handle Cache-Control: max-stale
99# Does not use Age: headers when calculating cache freshness.
101# The number of redirections to follow before giving up.
102# Note that only GET redirects are automatically followed.
103# Will also honor 301 requests by saving that info and never
104# requesting that URI again.
105DEFAULT_MAX_REDIRECTS = 5
107# Which headers are hop-by-hop headers by default
108HOP_BY_HOP = [
109 "connection",
110 "keep-alive",
111 "proxy-authenticate",
112 "proxy-authorization",
113 "te",
114 "trailers",
115 "transfer-encoding",
116 "upgrade",
117]
119# https://tools.ietf.org/html/rfc7231#section-8.1.3
120SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")
122# To change, assign to `Http().redirect_codes`
123REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308))
126from httplib2 import certs
128CA_CERTS = certs.where()
130# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated.
131# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means:
132# > Selects the highest protocol version that both the client and server support.
133# > Despite the name, this option can select “TLS” protocols as well as “SSL”.
134# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23
136# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated.
137# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections.
138# > The protocol enables CERT_REQUIRED and check_hostname by default.
139# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS
DEFAULT_TLS_VERSION = (
    getattr(ssl, "PROTOCOL_TLS_CLIENT", None)
    or getattr(ssl, "PROTOCOL_TLS", None)
    or getattr(ssl, "PROTOCOL_SSLv23")
)


def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Create and configure an ``ssl.SSLContext`` for a client connection.

    Args:
        disable_ssl_certificate_validation: when true, hostname checking is
            switched off and ``verify_mode`` is ``ssl.CERT_NONE``; otherwise
            ``ssl.CERT_REQUIRED`` is used and *ca_certs* is loaded.
        ca_certs: passed to ``load_verify_locations`` when validation is on.
        cert_file, key_file, key_password: passed to ``load_cert_chain`` when
            *cert_file* is truthy.
        maximum_version, minimum_version: TLS version bounds; a ``str`` is
            looked up as an attribute of ``ssl.TLSVersion``.

    Returns:
        The configured ``ssl.SSLContext``.

    Raises:
        RuntimeError: if ``ssl.SSLContext`` is unavailable, or a version bound
            is requested but the context has no such attribute.
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    can_check_hostname = hasattr(context, "check_hostname")

    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation:
        if can_check_hostname:
            context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    version_bounds = (
        (
            "maximum_version",
            maximum_version,
            "setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer",
        ),
        (
            "minimum_version",
            minimum_version,
            "setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer",
        ),
    )
    for attribute, requested, unsupported_message in version_bounds:
        if requested is None:
            continue
        if not hasattr(context, attribute):
            raise RuntimeError(unsupported_message)
        if isinstance(requested, str):
            requested = getattr(ssl.TLSVersion, requested)
        setattr(context, attribute, requested)

    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if can_check_hostname:
        context.check_hostname = not disable_ssl_certificate_validation

    if not disable_ssl_certificate_validation:
        context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context
def _get_end2end_headers(response):
    """Return the header names of *response* that are not hop-by-hop.

    A name is excluded when it appears in ``HOP_BY_HOP`` or is one of the
    comma-separated tokens of the response's own ``connection`` header.
    Names are compared exactly as stored in *response*.
    """
    excluded = list(HOP_BY_HOP)
    for token in response.get("connection", "").split(","):
        excluded.append(token.strip())
    return [name for name in response.keys() if name not in excluded]
_missing = object()


def _errno_from_exception(e):
    """Dig an ``errno`` value out of *e* or of an exception it wraps.

    Lookup order: the object's own ``errno`` attribute (returned as-is, even
    when it is ``None``), then the first element of a non-empty ``args``,
    then a truthy ``socket_err`` attribute.  Returns ``None`` when none of
    these leads to an ``errno``.
    """
    # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass
    own_errno = getattr(e, "errno", _missing)
    if own_errno is not _missing:
        return own_errno

    # socket.error and common wrap in .args
    wrapped = getattr(e, "args", None)
    if wrapped:
        return _errno_from_exception(wrapped[0])

    # pysocks.ProxyError wraps in .socket_err
    # https://github.com/httplib2/httplib2/pull/202
    inner_error = getattr(e, "socket_err", None)
    if not inner_error:
        return None
    return _errno_from_exception(inner_error)
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Split *uri* with the regular expression from Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components that are absent from *uri* come back as ``None``; the path is
    an empty string when absent.
    """
    matched = URI.match(uri)
    scheme, authority, path, query, fragment = matched.group(2, 4, 5, 7, 9)
    return (scheme, authority, path, query, fragment)
def urlnorm(uri):
    """Normalise an absolute URI.

    The scheme and authority are lower-cased, an empty path becomes ``/``
    and the fragment is dropped.

    Returns:
        ``(scheme, authority, request_uri, defrag_uri)`` where *request_uri*
        is the path plus ``?query`` (when there is a query) and *defrag_uri*
        is ``scheme://authority`` followed by *request_uri*.

    Raises:
        RelativeURIError: if *uri* has no scheme or no authority.
    """
    scheme, authority, path, query, _fragment = parse_uri(uri)
    if not (scheme and authority):
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)

    scheme = scheme.lower()
    authority = authority.lower()
    path = path or "/"

    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    if query:
        request_uri = "?".join([path, query])
    else:
        request_uri = path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r"^\w+://")
re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII)


def safename(filename):
    """Return a filename suitable for the cache.

    The leading ``scheme://`` and every character outside
    ``[A-Za-z0-9_\\-.()=!]`` are removed, the result is cut to 90 characters
    and the MD5 hex digest of the original (UTF-8) name is appended after a
    comma.  Accepts ``str`` or UTF-8 encoded ``bytes``.
    """
    if isinstance(filename, bytes):
        raw_name = filename
        text_name = filename.decode("utf-8")
    else:
        text_name = filename
        raw_name = filename.encode("utf-8")
    digest = _md5(raw_name).hexdigest()

    readable = re_unsafe.sub("", re_url_scheme.sub("", text_name))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    #   9 chars + max 104 chars  +     20 chars      +       x       +  1  +  32  = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return "%s,%s" % (readable[:90], digest)
NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+")


def _normalize_headers(headers):
    """Return a new dict with lower-cased names and whitespace-normalised values.

    Names and values may be ``str`` or UTF-8 encoded ``bytes``.  In each
    value, every run of spaces/tabs (optionally preceded by CRLF, i.e. a
    folded header line) is collapsed to a single space, and the value is
    stripped.

    The replacement and subject arguments of ``NORMALIZE_SPACE.sub`` used to
    be swapped: the header value was used as the replacement *template*, so
    no normalisation took place and a value containing a backslash escape
    such as ``\\d`` or ``\\1`` raised ``re.error``.
    """
    return {
        _convert_byte_str(key).lower(): NORMALIZE_SPACE.sub(" ", _convert_byte_str(value)).strip()
        for key, value in headers.items()
    }


def _convert_byte_str(s):
    """Return *s* as ``str``, decoding it as UTF-8 when it is not already one."""
    if not isinstance(s, str):
        return str(s, "utf-8")
    return s
def _parse_cache_control(headers):
    """Parse the ``cache-control`` entry of *headers* into a dict.

    The value is split on commas.  ``name=value`` directives map the
    stripped, lower-cased name to the stripped, lower-cased value (a
    string); bare directives map the stripped, lower-cased name to ``1``.
    Returns an empty dict when there is no ``cache-control`` entry.
    """
    if "cache-control" not in headers:
        return {}

    directives = headers["cache-control"].split(",")
    parsed = {}
    # Directives carrying an argument first ...
    for directive in directives:
        if "=" in directive:
            name, argument = (piece.strip().lower() for piece in directive.split("=", 1))
            parsed[name] = argument
    # ... then the bare ones, which therefore win on a name clash.
    for directive in directives:
        if "=" not in directive:
            parsed[directive.strip().lower()] = 1
    return parsed
310# Whether to use a strict mode to parse WWW-Authenticate headers
311# Might lead to bad results in case of ill-formed header value,
312# so disabled by default, falling back to relaxed parsing.
313# Set to true to turn on, useful for testing servers.
314USE_WWW_AUTH_STRICT_PARSING = 0
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns:
        One of the strings "FRESH", "STALE" or "TRANSPARENT".

    Side effect:
        When the request carries ``Pragma: no-cache`` but no
        ``cache-control`` header, ``cache-control: no-cache`` is added to
        *request_headers*.
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if "pragma" in request_headers and "no-cache" in request_headers["pragma"].lower():
        retval = "TRANSPARENT"
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        retval = "FRESH"
    elif "date" in response_headers:
        parsed_date = email.utils.parsedate_tz(response_headers["date"])
        if parsed_date is None:
            # An unparsable Date used to crash calendar.timegm(None) with a
            # TypeError; treat it like a missing Date, i.e. stale.
            return retval
        date = calendar.timegm(parsed_date)
        now = time.time()
        current_age = max(0, now - date)
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.utils.parsedate_tz(response_headers["expires"])
            if expires is None:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # A request max-age overrides whatever the response allowed.
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh is modelled by making the entry look that much older.
        if "min-fresh" in cc:
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
def _decompressContent(response, new_content, limit_kwargs):
    """Decode *new_content* according to the response's ``content-encoding``.

    For ``gzip``, ``deflate`` or ``zlib`` the body is run through a
    ``LimitDecoder``-wrapped ``ZlibDecoder``, falling back to a wrapped
    ``DeflateDecoder`` on ``IOError``/``zlib.error``.  On success
    ``content-length`` is rewritten and the ``content-encoding`` header is
    moved to ``-content-encoding``.  Any other encoding returns
    *new_content* untouched.

    Args:
        response: mapping of response headers; modified in place on success.
        new_content: the raw body.
        limit_kwargs: keyword arguments forwarded to ``LimitDecoder``.

    Raises:
        FailedToDecompressContent: if both decoders fail.
    """
    encoding = response.get("content-encoding", None)
    limited = functools.partial(LimitDecoder, **limit_kwargs)
    if encoding not in ("gzip", "deflate", "zlib"):
        return new_content

    try:
        try:
            decoded = limited(ZlibDecoder()).consume_bytes(new_content, 0)
        except (IOError, zlib.error):
            decoded = limited(DeflateDecoder()).consume_bytes(new_content, 0)
        response["content-length"] = str(len(decoded))
        # Record the historical presence of the encoding in a way the won't interfere.
        response["-content-encoding"] = response.pop("content-encoding")
    except (IOError, zlib.error):
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % encoding,
            response,
            "",
        )
    return decoded
def _bind_write_headers(msg):
    """Build a ``_write_headers`` replacement that serialises *msg*'s headers.

    The returned function takes the generator object as ``self`` and writes
    to its ``_fp``, honouring its ``_maxheaderlen``.  Values that are not
    already ``email.header.Header`` instances are wrapped in one with the
    ``utf-8`` charset before being encoded.
    """

    def _write_headers(self):
        # Self refers to the Generator object.
        out = self._fp
        for name, value in msg.items():
            print("%s:" % name, end=" ", file=out)
            if not isinstance(value, header.Header):
                # email.Header got lots of smarts, so use it.
                wrapped = header.Header(value, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=name)
                print(wrapped.encode(), file=out)
            else:
                print(value.encode(maxlinelen=self._maxheaderlen), file=out)
        # A blank line always separates headers from body.
        print(file=out)

    return _write_headers
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) the cache entry for a response.

    Nothing happens when *cachekey* is falsy.  If either side sent
    ``no-store`` the entry is deleted.  Otherwise the entry written is a
    ``status:`` line, the response headers (minus ``status``,
    ``content-encoding`` and ``transfer-encoding``, plus ``-varied-*``
    annotations) and the body, with header line endings forced to CRLF.
    A 304 status is recorded as 200.
    """
    if not cachekey:
        return

    request_cc = _parse_cache_control(request_headers)
    response_cc = _parse_cache_control(response_headers)
    if "no-store" in request_cc or "no-store" in response_cc:
        cache.delete(cachekey)
        return

    not_cached = ("status", "content-encoding", "transfer-encoding")
    info = email.message.Message()
    for name, value in response_headers.items():
        if name not in not_cached:
            info[name] = value

    # Add annotations to the cache to indicate what headers
    # are variant for this request.
    vary = response_headers.get("vary", None)
    if vary:
        for varied_name in vary.lower().replace(" ", "").split(","):
            annotation = "-varied-%s" % varied_name
            try:
                info[annotation] = request_headers[varied_name]
            except KeyError:
                pass

    status = response_headers.status
    if status == 304:
        status = 200
    status_line = "status: %d\r\n" % status

    try:
        header_text = info.as_string()
    except UnicodeEncodeError:
        # Retry after attaching the header writer built by _bind_write_headers.
        setattr(info, "_write_headers", _bind_write_headers(info))
        header_text = info.as_string()

    header_text = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_text)
    entry = b"".join([status_line.encode("utf-8"), header_text.encode("utf-8"), content])

    cache.set(cachekey, entry)
def _cnonce():
    """Return a 16-character hexadecimal client nonce.

    The nonce is the first 16 hex digits of the MD5 of the current
    ``time.ctime()`` and 20 random decimal digits.

    The digits used to be drawn with ``randrange(0, 9)``, which can never
    produce ``9``, and the list of digits was formatted via its ``repr``
    rather than joined into a string.
    """
    digits = "".join(random.choice("0123456789") for _unused in range(20))
    seed = "%s:%s" % (time.ctime(), digits)
    return _md5(seed.encode("utf-8")).hexdigest()[:16]
def _wsse_username_token(cnonce, iso_now, password):
    """Return the base64 text of SHA-1(cnonce + iso_now + password)."""
    material = "%s%s%s" % (cnonce, iso_now, password)
    digest = _sha(material.encode("utf-8")).digest()
    encoded = base64.b64encode(digest).strip()
    return encoded.decode("utf-8")
# For credentials we need two things. First, a pool of credentials to try
# (not necessarily tied to Basic, Digest, etc.). Then we also need a list of
# URIs that have already demanded authentication. That list is tricky since
# sub-URIs can take the same auth, or the auth scheme may change as you
# descend the tree. So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.
class Authentication(object):
    """Base class for the authentication schemes.

    Remembers the host and the path component of the URI that triggered
    authentication, together with the credentials and the ``Http`` object.
    Sub-classes override ``request`` and, when needed, ``response``.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Count the ``/`` characters of *request_uri* past the stored path's length."""
        # The parse result is not used; the call is kept so input handling is unchanged.
        parse_uri(request_uri)
        remainder = request_uri[len(self.path) :]
        return remainder.count("/")

    def inscope(self, host, request_uri):
        """Return True when *host* matches and the URI's path starts with the stored path."""
        # XXX Should we normalize the request_uri?
        candidate_path = parse_uri(request_uri)[2]
        return (host == self.host) and candidate_path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return True if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False

    # The comparison methods return fixed answers regardless of the operand.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True
class BasicAuthentication(Authentication):
    """HTTP Basic authentication: sends ``name:password`` base64-encoded."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        user_pass = ("%s:%s" % self.credentials).encode("utf-8")
        token = base64.b64encode(user_pass).strip().decode("utf-8")
        headers["authorization"] = "Basic " + token
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge from *response* and prepare ``A1``.

        Raises:
            UnimplementedDigestAuthOptionError: if the server does not offer
                ``qop=auth`` or asks for an algorithm other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        # qop-options is a comma-separated list (e.g. "auth,auth-int"), so split
        # on commas as well as whitespace; splitting on whitespace alone
        # rejected servers that offer several values.
        offered_qops = [token for token in re.split(r"[\s,]+", qop) if token]
        self.challenge["qop"] = "auth" if "auth" in offered_qops else None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s.") % qop)
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s.") % self.challenge["algorithm"]
            )
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1]])
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers"""

        def H(text):
            return _md5(text.encode("utf-8")).hexdigest()

        def KD(secret, data):
            return H("%s:%s" % (secret, data))

        A2 = "".join([method, ":", request_uri])
        self.challenge["cnonce"] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        if self.challenge.get("opaque"):
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        # Each request made with the same nonce must carry a higher count.
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Update the nonce from *response*; return True if the request should be retried.

        A retry is requested only when there is no ``authentication-info``
        header and the new challenge says ``stale=true``.
        """
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the HMACDigest challenge and derive the signing key.

        Raises:
            UnimplementedHmacDigestAuthOptionError: if the challenge has no
                server nonce or names an unsupported algorithm.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s.") % self.challenge["algorithm"]
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s.") % self.challenge["pw-algorithm"]
            )
        if self.challenge["algorithm"] == "HMAC-MD5":
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge["pw-algorithm"] == "MD5":
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # _md5/_sha are the hashlib constructors: they are called directly
        # (they have no ``.new``) and need bytes, hence the UTF-8 encoding.
        salted_password = "".join([self.credentials[1], self.challenge["salt"]])
        self.key = "".join(
            [
                self.credentials[0],
                ":",
                self.pwhashmod(salted_password.encode("utf-8")).hexdigest().lower(),
                ":",
                self.challenge["realm"],
            ]
        )
        self.key = self.pwhashmod(self.key.encode("utf-8")).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val)
        # hmac.new requires bytes for both the key and the message.
        request_digest = (
            hmac.new(self.key.encode("utf-8"), request_digest.encode("utf-8"), self.hashmod).hexdigest().lower()
        )
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Return True (retry) when the new challenge's reason is ``integrity`` or ``stale``."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        username, password = self.credentials[0], self.credentials[1]
        digest = _wsse_username_token(nonce, created, password)
        template = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"'
        headers["X-WSSE"] = template % (username, digest, nonce, created)
class GoogleLoginAuthentication(Authentication):
    """GoogleLogin scheme: obtains an ``Auth`` token by POSTing to ClientLogin."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Request a token from the ClientLogin endpoint and store it in ``self.Auth``.

        ``self.Auth`` is the empty string when the endpoint answers 403.
        """
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        # Must not be named ``auth``: a local of that name shadows the ``auth``
        # module for the whole function and made the lookup above raise
        # UnboundLocalError.
        login_params = dict(
            Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],
        )
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(login_params),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        # A bytes body cannot be split on a str separator.
        if isinstance(content, bytes):
            content = content.decode("utf-8")
        # Lines without "=" are skipped so they cannot break the dict construction.
        d = dict(tuple(line.split("=", 1)) for line in content.split("\n") if "=" in line)
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth
775AUTH_SCHEME_CLASSES = {
776 "basic": BasicAuthentication,
777 "wsse": WsseAuthentication,
778 "digest": DigestAuthentication,
779 "hmacdigest": HmacDigestAuthentication,
780 "googlelogin": GoogleLoginAuthentication,
781}
783AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Remember the cache directory and create it when it does not exist.

        Args:
            cache: path of the cache directory.
            safe: callable mapping a cache key to a file name.
        """
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            # exist_ok covers another process creating the directory between
            # the check above and this call.
            os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for *key*, or None when they cannot be read."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # ``with`` closes the file even if read() fails.
            with open(cacheFullPath, "rb") as f:
                return f.read()
        except IOError:
            # A missing or unreadable entry is a cache miss.
            return None

    def set(self, key, value):
        """Write the bytes *value* as the cache entry for *key*."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the cache entry for *key*; a missing entry is not an error."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            os.remove(cacheFullPath)
        except FileNotFoundError:
            # Already gone, possibly removed concurrently.
            pass
class Credentials(object):
    """An ordered pool of (domain, name, password) entries."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Append an entry; the domain is stored lower-cased ("" matches any domain)."""
        entry = (domain.lower(), name, password)
        self.credentials.append(entry)

    def clear(self):
        """Drop every stored entry."""
        self.credentials = []

    def iter(self, domain):
        """Yield ``(name, password)`` for entries whose domain is "" or equals *domain*."""
        for stored_domain, name, password in self.credentials:
            applies_everywhere = stored_domain == ""
            if applies_everywhere or domain == stored_domain:
                yield (name, password)
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

    def add(self, key, cert, domain, password):
        """Append a (domain, key, cert, password) entry; the domain is stored lower-cased."""
        entry = (domain.lower(), key, cert, password)
        self.credentials.append(entry)

    def iter(self, domain):
        """Yield ``(key, cert, password)`` for entries whose domain is "" or equals *domain*."""
        for stored_domain, key, cert, password in self.credentials:
            applies_everywhere = stored_domain == ""
            if applies_everywhere or domain == stored_domain:
                yield (key, cert, password)
class AllHosts(object):
    """Marker class used as a ``bypass_hosts`` value meaning "bypass every host"."""
class ProxyInfo(object):
    """Collect information required to use a proxy."""

    # Host names (or ".suffix" patterns) that must not go through the proxy.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

          proxy_type: The type of proxy server.  This must be set to one of
          socks.PROXY_TYPE_XXX constants.  For example:  p =
          ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
          proxy_port=8000)
          proxy_host: The hostname or IP address of the proxy server.
          proxy_port: The port that the proxy server is running on.
          proxy_rdns: If True (default), DNS queries will not be performed
          locally, and instead, handed to the proxy to resolve.  This is useful
          if the network does not allow resolution of non-local names. In
          httplib2 0.9 and earlier, this defaulted to False.
          proxy_user: The username used to authenticate with the proxy server.
          proxy_pass: The password used to authenticate with the proxy server.
          proxy_headers: Additional or modified headers for the proxy connect
          request.
        """
        # Credentials given as bytes are decoded with the default codec.
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return the seven settings as a tuple, in constructor order."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """Truthy when the socks module is available and both host and port are set."""
        host_is_set = self.proxy_host is not None
        port_is_set = self.proxy_port is not None
        return socks and host_is_set and port_is_set

    def applies_to(self, hostname):
        """Return True when *hostname* is not excluded from the proxy."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        dotted = "." + hostname.lstrip(".")
        return any(
            # *.suffix                                              # exact match
            (skip_name.startswith(".") and dotted.endswith(skip_name)) or dotted == "." + skip_name
            for skip_name in self.bypass_hosts
        )

    def __repr__(self):
        template = (
            "<ProxyInfo type={p.proxy_type} host:port={p.proxy_host}:{p.proxy_port} "
            "rdns={p.proxy_rdns} user={p.proxy_user} headers={p.proxy_headers}>"
        )
        return template.format(p=self)
def proxy_info_from_environment(method="http"):
    """Read proxy info from the environment variables.

    Looks up ``<method>_proxy`` and, when that variable is not set, its
    upper-case spelling.  Returns None for a method other than ``http`` or
    ``https`` or when no proxy URL is configured; otherwise returns the
    result of ``proxy_info_from_url``.
    """
    if method in ("http", "https"):
        variable = method + "_proxy"
        url = os.environ.get(variable, os.environ.get(variable.upper()))
        if url:
            return proxy_info_from_url(url, method, noproxy=None)
    return None
def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as the http_proxy env var).

    Args:
        url: proxy URL. Scheme "socks4" selects SOCKS4, "socks5" or "socks"
            select SOCKS5, anything else is treated as an HTTP proxy.
        method: "http" or "https"; only used to pick the default port
            (80 or 443) when *url* carries none.
        noproxy: comma separated host names that must bypass the proxy; a
            single "*" bypasses every host. When None, the value of the
            ``no_proxy`` (or ``NO_PROXY``) environment variable is used.

    Returns:
        A ProxyInfo whose ``bypass_hosts`` attribute holds the parsed
        exclusion list.

    Raises:
        KeyError: *url* has no port and *method* is not "http"/"https".
    """
    url = urllib.parse.urlparse(url)

    proxy_type = 3  # socks.PROXY_TYPE_HTTP
    if url.scheme == "socks4":
        proxy_type = 1  # socks.PROXY_TYPE_SOCKS4
    elif url.scheme in ("socks5", "socks"):
        proxy_type = 2  # socks.PROXY_TYPE_SOCKS5
    pi = ProxyInfo(
        proxy_type=proxy_type,
        proxy_host=url.hostname,
        proxy_port=url.port or dict(https=443, http=80)[method],
        proxy_user=url.username or None,
        proxy_pass=url.password or None,
        proxy_headers=None,
    )

    bypass_hosts = []
    # If not given an explicit noproxy value, respect values in env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
    noproxy = noproxy.strip()
    # Special case: A single '*' character means all hosts should be bypassed.
    if noproxy == "*":
        bypass_hosts = AllHosts
    elif noproxy:
        # Lists are commonly written "a.com, b.com": strip each entry so the
        # padding does not end up inside the host name, and drop empty ones.
        stripped = (entry.strip() for entry in noproxy.split(","))
        bypass_hosts = tuple(entry for entry in stripped if entry)

    pi.bypass_hosts = bypass_hosts
    return pi
class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts and proxies.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        """Store the target and proxy settings; no socket is opened here.

        proxy_info may be a ProxyInfo instance, None, or a callable that
        is invoked with the scheme ("http") and returns the settings.
        """
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        self.proxy_info = proxy_info
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__.

        Every address returned by getaddrinfo() is tried in turn until one
        connects.

        Raises:
            ProxiesUnavailableError: proxy settings are present but the
                ``socks`` module is not available.
            socket.error: the last connection error when every address
                failed, or a generic error when there was no address to try.
        """
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # Resolve the proxy address; the proxy socket is then asked to
            # reach the real target.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        socket_err = None

        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                # sa[2:] carries the extra IPv6 address fields, if any.
                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            if socket_err is None:
                # No address was tried, so there is no error to re-raise;
                # ``raise None`` would only produce a confusing TypeError.
                raise socket.error("getaddrinfo returns an empty list")
            raise socket_err
class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):
        """Build the SSL context and store the connection settings.

        proxy_info may be a ProxyInfo instance, None, or a callable that
        is invoked with the scheme ("https") and returns the settings.
        When ca_certs is empty the module-level CA_CERTS value is used.
        """
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        self.proxy_info = proxy_info
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port.

        Every address returned by getaddrinfo() is tried in turn. SSL
        errors, timeouts and name resolution errors abort immediately;
        other socket errors move on to the next address.

        Raises:
            ssl.SSLError, ssl.CertificateError: TLS setup failed.
            socket.timeout, socket.gaierror: re-raised unchanged.
            socket.error: the last connection error when every address
                failed, or a generic error when there was no address to try.
        """
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        socket_err = None

        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            # Raw (not yet wrapped) socket of this attempt; tracked so the
            # error handlers can release it.
            sock = None
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))

                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError):
                if sock is not None:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                # Release the raw socket instead of leaving it to the
                # garbage collector; close() is harmless if already closed.
                if sock is not None:
                    sock.close()
                raise
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                # The failed raw socket is only referenced by the local
                # name, so it has to be closed here.
                if sock is not None:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            if socket_err is None:
                # No address was tried, so there is no error to re-raise;
                # ``raise None`` would only produce a confusing TypeError.
                raise socket.error("getaddrinfo returns an empty list")
            raise socket_err
# Connection class used for each supported URI scheme.
SCHEME_TO_CONNECTION = dict(
    http=HTTPConnectionWithTimeout,
    https=HTTPSConnectionWithTimeout,
)
1217class Http(object):
1218 """An HTTP client that handles:
1220 - all methods
1221 - caching
1222 - ETags
1223 - compression,
1224 - HTTPS
1225 - Basic
1226 - Digest
1227 - WSSE
1229 and more.
1230 """
def __init__(
    self,
    cache=None,
    timeout=None,
    proxy_info=proxy_info_from_environment,
    ca_certs=None,
    disable_ssl_certificate_validation=False,
    tls_maximum_version=None,
    tls_minimum_version=None,
    decode_limit_hard=None,
    decode_limit_safe=None,
    decode_limit_ratio=None,
    decode_limit_chunk=None,
):
    """Create a client with no open connections and default policies.

    `cache`: a string is used as the directory name of a disk cache;
    any other value must be an object that supports the same interface
    as FileCache.

    `timeout` is in seconds. If None is passed then Python's default
    timeout for sockets will be used. See for example the docs of
    socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout

    `proxy_info` may be:
      - a callable that takes the http scheme ('http' or 'https') and
        returns a ProxyInfo instance per request. By default, uses
        proxy_info_from_environment.
      - a ProxyInfo instance (static proxy config).
      - None (proxy disabled).

    `ca_certs` is the path of a file containing root CA certificates for
    SSL server certificate validation. By default, a CA cert file bundled
    with httplib2 is used.

    If `disable_ssl_certificate_validation` is true, SSL cert validation
    will not be performed.

    `tls_maximum_version` / `tls_minimum_version` require Python 3.7+ /
    OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.

    `decode_limit_{hard,safe,ratio,chunk}` options configure
    `httplib2.decode.LimitDecoder` in attempt order:
      - Http() argument - top priority
      - environment httplib2_decode_limit_{hard,safe,ratio,chunk}
      - LimitDecoder defaults - least priority
    """
    self.proxy_info = proxy_info
    self.ca_certs = ca_certs
    self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
    self.tls_maximum_version = tls_maximum_version
    self.tls_minimum_version = tls_minimum_version

    # Open connections, keyed by "scheme:authority".
    self.connections = {}

    # A string names the cache directory; anything else is used directly.
    uses_directory = cache and isinstance(cache, str)
    self.cache = FileCache(cache) if uses_directory else cache

    self.credentials = Credentials()  # names and passwords
    self.certificates = KeyCerts()  # keys and certificates
    self.authorizations = []  # authorization objects

    # When False no redirects are followed, even safe ones.
    self.follow_redirects = True

    self.redirect_codes = REDIRECT_CODES

    # Methods that take part in optimistic concurrency, i.e. that get an
    # "if-match:" etag header added to them.
    self.optimistic_concurrency_methods = ["PUT", "PATCH"]

    self.safe_methods = list(SAFE_METHODS)

    # With 'follow_redirects' enabled, True here means that all
    # redirects are followed, including unsafe ones.
    self.follow_all_redirects = False

    self.ignore_etag = False

    self.force_exception_to_status_code = False

    self.timeout = timeout

    # Keep Authorization: headers on a redirect.
    self.forward_authorization_headers = False

    # (keyword, converter, explicit argument, environment variable name)
    limit_options = (
        ("hard_limit", int, decode_limit_hard, "httplib2_decode_limit_hard"),
        ("safe_limit", int, decode_limit_safe, "httplib2_decode_limit_safe"),
        ("ratio", float, decode_limit_ratio, "httplib2_decode_limit_ratio"),
        ("chunk_size", int, decode_limit_chunk, "httplib2_decode_limit_chunk"),
    )
    resolved_limits = {}
    for keyword, converter, argument, env_name in limit_options:
        resolved = try_value_or_env(converter, argument, env_name)
        # Unset options are left out altogether.
        if resolved is not None:
            resolved_limits[keyword] = resolved
    self.limit_kwargs = resolved_limits
def close(self):
    """Close persistent connections, clear sensitive data.

    The connection table is replaced by an empty one before the old
    connections are closed; certificates and credentials are cleared last.
    Not thread-safe, requires external synchronization against concurrent requests.
    """
    open_connections = self.connections
    self.connections = {}
    for connection in open_connections.values():
        connection.close()
    self.certificates.clear()
    self.clear_credentials()
def __getstate__(self):
    """Return a shallow copy of the instance state for pickling.

    The "connections" entry is left out, as is "request", which may have
    been replaced by some foreign object such as credentials which
    handle auth.
    """
    state = copy.copy(self.__dict__)
    for transient in ("request", "connections"):
        state.pop(transient, None)
    return state
def __setstate__(self, state):
    """Restore pickled attributes and start with an empty connection table."""
    vars(self).update(state)
    # Always reset, whatever the state carried for "connections".
    self.connections = {}
def _auth_from_challenge(self, host, request_uri, headers, response, content):
    """Yield Authorization objects that can be applied to requests.

    For every credential registered for *host*, one object is produced per
    scheme of AUTH_SCHEME_ORDER that appears among the challenges parsed
    from the "www-authenticate" header of *response*.
    """
    offered = auth._parse_www_authenticate(response, "www-authenticate")
    for credential in self.credentials.iter(host):
        for scheme_name in AUTH_SCHEME_ORDER:
            if scheme_name not in offered:
                continue
            handler_class = AUTH_SCHEME_CLASSES[scheme_name]
            yield handler_class(credential, host, request_uri, headers, response, content, self)
def add_credentials(self, name, password, domain=""):
    """Register a name and password to be used any time a request
    requires authentication; *domain* is passed on to the credential
    store unchanged."""
    store = self.credentials
    store.add(name, password, domain)
def add_certificate(self, key, cert, domain, password=None):
    """Register a key and cert to be used any time a request requires
    authentication; *domain* and *password* are passed on to the
    certificate store unchanged."""
    store = self.certificates
    store.add(key, cert, domain, password)
def clear_credentials(self):
    """Forget every stored name and password, and drop the authorization
    objects collected so far."""
    store = self.credentials
    store.clear()
    self.authorizations = list()
def _conn_request(self, conn, request_uri, method, body, headers):
    """Send one request over *conn* and read the reply, with retries.

    Up to RETRIES attempts are made; a stale connection detected on the
    very first attempt earns one extra round.

    Returns:
        (response, content): a Response and the body bytes. The body is
        empty for HEAD, otherwise it went through _decompressContent.

    Raises:
        socket.timeout: re-raised unchanged.
        ServerNotFoundError: the host name could not be resolved.
        socket.error, http.client.HTTPException: once retrying is over.
    """
    i = 0
    seen_bad_status_line = False
    while i < RETRIES:
        i += 1
        try:
            if conn.sock is None:
                conn.connect()
            conn.request(method, request_uri, body, headers)
        except socket.timeout:
            conn.close()
            raise
        except socket.gaierror:
            conn.close()
            raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
        except socket.error as e:
            errno_ = _errno_from_exception(e)
            if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                continue  # retry on potentially transient errors
            raise
        except http.client.HTTPException:
            if conn.sock is None:
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            if i < RETRIES - 1:
                conn.close()
                conn.connect()
                continue
            # Just because the server closed the connection doesn't apparently mean
            # that the server didn't send a response, so fall through and
            # try to read one.
        try:
            response = conn.getresponse()
        except (http.client.BadStatusLine, http.client.ResponseNotReady):
            # If we get a BadStatusLine on the first try then that means
            # the connection just went stale, so retry regardless of the
            # number of RETRIES set.
            if not seen_bad_status_line and i == 1:
                i = 0
                seen_bad_status_line = True
                conn.close()
                conn.connect()
                continue
            else:
                conn.close()
                raise
        except socket.timeout:
            raise
        except (socket.error, http.client.HTTPException):
            # ``i`` is at least 1 here (it is incremented at the top of
            # every iteration), so no retry is possible on this path.
            conn.close()
            raise
        else:
            content = b""
            if method == "HEAD":
                conn.close()
            else:
                content = response.read()
            response = Response(response)
            if method != "HEAD":
                content = _decompressContent(response, content, self.limit_kwargs)

        break
    return (response, content)
def _request(
    self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
):
    """Do the actual request using the connection object
    and also follow one level of redirects if necessary.

    An already known authorization in scope for the URI is applied first;
    a 401 reply triggers one attempt per object produced by
    _auth_from_challenge. Redirects are followed through self.request()
    with `redirections` decremented. Cacheable responses are handed to
    _updateCache.

    Returns:
        (response, content) of the final response.

    Raises:
        RedirectMissingLocation: a redirect (other than 300) without a
            Location header.
        RedirectLimit: a redirect arrived while `redirections` is 0.
    """
    in_scope = [
        (candidate.depth(request_uri), candidate)
        for candidate in self.authorizations
        if candidate.inscope(host, request_uri)
    ]
    # Lowest sort key wins. The local is deliberately not called ``auth``
    # so that it does not shadow the imported ``auth`` module.
    active_auth = sorted(in_scope)[0][1] if in_scope else None
    if active_auth:
        active_auth.request(method, request_uri, headers, body)

    (response, content) = self._conn_request(conn, request_uri, method, body, headers)

    if active_auth:
        if active_auth.response(response, body):
            # The authorization asked for the request to be sent again.
            active_auth.request(method, request_uri, headers, body)
            (response, content) = self._conn_request(conn, request_uri, method, body, headers)
            response._stale_digest = 1

    if response.status == 401:
        for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
            authorization.request(method, request_uri, headers, body)
            (response, content) = self._conn_request(conn, request_uri, method, body, headers)
            if response.status != 401:
                # Remember what worked for later requests.
                self.authorizations.append(authorization)
                authorization.response(response, body)
                break

    if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
        if self.follow_redirects and response.status in self.redirect_codes:
            # Pick out the location header and basically start from the beginning
            # remembering first to strip the ETag header and decrement our 'depth'
            if redirections:
                if "location" not in response and response.status != 300:
                    raise RedirectMissingLocation(
                        _("Redirected but the response is missing a Location: header."), response, content,
                    )
                # Fix-up relative redirects (which violate an RFC 2616 MUST)
                if "location" in response:
                    location = response["location"]
                    (scheme, authority, path, query, fragment) = parse_uri(location)
                    if authority is None:
                        response["location"] = urllib.parse.urljoin(absolute_uri, location)
                if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                    response["-x-permanent-redirect-url"] = response["location"]
                    if "content-location" not in response:
                        response["content-location"] = absolute_uri
                    _updateCache(headers, response, content, self.cache, cachekey)
                # Validators belong to the original URI only.
                if "if-none-match" in headers:
                    del headers["if-none-match"]
                if "if-modified-since" in headers:
                    del headers["if-modified-since"]
                if "authorization" in headers and not self.forward_authorization_headers:
                    del headers["authorization"]
                if "location" in response:
                    location = response["location"]
                    old_response = copy.deepcopy(response)
                    if "content-location" not in old_response:
                        old_response["content-location"] = absolute_uri
                    redirect_method = method
                    if response.status in [302, 303]:
                        redirect_method = "GET"
                        body = None
                    (response, content) = self.request(
                        location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                    )
                    response.previous = old_response
            else:
                raise RedirectLimit(
                    "Redirected more times than redirection_limit allows.", response, content,
                )
        elif response.status in [200, 203] and method in self.safe_methods:
            # Don't cache 206's since we aren't going to handle byte range requests
            if "content-location" not in response:
                response["content-location"] = absolute_uri
            _updateCache(headers, response, content, self.cache, cachekey)

    return (response, content)
def _normalize_headers(self, headers):
    """Return *headers* as processed by the module-level
    ``_normalize_headers`` function."""
    normalized = _normalize_headers(headers)
    return normalized
1546 # Need to catch and rebrand some exceptions
1547 # Then need to optionally turn all exceptions into status codes
1548 # including all socket.* and httplib.* exceptions.
def request(
    self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
):
    """Performs a single HTTP request.

    The 'uri' is the URI of the HTTP resource and can begin
    with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

    The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
    There is no restriction on the methods allowed.

    The 'body' is the entity body to be sent with the request. It is a string
    object.

    Any extra headers that are to be sent with the request should be provided in the
    'headers' dictionary.

    The maximum number of redirects to follow before raising an
    exception is 'redirections'. The default is DEFAULT_MAX_REDIRECTS.

    'connection_type', when given, is the connection class used for a new
    connection instead of the one registered for the URI scheme.

    The return value is a tuple of (response, content), the first
    being an instance of the 'Response' class, the second being
    the response entity body.

    When 'force_exception_to_status_code' is set, exceptions are not
    raised but reported as a 500, 408 or 400 response.
    """
    conn_key = ""

    try:
        if headers is None:
            headers = {}
        else:
            headers = self._normalize_headers(headers)

        if "user-agent" not in headers:
            headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

        uri = iri2uri(uri)
        # Prevent CWE-75 space injection to manipulate request via part of uri.
        # Prevent CWE-93 CRLF injection to modify headers via part of uri.
        uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

        (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

        # One connection is kept per scheme and authority.
        conn_key = scheme + ":" + authority
        conn = self.connections.get(conn_key)
        if conn is None:
            if not connection_type:
                connection_type = SCHEME_TO_CONNECTION[scheme]
            certs = list(self.certificates.iter(authority))
            if issubclass(connection_type, HTTPSConnectionWithTimeout):
                if certs:
                    conn = self.connections[conn_key] = connection_type(
                        authority,
                        key_file=certs[0][0],
                        cert_file=certs[0][1],
                        timeout=self.timeout,
                        proxy_info=self.proxy_info,
                        ca_certs=self.ca_certs,
                        disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                        tls_maximum_version=self.tls_maximum_version,
                        tls_minimum_version=self.tls_minimum_version,
                        key_password=certs[0][2],
                    )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority,
                        timeout=self.timeout,
                        proxy_info=self.proxy_info,
                        ca_certs=self.ca_certs,
                        disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                        tls_maximum_version=self.tls_maximum_version,
                        tls_minimum_version=self.tls_minimum_version,
                    )
            else:
                conn = self.connections[conn_key] = connection_type(
                    authority, timeout=self.timeout, proxy_info=self.proxy_info
                )
            conn.set_debuglevel(debuglevel)

        if "range" not in headers and "accept-encoding" not in headers:
            headers["accept-encoding"] = "gzip, deflate"

        info = email.message.Message()
        cachekey = None
        cached_value = None
        if self.cache:
            cachekey = defrag_uri
            cached_value = self.cache.get(cachekey)
            if cached_value:
                try:
                    # A cache entry is the header block, a blank line, then the body.
                    info, content = cached_value.split(b"\r\n\r\n", 1)
                    info = email.message_from_bytes(info)
                    for k, v in info.items():
                        if v.startswith("=?") and v.endswith("?="):
                            info.replace_header(k, str(*email.header.decode_header(v)[0]))
                except (IndexError, ValueError):
                    # Unreadable entry: drop it and carry on without cache.
                    self.cache.delete(cachekey)
                    cachekey = None
                    cached_value = None

        if (
            method in self.optimistic_concurrency_methods
            and self.cache
            and "etag" in info
            and not self.ignore_etag
            and "if-match" not in headers
        ):
            # http://www.w3.org/1999/04/Editing/
            headers["if-match"] = info["etag"]

        # https://tools.ietf.org/html/rfc7234
        # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
        # when a non-error status code is received in response to an unsafe request method.
        if self.cache and cachekey and method not in self.safe_methods:
            self.cache.delete(cachekey)

        # Check the vary header in the cache to see if this request
        # matches what varies in the cache.
        if method in self.safe_methods and "vary" in info:
            vary = info["vary"]
            vary_headers = vary.lower().replace(" ", "").split(",")
            for header in vary_headers:
                key = "-varied-%s" % header
                value = info[key]
                if headers.get(header, None) != value:
                    cached_value = None
                    break

        if (
            self.cache
            and cached_value
            and (method in self.safe_methods or info["status"] == "308")
            and "range" not in headers
        ):
            redirect_method = method
            if info["status"] not in ("307", "308"):
                redirect_method = "GET"
            if "-x-permanent-redirect-url" in info:
                # Should cached permanent redirects be counted in our redirection count? For now, yes.
                if redirections <= 0:
                    # The handler below assigns ``.status`` and ``.reason``
                    # on the response carried by the exception, which a
                    # plain dict does not allow; hence a (still empty)
                    # Response.
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", Response({}), "",
                    )
                (response, new_content) = self.request(
                    info["-x-permanent-redirect-url"],
                    method=redirect_method,
                    headers=headers,
                    redirections=redirections - 1,
                )
                response.previous = Response(info)
                response.previous.fromcache = True
            else:
                # Determine our course of action:
                #   Is the cached entry fresh or stale?
                #   Has the client requested a non-cached response?
                #
                # There seems to be three possible answers:
                # 1. [FRESH] Return the cache entry w/o doing a GET
                # 2. [STALE] Do the GET (but add in cache validators if available)
                # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                entry_disposition = _entry_disposition(info, headers)

                if entry_disposition == "FRESH":
                    response = Response(info)
                    response.fromcache = True
                    return (response, content)

                if entry_disposition == "STALE":
                    if "etag" in info and not self.ignore_etag and "if-none-match" not in headers:
                        headers["if-none-match"] = info["etag"]
                    # NOTE(review): this tests the request headers for
                    # "last-modified" although the header being set is
                    # "if-modified-since"; kept as is, confirm the intent.
                    if "last-modified" in info and "last-modified" not in headers:
                        headers["if-modified-since"] = info["last-modified"]
                elif entry_disposition == "TRANSPARENT":
                    pass

                (response, new_content) = self._request(
                    conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                )

            if response.status == 304 and method == "GET":
                # Rewrite the cache entry with the new end-to-end headers
                # Take all headers that are in response
                # and overwrite their values in info.
                # unless they are hop-by-hop, or are listed in the connection header.

                for key in _get_end2end_headers(response):
                    info[key] = response[key]
                merged_response = Response(info)
                if hasattr(response, "_stale_digest"):
                    merged_response._stale_digest = response._stale_digest
                _updateCache(headers, merged_response, content, self.cache, cachekey)
                response = merged_response
                response.status = 200
                response.fromcache = True

            elif response.status == 200:
                content = new_content
            else:
                self.cache.delete(cachekey)
                content = new_content
        else:
            cc = _parse_cache_control(headers)
            if "only-if-cached" in cc:
                # Nothing usable in the cache and the network is off limits.
                info["status"] = "504"
                response = Response(info)
                content = b""
            else:
                (response, content) = self._request(
                    conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                )
    except Exception as e:
        is_timeout = isinstance(e, socket.timeout)
        if is_timeout:
            # Do not reuse a connection that timed out.
            conn = self.connections.pop(conn_key, None)
            if conn:
                conn.close()

        if self.force_exception_to_status_code:
            if isinstance(e, HttpLib2ErrorWithResponse):
                response = e.response
                content = e.content
                response.status = 500
                response.reason = str(e)
            elif isinstance(e, socket.timeout):
                content = b"Request Timeout"
                response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                response.reason = "Request Timeout"
            else:
                content = str(e).encode("utf-8")
                response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                response.reason = "Bad Request"
        else:
            raise

    return (response, content)
class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    # Is this response from our local cache
    fromcache = False

    # HTTP protocol version used by server.
    # 10 for HTTP/1.0, 11 for HTTP/1.1.
    version = 11

    # Status code returned by server.
    status = 200

    # Reason phrase returned by server.
    reason = "Ok"

    previous = None

    def __init__(self, info):
        """Copy the headers of *info* into this dict under lower-case keys.

        *info* is an http.client.HTTPResponse, an email.message.Message or
        any other object with an ``items()`` method. The ``status``
        attribute is set as an int in every case.
        """
        if isinstance(info, http.client.HTTPResponse):
            for raw_name, header_value in info.getheaders():
                name = raw_name.lower()
                earlier = self.get(name)
                # A repeated header is folded into one comma separated value.
                if earlier is None:
                    self[name] = header_value
                else:
                    self[name] = ", ".join((earlier, header_value))
            self.status = info.status
            self["status"] = str(self.status)
            self.reason = info.reason
            self.version = info.version
            return

        if isinstance(info, email.message.Message):
            for raw_name, header_value in list(info.items()):
                self[raw_name.lower()] = header_value
            self.status = int(self["status"])
            return

        for raw_name, header_value in info.items():
            self[raw_name.lower()] = header_value
        # Without a "status" entry the class default is kept.
        self.status = int(self.get("status", self.status))

    def __getattr__(self, name):
        """Expose the mapping itself as the ``dict`` attribute; any other
        missing attribute raises AttributeError."""
        if name != "dict":
            raise AttributeError(name)
        return self
def try_value_or_env(to, value, env_key, default=None):
    """Convert the first usable candidate with *to*.

    Candidates, in order: *value*, the environment variable *env_key*,
    and the environment variable named by the upper-cased *env_key*.
    A candidate that is None, or for which ``to`` raises ValueError, is
    passed over. Falsey results such as 0 are accepted. *default* is
    returned when no candidate converts.
    """
    from_env = os.environ.get(env_key)
    from_env_upper = os.environ.get(env_key.upper())
    for candidate in (value, from_env, from_env_upper):
        if candidate is not None:
            try:
                return to(candidate)
            except ValueError:
                continue
    return default