Coverage for /pythoncovmergedfiles/medio/medio/src/httplib2/httplib2/__init__.py: 19%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""Small, fast HTTP client library for Python."""
4__author__ = "Joe Gregorio (joe@bitworking.org)"
5__copyright__ = "Copyright 2006, Joe Gregorio"
6__contributors__ = [
7 "Thomas Broyer (t.broyer@ltgt.net)",
8 "James Antill",
9 "Xavier Verges Farrero",
10 "Jonathan Feinberg",
11 "Blair Zajac",
12 "Sam Ruby",
13 "Louis Nyffenegger",
14 "Mark Pilgrim",
15 "Alex Yu",
16 "Lai Han",
17]
18__license__ = "MIT"
19__version__ = "0.22.0"
21import base64
22import calendar
23import copy
24import email
25import email.feedparser
26from email import header
27import email.message
28import email.utils
29import errno
30from gettext import gettext as _
31import gzip
32from hashlib import md5 as _md5
33from hashlib import sha1 as _sha
34import hmac
35import http.client
36import io
37import os
38import random
39import re
40import socket
41import ssl
42import sys
43import time
44import urllib.parse
45import zlib
47try:
48 import socks
49except ImportError:
50 socks = None
51from . import auth
52from .error import *
53from .iri2uri import iri2uri
def has_timeout(timeout):
    """Return True when *timeout* denotes an actual timeout value.

    Both None and the stdlib sentinel socket._GLOBAL_DEFAULT_TIMEOUT
    (when present) mean "no explicit timeout".
    """
    if timeout is None:
        return False
    if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
        return timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
    return True
62__all__ = [
63 "debuglevel",
64 "FailedToDecompressContent",
65 "Http",
66 "HttpLib2Error",
67 "ProxyInfo",
68 "RedirectLimit",
69 "RedirectMissingLocation",
70 "Response",
71 "RETRIES",
72 "UnimplementedDigestAuthOptionError",
73 "UnimplementedHmacDigestAuthOptionError",
74]
76# The httplib debug level, set to a non-zero value to get debug output
77debuglevel = 0
79# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
80RETRIES = 2
83# Open Items:
84# -----------
86# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
88# Pluggable cache storage (supports storing the cache in
89# flat files by default. We need a plug-in architecture
90# that can support Berkeley DB and Squid)
92# == Known Issues ==
93# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
94# Does not handle Cache-Control: max-stale
95# Does not use Age: headers when calculating cache freshness.
97# The number of redirections to follow before giving up.
98# Note that only GET redirects are automatically followed.
99# Will also honor 301 requests by saving that info and never
100# requesting that URI again.
101DEFAULT_MAX_REDIRECTS = 5
103# Which headers are hop-by-hop headers by default
104HOP_BY_HOP = [
105 "connection",
106 "keep-alive",
107 "proxy-authenticate",
108 "proxy-authorization",
109 "te",
110 "trailers",
111 "transfer-encoding",
112 "upgrade",
113]
115# https://tools.ietf.org/html/rfc7231#section-8.1.3
116SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")
118# To change, assign to `Http().redirect_codes`
119REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308))
122from httplib2 import certs
124CA_CERTS = certs.where()
126# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated.
127# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means:
128# > Selects the highest protocol version that both the client and server support.
129# > Despite the name, this option can select “TLS” protocols as well as “SSL”.
130# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23
132# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated.
133# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections.
134# > The protocol enables CERT_REQUIRED and check_hostname by default.
135# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS
137DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23")
def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Build an ssl.SSLContext for client-side HTTPS connections.

    Args:
        disable_ssl_certificate_validation: if true, certificate verification
            (and hostname checking) is turned off.
        ca_certs: path to a CA bundle passed to load_verify_locations().
        cert_file: optional client certificate chain file.
        key_file: optional private key file for cert_file.
        maximum_version: optional TLS version cap; a string is looked up as an
            attribute of ssl.TLSVersion (e.g. "TLSv1_2"). Requires Python 3.7+.
        minimum_version: optional TLS version floor, same semantics.
        key_password: optional password for the client private key.

    Returns:
        A configured ssl.SSLContext.

    Raises:
        RuntimeError: when ssl.SSLContext (Python < 3.2) or the min/max
            version attributes (Python < 3.7) are unavailable.
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation and hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation
    context.verify_mode = ssl.CERT_NONE if disable_ssl_certificate_validation else ssl.CERT_REQUIRED

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    if maximum_version is not None:
        if hasattr(context, "maximum_version"):
            if isinstance(maximum_version, str):
                # Allow the caller to pass the TLSVersion member name as a string.
                maximum_version = getattr(ssl.TLSVersion, maximum_version)
            context.maximum_version = maximum_version
        else:
            raise RuntimeError("setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    if minimum_version is not None:
        if hasattr(context, "minimum_version"):
            if isinstance(minimum_version, str):
                minimum_version = getattr(ssl.TLSVersion, minimum_version)
            context.minimum_version = minimum_version
        else:
            raise RuntimeError("setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer")

    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation

    context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context
def _get_end2end_headers(response):
    """Return the names of the end-to-end headers in *response*.

    Hop-by-hop headers — the static HOP_BY_HOP list plus any names the
    response's own Connection header declares — are excluded.
    """
    excluded = set(HOP_BY_HOP)
    excluded.update(name.strip() for name in response.get("connection", "").split(","))
    return [name for name in list(response.keys()) if name not in excluded]
195_missing = object()
198def _errno_from_exception(e):
199 # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass
200 errno = getattr(e, "errno", _missing)
201 if errno is not _missing:
202 return errno
204 # socket.error and common wrap in .args
205 args = getattr(e, "args", None)
206 if args:
207 return _errno_from_exception(args[0])
209 # pysocks.ProxyError wraps in .socket_err
210 # https://github.com/httplib2/httplib2/pull/202
211 socket_err = getattr(e, "socket_err", None)
212 if socket_err:
213 return _errno_from_exception(socket_err)
215 return None
# URI-splitting regex from Appendix B of RFC 3986.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Split *uri* into its five RFC 3986 components.

    Returns (scheme, authority, path, query, fragment); components that are
    absent from the URI come back as None.
    """
    match = URI.match(uri)
    return (
        match.group(2),  # scheme, without the trailing ":"
        match.group(4),  # authority, without the leading "//"
        match.group(5),  # path
        match.group(7),  # query, without the leading "?"
        match.group(9),  # fragment, without the leading "#"
    )
def urlnorm(uri):
    """Normalize an absolute *uri* for request and cache-key purposes.

    Returns (scheme, authority, request_uri, defrag_uri) where request_uri
    is the path plus an optional "?query" and defrag_uri is the absolute
    URI without any fragment.

    Raises:
        RelativeURIError: if *uri* lacks a scheme or an authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # Scheme and host are case-insensitive; normalize to lower case.
    # (The original lowered `scheme` twice; the duplicate is removed.)
    authority = authority.lower()
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = "?".join([path, query]) if query else path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r"^\w+://")
re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII)


def safename(filename):
    """Return a filesystem-safe cache filename for *filename*.

    The result has the form "<sanitized>,<md5>": a readable prefix with the
    URL scheme and all unsafe characters stripped (truncated to 90 chars),
    plus the md5 of the original name so distinct keys never collide.
    """
    if isinstance(filename, bytes):
        filename_bytes = filename
        filename = filename.decode("utf-8")
    else:
        filename_bytes = filename.encode("utf-8")
    digest = _md5(filename_bytes).hexdigest()
    readable = re_unsafe.sub("", re_url_scheme.sub("", filename))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return ",".join((readable[:90], digest))
275NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+")
278def _normalize_headers(headers):
279 return dict(
280 [
281 (_convert_byte_str(key).lower(), NORMALIZE_SPACE.sub(_convert_byte_str(value), " ").strip(),)
282 for (key, value) in headers.items()
283 ]
284 )
287def _convert_byte_str(s):
288 if not isinstance(s, str):
289 return str(s, "utf-8")
290 return s
293def _parse_cache_control(headers):
294 retval = {}
295 if "cache-control" in headers:
296 parts = headers["cache-control"].split(",")
297 parts_with_args = [
298 tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")
299 ]
300 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
301 retval = dict(parts_with_args + parts_wo_args)
302 return retval
305# Whether to use a strict mode to parse WWW-Authenticate headers
306# Might lead to bad results in case of ill-formed header value,
307# so disabled by default, falling back to relaxed parsing.
308# Set to true to turn on, useful for testing servers.
309USE_WWW_AUTH_STRICT_PARSING = 0
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

        no-cache
        only-if-cached
        max-age
        min-fresh

    Returns one of:
        "FRESH"       -- the cached entry may be served as-is
        "STALE"       -- the cached entry must be revalidated
        "TRANSPARENT" -- bypass the cache entirely
    """
    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
        # HTTP/1.0-style "Pragma: no-cache" forces a transparent fetch.
        retval = "TRANSPARENT"
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        retval = "FRESH"
    elif "date" in response_headers:
        # Freshness math: compare the entry's age against its lifetime.
        date = calendar.timegm(email.utils.parsedate_tz(response_headers["date"]))
        now = time.time()
        current_age = max(0, now - date)
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.utils.parsedate_tz(response_headers["expires"])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides the response's lifetime.
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh effectively ages the entry by the requested margin.
        if "min-fresh" in cc:
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
387def _decompressContent(response, new_content):
388 content = new_content
389 try:
390 encoding = response.get("content-encoding", None)
391 if encoding in ["gzip", "deflate"]:
392 if encoding == "gzip":
393 content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
394 if encoding == "deflate":
395 try:
396 content = zlib.decompress(content, zlib.MAX_WBITS)
397 except (IOError, zlib.error):
398 content = zlib.decompress(content, -zlib.MAX_WBITS)
399 response["content-length"] = str(len(content))
400 # Record the historical presence of the encoding in a way the won't interfere.
401 response["-content-encoding"] = response["content-encoding"]
402 del response["content-encoding"]
403 except (IOError, zlib.error):
404 content = ""
405 raise FailedToDecompressContent(
406 _("Content purported to be compressed with %s but failed to decompress.") % response.get("content-encoding"),
407 response,
408 content,
409 )
410 return content
def _bind_write_headers(msg):
    """Return a replacement ``_write_headers`` bound to *msg*.

    The returned function is patched onto an email Generator-like object
    (see _updateCache, which sets it on the Message after a
    UnicodeEncodeError); it serializes *msg*'s headers through
    email.header.Header so non-ASCII values are encoded rather than
    raising.
    """

    def _write_headers(self):
        # Self refers to the Generator object.
        for h, v in msg.items():
            print("%s:" % h, end=" ", file=self._fp)
            if isinstance(v, header.Header):
                # Already a Header: just encode with the generator's line limit.
                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
            else:
                # email.Header got lots of smarts, so use it.
                headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h)
                print(headers.encode(), file=self._fp)
        # A blank line always separates headers from body.
        print(file=self._fp)

    return _write_headers
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) a response in *cache* under *cachekey*.

    Honors "no-store" from either side by deleting any cached entry.
    Otherwise serializes the response headers (minus status and transfer
    framing) plus the raw *content* bytes, recording each Vary-listed
    request header as a "-varied-<name>" annotation so later requests can
    check variance. No-op when *cachekey* is falsy.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if "no-store" in cc or "no-store" in cc_response:
            cache.delete(cachekey)
        else:
            info = email.message.Message()
            for key, value in response_headers.items():
                # Framing/status headers describe the transfer, not the entity.
                if key not in ["status", "content-encoding", "transfer-encoding"]:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get("vary", None)
            if vary:
                vary_headers = vary.lower().replace(" ", "").split(",")
                # NOTE(review): this loop variable shadows the module-level
                # `header` import (email.header); safe here but fragile.
                for header in vary_headers:
                    key = "-varied-%s" % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            status = response_headers.status
            if status == 304:
                # 304 means the cached entity is still valid; store as 200.
                status = 200

            status_header = "status: %d\r\n" % status

            try:
                header_str = info.as_string()
            except UnicodeEncodeError:
                # Fall back to a Header-aware serializer for non-ASCII values.
                setattr(info, "_write_headers", _bind_write_headers(info))
                header_str = info.as_string()

            # Normalize any bare CR or bare LF to CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content])

            cache.set(cachekey, text)
472def _cnonce():
473 dig = _md5(
474 ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8")
475 ).hexdigest()
476 return dig[:16]
479def _wsse_username_token(cnonce, iso_now, password):
480 return (
481 base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8")
482 )
485# For credentials we need two things, first
486# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
487# Then we also need a list of URIs that have already demanded authentication
488# That list is tricky since sub-URIs can take the same auth, or the
489# auth scheme may change as you descend the tree.
490# So we also need each Auth instance to be able to tell us
491# how close to the 'top' it is.
class Authentication(object):
    """Base class for per-site authentication handlers.

    Records the host and path prefix that a set of credentials covers and
    defines the scope/ordering protocol used when choosing a handler for a
    request. Subclasses override request() (and response() if needed).
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        _, _, path, _, _ = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Number of path segments of *request_uri* below this handler's scope."""
        _, _, path, _, _ = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        _, _, path, _, _ = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Add the appropriate Authorization header to *headers*.

        Override in subclasses; the base implementation does nothing.
        """
        pass

    def response(self, response, content):
        """Update internal state (e.g. nonces) from an authorized response.

        Returns True when the request should be retried — for example,
        Digest may report stale=true. Override in subclasses if necessary.
        """
        return False

    # Fixed comparison semantics: handlers never compare equal and always
    # sort as "less than", preserving the original ordering behavior.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True
class BasicAuthentication(Authentication):
    """Handler for RFC 2617 Basic authentication."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add a Basic Authorization header built from the stored credentials."""
        token = base64.b64encode(("%s:%s" % self.credentials).encode("utf-8")).strip().decode("utf-8")
        headers["authorization"] = "Basic " + token
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge and precompute A1.

        Raises:
            UnimplementedDigestAuthOptionError: for qop values other than
                "auth" or algorithms other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        # Keep "auth" only if the server offered it; anything else is unsupported.
        self.challenge["qop"] = ("auth" in [x.strip() for x in qop.split()]) and "auth" or None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        # A1 per RFC 2617: username:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1],])
        # nc = nonce count, incremented on every request with the same nonce.
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers: compute and attach the Digest
        Authorization header. *cnonce* may be supplied for testing;
        otherwise a fresh client nonce is generated."""
        H = lambda x: _md5(x.encode("utf-8")).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge["cnonce"] = cnonce or _cnonce()
        # response = KD(H(A1), nonce:nc:cnonce:qop:H(A2)) per RFC 2617 3.2.2.
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        if self.challenge.get("opaque"):
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Refresh nonce state from the response.

        Returns True (retry) when the server flagged the nonce as stale;
        otherwise records a nextnonce, if provided, and returns False.
        """
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                # Stale nonce: adopt the new one and retry the request.
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above.

    BUGFIX: the original called ``self.pwhashmod.new(...)`` — the API of
    the long-removed ``md5``/``sha`` modules — on hashlib constructors,
    and passed str key/message to ``hmac.new``; both raised under
    Python 3. Hash constructors are now called directly with UTF-8 bytes.
    """

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the HMACDigest challenge and derive the signing key.

        Raises:
            UnimplementedHmacDigestAuthOptionError: for a missing server
                nonce or unsupported algorithm / pw-algorithm values.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge["pw-algorithm"])
            )
        if self.challenge["algorithm"] == "HMAC-MD5":
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge["pw-algorithm"] == "MD5":
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # key = H(username ":" H(password + salt) ":" realm), hex lower-case.
        salted_password = "".join([self.credentials[1], self.challenge["salt"]])
        self.key = "".join(
            [
                self.credentials[0],
                ":",
                self.pwhashmod(salted_password.encode("utf-8")).hexdigest().lower(),
                ":",
                self.challenge["realm"],
            ]
        )
        self.key = self.pwhashmod(self.key.encode("utf-8")).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers: sign the end-to-end headers and attach
        the HMACDigest Authorization header."""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val,)
        # hmac.new requires bytes for both the key and the message.
        request_digest = (
            hmac.new(self.key.encode("utf-8"), request_digest.encode("utf-8"), self.hashmod).hexdigest().lower()
        )
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Return True (retry) when the server reports an integrity or stale failure."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Attach the WSSE Authorization and X-WSSE UsernameToken headers."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        token_fields = (
            self.credentials[0],
            password_digest,
            cnonce,
            iso_now,
        )
        headers["X-WSSE"] = ('UsernameToken Username="%s", PasswordDigest="%s", ' 'Nonce="%s", Created="%s"') % token_fields
class GoogleLoginAuthentication(Authentication):
    """Handler for the legacy Google ClientLogin protocol.

    BUGFIX: the login-form dict was previously assigned to a local named
    ``auth``, which made ``auth`` local to the whole function and caused
    the earlier ``auth._parse_www_authenticate(...)`` call to raise
    UnboundLocalError. The local is renamed to ``login_form``.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Log in against ClientLogin and store the token in self.Auth
        ("" when the server answers 403)."""
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Bloggger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #     service = "wise"

        login_form = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],)
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(login_form),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        lines = content.split("\n")
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth
771AUTH_SCHEME_CLASSES = {
772 "basic": BasicAuthentication,
773 "wsse": WsseAuthentication,
774 "digest": DigestAuthentication,
775 "hmacdigest": HmacDigestAuthentication,
776 "googlelogin": GoogleLoginAuthentication,
777}
779AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Args:
            cache: directory path for cache files (created if missing).
            safe: callable mapping a cache key to a safe filename.
        """
        self.cache = cache
        self.safe = safe
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...)` guard when several processes start
        # against the same cache directory.
        os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for *key*, or None when absent/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # Context manager guarantees the handle is closed even if read fails.
            with open(cacheFullPath, "rb") as f:
                retval = f.read()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store bytes *value* under *key*."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the entry for *key*, if present."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
class Credentials(object):
    """A registry of (domain, name, password) triples.

    An empty domain ("") acts as a wildcard that matches every host.
    """

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password* for *domain* ("" = any domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget all stored credentials."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*."""
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

    def add(self, key, cert, domain, password):
        """Register a client key/cert pair (plus key password) for *domain*."""
        self.credentials.append((domain.lower(), key, cert, password))

    def iter(self, domain):
        """Yield (key, cert, password) triples applicable to *domain*."""
        for stored_domain, key, cert, password in self.credentials:
            if stored_domain in ("", domain):
                yield (key, cert, password)
# Sentinel class: assigning AllHosts to ProxyInfo.bypass_hosts means
# "bypass the proxy for every host" (see ProxyInfo.bypass_host).
class AllHosts(object):
    pass
class ProxyInfo(object):
    """Collect information required to use a proxy."""

    # Hostnames to connect to directly; may also be set to AllHosts.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

        proxy_type: The type of proxy server. This must be set to one of
        socks.PROXY_TYPE_XXX constants. For example: p =
        ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
        proxy_port=8000)
        proxy_host: The hostname or IP address of the proxy server.
        proxy_port: The port that the proxy server is running on.
        proxy_rdns: If True (default), DNS queries will not be performed
        locally, and instead, handed to the proxy to resolve. This is useful
        if the network does not allow resolution of non-local names. In
        httplib2 0.9 and earlier, this defaulted to False.
        proxy_user: The username used to authenticate with the proxy server.
        proxy_pass: The password used to authenticate with the proxy server.
        proxy_headers: Additional or modified headers for the proxy connect
        request.
        """
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        # Plain attribute assignments instead of the original parallel
        # tuple unpack — same result, easier to read and diff.
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return the settings as a tuple in socks argument order."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """True when socks support is available and host/port are set."""
        # `is not None` (identity) instead of `!= None` (equality) per PEP 8.
        return bool(socks) and self.proxy_host is not None and self.proxy_port is not None

    def applies_to(self, hostname):
        """True unless *hostname* is excluded by bypass_hosts."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        hostname = "." + hostname.lstrip(".")
        for skip_name in self.bypass_hosts:
            # *.suffix
            if skip_name.startswith(".") and hostname.endswith(skip_name):
                return True
            # exact match
            if hostname == "." + skip_name:
                return True
        return False

    def __repr__(self):
        return (
            "<ProxyInfo type={p.proxy_type} "
            "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
            + " user={p.proxy_user} headers={p.proxy_headers}>"
        ).format(p=self)
def proxy_info_from_environment(method="http"):
    """Build a ProxyInfo from the {http,https}_proxy environment variables.

    Returns None when *method* is not "http"/"https" or when neither the
    lower- nor upper-case variable holds a usable value.
    """
    if method not in ("http", "https"):
        return
    name = method + "_proxy"
    url = os.environ.get(name, os.environ.get(name.upper()))
    if not url:
        return
    return proxy_info_from_url(url, method, noproxy=None)
def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as http_proxy env var)

    *noproxy* overrides the no_proxy/NO_PROXY environment variables; "*"
    means every host bypasses the proxy.
    """
    parsed = urllib.parse.urlparse(url)
    default_port = dict(https=443, http=80)[method]
    pi = ProxyInfo(
        proxy_type=3,  # socks.PROXY_TYPE_HTTP
        proxy_host=parsed.hostname,
        proxy_port=parsed.port or default_port,
        proxy_user=parsed.username or None,
        proxy_pass=parsed.password or None,
        proxy_headers=None,
    )

    bypass_hosts = []
    # If not given an explicit noproxy value, respect values in env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
    # Special case: A single '*' character means all hosts should be bypassed.
    if noproxy == "*":
        bypass_hosts = AllHosts
    elif noproxy.strip():
        # Comma-separated list; drop empty strings from stray commas.
        bypass_hosts = tuple(host for host in noproxy.split(",") if host)

    pi.bypass_hosts = bypass_hosts
    return pi
class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts and optional SOCKS/HTTP proxies.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        self.proxy_info = proxy_info
        # proxy_info may be a callable taking the scheme ("http"); resolve it
        # to a concrete ProxyInfo now so connect() can interrogate it.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # A proxy was requested but the optional socks module is missing.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # Resolve/connect to the proxy, not the origin server.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        socket_err = None

        # Try each resolved address until one connects; remember the last
        # socket error so it can be re-raised if every attempt fails.
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    # Disable Nagle's algorithm: requests are written in
                    # small pieces and should not wait for coalescing.
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                # sa[2:] carries flowinfo/scopeid for IPv6 addresses.
                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket_err
class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):

        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        # Fall back to the CA bundle shipped with httplib2.
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        self.proxy_info = proxy_info
        # proxy_info may be a callable taking the scheme; resolve it now.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        # Build the SSLContext up front so configuration errors (bad cert
        # paths, unsupported TLS versions) surface at construction time.
        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port."""
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # Resolve/connect to the proxy, not the origin server.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        socket_err = None

        # Try each resolved address until one connects; keep the last socket
        # error for re-raising if every attempt fails.
        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    # Disable Nagle's algorithm for small request writes.
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))

                # TLS handshake happens here; SNI uses the origin hostname.
                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError) as e:
                # TLS failures are fatal: clean up and propagate, no retry.
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error as e:
                # Plain connection failure: try the next resolved address.
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket_err
# Maps a URI scheme to the connection class used to reach it.
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """

    def __init__(
        self,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
    ):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
          - a callable that takes the http scheme ('http' or 'https') and
            returns a ProxyInfo instance per request. By default, uses
            proxy_info_from_environment.
          - a ProxyInfo instance (static proxy config).
          - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.

        tls_maximum_version / tls_minimum_version require Python 3.7+ /
        OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.tls_maximum_version = tls_maximum_version
        self.tls_minimum_version = tls_minimum_version
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        self.redirect_codes = REDIRECT_CODES

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        self.safe_methods = list(SAFE_METHODS)

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

    def close(self):
        """Close persistent connections, clear sensitive data.
        Not thread-safe, requires external synchronization against concurrent requests.
        """
        # Swap out the connection map first so closes can't race new entries.
        existing, self.connections = self.connections, {}
        for _, c in existing.items():
            c.close()
        self.certificates.clear()
        self.clear_credentials()

    def __getstate__(self):
        """Support pickling: drop live connections and foreign 'request' hooks."""
        state_dict = copy.copy(self.__dict__)
        # In case request is augmented by some foreign object such as
        # credentials which handle auth
        if "request" in state_dict:
            del state_dict["request"]
        if "connections" in state_dict:
            del state_dict["connections"]
        return state_dict

    def __setstate__(self, state):
        """Restore pickled state; connections are recreated lazily per request."""
        self.__dict__.update(state)
        self.connections = {}

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = auth._parse_www_authenticate(response, "www-authenticate")
        # Pair every stored credential with every supported scheme the
        # server offered, in our preferred scheme order.
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if scheme in challenges:
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain, password=None):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain, password)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Issue one request on `conn`, retrying up to RETRIES times on
        connection-level failures. Returns (Response, bytes content)."""
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                errno_ = _errno_from_exception(e)
                if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient errors
                raise
            except http.client.HTTPException:
                if conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except (http.client.BadStatusLine, http.client.ResponseNotReady):
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    # HEAD responses carry no body; drop the connection so a
                    # stale read can't poison the next request.
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)

            break
        return (response, content)

    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest path) in-scope authorization.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # e.g. a Digest challenge with stale=true: re-sign and retry once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one is accepted.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        # Cache permanent redirects so future requests can
                        # skip straight to the new URL.
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Conditional-request and auth headers must not leak to
                    # the redirect target.
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # 302/303 redirects are followed with GET.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
            elif response.status in [200, 203] and method in self.safe_methods:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        """Lowercase header names and normalize values for cache/auth handling."""
        return _normalize_headers(headers)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
There is no restriction on the methods allowed.

The 'body' is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
'headers' dictionary.

The maximum number of redirect to follow before raising an
exception is 'redirections'. The default is 5.

The return value is a tuple of (response, content), the first
being an instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        conn_key = ""

        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if "user-agent" not in headers:
                headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)
            # Prevent CWE-75 space injection to manipulate request via part of uri.
            # Prevent CWE-93 CRLF injection to modify headers via part of uri.
            uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # One persistent connection per scheme+authority.
            conn_key = scheme + ":" + authority
            conn = self.connections.get(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            key_file=certs[0][0],
                            cert_file=certs[0][1],
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                            key_password=certs[0][2],
                        )
                    else:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                        )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority, timeout=self.timeout, proxy_info=self.proxy_info
                    )
                conn.set_debuglevel(debuglevel)

            if "range" not in headers and "accept-encoding" not in headers:
                headers["accept-encoding"] = "gzip, deflate"

            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        # Cached entries are stored as headers + CRLFCRLF + body.
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            # Undo RFC 2047 encoding applied when caching.
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    # Only 307/308 preserve the original method on redirect.
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and not "if-none-match" in headers:
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and not "last-modified" in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    # RFC 7234: only-if-cached with no usable entry => 504.
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            # A timed-out connection is in an unknown state; drop it so the
            # next request starts fresh.
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)
class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    """Is this response from our local cache"""
    fromcache = False
    """HTTP protocol version used by server.

    10 for HTTP/1.0, 11 for HTTP/1.1.
    """
    version = 11

    "Status code returned by server. "
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    # The Response from a previous request in a redirect chain, or None.
    previous = None

    def __init__(self, info):
        # info is either an email.message or
        # an httplib.HTTPResponse object.
        if isinstance(info, http.client.HTTPResponse):
            for key, value in info.getheaders():
                key = key.lower()
                prev = self.get(key)
                # Repeated header fields are folded into one comma-separated
                # value, matching their RFC-defined list semantics.
                if prev is not None:
                    value = ", ".join((prev, value))
                self[key] = value
            self.status = info.status
            self["status"] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.message.Message):
            for key, value in list(info.items()):
                self[key.lower()] = value
            self.status = int(self["status"])
        else:
            # Any other mapping, e.g. a plain dict of headers.
            for key, value in info.items():
                self[key.lower()] = value
            self.status = int(self.get("status", self.status))

    def __getattr__(self, name):
        # Historical alias: `response.dict` returns the response itself.
        if name == "dict":
            return self
        else:
            raise AttributeError(name)