Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/httplib2/__init__.py: 19%

914 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1# -*- coding: utf-8 -*- 

2"""Small, fast HTTP client library for Python.""" 

3 

4__author__ = "Joe Gregorio (joe@bitworking.org)" 

5__copyright__ = "Copyright 2006, Joe Gregorio" 

6__contributors__ = [ 

7 "Thomas Broyer (t.broyer@ltgt.net)", 

8 "James Antill", 

9 "Xavier Verges Farrero", 

10 "Jonathan Feinberg", 

11 "Blair Zajac", 

12 "Sam Ruby", 

13 "Louis Nyffenegger", 

14 "Mark Pilgrim", 

15 "Alex Yu", 

16 "Lai Han", 

17] 

18__license__ = "MIT" 

19__version__ = "0.22.0" 

20 

21import base64 

22import calendar 

23import copy 

24import email 

25import email.feedparser 

26from email import header 

27import email.message 

28import email.utils 

29import errno 

30from gettext import gettext as _ 

31import gzip 

32from hashlib import md5 as _md5 

33from hashlib import sha1 as _sha 

34import hmac 

35import http.client 

36import io 

37import os 

38import random 

39import re 

40import socket 

41import ssl 

42import sys 

43import time 

44import urllib.parse 

45import zlib 

46 

47try: 

48 import socks 

49except ImportError: 

50 # TODO: remove this fallback and copypasted socksipy module upon py2/3 merge, 

51 # idea is to have soft-dependency on any compatible module called socks 

52 from . import socks 

53from . import auth 

54from .error import * 

55from .iri2uri import iri2uri 

56 

57 

def has_timeout(timeout):
    """Return True when *timeout* is a real, caller-supplied timeout.

    Both ``None`` and the ``socket`` module's global-default sentinel
    (``socket._GLOBAL_DEFAULT_TIMEOUT``, when this Python exposes it)
    count as "no explicit timeout".
    """
    if timeout is None:
        return False
    if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
        return timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
    return True

62 

63 

# Names re-exported as the public API of this module.
__all__ = [
    "debuglevel",
    "FailedToDecompressContent",
    "Http",
    "HttpLib2Error",
    "ProxyInfo",
    "RedirectLimit",
    "RedirectMissingLocation",
    "Response",
    "RETRIES",
    "UnimplementedDigestAuthOptionError",
    "UnimplementedHmacDigestAuthOptionError",
]

# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0

# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
RETRIES = 2


# Open Items:
# -----------

# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)

# Pluggable cache storage (supports storing the cache in
# flat files by default. We need a plug-in architecture
# that can support Berkeley DB and Squid)

# == Known Issues ==
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
# Does not handle Cache-Control: max-stale
# Does not use Age: headers when calculating cache freshness.

# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5

# Which headers are hop-by-hop headers by default
HOP_BY_HOP = [
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
]

# https://tools.ietf.org/html/rfc7231#section-8.1.3
SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")

# To change, assign to `Http().redirect_codes`
REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308))


from httplib2 import certs

# Default CA bundle path located by the bundled certs helper.
CA_CERTS = certs.where()

# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated.
# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means:
# > Selects the highest protocol version that both the client and server support.
# > Despite the name, this option can select “TLS” protocols as well as “SSL”.
# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23

# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated.
# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections.
# > The protocol enables CERT_REQUIRED and check_hostname by default.
# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS

# Pick the most modern protocol-selection constant this interpreter offers,
# falling back through the deprecated names for older Pythons.
DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23")

140 

141 

def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Create an ssl.SSLContext configured for client-side HTTPS.

    Args:
        disable_ssl_certificate_validation: when truthy, disable both
            certificate verification and hostname checking.
        ca_certs: path to the CA bundle passed to load_verify_locations().
        cert_file, key_file, key_password: optional client certificate.
        maximum_version, minimum_version: optional TLS version pins; either
            ssl.TLSVersion members or their attribute names as strings.

    Raises:
        RuntimeError: if ssl.SSLContext is unavailable, or a version pin is
            requested but SSLContext lacks maximum_version/minimum_version
            (Python < 3.7 or old OpenSSL).
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation and hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation
    context.verify_mode = ssl.CERT_NONE if disable_ssl_certificate_validation else ssl.CERT_REQUIRED

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    if maximum_version is not None:
        if hasattr(context, "maximum_version"):
            if isinstance(maximum_version, str):
                # Accept a TLSVersion member name, e.g. "TLSv1_2".
                maximum_version = getattr(ssl.TLSVersion, maximum_version)
            context.maximum_version = maximum_version
        else:
            raise RuntimeError("setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    if minimum_version is not None:
        if hasattr(context, "minimum_version"):
            if isinstance(minimum_version, str):
                minimum_version = getattr(ssl.TLSVersion, minimum_version)
            context.minimum_version = minimum_version
        else:
            raise RuntimeError("setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation

    context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context

189 

190 

def _get_end2end_headers(response):
    """Return the names of *response* headers that are end-to-end.

    Filters out the static HOP_BY_HOP list plus any header named in the
    response's own ``Connection`` header.
    """
    excluded = set(HOP_BY_HOP)
    excluded.update(name.strip() for name in response.get("connection", "").split(","))
    return [name for name in response.keys() if name not in excluded]

195 

196 

197_missing = object() 

198 

199 

200def _errno_from_exception(e): 

201 # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass 

202 errno = getattr(e, "errno", _missing) 

203 if errno is not _missing: 

204 return errno 

205 

206 # socket.error and common wrap in .args 

207 args = getattr(e, "args", None) 

208 if args: 

209 return _errno_from_exception(args[0]) 

210 

211 # pysocks.ProxyError wraps in .socket_err 

212 # https://github.com/httplib2/httplib2/pull/202 

213 socket_err = getattr(e, "socket_err", None) 

214 if socket_err: 

215 return _errno_from_exception(socket_err) 

216 

217 return None 

218 

219 

# Appendix B of RFC 3986: one regex that splits any URI reference.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Split *uri* into (scheme, authority, path, query, fragment).

    Uses the regular expression given in Appendix B of RFC 3986.  Absent
    components come back as None (the path, if absent, is "").
    """
    _, scheme, _, authority, path, _, query, _, fragment = URI.match(uri).groups()
    return (scheme, authority, path, query, fragment)

230 

231 

def urlnorm(uri):
    """Normalize *uri* for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri): scheme and
    authority lowercased, an empty path replaced by "/", and defrag_uri
    the absolute URI with any fragment dropped.

    Raises:
        RelativeURIError: if the URI lacks a scheme or an authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # Lowercase each component exactly once (the original lowered the
    # scheme twice, which was redundant).
    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = "?".join([path, query]) if query else path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri

246 

247 

# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r"^\w+://")
re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII)


def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in, and appends an MD5 of the original
    key so distinct keys never collide after stripping.
    """
    if isinstance(filename, bytes):
        raw = filename
        filename = filename.decode("utf-8")
    else:
        raw = filename.encode("utf-8")
    digest = _md5(raw).hexdigest()
    cleaned = re_unsafe.sub("", re_url_scheme.sub("", filename))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return ",".join((cleaned[:90], digest))

275 

276 

277NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+") 

278 

279 

280def _normalize_headers(headers): 

281 return dict( 

282 [ 

283 (_convert_byte_str(key).lower(), NORMALIZE_SPACE.sub(_convert_byte_str(value), " ").strip(),) 

284 for (key, value) in headers.items() 

285 ] 

286 ) 

287 

288 

289def _convert_byte_str(s): 

290 if not isinstance(s, str): 

291 return str(s, "utf-8") 

292 return s 

293 

294 

295def _parse_cache_control(headers): 

296 retval = {} 

297 if "cache-control" in headers: 

298 parts = headers["cache-control"].split(",") 

299 parts_with_args = [ 

300 tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=") 

301 ] 

302 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")] 

303 retval = dict(parts_with_args + parts_wo_args) 

304 return retval 

305 

306 

# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, useful for testing servers.
# (Kept as a module-level int flag for backward compatibility.)
USE_WWW_AUTH_STRICT_PARSING = 0

312 

313 

def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of:
      "FRESH"       -- the cached entry may be served directly
      "STALE"       -- the cached entry must be revalidated
      "TRANSPARENT" -- bypass the cache entirely

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # An HTTP/1.0 "Pragma: no-cache" bypasses the cache entirely; it is
    # also upgraded in place to the HTTP/1.1 Cache-Control equivalent.
    if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
        retval = "TRANSPARENT"
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        retval = "FRESH"
    elif "date" in response_headers:
        # current_age is measured from the response's own Date header.
        date = calendar.timegm(email.utils.parsedate_tz(response_headers["date"]))
        now = time.time()
        current_age = max(0, now - date)
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.utils.parsedate_tz(response_headers["expires"])
            if None == expires:
                # Unparseable Expires header: treat as immediately stale.
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides the response's idea of
        # freshness -- the client may demand a fresher copy.
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh effectively ages the entry by the requested margin.
        if "min-fresh" in cc:
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval

387 

388 

389def _decompressContent(response, new_content): 

390 content = new_content 

391 try: 

392 encoding = response.get("content-encoding", None) 

393 if encoding in ["gzip", "deflate"]: 

394 if encoding == "gzip": 

395 content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read() 

396 if encoding == "deflate": 

397 try: 

398 content = zlib.decompress(content, zlib.MAX_WBITS) 

399 except (IOError, zlib.error): 

400 content = zlib.decompress(content, -zlib.MAX_WBITS) 

401 response["content-length"] = str(len(content)) 

402 # Record the historical presence of the encoding in a way the won't interfere. 

403 response["-content-encoding"] = response["content-encoding"] 

404 del response["content-encoding"] 

405 except (IOError, zlib.error): 

406 content = "" 

407 raise FailedToDecompressContent( 

408 _("Content purported to be compressed with %s but failed to decompress.") % response.get("content-encoding"), 

409 response, 

410 content, 

411 ) 

412 return content 

413 

414 

def _bind_write_headers(msg):
    """Return a replacement for email.generator.Generator._write_headers.

    The closure serializes *msg*'s headers itself, routing every plain
    string value through email.header.Header so that non-ASCII values get
    RFC 2047 encoded instead of raising UnicodeEncodeError (see the
    fallback in _updateCache).
    """

    def _write_headers(self):
        # Self refers to the Generator object.
        for h, v in msg.items():
            print("%s:" % h, end=" ", file=self._fp)
            if isinstance(v, header.Header):
                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
            else:
                # email.Header got lots of smarts, so use it.
                headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h)
                print(headers.encode(), file=self._fp)
        # A blank line always separates headers from body.
        print(file=self._fp)

    return _write_headers

430 

431 

def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) a response in *cache* under *cachekey*.

    A no-store directive from either side deletes any existing entry.
    Otherwise the response headers (minus status/encoding bookkeeping)
    plus the body are serialized into one bytes blob and written to the
    cache.  No-op when cachekey is falsy.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if "no-store" in cc or "no-store" in cc_response:
            cache.delete(cachekey)
        else:
            info = email.message.Message()
            for key, value in response_headers.items():
                if key not in ["status", "content-encoding", "transfer-encoding"]:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get("vary", None)
            if vary:
                vary_headers = vary.lower().replace(" ", "").split(",")
                # NOTE(review): this loop variable shadows the module-level
                # "header" import (email.header); harmless here.
                for header in vary_headers:
                    key = "-varied-%s" % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            status = response_headers.status
            # A 304 means the cached body is still valid, so persist the
            # entry as a normal 200.
            if status == 304:
                status = 200

            status_header = "status: %d\r\n" % status

            try:
                header_str = info.as_string()
            except UnicodeEncodeError:
                # Fall back to a tolerant header writer that RFC 2047
                # encodes non-ASCII header values instead of raising.
                setattr(info, "_write_headers", _bind_write_headers(info))
                header_str = info.as_string()

            # Normalize bare CR or bare LF to CRLF so the blob parses back.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content])

            cache.set(cachekey, text)

472 

473 

474def _cnonce(): 

475 dig = _md5( 

476 ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8") 

477 ).hexdigest() 

478 return dig[:16] 

479 

480 

481def _wsse_username_token(cnonce, iso_now, password): 

482 return ( 

483 base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8") 

484 ) 

485 

486 

# For credentials we need two things, first
# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
# Then we also need a list of URIs that have already demanded authentication
# That list is tricky since sub-URIs can take the same auth, or the
# auth scheme may change as you descend the tree.
# So we also need each Auth instance to be able to tell us
# how close to the 'top' it is.

495 

class Authentication(object):
    """Base class for one authenticated (host, path) scope.

    Subclasses implement a concrete scheme by overriding request() (and
    response() when the scheme has to react to server replies).
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """How many path segments below this auth's root the URI sits."""
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """True when this auth applies to the given host and URI."""
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return host == self.host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in subclasses."""
        pass

    def response(self, response, content):
        """Hook for updating state (e.g. fresh nonces) from an authorized
        reply.  Override in subclasses if necessary.

        Returns True when the request should be retried, for example
        Digest may return stale=true; this base class never retries.
        """
        return False

    # Rich comparisons: every Authentication instance sorts strictly
    # before anything else and never compares equal.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True

548 

549 

class BasicAuthentication(Authentication):
    """HTTP Basic auth: send base64(user:password) on every request."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        token = base64.b64encode(("%s:%s" % self.credentials).encode("utf-8")).strip().decode("utf-8")
        headers["authorization"] = "Basic " + token

560 

561 

class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        # Parse the server's Digest challenge; only qop="auth" and MD5 are
        # supported -- anything else raises immediately.
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        self.challenge["qop"] = ("auth" in [x.strip() for x in qop.split()]) and "auth" or None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        # A1 per RFC 2617: username:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1],])
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers: compute the RFC 2617 response digest
        and set the Authorization header.  *cnonce* may be supplied for
        deterministic testing; otherwise a fresh one is generated."""
        H = lambda x: _md5(x.encode("utf-8")).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge["cnonce"] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        if self.challenge.get("opaque"):
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        # The nonce count increments once per request under the same nonce.
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Refresh nonce state from the server's reply.

        Returns True (retry the request) when the server reports the nonce
        as stale; otherwise records any nextnonce and returns False."""
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False

631 

632 

class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge["pw-algorithm"])
            )
        if self.challenge["algorithm"] == "HMAC-MD5":
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge["pw-algorithm"] == "MD5":
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # NOTE(review): _md5/_sha are hashlib constructor functions, which
        # have no ``.new`` attribute; ``pwhashmod.new(...)`` below looks like
        # a Python 2 (md5/sha module) leftover -- confirm this code path
        # still works on Python 3 before relying on HMACDigest auth.
        self.key = "".join(
            [
                self.credentials[0],
                ":",
                self.pwhashmod.new("".join([self.credentials[1], self.challenge["salt"]])).hexdigest().lower(),
                ":",
                self.challenge["realm"],
            ]
        )
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers: sign the end-to-end headers with the
        derived key and attach the HMACDigest Authorization header."""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val,)
        # NOTE(review): hmac.new requires a bytes key on Python 3; self.key
        # is a str here -- see the constructor note above.
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Return True (retry) when the server reports an integrity or
        staleness problem with the previous signature."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False

709 

710 

class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Attach the WSSE Authorization and X-WSSE headers for this request."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        template = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"'
        headers["X-WSSE"] = template % (self.credentials[0], password_digest, cnonce, iso_now)

736 

737 

class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin ("GoogleLogin") authentication.

    The constructor exchanges the user's email/password for an Auth token
    via www.google.com/accounts/ClientLogin, then request() attaches that
    token to every subsequent request.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #   service = "wise"

        # NOTE(review): this local "auth" dict shadows the module-level
        # "auth" import for the remainder of this method.
        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],)
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(auth),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        lines = content.split("\n")
        # ClientLogin replies with key=value lines; keep the Auth token.
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth

771 

772 

# Mapping from lowercased scheme name (as parsed out of a WWW-Authenticate
# challenge) to the Authentication subclass that implements it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication,
}

# Order in which schemes are considered when a server offers several
# (presumably preference order -- confirm against Http's auth selection,
# which is outside this chunk).
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]

782 

783 

class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Args:
        cache: directory path used to store entries (created if missing).
        safe: callable mapping a cache key to a filesystem-safe filename.
        """
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            # exist_ok avoids a crash if another process creates the
            # directory between the exists() check and makedirs().
            os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for *key*, or None if absent/unreadable."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # "with" guarantees the handle is closed even if read() fails,
            # unlike the previous open()/read()/close() sequence.
            with open(cacheFullPath, "rb") as f:
                return f.read()
        except IOError:
            return None

    def set(self, key, value):
        """Store bytes *value* under *key*, replacing any previous entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the entry for *key*; a missing entry is not an error."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)

817 

818 

class Credentials(object):
    """A pool of (name, password) pairs, optionally scoped by domain."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password* for *domain* ("" matches any domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every registered credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*."""
        for cdomain, name, password in self.credentials:
            if cdomain in ("", domain):
                yield (name, password)

833 

834 

class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert (plus a key password)."""

    def add(self, key, cert, domain, password):
        """Register a client key/cert pair for *domain* ("" matches any)."""
        self.credentials.append((domain.lower(), key, cert, password))

    def iter(self, domain):
        """Yield (key, cert, password) triples applicable to *domain*."""
        for cdomain, key, cert, password in self.credentials:
            if cdomain in ("", domain):
                yield (key, cert, password)

846 

847 

class AllHosts(object):
    """Sentinel for ProxyInfo.bypass_hosts meaning "bypass every host"."""

    pass

850 

851 

class ProxyInfo(object):
    """Collect information required to use a proxy."""

    # Tuple of host suffixes/names excluded from proxying, or the AllHosts
    # sentinel class to bypass everything.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

        proxy_type: The type of proxy server. This must be set to one of
        socks.PROXY_TYPE_XXX constants. For example: p =
        ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
        proxy_port=8000)
        proxy_host: The hostname or IP address of the proxy server.
        proxy_port: The port that the proxy server is running on.
        proxy_rdns: If True (default), DNS queries will not be performed
        locally, and instead, handed to the proxy to resolve. This is useful
        if the network does not allow resolution of non-local names. In
        httplib2 0.9 and earlier, this defaulted to False.
        proxy_user: The username used to authenticate with the proxy server.
        proxy_pass: The password used to authenticate with the proxy server.
        proxy_headers: Additional or modified headers for the proxy connect
        request.
        """
        # Credentials may arrive as bytes (e.g. read from environment
        # variables); normalize to str.
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        # Direct assignments instead of the original 7-way parallel tuple
        # unpack — same effect, far easier to read and diff.
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return the proxy settings as a 7-tuple (for socks setproxy)."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """True when socks support is available and host/port are set."""
        # `is not None` replaces the original `!= None` — identity is the
        # correct idiom for None comparisons (PEP 8).
        return socks and (self.proxy_host is not None) and (self.proxy_port is not None)

    def applies_to(self, hostname):
        """True when requests to ``hostname`` should go through the proxy."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        hostname = "." + hostname.lstrip(".")
        for skip_name in self.bypass_hosts:
            # *.suffix
            if skip_name.startswith(".") and hostname.endswith(skip_name):
                return True
            # exact match
            if hostname == "." + skip_name:
                return True
        return False

    def __repr__(self):
        return (
            "<ProxyInfo type={p.proxy_type} "
            "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
            + " user={p.proxy_user} headers={p.proxy_headers}>"
        ).format(p=self)

937 

938 

def proxy_info_from_environment(method="http"):
    """Read proxy info from the environment variables.

    Looks up ``<method>_proxy`` (falling back to its upper-case variant)
    and returns the ProxyInfo built from it, or None when the scheme is
    unsupported or no variable is set.
    """
    if method not in ("http", "https"):
        return
    var_name = method + "_proxy"
    url = os.environ.get(var_name, os.environ.get(var_name.upper()))
    return proxy_info_from_url(url, method, noproxy=None) if url else None

950 

951 

def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as http_proxy env var)
    """
    parts = urllib.parse.urlparse(url)

    default_ports = {"https": 443, "http": 80}
    pi = ProxyInfo(
        proxy_type=3,  # socks.PROXY_TYPE_HTTP
        proxy_host=parts.hostname,
        proxy_port=parts.port or default_ports[method],
        proxy_user=parts.username or None,
        proxy_pass=parts.password or None,
        proxy_headers=None,
    )

    # If not given an explicit noproxy value, respect values in env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
    # Special case: A single '*' character means all hosts should be bypassed.
    if noproxy == "*":
        pi.bypass_hosts = AllHosts
    elif noproxy.strip():
        # Drop empty fragments produced by stray commas.
        pi.bypass_hosts = tuple(h for h in noproxy.split(",") if h)
    else:
        pi.bypass_hosts = []

    return pi

980 

981 

class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts

    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        self.proxy_info = proxy_info
        # proxy_info may be a callable taking the scheme; resolve it to a
        # concrete ProxyInfo for 'http' up front.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__."""
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        # Decide whether this particular host goes through the proxy.
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        socket_err = None

        # Try each resolved address family/address in turn; keep the last
        # socket.error so it can be re-raised if every attempt fails.
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                # Note: connects to (self.host, self.port); when proxying,
                # the socks socket tunnels this through the proxy address.
                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every address failed; surface the last connection error.
            raise socket_err

1067 

1068 

class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):

        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        # Fall back to the bundled CA certificate file when none is given.
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        self.proxy_info = proxy_info
        # proxy_info may be a callable taking the scheme; resolve it now.
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        # Build the SSLContext once at construction time; wrap_socket in
        # connect() reuses it.
        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port."""
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        socket_err = None

        # Try each resolved address until a TLS connection succeeds.
        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))

                # TLS handshake with SNI (server_hostname).
                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError) as e:
                # TLS failures are fatal: do not try other addresses.
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error as e:
                # Plain connection errors: remember and try the next address.
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket_err

1203 

1204 

# Map a URL scheme to the connection class used to talk to it;
# Http.request() consults this when no explicit connection_type is given.
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}

1209 

1210 

class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """

    def __init__(
        self,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
    ):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
        - a callable that takes the http scheme ('http' or 'https') and
          returns a ProxyInfo instance per request. By default, uses
          proxy_info_from_environment.
        - a ProxyInfo instance (static proxy config).
        - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.

        tls_maximum_version / tls_minimum_version require Python 3.7+ /
        OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.tls_maximum_version = tls_maximum_version
        self.tls_minimum_version = tls_minimum_version
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        self.redirect_codes = REDIRECT_CODES

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        self.safe_methods = list(SAFE_METHODS)

        # If 'follow_redirects' is True, and this is set to True then
        # all redirecs are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

    def close(self):
        """Close persistent connections, clear sensitive data.
        Not thread-safe, requires external synchronization against concurrent requests.
        """
        existing, self.connections = self.connections, {}
        for _, c in existing.items():
            c.close()
        self.certificates.clear()
        self.clear_credentials()

    def __getstate__(self):
        # Pickle support: drop unpicklable / per-process state.
        state_dict = copy.copy(self.__dict__)
        # In case request is augmented by some foreign object such as
        # credentials which handle auth
        if "request" in state_dict:
            del state_dict["request"]
        if "connections" in state_dict:
            del state_dict["connections"]
        return state_dict

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Live connections cannot be restored; start with a fresh pool.
        self.connections = {}

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = auth._parse_www_authenticate(response, "www-authenticate")
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if scheme in challenges:
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain, password=None):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain, password)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        # Issue one request on `conn`, retrying up to RETRIES times on
        # transient connection failures.  Returns (Response, content bytes).
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                errno_ = _errno_from_exception(e)
                if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient errors
                raise
            except http.client.HTTPException:
                if conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except (http.client.BadStatusLine, http.client.ResponseNotReady):
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)

            break
        return (response, content)

    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-path) authorization in scope.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # e.g. a stale Digest nonce: re-authenticate once and retry.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination against the challenge.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    # Cache permanent redirects so future requests skip the hop.
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # 302/303: follow with GET and drop the body.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
            elif response.status in [200, 203] and method in self.safe_methods:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        return _normalize_headers(headers)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
There is no restriction on the methods allowed.

The 'body' is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
'headers' dictionary.

The maximum number of redirect to follow before raising an
exception is 'redirections. The default is 5.

The return value is a tuple of (response, content), the first
being and instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        conn_key = ""

        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if "user-agent" not in headers:
                headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)
            # Prevent CWE-75 space injection to manipulate request via part of uri.
            # Prevent CWE-93 CRLF injection to modify headers via part of uri.
            uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # One pooled connection per scheme+authority.
            conn_key = scheme + ":" + authority
            conn = self.connections.get(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            key_file=certs[0][0],
                            cert_file=certs[0][1],
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                            key_password=certs[0][2],
                        )
                    else:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                        )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority, timeout=self.timeout, proxy_info=self.proxy_info
                    )
                conn.set_debuglevel(debuglevel)

            if "range" not in headers and "accept-encoding" not in headers:
                headers["accept-encoding"] = "gzip, deflate"

            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        # Cached entries are stored as headers + CRLFCRLF + body.
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        # Corrupt cache entry: discard it and continue uncached.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and not "if-none-match" in headers:
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and not "last-modified" in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    # RFC 7234: no usable cache entry + only-if-cached => 504.
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                # Drop the (possibly wedged) connection from the pool.
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)

1752 

1753 

class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    """Is this response from our local cache"""
    fromcache = False
    """HTTP protocol version used by server.

    10 for HTTP/1.0, 11 for HTTP/1.1.
    """
    version = 11

    "Status code returned by server. "
    status = 200
    """Reason phrase returned by server."""
    reason = "Ok"

    previous = None

    def __init__(self, info):
        # info is either an email.message or
        # an httplib.HTTPResponse object.
        if isinstance(info, http.client.HTTPResponse):
            for hdr, val in info.getheaders():
                hdr = hdr.lower()
                existing = self.get(hdr)
                if existing is not None:
                    # Repeated header: fold into one comma-separated value.
                    val = ", ".join((existing, val))
                self[hdr] = val
            self.status = info.status
            self["status"] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.message.Message):
            for hdr, val in list(info.items()):
                self[hdr.lower()] = val
            self.status = int(self["status"])
        else:
            for hdr, val in info.items():
                self[hdr.lower()] = val
            self.status = int(self.get("status", self.status))

    def __getattr__(self, name):
        # "dict" is the only synthetic attribute; everything else is a miss.
        if name == "dict":
            return self
        raise AttributeError(name)