Coverage for /pythoncovmergedfiles/medio/medio/src/httplib2/httplib2/__init__.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

920 statements  

1# -*- coding: utf-8 -*- 

2"""Small, fast HTTP client library for Python.""" 

3 

4__author__ = "Joe Gregorio (joe@bitworking.org)" 

5__copyright__ = "Copyright 2006, Joe Gregorio" 

6__contributors__ = [ 

7 "Thomas Broyer (t.broyer@ltgt.net)", 

8 "James Antill", 

9 "Xavier Verges Farrero", 

10 "Jonathan Feinberg", 

11 "Blair Zajac", 

12 "Sam Ruby", 

13 "Louis Nyffenegger", 

14 "Mark Pilgrim", 

15 "Alex Yu", 

16 "Lai Han", 

17] 

18__license__ = "MIT" 

19__version__ = "0.22.0" 

20 

21import base64 

22import calendar 

23import copy 

24import email 

25import email.feedparser 

26from email import header 

27import email.message 

28import email.utils 

29import errno 

30from gettext import gettext as _ 

31import gzip 

32from hashlib import md5 as _md5 

33from hashlib import sha1 as _sha 

34import hmac 

35import http.client 

36import io 

37import os 

38import random 

39import re 

40import socket 

41import ssl 

42import sys 

43import time 

44import urllib.parse 

45import zlib 

46 

47try: 

48 import socks 

49except ImportError: 

50 socks = None 

51from . import auth 

52from .error import * 

53from .iri2uri import iri2uri 

54 

55 

def has_timeout(timeout):
    """Tell whether *timeout* denotes an actual timeout value.

    Returns False for None and for socket's module-level default sentinel
    (``socket._GLOBAL_DEFAULT_TIMEOUT``), True for any other value.
    """
    if timeout is None:
        return False
    if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
        return timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
    return True

60 

61 

62__all__ = [ 

63 "debuglevel", 

64 "FailedToDecompressContent", 

65 "Http", 

66 "HttpLib2Error", 

67 "ProxyInfo", 

68 "RedirectLimit", 

69 "RedirectMissingLocation", 

70 "Response", 

71 "RETRIES", 

72 "UnimplementedDigestAuthOptionError", 

73 "UnimplementedHmacDigestAuthOptionError", 

74] 

75 

76# The httplib debug level, set to a non-zero value to get debug output 

77debuglevel = 0 

78 

79# A request will be tried 'RETRIES' times if it fails at the socket/connection level. 

80RETRIES = 2 

81 

82 

83# Open Items: 

84# ----------- 

85 

86# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) 

87 

88# Pluggable cache storage (supports storing the cache in 

89# flat files by default. We need a plug-in architecture 

90# that can support Berkeley DB and Squid) 

91 

92# == Known Issues == 

93# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. 

94# Does not handle Cache-Control: max-stale 

95# Does not use Age: headers when calculating cache freshness. 

96 

97# The number of redirections to follow before giving up. 

98# Note that only GET redirects are automatically followed. 

99# Will also honor 301 requests by saving that info and never 

100# requesting that URI again. 

101DEFAULT_MAX_REDIRECTS = 5 

102 

103# Which headers are hop-by-hop headers by default 

104HOP_BY_HOP = [ 

105 "connection", 

106 "keep-alive", 

107 "proxy-authenticate", 

108 "proxy-authorization", 

109 "te", 

110 "trailers", 

111 "transfer-encoding", 

112 "upgrade", 

113] 

114 

115# https://tools.ietf.org/html/rfc7231#section-8.1.3 

116SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE") 

117 

118# To change, assign to `Http().redirect_codes` 

119REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308)) 

120 

121 

122from httplib2 import certs 

123 

124CA_CERTS = certs.where() 

125 

126# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated. 

127# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means: 

128# > Selects the highest protocol version that both the client and server support. 

129# > Despite the name, this option can select “TLS” protocols as well as “SSL”. 

130# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23 

131 

132# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated. 

133# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections. 

134# > The protocol enables CERT_REQUIRED and check_hostname by default. 

135# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS 

136 

137DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23") 

138 

139 

def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Create an ssl.SSLContext configured for client connections.

    Args:
        disable_ssl_certificate_validation: when true, turn off both
            certificate verification and hostname checking.
        ca_certs: path to the CA bundle passed to load_verify_locations().
        cert_file, key_file, key_password: optional client certificate chain.
        maximum_version, minimum_version: optional TLS version bounds, given
            as ssl.TLSVersion members or their string names (e.g. "TLSv1_2").

    Raises:
        RuntimeError: when ssl.SSLContext is unavailable, or a version bound
            is requested on a Python/OpenSSL without minimum_version /
            maximum_version support.
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation and hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation
    context.verify_mode = ssl.CERT_NONE if disable_ssl_certificate_validation else ssl.CERT_REQUIRED

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    if maximum_version is not None:
        if hasattr(context, "maximum_version"):
            if isinstance(maximum_version, str):
                # Resolve a string name like "TLSv1_2" to the ssl.TLSVersion member.
                maximum_version = getattr(ssl.TLSVersion, maximum_version)
            context.maximum_version = maximum_version
        else:
            raise RuntimeError("setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    if minimum_version is not None:
        if hasattr(context, "minimum_version"):
            if isinstance(minimum_version, str):
                minimum_version = getattr(ssl.TLSVersion, minimum_version)
            context.minimum_version = minimum_version
        else:
            raise RuntimeError("setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer")
    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if hasattr(context, "check_hostname"):
        context.check_hostname = not disable_ssl_certificate_validation

    context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context

187 

188 

def _get_end2end_headers(response):
    """Return the end-to-end header names present in *response*.

    Excludes the static hop-by-hop set (HOP_BY_HOP) plus any header names
    listed in the response's own Connection header.
    """
    excluded = list(HOP_BY_HOP)
    excluded.extend(name.strip() for name in response.get("connection", "").split(","))
    return [name for name in response.keys() if name not in excluded]

193 

194 

195_missing = object() 

196 

197 

def _errno_from_exception(e):
    """Best-effort extraction of an errno from exception *e*.

    Looks at e.errno first, then recurses into e.args[0] (socket.error and
    friends wrap there) and e.socket_err (pysocks.ProxyError,
    https://github.com/httplib2/httplib2/pull/202). Returns None when no
    errno can be found.
    """
    try:
        return e.errno
    except AttributeError:
        pass

    # socket.error and common wrappers keep the underlying error in .args.
    args = getattr(e, "args", None)
    if args:
        return _errno_from_exception(args[0])

    # pysocks.ProxyError wraps the real socket error in .socket_err.
    socket_err = getattr(e, "socket_err", None)
    if socket_err:
        return _errno_from_exception(socket_err)

    return None

216 

217 

218URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") 

219 

220 

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Absent components come back as None.
    """
    g = URI.match(uri).groups()
    return g[1], g[3], g[4], g[6], g[8]

228 

229 

def urlnorm(uri):
    """Normalize an absolute URI for cache-key purposes.

    Lower-cases the scheme and authority, defaults an empty path to "/",
    and drops the fragment.

    Returns:
        (scheme, authority, request_uri, defrag_uri)

    Raises:
        RelativeURIError: when the URI lacks a scheme or authority.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    # Previously scheme was lower-cased twice; once is enough.
    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = "?".join([path, query]) if query else path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri

244 

245 

246# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) 

247re_url_scheme = re.compile(r"^\w+://") 

248re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII) 

249 

250 

def safename(filename):
    """Return a filename suitable for the cache.
    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    if isinstance(filename, bytes):
        raw = filename
        filename = filename.decode("utf-8")
    else:
        raw = filename.encode("utf-8")
    digest = _md5(raw).hexdigest()

    # Drop the scheme, then every character outside the safe set.
    stripped = re_unsafe.sub("", re_url_scheme.sub("", filename))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return ",".join((stripped[:90], digest))

273 

274 

275NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+") 

276 

277 

278def _normalize_headers(headers): 

279 return dict( 

280 [ 

281 (_convert_byte_str(key).lower(), NORMALIZE_SPACE.sub(_convert_byte_str(value), " ").strip(),) 

282 for (key, value) in headers.items() 

283 ] 

284 ) 

285 

286 

287def _convert_byte_str(s): 

288 if not isinstance(s, str): 

289 return str(s, "utf-8") 

290 return s 

291 

292 

293def _parse_cache_control(headers): 

294 retval = {} 

295 if "cache-control" in headers: 

296 parts = headers["cache-control"].split(",") 

297 parts_with_args = [ 

298 tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=") 

299 ] 

300 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")] 

301 retval = dict(parts_with_args + parts_wo_args) 

302 return retval 

303 

304 

305# Whether to use a strict mode to parse WWW-Authenticate headers 

306# Might lead to bad results in case of ill-formed header value, 

307# so disabled by default, falling back to relaxed parsing. 

308# Set to true to turn on, useful for testing servers. 

309USE_WWW_AUTH_STRICT_PARSING = 0 

310 

311 

def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of:
        "FRESH"       -- the cached entry may be served as-is
        "STALE"       -- the cached entry must be revalidated
        "TRANSPARENT" -- bypass the cache entirely

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
        retval = "TRANSPARENT"
        # Mirror the legacy Pragma: no-cache into Cache-Control for downstream code.
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        retval = "FRESH"
    elif "date" in response_headers:
        date = calendar.timegm(email.utils.parsedate_tz(response_headers["date"]))
        now = time.time()
        current_age = max(0, now - date)
        # Response-side freshness: max-age wins over Expires.
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.utils.parsedate_tz(response_headers["expires"])
            if None == expires:  # unparseable Expires -> treat as already expired
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # A request-side max-age overrides the response's freshness lifetime.
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        # min-fresh: the client wants the entry to still be fresh that far ahead,
        # so it is modeled by aging the entry forward.
        if "min-fresh" in cc:
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval

385 

386 

def _decompressContent(response, new_content):
    """Decompress *new_content* according to the response's Content-Encoding.

    Handles "gzip" and "deflate" (both zlib-wrapped and raw deflate streams).
    On success, updates the response's content-length and renames
    content-encoding to -content-encoding so decoding isn't applied twice.

    Raises:
        FailedToDecompressContent: when the body claims a supported encoding
            but cannot be decoded.
    """
    content = new_content
    try:
        encoding = response.get("content-encoding", None)
        if encoding in ["gzip", "deflate"]:
            if encoding == "gzip":
                content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
            if encoding == "deflate":
                try:
                    # Try a zlib-wrapped stream first, then fall back to raw
                    # deflate — some servers omit the zlib header.
                    content = zlib.decompress(content, zlib.MAX_WBITS)
                except (IOError, zlib.error):
                    content = zlib.decompress(content, -zlib.MAX_WBITS)
            response["content-length"] = str(len(content))
            # Record the historical presence of the encoding in a way that won't interfere.
            response["-content-encoding"] = response["content-encoding"]
            del response["content-encoding"]
    except (IOError, zlib.error):
        content = ""
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % response.get("content-encoding"),
            response,
            content,
        )
    return content

411 

412 

def _bind_write_headers(msg):
    """Return a replacement _write_headers for an email Generator.

    Used by _updateCache when info.as_string() raises UnicodeEncodeError:
    this version routes every header value through email.header.Header with
    a utf-8 charset so non-ASCII values still serialize.
    """

    def _write_headers(self):
        # Self refers to the Generator object.
        for h, v in msg.items():
            print("%s:" % h, end=" ", file=self._fp)
            if isinstance(v, header.Header):
                # Already a Header; let it fold itself to the generator's width.
                print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
            else:
                # email.Header got lots of smarts, so use it.
                headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h)
                print(headers.encode(), file=self._fp)
        # A blank line always separates headers from body.
        print(file=self._fp)

    return _write_headers

428 

429 

def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) a response under *cachekey* in *cache*.

    Honors no-store from either side by deleting the entry. Otherwise
    serializes the response headers — minus per-exchange metadata, plus
    "-varied-*" annotations capturing the request values of headers named
    by Vary — with a normalized status line, followed by the raw body.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if "no-store" in cc or "no-store" in cc_response:
            cache.delete(cachekey)
        else:
            info = email.message.Message()
            for key, value in response_headers.items():
                # status/transfer details describe this exchange, not the entity; skip them.
                if key not in ["status", "content-encoding", "transfer-encoding"]:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get("vary", None)
            if vary:
                vary_headers = vary.lower().replace(" ", "").split(",")
                # NOTE: the loop variable shadows the module-level
                # `email.header` import; safe here since `header` (the
                # module) isn't referenced in this function.
                for header in vary_headers:
                    key = "-varied-%s" % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            status = response_headers.status
            # A 304 means the cached entity is still valid; persist it as 200.
            if status == 304:
                status = 200

            status_header = "status: %d\r\n" % status

            try:
                header_str = info.as_string()
            except UnicodeEncodeError:
                # Fall back to a utf-8-safe header writer for non-ASCII values.
                setattr(info, "_write_headers", _bind_write_headers(info))
                header_str = info.as_string()

            # Normalize bare CR or LF to CRLF so the cached bytes parse as HTTP.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content])

            cache.set(cachekey, text)

470 

471 

472def _cnonce(): 

473 dig = _md5( 

474 ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8") 

475 ).hexdigest() 

476 return dig[:16] 

477 

478 

479def _wsse_username_token(cnonce, iso_now, password): 

480 return ( 

481 base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8") 

482 ) 

483 

484 

485# For credentials we need two things, first 

486# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.) 

487# Then we also need a list of URIs that have already demanded authentication 

488# That list is tricky since sub-URIs can take the same auth, or the 

489# auth scheme may change as you descend the tree. 

490# So we also need each Auth instance to be able to tell us 

491# how close to the 'top' it is. 

492 

493 

class Authentication(object):
    """Base class for one authentication scheme scoped to a (host, path).

    Subclasses override request() to decorate outgoing headers and
    response() to absorb server updates (e.g. fresh nonces). The rich
    comparison methods make instances always-unequal and always-sorting-
    first, and every instance is truthy.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        # Scope this authentication to the path component of the request URI.
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Count path segments of *request_uri* below this auth's scope root."""
        parse_uri(request_uri)  # parsed as in the original; the count uses the raw URI
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """True when (host, request_uri) falls under this auth's scope."""
        # XXX Should we normalize the request_uri?
        path = parse_uri(request_uri)[2]
        return host == self.host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Add the appropriate Authorization header; overridden by subclasses."""
        pass

    def response(self, response, content):
        """Absorb updates (e.g. new nonces) from the last authorized response.

        Return True if the request should be retried — for example, Digest
        may return stale=true. Overridden by subclasses where needed.
        """
        return False

    # Instances never compare equal and always order before their peer.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True

546 

547 

class BasicAuthentication(Authentication):
    """HTTP Basic authentication: a static base64(user:password) header."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Attach the Basic Authorization header to *headers*."""
        userpass = ("%s:%s" % self.credentials).encode("utf-8")
        token = base64.b64encode(userpass).strip().decode("utf-8")
        headers["authorization"] = "Basic " + token

558 

559 

class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge from the WWW-Authenticate header.

        Raises:
            UnimplementedDigestAuthOptionError: for qop values other than
                "auth" or algorithms other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        # Accept only qop="auth"; anything else maps to None and is rejected below.
        self.challenge["qop"] = ("auth" in [x.strip() for x in qop.split()]) and "auth" or None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        # A1 = username:realm:password per RFC 2617 section 3.2.2.2.
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1],])
        # Nonce count, incremented on every request under the same nonce.
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers: compute and attach the Digest
        Authorization header for this method/URI. *cnonce* may be supplied
        (e.g. for testing); otherwise a fresh client nonce is generated."""
        H = lambda x: _md5(x.encode("utf-8")).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge["cnonce"] = cnonce or _cnonce()
        # response = KD(H(A1), nonce:nc:cnonce:qop:H(A2)) per RFC 2617.
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        # Echo the server's opaque value back verbatim when present.
        if self.challenge.get("opaque"):
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Absorb nonce updates from the server.

        Returns True when the request should be retried (the server reported
        a stale nonce); otherwise updates nextnonce bookkeeping and returns
        False.
        """
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False

629 

630 

class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse and validate an HMACDigest challenge.

        Raises:
            UnimplementedHmacDigestAuthOptionError: for a missing/empty
                server nonce or unsupported algorithm / pw-algorithm values.

        NOTE(review): self.hashmod / self.pwhashmod are bound to hashlib
        constructors (_md5/_sha), which have no .new() method — that was the
        legacy md5/sha module API — so the .new() calls below appear to raise
        AttributeError at runtime; confirm before relying on this class.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s." % self.challenge["pw-algorithm"])
            )
        if self.challenge["algorithm"] == "HMAC-MD5":
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge["pw-algorithm"] == "MD5":
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # key = H(username : H(password + salt) : realm), all lower-case hex.
        self.key = "".join(
            [
                self.credentials[0],
                ":",
                self.pwhashmod.new("".join([self.credentials[1], self.challenge["salt"]])).hexdigest().lower(),
                ":",
                self.challenge["realm"],
            ]
        )
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers: HMAC over method, URI, nonces and the
        end-to-end header values, attached as the Authorization header."""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val,)
        # NOTE(review): hmac.new requires a bytes key in Python 3; self.key is
        # str here — confirm this code path is exercised at all.
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Retry when the server reports an integrity failure or stale nonce."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False

707 

708 

class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Attach the WSSE Authorization and X-WSSE headers to *headers*."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, created, self.credentials[1])
        headers["X-WSSE"] = ('UsernameToken Username="%s", PasswordDigest="%s", ' 'Nonce="%s", Created="%s"') % (
            self.credentials[0],
            digest,
            nonce,
            created,
        )

734 

735 

class GoogleLoginAuthentication(Authentication):
    """Implements the long-deprecated Google ClientLogin protocol.

    Construction POSTs the credentials to the ClientLogin endpoint and
    stores the returned Auth token; request() then attaches that token to
    every request. Kept for backward compatibility only.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #     service = "wise"

        # Renamed from `auth`: the old local assignment shadowed the
        # module-level `auth` import for the whole function, making the
        # auth._parse_www_authenticate call above raise UnboundLocalError.
        login_params = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],)
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(login_params),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        # NOTE(review): content may be bytes here; str.split would then fail —
        # confirm against Http.request's return type before relying on this path.
        lines = content.split("\n")
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth

769 

770 

771AUTH_SCHEME_CLASSES = { 

772 "basic": BasicAuthentication, 

773 "wsse": WsseAuthentication, 

774 "digest": DigestAuthentication, 

775 "hmacdigest": HmacDigestAuthentication, 

776 "googlelogin": GoogleLoginAuthentication, 

777} 

778 

779AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] 

780 

781 

class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Args:
            cache: path of the cache directory (created if missing).
            safe: callable mapping a cache key to a filesystem-safe name.
        """
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached bytes for *key*, or None when absent or unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # `with` guarantees the handle is closed even if read() raises;
            # the previous open/read/close sequence leaked it on error.
            with open(cacheFullPath, "rb") as f:
                retval = f.read()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store bytes *value* under *key*, overwriting any previous entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the entry for *key* if it exists; no-op otherwise."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)

815 

816 

class Credentials(object):
    """A pool of (domain, name, password) triples for HTTP authentication.

    Credentials registered with an empty domain apply to every host.
    """

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password*, optionally restricted to *domain*."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Drop every stored credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs usable for *domain*, in insertion order."""
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)

831 

832 

class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

    def add(self, key, cert, domain, password):
        """Register a client key/cert pair (with key password) for *domain*."""
        self.credentials.append((domain.lower(), key, cert, password))

    def iter(self, domain):
        """Yield (key, cert, password) triples usable for *domain*."""
        for stored_domain, key, cert, password in self.credentials:
            if stored_domain in ("", domain):
                yield (key, cert, password)

844 

845 

class AllHosts(object):
    # Sentinel: assign to ProxyInfo.bypass_hosts to bypass the proxy for
    # every host (ProxyInfo.bypass_host checks `bypass_hosts is AllHosts`).
    pass

848 

849 

class ProxyInfo(object):
    """Collect information required to use a proxy.

    Consumed by the *ConnectionWithTimeout classes via astuple().
    """

    # Hostnames excluded from proxying; may also be the AllHosts sentinel
    # to bypass the proxy for every host.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

        proxy_type: The type of proxy server. This must be set to one of
        socks.PROXY_TYPE_XXX constants. For example: p =
        ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
        proxy_port=8000)
        proxy_host: The hostname or IP address of the proxy server.
        proxy_port: The port that the proxy server is running on.
        proxy_rdns: If True (default), DNS queries will not be performed
        locally, and instead, handed to the proxy to resolve. This is useful
        if the network does not allow resolution of non-local names. In
        httplib2 0.9 and earlier, this defaulted to False.
        proxy_user: The username used to authenticate with the proxy server.
        proxy_pass: The password used to authenticate with the proxy server.
        proxy_headers: Additional or modified headers for the proxy connect
        request.
        """
        # Accept bytes credentials for backward compatibility; store as str.
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        # Plain attribute assignments replace the historical 7-way tuple
        # unpacking; the resulting attributes are identical.
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return the proxy settings as the 7-tuple connect() helpers expect."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """True when the socks module is importable and host/port are set."""
        # `is not None` replaces the non-idiomatic `!= None`; bool() keeps the
        # return a plain boolean instead of leaking None when socks is absent.
        return bool(socks) and (self.proxy_host is not None) and (self.proxy_port is not None)

    def applies_to(self, hostname):
        """True when requests to `hostname` should go through this proxy."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        # Normalize to a leading dot so suffix matching is uniform.
        hostname = "." + hostname.lstrip(".")
        for skip_name in self.bypass_hosts:
            # *.suffix
            if skip_name.startswith(".") and hostname.endswith(skip_name):
                return True
            # exact match
            if hostname == "." + skip_name:
                return True
        return False

    def __repr__(self):
        return (
            "<ProxyInfo type={p.proxy_type} "
            "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
            + " user={p.proxy_user} headers={p.proxy_headers}>"
        ).format(p=self)

935 

936 

def proxy_info_from_environment(method="http"):
    """Read proxy info from the environment variables.
    """
    if method not in ("http", "https"):
        return

    var = method + "_proxy"
    # The lowercase variable wins whenever it is present (even if empty),
    # matching os.environ.get(var, os.environ.get(var.upper())).
    if var in os.environ:
        url = os.environ[var]
    else:
        url = os.environ.get(var.upper())
    if not url:
        return
    return proxy_info_from_url(url, method, noproxy=None)

948 

949 

def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as http_proxy env var)
    """
    parts = urllib.parse.urlparse(url)

    # 3 == socks.PROXY_TYPE_HTTP; hard-coded so socks need not be importable.
    default_port = {"https": 443, "http": 80}[method]
    pi = ProxyInfo(
        proxy_type=3,
        proxy_host=parts.hostname,
        proxy_port=parts.port or default_port,
        proxy_user=parts.username or None,
        proxy_pass=parts.password or None,
        proxy_headers=None,
    )

    # If not given an explicit noproxy value, respect values in env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))

    bypass = []
    if noproxy == "*":
        # Special case: a single '*' means all hosts should be bypassed.
        bypass = AllHosts
    elif noproxy.strip():
        # Comma-separated list; drop empty entries.
        bypass = tuple(host for host in noproxy.split(",") if host)

    pi.bypass_hosts = bypass
    return pi

978 

979 

class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts

    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        # proxy_info may be a ProxyInfo instance or a callable taking the
        # scheme; resolve callables immediately for this http connection.
        self.proxy_info = proxy_info
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__."""
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # The TCP connection is made to the proxy, not the origin server.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        socket_err = None

        # Try each resolved address (IPv4/IPv6) until one connects.
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                # Remember the failure and fall through to the next address.
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        # Every candidate address failed: re-raise the last socket error.
        if not self.sock:
            raise socket_err

1065 

1066 

class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):

        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        # Fall back to the bundled CA file when none is supplied.
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        # proxy_info may be a ProxyInfo instance or a callable taking the
        # scheme; resolve callables immediately for this https connection.
        self.proxy_info = proxy_info
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port."""
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # The TCP connection is made to the proxy, not the origin server.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        socket_err = None

        # Try each resolved address (IPv4/IPv6) until one connects.
        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))

                # TLS handshake happens here; SNI uses the origin host name.
                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError) as e:
                # TLS failures are fatal: close both raw and wrapped sockets.
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error as e:
                # Remember the failure and fall through to the next address.
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        # Every candidate address failed: re-raise the last socket error.
        if not self.sock:
            raise socket_err

1201 

1202 

# Map a URI scheme to the connection class used to reach it; Http.request
# consults this when no explicit connection_type is passed.
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}

1207 

1208 

class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """

    def __init__(
        self,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
    ):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
        - a callable that takes the http scheme ('http' or 'https') and
        returns a ProxyInfo instance per request. By default, uses
        proxy_info_from_environment.
        - a ProxyInfo instance (static proxy config).
        - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.

        tls_maximum_version / tls_minimum_version require Python 3.7+ /
        OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.tls_maximum_version = tls_maximum_version
        self.tls_minimum_version = tls_minimum_version
        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, str):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # HTTP status codes treated as redirects (module-level REDIRECT_CODES).
        self.redirect_codes = REDIRECT_CODES

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        # Methods considered safe/idempotent for caching and auto-redirects.
        self.safe_methods = list(SAFE_METHODS)

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # If True, cached ETags are ignored when building request headers.
        self.ignore_etag = False

        # If True, exceptions during request() become synthetic error Responses.
        self.force_exception_to_status_code = False

        # Socket timeout in seconds, passed to each new connection.
        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

1306 

1307 def close(self): 

1308 """Close persistent connections, clear sensitive data. 

1309 Not thread-safe, requires external synchronization against concurrent requests. 

1310 """ 

1311 existing, self.connections = self.connections, {} 

1312 for _, c in existing.items(): 

1313 c.close() 

1314 self.certificates.clear() 

1315 self.clear_credentials() 

1316 

1317 def __getstate__(self): 

1318 state_dict = copy.copy(self.__dict__) 

1319 # In case request is augmented by some foreign object such as 

1320 # credentials which handle auth 

1321 if "request" in state_dict: 

1322 del state_dict["request"] 

1323 if "connections" in state_dict: 

1324 del state_dict["connections"] 

1325 return state_dict 

1326 

    def __setstate__(self, state):
        """Unpickle support: connections are recreated lazily on next request."""
        self.__dict__.update(state)
        self.connections = {}

1330 

1331 def _auth_from_challenge(self, host, request_uri, headers, response, content): 

1332 """A generator that creates Authorization objects 

1333 that can be applied to requests. 

1334 """ 

1335 challenges = auth._parse_www_authenticate(response, "www-authenticate") 

1336 for cred in self.credentials.iter(host): 

1337 for scheme in AUTH_SCHEME_ORDER: 

1338 if scheme in challenges: 

1339 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) 

1340 

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication.

        An empty domain applies the credential to every host."""
        self.credentials.add(name, password, domain)

1345 

    def add_certificate(self, key, cert, domain, password=None):
        """Add a key and cert that will be used
        any time a request requires authentication.

        `key`/`cert` are file paths for the client TLS key and certificate;
        `password` unlocks the key file if it is encrypted."""
        self.certificates.add(key, cert, domain, password)

1350 

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        # Cached Authorization objects are derived from credentials, so drop
        # them too.
        self.authorizations = []

1356 

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Send one request over `conn` and return (Response, content bytes).

        Retries up to RETRIES times on stale or broken connections;
        timeouts and DNS failures propagate to the caller.
        """
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                errno_ = _errno_from_exception(e)
                if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient errors
                raise
            except http.client.HTTPException:
                if conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except (http.client.BadStatusLine, http.client.ResponseNotReady):
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    # HEAD has no body; close so the connection isn't left
                    # waiting on one.
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    # Inflate gzip/deflate bodies per Content-Encoding.
                    content = _decompressContent(response, content)

            break
        return (response, content)

1430 

    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-path) in-scope Authorization, if any.
        # NOTE: the local name `auth` intentionally shadows the module here.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # e.g. a stale digest nonce: re-sign and resend once.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/scheme combination until one is accepted.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    # Remember permanent redirects so the cache can short-circuit them.
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # Per common practice, 302/303 redirects become GET
                            # with no body.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
        elif response.status in [200, 203] and method in self.safe_methods:
            # Don't cache 206's since we aren't going to handle byte range requests
            if "content-location" not in response:
                response["content-location"] = absolute_uri
            _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

1509 

    def _normalize_headers(self, headers):
        # Thin hook delegating to the module-level _normalize_headers helper;
        # subclasses may override to customize header normalization.
        return _normalize_headers(headers)

1512 

1513 # Need to catch and rebrand some exceptions 

1514 # Then need to optionally turn all exceptions into status codes 

1515 # including all socket.* and httplib.* exceptions. 

1516 

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """ Performs a single HTTP request.
The 'uri' is the URI of the HTTP resource and can begin
with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
There is no restriction on the methods allowed.

The 'body' is the entity body to be sent with the request. It is a string
object.

Any extra headers that are to be sent with the request should be provided in the
'headers' dictionary.

The maximum number of redirects to follow before raising an
exception is 'redirections'. The default is 5.

The return value is a tuple of (response, content), the first
being an instance of the 'Response' class, the second being
a string that contains the response entity body.
        """
        conn_key = ""

        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if "user-agent" not in headers:
                headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)
            # Prevent CWE-75 space injection to manipulate request via part of uri.
            # Prevent CWE-93 CRLF injection to modify headers via part of uri.
            uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # Reuse one connection per scheme+authority.
            conn_key = scheme + ":" + authority
            conn = self.connections.get(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            key_file=certs[0][0],
                            cert_file=certs[0][1],
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                            key_password=certs[0][2],
                        )
                    else:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                        )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority, timeout=self.timeout, proxy_info=self.proxy_info
                    )
                conn.set_debuglevel(debuglevel)

            if "range" not in headers and "accept-encoding" not in headers:
                headers["accept-encoding"] = "gzip, deflate"

            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        # Cached entries are serialized as headers + CRLFCRLF + body.
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        # Corrupt cache entry: discard it and proceed uncached.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and not "if-none-match" in headers:
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and not "last-modified" in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                    if response.status == 304 and method == "GET":
                        # Rewrite the cache entry with the new end-to-end headers
                        # Take all headers that are in response
                        # and overwrite their values in info.
                        # unless they are hop-by-hop, or are listed in the connection header.

                        for key in _get_end2end_headers(response):
                            info[key] = response[key]
                        merged_response = Response(info)
                        if hasattr(response, "_stale_digest"):
                            merged_response._stale_digest = response._stale_digest
                        _updateCache(headers, merged_response, content, self.cache, cachekey)
                        response = merged_response
                        response.status = 200
                        response.fromcache = True

                    elif response.status == 200:
                        content = new_content
                    else:
                        self.cache.delete(cachekey)
                        content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    # RFC 7234: only-if-cached with no usable entry => 504.
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            # A timed-out connection is unusable; evict it from the pool.
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                # Convert the failure into a synthetic error Response instead
                # of raising.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)

1750 

1751 

class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    # Is this response from our local cache?
    fromcache = False

    # HTTP protocol version used by server: 10 for HTTP/1.0, 11 for HTTP/1.1.
    version = 11

    # Status code returned by server.
    status = 200

    # Reason phrase returned by server.
    reason = "Ok"

    # Previous Response in a redirect chain, if any.
    previous = None

    def __init__(self, info):
        # `info` is an http.client.HTTPResponse, an email.message.Message,
        # or a plain mapping of header names to values.
        if isinstance(info, http.client.HTTPResponse):
            for key, value in info.getheaders():
                key = key.lower()
                existing = self.get(key)
                if existing is not None:
                    # Repeated headers are joined per RFC 7230 list syntax.
                    value = ", ".join((existing, value))
                self[key] = value
            self.status = info.status
            self["status"] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.message.Message):
            for key, value in list(info.items()):
                self[key.lower()] = value
            self.status = int(self["status"])
        else:
            for key, value in info.items():
                self[key.lower()] = value
            self.status = int(self.get("status", self.status))

    def __getattr__(self, name):
        # Backward-compat alias: `resp.dict` returns the mapping itself.
        if name != "dict":
            raise AttributeError(name)
        return self