Coverage for /pythoncovmergedfiles/medio/medio/src/httplib2/httplib2/__init__.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

937 statements  

1# -*- coding: utf-8 -*- 

2"""Small, fast HTTP client library for Python.""" 

3 

4import functools 

5 

6from httplib2.decode import ZlibDecoder, DecoderProtocol, LimitDecoder, DeflateDecoder 

7 

8__author__ = "Joe Gregorio (joe@bitworking.org)" 

9__copyright__ = "Copyright 2006, Joe Gregorio" 

10__contributors__ = [ 

11 "Thomas Broyer (t.broyer@ltgt.net)", 

12 "James Antill", 

13 "Xavier Verges Farrero", 

14 "Jonathan Feinberg", 

15 "Blair Zajac", 

16 "Sam Ruby", 

17 "Louis Nyffenegger", 

18 "Mark Pilgrim", 

19 "Alex Yu", 

20 "Lai Han", 

21] 

22__license__ = "MIT" 

23__version__ = "0.32.0" 

24 

25import base64 

26import calendar 

27import copy 

28import email 

29import email.feedparser 

30from email import header 

31import email.message 

32import email.utils 

33import errno 

34from gettext import gettext as _ 

35import gzip 

36from hashlib import md5 as _md5 

37from hashlib import sha1 as _sha 

38import hmac 

39import http.client 

40import io 

41import os 

42import random 

43import re 

44import socket 

45import ssl 

46import sys 

47import time 

48import urllib.parse 

49import zlib 

50 

51try: 

52 import socks 

53except ImportError: 

54 socks = None 

55from . import auth 

56from .error import * 

57from .iri2uri import iri2uri 

58 

59 

def has_timeout(timeout):
    """Report whether ``timeout`` is an explicit timeout value.

    Returns False for ``None`` and for the ``socket._GLOBAL_DEFAULT_TIMEOUT``
    marker (when the socket module defines one); True for anything else.
    """
    if timeout is None:
        return False
    # If the marker attribute is absent the getattr default (None) can never
    # be identical to a non-None timeout, so the result is simply True.
    return timeout is not getattr(socket, "_GLOBAL_DEFAULT_TIMEOUT", None)

64 

65 

66__all__ = [ 

67 "debuglevel", 

68 "FailedToDecompressContent", 

69 "Http", 

70 "HttpLib2Error", 

71 "ProxyInfo", 

72 "RedirectLimit", 

73 "RedirectMissingLocation", 

74 "Response", 

75 "RETRIES", 

76 "UnimplementedDigestAuthOptionError", 

77 "UnimplementedHmacDigestAuthOptionError", 

78] 

79 

80# The httplib debug level, set to a non-zero value to get debug output 

81debuglevel = 0 

82 

83# A request will be tried 'RETRIES' times if it fails at the socket/connection level. 

84RETRIES = 2 

85 

86 

87# Open Items: 

88# ----------- 

89 

90# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) 

91 

92# Pluggable cache storage (supports storing the cache in 

93# flat files by default. We need a plug-in architecture 

94# that can support Berkeley DB and Squid) 

95 

96# == Known Issues == 

97# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. 

98# Does not handle Cache-Control: max-stale 

99# Does not use Age: headers when calculating cache freshness. 

100 

101# The number of redirections to follow before giving up. 

102# Note that only GET redirects are automatically followed. 

103# Will also honor 301 requests by saving that info and never 

104# requesting that URI again. 

105DEFAULT_MAX_REDIRECTS = 5 

106 

107# Which headers are hop-by-hop headers by default 

108HOP_BY_HOP = [ 

109 "connection", 

110 "keep-alive", 

111 "proxy-authenticate", 

112 "proxy-authorization", 

113 "te", 

114 "trailers", 

115 "transfer-encoding", 

116 "upgrade", 

117] 

118 

119# https://tools.ietf.org/html/rfc7231#section-8.1.3 

120SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE") 

121 

122# To change, assign to `Http().redirect_codes` 

123REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308)) 

124 

125 

126from httplib2 import certs 

127 

128CA_CERTS = certs.where() 

129 

130# PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated. 

131# Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means: 

132# > Selects the highest protocol version that both the client and server support. 

133# > Despite the name, this option can select “TLS” protocols as well as “SSL”. 

134# source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23 

135 

136# PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated. 

137# > Auto-negotiate the highest protocol version that both the client and server support, and configure the context client-side connections. 

138# > The protocol enables CERT_REQUIRED and check_hostname by default. 

139# source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS 

140 

141DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23") 

142 

143 

def _build_ssl_context(
    disable_ssl_certificate_validation,
    ca_certs,
    cert_file=None,
    key_file=None,
    maximum_version=None,
    minimum_version=None,
    key_password=None,
):
    """Create and configure the ``ssl.SSLContext`` for a TLS connection.

    Args:
        disable_ssl_certificate_validation: when true, the context uses
            ``ssl.CERT_NONE``, hostname checking is switched off and no CA
            bundle is loaded.
        ca_certs: passed to ``load_verify_locations`` when validation is on.
        cert_file, key_file, key_password: passed to ``load_cert_chain`` when
            ``cert_file`` is truthy.
        maximum_version, minimum_version: optional TLS version bounds, either
            a value assigned as-is or the name of an ``ssl.TLSVersion`` member.

    Returns:
        The configured ``ssl.SSLContext``.

    Raises:
        RuntimeError: if ``ssl.SSLContext`` is unavailable, or a version bound
            is requested but the context lacks the matching attribute.
    """
    if not hasattr(ssl, "SSLContext"):
        raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")

    validate = not disable_ssl_certificate_validation
    context = ssl.SSLContext(DEFAULT_TLS_VERSION)
    can_check_hostname = hasattr(context, "check_hostname")

    # check_hostname and verify_mode should be set in opposite order during disable
    # https://bugs.python.org/issue31431
    if disable_ssl_certificate_validation and can_check_hostname:
        context.check_hostname = validate
    context.verify_mode = ssl.CERT_REQUIRED if validate else ssl.CERT_NONE

    # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
    # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
    version_bounds = (
        (
            "maximum_version",
            maximum_version,
            "setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer",
        ),
        (
            "minimum_version",
            minimum_version,
            "setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer",
        ),
    )
    for attr_name, requested, unsupported_message in version_bounds:
        if requested is None:
            continue
        if not hasattr(context, attr_name):
            raise RuntimeError(unsupported_message)
        if isinstance(requested, str):
            # A string names a member of ssl.TLSVersion.
            requested = getattr(ssl.TLSVersion, requested)
        setattr(context, attr_name, requested)

    # check_hostname requires python 3.4+
    # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
    # if check_hostname is not supported.
    if can_check_hostname:
        context.check_hostname = validate

    if validate:
        context.load_verify_locations(ca_certs)

    if cert_file:
        context.load_cert_chain(cert_file, key_file, key_password)

    return context

192 

193 

def _get_end2end_headers(response):
    """Return the header names in ``response`` that are not hop-by-hop.

    The excluded set is ``HOP_BY_HOP`` plus every comma-separated token
    listed in the response's own ``connection`` header.
    """
    excluded = list(HOP_BY_HOP)
    for token in response.get("connection", "").split(","):
        excluded.append(token.strip())
    return [name for name in response.keys() if name not in excluded]

198 

199 

200_missing = object() 

201 

202 

203def _errno_from_exception(e): 

204 # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass 

205 errno = getattr(e, "errno", _missing) 

206 if errno is not _missing: 

207 return errno 

208 

209 # socket.error and common wrap in .args 

210 args = getattr(e, "args", None) 

211 if args: 

212 return _errno_from_exception(args[0]) 

213 

214 # pysocks.ProxyError wraps in .socket_err 

215 # https://github.com/httplib2/httplib2/pull/202 

216 socket_err = getattr(e, "socket_err", None) 

217 if socket_err: 

218 return _errno_from_exception(socket_err) 

219 

220 return None 

221 

222 

223URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") 

224 

225 

def parse_uri(uri):
    """Split a URI with the module-level ``URI`` regex (RFC 3986, Appendix B).

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Components absent from ``uri`` come back as None, except ``path`` which
    is then the empty string.
    """
    # Odd-numbered captures carry the delimiters ("scheme:", "//authority",
    # "?query", "#fragment"); only the bare components are returned.
    _, scheme, _, authority, path, _, query, _, fragment = URI.match(uri).groups()
    return (scheme, authority, path, query, fragment)

233 

234 

def urlnorm(uri):
    """Normalize an absolute URI.

    Returns ``(scheme, authority, request_uri, defrag_uri)`` where scheme and
    authority are lower-cased, ``request_uri`` is the path (``"/"`` when
    empty) plus ``?query`` if a query is present, and ``defrag_uri`` is the
    whole URI without its fragment.

    Raises:
        RelativeURIError: if the URI has no scheme or no authority.
    """
    scheme, authority, path, query, _fragment = parse_uri(uri)
    if not (scheme and authority):
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)

    scheme = scheme.lower()
    authority = authority.lower()
    if not path:
        path = "/"

    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    if query:
        request_uri = "?".join([path, query])
    else:
        request_uri = path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri

249 

250 

251# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) 

252re_url_scheme = re.compile(r"^\w+://") 

253re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII) 

254 

255 

def safename(filename):
    """Return a filename suitable for the cache.

    The result is ``"<sanitized>,<md5>"``: the name with its URL scheme and
    every character matched by ``re_unsafe`` removed, truncated to 90
    characters, followed by the MD5 hex digest of the original UTF-8 bytes.
    Accepts ``str`` or UTF-8 encoded ``bytes``.
    """
    if isinstance(filename, bytes):
        raw = filename
        text = filename.decode("utf-8")
    else:
        text = filename
        raw = filename.encode("utf-8")

    digest = _md5(raw).hexdigest()
    text = re_unsafe.sub("", re_url_scheme.sub("", text))

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
    return ",".join((text[:90], digest))

278 

279 

280NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+") 

281 

282 

def _normalize_headers(headers):
    """Return a new dict with lower-cased names and whitespace-normalized values.

    Keys and values may be ``str`` or UTF-8 ``bytes``. In each value, every
    run matched by ``NORMALIZE_SPACE`` (an optional CRLF followed by spaces or
    tabs, i.e. header folding) is collapsed to a single space, and the result
    is stripped.
    """
    # Pattern.sub takes (replacement, subject). The value is the subject here;
    # passing it as the replacement would skip normalization entirely and have
    # the regex engine interpret backslash sequences inside the header value.
    return {
        _convert_byte_str(key).lower(): NORMALIZE_SPACE.sub(" ", _convert_byte_str(value)).strip()
        for key, value in headers.items()
    }

290 

291 

292def _convert_byte_str(s): 

293 if not isinstance(s, str): 

294 return str(s, "utf-8") 

295 return s 

296 

297 

298def _parse_cache_control(headers): 

299 retval = {} 

300 if "cache-control" in headers: 

301 parts = headers["cache-control"].split(",") 

302 parts_with_args = [ 

303 tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=") 

304 ] 

305 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")] 

306 retval = dict(parts_with_args + parts_wo_args) 

307 return retval 

308 

309 

310# Whether to use a strict mode to parse WWW-Authenticate headers 

311# Might lead to bad results in case of ill-formed header value, 

312# so disabled by default, falling back to relaxed parsing. 

313# Set to true to turn on, useful for testing servers. 

314USE_WWW_AUTH_STRICT_PARSING = 0 

315 

316 

def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'.  We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Args:
        response_headers: headers of the cached response.
        request_headers: headers of the outgoing request. May be mutated: a
            ``Pragma: no-cache`` request without ``cache-control`` gets
            ``cache-control: no-cache`` added.

    Returns:
        One of the strings ``"FRESH"``, ``"STALE"`` or ``"TRANSPARENT"``.
    """
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
        return "TRANSPARENT"
    if "no-cache" in cc:
        return "TRANSPARENT"
    if "no-cache" in cc_response:
        return "STALE"
    if "only-if-cached" in cc:
        return "FRESH"
    if "date" not in response_headers:
        return "STALE"

    parsed_date = email.utils.parsedate_tz(response_headers["date"])
    if parsed_date is None:
        # An unparsable Date gives no basis for computing the entry's age, so
        # treat the entry as stale instead of crashing in calendar.timegm().
        return "STALE"
    date = calendar.timegm(parsed_date)
    current_age = max(0, time.time() - date)

    if "max-age" in cc_response:
        try:
            freshness_lifetime = int(cc_response["max-age"])
        except ValueError:
            freshness_lifetime = 0
    elif "expires" in response_headers:
        expires = email.utils.parsedate_tz(response_headers["expires"])
        if expires is None:
            freshness_lifetime = 0
        else:
            freshness_lifetime = max(0, calendar.timegm(expires) - date)
    else:
        freshness_lifetime = 0

    # A request max-age overrides whatever lifetime the response granted.
    if "max-age" in cc:
        try:
            freshness_lifetime = int(cc["max-age"])
        except ValueError:
            freshness_lifetime = 0

    # min-fresh demands the entry stay fresh that much longer, which is the
    # same as the entry already being that much older.
    if "min-fresh" in cc:
        try:
            min_fresh = int(cc["min-fresh"])
        except ValueError:
            min_fresh = 0
        current_age += min_fresh

    if freshness_lifetime > current_age:
        return "FRESH"
    return "STALE"

390 

391 

def _decompressContent(response, new_content, limit_kwargs):
    """Decompress ``new_content`` according to the response's content-encoding.

    For ``gzip``, ``deflate`` or ``zlib`` the body is run through a
    ``LimitDecoder`` (built with ``limit_kwargs``) wrapping a ``ZlibDecoder``,
    falling back to a ``DeflateDecoder`` if that fails. On success
    ``content-length`` is rewritten and ``content-encoding`` is moved to
    ``-content-encoding``. Any other encoding returns the body untouched.

    Raises:
        FailedToDecompressContent: if both decoders fail.
    """
    header_name = "content-encoding"
    encoding = response.get(header_name, None)
    make_limited = functools.partial(LimitDecoder, **limit_kwargs)

    if encoding not in ["gzip", "deflate", "zlib"]:
        return new_content

    try:
        try:
            content = make_limited(ZlibDecoder()).consume_bytes(new_content, 0)
        except (IOError, zlib.error):
            content = make_limited(DeflateDecoder()).consume_bytes(new_content, 0)
        response["content-length"] = str(len(content))
        # Record the historical presence of the encoding in a way the won't interfere.
        response["-content-encoding"] = response.pop(header_name)
    except (IOError, zlib.error):
        # The exception carries an empty body, not the undecodable bytes.
        raise FailedToDecompressContent(
            _("Content purported to be compressed with %s but failed to decompress.") % encoding,
            response,
            "",
        )
    return content

415 

416 

417def _bind_write_headers(msg): 

418 def _write_headers(self): 

419 # Self refers to the Generator object. 

420 for h, v in msg.items(): 

421 print("%s:" % h, end=" ", file=self._fp) 

422 if isinstance(v, header.Header): 

423 print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp) 

424 else: 

425 # email.Header got lots of smarts, so use it. 

426 headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h) 

427 print(headers.encode(), file=self._fp) 

428 # A blank line always separates headers from body. 

429 print(file=self._fp) 

430 

431 return _write_headers 

432 

433 

434def _updateCache(request_headers, response_headers, content, cache, cachekey): 

435 if cachekey: 

436 cc = _parse_cache_control(request_headers) 

437 cc_response = _parse_cache_control(response_headers) 

438 if "no-store" in cc or "no-store" in cc_response: 

439 cache.delete(cachekey) 

440 else: 

441 info = email.message.Message() 

442 for key, value in response_headers.items(): 

443 if key not in ["status", "content-encoding", "transfer-encoding"]: 

444 info[key] = value 

445 

446 # Add annotations to the cache to indicate what headers 

447 # are variant for this request. 

448 vary = response_headers.get("vary", None) 

449 if vary: 

450 vary_headers = vary.lower().replace(" ", "").split(",") 

451 for header in vary_headers: 

452 key = "-varied-%s" % header 

453 try: 

454 info[key] = request_headers[header] 

455 except KeyError: 

456 pass 

457 

458 status = response_headers.status 

459 if status == 304: 

460 status = 200 

461 

462 status_header = "status: %d\r\n" % status 

463 

464 try: 

465 header_str = info.as_string() 

466 except UnicodeEncodeError: 

467 setattr(info, "_write_headers", _bind_write_headers(info)) 

468 header_str = info.as_string() 

469 

470 header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) 

471 text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content]) 

472 

473 cache.set(cachekey, text) 

474 

475 

476def _cnonce(): 

477 dig = _md5( 

478 ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8") 

479 ).hexdigest() 

480 return dig[:16] 

481 

482 

483def _wsse_username_token(cnonce, iso_now, password): 

484 return ( 

485 base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8") 

486 ) 

487 

488 

489# For credentials we need two things, first 

490# a pool of credentials to try (not necessarily tied to Basic, Digest, etc.) 

491# Then we also need a list of URIs that have already demanded authentication 

492# That list is tricky since sub-URIs can take the same auth, or the 

493# auth scheme may change as you descend the tree. 

494# So we also need each Auth instance to be able to tell us 

495# how close to the 'top' it is. 

496 

497 

class Authentication(object):
    """Base class for the authentication scheme handlers.

    Stores the credentials, host, the path component of the URI that
    triggered authentication, and the owning http object. Subclasses override
    ``request`` (and ``response`` where needed).
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        # Only the path component (index 2) of the parsed URI is kept.
        self.path = parse_uri(request_uri)[2]
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """Count the ``/`` characters in ``request_uri`` past this handler's path length."""
        parse_uri(request_uri)  # result is not used; call retained from the original
        remainder = request_uri[len(self.path) :]
        return remainder.count("/")

    def inscope(self, host, request_uri):
        """Tell whether ``host``/``request_uri`` fall under this handler's host and path prefix."""
        # XXX Should we normalize the request_uri?
        path = parse_uri(request_uri)[2]
        same_host = host == self.host
        return same_host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Override this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Override this in sub-classes if necessary.

        Return True if the request is to be retried, for
        example Digest may return stale=true.
        """
        return False

    # The rich comparisons below return fixed answers regardless of the
    # other operand.
    def __eq__(self, auth):
        return False

    def __ne__(self, auth):
        return True

    def __lt__(self, auth):
        return True

    def __gt__(self, auth):
        return False

    def __le__(self, auth):
        return True

    def __ge__(self, auth):
        return False

    def __bool__(self):
        return True

550 

551 

class BasicAuthentication(Authentication):
    """HTTP Basic authentication: sends base64 of ``name:password``."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        user_pass = "%s:%s" % self.credentials
        token = base64.b64encode(user_pass.encode("utf-8")).strip().decode("utf-8")
        headers["authorization"] = "Basic " + token

562 

563 

class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the Digest challenge from ``response`` and precompute A1.

        Raises:
            UnimplementedDigestAuthOptionError: if the server does not offer
                qop ``auth`` or asks for an algorithm other than MD5.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
        qop = self.challenge.get("qop", "auth")
        # The qop option is a comma-separated list (e.g. "auth,auth-int");
        # accept commas as well as whitespace between the offered values.
        offered = qop.replace(",", " ").split()
        self.challenge["qop"] = "auth" if "auth" in offered else None
        if self.challenge["qop"] is None:
            raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s.") % qop)
        self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
        if self.challenge["algorithm"] != "MD5":
            raise UnimplementedDigestAuthOptionError(
                _("Unsupported value for algorithm: %s.") % self.challenge["algorithm"]
            )
        self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1]])
        # Nonce count, incremented after every request built with this nonce.
        self.challenge["nc"] = 1

    def request(self, method, request_uri, headers, content, cnonce=None):
        """Modify the request headers to add a Digest Authorization header.

        ``cnonce`` may be supplied by the caller; otherwise a fresh one is
        generated. The nonce count is incremented afterwards.
        """

        def H(x):
            return _md5(x.encode("utf-8")).hexdigest()

        def KD(s, d):
            return H("%s:%s" % (s, d))

        A2 = "".join([method, ":", request_uri])
        self.challenge["cnonce"] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(
            H(self.A1),
            "%s:%s:%s:%s:%s"
            % (
                self.challenge["nonce"],
                "%08x" % self.challenge["nc"],
                self.challenge["cnonce"],
                self.challenge["qop"],
                H(A2),
            ),
        )
        headers["authorization"] = (
            'Digest username="%s", realm="%s", nonce="%s", '
            'uri="%s", algorithm=%s, response=%s, qop=%s, '
            'nc=%08x, cnonce="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["nonce"],
            request_uri,
            self.challenge["algorithm"],
            request_digest,
            self.challenge["qop"],
            self.challenge["nc"],
            self.challenge["cnonce"],
        )
        if self.challenge.get("opaque"):
            headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
        self.challenge["nc"] += 1

    def response(self, response, content):
        """Pick up a new nonce from the server's reply.

        Returns True (retry the request) only when the reply carries a Digest
        challenge with ``stale=true``; an ``authentication-info`` header with
        ``nextnonce`` updates the nonce without requesting a retry.
        """
        if "authentication-info" not in response:
            challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
            if "true" == challenge.get("stale"):
                self.challenge["nonce"] = challenge["nonce"]
                self.challenge["nc"] = 1
                return True
        else:
            updated_challenge = auth._parse_authentication_info(response, "authentication-info")

            if "nextnonce" in updated_challenge:
                self.challenge["nonce"] = updated_challenge["nextnonce"]
                self.challenge["nc"] = 1
        return False

633 

634 

class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""

    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the HMACDigest challenge from ``response`` and derive the signing key.

        Raises:
            UnimplementedHmacDigestAuthOptionError: if the challenge has no
                server nonce, or names an unsupported algorithm or
                pw-algorithm.
        """
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        self.challenge = challenge["hmacdigest"]
        # TODO: self.challenge['domain']
        self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
        if self.challenge["reason"] not in ["unauthorized", "integrity"]:
            self.challenge["reason"] = "unauthorized"
        self.challenge["salt"] = self.challenge.get("salt", "")
        if not self.challenge.get("snonce"):
            raise UnimplementedHmacDigestAuthOptionError(
                _("The challenge doesn't contain a server nonce, or this one is empty.")
            )
        self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
        if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for algorithm: %s.") % self.challenge["algorithm"]
            )
        self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
        if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
            raise UnimplementedHmacDigestAuthOptionError(
                _("Unsupported value for pw-algorithm: %s.") % self.challenge["pw-algorithm"]
            )
        self.hashmod = _md5 if self.challenge["algorithm"] == "HMAC-MD5" else _sha
        self.pwhashmod = _md5 if self.challenge["pw-algorithm"] == "MD5" else _sha

        # _md5/_sha are hashlib constructors: call them directly (they have no
        # ``.new`` attribute) and feed them bytes, as hashlib requires.
        salted_password = "".join([self.credentials[1], self.challenge["salt"]])
        password_hash = self.pwhashmod(salted_password.encode("utf-8")).hexdigest().lower()
        key_material = "".join([self.credentials[0], ":", password_hash, ":", self.challenge["realm"]])
        self.key = self.pwhashmod(key_material.encode("utf-8")).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add an HMACDigest Authorization header.

        The signature covers the method, URI, a fresh client nonce, the
        server nonce and the concatenated values of the end-to-end headers.
        """
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val)
        # hmac.new needs a bytes key and a bytes message.
        request_digest = (
            hmac.new(self.key.encode("utf-8"), request_digest.encode("utf-8"), self.hashmod).hexdigest().lower()
        )
        headers["authorization"] = (
            'HMACDigest username="%s", realm="%s", snonce="%s",'
            ' cnonce="%s", uri="%s", created="%s", '
            'response="%s", headers="%s"'
        ) % (
            self.credentials[0],
            self.challenge["realm"],
            self.challenge["snonce"],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist,
        )

    def response(self, response, content):
        """Return True (retry) when the server's challenge reason is ``integrity`` or ``stale``."""
        challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
        if challenge.get("reason") in ["integrity", "stale"]:
            return True
        return False

711 

712 

class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        username, password = self.credentials[0], self.credentials[1]
        digest = _wsse_username_token(nonce, created, password)
        template = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"'
        headers["X-WSSE"] = template % (username, digest, nonce, created)

738 

739 

class GoogleLoginAuthentication(Authentication):
    """GoogleLogin scheme: fetches an Auth token from Google's ClientLogin endpoint."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        """Parse the GoogleLogin challenge and POST the credentials to ClientLogin.

        On a 403 reply ``self.Auth`` is set to the empty string; otherwise it
        is the ``Auth`` value from the reply body.
        """
        from urllib.parse import urlencode

        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = auth._parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        # Must not be named ``auth``: that would make ``auth`` a local name
        # for the whole function and break the module lookup above.
        login_fields = dict(
            Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],
        )
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(login_fields),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        # A bytes body must be decoded before it can be split on a str separator.
        if isinstance(content, bytes):
            content = content.decode("utf-8")
        lines = content.split("\n")
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth

773 

774 

775AUTH_SCHEME_CLASSES = { 

776 "basic": BasicAuthentication, 

777 "wsse": WsseAuthentication, 

778 "digest": DigestAuthentication, 

779 "hmacdigest": HmacDigestAuthentication, 

780 "googlelogin": GoogleLoginAuthentication, 

781} 

782 

783AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] 

784 

785 

class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """

    def __init__(self, cache, safe=safename):  # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """Remember the cache directory and create it if it does not exist.

        Args:
            cache: path of the cache directory.
            safe: callable mapping a cache key to a file name.
        """
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            # exist_ok tolerates another process creating the directory
            # between the check above and this call.
            os.makedirs(self.cache, exist_ok=True)

    def get(self, key):
        """Return the cached bytes for ``key``, or None if they cannot be read."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # ``with`` closes the file even if read() fails.
            with open(cacheFullPath, "rb") as f:
                return f.read()
        except IOError:
            return None

    def set(self, key, value):
        """Write the bytes ``value`` as the cache entry for ``key``."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        with open(cacheFullPath, "wb") as f:
            f.write(value)

    def delete(self, key):
        """Remove the cache entry for ``key``; a missing entry is not an error."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            os.remove(cacheFullPath)
        except FileNotFoundError:
            # Already gone (never cached, or removed concurrently).
            pass

819 

820 

class Credentials(object):
    """A list of (domain, name, password) entries, looked up by domain."""

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Append an entry; the domain is stored lower-cased."""
        entry = (domain.lower(), name, password)
        self.credentials.append(entry)

    def clear(self):
        """Drop every stored entry."""
        self.credentials = []

    def iter(self, domain):
        """Yield ``(name, password)`` for entries with no domain or a domain equal to ``domain``."""
        for stored_domain, name, password in self.credentials:
            if stored_domain != "" and domain != stored_domain:
                continue
            yield (name, password)

835 

836 

class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""

    def add(self, key, cert, domain, password):
        """Append a (domain, key, cert, password) entry; the domain is stored lower-cased."""
        entry = (domain.lower(), key, cert, password)
        self.credentials.append(entry)

    def iter(self, domain):
        """Yield ``(key, cert, password)`` for entries with no domain or a domain equal to ``domain``."""
        for stored_domain, key, cert, password in self.credentials:
            if stored_domain != "" and domain != stored_domain:
                continue
            yield (key, cert, password)

848 

849 

class AllHosts(object):
    """Marker class: ``ProxyInfo.bypass_host`` returns True for every
    hostname when ``bypass_hosts`` is this class."""

852 

853 

class ProxyInfo(object):
    """Collect information required to use a proxy."""

    # Host names that must not go through the proxy. Either an iterable of
    # names (a leading "." marks a domain suffix) or the AllHosts marker class.
    bypass_hosts = ()

    def __init__(
        self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
    ):
        """Args:

          proxy_type: The type of proxy server.  This must be set to one of
          socks.PROXY_TYPE_XXX constants.  For example:  p =
          ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
          proxy_port=8000)
          proxy_host: The hostname or IP address of the proxy server.
          proxy_port: The port that the proxy server is running on.
          proxy_rdns: If True (default), DNS queries will not be performed
          locally, and instead, handed to the proxy to resolve.  This is useful
          if the network does not allow resolution of non-local names. In
          httplib2 0.9 and earlier, this defaulted to False.
          proxy_user: The username used to authenticate with the proxy server.
          Bytes are decoded to str.
          proxy_pass: The password used to authenticate with the proxy server.
          Bytes are decoded to str.
          proxy_headers: Additional or modified headers for the proxy connect
          request.
        """
        if isinstance(proxy_user, bytes):
            proxy_user = proxy_user.decode()
        if isinstance(proxy_pass, bytes):
            proxy_pass = proxy_pass.decode()
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.proxy_headers = proxy_headers

    def astuple(self):
        """Return (type, host, port, rdns, user, pass, headers) as a tuple."""
        return (
            self.proxy_type,
            self.proxy_host,
            self.proxy_port,
            self.proxy_rdns,
            self.proxy_user,
            self.proxy_pass,
            self.proxy_headers,
        )

    def isgood(self):
        """Return a truthy value when the proxy can be used.

        That requires the optional ``socks`` module to be available and both
        a proxy host and a proxy port to be set.
        """
        # Identity comparison with None (PEP 8); "!=" could be overridden by
        # the compared object.
        return socks and (self.proxy_host is not None) and (self.proxy_port is not None)

    def applies_to(self, hostname):
        """Return True when requests to ``hostname`` should use this proxy."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        # Normalise to exactly one leading dot so that suffix entries such as
        # ".example.com" also match the bare domain "example.com".
        hostname = "." + hostname.lstrip(".")
        for skip_name in self.bypass_hosts:
            # *.suffix
            if skip_name.startswith(".") and hostname.endswith(skip_name):
                return True
            # exact match
            if hostname == "." + skip_name:
                return True
        return False

    def __repr__(self):
        # The proxy password is not part of the representation.
        return (
            "<ProxyInfo type={p.proxy_type} "
            "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
            + " user={p.proxy_user} headers={p.proxy_headers}>"
        ).format(p=self)

939 

940 

def proxy_info_from_environment(method="http"):
    """Build proxy settings from ``<method>_proxy`` / ``<METHOD>_PROXY``.

    Returns None when ``method`` is neither "http" nor "https", or when the
    selected environment variable is unset or empty. The lower-case variable
    wins whenever it is defined, even if it is empty.
    """
    if method != "http" and method != "https":
        return None

    env_var = method + "_proxy"
    url = os.environ.get(env_var)
    if url is None:
        url = os.environ.get(env_var.upper())
    if url:
        return proxy_info_from_url(url, method, noproxy=None)
    return None

952 

953 

def proxy_info_from_url(url, method="http", noproxy=None):
    """Construct a ProxyInfo from a URL (such as http_proxy env var)

    Args:
        url: Proxy URL. The schemes "socks4" and "socks5"/"socks" select the
            matching SOCKS proxy type; anything else is treated as an HTTP
            proxy.
        method: "http" or "https"; selects the default port (80 or 443) used
            when the URL does not carry one.
        noproxy: Comma-separated list of hosts that bypass the proxy, or "*"
            for all hosts. When None, the ``no_proxy`` / ``NO_PROXY``
            environment variables are consulted.

    Returns:
        A ProxyInfo whose ``bypass_hosts`` reflects ``noproxy``.
    """
    url = urllib.parse.urlparse(url)

    proxy_type = 3  # socks.PROXY_TYPE_HTTP
    if url.scheme == "socks4":
        proxy_type = 1  # socks.PROXY_TYPE_SOCKS4
    elif url.scheme == "socks5" or url.scheme == "socks":
        proxy_type = 2  # socks.PROXY_TYPE_SOCKS5
    pi = ProxyInfo(
        proxy_type=proxy_type,
        proxy_host=url.hostname,
        proxy_port=url.port or dict(https=443, http=80)[method],
        proxy_user=url.username or None,
        proxy_pass=url.password or None,
        proxy_headers=None,
    )

    bypass_hosts = []
    # If not given an explicit noproxy value, respect values in env vars.
    if noproxy is None:
        noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
    # Special case: A single '*' character means all hosts should be bypassed.
    if noproxy == "*":
        bypass_hosts = AllHosts
    elif noproxy.strip():
        # Strip each entry: lists are commonly written "a.com, b.com", and an
        # entry with surrounding whitespace could never match a host name.
        stripped = (name.strip() for name in noproxy.split(","))
        bypass_hosts = tuple(name for name in stripped if name)  # To exclude empty string.

    pi.bypass_hosts = bypass_hosts
    return pi

986 

987 

class HTTPConnectionWithTimeout(http.client.HTTPConnection):
    """HTTPConnection subclass that supports timeouts and proxies.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, timeout=None, proxy_info=None):
        """Args:
          host, port, timeout: forwarded to http.client.HTTPConnection.
          proxy_info: a ProxyInfo instance, a callable that is invoked with
          "http" and whose result is stored as the proxy info, or a falsy
          value for no proxy.
        """
        http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)

        self.proxy_info = proxy_info
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("http")

    def connect(self):
        """Connect to the host and port specified in __init__.

        Every address returned by getaddrinfo is tried in order until one
        connects. If none does, the last socket error is re-raised.

        Raises:
            ProxiesUnavailableError: proxy info is set but the socks module
                is not available.
        """
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # With a proxy, address resolution below targets the proxy itself.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None

        # Last socket error seen; raised if no address could be connected.
        socket_err = None

        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                if self.debuglevel > 0:
                    print("connect: ({0}, {1}) ************".format(self.host, self.port))
                    if use_proxy:
                        # NOTE(review): this debug output includes proxy_pass.
                        print(
                            "proxy: {0} ************".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )

                # connect() is always given the target host/port (plus any
                # extra sockaddr fields beyond the first two), not the
                # resolved `host`/`port` used for getaddrinfo above.
                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                # Try the next resolved address.
                continue
            # Connected: stop iterating over addresses.
            break
        if not self.sock:
            # NOTE(review): socket_err is still None if getaddrinfo returned
            # an empty list, in which case this raise itself fails - confirm.
            raise socket_err

1073 

1074 

class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        tls_maximum_version=None,
        tls_minimum_version=None,
        key_password=None,
    ):
        """Args:
          host, port, timeout: forwarded to http.client.HTTPSConnection.
          key_file, cert_file, key_password: client certificate material,
          handed to _build_ssl_context and kept as attributes.
          proxy_info: a ProxyInfo instance, a callable that is invoked with
          "https" and whose result is stored as the proxy info, or a falsy
          value for no proxy.
          ca_certs: CA bundle; the module-level CA_CERTS is used when falsy.
          disable_ssl_certificate_validation, tls_maximum_version,
          tls_minimum_version: handed to _build_ssl_context.
        """

        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.ca_certs = ca_certs if ca_certs else CA_CERTS

        self.proxy_info = proxy_info
        if proxy_info and not isinstance(proxy_info, ProxyInfo):
            self.proxy_info = proxy_info("https")

        context = _build_ssl_context(
            self.disable_ssl_certificate_validation,
            self.ca_certs,
            cert_file,
            key_file,
            maximum_version=tls_maximum_version,
            minimum_version=tls_minimum_version,
            key_password=key_password,
        )
        super(HTTPSConnectionWithTimeout, self).__init__(
            host, port=port, timeout=timeout, context=context,
        )
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password

    def connect(self):
        """Connect to a host on a given (SSL) port.

        Every address returned by getaddrinfo is tried in order. SSL errors,
        timeouts and address-resolution errors are re-raised immediately;
        other socket errors move on to the next address, and the last one is
        re-raised if no address works.
        """
        if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
            use_proxy = True
            (
                proxy_type,
                proxy_host,
                proxy_port,
                proxy_rdns,
                proxy_user,
                proxy_pass,
                proxy_headers,
            ) = self.proxy_info.astuple()

            # With a proxy, address resolution below targets the proxy itself.
            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port
            proxy_type = None
            proxy_headers = None

        # Last socket error seen; raised if no address could be connected.
        socket_err = None

        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                # connect() is always given the target host/port, not the
                # resolved `host`/`port` used for getaddrinfo above.
                sock.connect((self.host, self.port))

                # self.sock is only assigned once the TLS wrap has succeeded.
                self.sock = self._context.wrap_socket(sock, server_hostname=self.host)

                # Python 3.3 compatibility: emulate the check_hostname behavior
                if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
                    try:
                        ssl.match_hostname(self.sock.getpeercert(), self.host)
                    except Exception:
                        self.sock.shutdown(socket.SHUT_RDWR)
                        self.sock.close()
                        raise

                if self.debuglevel > 0:
                    print("connect: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        # NOTE(review): this debug output includes proxy_pass.
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
            except (ssl.SSLError, ssl.CertificateError) as e:
                # TLS failures are not retried on the remaining addresses.
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error as e:
                socket_err = e
                if self.debuglevel > 0:
                    print("connect fail: ({0}, {1})".format(self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: {0}".format(
                                str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
                            )
                        )
                # NOTE(review): only self.sock is closed here; when
                # sock.connect() itself failed, self.sock is still unset and
                # the local `sock` is not explicitly closed - confirm whether
                # that is intended.
                if self.sock:
                    self.sock.close()
                self.sock = None
                # Try the next resolved address.
                continue
            # Connected: stop iterating over addresses.
            break
        if not self.sock:
            # NOTE(review): socket_err is still None if getaddrinfo returned
            # an empty list, in which case this raise itself fails - confirm.
            raise socket_err

1209 

1210 

# Default connection class per URI scheme; Http.request looks the scheme up
# here when the caller does not pass an explicit connection_type.
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}

1215 

1216 

1217class Http(object): 

1218 """An HTTP client that handles: 

1219 

1220 - all methods 

1221 - caching 

1222 - ETags 

1223 - compression, 

1224 - HTTPS 

1225 - Basic 

1226 - Digest 

1227 - WSSE 

1228 

1229 and more. 

1230 """ 

1231 

1232 def __init__( 

1233 self, 

1234 cache=None, 

1235 timeout=None, 

1236 proxy_info=proxy_info_from_environment, 

1237 ca_certs=None, 

1238 disable_ssl_certificate_validation=False, 

1239 tls_maximum_version=None, 

1240 tls_minimum_version=None, 

1241 decode_limit_hard=None, 

1242 decode_limit_safe=None, 

1243 decode_limit_ratio=None, 

1244 decode_limit_chunk=None, 

1245 ): 

1246 """If 'cache' is a string then it is used as a directory name for 

1247 a disk cache. Otherwise it must be an object that supports the 

1248 same interface as FileCache. 

1249 

1250 All timeouts are in seconds. If None is passed for timeout 

1251 then Python's default timeout for sockets will be used. See 

1252 for example the docs of socket.setdefaulttimeout(): 

1253 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 

1254 

1255 `proxy_info` may be: 

1256 - a callable that takes the http scheme ('http' or 'https') and 

1257 returns a ProxyInfo instance per request. By default, uses 

1258 proxy_info_from_environment. 

1259 - a ProxyInfo instance (static proxy config). 

1260 - None (proxy disabled). 

1261 

1262 ca_certs is the path of a file containing root CA certificates for SSL 

1263 server certificate validation. By default, a CA cert file bundled with 

1264 httplib2 is used. 

1265 

1266 If disable_ssl_certificate_validation is true, SSL cert validation will 

1267 not be performed. 

1268 

1269 tls_maximum_version / tls_minimum_version require Python 3.7+ / 

1270 OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+. 

1271 

1272 `decode_limit_{hard,safe,ratio,chunk}` options configure `httplib2.decode.LimitDecoder` in attempt order: 

1273 - Http() argument - top priority 

1274 - environment httplib2_decode_limit_{hard,safe,ratio,chunk} 

1275 - LimitDecoder defaults - least priority 

1276 """ 

1277 self.proxy_info = proxy_info 

1278 self.ca_certs = ca_certs 

1279 self.disable_ssl_certificate_validation = disable_ssl_certificate_validation 

1280 self.tls_maximum_version = tls_maximum_version 

1281 self.tls_minimum_version = tls_minimum_version 

1282 # Map domain name to an httplib connection 

1283 self.connections = {} 

1284 # The location of the cache, for now a directory 

1285 # where cached responses are held. 

1286 if cache and isinstance(cache, str): 

1287 self.cache = FileCache(cache) 

1288 else: 

1289 self.cache = cache 

1290 

1291 # Name/password 

1292 self.credentials = Credentials() 

1293 

1294 # Key/cert 

1295 self.certificates = KeyCerts() 

1296 

1297 # authorization objects 

1298 self.authorizations = [] 

1299 

1300 # If set to False then no redirects are followed, even safe ones. 

1301 self.follow_redirects = True 

1302 

1303 self.redirect_codes = REDIRECT_CODES 

1304 

1305 # Which HTTP methods do we apply optimistic concurrency to, i.e. 

1306 # which methods get an "if-match:" etag header added to them. 

1307 self.optimistic_concurrency_methods = ["PUT", "PATCH"] 

1308 

1309 self.safe_methods = list(SAFE_METHODS) 

1310 

1311 # If 'follow_redirects' is True, and this is set to True then 

1312 # all redirecs are followed, including unsafe ones. 

1313 self.follow_all_redirects = False 

1314 

1315 self.ignore_etag = False 

1316 

1317 self.force_exception_to_status_code = False 

1318 

1319 self.timeout = timeout 

1320 

1321 # Keep Authorization: headers on a redirect. 

1322 self.forward_authorization_headers = False 

1323 

1324 limit_kwargs = dict( 

1325 hard_limit=try_value_or_env( 

1326 int, decode_limit_hard, "httplib2_decode_limit_hard" 

1327 ), 

1328 safe_limit=try_value_or_env( 

1329 int, decode_limit_safe, "httplib2_decode_limit_safe" 

1330 ), 

1331 ratio=try_value_or_env( 

1332 float, decode_limit_ratio, "httplib2_decode_limit_ratio" 

1333 ), 

1334 chunk_size=try_value_or_env( 

1335 int, decode_limit_chunk, "httplib2_decode_limit_chunk" 

1336 ), 

1337 ) 

1338 self.limit_kwargs = {k: v for k, v in limit_kwargs.items() if v is not None} 

1339 

1340 def close(self): 

1341 """Close persistent connections, clear sensitive data. 

1342 Not thread-safe, requires external synchronization against concurrent requests. 

1343 """ 

1344 existing, self.connections = self.connections, {} 

1345 for _, c in existing.items(): 

1346 c.close() 

1347 self.certificates.clear() 

1348 self.clear_credentials() 

1349 

1350 def __getstate__(self): 

1351 state_dict = copy.copy(self.__dict__) 

1352 # In case request is augmented by some foreign object such as 

1353 # credentials which handle auth 

1354 if "request" in state_dict: 

1355 del state_dict["request"] 

1356 if "connections" in state_dict: 

1357 del state_dict["connections"] 

1358 return state_dict 

1359 

1360 def __setstate__(self, state): 

1361 self.__dict__.update(state) 

1362 self.connections = {} 

1363 

1364 def _auth_from_challenge(self, host, request_uri, headers, response, content): 

1365 """A generator that creates Authorization objects 

1366 that can be applied to requests. 

1367 """ 

1368 challenges = auth._parse_www_authenticate(response, "www-authenticate") 

1369 for cred in self.credentials.iter(host): 

1370 for scheme in AUTH_SCHEME_ORDER: 

1371 if scheme in challenges: 

1372 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) 

1373 

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication.

        The pair is stored in ``self.credentials``; with the default empty
        ``domain`` it applies to every host.
        """
        self.credentials.add(name, password, domain)

1378 

    def add_certificate(self, key, cert, domain, password=None):
        """Add a key and cert that will be used
        any time a request requires authentication.

        The entry is stored in ``self.certificates``; ``password`` is stored
        alongside and later passed as ``key_password`` to the connection.
        """
        self.certificates.add(key, cert, domain, password)

1383 

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication.

        Also drops the remembered authorization objects.
        """
        self.credentials.clear()
        self.authorizations = []

1389 

    def _conn_request(self, conn, request_uri, method, body, headers):
        """Send one request over ``conn`` and read the response, with retries.

        Up to RETRIES attempts are made. The request phase and the
        response phase each have their own error handling.

        Returns:
            (response, content): a Response and the body, passed through
            _decompressContent. For HEAD the body is b"" and the connection
            is closed.

        Raises:
            ServerNotFoundError: address resolution failed (socket.gaierror).
            socket.timeout, socket.error, http.client.HTTPException:
                re-raised when no retry applies.
        """
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                conn.close()
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except socket.error as e:
                errno_ = _errno_from_exception(e)
                if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient errors
                raise
            except http.client.HTTPException:
                if conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                pass
            try:
                response = conn.getresponse()
            except (http.client.BadStatusLine, http.client.ResponseNotReady):
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    # Resetting i gives a fresh set of attempts; the flag
                    # ensures this happens at most once.
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except socket.timeout:
                raise
            except (socket.error, http.client.HTTPException):
                conn.close()
                # NOTE(review): i was incremented at the top of this iteration,
                # so it is at least 1 here and this branch looks unreachable.
                if i == 0:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    raise
            else:
                content = b""
                if method == "HEAD":
                    conn.close()
                else:
                    content = response.read()
                # Wrap the http.client response in httplib2's Response.
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content, self.limit_kwargs)

            break
        return (response, content)

1463 

    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary.

        Steps, in order: apply an in-scope remembered authorization, send the
        request, answer a 401 using stored credentials, then either follow a
        redirect (through self.request with ``redirections - 1``) or update
        the cache for a 200/203 response to a safe method.

        Returns:
            (response, content) of the final response. After a redirect,
            ``response.previous`` holds a copy of the redirecting response.

        Raises:
            RedirectMissingLocation: redirect without a Location header
                (status 300 excepted).
            RedirectLimit: a redirect arrived while ``redirections`` is 0.
        """

        # Pair each in-scope authorization with its depth and use the one that
        # sorts first. The local name `auth` shadows the module-level `auth`
        # inside this method.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # A truthy auth.response() asks for the request to be sent again.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            # Try candidate authorizations until one gets past the 401, and
            # remember that one for later requests.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    # Permanent redirects are written to the cache so later
                    # requests can be redirected without contacting the server.
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Conditional and (by default) authorization headers are
                    # not carried over to the redirected request.
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        # 302 and 303 are followed with a body-less GET.
                        if response.status in [302, 303]:
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
            elif response.status in [200, 203] and method in self.safe_methods:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

1542 

    def _normalize_headers(self, headers):
        """Delegate to the module-level ``_normalize_headers`` function.

        Being a method, it gives subclasses a place to override header
        normalization.
        """
        return _normalize_headers(headers)

1545 

1546 # Need to catch and rebrand some exceptions 

1547 # Then need to optionally turn all exceptions into status codes 

1548 # including all socket.* and httplib.* exceptions. 

1549 

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin
        with either 'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
        There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a string
        object.

        Any extra headers that are to be sent with the request should be provided in the
        'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is DEFAULT_MAX_REDIRECTS.

        The 'connection_type', when given, is the connection class to use for a
        new connection instead of the SCHEME_TO_CONNECTION default.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.

        If 'force_exception_to_status_code' is set on the instance, exceptions
        are converted into a (response, content) pair with status 500, 408 or
        400 instead of being raised.
        """
        # Stays "" until the URI has been parsed; used by the timeout cleanup
        # in the except clause below.
        conn_key = ""

        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if "user-agent" not in headers:
                headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)
            # Prevent CWE-75 space injection to manipulate request via part of uri.
            # Prevent CWE-93 CRLF injection to modify headers via part of uri.
            uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

            # One persistent connection is kept per "scheme:authority".
            conn_key = scheme + ":" + authority
            conn = self.connections.get(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if issubclass(connection_type, HTTPSConnectionWithTimeout):
                    if certs:
                        # Only the first matching key/cert entry is used.
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            key_file=certs[0][0],
                            cert_file=certs[0][1],
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                            key_password=certs[0][2],
                        )
                    else:
                        conn = self.connections[conn_key] = connection_type(
                            authority,
                            timeout=self.timeout,
                            proxy_info=self.proxy_info,
                            ca_certs=self.ca_certs,
                            disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
                            tls_maximum_version=self.tls_maximum_version,
                            tls_minimum_version=self.tls_minimum_version,
                        )
                else:
                    conn = self.connections[conn_key] = connection_type(
                        authority, timeout=self.timeout, proxy_info=self.proxy_info
                    )
                conn.set_debuglevel(debuglevel)

            if "range" not in headers and "accept-encoding" not in headers:
                headers["accept-encoding"] = "gzip, deflate"

            # `info` holds the cached response headers; it stays an empty
            # message when there is no usable cache entry.
            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        # A cache entry is the header block and the body,
                        # separated by a blank line.
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            # Decode header values stored as encoded-words.
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        # Unparsable entry: drop it and continue uncached.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and not "if-none-match" in headers:
                            headers["if-none-match"] = info["etag"]
                        # NOTE(review): the guard looks for "last-modified" in
                        # the request headers while the header being set is
                        # "if-modified-since" - confirm this is intended.
                        if "last-modified" in info and not "last-modified" in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    # The caller sees the cached body as a 200 from cache.
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    # Nothing usable in the cache and the network is not
                    # allowed: answer with a synthetic 504.
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                # Discard the timed-out connection so it is not reused.
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content),})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content),})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)

1783 

1784 

class Response(dict):
    """Header mapping for an HTTP response, with status details as attributes.

    Behaves like a dict keyed by lower-cased header name (closer in spirit to
    email.message than to httplib.HTTPResponse).  The numeric status is
    exposed as the ``status`` attribute; the ``"status"`` key, when present,
    holds it in string form.
    """

    # True when this response was served from the local cache.
    fromcache = False

    # HTTP protocol version used by the server: 10 for HTTP/1.0, 11 for HTTP/1.1.
    version = 11

    # Status code returned by the server.
    status = 200

    # Reason phrase returned by the server.
    reason = "Ok"

    # Not assigned anywhere in this class; defaults to None.
    previous = None

    def __init__(self, info):
        """Populate the mapping from *info*.

        *info* may be an ``http.client.HTTPResponse``, an
        ``email.message.Message``, or any object offering ``items()`` that
        yields ``(name, value)`` pairs.  Header names are lower-cased on
        the way in.
        """
        if isinstance(info, http.client.HTTPResponse):
            # Live response: repeated headers are folded into one
            # comma-separated value.
            for raw_name, text in info.getheaders():
                name = raw_name.lower()
                earlier = self.get(name)
                self[name] = text if earlier is None else ", ".join((earlier, text))
            self.status = info.status
            self["status"] = str(self.status)
            self.version = info.version
            self.reason = info.reason
            return

        if isinstance(info, email.message.Message):
            # Parsed message: a "status" header is mandatory here, so a
            # missing one surfaces as KeyError.
            self.update((name.lower(), text) for name, text in list(info.items()))
            self.status = int(self["status"])
            return

        # Plain mapping: fall back to the current status when no "status"
        # entry was supplied.
        self.update((name.lower(), text) for name, text in info.items())
        self.status = int(self.get("status", self.status))

    def __getattr__(self, name):
        """Expose the mapping itself as ``.dict``; any other missing name is an error."""
        if name != "dict":
            raise AttributeError(name)
        return self

1831 

1832 

def try_value_or_env(to, value, env_key, default=None):
    """Return the first candidate that *to* converts without ValueError.

    Candidates are tried in order: *value*, the environment variable named
    *env_key*, then the one named ``env_key.upper()``.  ``None`` candidates
    are skipped.  Unlike ``to(a) or to(b) or to(c)``, a falsey conversion
    result such as 0 is accepted and returned.  If no candidate converts,
    *default* is returned.
    """
    environ = os.environ
    for candidate in (value, environ.get(env_key), environ.get(env_key.upper())):
        if candidate is not None:
            try:
                return to(candidate)
            except ValueError:
                # Unusable candidate; move on to the next source.
                continue
    return default