Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pip/_vendor/cachecontrol/controller.py: 12%

233 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-26 06:33 +0000

1# SPDX-FileCopyrightText: 2015 Eric Larson 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5""" 

6The httplib2 algorithms ported for use with requests. 

7""" 

8from __future__ import annotations 

9 

10import calendar 

11import logging 

12import re 

13import time 

14from email.utils import parsedate_tz 

15from typing import TYPE_CHECKING, Collection, Mapping 

16 

17from pip._vendor.requests.structures import CaseInsensitiveDict 

18 

19from pip._vendor.cachecontrol.cache import DictCache, SeparateBodyBaseCache 

20from pip._vendor.cachecontrol.serialize import Serializer 

21 

22if TYPE_CHECKING: 

23 from typing import Literal 

24 

25 from pip._vendor.requests import PreparedRequest 

26 from pip._vendor.urllib3 import HTTPResponse 

27 

28 from pip._vendor.cachecontrol.cache import BaseCache 

29 

logger = logging.getLogger(__name__)

# Regex from RFC 3986 Appendix B.  Every top-level group is optional, so
# the pattern matches any string (including the empty string).
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

# Redirect statuses that are intrinsically cacheable because they are
# permanent (see https://tools.ietf.org/html/rfc7231#section-6.4.2).
PERMANENT_REDIRECT_STATUSES = (301, 308)


def parse_uri(uri: str) -> tuple[str | None, str | None, str, str | None, str | None]:
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    The path group always matches (possibly as ""), but scheme,
    authority, query and fragment are optional in the grammar and are
    returned as ``None`` when absent from *uri* -- the previous
    ``tuple[str, str, str, str, str]`` annotation was inaccurate, and
    callers such as ``CacheController._urlnorm`` already check for the
    missing-component case.
    """
    match = URI.match(uri)
    # The regex matches any input, so this only narrows Optional[Match]
    # for type checkers; it cannot actually fire.
    assert match is not None
    groups = match.groups()
    # groups: 1=scheme, 3=authority, 4=path, 6=query, 8=fragment
    return (groups[1], groups[3], groups[4], groups[6], groups[8])

46 

47 

class CacheController:
    """Decides whether requests/responses should be served from or
    written to a cache, and performs those cache operations."""

    def __init__(
        self,
        cache: BaseCache | None = None,
        cache_etags: bool = True,
        serializer: Serializer | None = None,
        status_codes: Collection[int] | None = None,
    ):
        # Fall back to a process-local in-memory cache when the caller
        # does not supply one.
        if cache is None:
            self.cache: BaseCache = DictCache()
        else:
            self.cache = cache

        self.cache_etags = cache_etags

        # Build a default serializer only when none (or a falsy one)
        # was provided.
        self.serializer = serializer if serializer else Serializer()

        # Response status codes eligible for caching; the default set
        # deliberately excludes 206 (partial content).
        self.cacheable_status_codes = (
            status_codes if status_codes else (200, 203, 300, 301, 308)
        )

62 

63 @classmethod 

64 def _urlnorm(cls, uri: str) -> str: 

65 """Normalize the URL to create a safe key for the cache""" 

66 (scheme, authority, path, query, fragment) = parse_uri(uri) 

67 if not scheme or not authority: 

68 raise Exception("Only absolute URIs are allowed. uri = %s" % uri) 

69 

70 scheme = scheme.lower() 

71 authority = authority.lower() 

72 

73 if not path: 

74 path = "/" 

75 

76 # Could do syntax based normalization of the URI before 

77 # computing the digest. See Section 6.2.2 of Std 66. 

78 request_uri = query and "?".join([path, query]) or path 

79 defrag_uri = scheme + "://" + authority + request_uri 

80 

81 return defrag_uri 

82 

    @classmethod
    def cache_url(cls, uri: str) -> str:
        # Public entry point for computing the cache key of *uri*.
        # Delegates to _urlnorm, which lower-cases scheme/authority,
        # drops the fragment, and raises on non-absolute URIs.
        return cls._urlnorm(uri)

86 

87 def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]: 

88 known_directives = { 

89 # https://tools.ietf.org/html/rfc7234#section-5.2 

90 "max-age": (int, True), 

91 "max-stale": (int, False), 

92 "min-fresh": (int, True), 

93 "no-cache": (None, False), 

94 "no-store": (None, False), 

95 "no-transform": (None, False), 

96 "only-if-cached": (None, False), 

97 "must-revalidate": (None, False), 

98 "public": (None, False), 

99 "private": (None, False), 

100 "proxy-revalidate": (None, False), 

101 "s-maxage": (int, True), 

102 } 

103 

104 cc_headers = headers.get("cache-control", headers.get("Cache-Control", "")) 

105 

106 retval: dict[str, int | None] = {} 

107 

108 for cc_directive in cc_headers.split(","): 

109 if not cc_directive.strip(): 

110 continue 

111 

112 parts = cc_directive.split("=", 1) 

113 directive = parts[0].strip() 

114 

115 try: 

116 typ, required = known_directives[directive] 

117 except KeyError: 

118 logger.debug("Ignoring unknown cache-control directive: %s", directive) 

119 continue 

120 

121 if not typ or not required: 

122 retval[directive] = None 

123 if typ: 

124 try: 

125 retval[directive] = typ(parts[1].strip()) 

126 except IndexError: 

127 if required: 

128 logger.debug( 

129 "Missing value for cache-control " "directive: %s", 

130 directive, 

131 ) 

132 except ValueError: 

133 logger.debug( 

134 "Invalid value for cache-control directive " "%s, must be %s", 

135 directive, 

136 typ.__name__, 

137 ) 

138 

139 return retval 

140 

141 def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None: 

142 """ 

143 Load a cached response, or return None if it's not available. 

144 """ 

145 cache_url = request.url 

146 assert cache_url is not None 

147 cache_data = self.cache.get(cache_url) 

148 if cache_data is None: 

149 logger.debug("No cache entry available") 

150 return None 

151 

152 if isinstance(self.cache, SeparateBodyBaseCache): 

153 body_file = self.cache.get_body(cache_url) 

154 else: 

155 body_file = None 

156 

157 result = self.serializer.loads(request, cache_data, body_file) 

158 if result is None: 

159 logger.warning("Cache entry deserialization failed, entry ignored") 

160 return result 

161 

    def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]:
        """
        Return a cached response if it exists in the cache, otherwise
        return False.

        Implements an RFC 7234-style freshness check: the entry's age
        (computed from its Date header, padded by the request's
        min-fresh) is compared against a freshness lifetime taken from
        the response's max-age or Expires header, with the request's
        own max-age taking precedence over both.
        """
        assert request.url is not None
        cache_url = self.cache_url(request.url)
        logger.debug('Looking up "%s" in the cache', cache_url)
        cc = self.parse_cache_control(request.headers)

        # Bail out if the request insists on fresh data
        if "no-cache" in cc:
            logger.debug('Request header has "no-cache", cache bypassed')
            return False

        if "max-age" in cc and cc["max-age"] == 0:
            logger.debug('Request header has "max_age" as 0, cache bypassed')
            return False

        # Check whether we can load the response from the cache:
        resp = self._load_from_cache(request)
        if not resp:
            return False

        # If we have a cached permanent redirect, return it immediately. We
        # don't need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
        #
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # Client can try to refresh the value by repeating the request
        # with cache busting headers as usual (ie no-cache).
        if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
            msg = (
                "Returning cached permanent redirect response "
                "(ignoring date and etag information)"
            )
            logger.debug(msg)
            return resp

        headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers)
        if not headers or "date" not in headers:
            if "etag" not in headers:
                # Without date or etag, the cached response can never be used
                # and should be deleted.
                logger.debug("Purging cached response: no date or etag")
                self.cache.delete(cache_url)
            # With an etag the entry is kept so conditional_headers()
            # can still revalidate it, but it cannot be served as-is.
            logger.debug("Ignoring cached response: no date")
            return False

        now = time.time()
        time_tuple = parsedate_tz(headers["date"])
        assert time_tuple is not None
        date = calendar.timegm(time_tuple[:6])
        # Clamp to zero in case of clock skew (Date in the future).
        current_age = max(0, now - date)
        logger.debug("Current age based on date: %i", current_age)

        # TODO: There is an assumption that the result will be a
        #       urllib3 response object. This may not be best since we
        #       could probably avoid instantiating or constructing the
        #       response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        max_age = resp_cc.get("max-age")
        if max_age is not None:
            freshness_lifetime = max_age
            logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)

        # If there isn't a max-age, check for an expires header
        elif "expires" in headers:
            expires = parsedate_tz(headers["expires"])
            if expires is not None:
                expire_time = calendar.timegm(expires[:6]) - date
                freshness_lifetime = max(0, expire_time)
                logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)

        # Determine if we are setting freshness limit in the
        # request. Note, this overrides what was in the response.
        max_age = cc.get("max-age")
        if max_age is not None:
            freshness_lifetime = max_age
            logger.debug(
                "Freshness lifetime from request max-age: %i", freshness_lifetime
            )

        min_fresh = cc.get("min-fresh")
        if min_fresh is not None:
            # adjust our current age by our min fresh
            current_age += min_fresh
            logger.debug("Adjusted current age from min-fresh: %i", current_age)

        # Return entry if it is fresh enough
        if freshness_lifetime > current_age:
            logger.debug('The response is "fresh", returning cached response')
            logger.debug("%i > %i", freshness_lifetime, current_age)
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if "etag" not in headers:
            logger.debug('The cached response is "stale" with no etag, purging')
            self.cache.delete(cache_url)

        # return the original handler
        return False

271 

272 def conditional_headers(self, request: PreparedRequest) -> dict[str, str]: 

273 resp = self._load_from_cache(request) 

274 new_headers = {} 

275 

276 if resp: 

277 headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers) 

278 

279 if "etag" in headers: 

280 new_headers["If-None-Match"] = headers["ETag"] 

281 

282 if "last-modified" in headers: 

283 new_headers["If-Modified-Since"] = headers["Last-Modified"] 

284 

285 return new_headers 

286 

287 def _cache_set( 

288 self, 

289 cache_url: str, 

290 request: PreparedRequest, 

291 response: HTTPResponse, 

292 body: bytes | None = None, 

293 expires_time: int | None = None, 

294 ) -> None: 

295 """ 

296 Store the data in the cache. 

297 """ 

298 if isinstance(self.cache, SeparateBodyBaseCache): 

299 # We pass in the body separately; just put a placeholder empty 

300 # string in the metadata. 

301 self.cache.set( 

302 cache_url, 

303 self.serializer.dumps(request, response, b""), 

304 expires=expires_time, 

305 ) 

306 # body is None can happen when, for example, we're only updating 

307 # headers, as is the case in update_cached_response(). 

308 if body is not None: 

309 self.cache.set_body(cache_url, body) 

310 else: 

311 self.cache.set( 

312 cache_url, 

313 self.serializer.dumps(request, response, body), 

314 expires=expires_time, 

315 ) 

316 

    def cache_response(
        self,
        request: PreparedRequest,
        response: HTTPResponse,
        body: bytes | None = None,
        status_codes: Collection[int] | None = None,
    ) -> None:
        """
        Algorithm for caching requests.

        This assumes a requests Response object.

        The decision cascade is, in order: cacheable status code ->
        body/Content-Length consistency -> no-store -> "Vary: *" ->
        ETag present -> permanent redirect -> Date with max-age or
        Expires.  Responses failing every branch are simply not stored.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        cacheable_status_codes = status_codes or self.cacheable_status_codes
        if response.status not in cacheable_status_codes:
            logger.debug(
                "Status code %s not in %s", response.status, cacheable_status_codes
            )
            return

        response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
            response.headers
        )

        # Reference date for computing relative expiry times below;
        # 0 when the response carries no Date header.
        if "date" in response_headers:
            time_tuple = parsedate_tz(response_headers["date"])
            assert time_tuple is not None
            date = calendar.timegm(time_tuple[:6])
        else:
            date = 0

        # If we've been given a body, our response has a Content-Length, that
        # Content-Length is valid then we can check to see if the body we've
        # been given matches the expected size, and if it doesn't we'll just
        # skip trying to cache it.
        if (
            body is not None
            and "content-length" in response_headers
            and response_headers["content-length"].isdigit()
            and int(response_headers["content-length"]) != len(body)
        ):
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        assert request.url is not None
        cache_url = self.cache_url(request.url)
        logger.debug('Updating cache with response from "%s"', cache_url)

        # Delete it from the cache if we happen to have it stored there
        no_store = False
        if "no-store" in cc:
            no_store = True
            logger.debug('Response header has "no-store"')
        if "no-store" in cc_req:
            no_store = True
            logger.debug('Request header has "no-store"')
        if no_store and self.cache.get(cache_url):
            logger.debug('Purging existing cache entry to honor "no-store"')
            self.cache.delete(cache_url)
        if no_store:
            return

        # https://tools.ietf.org/html/rfc7234#section-4.1:
        # A Vary header field-value of "*" always fails to match.
        # Storing such a response leads to a deserialization warning
        # during cache lookup and is not allowed to ever be served,
        # so storing it can be avoided.
        if "*" in response_headers.get("vary", ""):
            logger.debug('Response header has "Vary: *"')
            return

        # If we've been given an etag, then keep the response
        if self.cache_etags and "etag" in response_headers:
            expires_time = 0
            if response_headers.get("expires"):
                expires = parsedate_tz(response_headers["expires"])
                if expires is not None:
                    expires_time = calendar.timegm(expires[:6]) - date

            # Keep etag-bearing entries for at least 14 days so they
            # remain available for conditional revalidation.
            expires_time = max(expires_time, 14 * 86400)

            logger.debug(f"etag object cached for {expires_time} seconds")
            logger.debug("Caching due to etag")
            self._cache_set(cache_url, request, response, body, expires_time)

        # Add to the cache any permanent redirects. We do this before looking
        # that the Date headers.
        elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
            logger.debug("Caching permanent redirect")
            # Redirects carry no useful body, so store an empty one.
            self._cache_set(cache_url, request, response, b"")

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif "date" in response_headers:
            time_tuple = parsedate_tz(response_headers["date"])
            assert time_tuple is not None
            date = calendar.timegm(time_tuple[:6])
            # cache when there is a max-age > 0
            max_age = cc.get("max-age")
            if max_age is not None and max_age > 0:
                logger.debug("Caching b/c date exists and max-age > 0")
                expires_time = max_age
                self._cache_set(
                    cache_url,
                    request,
                    response,
                    body,
                    expires_time,
                )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif "expires" in response_headers:
                if response_headers["expires"]:
                    expires = parsedate_tz(response_headers["expires"])
                    if expires is not None:
                        expires_time = calendar.timegm(expires[:6]) - date
                    else:
                        expires_time = None

                    logger.debug(
                        "Caching b/c of expires header. expires in {} seconds".format(
                            expires_time
                        )
                    )
                    self._cache_set(
                        cache_url,
                        request,
                        response,
                        body,
                        expires_time,
                    )

453 

454 def update_cached_response( 

455 self, request: PreparedRequest, response: HTTPResponse 

456 ) -> HTTPResponse: 

457 """On a 304 we will get a new set of headers that we want to 

458 update our cached value with, assuming we have one. 

459 

460 This should only ever be called when we've sent an ETag and 

461 gotten a 304 as the response. 

462 """ 

463 assert request.url is not None 

464 cache_url = self.cache_url(request.url) 

465 cached_response = self._load_from_cache(request) 

466 

467 if not cached_response: 

468 # we didn't have a cached response 

469 return response 

470 

471 # Lets update our headers with the headers from the new request: 

472 # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1 

473 # 

474 # The server isn't supposed to send headers that would make 

475 # the cached body invalid. But... just in case, we'll be sure 

476 # to strip out ones we know that might be problmatic due to 

477 # typical assumptions. 

478 excluded_headers = ["content-length"] 

479 

480 cached_response.headers.update( 

481 { 

482 k: v 

483 for k, v in response.headers.items() # type: ignore[no-untyped-call] 

484 if k.lower() not in excluded_headers 

485 } 

486 ) 

487 

488 # we want a 200 b/c we have content via the cache 

489 cached_response.status = 200 

490 

491 # update our cache 

492 self._cache_set(cache_url, request, cached_response) 

493 

494 return cached_response