# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import threading
from collections import OrderedDict
from typing import Any, Generic, TypeVar

import attr

from hypothesis.errors import InvalidArgument

K = TypeVar("K")
V = TypeVar("V")


@attr.s(slots=True)
class Entry(Generic[K, V]):
    key: K = attr.ib()
    value: V = attr.ib()
    # Scores may be any sortable value, not just ints: LRUReusedCache, for
    # example, uses (generation, tick) tuples.
    score: Any = attr.ib()
    pins: int = attr.ib(default=0)

    @property
    def sort_key(self) -> tuple[Any, ...]:
        if self.pins == 0:
            # Unpinned entries are sorted by score.
            return (0, self.score)
        else:
            # Pinned entries sort after unpinned ones, since (1,) > (0, score)
            # for any score. Beyond that, we don't worry about their relative
            # order.
            return (1,)


class GenericCache(Generic[K, V]):
    """Generic supertype for cache implementations.

    Defines a dict-like mapping with a maximum size, where each key maps to a
    score as well as a value. When a write would cause the mapping to exceed
    its maximum size, the existing key with the smallest score is evicted
    first, and then the new key is added. If pinning means that no key can be
    evicted, ValueError is raised.

    A key has the following lifecycle:

    1. When a key is written for the first time, it is given the score
       self.new_entry(key, value).
    2. Whenever an existing key is read or written, self.on_access(key, value,
       score) is called. This returns the new score for the key.
    3. After a key is evicted, self.on_evict(key, value, score) is called.

    The cache will be in a valid state in all of these cases.

    Implementations must define new_entry, and may optionally override
    on_access and on_evict, to implement a specific scoring strategy.
    """
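
    # For illustration only (not part of Hypothesis): a minimal sketch of a
    # scoring strategy. A FIFO cache can be built by scoring each entry with
    # an insertion counter that is never updated on access, so the oldest
    # entry always has the smallest score and is evicted first:
    #
    #     class FIFOCache(GenericCache[K, V]):
    #         def __init__(self, max_size):
    #             super().__init__(max_size)
    #             self._counter = 0
    #
    #         def new_entry(self, key, value):
    #             self._counter += 1
    #             return self._counter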

    __slots__ = ("_threadlocal", "max_size")

    def __init__(self, max_size: int):
        if max_size <= 0:
            raise InvalidArgument("Cache size must be at least one.")

        self.max_size = max_size

        # Implementation: We store a binary heap of Entry objects in
        # self.data, with the heap property requiring that a parent's score is
        # <= that of its children. keys_to_indices then maps keys to their
        # index in the heap. We keep these two in sync automatically - the
        # heap is never reordered without updating the index.
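        #
        # As usual for an array-backed heap, the children of index i live at
        # indices 2 * i + 1 and 2 * i + 2, and the parent of index j at
        # (j - 1) // 2; for example, index 0 is the root and the parent of
        # index 4 is index 1.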
        self._threadlocal = threading.local()

    @property
    def keys_to_indices(self) -> dict[K, int]:
        try:
            return self._threadlocal.keys_to_indices
        except AttributeError:
            self._threadlocal.keys_to_indices = {}
            return self._threadlocal.keys_to_indices

    @property
    def data(self) -> list[Entry[K, V]]:
        try:
            return self._threadlocal.data
        except AttributeError:
            self._threadlocal.data = []
            return self._threadlocal.data

    def __len__(self) -> int:
        assert len(self.keys_to_indices) == len(self.data)
        return len(self.data)

    def __contains__(self, key: K) -> bool:
        return key in self.keys_to_indices

    def __getitem__(self, key: K) -> V:
        i = self.keys_to_indices[key]
        result = self.data[i]
        self.__entry_was_accessed(i)
        return result.value

    def __setitem__(self, key: K, value: V) -> None:
        evicted = None
        try:
            i = self.keys_to_indices[key]
        except KeyError:
            entry = Entry(key, value, self.new_entry(key, value))
            if len(self.data) >= self.max_size:
                evicted = self.data[0]
                if evicted.pins > 0:
                    raise ValueError(
                        "Cannot increase size of cache where all keys have been pinned."
                    ) from None

                # It's not clear how this can occur with a thread-local cache,
                # but we've seen failures here before (specifically under the
                # Windows CI tests).
                try:
                    del self.keys_to_indices[evicted.key]
                except KeyError:  # pragma: no cover
                    pass

                i = 0
                self.data[0] = entry
            else:
                i = len(self.data)
                self.data.append(entry)
            self.keys_to_indices[key] = i
            self.__balance(i)
        else:
            entry = self.data[i]
            assert entry.key == key
            entry.value = value
            self.__entry_was_accessed(i)

        if evicted is not None:
            if self.data[0] is not entry:
                assert evicted.sort_key <= self.data[0].sort_key
            self.on_evict(evicted.key, evicted.value, evicted.score)

    def __iter__(self):
        return iter(self.keys_to_indices)

    def pin(self, key: K, value: V) -> None:
        """Mark ``key`` as pinned (with the given value). That is, it may not
        be evicted until ``unpin(key)`` has been called. The same key may be
        pinned multiple times, possibly changing its value, and will not be
        unpinned until the same number of calls to unpin have been made.
        """
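        # An illustrative sketch (not from the test suite) of the reference
        # counting this implies:
        #
        #     cache.pin("k", 1)
        #     cache.pin("k", 2)   # still pinned; the value is now 2
        #     cache.unpin("k")    # still pinned (pin count 2 -> 1)
        #     cache.unpin("k")    # pin count 0; "k" may be evicted again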
        self[key] = value

        i = self.keys_to_indices[key]
        entry = self.data[i]
        entry.pins += 1
        if entry.pins == 1:
            self.__balance(i)

    def unpin(self, key: K) -> None:
        """Undo one previous call to ``pin(key)``. The value stays the same.
        Once all calls are undone this key may be evicted as normal."""
        i = self.keys_to_indices[key]
        entry = self.data[i]
        if entry.pins == 0:
            raise ValueError(f"Key {key!r} has not been pinned")
        entry.pins -= 1
        if entry.pins == 0:
            self.__balance(i)

    def is_pinned(self, key: K) -> bool:
        """Returns True if the key is currently pinned."""
        i = self.keys_to_indices[key]
        return self.data[i].pins > 0

    def clear(self) -> None:
        """Remove all keys, regardless of their pinned status."""
        del self.data[:]
        self.keys_to_indices.clear()

    def __repr__(self) -> str:
        return "{" + ", ".join(f"{e.key!r}: {e.value!r}" for e in self.data) + "}"

    def new_entry(self, key: K, value: V) -> Any:
        """Called when a key is written that does not currently appear in the
        map.

        Returns the score to associate with the key.
        """
        raise NotImplementedError

    def on_access(self, key: K, value: V, score: Any) -> Any:
        """Called every time a key that is already in the map is read or
        written.

        Returns the new score for the key.
        """
        return score

    def on_evict(self, key: K, value: V, score: Any) -> Any:
        """Called after a key has been evicted, with the score it had at the
        point of eviction."""

    def check_valid(self) -> None:
        """Debugging method for use in tests.

        Asserts that all of the cache's invariants hold. When everything is
        working correctly this should be an expensive no-op.
        """
        assert len(self.keys_to_indices) == len(self.data)
        for i, e in enumerate(self.data):
            assert self.keys_to_indices[e.key] == i
            for j in [i * 2 + 1, i * 2 + 2]:
                if j < len(self.data):
                    assert e.sort_key <= self.data[j].sort_key, self.data

    def __entry_was_accessed(self, i: int) -> None:
        entry = self.data[i]
        new_score = self.on_access(entry.key, entry.value, entry.score)
        if new_score != entry.score:
            entry.score = new_score
            # Changing the score of a pinned entry cannot unbalance the heap,
            # as we place all pinned entries after unpinned ones, regardless
            # of score.
            if entry.pins == 0:
                self.__balance(i)

    def __swap(self, i: int, j: int) -> None:
        assert i < j
        assert self.data[j].sort_key < self.data[i].sort_key
        self.data[i], self.data[j] = self.data[j], self.data[i]
        self.keys_to_indices[self.data[i].key] = i
        self.keys_to_indices[self.data[j].key] = j

    def __balance(self, i: int) -> None:
        """When we have made a modification to the heap such that
        the heap property has been violated locally around i but previously
        held for all other indexes (and no other values have been modified),
        this fixes the heap so that the heap property holds everywhere."""
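        # The heap property can only be violated at i itself, so the entry
        # needs to move either up or down, never both: at most one of the two
        # loops below will perform any swaps.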
        # Bubble up (if the score is too low for the current position)...
        while (parent := (i - 1) // 2) >= 0:
            if self.__out_of_order(parent, i):
                self.__swap(parent, i)
                i = parent
            else:
                break
        # ...or bubble down (if the score is too high for the current position).
        while children := [j for j in (2 * i + 1, 2 * i + 2) if j < len(self.data)]:
            smallest_child = min(children, key=lambda j: self.data[j].sort_key)
            if self.__out_of_order(i, smallest_child):
                self.__swap(i, smallest_child)
                i = smallest_child
            else:
                break

    def __out_of_order(self, i: int, j: int) -> bool:
        """Returns True if the indices i, j are in the wrong order.

        i must be the parent of j.
        """
        assert i == (j - 1) // 2
        return self.data[j].sort_key < self.data[i].sort_key


class LRUReusedCache(GenericCache[K, V]):
    """Currently the only concrete implementation of GenericCache that we use
    outside of tests.

    Adopts a modified least-recently-used eviction policy: it evicts the key
    that has been used least recently, but always preferentially evicts keys
    that have never been accessed after insertion. Among keys that have been
    accessed, it ignores the number of accesses.

    This retains most of the benefits of an LRU cache, but adds an element of
    scan-resistance to the process: if we end up scanning through a large
    number of keys without reusing them, this does not evict the existing
    entries in preference for the new ones.
    """
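
    # For illustration only, a sketch of the scan-resistance this buys us:
    # with max_size=2, after
    #
    #     cache["a"] = ...; cache["a"]          # "a" has been accessed
    #     cache["b"] = ...; cache["c"] = ...    # "b" and "c" never accessed
    #
    # inserting "c" evicts the never-accessed "b" rather than "a", because
    # accessed entries score (2, tick), which sorts after the (1, tick) of
    # never-accessed ones.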

    __slots__ = ("__tick",)

    def __init__(self, max_size: int):
        super().__init__(max_size)
        self.__tick: int = 0

    def tick(self) -> int:
        self.__tick += 1
        return self.__tick

    def new_entry(self, key: K, value: V) -> Any:
        return (1, self.tick())

    def on_access(self, key: K, value: V, score: Any) -> Any:
        return (2, self.tick())


class LRUCache(Generic[K, V]):
    """
    This is a drop-in replacement for GenericCache (despite the lack of
    inheritance) in performance-critical environments. It turns out that
    GenericCache's heap balancing for arbitrary scores can be quite expensive
    compared to the doubly-linked-list approach of lru_cache or OrderedDict.

    This class is a pure LRU and does not provide any sort of affinity towards
    the number of accesses beyond recency. If soft-pinning entries which have
    been accessed at least once is important, use LRUReusedCache.
    """

    # Here are some nice performance references for lru_cache vs OrderedDict:
    # https://github.com/python/cpython/issues/72426#issuecomment-1093727671
    # https://discuss.python.org/t/simplify-lru-cache/18192/6
    #
    # We use OrderedDict here because it is unclear to me whether we can
    # provide the same API as GenericCache using @lru_cache without messing
    # with lru_cache internals.
    #
    # Anecdotally, OrderedDict seems quite competitive with lru_cache, but
    # perhaps that is localized to our access patterns.
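    #
    # For illustration only, the pure-LRU behavior with max_size=2:
    #
    #     cache = LRUCache(2)
    #     cache["a"] = 1
    #     cache["b"] = 2
    #     cache["a"]          # "a" is now the most recently used
    #     cache["c"] = 3      # evicts "b", the least recently used
    #     assert "a" in cache and "b" not in cache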

    def __init__(self, max_size: int) -> None:
        assert max_size > 0
        self.max_size = max_size
        self._threadlocal = threading.local()

    @property
    def cache(self) -> OrderedDict[K, V]:
        try:
            return self._threadlocal.cache
        except AttributeError:
            self._threadlocal.cache = OrderedDict()
            return self._threadlocal.cache

    def __setitem__(self, key: K, value: V) -> None:
        self.cache[key] = value
        self.cache.move_to_end(key)

        while len(self.cache) > self.max_size:
            self.cache.popitem(last=False)

    def __getitem__(self, key: K) -> V:
        val = self.cache[key]
        self.cache.move_to_end(key)
        return val

    def __iter__(self):
        return iter(self.cache)

    def __len__(self) -> int:
        return len(self.cache)

    def __contains__(self, key: K) -> bool:
        return key in self.cache

    # implement the GenericCache interface, for tests
    def check_valid(self) -> None:
        pass