Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/lru_cache.py: 29%

201 statements

# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
# Copyright (C) 2022 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""A simple least-recently-used (LRU) cache."""

from typing import Callable, Dict, Generic, Iterable, Iterator, Optional, TypeVar

_null_key = object()


K = TypeVar("K")
V = TypeVar("V")


class _LRUNode(Generic[K, V]):
    """This maintains the linked-list which is the lru internals."""

    __slots__ = ("prev", "next_key", "key", "value", "cleanup", "size")

    prev: Optional["_LRUNode[K, V]"]
    next_key: K
    size: Optional[int]

    def __init__(
        self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
    ) -> None:
        self.prev = None
        self.next_key = _null_key  # type: ignore
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        #       one pointer from this object; we just need to decide if it
        #       actually costs us much of anything in normal usage.
        self.size = None

    def __repr__(self) -> str:
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return f"{self.__class__.__name__}({self.key!r} n:{self.next_key!r} p:{prev_key!r})"

    def run_cleanup(self) -> None:
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        del self.value

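Worth noting before the cache class itself: a node records its successor as a
`next_key` rather than as a direct object reference, so the forward link is
resolved through the cache dict and neighbouring nodes avoid forming prev/next
reference cycles. A minimal sketch of how a forward link is followed (the
`follow` helper is illustrative, not part of this module):

def follow(cache, node):
    # Hypothetical helper for illustration only. The tail of the list is
    # marked by the _null_key sentinel instead of a real key.
    if node.next_key is _null_key:
        return None  # 'node' is the least-recently-used entry
    return cache[node.next_key]
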

class LRUCache(Generic[K, V]):
    """A class which manages a cache of entries, removing unused ones."""

    _least_recently_used: Optional[_LRUNode[K, V]]
    _most_recently_used: Optional[_LRUNode[K, V]]

    def __init__(
        self, max_cache: int = 100, after_cleanup_count: Optional[int] = None
    ) -> None:
        self._cache: Dict[K, _LRUNode[K, V]] = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key: K) -> bool:
        return key in self._cache

    def __getitem__(self, key: K) -> V:
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__.
        # We also know more about the structure when __getitem__ succeeds:
        # self._most_recently_used must not be None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        assert node_prev
        assert mru
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self) -> int:
        return len(self._cache)

    def _walk_lru(self) -> Iterator[_LRUNode[K, V]]:
        """Walk the LRU list; only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError(
                    "the _most_recently_used entry is not"
                    " supposed to have a previous entry"
                    f" {node}"
                )
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError(
                        f"only the last node should have no next value: {node}"
                    )
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError(
                        f"inconsistency found, node.next.prev != node: {node}"
                    )
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError(
                        "only the _most_recently_used should"
                        f" not have a previous node: {node}"
                    )
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError(
                        f"inconsistency found, node.prev.next != node: {node}"
                    )
            yield node
            node = node_next
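
    # Illustration (not part of the module): because the walk always starts
    # at the most-recently-used node, it also shows the effect of __getitem__
    # promoting an entry. For example:
    #
    #     c = LRUCache(max_cache=3)
    #     c[1] = "a"
    #     c[2] = "b"
    #     [n.key for n in c._walk_lru()]  # -> [2, 1]
    #     c[1]                            # promote key 1
    #     [n.key for n in c._walk_lru()]  # -> [1, 2]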

    def add(
        self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()
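
    # Illustration (not part of the module): with max_cache=2 the default
    # after_cleanup_count is 2 * 8 // 10 == 1, so a third add() triggers
    # cleanup() and the two oldest entries are handed to their callbacks:
    #
    #     evicted = []
    #     c = LRUCache(max_cache=2)
    #     c.add("a", 1, cleanup=lambda k, v: evicted.append(k))
    #     c.add("b", 2, cleanup=lambda k, v: evicted.append(k))
    #     c.add("c", 3, cleanup=lambda k, v: evicted.append(k))
    #     evicted  # -> ["a", "b"]; only "c" remains cached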

    def cache_size(self) -> int:
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self) -> Iterable[K]:
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        Returns: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self) -> Dict[K, V]:
        """Get the key:value pairs as a dict."""
        return {k: n.value for k, n in self._cache.items()}

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key: K, value: V) -> None:
        """Add a value to the cache; there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node: _LRUNode[K, V]) -> None:
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # Remove the node from its old position, then insert it at the front.
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key  # type: ignore

    def _remove_lru(self) -> None:
        """Remove the least-recently-used entry and handle the consequences.

        The entry is dropped from the cache and its cleanup callback, if any,
        is invoked.
        """
        assert self._least_recently_used
        self._remove_node(self._least_recently_used)

    def clear(self) -> None:
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache: int, after_cleanup_count: Optional[int] = None) -> None:
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache, after_cleanup_count=after_cleanup_count)

    def _update_max_cache(
        self, max_cache: int, after_cleanup_count: Optional[int] = None
    ) -> None:
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Default to shrinking to 80% of max_cache; integer division keeps
            # the threshold an int, matching _update_max_size below.
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
        self.cleanup()

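A short usage sketch for LRUCache (illustrative only; the demo function is not
part of dulwich): the cache behaves like a bounded mapping, and get() refreshes
recency the same way __getitem__ does.

def _lru_cache_demo() -> None:
    # Hypothetical demo function, not part of this module.
    cache: LRUCache[str, int] = LRUCache(max_cache=10)
    cache["a"] = 1  # __setitem__ is add() without a cleanup callback
    cache.add("b", 2)  # the explicit spelling of the same operation
    assert cache.get("a") == 1  # marks "a" as most recently used
    assert cache.get("missing") is None  # default for absent keys
    assert sorted(cache.keys()) == ["a", "b"]
    cache.resize(5)  # anything above 5 * 8 // 10 == 4 entries would be evicted
    assert len(cache) == 2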

class LRUSizeCache(LRUCache[K, V]):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    _compute_size: Callable[[V], int]

    def __init__(
        self,
        max_size: int = 1024 * 1024,
        after_cleanup_size: Optional[int] = None,
        compute_size: Optional[Callable[[V], int]] = None,
    ) -> None:
        """Create a new LRUSizeCache.

        Args:
          max_size: The max number of bytes to store before we start
            clearing out entries.
          after_cleanup_size: After cleaning up, shrink everything to this
            size.
          compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'.
        """
        self._value_size = 0
        if compute_size is None:
            self._compute_size = len  # type: ignore
        else:
            self._compute_size = compute_size
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size / 512), 1))

    def add(
        self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError("cannot use _null_key as a key")
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            assert node.size is not None
            self._value_size -= node.size
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()
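
    # Illustration (not part of the module): a value at least as large as
    # _after_cleanup_size is never cached, since cleanup() could never shrink
    # the cache below the threshold while holding it. It is handed straight
    # to the cleanup callback instead:
    #
    #     c = LRUSizeCache(max_size=100)  # after_cleanup_size == 80
    #     c.add("big", b"x" * 200, cleanup=lambda k, v: print("dropped", k))
    #     "big" in c  # -> False; "dropped big" was printed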

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        assert node.size is not None
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size: int, after_cleanup_size: Optional[int] = None) -> None:
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size / 512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(
        self, max_size: int, after_cleanup_size: Optional[int] = None
    ) -> None:
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
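
Finally, a hedged end-to-end sketch of LRUSizeCache with a custom compute_size
(the demo names are illustrative, not part of dulwich):

if __name__ == "__main__":
    def total_length(value: list) -> int:
        # Size of a list of strings is the sum of the string lengths.
        return sum(len(s) for s in value)

    cache: LRUSizeCache[str, list] = LRUSizeCache(
        max_size=20, compute_size=total_length
    )
    cache["greeting"] = ["hello", "world"]  # size 10, total 10
    cache["farewell"] = ["goodbye"]  # size 7, total 17
    cache["extra"] = ["abcdefgh"]  # size 8, total 25 > 20 -> cleanup()
    # cleanup() evicts from the LRU end until total <= 16 (20 * 8 // 10),
    # so "greeting" is dropped and the two newer entries survive.
    print(sorted(cache.keys()))  # ['extra', 'farewell']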