# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
# Copyright (C) 2022 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

22"""A simple least-recently-used (LRU) cache."""

from typing import Callable, Dict, Generic, Iterable, Iterator, Optional, TypeVar

_null_key = object()


K = TypeVar("K")
V = TypeVar("V")


class _LRUNode(Generic[K, V]):
    """This maintains the linked list which forms the LRU internals."""

    __slots__ = ("prev", "next_key", "key", "value", "cleanup", "size")

    prev: Optional["_LRUNode[K, V]"]
    next_key: K
    size: Optional[int]

    def __init__(self, key: K, value: V, cleanup=None) -> None:
        self.prev = None
        self.next_key = _null_key  # type: ignore
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        #       one pointer from this object, we just need to decide if it
        #       actually costs us much of anything in normal usage
        self.size = None

    def __repr__(self) -> str:
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return f"{self.__class__.__name__}({self.key!r} n:{self.next_key!r} p:{prev_key!r})"

    def run_cleanup(self) -> None:
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        del self.value


class LRUCache(Generic[K, V]):
69 """A class which manages a cache of entries, removing unused ones."""

    _least_recently_used: Optional[_LRUNode[K, V]]
    _most_recently_used: Optional[_LRUNode[K, V]]

    def __init__(
        self, max_cache: int = 100, after_cleanup_count: Optional[int] = None
    ) -> None:
        self._cache: Dict[K, _LRUNode[K, V]] = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key: K) -> bool:
        return key in self._cache

    def __getitem__(self, key: K) -> V:
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about the structure here: if __getitem__
        # succeeds, then self._most_recently_used must not be None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        assert node_prev
        assert mru
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self) -> int:
        return len(self._cache)

    def _walk_lru(self) -> Iterator[_LRUNode[K, V]]:
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError(
                    "the _most_recently_used entry is not"
                    " supposed to have a previous entry"
                    f" {node}"
                )
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError(
                        f"only the last node should have no next value: {node}"
                    )
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError(
                        f"inconsistency found, node.next.prev != node: {node}"
                    )
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError(
                        "only the _most_recently_used should"
                        f" not have a previous node: {node}"
                    )
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError(
                        f"inconsistency found, node.prev.next != node: {node}"
                    )
            yield node
            node = node_next

    def add(
        self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
    ) -> None:
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
                   'value' should be cleaned up.
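
        A sketch of using a cleanup callback (illustrative only):

            >>> evicted = []
            >>> cache = LRUCache(max_cache=1, after_cleanup_count=1)
            >>> cache.add("a", 1, cleanup=lambda k, v: evicted.append((k, v)))
            >>> cache.add("b", 2)
            >>> evicted
            [('a', 1)]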
173 """
174 if key is _null_key:
175 raise ValueError("cannot use _null_key as a key")
176 if key in self._cache:
177 node = self._cache[key]
178 node.run_cleanup()
179 node.value = value
180 node.cleanup = cleanup
181 else:
182 node = _LRUNode(key, value, cleanup=cleanup)
183 self._cache[key] = node
184 self._record_access(node)
185
186 if len(self._cache) > self._max_cache:
187 # Trigger the cleanup
188 self.cleanup()
189
190 def cache_size(self) -> int:
191 """Get the number of entries we will cache."""
192 return self._max_cache
193
194 def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
195 node = self._cache.get(key, None)
196 if node is None:
197 return default
198 self._record_access(node)
199 return node.value
200
201 def keys(self) -> Iterable[K]:
202 """Get the list of keys currently cached.
203
204 Note that values returned here may not be available by the time you
205 request them later. This is simply meant as a peak into the current
206 state.
207
208 Returns: An unordered list of keys that are currently cached.
209 """
210 return self._cache.keys()
211
212 def items(self) -> Dict[K, V]:
213 """Get the key:value pairs as a dict."""
214 return {k: n.value for k, n in self._cache.items()}
215
216 def cleanup(self):
217 """Clear the cache until it shrinks to the requested size.
218
219 This does not completely wipe the cache, just makes sure it is under
220 the after_cleanup_count.
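
        For example (illustrative only):

            >>> cache = LRUCache(max_cache=5, after_cleanup_count=2)
            >>> for i in range(5):
            ...     cache[i] = str(i)
            >>> len(cache)
            5
            >>> cache.cleanup()
            >>> len(cache)
            2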
221 """
222 # Make sure the cache is shrunk to the correct size
223 while len(self._cache) > self._after_cleanup_count:
224 self._remove_lru()
225
226 def __setitem__(self, key: K, value: V) -> None:
227 """Add a value to the cache, there will be no cleanup function."""
228 self.add(key, value, cleanup=None)
229
230 def _record_access(self, node: _LRUNode[K, V]) -> None:
231 """Record that key was accessed."""
232 # Move 'node' to the front of the queue
233 if self._most_recently_used is None:
234 self._most_recently_used = node
235 self._least_recently_used = node
236 return
237 elif node is self._most_recently_used:
238 # Nothing to do, this node is already at the head of the queue
239 return
240 # We've taken care of the tail pointer, remove the node, and insert it
241 # at the front
242 # REMOVE
243 if node is self._least_recently_used:
244 self._least_recently_used = node.prev
245 if node.prev is not None:
246 node.prev.next_key = node.next_key
247 if node.next_key is not _null_key:
248 node_next = self._cache[node.next_key]
249 node_next.prev = node.prev
250 # INSERT
251 node.next_key = self._most_recently_used.key
252 self._most_recently_used.prev = node
253 self._most_recently_used = node
254 node.prev = None
255
256 def _remove_node(self, node: _LRUNode[K, V]) -> None:
257 if node is self._least_recently_used:
258 self._least_recently_used = node.prev
259 self._cache.pop(node.key)
260 # If we have removed all entries, remove the head pointer as well
261 if self._least_recently_used is None:
262 self._most_recently_used = None
263 node.run_cleanup()
264 # Now remove this node from the linked list
265 if node.prev is not None:
266 node.prev.next_key = node.next_key
267 if node.next_key is not _null_key:
268 node_next = self._cache[node.next_key]
269 node_next.prev = node.prev
270 # And remove this node's pointers
271 node.prev = None
272 node.next_key = _null_key # type: ignore
273
274 def _remove_lru(self) -> None:
275 """Remove one entry from the lru, and handle consequences.
276
277 If there are no more references to the lru, then this entry should be
278 removed from the cache.
279 """
280 assert self._least_recently_used
281 self._remove_node(self._least_recently_used)
282
283 def clear(self) -> None:
284 """Clear out all of the cache."""
285 # Clean up in LRU order
286 while self._cache:
287 self._remove_lru()
288
289 def resize(self, max_cache: int, after_cleanup_count: Optional[int] = None) -> None:
290 """Change the number of entries that will be cached."""
291 self._update_max_cache(max_cache, after_cleanup_count=after_cleanup_count)
292
293 def _update_max_cache(self, max_cache, after_cleanup_count=None):
294 self._max_cache = max_cache
295 if after_cleanup_count is None:
296 self._after_cleanup_count = self._max_cache * 8 / 10
297 else:
298 self._after_cleanup_count = min(after_cleanup_count, self._max_cache)
299 self.cleanup()
300
301
302class LRUSizeCache(LRUCache[K, V]):
303 """An LRUCache that removes things based on the size of the values.
304
305 This differs in that it doesn't care how many actual items there are,
306 it just restricts the cache to be cleaned up after so much data is stored.
307
308 The size of items added will be computed using compute_size(value), which
309 defaults to len() if not supplied.
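
    A small illustrative example, caching byte strings and bounding the total
    number of bytes held:

        >>> cache = LRUSizeCache(max_size=20, after_cleanup_size=10)
        >>> cache["a"] = b"x" * 8
        >>> cache["b"] = b"y" * 8
        >>> cache["c"] = b"z" * 8  # total size now exceeds max_size
        >>> sorted(cache.keys())   # cleanup evicted the older entries
        ['c']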
310 """
311
312 _compute_size: Callable[[V], int]
313
314 def __init__(
315 self,
316 max_size: int = 1024 * 1024,
317 after_cleanup_size: Optional[int] = None,
318 compute_size: Optional[Callable[[V], int]] = None,
319 ) -> None:
320 """Create a new LRUSizeCache.
321
322 Args:
323 max_size: The max number of bytes to store before we start
324 clearing out entries.
325 after_cleanup_size: After cleaning up, shrink everything to this
326 size.
327 compute_size: A function to compute the size of the values. We
328 use a function here, so that you can pass 'len' if you are just
329 using simple strings, or a more complex function if you are using
330 something like a list of strings, or even a custom object.
331 The function should take the form "compute_size(value) => integer".
332 If not supplied, it defaults to 'len()'
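
        For example, to cache lists of byte strings one might pass a size
        function like this (an illustrative sketch):

            >>> cache = LRUSizeCache(
            ...     max_size=100, compute_size=lambda chunks: sum(map(len, chunks)))
            >>> cache["key"] = [b"abc", b"defg"]
            >>> cache["key"]
            [b'abc', b'defg']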
333 """
334 self._value_size = 0
335 if compute_size is None:
336 self._compute_size = len # type: ignore
337 else:
338 self._compute_size = compute_size
339 self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
340 LRUCache.__init__(self, max_cache=max(int(max_size / 512), 1))
341
342 def add(
343 self, key: K, value: V, cleanup: Optional[Callable[[K, V], None]] = None
344 ) -> None:
345 """Add a new value to the cache.
346
347 Also, if the entry is ever removed from the cache, call
348 cleanup(key, value).
349
350 Args:
351 key: The key to store it under
352 value: The object to store
353 cleanup: None or a function taking (key, value) to indicate
354 'value' should be cleaned up.
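
        Note that a value whose computed size is at least after_cleanup_size is
        never cached: any existing entry for the key is removed and the cleanup
        callback, if given, is invoked immediately. An illustrative sketch:

            >>> cache = LRUSizeCache(max_size=10, after_cleanup_size=10)
            >>> cache.add("big", b"x" * 10)  # size >= after_cleanup_size
            >>> "big" in cache
            False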
355 """
356 if key is _null_key:
357 raise ValueError("cannot use _null_key as a key")
358 node = self._cache.get(key, None)
359 value_len = self._compute_size(value)
360 if value_len >= self._after_cleanup_size:
361 # The new value is 'too big to fit', as it would fill up/overflow
362 # the cache all by itself
363 if node is not None:
364 # We won't be replacing the old node, so just remove it
365 self._remove_node(node)
366 if cleanup is not None:
367 cleanup(key, value)
368 return
369 if node is None:
370 node = _LRUNode(key, value, cleanup=cleanup)
371 self._cache[key] = node
372 else:
373 assert node.size is not None
374 self._value_size -= node.size
375 node.size = value_len
376 self._value_size += value_len
377 self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self) -> None:
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node: _LRUNode[K, V]) -> None:
        assert node.size is not None
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size: int, after_cleanup_size: Optional[int] = None) -> None:
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size / 512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(
        self, max_size: int, after_cleanup_size: Optional[int] = None
    ) -> None:
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)