Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/hypothesis/internal/cache.py: 63%

145 statements  

# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import attr


@attr.s(slots=True)
class Entry:
    key = attr.ib()
    value = attr.ib()
    score = attr.ib()
    pins = attr.ib(default=0)

    @property
    def sort_key(self):
        if self.pins == 0:
            # Unpinned entries are sorted by score.
            return (0, self.score)
        else:
            # Pinned entries sort after unpinned ones. Beyond that, we don't
            # worry about their relative order.
            return (1,)
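
# A minimal sketch (not part of the original file) of how Entry.sort_key is
# meant to be read: pinned entries always sort after unpinned ones, so the
# heap root is never a pinned entry while any unpinned entry remains. The
# helper name below is hypothetical and exists only for illustration.
def _demo_sort_key_ordering():
    unpinned = Entry(key="a", value=1, score=5)
    pinned = Entry(key="b", value=2, score=0, pins=1)
    # The pinned entry sorts last even though its score is lower.
    assert unpinned.sort_key < pinned.sort_key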

class GenericCache:
    """Generic supertype for cache implementations.

    Defines a dict-like mapping with a maximum size, where each key maps to a
    score as well as to a value. When a write would cause the dict to exceed
    its maximum size, it first evicts the existing key with the smallest
    score, then adds the new key to the map.

    A key has the following lifecycle:

    1. When a key is written for the first time, it is given the score
       self.new_entry(key, value).
    2. Whenever an existing key is read or written, self.on_access(key, value,
       score) is called. This returns a new score for the key.
    3. When a key is evicted, self.on_evict(key, value, score) is called.

    The cache will be in a valid state in all of these cases.

    Implementations are expected to define new_entry and, optionally,
    on_access and on_evict to implement a specific scoring strategy.
    """

    __slots__ = ("keys_to_indices", "data", "max_size", "__pinned_entry_count")

    def __init__(self, max_size):
        self.max_size = max_size

        # Implementation: We store a binary heap of Entry objects in self.data,
        # with the heap property requiring that a parent's score is <= that of
        # its children. keys_to_indices then maps keys to their index in the
        # heap. We keep these two in sync automatically - the heap is never
        # reordered without updating the index.
        self.keys_to_indices = {}
        self.data = []
        self.__pinned_entry_count = 0

    def __len__(self):
        assert len(self.keys_to_indices) == len(self.data)
        return len(self.data)

    def __contains__(self, key):
        return key in self.keys_to_indices

    def __getitem__(self, key):
        i = self.keys_to_indices[key]
        result = self.data[i]
        # Record the access and adopt the returned score, matching the
        # contract documented on on_access and the behaviour of __setitem__.
        result.score = self.on_access(result.key, result.value, result.score)
        self.__balance(i)
        return result.value

    def __setitem__(self, key, value):
        if self.max_size == 0:
            return
        evicted = None
        try:
            i = self.keys_to_indices[key]
        except KeyError:
            if self.max_size == self.__pinned_entry_count:
                raise ValueError(
                    "Cannot increase size of cache where all keys have been pinned."
                ) from None
            entry = Entry(key, value, self.new_entry(key, value))
            if len(self.data) >= self.max_size:
                evicted = self.data[0]
                assert evicted.pins == 0
                del self.keys_to_indices[evicted.key]
                i = 0
                self.data[0] = entry
            else:
                i = len(self.data)
                self.data.append(entry)
            self.keys_to_indices[key] = i
        else:
            entry = self.data[i]
            assert entry.key == key
            entry.value = value
            entry.score = self.on_access(entry.key, entry.value, entry.score)

        self.__balance(i)

        if evicted is not None:
            if self.data[0] is not entry:
                assert evicted.score <= self.data[0].score
            self.on_evict(evicted.key, evicted.value, evicted.score)

    def __iter__(self):
        return iter(self.keys_to_indices)

    def pin(self, key):
        """Mark ``key`` as pinned. That is, it may not be evicted until
        ``unpin(key)`` has been called. The same key may be pinned multiple
        times and will not be unpinned until the same number of calls to
        unpin have been made."""
        i = self.keys_to_indices[key]
        entry = self.data[i]
        entry.pins += 1
        if entry.pins == 1:
            self.__pinned_entry_count += 1
            assert self.__pinned_entry_count <= self.max_size
            self.__balance(i)

    def unpin(self, key):
        """Undo one previous call to ``pin(key)``. Once all calls are
        undone this key may be evicted as normal."""
        i = self.keys_to_indices[key]
        entry = self.data[i]
        if entry.pins == 0:
            raise ValueError(f"Key {key!r} has not been pinned")
        entry.pins -= 1
        if entry.pins == 0:
            self.__pinned_entry_count -= 1
            self.__balance(i)

    def is_pinned(self, key):
        """Returns True if the key is currently pinned."""
        i = self.keys_to_indices[key]
        return self.data[i].pins > 0

    def clear(self):
        """Remove all keys, clearing their pinned status."""
        del self.data[:]
        self.keys_to_indices.clear()
        self.__pinned_entry_count = 0

    def __repr__(self):
        return "{" + ", ".join(f"{e.key!r}: {e.value!r}" for e in self.data) + "}"

    def new_entry(self, key, value):
        """Called when a key is written that does not currently appear in the
        map.

        Returns the score to associate with the key.
        """
        raise NotImplementedError

    def on_access(self, key, value, score):
        """Called every time a key that is already in the map is read or
        written.

        Returns the new score for the key.
        """
        return score

    def on_evict(self, key, value, score):
        """Called after a key has been evicted, with the score it had had at
        the point of eviction."""

    def check_valid(self):
        """Debugging method for use in tests.

        Asserts that all of the cache's invariants hold. When everything
        is working correctly this should be an expensive no-op.
        """
        for i, e in enumerate(self.data):
            assert self.keys_to_indices[e.key] == i
            for j in [i * 2 + 1, i * 2 + 2]:
                if j < len(self.data):
                    assert e.score <= self.data[j].score, self.data

    def __swap(self, i, j):
        assert i < j
        assert self.data[j].sort_key < self.data[i].sort_key
        self.data[i], self.data[j] = self.data[j], self.data[i]
        self.keys_to_indices[self.data[i].key] = i
        self.keys_to_indices[self.data[j].key] = j

    def __balance(self, i):
        """When we have made a modification to the heap that may have violated
        the heap property locally around i, but it previously held for all
        other indexes (and no other values have been modified), this fixes the
        heap so that the heap property holds everywhere."""
        while i > 0:
            parent = (i - 1) // 2
            if self.__out_of_order(parent, i):
                self.__swap(parent, i)
                i = parent
            else:
                # This branch is never taken on versions of Python where dicts
                # preserve their insertion order (pypy or cpython >= 3.7)
                break  # pragma: no cover
        while True:
            children = [j for j in (2 * i + 1, 2 * i + 2) if j < len(self.data)]
            if len(children) == 2:
                children.sort(key=lambda j: self.data[j].score)
            for j in children:
                if self.__out_of_order(i, j):
                    self.__swap(i, j)
                    i = j
                    break
            else:
                break

    def __out_of_order(self, i, j):
        """Returns True if the indices i, j are in the wrong order.

        i must be the parent of j.
        """
        assert i == (j - 1) // 2
        return self.data[j].sort_key < self.data[i].sort_key
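
# A minimal sketch (not part of the original file) of the scoring contract
# described in GenericCache's docstring: a subclass only has to say what score
# a fresh key gets and how a score changes on access. Scoring every access
# with a monotonically increasing counter yields a plain LRU policy, since the
# least recently touched key always has the smallest score and is evicted
# first. The class name below is hypothetical and exists only for illustration.
class _ExampleLRUCache(GenericCache):
    __slots__ = ("__counter",)

    def __init__(self, max_size):
        super().__init__(max_size)
        self.__counter = 0

    def new_entry(self, key, value):
        # A fresh key is scored with the current time step.
        self.__counter += 1
        return self.__counter

    def on_access(self, key, value, score):
        # Re-reading or re-writing a key moves it to the newest time step.
        self.__counter += 1
        return self.__counter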

class LRUReusedCache(GenericCache):
    """The only concrete implementation of GenericCache we use outside of tests
    currently.

    Adopts a modified least-recently used eviction policy: It evicts the key
    that has been used least recently, but it will always preferentially evict
    keys that have only ever been accessed once. Among keys that have been
    accessed more than once, it ignores the number of accesses.

    This retains most of the benefits of an LRU cache, but adds an element of
    scan-resistance to the process: If we end up scanning through a large
    number of keys without reusing them, this does not evict the existing
    entries in preference for the new ones.
    """

    __slots__ = ("__tick",)

    def __init__(self, max_size):
        super().__init__(max_size)
        self.__tick = 0

    def tick(self):
        self.__tick += 1
        return self.__tick

    def new_entry(self, key, value):
        return [1, self.tick()]

    def on_access(self, key, value, score):
        score[0] = 2
        score[1] = self.tick()
        return score

    def pin(self, key):
        try:
            super().pin(key)
        except KeyError:
            # The whole point of an LRU cache is that it might drop things for you
            assert key not in self.keys_to_indices

    def unpin(self, key):
        try:
            super().unpin(key)
        except KeyError:
            assert key not in self.keys_to_indices
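
# A minimal usage sketch (not part of the original file), exercising only the
# public surface defined above: keys that have been re-read survive a scan of
# fresh keys, and a pinned key cannot be evicted until it is unpinned.
if __name__ == "__main__":
    cache = LRUReusedCache(max_size=3)
    cache[1] = "one"
    cache[2] = "two"
    cache[1]  # reading key 1 promotes it to the "reused" tier
    cache.pin(2)  # key 2 may not be evicted until unpin(2) is called
    for k in range(3, 10):
        # Each write evicts a never-reused key rather than keys 1 or 2.
        cache[k] = str(k)
    assert 1 in cache and 2 in cache
    assert cache.is_pinned(2)
    cache.unpin(2)
    cache.check_valid()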