Line data Source code
1 : // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2 : // of this source code is governed by a BSD-style license that can be found in
3 : // the LICENSE file.
4 :
5 : package cache
6 :
7 : import (
8 : "fmt"
9 : "math/bits"
10 : "os"
11 : "runtime/debug"
12 : "strings"
13 : "time"
14 : "unsafe"
15 :
16 : "github.com/cockroachdb/pebble/internal/invariants"
17 : "github.com/cockroachdb/pebble/internal/manual"
18 : )
19 :
20 : var hashSeed = uint64(time.Now().UnixNano())
21 :
22 : // Fibonacci hash: https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
23 1 : func robinHoodHash(k key, shift uint32) uint32 {
24 1 : const m = 11400714819323198485
25 1 : h := hashSeed
26 1 : h ^= k.id * m
27 1 : h ^= uint64(k.fileNum.FileNum()) * m
28 1 : h ^= k.offset * m
29 1 : return uint32(h >> shift)
30 1 : }
31 :
32 : type robinHoodEntry struct {
33 : key key
34 : // Note that value may point to a Go allocated object (if the "invariants"
35 : // build tag was specified), even though the memory for the entry itself is
36 : // manually managed. This is technically a volation of the Cgo pointer rules:
37 : //
38 : // https://golang.org/cmd/cgo/#hdr-Passing_pointers
39 : //
40 : // Specifically, Go pointers should not be stored in C allocated memory. The
41 : // reason for this rule is that the Go GC will not look at C allocated memory
42 : // to find pointers to Go objects. If the only reference to a Go object is
43 : // stored in C allocated memory, the object will be reclaimed. What makes
44 : // this "safe" is that the Cache guarantees that there are other pointers to
45 : // the entry and shard which will keep them alive. In particular, every Go
46 : // allocated entry in the cache is referenced by the shard.entries map. And
47 : // every shard is referenced by the Cache.shards map.
48 : value *entry
49 : // The distance the entry is from its desired position.
50 : dist uint32
51 : }
52 :
53 : type robinHoodEntries struct {
54 : ptr unsafe.Pointer
55 : len uint32
56 : }
57 :
58 1 : func newRobinHoodEntries(n uint32) robinHoodEntries {
59 1 : size := uintptr(n) * unsafe.Sizeof(robinHoodEntry{})
60 1 : return robinHoodEntries{
61 1 : ptr: unsafe.Pointer(&(manual.New(int(size)))[0]),
62 1 : len: n,
63 1 : }
64 1 : }
65 :
66 1 : func (e robinHoodEntries) at(i uint32) *robinHoodEntry {
67 1 : return (*robinHoodEntry)(unsafe.Pointer(uintptr(e.ptr) +
68 1 : uintptr(i)*unsafe.Sizeof(robinHoodEntry{})))
69 1 : }
70 :
71 1 : func (e robinHoodEntries) free() {
72 1 : size := uintptr(e.len) * unsafe.Sizeof(robinHoodEntry{})
73 1 : buf := (*[manual.MaxArrayLen]byte)(e.ptr)[:size:size]
74 1 : manual.Free(buf)
75 1 : }
76 :
77 : // robinHoodMap is an implementation of Robin Hood hashing. Robin Hood hashing
78 : // is an open-address hash table using linear probing. The twist is that the
79 : // linear probe distance is reduced by moving existing entries when inserting
80 : // and deleting. This is accomplished by keeping track of how far an entry is
81 : // from its "desired" slot (hash of key modulo number of slots). During
82 : // insertion, if the new entry being inserted is farther from its desired slot
83 : // than the target entry, we swap the target and new entry. This effectively
84 : // steals from the "rich" target entry and gives to the "poor" new entry (thus
85 : // the origin of the name).
86 : //
87 : // An extension over the base Robin Hood hashing idea comes from
88 : // https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/. A cap
89 : // is placed on the max distance an entry can be from its desired slot. When
90 : // this threshold is reached during insertion, the size of the table is doubled
91 : // and insertion is restarted. Additionally, the entries slice is given "max
92 : // dist" extra entries on the end. The very last entry in the entries slice is
93 : // never used and acts as a sentinel which terminates loops. The previous
94 : // maxDist-1 entries act as the extra entries. For example, if the size of the
95 : // table is 2, maxDist is computed as 4 and the actual size of the entry slice
96 : // is 6.
97 : //
98 : // +---+---+---+---+---+---+
99 : // | 0 | 1 | 2 | 3 | 4 | 5 |
100 : // +---+---+---+---+---+---+
101 : // ^
102 : // size
103 : //
104 : // In this scenario, the target entry for a key will always be in the range
105 : // [0,1]. Valid entries may reside in the range [0,4] due to the linear probing
106 : // of up to maxDist entries. The entry at index 5 will never contain a value,
107 : // and instead acts as a sentinel (its distance is always 0). The max distance
108 : // threshold is set to log2(num-entries). This ensures that retrieval is O(log
109 : // N), though note that N is the number of total entries, not the count of
110 : // valid entries.
111 : //
112 : // Deletion is implemented via the backward shift delete mechanism instead of
113 : // tombstones. This preserves the performance of the table in the presence of
114 : // deletions. See
115 : // http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion
116 : // for details.
117 : type robinHoodMap struct {
118 : entries robinHoodEntries
119 : size uint32
120 : shift uint32
121 : count uint32
122 : maxDist uint32
123 : }
124 :
125 1 : func maxDistForSize(size uint32) uint32 {
126 1 : desired := uint32(bits.Len32(size))
127 1 : if desired < 4 {
128 1 : desired = 4
129 1 : }
130 1 : return desired
131 : }
132 :
133 1 : func newRobinHoodMap(initialCapacity int) *robinHoodMap {
134 1 : m := &robinHoodMap{}
135 1 : m.init(initialCapacity)
136 1 :
137 1 : // Note: this is a no-op if invariants are disabled or race is enabled.
138 1 : invariants.SetFinalizer(m, func(obj interface{}) {
139 0 : m := obj.(*robinHoodMap)
140 0 : if m.entries.ptr != nil {
141 0 : fmt.Fprintf(os.Stderr, "%p: robin-hood map not freed\n", m)
142 0 : os.Exit(1)
143 0 : }
144 : })
145 1 : return m
146 : }
147 :
148 1 : func (m *robinHoodMap) init(initialCapacity int) {
149 1 : if initialCapacity < 1 {
150 1 : initialCapacity = 1
151 1 : }
152 1 : targetSize := 1 << (uint(bits.Len(uint(2*initialCapacity-1))) - 1)
153 1 : m.rehash(uint32(targetSize))
154 : }
155 :
156 1 : func (m *robinHoodMap) free() {
157 1 : if m.entries.ptr != nil {
158 1 : m.entries.free()
159 1 : m.entries.ptr = nil
160 1 : }
161 : }
162 :
163 1 : func (m *robinHoodMap) rehash(size uint32) {
164 1 : oldEntries := m.entries
165 1 :
166 1 : m.size = size
167 1 : m.shift = uint32(64 - bits.Len32(m.size-1))
168 1 : m.maxDist = maxDistForSize(size)
169 1 : m.entries = newRobinHoodEntries(size + m.maxDist)
170 1 : m.count = 0
171 1 :
172 1 : for i := uint32(0); i < oldEntries.len; i++ {
173 1 : e := oldEntries.at(i)
174 1 : if e.value != nil {
175 1 : m.Put(e.key, e.value)
176 1 : }
177 : }
178 :
179 1 : if oldEntries.ptr != nil {
180 1 : oldEntries.free()
181 1 : }
182 : }
183 :
184 : // Find an entry containing the specified value. This is intended to be used
185 : // from debug and test code.
186 1 : func (m *robinHoodMap) findByValue(v *entry) *robinHoodEntry {
187 1 : for i := uint32(0); i < m.entries.len; i++ {
188 1 : e := m.entries.at(i)
189 1 : if e.value == v {
190 0 : return e
191 0 : }
192 : }
193 1 : return nil
194 : }
195 :
196 1 : func (m *robinHoodMap) Count() int {
197 1 : return int(m.count)
198 1 : }
199 :
200 1 : func (m *robinHoodMap) Put(k key, v *entry) {
201 1 : maybeExists := true
202 1 : n := robinHoodEntry{key: k, value: v, dist: 0}
203 1 : for i := robinHoodHash(k, m.shift); ; i++ {
204 1 : e := m.entries.at(i)
205 1 : if maybeExists && k == e.key {
206 1 : // Entry already exists: overwrite.
207 1 : e.value = n.value
208 1 : m.checkEntry(i)
209 1 : return
210 1 : }
211 :
212 1 : if e.value == nil {
213 1 : // Found an empty entry: insert here.
214 1 : *e = n
215 1 : m.count++
216 1 : m.checkEntry(i)
217 1 : return
218 1 : }
219 :
220 1 : if e.dist < n.dist {
221 1 : // Swap the new entry with the current entry because the current is
222 1 : // rich. We then continue to loop, looking for a new location for the
223 1 : // current entry. Note that this is also the not-found condition for
224 1 : // retrieval, which means that "k" is not present in the map. See Get().
225 1 : n, *e = *e, n
226 1 : m.checkEntry(i)
227 1 : maybeExists = false
228 1 : }
229 :
230 : // The new entry gradually moves away from its ideal position.
231 1 : n.dist++
232 1 :
233 1 : // If we've reached the max distance threshold, grow the table and restart
234 1 : // the insertion.
235 1 : if n.dist == m.maxDist {
236 1 : m.rehash(2 * m.size)
237 1 : i = robinHoodHash(n.key, m.shift) - 1
238 1 : n.dist = 0
239 1 : maybeExists = false
240 1 : }
241 : }
242 : }
243 :
244 1 : func (m *robinHoodMap) Get(k key) *entry {
245 1 : var dist uint32
246 1 : for i := robinHoodHash(k, m.shift); ; i++ {
247 1 : e := m.entries.at(i)
248 1 : if k == e.key {
249 1 : // Found.
250 1 : return e.value
251 1 : }
252 1 : if e.dist < dist {
253 1 : // Not found.
254 1 : return nil
255 1 : }
256 1 : dist++
257 : }
258 : }
259 :
260 1 : func (m *robinHoodMap) Delete(k key) {
261 1 : var dist uint32
262 1 : for i := robinHoodHash(k, m.shift); ; i++ {
263 1 : e := m.entries.at(i)
264 1 : if k == e.key {
265 1 : m.checkEntry(i)
266 1 : // We found the entry to delete. Shift the following entries backwards
267 1 : // until the next empty value or entry with a zero distance. Note that
268 1 : // empty values are guaranteed to have "dist == 0".
269 1 : m.count--
270 1 : for j := i + 1; ; j++ {
271 1 : t := m.entries.at(j)
272 1 : if t.dist == 0 {
273 1 : *e = robinHoodEntry{}
274 1 : return
275 1 : }
276 1 : e.key = t.key
277 1 : e.value = t.value
278 1 : e.dist = t.dist - 1
279 1 : e = t
280 1 : m.checkEntry(j)
281 : }
282 : }
283 1 : if dist > e.dist {
284 0 : // Not found.
285 0 : return
286 0 : }
287 1 : dist++
288 : }
289 : }
290 :
291 1 : func (m *robinHoodMap) checkEntry(i uint32) {
292 1 : if invariants.Enabled {
293 1 : e := m.entries.at(i)
294 1 : if e.value != nil {
295 1 : pos := robinHoodHash(e.key, m.shift)
296 1 : if (uint32(i) - pos) != e.dist {
297 0 : fmt.Fprintf(os.Stderr, "%d: invalid dist=%d, expected %d: %s\n%s",
298 0 : i, e.dist, uint32(i)-pos, e.key, debug.Stack())
299 0 : os.Exit(1)
300 0 : }
301 1 : if e.dist > m.maxDist {
302 0 : fmt.Fprintf(os.Stderr, "%d: invalid dist=%d > maxDist=%d: %s\n%s",
303 0 : i, e.dist, m.maxDist, e.key, debug.Stack())
304 0 : os.Exit(1)
305 0 : }
306 : }
307 : }
308 : }
309 :
310 0 : func (m *robinHoodMap) String() string {
311 0 : var buf strings.Builder
312 0 : fmt.Fprintf(&buf, "count: %d\n", m.count)
313 0 : for i := uint32(0); i < m.entries.len; i++ {
314 0 : e := m.entries.at(i)
315 0 : if e.value != nil {
316 0 : fmt.Fprintf(&buf, "%d: [%s,%p,%d]\n", i, e.key, e.value, e.dist)
317 0 : }
318 : }
319 0 : return buf.String()
320 : }
|