LCOV - code coverage report
Current view: top level - pebble/internal/cache - robin_hood.go (source / functions) Hit Total Coverage
Test: 2023-11-26 08:15Z 4cbc6446 - meta test only.lcov Lines: 142 180 78.9 %
Date: 2023-11-26 08:16:44 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : package cache
       6             : 
       7             : import (
       8             :         "fmt"
       9             :         "math/bits"
      10             :         "os"
      11             :         "runtime/debug"
      12             :         "strings"
      13             :         "time"
      14             :         "unsafe"
      15             : 
      16             :         "github.com/cockroachdb/pebble/internal/invariants"
      17             :         "github.com/cockroachdb/pebble/internal/manual"
      18             : )
      19             : 
      20             : var hashSeed = uint64(time.Now().UnixNano())
      21             : 
      22             : // Fibonacci hash: https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
      23           1 : func robinHoodHash(k key, shift uint32) uint32 {
      24           1 :         const m = 11400714819323198485
      25           1 :         h := hashSeed
      26           1 :         h ^= k.id * m
      27           1 :         h ^= uint64(k.fileNum.FileNum()) * m
      28           1 :         h ^= k.offset * m
      29           1 :         return uint32(h >> shift)
      30           1 : }
      31             : 
      32             : type robinHoodEntry struct {
      33             :         key key
      34             :         // Note that value may point to a Go allocated object (if the "invariants"
      35             :         // build tag was specified), even though the memory for the entry itself is
      36             :         // manually managed. This is technically a volation of the Cgo pointer rules:
      37             :         //
      38             :         //   https://golang.org/cmd/cgo/#hdr-Passing_pointers
      39             :         //
      40             :         // Specifically, Go pointers should not be stored in C allocated memory. The
      41             :         // reason for this rule is that the Go GC will not look at C allocated memory
      42             :         // to find pointers to Go objects. If the only reference to a Go object is
      43             :         // stored in C allocated memory, the object will be reclaimed. What makes
      44             :         // this "safe" is that the Cache guarantees that there are other pointers to
      45             :         // the entry and shard which will keep them alive. In particular, every Go
      46             :         // allocated entry in the cache is referenced by the shard.entries map. And
      47             :         // every shard is referenced by the Cache.shards map.
      48             :         value *entry
      49             :         // The distance the entry is from its desired position.
      50             :         dist uint32
      51             : }
      52             : 
      53             : type robinHoodEntries struct {
      54             :         ptr unsafe.Pointer
      55             :         len uint32
      56             : }
      57             : 
      58           1 : func newRobinHoodEntries(n uint32) robinHoodEntries {
      59           1 :         size := uintptr(n) * unsafe.Sizeof(robinHoodEntry{})
      60           1 :         return robinHoodEntries{
      61           1 :                 ptr: unsafe.Pointer(&(manual.New(int(size)))[0]),
      62           1 :                 len: n,
      63           1 :         }
      64           1 : }
      65             : 
      66           1 : func (e robinHoodEntries) at(i uint32) *robinHoodEntry {
      67           1 :         return (*robinHoodEntry)(unsafe.Pointer(uintptr(e.ptr) +
      68           1 :                 uintptr(i)*unsafe.Sizeof(robinHoodEntry{})))
      69           1 : }
      70             : 
      71           1 : func (e robinHoodEntries) free() {
      72           1 :         size := uintptr(e.len) * unsafe.Sizeof(robinHoodEntry{})
      73           1 :         buf := (*[manual.MaxArrayLen]byte)(e.ptr)[:size:size]
      74           1 :         manual.Free(buf)
      75           1 : }
      76             : 
      77             : // robinHoodMap is an implementation of Robin Hood hashing. Robin Hood hashing
      78             : // is an open-address hash table using linear probing. The twist is that the
      79             : // linear probe distance is reduced by moving existing entries when inserting
      80             : // and deleting. This is accomplished by keeping track of how far an entry is
      81             : // from its "desired" slot (hash of key modulo number of slots). During
      82             : // insertion, if the new entry being inserted is farther from its desired slot
      83             : // than the target entry, we swap the target and new entry. This effectively
      84             : // steals from the "rich" target entry and gives to the "poor" new entry (thus
      85             : // the origin of the name).
      86             : //
      87             : // An extension over the base Robin Hood hashing idea comes from
      88             : // https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/. A cap
      89             : // is placed on the max distance an entry can be from its desired slot. When
      90             : // this threshold is reached during insertion, the size of the table is doubled
      91             : // and insertion is restarted. Additionally, the entries slice is given "max
      92             : // dist" extra entries on the end. The very last entry in the entries slice is
      93             : // never used and acts as a sentinel which terminates loops. The previous
      94             : // maxDist-1 entries act as the extra entries. For example, if the size of the
      95             : // table is 2, maxDist is computed as 4 and the actual size of the entry slice
      96             : // is 6.
      97             : //
      98             : //      +---+---+---+---+---+---+
      99             : //      | 0 | 1 | 2 | 3 | 4 | 5 |
     100             : //      +---+---+---+---+---+---+
     101             : //              ^
     102             : //             size
     103             : //
     104             : // In this scenario, the target entry for a key will always be in the range
     105             : // [0,1]. Valid entries may reside in the range [0,4] due to the linear probing
     106             : // of up to maxDist entries. The entry at index 5 will never contain a value,
     107             : // and instead acts as a sentinel (its distance is always 0). The max distance
     108             : // threshold is set to log2(num-entries). This ensures that retrieval is O(log
     109             : // N), though note that N is the number of total entries, not the count of
     110             : // valid entries.
     111             : //
     112             : // Deletion is implemented via the backward shift delete mechanism instead of
     113             : // tombstones. This preserves the performance of the table in the presence of
     114             : // deletions. See
     115             : // http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion
     116             : // for details.
     117             : type robinHoodMap struct {
     118             :         entries robinHoodEntries
     119             :         size    uint32
     120             :         shift   uint32
     121             :         count   uint32
     122             :         maxDist uint32
     123             : }
     124             : 
     125           1 : func maxDistForSize(size uint32) uint32 {
     126           1 :         desired := uint32(bits.Len32(size))
     127           1 :         if desired < 4 {
     128           0 :                 desired = 4
     129           0 :         }
     130           1 :         return desired
     131             : }
     132             : 
     133           0 : func newRobinHoodMap(initialCapacity int) *robinHoodMap {
     134           0 :         m := &robinHoodMap{}
     135           0 :         m.init(initialCapacity)
     136           0 : 
     137           0 :         // Note: this is a no-op if invariants are disabled or race is enabled.
     138           0 :         invariants.SetFinalizer(m, func(obj interface{}) {
     139           0 :                 m := obj.(*robinHoodMap)
     140           0 :                 if m.entries.ptr != nil {
     141           0 :                         fmt.Fprintf(os.Stderr, "%p: robin-hood map not freed\n", m)
     142           0 :                         os.Exit(1)
     143           0 :                 }
     144             :         })
     145           0 :         return m
     146             : }
     147             : 
     148           1 : func (m *robinHoodMap) init(initialCapacity int) {
     149           1 :         if initialCapacity < 1 {
     150           0 :                 initialCapacity = 1
     151           0 :         }
     152           1 :         targetSize := 1 << (uint(bits.Len(uint(2*initialCapacity-1))) - 1)
     153           1 :         m.rehash(uint32(targetSize))
     154             : }
     155             : 
     156           1 : func (m *robinHoodMap) free() {
     157           1 :         if m.entries.ptr != nil {
     158           1 :                 m.entries.free()
     159           1 :                 m.entries.ptr = nil
     160           1 :         }
     161             : }
     162             : 
     163           1 : func (m *robinHoodMap) rehash(size uint32) {
     164           1 :         oldEntries := m.entries
     165           1 : 
     166           1 :         m.size = size
     167           1 :         m.shift = uint32(64 - bits.Len32(m.size-1))
     168           1 :         m.maxDist = maxDistForSize(size)
     169           1 :         m.entries = newRobinHoodEntries(size + m.maxDist)
     170           1 :         m.count = 0
     171           1 : 
     172           1 :         for i := uint32(0); i < oldEntries.len; i++ {
     173           1 :                 e := oldEntries.at(i)
     174           1 :                 if e.value != nil {
     175           1 :                         m.Put(e.key, e.value)
     176           1 :                 }
     177             :         }
     178             : 
     179           1 :         if oldEntries.ptr != nil {
     180           1 :                 oldEntries.free()
     181           1 :         }
     182             : }
     183             : 
     184             : // Find an entry containing the specified value. This is intended to be used
     185             : // from debug and test code.
     186           1 : func (m *robinHoodMap) findByValue(v *entry) *robinHoodEntry {
     187           1 :         for i := uint32(0); i < m.entries.len; i++ {
     188           1 :                 e := m.entries.at(i)
     189           1 :                 if e.value == v {
     190           0 :                         return e
     191           0 :                 }
     192             :         }
     193           1 :         return nil
     194             : }
     195             : 
     196           1 : func (m *robinHoodMap) Count() int {
     197           1 :         return int(m.count)
     198           1 : }
     199             : 
     200           1 : func (m *robinHoodMap) Put(k key, v *entry) {
     201           1 :         maybeExists := true
     202           1 :         n := robinHoodEntry{key: k, value: v, dist: 0}
     203           1 :         for i := robinHoodHash(k, m.shift); ; i++ {
     204           1 :                 e := m.entries.at(i)
     205           1 :                 if maybeExists && k == e.key {
     206           1 :                         // Entry already exists: overwrite.
     207           1 :                         e.value = n.value
     208           1 :                         m.checkEntry(i)
     209           1 :                         return
     210           1 :                 }
     211             : 
     212           1 :                 if e.value == nil {
     213           1 :                         // Found an empty entry: insert here.
     214           1 :                         *e = n
     215           1 :                         m.count++
     216           1 :                         m.checkEntry(i)
     217           1 :                         return
     218           1 :                 }
     219             : 
     220           1 :                 if e.dist < n.dist {
     221           1 :                         // Swap the new entry with the current entry because the current is
     222           1 :                         // rich. We then continue to loop, looking for a new location for the
     223           1 :                         // current entry. Note that this is also the not-found condition for
     224           1 :                         // retrieval, which means that "k" is not present in the map. See Get().
     225           1 :                         n, *e = *e, n
     226           1 :                         m.checkEntry(i)
     227           1 :                         maybeExists = false
     228           1 :                 }
     229             : 
     230             :                 // The new entry gradually moves away from its ideal position.
     231           1 :                 n.dist++
     232           1 : 
     233           1 :                 // If we've reached the max distance threshold, grow the table and restart
     234           1 :                 // the insertion.
     235           1 :                 if n.dist == m.maxDist {
     236           1 :                         m.rehash(2 * m.size)
     237           1 :                         i = robinHoodHash(n.key, m.shift) - 1
     238           1 :                         n.dist = 0
     239           1 :                         maybeExists = false
     240           1 :                 }
     241             :         }
     242             : }
     243             : 
     244           1 : func (m *robinHoodMap) Get(k key) *entry {
     245           1 :         var dist uint32
     246           1 :         for i := robinHoodHash(k, m.shift); ; i++ {
     247           1 :                 e := m.entries.at(i)
     248           1 :                 if k == e.key {
     249           1 :                         // Found.
     250           1 :                         return e.value
     251           1 :                 }
     252           1 :                 if e.dist < dist {
     253           1 :                         // Not found.
     254           1 :                         return nil
     255           1 :                 }
     256           1 :                 dist++
     257             :         }
     258             : }
     259             : 
     260           1 : func (m *robinHoodMap) Delete(k key) {
     261           1 :         var dist uint32
     262           1 :         for i := robinHoodHash(k, m.shift); ; i++ {
     263           1 :                 e := m.entries.at(i)
     264           1 :                 if k == e.key {
     265           1 :                         m.checkEntry(i)
     266           1 :                         // We found the entry to delete. Shift the following entries backwards
     267           1 :                         // until the next empty value or entry with a zero distance. Note that
     268           1 :                         // empty values are guaranteed to have "dist == 0".
     269           1 :                         m.count--
     270           1 :                         for j := i + 1; ; j++ {
     271           1 :                                 t := m.entries.at(j)
     272           1 :                                 if t.dist == 0 {
     273           1 :                                         *e = robinHoodEntry{}
     274           1 :                                         return
     275           1 :                                 }
     276           1 :                                 e.key = t.key
     277           1 :                                 e.value = t.value
     278           1 :                                 e.dist = t.dist - 1
     279           1 :                                 e = t
     280           1 :                                 m.checkEntry(j)
     281             :                         }
     282             :                 }
     283           1 :                 if dist > e.dist {
     284           0 :                         // Not found.
     285           0 :                         return
     286           0 :                 }
     287           1 :                 dist++
     288             :         }
     289             : }
     290             : 
     291           1 : func (m *robinHoodMap) checkEntry(i uint32) {
     292           1 :         if invariants.Enabled {
     293           1 :                 e := m.entries.at(i)
     294           1 :                 if e.value != nil {
     295           1 :                         pos := robinHoodHash(e.key, m.shift)
     296           1 :                         if (uint32(i) - pos) != e.dist {
     297           0 :                                 fmt.Fprintf(os.Stderr, "%d: invalid dist=%d, expected %d: %s\n%s",
     298           0 :                                         i, e.dist, uint32(i)-pos, e.key, debug.Stack())
     299           0 :                                 os.Exit(1)
     300           0 :                         }
     301           1 :                         if e.dist > m.maxDist {
     302           0 :                                 fmt.Fprintf(os.Stderr, "%d: invalid dist=%d > maxDist=%d: %s\n%s",
     303           0 :                                         i, e.dist, m.maxDist, e.key, debug.Stack())
     304           0 :                                 os.Exit(1)
     305           0 :                         }
     306             :                 }
     307             :         }
     308             : }
     309             : 
     310           0 : func (m *robinHoodMap) String() string {
     311           0 :         var buf strings.Builder
     312           0 :         fmt.Fprintf(&buf, "count: %d\n", m.count)
     313           0 :         for i := uint32(0); i < m.entries.len; i++ {
     314           0 :                 e := m.entries.at(i)
     315           0 :                 if e.value != nil {
     316           0 :                         fmt.Fprintf(&buf, "%d: [%s,%p,%d]\n", i, e.key, e.value, e.dist)
     317           0 :                 }
     318             :         }
     319           0 :         return buf.String()
     320             : }

Generated by: LCOV version 1.14