LCOV - code coverage report
Current view: top level - pebble/bloom - bloom.go (source / functions) Hit Total Coverage
Test: 2023-10-02 08:17Z aa077af6 - tests + meta.lcov Lines: 141 150 94.0 %
Date: 2023-10-02 08:18:44 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package bloom implements Bloom filters.
       6             : package bloom // import "github.com/cockroachdb/pebble/bloom"
       7             : 
       8             : import (
       9             :         "encoding/binary"
      10             :         "fmt"
      11             :         "sync"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/base"
      14             : )
      15             : 
      16             : const (
      17             :         cacheLineSize = 64
      18             :         cacheLineBits = cacheLineSize * 8
      19             : )
      20             : 
      21             : type tableFilter []byte
      22             : 
      23           2 : func (f tableFilter) MayContain(key []byte) bool {
      24           2 :         if len(f) <= 5 {
      25           2 :                 return false
      26           2 :         }
      27           2 :         n := len(f) - 5
      28           2 :         nProbes := f[n]
      29           2 :         nLines := binary.LittleEndian.Uint32(f[n+1:])
      30           2 :         cacheLineBits := 8 * (uint32(n) / nLines)
      31           2 : 
      32           2 :         h := hash(key)
      33           2 :         delta := h>>17 | h<<15
      34           2 :         b := (h % nLines) * cacheLineBits
      35           2 : 
      36           2 :         for j := uint8(0); j < nProbes; j++ {
      37           2 :                 bitPos := b + (h % cacheLineBits)
      38           2 :                 if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
      39           2 :                         return false
      40           2 :                 }
      41           2 :                 h += delta
      42             :         }
      43           2 :         return true
      44             : }
      45             : 
      46           2 : func calculateProbes(bitsPerKey int) uint32 {
      47           2 :         // We intentionally round down to reduce probing cost a little bit
      48           2 :         n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2)
      49           2 :         if n < 1 {
      50           2 :                 n = 1
      51           2 :         }
      52           2 :         if n > 30 {
      53           1 :                 n = 30
      54           1 :         }
      55           2 :         return n
      56             : }
      57             : 
      58             : // extend appends n zero bytes to b. It returns the overall slice (of length
      59             : // n+len(originalB)) and the slice of n trailing zeroes.
      60           2 : func extend(b []byte, n int) (overall, trailer []byte) {
      61           2 :         want := n + len(b)
      62           2 :         if want <= cap(b) {
      63           0 :                 overall = b[:want]
      64           0 :                 trailer = overall[len(b):]
      65           0 :                 for i := range trailer {
      66           0 :                         trailer[i] = 0
      67           0 :                 }
      68           2 :         } else {
      69           2 :                 // Grow the capacity exponentially, with a 1KiB minimum.
      70           2 :                 c := 1024
      71           2 :                 for c < want {
      72           1 :                         c += c / 4
      73           1 :                 }
      74           2 :                 overall = make([]byte, want, c)
      75           2 :                 trailer = overall[len(b):]
      76           2 :                 copy(overall, b)
      77             :         }
      78           2 :         return overall, trailer
      79             : }
      80             : 
      81             : // hash implements a hashing algorithm similar to the Murmur hash.
      82           2 : func hash(b []byte) uint32 {
      83           2 :         const (
      84           2 :                 seed = 0xbc9f1d34
      85           2 :                 m    = 0xc6a4a793
      86           2 :         )
      87           2 :         h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m))
      88           2 :         for ; len(b) >= 4; b = b[4:] {
      89           2 :                 h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
      90           2 :                 h *= m
      91           2 :                 h ^= h >> 16
      92           2 :         }
      93             : 
      94             :         // The code below first casts each byte to a signed 8-bit integer. This is
      95             :         // necessary to match RocksDB's behavior. Note that the `byte` type in Go is
      96             :         // unsigned. What is the difference between casting a signed 8-bit value vs
      97             :         // unsigned 8-bit value into an unsigned 32-bit value?
      98             :         // Sign-extension. Consider the value 250 which has the bit pattern 11111010:
      99             :         //
     100             :         //   uint32(250)        = 00000000000000000000000011111010
     101             :         //   uint32(int8(250))  = 11111111111111111111111111111010
     102             :         //
     103             :         // Note that the original LevelDB code did not explicitly cast to a signed
     104             :         // 8-bit value which left the behavior dependent on whether C characters were
     105             :         // signed or unsigned which is a compiler flag for gcc (-funsigned-char).
     106           2 :         switch len(b) {
     107           2 :         case 3:
     108           2 :                 h += uint32(int8(b[2])) << 16
     109           2 :                 fallthrough
     110           2 :         case 2:
     111           2 :                 h += uint32(int8(b[1])) << 8
     112           2 :                 fallthrough
     113           2 :         case 1:
     114           2 :                 h += uint32(int8(b[0]))
     115           2 :                 h *= m
     116           2 :                 h ^= h >> 24
     117             :         }
     118           2 :         return h
     119             : }
     120             : 
     121             : const hashBlockLen = 16384
     122             : 
     123             : type hashBlock [hashBlockLen]uint32
     124             : 
     125             : var hashBlockPool = sync.Pool{
     126           2 :         New: func() interface{} {
     127           2 :                 return &hashBlock{}
     128           2 :         },
     129             : }
     130             : 
     131             : type tableFilterWriter struct {
     132             :         bitsPerKey int
     133             : 
     134             :         numHashes int
     135             :         // We store the hashes in blocks.
     136             :         blocks   []*hashBlock
     137             :         lastHash uint32
     138             : 
     139             :         // Initial "in-line" storage for the blocks slice (to avoid some small
     140             :         // allocations).
     141             :         blocksBuf [16]*hashBlock
     142             : }
     143             : 
     144           2 : func newTableFilterWriter(bitsPerKey int) *tableFilterWriter {
     145           2 :         w := &tableFilterWriter{
     146           2 :                 bitsPerKey: bitsPerKey,
     147           2 :         }
     148           2 :         w.blocks = w.blocksBuf[:0]
     149           2 :         return w
     150           2 : }
     151             : 
     152             : // AddKey implements the base.FilterWriter interface.
     153           2 : func (w *tableFilterWriter) AddKey(key []byte) {
     154           2 :         h := hash(key)
     155           2 :         if w.numHashes != 0 && h == w.lastHash {
     156           2 :                 return
     157           2 :         }
     158           2 :         ofs := w.numHashes % hashBlockLen
     159           2 :         if ofs == 0 {
     160           2 :                 // Time for a new block.
     161           2 :                 w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock))
     162           2 :         }
     163           2 :         w.blocks[len(w.blocks)-1][ofs] = h
     164           2 :         w.numHashes++
     165           2 :         w.lastHash = h
     166             : }
     167             : 
     168             : // Finish implements the base.FilterWriter interface.
     169           2 : func (w *tableFilterWriter) Finish(buf []byte) []byte {
     170           2 :         // The table filter format matches the RocksDB full-file filter format.
     171           2 :         var nLines int
     172           2 :         if w.numHashes != 0 {
     173           2 :                 nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits)
     174           2 :                 // Make nLines an odd number to make sure more bits are involved when
     175           2 :                 // determining which block.
     176           2 :                 if nLines%2 == 0 {
     177           2 :                         nLines++
     178           2 :                 }
     179             :         }
     180             : 
     181           2 :         nBytes := nLines * cacheLineSize
     182           2 :         // +5: 4 bytes for num-lines, 1 byte for num-probes
     183           2 :         buf, filter := extend(buf, nBytes+5)
     184           2 : 
     185           2 :         if nLines != 0 {
     186           2 :                 nProbes := calculateProbes(w.bitsPerKey)
     187           2 :                 for bIdx, b := range w.blocks {
     188           2 :                         length := hashBlockLen
     189           2 :                         if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 {
     190           2 :                                 length = w.numHashes % hashBlockLen
     191           2 :                         }
     192           2 :                         for _, h := range b[:length] {
     193           2 :                                 delta := h>>17 | h<<15 // rotate right 17 bits
     194           2 :                                 b := (h % uint32(nLines)) * (cacheLineBits)
     195           2 :                                 for i := uint32(0); i < nProbes; i++ {
     196           2 :                                         bitPos := b + (h % cacheLineBits)
     197           2 :                                         filter[bitPos/8] |= (1 << (bitPos % 8))
     198           2 :                                         h += delta
     199           2 :                                 }
     200             :                         }
     201             :                 }
     202           2 :                 filter[nBytes] = byte(nProbes)
     203           2 :                 binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines))
     204             :         }
     205             : 
     206             :         // Release the hash blocks.
     207           2 :         for i, b := range w.blocks {
     208           2 :                 hashBlockPool.Put(b)
     209           2 :                 w.blocks[i] = nil
     210           2 :         }
     211           2 :         w.blocks = w.blocks[:0]
     212           2 :         w.numHashes = 0
     213           2 :         return buf
     214             : }
     215             : 
     216             : // FilterPolicy implements the FilterPolicy interface from the pebble package.
     217             : //
     218             : // The integer value is the approximate number of bits used per key. A good
     219             : // value is 10, which yields a filter with ~ 1% false positive rate.
     220             : type FilterPolicy int
     221             : 
     222             : var _ base.FilterPolicy = FilterPolicy(0)
     223             : 
     224             : // Name implements the pebble.FilterPolicy interface.
     225           2 : func (p FilterPolicy) Name() string {
     226           2 :         // This string looks arbitrary, but its value is written to LevelDB .sst
     227           2 :         // files, and should be this exact value to be compatible with those files
     228           2 :         // and with the C++ LevelDB code.
     229           2 :         return "rocksdb.BuiltinBloomFilter"
     230           2 : }
     231             : 
     232             : // MayContain implements the pebble.FilterPolicy interface.
     233           2 : func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool {
     234           2 :         switch ftype {
     235           2 :         case base.TableFilter:
     236           2 :                 return tableFilter(f).MayContain(key)
     237           0 :         default:
     238           0 :                 panic(fmt.Sprintf("unknown filter type: %v", ftype))
     239             :         }
     240             : }
     241             : 
     242             : // NewWriter implements the pebble.FilterPolicy interface.
     243           2 : func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter {
     244           2 :         switch ftype {
     245           2 :         case base.TableFilter:
     246           2 :                 return newTableFilterWriter(int(p))
     247           0 :         default:
     248           0 :                 panic(fmt.Sprintf("unknown filter type: %v", ftype))
     249             :         }
     250             : }

Generated by: LCOV version 1.14