LCOV - code coverage report
Current view: top level - pebble/bloom - bloom.go (source / functions) Hit Total Coverage
Test: 2023-09-19 08:16Z 57e248f9 - meta test only.lcov Lines: 137 150 91.3 %
Date: 2023-09-19 08:17:47 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package bloom implements Bloom filters.
       6             : package bloom // import "github.com/cockroachdb/pebble/bloom"
       7             : 
       8             : import (
       9             :         "encoding/binary"
      10             :         "fmt"
      11             :         "sync"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/base"
      14             : )
      15             : 
      16             : const (
      17             :         cacheLineSize = 64
      18             :         cacheLineBits = cacheLineSize * 8
      19             : )
      20             : 
      21             : type tableFilter []byte
      22             : 
      23           1 : func (f tableFilter) MayContain(key []byte) bool {
      24           1 :         if len(f) <= 5 {
      25           1 :                 return false
      26           1 :         }
      27           1 :         n := len(f) - 5
      28           1 :         nProbes := f[n]
      29           1 :         nLines := binary.LittleEndian.Uint32(f[n+1:])
      30           1 :         cacheLineBits := 8 * (uint32(n) / nLines)
      31           1 : 
      32           1 :         h := hash(key)
      33           1 :         delta := h>>17 | h<<15
      34           1 :         b := (h % nLines) * cacheLineBits
      35           1 : 
      36           1 :         for j := uint8(0); j < nProbes; j++ {
      37           1 :                 bitPos := b + (h % cacheLineBits)
      38           1 :                 if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
      39           1 :                         return false
      40           1 :                 }
      41           1 :                 h += delta
      42             :         }
      43           1 :         return true
      44             : }
      45             : 
      46           1 : func calculateProbes(bitsPerKey int) uint32 {
      47           1 :         // We intentionally round down to reduce probing cost a little bit
      48           1 :         n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2)
      49           1 :         if n < 1 {
      50           1 :                 n = 1
      51           1 :         }
      52           1 :         if n > 30 {
      53           0 :                 n = 30
      54           0 :         }
      55           1 :         return n
      56             : }
      57             : 
      58             : // extend appends n zero bytes to b. It returns the overall slice (of length
      59             : // n+len(originalB)) and the slice of n trailing zeroes.
      60           1 : func extend(b []byte, n int) (overall, trailer []byte) {
      61           1 :         want := n + len(b)
      62           1 :         if want <= cap(b) {
      63           0 :                 overall = b[:want]
      64           0 :                 trailer = overall[len(b):]
      65           0 :                 for i := range trailer {
      66           0 :                         trailer[i] = 0
      67           0 :                 }
      68           1 :         } else {
      69           1 :                 // Grow the capacity exponentially, with a 1KiB minimum.
      70           1 :                 c := 1024
      71           1 :                 for c < want {
      72           0 :                         c += c / 4
      73           0 :                 }
      74           1 :                 overall = make([]byte, want, c)
      75           1 :                 trailer = overall[len(b):]
      76           1 :                 copy(overall, b)
      77             :         }
      78           1 :         return overall, trailer
      79             : }
      80             : 
      81             : // hash implements a hashing algorithm similar to the Murmur hash.
      82           1 : func hash(b []byte) uint32 {
      83           1 :         const (
      84           1 :                 seed = 0xbc9f1d34
      85           1 :                 m    = 0xc6a4a793
      86           1 :         )
      87           1 :         h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m))
      88           1 :         for ; len(b) >= 4; b = b[4:] {
      89           1 :                 h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
      90           1 :                 h *= m
      91           1 :                 h ^= h >> 16
      92           1 :         }
      93             : 
      94             :         // The code below first casts each byte to a signed 8-bit integer. This is
      95             :         // necessary to match RocksDB's behavior. Note that the `byte` type in Go is
      96             :         // unsigned. What is the difference between casting a signed 8-bit value vs
      97             :         // unsigned 8-bit value into an unsigned 32-bit value?
      98             :         // Sign-extension. Consider the value 250 which has the bit pattern 11111010:
      99             :         //
     100             :         //   uint32(250)        = 00000000000000000000000011111010
     101             :         //   uint32(int8(250))  = 11111111111111111111111111111010
     102             :         //
     103             :         // Note that the original LevelDB code did not explicitly cast to a signed
     104             :         // 8-bit value which left the behavior dependent on whether C characters were
     105             :         // signed or unsigned which is a compiler flag for gcc (-funsigned-char).
     106           1 :         switch len(b) {
     107           1 :         case 3:
     108           1 :                 h += uint32(int8(b[2])) << 16
     109           1 :                 fallthrough
     110           1 :         case 2:
     111           1 :                 h += uint32(int8(b[1])) << 8
     112           1 :                 fallthrough
     113           1 :         case 1:
     114           1 :                 h += uint32(int8(b[0]))
     115           1 :                 h *= m
     116           1 :                 h ^= h >> 24
     117             :         }
     118           1 :         return h
     119             : }
     120             : 
     121             : const hashBlockLen = 16384
     122             : 
     123             : type hashBlock [hashBlockLen]uint32
     124             : 
     125             : var hashBlockPool = sync.Pool{
     126           1 :         New: func() interface{} {
     127           1 :                 return &hashBlock{}
     128           1 :         },
     129             : }
     130             : 
     131             : type tableFilterWriter struct {
     132             :         bitsPerKey int
     133             : 
     134             :         numHashes int
     135             :         // We store the hashes in blocks.
     136             :         blocks   []*hashBlock
     137             :         lastHash uint32
     138             : 
     139             :         // Initial "in-line" storage for the blocks slice (to avoid some small
     140             :         // allocations).
     141             :         blocksBuf [16]*hashBlock
     142             : }
     143             : 
     144           1 : func newTableFilterWriter(bitsPerKey int) *tableFilterWriter {
     145           1 :         w := &tableFilterWriter{
     146           1 :                 bitsPerKey: bitsPerKey,
     147           1 :         }
     148           1 :         w.blocks = w.blocksBuf[:0]
     149           1 :         return w
     150           1 : }
     151             : 
     152             : // AddKey implements the base.FilterWriter interface.
     153           1 : func (w *tableFilterWriter) AddKey(key []byte) {
     154           1 :         h := hash(key)
     155           1 :         if w.numHashes != 0 && h == w.lastHash {
     156           1 :                 return
     157           1 :         }
     158           1 :         ofs := w.numHashes % hashBlockLen
     159           1 :         if ofs == 0 {
     160           1 :                 // Time for a new block.
     161           1 :                 w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock))
     162           1 :         }
     163           1 :         w.blocks[len(w.blocks)-1][ofs] = h
     164           1 :         w.numHashes++
     165           1 :         w.lastHash = h
     166             : }
     167             : 
     168             : // Finish implements the base.FilterWriter interface.
     169           1 : func (w *tableFilterWriter) Finish(buf []byte) []byte {
     170           1 :         // The table filter format matches the RocksDB full-file filter format.
     171           1 :         var nLines int
     172           1 :         if w.numHashes != 0 {
     173           1 :                 nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits)
     174           1 :                 // Make nLines an odd number to make sure more bits are involved when
     175           1 :                 // determining which block.
     176           1 :                 if nLines%2 == 0 {
     177           1 :                         nLines++
     178           1 :                 }
     179             :         }
     180             : 
     181           1 :         nBytes := nLines * cacheLineSize
     182           1 :         // +5: 4 bytes for num-lines, 1 byte for num-probes
     183           1 :         buf, filter := extend(buf, nBytes+5)
     184           1 : 
     185           1 :         if nLines != 0 {
     186           1 :                 nProbes := calculateProbes(w.bitsPerKey)
     187           1 :                 for bIdx, b := range w.blocks {
     188           1 :                         length := hashBlockLen
     189           1 :                         if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 {
     190           1 :                                 length = w.numHashes % hashBlockLen
     191           1 :                         }
     192           1 :                         for _, h := range b[:length] {
     193           1 :                                 delta := h>>17 | h<<15 // rotate right 17 bits
     194           1 :                                 b := (h % uint32(nLines)) * (cacheLineBits)
     195           1 :                                 for i := uint32(0); i < nProbes; i++ {
     196           1 :                                         bitPos := b + (h % cacheLineBits)
     197           1 :                                         filter[bitPos/8] |= (1 << (bitPos % 8))
     198           1 :                                         h += delta
     199           1 :                                 }
     200             :                         }
     201             :                 }
     202           1 :                 filter[nBytes] = byte(nProbes)
     203           1 :                 binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines))
     204             :         }
     205             : 
     206             :         // Release the hash blocks.
     207           1 :         for i, b := range w.blocks {
     208           1 :                 hashBlockPool.Put(b)
     209           1 :                 w.blocks[i] = nil
     210           1 :         }
     211           1 :         w.blocks = w.blocks[:0]
     212           1 :         w.numHashes = 0
     213           1 :         return buf
     214             : }
     215             : 
     216             : // FilterPolicy implements the FilterPolicy interface from the pebble package.
     217             : //
     218             : // The integer value is the approximate number of bits used per key. A good
     219             : // value is 10, which yields a filter with ~ 1% false positive rate.
     220             : type FilterPolicy int
     221             : 
     222             : var _ base.FilterPolicy = FilterPolicy(0)
     223             : 
     224             : // Name implements the pebble.FilterPolicy interface.
     225           1 : func (p FilterPolicy) Name() string {
     226           1 :         // This string looks arbitrary, but its value is written to LevelDB .sst
     227           1 :         // files, and should be this exact value to be compatible with those files
     228           1 :         // and with the C++ LevelDB code.
     229           1 :         return "rocksdb.BuiltinBloomFilter"
     230           1 : }
     231             : 
     232             : // MayContain implements the pebble.FilterPolicy interface.
     233           1 : func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool {
     234           1 :         switch ftype {
     235           1 :         case base.TableFilter:
     236           1 :                 return tableFilter(f).MayContain(key)
     237           0 :         default:
     238           0 :                 panic(fmt.Sprintf("unknown filter type: %v", ftype))
     239             :         }
     240             : }
     241             : 
     242             : // NewWriter implements the pebble.FilterPolicy interface.
     243           1 : func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter {
     244           1 :         switch ftype {
     245           1 :         case base.TableFilter:
     246           1 :                 return newTableFilterWriter(int(p))
     247           0 :         default:
     248           0 :                 panic(fmt.Sprintf("unknown filter type: %v", ftype))
     249             :         }
     250             : }

Generated by: LCOV version 1.14