LCOV - code coverage report
Current view: top level - pebble/bloom - bloom.go (source / functions) Hit Total Coverage
Test: 2023-12-14 08:16Z 288bf0fb - meta test only.lcov Lines: 137 148 92.6 %
Date: 2023-12-14 08:17:01 Functions: 0 0 -

          Line data    Source code
       1             : // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
       2             : // of this source code is governed by a BSD-style license that can be found in
       3             : // the LICENSE file.
       4             : 
       5             : // Package bloom implements Bloom filters.
       6             : package bloom // import "github.com/cockroachdb/pebble/bloom"
       7             : 
       8             : import (
       9             :         "encoding/binary"
      10             :         "fmt"
      11             :         "sync"
      12             : 
      13             :         "github.com/cockroachdb/pebble/internal/base"
      14             : )
      15             : 
const (
	// cacheLineSize is the size, in bytes, of one line of the filter;
	// tableFilterWriter.Finish allocates nLines*cacheLineSize bytes of filter
	// bits.
	cacheLineSize = 64
	// cacheLineBits is the number of bits in one filter line. All probes for
	// a given hash are confined to a single line of this many bits.
	cacheLineBits = cacheLineSize * 8
)
      20             : 
      21             : type tableFilter []byte
      22             : 
      23           1 : func (f tableFilter) MayContain(key []byte) bool {
      24           1 :         if len(f) <= 5 {
      25           1 :                 return false
      26           1 :         }
      27           1 :         n := len(f) - 5
      28           1 :         nProbes := f[n]
      29           1 :         nLines := binary.LittleEndian.Uint32(f[n+1:])
      30           1 :         cacheLineBits := 8 * (uint32(n) / nLines)
      31           1 : 
      32           1 :         h := hash(key)
      33           1 :         delta := h>>17 | h<<15
      34           1 :         b := (h % nLines) * cacheLineBits
      35           1 : 
      36           1 :         for j := uint8(0); j < nProbes; j++ {
      37           1 :                 bitPos := b + (h % cacheLineBits)
      38           1 :                 if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
      39           1 :                         return false
      40           1 :                 }
      41           1 :                 h += delta
      42             :         }
      43           1 :         return true
      44             : }
      45             : 
      46           1 : func calculateProbes(bitsPerKey int) uint32 {
      47           1 :         // We intentionally round down to reduce probing cost a little bit
      48           1 :         n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2)
      49           1 :         if n < 1 {
      50           1 :                 n = 1
      51           1 :         }
      52           1 :         if n > 30 {
      53           0 :                 n = 30
      54           0 :         }
      55           1 :         return n
      56             : }
      57             : 
      58             : // extend appends n zero bytes to b. It returns the overall slice (of length
      59             : // n+len(originalB)) and the slice of n trailing zeroes.
      60           1 : func extend(b []byte, n int) (overall, trailer []byte) {
      61           1 :         want := n + len(b)
      62           1 :         if want <= cap(b) {
      63           0 :                 overall = b[:want]
      64           0 :                 trailer = overall[len(b):]
      65           0 :                 clear(trailer)
      66           1 :         } else {
      67           1 :                 // Grow the capacity exponentially, with a 1KiB minimum.
      68           1 :                 c := 1024
      69           1 :                 for c < want {
      70           0 :                         c += c / 4
      71           0 :                 }
      72           1 :                 overall = make([]byte, want, c)
      73           1 :                 trailer = overall[len(b):]
      74           1 :                 copy(overall, b)
      75             :         }
      76           1 :         return overall, trailer
      77             : }
      78             : 
      79             : // hash implements a hashing algorithm similar to the Murmur hash.
      80           1 : func hash(b []byte) uint32 {
      81           1 :         const (
      82           1 :                 seed = 0xbc9f1d34
      83           1 :                 m    = 0xc6a4a793
      84           1 :         )
      85           1 :         h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m))
      86           1 :         for ; len(b) >= 4; b = b[4:] {
      87           1 :                 h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
      88           1 :                 h *= m
      89           1 :                 h ^= h >> 16
      90           1 :         }
      91             : 
      92             :         // The code below first casts each byte to a signed 8-bit integer. This is
      93             :         // necessary to match RocksDB's behavior. Note that the `byte` type in Go is
      94             :         // unsigned. What is the difference between casting a signed 8-bit value vs
      95             :         // unsigned 8-bit value into an unsigned 32-bit value?
      96             :         // Sign-extension. Consider the value 250 which has the bit pattern 11111010:
      97             :         //
      98             :         //   uint32(250)        = 00000000000000000000000011111010
      99             :         //   uint32(int8(250))  = 11111111111111111111111111111010
     100             :         //
     101             :         // Note that the original LevelDB code did not explicitly cast to a signed
     102             :         // 8-bit value which left the behavior dependent on whether C characters were
     103             :         // signed or unsigned which is a compiler flag for gcc (-funsigned-char).
     104           1 :         switch len(b) {
     105           1 :         case 3:
     106           1 :                 h += uint32(int8(b[2])) << 16
     107           1 :                 fallthrough
     108           1 :         case 2:
     109           1 :                 h += uint32(int8(b[1])) << 8
     110           1 :                 fallthrough
     111           1 :         case 1:
     112           1 :                 h += uint32(int8(b[0]))
     113           1 :                 h *= m
     114           1 :                 h ^= h >> 24
     115             :         }
     116           1 :         return h
     117             : }
     118             : 
     119             : const hashBlockLen = 16384
     120             : 
     121             : type hashBlock [hashBlockLen]uint32
     122             : 
     123             : var hashBlockPool = sync.Pool{
     124           1 :         New: func() interface{} {
     125           1 :                 return &hashBlock{}
     126           1 :         },
     127             : }
     128             : 
// tableFilterWriter accumulates key hashes through AddKey and encodes them
// into a table filter in Finish.
type tableFilterWriter struct {
	// bitsPerKey is used by Finish both to size the filter and to derive the
	// number of probes per key.
	bitsPerKey int

	// numHashes is the number of hashes stored in blocks since the writer
	// was created or last finished.
	numHashes int
	// We store the hashes in blocks.
	blocks   []*hashBlock
	// lastHash is the most recently stored hash. AddKey drops a key whose
	// hash equals it, so runs of identical hashes are stored once.
	lastHash uint32

	// Initial "in-line" storage for the blocks slice (to avoid some small
	// allocations).
	blocksBuf [16]*hashBlock
}
     141             : 
     142           1 : func newTableFilterWriter(bitsPerKey int) *tableFilterWriter {
     143           1 :         w := &tableFilterWriter{
     144           1 :                 bitsPerKey: bitsPerKey,
     145           1 :         }
     146           1 :         w.blocks = w.blocksBuf[:0]
     147           1 :         return w
     148           1 : }
     149             : 
     150             : // AddKey implements the base.FilterWriter interface.
     151           1 : func (w *tableFilterWriter) AddKey(key []byte) {
     152           1 :         h := hash(key)
     153           1 :         if w.numHashes != 0 && h == w.lastHash {
     154           1 :                 return
     155           1 :         }
     156           1 :         ofs := w.numHashes % hashBlockLen
     157           1 :         if ofs == 0 {
     158           1 :                 // Time for a new block.
     159           1 :                 w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock))
     160           1 :         }
     161           1 :         w.blocks[len(w.blocks)-1][ofs] = h
     162           1 :         w.numHashes++
     163           1 :         w.lastHash = h
     164             : }
     165             : 
     166             : // Finish implements the base.FilterWriter interface.
     167           1 : func (w *tableFilterWriter) Finish(buf []byte) []byte {
     168           1 :         // The table filter format matches the RocksDB full-file filter format.
     169           1 :         var nLines int
     170           1 :         if w.numHashes != 0 {
     171           1 :                 nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits)
     172           1 :                 // Make nLines an odd number to make sure more bits are involved when
     173           1 :                 // determining which block.
     174           1 :                 if nLines%2 == 0 {
     175           1 :                         nLines++
     176           1 :                 }
     177             :         }
     178             : 
     179           1 :         nBytes := nLines * cacheLineSize
     180           1 :         // +5: 4 bytes for num-lines, 1 byte for num-probes
     181           1 :         buf, filter := extend(buf, nBytes+5)
     182           1 : 
     183           1 :         if nLines != 0 {
     184           1 :                 nProbes := calculateProbes(w.bitsPerKey)
     185           1 :                 for bIdx, b := range w.blocks {
     186           1 :                         length := hashBlockLen
     187           1 :                         if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 {
     188           1 :                                 length = w.numHashes % hashBlockLen
     189           1 :                         }
     190           1 :                         for _, h := range b[:length] {
     191           1 :                                 delta := h>>17 | h<<15 // rotate right 17 bits
     192           1 :                                 b := (h % uint32(nLines)) * (cacheLineBits)
     193           1 :                                 for i := uint32(0); i < nProbes; i++ {
     194           1 :                                         bitPos := b + (h % cacheLineBits)
     195           1 :                                         filter[bitPos/8] |= (1 << (bitPos % 8))
     196           1 :                                         h += delta
     197           1 :                                 }
     198             :                         }
     199             :                 }
     200           1 :                 filter[nBytes] = byte(nProbes)
     201           1 :                 binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines))
     202             :         }
     203             : 
     204             :         // Release the hash blocks.
     205           1 :         for i, b := range w.blocks {
     206           1 :                 hashBlockPool.Put(b)
     207           1 :                 w.blocks[i] = nil
     208           1 :         }
     209           1 :         w.blocks = w.blocks[:0]
     210           1 :         w.numHashes = 0
     211           1 :         return buf
     212             : }
     213             : 
// FilterPolicy implements the FilterPolicy interface from the pebble package.
//
// The integer value is the approximate number of bits used per key. A good
// value is 10, which yields a filter with ~ 1% false positive rate.
type FilterPolicy int

// Compile-time assertion that FilterPolicy satisfies base.FilterPolicy.
var _ base.FilterPolicy = FilterPolicy(0)
     221             : 
// Name implements the pebble.FilterPolicy interface.
func (p FilterPolicy) Name() string {
	// This string looks arbitrary, but its value is written to LevelDB .sst
	// files, and should be this exact value to be compatible with those files
	// and with the C++ LevelDB code.
	//
	// NOTE(review): the literal carries a "rocksdb." prefix while the comment
	// above refers to LevelDB; presumably the name that must be matched is
	// the RocksDB one - confirm and reword the comment accordingly.
	return "rocksdb.BuiltinBloomFilter"
}
     229             : 
     230             : // MayContain implements the pebble.FilterPolicy interface.
     231           1 : func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool {
     232           1 :         switch ftype {
     233           1 :         case base.TableFilter:
     234           1 :                 return tableFilter(f).MayContain(key)
     235           0 :         default:
     236           0 :                 panic(fmt.Sprintf("unknown filter type: %v", ftype))
     237             :         }
     238             : }
     239             : 
     240             : // NewWriter implements the pebble.FilterPolicy interface.
     241           1 : func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter {
     242           1 :         switch ftype {
     243           1 :         case base.TableFilter:
     244           1 :                 return newTableFilterWriter(int(p))
     245           0 :         default:
     246           0 :                 panic(fmt.Sprintf("unknown filter type: %v", ftype))
     247             :         }
     248             : }

Generated by: LCOV version 1.14