Line data Source code
1 : // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 : // of this source code is governed by a BSD-style license that can be found in 3 : // the LICENSE file. 4 : 5 : // Package bloom implements Bloom filters. 6 : package bloom // import "github.com/cockroachdb/pebble/bloom" 7 : 8 : import ( 9 : "encoding/binary" 10 : "fmt" 11 : "sync" 12 : 13 : "github.com/cockroachdb/pebble/internal/base" 14 : ) 15 : 16 : const ( 17 : cacheLineSize = 64 18 : cacheLineBits = cacheLineSize * 8 19 : ) 20 : 21 : type tableFilter []byte 22 : 23 1 : func (f tableFilter) MayContain(key []byte) bool { 24 1 : if len(f) <= 5 { 25 1 : return false 26 1 : } 27 1 : n := len(f) - 5 28 1 : nProbes := f[n] 29 1 : nLines := binary.LittleEndian.Uint32(f[n+1:]) 30 1 : cacheLineBits := 8 * (uint32(n) / nLines) 31 1 : 32 1 : h := hash(key) 33 1 : delta := h>>17 | h<<15 34 1 : b := (h % nLines) * cacheLineBits 35 1 : 36 1 : for j := uint8(0); j < nProbes; j++ { 37 1 : bitPos := b + (h % cacheLineBits) 38 1 : if f[bitPos/8]&(1<<(bitPos%8)) == 0 { 39 1 : return false 40 1 : } 41 1 : h += delta 42 : } 43 1 : return true 44 : } 45 : 46 1 : func calculateProbes(bitsPerKey int) uint32 { 47 1 : // We intentionally round down to reduce probing cost a little bit 48 1 : n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2) 49 1 : if n < 1 { 50 1 : n = 1 51 1 : } 52 1 : if n > 30 { 53 0 : n = 30 54 0 : } 55 1 : return n 56 : } 57 : 58 : // extend appends n zero bytes to b. It returns the overall slice (of length 59 : // n+len(originalB)) and the slice of n trailing zeroes. 60 1 : func extend(b []byte, n int) (overall, trailer []byte) { 61 1 : want := n + len(b) 62 1 : if want <= cap(b) { 63 0 : overall = b[:want] 64 0 : trailer = overall[len(b):] 65 0 : for i := range trailer { 66 0 : trailer[i] = 0 67 0 : } 68 1 : } else { 69 1 : // Grow the capacity exponentially, with a 1KiB minimum. 70 1 : c := 1024 71 1 : for c < want { 72 0 : c += c / 4 73 0 : } 74 1 : overall = make([]byte, want, c) 75 1 : trailer = overall[len(b):] 76 1 : copy(overall, b) 77 : } 78 1 : return overall, trailer 79 : } 80 : 81 : // hash implements a hashing algorithm similar to the Murmur hash. 82 1 : func hash(b []byte) uint32 { 83 1 : const ( 84 1 : seed = 0xbc9f1d34 85 1 : m = 0xc6a4a793 86 1 : ) 87 1 : h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m)) 88 1 : for ; len(b) >= 4; b = b[4:] { 89 1 : h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 90 1 : h *= m 91 1 : h ^= h >> 16 92 1 : } 93 : 94 : // The code below first casts each byte to a signed 8-bit integer. This is 95 : // necessary to match RocksDB's behavior. Note that the `byte` type in Go is 96 : // unsigned. What is the difference between casting a signed 8-bit value vs 97 : // unsigned 8-bit value into an unsigned 32-bit value? 98 : // Sign-extension. Consider the value 250 which has the bit pattern 11111010: 99 : // 100 : // uint32(250) = 00000000000000000000000011111010 101 : // uint32(int8(250)) = 11111111111111111111111111111010 102 : // 103 : // Note that the original LevelDB code did not explicitly cast to a signed 104 : // 8-bit value which left the behavior dependent on whether C characters were 105 : // signed or unsigned which is a compiler flag for gcc (-funsigned-char). 106 1 : switch len(b) { 107 1 : case 3: 108 1 : h += uint32(int8(b[2])) << 16 109 1 : fallthrough 110 1 : case 2: 111 1 : h += uint32(int8(b[1])) << 8 112 1 : fallthrough 113 1 : case 1: 114 1 : h += uint32(int8(b[0])) 115 1 : h *= m 116 1 : h ^= h >> 24 117 : } 118 1 : return h 119 : } 120 : 121 : const hashBlockLen = 16384 122 : 123 : type hashBlock [hashBlockLen]uint32 124 : 125 : var hashBlockPool = sync.Pool{ 126 1 : New: func() interface{} { 127 1 : return &hashBlock{} 128 1 : }, 129 : } 130 : 131 : type tableFilterWriter struct { 132 : bitsPerKey int 133 : 134 : numHashes int 135 : // We store the hashes in blocks. 136 : blocks []*hashBlock 137 : lastHash uint32 138 : 139 : // Initial "in-line" storage for the blocks slice (to avoid some small 140 : // allocations). 141 : blocksBuf [16]*hashBlock 142 : } 143 : 144 1 : func newTableFilterWriter(bitsPerKey int) *tableFilterWriter { 145 1 : w := &tableFilterWriter{ 146 1 : bitsPerKey: bitsPerKey, 147 1 : } 148 1 : w.blocks = w.blocksBuf[:0] 149 1 : return w 150 1 : } 151 : 152 : // AddKey implements the base.FilterWriter interface. 153 1 : func (w *tableFilterWriter) AddKey(key []byte) { 154 1 : h := hash(key) 155 1 : if w.numHashes != 0 && h == w.lastHash { 156 1 : return 157 1 : } 158 1 : ofs := w.numHashes % hashBlockLen 159 1 : if ofs == 0 { 160 1 : // Time for a new block. 161 1 : w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock)) 162 1 : } 163 1 : w.blocks[len(w.blocks)-1][ofs] = h 164 1 : w.numHashes++ 165 1 : w.lastHash = h 166 : } 167 : 168 : // Finish implements the base.FilterWriter interface. 169 1 : func (w *tableFilterWriter) Finish(buf []byte) []byte { 170 1 : // The table filter format matches the RocksDB full-file filter format. 171 1 : var nLines int 172 1 : if w.numHashes != 0 { 173 1 : nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits) 174 1 : // Make nLines an odd number to make sure more bits are involved when 175 1 : // determining which block. 176 1 : if nLines%2 == 0 { 177 1 : nLines++ 178 1 : } 179 : } 180 : 181 1 : nBytes := nLines * cacheLineSize 182 1 : // +5: 4 bytes for num-lines, 1 byte for num-probes 183 1 : buf, filter := extend(buf, nBytes+5) 184 1 : 185 1 : if nLines != 0 { 186 1 : nProbes := calculateProbes(w.bitsPerKey) 187 1 : for bIdx, b := range w.blocks { 188 1 : length := hashBlockLen 189 1 : if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 { 190 1 : length = w.numHashes % hashBlockLen 191 1 : } 192 1 : for _, h := range b[:length] { 193 1 : delta := h>>17 | h<<15 // rotate right 17 bits 194 1 : b := (h % uint32(nLines)) * (cacheLineBits) 195 1 : for i := uint32(0); i < nProbes; i++ { 196 1 : bitPos := b + (h % cacheLineBits) 197 1 : filter[bitPos/8] |= (1 << (bitPos % 8)) 198 1 : h += delta 199 1 : } 200 : } 201 : } 202 1 : filter[nBytes] = byte(nProbes) 203 1 : binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines)) 204 : } 205 : 206 : // Release the hash blocks. 207 1 : for i, b := range w.blocks { 208 1 : hashBlockPool.Put(b) 209 1 : w.blocks[i] = nil 210 1 : } 211 1 : w.blocks = w.blocks[:0] 212 1 : w.numHashes = 0 213 1 : return buf 214 : } 215 : 216 : // FilterPolicy implements the FilterPolicy interface from the pebble package. 217 : // 218 : // The integer value is the approximate number of bits used per key. A good 219 : // value is 10, which yields a filter with ~ 1% false positive rate. 220 : type FilterPolicy int 221 : 222 : var _ base.FilterPolicy = FilterPolicy(0) 223 : 224 : // Name implements the pebble.FilterPolicy interface. 225 1 : func (p FilterPolicy) Name() string { 226 1 : // This string looks arbitrary, but its value is written to LevelDB .sst 227 1 : // files, and should be this exact value to be compatible with those files 228 1 : // and with the C++ LevelDB code. 229 1 : return "rocksdb.BuiltinBloomFilter" 230 1 : } 231 : 232 : // MayContain implements the pebble.FilterPolicy interface. 233 1 : func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool { 234 1 : switch ftype { 235 1 : case base.TableFilter: 236 1 : return tableFilter(f).MayContain(key) 237 0 : default: 238 0 : panic(fmt.Sprintf("unknown filter type: %v", ftype)) 239 : } 240 : } 241 : 242 : // NewWriter implements the pebble.FilterPolicy interface. 243 1 : func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter { 244 1 : switch ftype { 245 1 : case base.TableFilter: 246 1 : return newTableFilterWriter(int(p)) 247 0 : default: 248 0 : panic(fmt.Sprintf("unknown filter type: %v", ftype)) 249 : } 250 : }