Coverage Report

Created: 2026-05-16 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/rocksdb/util/bloom_impl.h
Line
Count
Source
1
//  Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
2
//  This source code is licensed under both the GPLv2 (found in the
3
//  COPYING file in the root directory) and Apache 2.0 License
4
//  (found in the LICENSE.Apache file in the root directory).
5
//
6
// Implementation details of various Bloom filter implementations used in
7
// RocksDB. (DynamicBloom is in a separate file for now because it
8
// supports concurrent write.)
9
10
#pragma once
11
#include <stddef.h>
12
#include <stdint.h>
13
14
#include <cmath>
15
16
#include "port/port.h"  // for PREFETCH
17
#include "rocksdb/slice.h"
18
#include "util/hash.h"
19
20
#ifdef __AVX2__
21
#include <immintrin.h>
22
#endif
23
24
namespace ROCKSDB_NAMESPACE {
25
26
class BloomMath {
27
 public:
28
  // False positive rate of a standard Bloom filter, for given ratio of
29
  // filter memory bits to added keys, and number of probes per operation.
30
  // (The false positive rate is effectively independent of scale, assuming
31
  // the implementation scales OK.)
32
0
  static double StandardFpRate(double bits_per_key, int num_probes) {
33
    // Standard very-good-estimate formula. See
34
    // https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
35
0
    return std::pow(1.0 - std::exp(-num_probes / bits_per_key), num_probes);
36
0
  }
37
38
  // False positive rate of a "blocked"/"shareded"/"cache-local" Bloom filter,
39
  // for given ratio of filter memory bits to added keys, number of probes per
40
  // operation (all within the given block or cache line size), and block or
41
  // cache line size.
42
  static double CacheLocalFpRate(double bits_per_key, int num_probes,
43
0
                                 int cache_line_bits) {
44
0
    if (bits_per_key <= 0.0) {
45
      // Fix a discontinuity
46
0
      return 1.0;
47
0
    }
48
0
    double keys_per_cache_line = cache_line_bits / bits_per_key;
49
    // A reasonable estimate is the average of the FP rates for one standard
50
    // deviation above and below the mean bucket occupancy. See
51
    // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#the-math
52
0
    double keys_stddev = std::sqrt(keys_per_cache_line);
53
0
    double crowded_fp = StandardFpRate(
54
0
        cache_line_bits / (keys_per_cache_line + keys_stddev), num_probes);
55
0
    double uncrowded_fp = StandardFpRate(
56
0
        cache_line_bits / (keys_per_cache_line - keys_stddev), num_probes);
57
0
    return (crowded_fp + uncrowded_fp) / 2;
58
0
  }
59
60
  // False positive rate of querying a new item against `num_keys` items, all
61
  // hashed to `fingerprint_bits` bits. (This assumes the fingerprint hashes
62
  // themselves are stored losslessly. See Section 4 of
63
  // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf)
64
0
  static double FingerprintFpRate(size_t num_keys, int fingerprint_bits) {
65
0
    double inv_fingerprint_space = std::pow(0.5, fingerprint_bits);
66
    // Base estimate assumes each key maps to a unique fingerprint.
67
    // Could be > 1 in extreme cases.
68
0
    double base_estimate = num_keys * inv_fingerprint_space;
69
    // To account for potential overlap, we choose between two formulas
70
0
    if (base_estimate > 0.0001) {
71
      // A very good formula assuming we don't construct a floating point
72
      // number extremely close to 1. Always produces a probability < 1.
73
0
      return 1.0 - std::exp(-base_estimate);
74
0
    } else {
75
      // A very good formula when base_estimate is far below 1. (Subtract
76
      // away the integral-approximated sum that some key has same hash as
77
      // one coming before it in a list.)
78
0
      return base_estimate - (base_estimate * base_estimate * 0.5);
79
0
    }
80
0
  }
81
82
  // Returns the probably of either of two independent(-ish) events
83
  // happening, given their probabilities. (This is useful for combining
84
  // results from StandardFpRate or CacheLocalFpRate with FingerprintFpRate
85
  // for a hash-efficient Bloom filter's FP rate. See Section 4 of
86
  // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf)
87
0
  static double IndependentProbabilitySum(double rate1, double rate2) {
88
    // Use formula that avoids floating point extremely close to 1 if
89
    // rates are extremely small.
90
0
    return rate1 + rate2 - (rate1 * rate2);
91
0
  }
92
};
93
94
// A fast, flexible, and accurate cache-local Bloom implementation with
95
// SIMD-optimized query performance (currently using AVX2 on Intel). Write
96
// performance and non-SIMD read are very good, benefiting from FastRange32
97
// used in place of % and single-cycle multiplication on recent processors.
98
//
99
// Most other SIMD Bloom implementations sacrifice flexibility and/or
100
// accuracy by requiring num_probes to be a power of two and restricting
101
// where each probe can occur in a cache line. This implementation sacrifices
102
// SIMD-optimization for add (might still be possible, especially with AVX512)
103
// in favor of allowing any num_probes, not crossing cache line boundary,
104
// and accuracy close to theoretical best accuracy for a cache-local Bloom.
105
// E.g. theoretical best for 10 bits/key, num_probes=6, and 512-bit bucket
106
// (Intel cache line size) is 0.9535% FP rate. This implementation yields
107
// about 0.957%. (Compare to LegacyLocalityBloomImpl<false> at 1.138%, or
108
// about 0.951% for 1024-bit buckets, cache line size for some ARM CPUs.)
109
//
110
// This implementation can use a 32-bit hash (let h2 be h1 * 0x9e3779b9) or
111
// a 64-bit hash (split into two uint32s). With many millions of keys, the
112
// false positive rate associated with using a 32-bit hash can dominate the
113
// false positive rate of the underlying filter. At 10 bits/key setting, the
114
// inflection point is about 40 million keys, so 32-bit hash is a bad idea
115
// with 10s of millions of keys or more.
116
//
117
// Despite accepting a 64-bit hash, this implementation uses 32-bit fastrange
118
// to pick a cache line, which can be faster than 64-bit in some cases.
119
// This only hurts accuracy as you get into 10s of GB for a single filter,
120
// and accuracy abruptly breaks down at 256GB (2^32 cache lines). Switch to
121
// 64-bit fastrange if you need filters so big. ;)
122
//
123
// Using only a 32-bit input hash within each cache line has negligible
124
// impact for any reasonable cache line / bucket size, for arbitrary filter
125
// size, and potentially saves intermediate data size in some cases vs.
126
// tracking full 64 bits. (Even in an implementation using 64-bit arithmetic
127
// to generate indices, I might do the same, as a single multiplication
128
// suffices to generate a sufficiently mixed 64 bits from 32 bits.)
129
//
130
// This implementation is currently tied to Intel cache line size, 64 bytes ==
131
// 512 bits. If there's sufficient demand for other cache line sizes, this is
132
// a pretty good implementation to extend, but slight performance enhancements
133
// are possible with an alternate implementation (probably not very compatible
134
// with SIMD):
135
// (1) Use rotation in addition to multiplication for remixing
136
// (like murmur hash). (Using multiplication alone *slightly* hurts accuracy
137
// because lower bits never depend on original upper bits.)
138
// (2) Extract more than one bit index from each re-mix. (Only if rotation
139
// or similar is part of remix, because otherwise you're making the
140
// multiplication-only problem worse.)
141
// (3) Re-mix full 64 bit hash, to get maximum number of bit indices per
142
// re-mix.
143
//
144
class FastLocalBloomImpl {
145
 public:
146
  // NOTE: this has only been validated to enough accuracy for producing
147
  // reasonable warnings / user feedback, not for making functional decisions.
148
  static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes,
149
0
                                int hash_bits) {
150
0
    return BloomMath::IndependentProbabilitySum(
151
0
        BloomMath::CacheLocalFpRate(8.0 * bytes / keys, num_probes,
152
0
                                    /*cache line bits*/ 512),
153
0
        BloomMath::FingerprintFpRate(keys, hash_bits));
154
0
  }
155
156
0
  static inline int ChooseNumProbes(int millibits_per_key) {
157
    // Since this implementation can (with AVX2) make up to 8 probes
158
    // for the same cost, we pick the most accurate num_probes, based
159
    // on actual tests of the implementation. Note that for higher
160
    // bits/key, the best choice for cache-local Bloom can be notably
161
    // smaller than standard bloom, e.g. 9 instead of 11 @ 16 b/k.
162
0
    if (millibits_per_key <= 2080) {
163
0
      return 1;
164
0
    } else if (millibits_per_key <= 3580) {
165
0
      return 2;
166
0
    } else if (millibits_per_key <= 5100) {
167
0
      return 3;
168
0
    } else if (millibits_per_key <= 6640) {
169
0
      return 4;
170
0
    } else if (millibits_per_key <= 8300) {
171
0
      return 5;
172
0
    } else if (millibits_per_key <= 10070) {
173
0
      return 6;
174
0
    } else if (millibits_per_key <= 11720) {
175
0
      return 7;
176
0
    } else if (millibits_per_key <= 14001) {
177
      // Would be something like <= 13800 but sacrificing *slightly* for
178
      // more settings using <= 8 probes.
179
0
      return 8;
180
0
    } else if (millibits_per_key <= 16050) {
181
0
      return 9;
182
0
    } else if (millibits_per_key <= 18300) {
183
0
      return 10;
184
0
    } else if (millibits_per_key <= 22001) {
185
0
      return 11;
186
0
    } else if (millibits_per_key <= 25501) {
187
0
      return 12;
188
0
    } else if (millibits_per_key > 50000) {
189
      // Top out at 24 probes (three sets of 8)
190
0
      return 24;
191
0
    } else {
192
      // Roughly optimal choices for remaining range
193
      // e.g.
194
      // 28000 -> 12, 28001 -> 13
195
      // 50000 -> 23, 50001 -> 24
196
0
      return (millibits_per_key - 1) / 2000 - 1;
197
0
    }
198
0
  }
199
200
  static inline void AddHash(uint32_t h1, uint32_t h2, uint32_t len_bytes,
201
0
                             int num_probes, char* data) {
202
0
    uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6;
203
0
    AddHashPrepared(h2, num_probes, data + bytes_to_cache_line);
204
0
  }
205
206
  static inline void AddHashPrepared(uint32_t h2, int num_probes,
207
0
                                     char* data_at_cache_line) {
208
0
    uint32_t h = h2;
209
0
    for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) {
210
      // 9-bit address within 512 bit cache line
211
0
      int bitpos = h >> (32 - 9);
212
0
      data_at_cache_line[bitpos >> 3] |= (uint8_t{1} << (bitpos & 7));
213
0
    }
214
0
  }
215
216
  static inline void PrepareHash(uint32_t h1, uint32_t len_bytes,
217
                                 const char* data,
218
0
                                 uint32_t /*out*/* byte_offset) {
219
0
    uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6;
220
0
    PREFETCH(data + bytes_to_cache_line, 0 /* rw */, 1 /* locality */);
221
0
    PREFETCH(data + bytes_to_cache_line + 63, 0 /* rw */, 1 /* locality */);
222
0
    *byte_offset = bytes_to_cache_line;
223
0
  }
224
225
  static inline bool HashMayMatch(uint32_t h1, uint32_t h2, uint32_t len_bytes,
226
0
                                  int num_probes, const char* data) {
227
0
    uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6;
228
0
    return HashMayMatchPrepared(h2, num_probes, data + bytes_to_cache_line);
229
0
  }
230
231
  static inline bool HashMayMatchPrepared(uint32_t h2, int num_probes,
232
0
                                          const char* data_at_cache_line) {
233
0
    uint32_t h = h2;
234
0
#ifdef __AVX2__
235
0
    int rem_probes = num_probes;
236
237
    // NOTE: For better performance for num_probes in {1, 2, 9, 10, 17, 18,
238
    // etc.} one can insert specialized code for rem_probes <= 2, bypassing
239
    // the SIMD code in those cases. There is a detectable but minor overhead
240
    // applied to other values of num_probes (when not statically determined),
241
    // but smoother performance curve vs. num_probes. But for now, when
242
    // in doubt, don't add unnecessary code.
243
244
    // Powers of 32-bit golden ratio, mod 2**32.
245
0
    const __m256i multipliers =
246
0
        _mm256_setr_epi32(0x00000001, 0x9e3779b9, 0xe35e67b1, 0x734297e9,
247
0
                          0x35fbe861, 0xdeb7c719, 0x448b211, 0x3459b749);
248
249
0
    for (;;) {
250
      // Eight copies of hash
251
0
      __m256i hash_vector = _mm256_set1_epi32(h);
252
253
      // Same effect as repeated multiplication by 0x9e3779b9 thanks to
254
      // associativity of multiplication.
255
0
      hash_vector = _mm256_mullo_epi32(hash_vector, multipliers);
256
257
      // Now the top 9 bits of each of the eight 32-bit values in
258
      // hash_vector are bit addresses for probes within the cache line.
259
      // While the platform-independent code uses byte addressing (6 bits
260
      // to pick a byte + 3 bits to pick a bit within a byte), here we work
261
      // with 32-bit words (4 bits to pick a word + 5 bits to pick a bit
262
      // within a word) because that works well with AVX2 and is equivalent
263
      // under little-endian.
264
265
      // Shift each right by 28 bits to get 4-bit word addresses.
266
0
      const __m256i word_addresses = _mm256_srli_epi32(hash_vector, 28);
267
268
      // Gather 32-bit values spread over 512 bits by 4-bit address. In
269
      // essence, we are dereferencing eight pointers within the cache
270
      // line.
271
      //
272
      // Option 1: AVX2 gather (seems to be a little slow - understandable)
273
      // const __m256i value_vector =
274
      //     _mm256_i32gather_epi32(static_cast<const int
275
      //     *>(data_at_cache_line),
276
      //                            word_addresses,
277
      //                            /*bytes / i32*/ 4);
278
      // END Option 1
279
      // Potentially unaligned as we're not *always* cache-aligned -> loadu
280
0
      const __m256i* mm_data =
281
0
          reinterpret_cast<const __m256i*>(data_at_cache_line);
282
0
      __m256i lower = _mm256_loadu_si256(mm_data);
283
0
      __m256i upper = _mm256_loadu_si256(mm_data + 1);
284
      // Option 2: AVX512VL permute hack
285
      // Only negligibly faster than Option 3, so not yet worth supporting
286
      // const __m256i value_vector =
287
      //    _mm256_permutex2var_epi32(lower, word_addresses, upper);
288
      // END Option 2
289
      // Option 3: AVX2 permute+blend hack
290
      // Use lowest three bits to order probing values, as if all from same
291
      // 256 bit piece.
292
0
      lower = _mm256_permutevar8x32_epi32(lower, word_addresses);
293
0
      upper = _mm256_permutevar8x32_epi32(upper, word_addresses);
294
      // Just top 1 bit of address, to select between lower and upper.
295
0
      const __m256i upper_lower_selector = _mm256_srai_epi32(hash_vector, 31);
296
      // Finally: the next 8 probed 32-bit values, in probing sequence order.
297
0
      const __m256i value_vector =
298
0
          _mm256_blendv_epi8(lower, upper, upper_lower_selector);
299
      // END Option 3
300
301
      // We might not need to probe all 8, so build a mask for selecting only
302
      // what we need. (The k_selector(s) could be pre-computed but that
303
      // doesn't seem to make a noticeable performance difference.)
304
0
      const __m256i zero_to_seven = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
305
      // Subtract rem_probes from each of those constants
306
0
      __m256i k_selector =
307
0
          _mm256_sub_epi32(zero_to_seven, _mm256_set1_epi32(rem_probes));
308
      // Negative after subtract -> use/select
309
      // Keep only high bit (logical shift right each by 31).
310
0
      k_selector = _mm256_srli_epi32(k_selector, 31);
311
312
      // Strip off the 4 bit word address (shift left)
313
0
      __m256i bit_addresses = _mm256_slli_epi32(hash_vector, 4);
314
      // And keep only 5-bit (32 - 27) bit-within-32-bit-word addresses.
315
0
      bit_addresses = _mm256_srli_epi32(bit_addresses, 27);
316
      // Build a bit mask
317
0
      const __m256i bit_mask = _mm256_sllv_epi32(k_selector, bit_addresses);
318
319
      // Like ((~value_vector) & bit_mask) == 0)
320
0
      bool match = _mm256_testc_si256(value_vector, bit_mask) != 0;
321
322
      // This check first so that it's easy for branch predictor to optimize
323
      // num_probes <= 8 case, making it free of unpredictable branches.
324
0
      if (rem_probes <= 8) {
325
0
        return match;
326
0
      } else if (!match) {
327
0
        return false;
328
0
      }
329
      // otherwise
330
      // Need another iteration. 0xab25f4c1 == golden ratio to the 8th power
331
0
      h *= 0xab25f4c1;
332
0
      rem_probes -= 8;
333
0
    }
334
#else
335
    for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) {
336
      // 9-bit address within 512 bit cache line
337
      int bitpos = h >> (32 - 9);
338
      if ((data_at_cache_line[bitpos >> 3] & (char(1) << (bitpos & 7))) == 0) {
339
        return false;
340
      }
341
    }
342
    return true;
343
#endif
344
0
  }
345
};
346
347
// A legacy Bloom filter implementation with no locality of probes (slow).
348
// It uses double hashing to generate a sequence of hash values.
349
// Asymptotic analysis is in [Kirsch,Mitzenmacher 2006], but known to have
350
// subtle accuracy flaws for practical sizes [Dillinger,Manolios 2004].
351
//
352
// DO NOT REUSE
353
//
354
class LegacyNoLocalityBloomImpl {
355
 public:
356
0
  static inline int ChooseNumProbes(int bits_per_key) {
357
    // We intentionally round down to reduce probing cost a little bit
358
0
    int num_probes = static_cast<int>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
359
0
    if (num_probes < 1) num_probes = 1;
360
0
    if (num_probes > 30) num_probes = 30;
361
0
    return num_probes;
362
0
  }
363
364
  static inline void AddHash(uint32_t h, uint32_t total_bits, int num_probes,
365
0
                             char* data) {
366
0
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
367
0
    for (int i = 0; i < num_probes; i++) {
368
0
      const uint32_t bitpos = h % total_bits;
369
0
      data[bitpos / 8] |= (1 << (bitpos % 8));
370
0
      h += delta;
371
0
    }
372
0
  }
373
374
  static inline bool HashMayMatch(uint32_t h, uint32_t total_bits,
375
0
                                  int num_probes, const char* data) {
376
0
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
377
0
    for (int i = 0; i < num_probes; i++) {
378
0
      const uint32_t bitpos = h % total_bits;
379
0
      if ((data[bitpos / 8] & (1 << (bitpos % 8))) == 0) {
380
0
        return false;
381
0
      }
382
0
      h += delta;
383
0
    }
384
0
    return true;
385
0
  }
386
};
387
388
// A legacy Bloom filter implementation with probes local to a single
389
// cache line (fast). Because SST files might be transported between
390
// platforms, the cache line size is a parameter rather than hard coded.
391
// (But if specified as a constant parameter, an optimizing compiler
392
// should take advantage of that.)
393
//
394
// When ExtraRotates is false, this implementation is notably deficient in
395
// accuracy. Specifically, it uses double hashing with a 1/512 chance of the
396
// increment being zero (when cache line size is 512 bits). Thus, there's a
397
// 1/512 chance of probing only one index, which we'd expect to incur about
398
// a 1/2 * 1/512 or absolute 0.1% FP rate penalty. More detail at
399
// https://github.com/facebook/rocksdb/issues/4120
400
//
401
// DO NOT REUSE
402
//
403
template <bool ExtraRotates>
404
class LegacyLocalityBloomImpl {
405
 private:
406
0
  static inline uint32_t GetLine(uint32_t h, uint32_t num_lines) {
407
0
    uint32_t offset_h = ExtraRotates ? (h >> 11) | (h << 21) : h;
408
0
    return offset_h % num_lines;
409
0
  }
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<false>::GetLine(unsigned int, unsigned int)
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<true>::GetLine(unsigned int, unsigned int)
410
411
 public:
412
  // NOTE: this has only been validated to enough accuracy for producing
413
  // reasonable warnings / user feedback, not for making functional decisions.
414
0
  static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes) {
415
0
    double bits_per_key = 8.0 * bytes / keys;
416
0
    double filter_rate = BloomMath::CacheLocalFpRate(bits_per_key, num_probes,
417
0
                                                     /*cache line bits*/ 512);
418
0
    if (!ExtraRotates) {
419
      // Good estimate of impact of flaw in index computation.
420
      // Adds roughly 0.002 around 50 bits/key and 0.001 around 100 bits/key.
421
      // The + 22 shifts it nicely to fit for lower bits/key.
422
0
      filter_rate += 0.1 / (bits_per_key * 0.75 + 22);
423
0
    } else {
424
      // Not yet validated
425
0
      assert(false);
426
0
    }
427
    // Always uses 32-bit hash
428
0
    double fingerprint_rate = BloomMath::FingerprintFpRate(keys, 32);
429
0
    return BloomMath::IndependentProbabilitySum(filter_rate, fingerprint_rate);
430
0
  }
431
432
  static inline void AddHash(uint32_t h, uint32_t num_lines, int num_probes,
433
0
                             char* data, int log2_cache_line_bytes) {
434
0
    const int log2_cache_line_bits = log2_cache_line_bytes + 3;
435
436
0
    char* data_at_offset =
437
0
        data + (GetLine(h, num_lines) << log2_cache_line_bytes);
438
0
    const uint32_t delta = (h >> 17) | (h << 15);
439
0
    for (int i = 0; i < num_probes; ++i) {
440
      // Mask to bit-within-cache-line address
441
0
      const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1);
442
0
      data_at_offset[bitpos / 8] |= (1 << (bitpos % 8));
443
0
      if (ExtraRotates) {
444
0
        h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits));
445
0
      }
446
0
      h += delta;
447
0
    }
448
0
  }
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<false>::AddHash(unsigned int, unsigned int, int, char*, int)
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<true>::AddHash(unsigned int, unsigned int, int, char*, int)
449
450
  static inline void PrepareHashMayMatch(uint32_t h, uint32_t num_lines,
451
                                         const char* data,
452
                                         uint32_t /*out*/* byte_offset,
453
0
                                         int log2_cache_line_bytes) {
454
0
    uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes;
455
0
    PREFETCH(data + b, 0 /* rw */, 1 /* locality */);
456
0
    PREFETCH(data + b + ((1 << log2_cache_line_bytes) - 1), 0 /* rw */,
457
0
             1 /* locality */);
458
0
    *byte_offset = b;
459
0
  }
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<false>::PrepareHashMayMatch(unsigned int, unsigned int, char const*, unsigned int*, int)
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<true>::PrepareHashMayMatch(unsigned int, unsigned int, char const*, unsigned int*, int)
460
461
  static inline bool HashMayMatch(uint32_t h, uint32_t num_lines,
462
                                  int num_probes, const char* data,
463
0
                                  int log2_cache_line_bytes) {
464
0
    uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes;
465
0
    return HashMayMatchPrepared(h, num_probes, data + b, log2_cache_line_bytes);
466
0
  }
467
468
  static inline bool HashMayMatchPrepared(uint32_t h, int num_probes,
469
                                          const char* data_at_offset,
470
0
                                          int log2_cache_line_bytes) {
471
0
    const int log2_cache_line_bits = log2_cache_line_bytes + 3;
472
473
0
    const uint32_t delta = (h >> 17) | (h << 15);
474
0
    for (int i = 0; i < num_probes; ++i) {
475
      // Mask to bit-within-cache-line address
476
0
      const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1);
477
0
      if (((data_at_offset[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
478
0
        return false;
479
0
      }
480
0
      if (ExtraRotates) {
481
0
        h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits));
482
0
      }
483
0
      h += delta;
484
0
    }
485
0
    return true;
486
0
  }
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<false>::HashMayMatchPrepared(unsigned int, int, char const*, int)
Unexecuted instantiation: rocksdb::LegacyLocalityBloomImpl<true>::HashMayMatchPrepared(unsigned int, int, char const*, int)
487
};
488
489
}  // namespace ROCKSDB_NAMESPACE