/src/rocksdb/util/dynamic_bloom.h
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <array>
#include <atomic>

#include "rocksdb/slice.h"
#include "table/multiget_context.h"
#include "util/atomic.h"
#include "util/hash.h"

namespace ROCKSDB_NAMESPACE {

class Slice;
class Allocator;
class Logger;

// A Bloom filter intended only to be used in memory, never serialized in a way
// that could lead to schema incompatibility. Supports opt-in lock-free
// concurrent access.
//
// This implementation is also intended for applications generally preferring
// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
// For 1% FP rate, that means that the latency of a look-up triggered by an FP
// should be less than roughly 100x the cost of a Bloom filter op.
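// (To see where the 100x comes from: relative to a standard Bloom filter with
// FP rate p, each op here saves about 0.1x of a BF op but adds about 0.1*p of
// extra false positives, so the trade is favorable as long as an FP-triggered
// look-up costs less than about 1/p BF ops, i.e. ~100x at p = 1%.)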
//
// For simplicity and performance, the current implementation requires
// num_probes to be a multiple of two and <= 10.
//
class DynamicBloom {
 public:
  // allocator: allocator used for the filter's bit array, so that its memory
  //            usage can be tracked
  // total_bits: fixed total bits for the bloom filter
  // num_probes: number of hash probes for a single key
  // huge_page_tlb_size: if >0, try to allocate the bloom filter's bytes from
  //                     huge pages of this size. Huge pages need to be
  //                     reserved for the allocation to succeed, e.g.:
  //                       sysctl -w vm.nr_hugepages=20
  //                     See the Linux doc Documentation/vm/hugetlbpage.txt
  explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
                        uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
                        Logger* logger = nullptr);

  ~DynamicBloom() {}

  // Assumes a single thread is adding keys to the DynamicBloom.
  void Add(const Slice& key);

  // Like Add, but may be called concurrently with other functions. Does not
  // establish a happens-before relationship with other functions, so some
  // external mechanism is required to ensure other threads can see the change.
  void AddConcurrently(const Slice& key);

  // Assumes single-threaded access to this function.
  void AddHash(uint32_t hash);

  // Like AddHash, but may be called concurrently with other functions. Does
  // not establish a happens-before relationship with other functions, so some
  // external mechanism is required to ensure other threads can see the change.
  void AddHashConcurrently(uint32_t hash);
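
  // Illustrative sketch (not part of this header; `published` is a
  // hypothetical std::atomic<bool> owned by the caller): since the
  // *Concurrently variants only avoid data races, a writer must publish its
  // updates through some external release/acquire point before readers can
  // rely on MayContain, for example:
  //
  //   // writer thread
  //   bloom->AddConcurrently(key);
  //   published.store(true, std::memory_order_release);
  //
  //   // reader thread
  //   if (published.load(std::memory_order_acquire)) {
  //     bool key_may_exist = bloom->MayContain(key);
  //   }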

  // Multithreaded access to this function is OK
  bool MayContain(const Slice& key) const;

  // Batched version of MayContain: checks num_keys keys and writes one result
  // per key to may_match. Multithreaded access is OK.
  void MayContain(int num_keys, Slice* keys, bool* may_match) const;

  // Multithreaded access to this function is OK
  bool MayContainHash(uint32_t hash) const;

  // Prefetch the word that a probe for hash h would read first.
  void Prefetch(uint32_t h);

 private:
  // Length of the structure, in 64-bit words. For this structure, "word"
  // will always refer to 64-bit words.
  uint32_t kLen;
  // We make the k probes in pairs, two for each 64-bit read/write. Thus,
  // this stores k/2, the number of words to double-probe.
  const uint32_t kNumDoubleProbes;

  RelaxedAtomic<uint64_t>* data_;

  // or_func(ptr, mask) should effect *ptr |= mask with the appropriate
  // concurrency safety, working with 64-bit words.
  template <typename OrFunc>
  void AddHash(uint32_t hash, const OrFunc& or_func);

  bool DoubleProbe(uint32_t h32, size_t a) const;
};
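
// Example usage (an illustrative sketch, not taken from RocksDB itself; the
// choice of the Arena allocator and the key values are assumptions made for
// the example):
//
//   Arena arena;
//   DynamicBloom bloom(&arena, /*total_bits=*/8 * 1024, /*num_probes=*/6);
//   bloom.Add("foo");
//   bloom.Add("bar");
//   assert(bloom.MayContain("foo"));  // always true once added
//   // Never added: false except for the filter's small false-positive rate.
//   bool maybe_baz = bloom.MayContain("baz");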

inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }

inline void DynamicBloom::AddConcurrently(const Slice& key) {
  AddHashConcurrently(BloomHash(key));
}

inline void DynamicBloom::AddHash(uint32_t hash) {
  AddHash(hash, [](RelaxedAtomic<uint64_t>* ptr, uint64_t mask) {
    ptr->StoreRelaxed(ptr->LoadRelaxed() | mask);
  });
}

inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
  AddHash(hash, [](RelaxedAtomic<uint64_t>* ptr, uint64_t mask) {
    // Happens-before between AddHash and MayContainHash is handled by
    // access to versions_->LastSequence(), so all we have to do here is
    // avoid races (so we don't give the compiler a license to mess up
    // our code) and not lose bits. std::memory_order_relaxed is enough
    // for that.
    if ((mask & ptr->LoadRelaxed()) != mask) {
      ptr->FetchOrRelaxed(mask);
    }
  });
}

inline bool DynamicBloom::MayContain(const Slice& key) const {
  return (MayContainHash(BloomHash(key)));
}

inline void DynamicBloom::MayContain(int num_keys, Slice* keys,
                                     bool* may_match) const {
  std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
  std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
  for (int i = 0; i < num_keys; ++i) {
    hashes[i] = BloomHash(keys[i]);
    size_t a = FastRange32(hashes[i], kLen);
    PREFETCH(data_ + a, 0, 3);
    byte_offsets[i] = a;
  }

  for (int i = 0; i < num_keys; i++) {
    may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
  }
}

#if defined(_MSC_VER)
#pragma warning(push)
// local variable is initialized but not referenced
#pragma warning(disable : 4189)
#endif
inline void DynamicBloom::Prefetch(uint32_t h32) {
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif

// Speed hacks in this implementation:
// * Uses fastrange instead of %
// * Minimum logic to determine first (and all) probed memory addresses.
//   (Uses constant bit-xor offsets from the starting probe address.)
// * (Major) Two probes per 64-bit memory fetch/write.
//   Code simplification / optimization: only allow even number of probes.
// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
//   least on recent CPUs, integer multiplication is very cheap. Each 64-bit
//   remix provides five pairs of bit addresses within a uint64_t.)
//   Code simplification / optimization: only allow up to 10 probes, from a
//   single 64-bit remix.
//
// The FP rate penalty for this implementation, vs. standard Bloom filter, is
// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
// This implementation does not explicitly use the cache line size, but is
// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
//
// NB: could easily be upgraded to support a 64-bit hash and
// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
// because of false positives.)
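//
// Worked example (derived from the code below, for illustration): with the
// default num_probes = 6, kNumDoubleProbes = 3. The 32-bit hash is expanded
// to 64 bits by multiplying with the golden-ratio constant; each iteration
// then consumes 12 bits of that state (two 6-bit in-word bit positions)
// before rotating it right by 12 bits, so five pairs (10 probes) fit in one
// remix. The words probed are data_[a ^ 0], data_[a ^ 1], data_[a ^ 2], ...,
// which all fall within one small power-of-two-aligned group of words, which
// is what makes the probes effectively cache-local. Multiplying the penalties
// above, the total FP penalty vs. a standard Bloom filter is roughly
// 1.12 * 1.15, i.e. about 1.29x.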

inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
  return DoubleProbe(h32, a);
}

inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    uint64_t val = data_[byte_offset ^ i].LoadRelaxed();
    if (i + 1 >= kNumDoubleProbes) {
      return (val & mask) == mask;
    } else if ((val & mask) != mask) {
      return false;
    }
    h = (h >> 12) | (h << 52);
  }
}

template <typename OrFunc>
inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    or_func(&data_[a ^ i], mask);
    if (i + 1 >= kNumDoubleProbes) {
      return;
    }
    h = (h >> 12) | (h << 52);
  }
}

}  // namespace ROCKSDB_NAMESPACE