/src/rocksdb/util/dynamic_bloom.h
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <array>
#include <atomic>

#include "rocksdb/slice.h"
#include "table/multiget_context.h"
#include "util/atomic.h"
#include "util/hash.h"

namespace ROCKSDB_NAMESPACE {

class Slice;
class Allocator;
class Logger;

// A Bloom filter intended only to be used in memory, never serialized in a way
// that could lead to schema incompatibility. Supports opt-in lock-free
// concurrent access.
//
// This implementation is also intended for applications generally preferring
// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
// For 1% FP rate, that means that the latency of a look-up triggered by an FP
// should be less than roughly 100x the cost of a Bloom filter op.
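// (To see where the 100x comes from: relative to a standard Bloom filter with
// FP rate p, each op here saves about 0.1x of a BF op but adds about 0.1*p of
// extra false positives, so the trade is favorable as long as an FP-triggered
// look-up costs less than about 1/p BF ops, i.e. ~100x at p = 1%.)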
//
// For simplicity and performance, the current implementation requires
// num_probes to be a multiple of two and <= 10.
//
class DynamicBloom {
 public:
  // allocator: allocator used for the filter's bit array, so that its memory
  //            usage can be tracked
  // total_bits: fixed total bits for the bloom filter
  // num_probes: number of hash probes for a single key
  // huge_page_tlb_size: if >0, try to allocate the bloom filter's bytes from
  //                     huge pages of this size. Huge pages need to be
  //                     reserved for the allocation to succeed, e.g.:
  //                       sysctl -w vm.nr_hugepages=20
  //                     See the Linux doc Documentation/vm/hugetlbpage.txt
  explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
                        uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
                        Logger* logger = nullptr);

  ~DynamicBloom() {}

  // Assumes a single thread is adding keys to the DynamicBloom.
  void Add(const Slice& key);

  // Like Add, but may be called concurrently with other functions. Does not
  // establish a happens-before relationship with other functions, so some
  // external mechanism is required to ensure other threads can see the change.
  void AddConcurrently(const Slice& key);

  // Assumes single-threaded access to this function.
  void AddHash(uint32_t hash);

  // Like AddHash, but may be called concurrently with other functions. Does
  // not establish a happens-before relationship with other functions, so some
  // external mechanism is required to ensure other threads can see the change.
  void AddHashConcurrently(uint32_t hash);
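
  // Illustrative sketch (not part of this header; `published` is a
  // hypothetical std::atomic<bool> owned by the caller): since the
  // *Concurrently variants only avoid data races, a writer must publish its
  // updates through some external release/acquire point before readers can
  // rely on MayContain, for example:
  //
  //   // writer thread
  //   bloom->AddConcurrently(key);
  //   published.store(true, std::memory_order_release);
  //
  //   // reader thread
  //   if (published.load(std::memory_order_acquire)) {
  //     bool key_may_exist = bloom->MayContain(key);
  //   }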

  // Multithreaded access to this function is OK
  bool MayContain(const Slice& key) const;

  // Batched version of MayContain: checks num_keys keys and writes one result
  // per key to may_match. Multithreaded access is OK.
  void MayContain(int num_keys, Slice* keys, bool* may_match) const;

  // Multithreaded access to this function is OK
  bool MayContainHash(uint32_t hash) const;

  // Prefetch the word that a probe for hash h would read first.
  void Prefetch(uint32_t h);

 private:
  // Length of the structure, in 64-bit words. For this structure, "word"
  // will always refer to 64-bit words.
  uint32_t kLen;
  // We make the k probes in pairs, two for each 64-bit read/write. Thus,
  // this stores k/2, the number of words to double-probe.
  const uint32_t kNumDoubleProbes;

  RelaxedAtomic<uint64_t>* data_;

  // or_func(ptr, mask) should effect *ptr |= mask with the appropriate
  // concurrency safety, working with 64-bit words.
  template <typename OrFunc>
  void AddHash(uint32_t hash, const OrFunc& or_func);

  bool DoubleProbe(uint32_t h32, size_t a) const;
};
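
// Example usage (an illustrative sketch, not taken from RocksDB itself; the
// choice of the Arena allocator and the key values are assumptions made for
// the example):
//
//   Arena arena;
//   DynamicBloom bloom(&arena, /*total_bits=*/8 * 1024, /*num_probes=*/6);
//   bloom.Add("foo");
//   bloom.Add("bar");
//   assert(bloom.MayContain("foo"));  // always true once added
//   // Never added: false except for the filter's small false-positive rate.
//   bool maybe_baz = bloom.MayContain("baz");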

inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }

inline void DynamicBloom::AddConcurrently(const Slice& key) {
  AddHashConcurrently(BloomHash(key));
}

inline void DynamicBloom::AddHash(uint32_t hash) {
  AddHash(hash, [](RelaxedAtomic<uint64_t>* ptr, uint64_t mask) {
    ptr->StoreRelaxed(ptr->LoadRelaxed() | mask);
  });
}

inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
  AddHash(hash, [](RelaxedAtomic<uint64_t>* ptr, uint64_t mask) {
    // Happens-before between AddHash and MayContainHash is handled by
    // access to versions_->LastSequence(), so all we have to do here is
    // avoid races (so we don't give the compiler a license to mess up
    // our code) and not lose bits. std::memory_order_relaxed is enough
    // for that.
    if ((mask & ptr->LoadRelaxed()) != mask) {
      ptr->FetchOrRelaxed(mask);
    }
  });
}

inline bool DynamicBloom::MayContain(const Slice& key) const {
  return (MayContainHash(BloomHash(key)));
}

inline void DynamicBloom::MayContain(int num_keys, Slice* keys,
                                     bool* may_match) const {
  std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
  std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
  for (int i = 0; i < num_keys; ++i) {
    hashes[i] = BloomHash(keys[i]);
    size_t a = FastRange32(hashes[i], kLen);
    PREFETCH(data_ + a, 0, 3);
    byte_offsets[i] = a;
  }

  for (int i = 0; i < num_keys; i++) {
    may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
  }
}

#if defined(_MSC_VER)
#pragma warning(push)
// local variable is initialized but not referenced
#pragma warning(disable : 4189)
#endif
inline void DynamicBloom::Prefetch(uint32_t h32) {
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif

// Speed hacks in this implementation:
// * Uses fastrange instead of %
// * Minimum logic to determine first (and all) probed memory addresses.
//   (Uses constant bit-xor offsets from the starting probe address.)
// * (Major) Two probes per 64-bit memory fetch/write.
//   Code simplification / optimization: only allow even number of probes.
// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
//   least on recent CPUs, integer multiplication is very cheap. Each 64-bit
//   remix provides five pairs of bit addresses within a uint64_t.)
//   Code simplification / optimization: only allow up to 10 probes, from a
//   single 64-bit remix.
//
// The FP rate penalty for this implementation, vs. standard Bloom filter, is
// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
// This implementation does not explicitly use the cache line size, but is
// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
//
// NB: could easily be upgraded to support a 64-bit hash and
// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
// because of false positives.)
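//
// Worked example (derived from the code below, for illustration): with the
// default num_probes = 6, kNumDoubleProbes = 3. The 32-bit hash is expanded
// to 64 bits by multiplying with the golden-ratio constant; each iteration
// then consumes 12 bits of that state (two 6-bit in-word bit positions)
// before rotating it right by 12 bits, so five pairs (10 probes) fit in one
// remix. The words probed are data_[a ^ 0], data_[a ^ 1], data_[a ^ 2], ...,
// which all fall within one small power-of-two-aligned group of words, which
// is what makes the probes effectively cache-local. Multiplying the penalties
// above, the total FP penalty vs. a standard Bloom filter is roughly
// 1.12 * 1.15, i.e. about 1.29x.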

inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
  return DoubleProbe(h32, a);
}

inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    uint64_t val = data_[byte_offset ^ i].LoadRelaxed();
    if (i + 1 >= kNumDoubleProbes) {
      return (val & mask) == mask;
    } else if ((val & mask) != mask) {
      return false;
    }
    h = (h >> 12) | (h << 52);
  }
}

template <typename OrFunc>
inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    or_func(&data_[a ^ i], mask);
    if (i + 1 >= kNumDoubleProbes) {
      return;
    }
    h = (h >> 12) | (h << 52);
  }
}

}  // namespace ROCKSDB_NAMESPACE