/src/rocksdb/memtable/skiplistrep.cc
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | #include <random> |
7 | | |
8 | | #include "db/memtable.h" |
9 | | #include "memory/arena.h" |
10 | | #include "memtable/inlineskiplist.h" |
11 | | #include "rocksdb/memtablerep.h" |
12 | | #include "rocksdb/utilities/options_type.h" |
13 | | #include "util/string_util.h" |
14 | | |
15 | | namespace ROCKSDB_NAMESPACE { |
16 | | namespace { |
17 | | class SkipListRep : public MemTableRep { |
18 | | InlineSkipList<const MemTableRep::KeyComparator&> skip_list_; |
19 | | const MemTableRep::KeyComparator& cmp_; |
20 | | const SliceTransform* transform_; |
21 | | const size_t lookahead_; |
22 | | |
23 | | friend class LookaheadIterator; |
24 | | |
25 | | public: |
26 | | explicit SkipListRep(const MemTableRep::KeyComparator& compare, |
27 | | Allocator* allocator, const SliceTransform* transform, |
28 | | const size_t lookahead) |
29 | 239k | : MemTableRep(allocator), |
30 | 239k | skip_list_(compare, allocator), |
31 | 239k | cmp_(compare), |
32 | 239k | transform_(transform), |
33 | 239k | lookahead_(lookahead) {} |
34 | | |
35 | 932k | KeyHandle Allocate(const size_t len, char** buf) override { |
36 | 932k | *buf = skip_list_.AllocateKey(len); |
37 | 932k | return static_cast<KeyHandle>(*buf); |
38 | 932k | } |
39 | | |
40 | | // Insert key into the list. |
41 | | // REQUIRES: nothing that compares equal to key is currently in the list. |
42 | 0 | void Insert(KeyHandle handle) override { |
43 | 0 | skip_list_.Insert(static_cast<char*>(handle)); |
44 | 0 | } |
45 | | |
46 | 495k | bool InsertKey(KeyHandle handle) override { |
47 | 495k | return skip_list_.Insert(static_cast<char*>(handle)); |
48 | 495k | } |
49 | | |
50 | 0 | void InsertWithHint(KeyHandle handle, void** hint) override { |
51 | 0 | skip_list_.InsertWithHint(static_cast<char*>(handle), hint); |
52 | 0 | } |
53 | | |
54 | 0 | bool InsertKeyWithHint(KeyHandle handle, void** hint) override { |
55 | 0 | return skip_list_.InsertWithHint(static_cast<char*>(handle), hint); |
56 | 0 | } |
57 | | |
58 | 0 | void InsertWithHintConcurrently(KeyHandle handle, void** hint) override { |
59 | 0 | skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), hint); |
60 | 0 | } |
61 | | |
62 | 0 | bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override { |
63 | 0 | return skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), |
64 | 0 | hint); |
65 | 0 | } |
66 | | |
67 | 0 | void InsertConcurrently(KeyHandle handle) override { |
68 | 0 | skip_list_.InsertConcurrently(static_cast<char*>(handle)); |
69 | 0 | } |
70 | | |
71 | 437k | bool InsertKeyConcurrently(KeyHandle handle) override { |
72 | 437k | return skip_list_.InsertConcurrently(static_cast<char*>(handle)); |
73 | 437k | } |
74 | | |
75 | | // Returns true iff an entry that compares equal to key is in the list. |
76 | 0 | bool Contains(const char* key) const override { |
77 | 0 | return skip_list_.Contains(key); |
78 | 0 | } |
79 | | |
80 | 1.07M | size_t ApproximateMemoryUsage() override { |
81 | | // All memory is allocated through allocator; nothing to report here |
82 | 1.07M | return 0; |
83 | 1.07M | } |
84 | | |
85 | | void Get(const LookupKey& k, void* callback_args, |
86 | 19.4k | bool (*callback_func)(void* arg, const char* entry)) override { |
87 | 19.4k | SkipListRep::Iterator iter(&skip_list_); |
88 | 19.4k | Slice dummy_slice; |
89 | 19.4k | for (iter.Seek(dummy_slice, k.memtable_key().data()); |
90 | 19.4k | iter.Valid() && callback_func(callback_args, iter.key()); |
91 | 19.4k | iter.Next()) { |
92 | 0 | } |
93 | 19.4k | } |
94 | | |
95 | | Status GetAndValidate(const LookupKey& k, void* callback_args, |
96 | | bool (*callback_func)(void* arg, const char* entry), |
97 | | bool allow_data_in_errors, bool detect_key_out_of_order, |
98 | | const std::function<Status(const char*, bool)>& |
99 | 0 | key_validation_callback) override { |
100 | 0 | SkipListRep::Iterator iter(&skip_list_); |
101 | 0 | Slice dummy_slice; |
102 | 0 | Status status = iter.SeekAndValidate( |
103 | 0 | dummy_slice, k.memtable_key().data(), allow_data_in_errors, |
104 | 0 | detect_key_out_of_order, key_validation_callback); |
105 | 0 | for (; iter.Valid() && status.ok() && |
106 | 0 | callback_func(callback_args, iter.key()); |
107 | 0 | status = iter.NextAndValidate(allow_data_in_errors)) { |
108 | 0 | } |
109 | 0 | return status; |
110 | 0 | } |
111 | | |
112 | | Status MultiGet(size_t num_keys, const char* const* keys, |
113 | | void** callback_args, |
114 | | bool (*callback_func)(void* arg, const char* entry), |
115 | | bool allow_data_in_errors, bool detect_key_out_of_order, |
116 | | const std::function<Status(const char*, bool)>& |
117 | 0 | key_validation_callback) override { |
118 | 0 | return skip_list_.MultiGet(num_keys, keys, callback_args, callback_func, |
119 | 0 | allow_data_in_errors, detect_key_out_of_order, |
120 | 0 | key_validation_callback); |
121 | 0 | } |
122 | | |
123 | | uint64_t ApproximateNumEntries(const Slice& start_ikey, |
124 | 0 | const Slice& end_ikey) override { |
125 | 0 | return skip_list_.ApproximateNumEntries(start_ikey, end_ikey); |
126 | 0 | } |
127 | | |
128 | | void UniqueRandomSample(const uint64_t num_entries, |
129 | | const uint64_t target_sample_size, |
130 | 0 | std::unordered_set<const char*>* entries) override { |
131 | 0 | entries->clear(); |
132 | | // Avoid divide-by-0. |
133 | 0 | assert(target_sample_size > 0); |
134 | 0 | assert(num_entries > 0); |
135 | | // NOTE: the size of entries is not enforced to be exactly |
136 | | // target_sample_size at the end of this function, it might be slightly |
137 | | // greater or smaller. |
138 | 0 | SkipListRep::Iterator iter(&skip_list_); |
139 | | // There are two methods to create the subset of samples (size m) |
140 | | // from the table containing N elements: |
141 | | // 1-Iterate linearly through the N memtable entries. For each entry i, |
142 | | // add it to the sample set with a probability |
143 | | // (target_sample_size - entries.size() ) / (N-i). |
144 | | // |
145 | | // 2-Pick m random elements without repetition. |
146 | | // We pick Option 2 when m<sqrt(N) and |
147 | | // Option 1 when m > sqrt(N). |
148 | 0 | if (target_sample_size > |
149 | 0 | static_cast<uint64_t>(std::sqrt(1.0 * num_entries))) { |
150 | 0 | Random* rnd = Random::GetTLSInstance(); |
151 | 0 | iter.SeekToFirst(); |
152 | 0 | uint64_t counter = 0, num_samples_left = target_sample_size; |
153 | 0 | for (; iter.Valid() && (num_samples_left > 0); iter.Next(), counter++) { |
154 | | // Add entry to sample set with probability |
155 | | // num_samples_left/(num_entries - counter). |
156 | 0 | if (rnd->Next() % (num_entries - counter) < num_samples_left) { |
157 | 0 | entries->insert(iter.key()); |
158 | 0 | num_samples_left--; |
159 | 0 | } |
160 | 0 | } |
161 | 0 | } else { |
162 | | // Option 2: pick m random elements with no duplicates. |
163 | | // If Option 2 is picked, then target_sample_size<sqrt(N) |
164 | | // Using a set spares the need to check for duplicates. |
165 | 0 | for (uint64_t i = 0; i < target_sample_size; i++) { |
166 | | // We give it 5 attempts to find a non-duplicate |
167 | | // With 5 attempts, the chances of returning `entries` set |
168 | | // of size target_sample_size is: |
169 | | // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5] |
170 | | // which is monotonically increasing with N in the worse case |
171 | | // of target_sample_size=sqrt(N), and is always >99.9% for N>4. |
172 | | // At worst, for the final pick , when m=sqrt(N) there is |
173 | | // a probability of p= 1/sqrt(N) chances to find a duplicate. |
174 | 0 | for (uint64_t j = 0; j < 5; j++) { |
175 | 0 | iter.RandomSeek(); |
176 | | // unordered_set::insert returns pair<iterator, bool>. |
177 | | // The second element is true if an insert successfully happened. |
178 | | // If element is already in the set, this bool will be false, and |
179 | | // true otherwise. |
180 | 0 | if ((entries->insert(iter.key())).second) { |
181 | 0 | break; |
182 | 0 | } |
183 | 0 | } |
184 | 0 | } |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | ~SkipListRep() override = default; |
189 | | |
190 | | // Iteration over the contents of a skip list |
191 | | class Iterator : public MemTableRep::Iterator { |
192 | | InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_; |
193 | | |
194 | | public: |
195 | | // Initialize an iterator over the specified list. |
196 | | // The returned iterator is not valid. |
197 | | explicit Iterator( |
198 | | const InlineSkipList<const MemTableRep::KeyComparator&>* list) |
199 | 61.6k | : iter_(list) {} |
200 | | |
201 | 61.6k | ~Iterator() override = default; |
202 | | |
203 | | // Returns true iff the iterator is positioned at a valid node. |
204 | 751k | bool Valid() const override { return iter_.Valid(); } |
205 | | |
206 | | // Returns the key at the current position. |
207 | | // REQUIRES: Valid() |
208 | 1.84M | const char* key() const override { |
209 | 1.84M | assert(Valid()); |
210 | 1.84M | return iter_.key(); |
211 | 1.84M | } |
212 | | |
213 | | // Advances to the next position. |
214 | | // REQUIRES: Valid() |
215 | 680k | void Next() override { |
216 | 680k | assert(Valid()); |
217 | 680k | iter_.Next(); |
218 | 680k | } |
219 | | |
220 | | // Advances to the previous position. |
221 | | // REQUIRES: Valid() |
222 | 7.03k | void Prev() override { |
223 | 7.03k | assert(Valid()); |
224 | 7.03k | iter_.Prev(); |
225 | 7.03k | } |
226 | | |
227 | | // Advance to the first entry with a key >= target |
228 | 28.9k | void Seek(const Slice& user_key, const char* memtable_key) override { |
229 | 28.9k | if (memtable_key != nullptr) { |
230 | 19.4k | iter_.Seek(memtable_key); |
231 | 19.4k | } else { |
232 | 9.53k | iter_.Seek(EncodeKey(&tmp_, user_key)); |
233 | 9.53k | } |
234 | 28.9k | } |
235 | | |
236 | | // Retreat to the last entry with a key <= target |
237 | 0 | void SeekForPrev(const Slice& user_key, const char* memtable_key) override { |
238 | 0 | if (memtable_key != nullptr) { |
239 | 0 | iter_.SeekForPrev(memtable_key); |
240 | 0 | } else { |
241 | 0 | iter_.SeekForPrev(EncodeKey(&tmp_, user_key)); |
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | 0 | void RandomSeek() override { iter_.RandomSeek(); } |
246 | | |
247 | | // Position at the first entry in list. |
248 | | // Final state of iterator is Valid() iff list is not empty. |
249 | 31.6k | void SeekToFirst() override { iter_.SeekToFirst(); } |
250 | | |
251 | | // Position at the last entry in list. |
252 | | // Final state of iterator is Valid() iff list is not empty. |
253 | 3.77k | void SeekToLast() override { iter_.SeekToLast(); } |
254 | | |
255 | 0 | Status NextAndValidate(bool allow_data_in_errors) override { |
256 | 0 | assert(Valid()); |
257 | 0 | return iter_.NextAndValidate(allow_data_in_errors); |
258 | 0 | } |
259 | | |
260 | | Status SeekAndValidate(const Slice& user_key, const char* memtable_key, |
261 | | bool allow_data_in_errors, |
262 | | bool detect_key_out_of_order, |
263 | | const std::function<Status(const char*, bool)>& |
264 | 0 | key_validation_callback) override { |
265 | 0 | if (memtable_key != nullptr) { |
266 | 0 | return iter_.SeekAndValidate(memtable_key, allow_data_in_errors, |
267 | 0 | detect_key_out_of_order, |
268 | 0 | key_validation_callback); |
269 | 0 | } else { |
270 | 0 | return iter_.SeekAndValidate( |
271 | 0 | EncodeKey(&tmp_, user_key), allow_data_in_errors, |
272 | 0 | detect_key_out_of_order, key_validation_callback); |
273 | 0 | } |
274 | 0 | } |
275 | | |
276 | 0 | Status PrevAndValidate(bool allow_data_in_error) override { |
277 | 0 | assert(Valid()); |
278 | 0 | return iter_.PrevAndValidate(allow_data_in_error); |
279 | 0 | } |
280 | | |
281 | | protected: |
282 | | std::string tmp_; // For passing to EncodeKey |
283 | | }; |
284 | | |
285 | | // Iterator over the contents of a skip list which also keeps track of the |
286 | | // previously visited node. In Seek(), it examines a few nodes after it |
287 | | // first, falling back to O(log n) search from the head of the list only if |
288 | | // the target key hasn't been found. |
289 | | class LookaheadIterator : public MemTableRep::Iterator { |
290 | | public: |
291 | | explicit LookaheadIterator(const SkipListRep& rep) |
292 | 0 | : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {} |
293 | | |
294 | 0 | ~LookaheadIterator() override = default; |
295 | | |
296 | 0 | bool Valid() const override { return iter_.Valid(); } |
297 | | |
298 | 0 | const char* key() const override { |
299 | 0 | assert(Valid()); |
300 | 0 | return iter_.key(); |
301 | 0 | } |
302 | | |
303 | 0 | void Next() override { |
304 | 0 | assert(Valid()); |
305 | |
|
306 | 0 | bool advance_prev = true; |
307 | 0 | if (prev_.Valid()) { |
308 | 0 | auto k1 = rep_.UserKey(prev_.key()); |
309 | 0 | auto k2 = rep_.UserKey(iter_.key()); |
310 | |
|
311 | 0 | if (k1.compare(k2) == 0) { |
312 | | // same user key, don't move prev_ |
313 | 0 | advance_prev = false; |
314 | 0 | } else if (rep_.transform_) { |
315 | | // only advance prev_ if it has the same prefix as iter_ |
316 | 0 | auto t1 = rep_.transform_->Transform(k1); |
317 | 0 | auto t2 = rep_.transform_->Transform(k2); |
318 | 0 | advance_prev = t1.compare(t2) == 0; |
319 | 0 | } |
320 | 0 | } |
321 | |
|
322 | 0 | if (advance_prev) { |
323 | 0 | prev_ = iter_; |
324 | 0 | } |
325 | 0 | iter_.Next(); |
326 | 0 | } |
327 | | |
328 | 0 | void Prev() override { |
329 | 0 | assert(Valid()); |
330 | 0 | iter_.Prev(); |
331 | 0 | prev_ = iter_; |
332 | 0 | } |
333 | | |
334 | 0 | void Seek(const Slice& internal_key, const char* memtable_key) override { |
335 | 0 | const char* encoded_key = (memtable_key != nullptr) |
336 | 0 | ? memtable_key |
337 | 0 | : EncodeKey(&tmp_, internal_key); |
338 | |
|
339 | 0 | if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) { |
340 | | // prev_.key() is smaller or equal to our target key; do a quick |
341 | | // linear search (at most lookahead_ steps) starting from prev_ |
342 | 0 | iter_ = prev_; |
343 | |
|
344 | 0 | size_t cur = 0; |
345 | 0 | while (cur++ <= rep_.lookahead_ && iter_.Valid()) { |
346 | 0 | if (rep_.cmp_(encoded_key, iter_.key()) <= 0) { |
347 | 0 | return; |
348 | 0 | } |
349 | 0 | Next(); |
350 | 0 | } |
351 | 0 | } |
352 | | |
353 | 0 | iter_.Seek(encoded_key); |
354 | 0 | prev_ = iter_; |
355 | 0 | } |
356 | | |
357 | | void SeekForPrev(const Slice& internal_key, |
358 | 0 | const char* memtable_key) override { |
359 | 0 | const char* encoded_key = (memtable_key != nullptr) |
360 | 0 | ? memtable_key |
361 | 0 | : EncodeKey(&tmp_, internal_key); |
362 | 0 | iter_.SeekForPrev(encoded_key); |
363 | 0 | prev_ = iter_; |
364 | 0 | } |
365 | | |
366 | 0 | void SeekToFirst() override { |
367 | 0 | iter_.SeekToFirst(); |
368 | 0 | prev_ = iter_; |
369 | 0 | } |
370 | | |
371 | 0 | void SeekToLast() override { |
372 | 0 | iter_.SeekToLast(); |
373 | 0 | prev_ = iter_; |
374 | 0 | } |
375 | | |
376 | | protected: |
377 | | std::string tmp_; // For passing to EncodeKey |
378 | | |
379 | | private: |
380 | | const SkipListRep& rep_; |
381 | | InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_; |
382 | | InlineSkipList<const MemTableRep::KeyComparator&>::Iterator prev_; |
383 | | }; |
384 | | |
385 | 42.2k | MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override { |
386 | 42.2k | if (lookahead_ > 0) { |
387 | 0 | void* mem = |
388 | 0 | arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator)) |
389 | 0 | : operator new(sizeof(SkipListRep::LookaheadIterator)); |
390 | 0 | return new (mem) SkipListRep::LookaheadIterator(*this); |
391 | 42.2k | } else { |
392 | 42.2k | void* mem = arena ? arena->AllocateAligned(sizeof(SkipListRep::Iterator)) |
393 | 42.2k | : operator new(sizeof(SkipListRep::Iterator)); |
394 | 42.2k | return new (mem) SkipListRep::Iterator(&skip_list_); |
395 | 42.2k | } |
396 | 42.2k | } |
397 | | }; |
398 | | } // namespace |
399 | | |
400 | | static std::unordered_map<std::string, OptionTypeInfo> skiplist_factory_info = { |
401 | | {"lookahead", |
402 | | {0, OptionType::kSizeT, OptionVerificationType::kNormal, |
403 | | OptionTypeFlags::kDontSerialize /*Since it is part of the ID*/}}, |
404 | | }; |
405 | | |
406 | 603k | SkipListFactory::SkipListFactory(size_t lookahead) : lookahead_(lookahead) { |
407 | 603k | RegisterOptions("SkipListFactoryOptions", &lookahead_, |
408 | 603k | &skiplist_factory_info); |
409 | 603k | } |
410 | | |
411 | 494k | std::string SkipListFactory::GetId() const { |
412 | 494k | std::string id = Name(); |
413 | 494k | if (lookahead_ > 0) { |
414 | 0 | id.append(":").append(std::to_string(lookahead_)); |
415 | 0 | } |
416 | 494k | return id; |
417 | 494k | } |
418 | | |
419 | | MemTableRep* SkipListFactory::CreateMemTableRep( |
420 | | const MemTableRep::KeyComparator& compare, Allocator* allocator, |
421 | 239k | const SliceTransform* transform, Logger* /*logger*/) { |
422 | 239k | return new SkipListRep(compare, allocator, transform, lookahead_); |
423 | 239k | } |
424 | | |
425 | | } // namespace ROCKSDB_NAMESPACE |