/src/rocksdb/memtable/inlineskiplist.h
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under both the GPLv2 (found in the |
3 | | // COPYING file in the root directory) and Apache 2.0 License |
4 | | // (found in the LICENSE.Apache file in the root directory). |
5 | | // |
6 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. Use of |
7 | | // this source code is governed by a BSD-style license that can be found |
8 | | // in the LICENSE file. See the AUTHORS file for names of contributors. |
9 | | // |
10 | | // InlineSkipList is derived from SkipList (skiplist.h), but it optimizes |
11 | | // the memory layout by requiring that the key storage be allocated through |
12 | | // the skip list instance. For the common case of SkipList<const char*, |
13 | | // Cmp> this saves 1 pointer per skip list node and gives better cache |
14 | | // locality, at the expense of wasted padding from using AllocateAligned |
15 | | // instead of Allocate for the keys. The unused padding will be from |
16 | | // 0 to sizeof(void*)-1 bytes, and the space savings are sizeof(void*) |
17 | | // bytes, so despite the padding the space used is always less than |
18 | | // SkipList<const char*, ..>. |
19 | | // |
20 | | // Thread safety ------------- |
21 | | // |
22 | | // Writes via Insert require external synchronization, most likely a mutex. |
23 | | // InsertConcurrently can be safely called concurrently with reads and |
24 | | // with other concurrent inserts. Reads require a guarantee that the |
25 | | // InlineSkipList will not be destroyed while the read is in progress. |
26 | | // Apart from that, reads progress without any internal locking or |
27 | | // synchronization. |
28 | | // |
29 | | // Invariants: |
30 | | // |
31 | | // (1) Allocated nodes are never deleted until the InlineSkipList is |
32 | | // destroyed. This is trivially guaranteed by the code since we never |
33 | | // delete any skip list nodes. |
34 | | // |
35 | | // (2) The contents of a Node except for the next/prev pointers are |
36 | | // immutable after the Node has been linked into the InlineSkipList. |
37 | | // Only Insert() modifies the list, and it is careful to initialize a |
38 | | // node and use release-stores to publish the nodes in one or more lists. |
39 | | // |
40 | | // ... prev vs. next pointer ordering ... |
41 | | // |
42 | | |
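To make the space claim in the header concrete: a minimal standalone sketch of the arithmetic, assuming a 64-bit platform where sizeof(void*) == 8 (an illustration, not RocksDB code):

#include <cassert>
#include <cstddef>

int main() {
  const size_t kPtr = sizeof(void*);  // assumed 8 (64-bit platform)
  for (size_t key_size = 1; key_size <= 32; ++key_size) {
    // Model of AllocateAligned: the allocation is rounded up to a
    // pointer-aligned boundary, wasting 0..kPtr-1 bytes of padding.
    size_t padding = (kPtr - key_size % kPtr) % kPtr;
    // One key pointer is saved relative to SkipList<const char*, ..>.
    size_t net_saving = kPtr - padding;
    assert(net_saving >= 1);  // inline layout is always strictly smaller
  }
  return 0;
}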
43 | | #pragma once |
44 | | #include <assert.h> |
45 | | #include <stdlib.h> |
46 | | |
47 | | #include <algorithm> |
48 | | #include <atomic> |
49 | | #include <type_traits> |
50 | | |
51 | | #include "memory/allocator.h" |
52 | | #include "port/likely.h" |
53 | | #include "port/port.h" |
54 | | #include "rocksdb/slice.h" |
55 | | #include "util/coding.h" |
56 | | #include "util/random.h" |
57 | | |
58 | | namespace ROCKSDB_NAMESPACE { |
59 | | |
60 | | template <class Comparator> |
61 | | class InlineSkipList { |
62 | | private: |
63 | | struct Node; |
64 | | struct Splice; |
65 | | |
66 | | public: |
67 | | using DecodedKey = |
68 | | typename std::remove_reference<Comparator>::type::DecodedType; |
69 | | |
70 | | static const uint16_t kMaxPossibleHeight = 32; |
71 | | |
72 | | // Create a new InlineSkipList object that will use "cmp" for comparing |
73 | | // keys, and will allocate memory using "*allocator". Objects allocated |
74 | | // in the allocator must remain allocated for the lifetime of the |
75 | | // skiplist object. |
76 | | explicit InlineSkipList(Comparator cmp, Allocator* allocator, |
77 | | int32_t max_height = 12, |
78 | | int32_t branching_factor = 4); |
79 | | // No copying allowed |
80 | | InlineSkipList(const InlineSkipList&) = delete; |
81 | | InlineSkipList& operator=(const InlineSkipList&) = delete; |
82 | | |
83 | | // Allocates a key and a skip-list node, returning a pointer to the key |
84 | | // portion of the node. This method is thread-safe if the allocator |
85 | | // is thread-safe. |
86 | | char* AllocateKey(size_t key_size); |
87 | | |
88 | | // Allocate a splice using allocator. |
89 | | Splice* AllocateSplice(); |
90 | | |
91 | | // Allocate a splice on heap. |
92 | | Splice* AllocateSpliceOnHeap(); |
93 | | |
94 | | // Inserts a key allocated by AllocateKey, after the actual key value |
95 | | // has been filled in. |
96 | | // |
97 | | // REQUIRES: nothing that compares equal to key is currently in the list. |
98 | | // REQUIRES: no concurrent calls to any insert methods. |
99 | | bool Insert(const char* key); |
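A minimal usage sketch of the AllocateKey/Insert handshake described above; the comparator type Cmp, the list pointer, and the helper name are assumptions for illustration:

// Hypothetical helper; assumes this header (and its includes) are in
// scope, `list` is an externally synchronized InlineSkipList<Cmp>, and
// `encoded` holds an already-encoded key.
void InsertEncoded(InlineSkipList<Cmp>* list, const Slice& encoded) {
  char* buf = list->AllocateKey(encoded.size());  // node + key storage
  memcpy(buf, encoded.data(), encoded.size());    // fill in the key bytes
  bool ok = list->Insert(buf);                    // link node into the list
  assert(ok);  // per REQUIRES above: no equal key already present
}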
100 | | |
101 | | // Inserts a key allocated by AllocateKey with a hint of last insert |
102 | | // position in the skip-list. If hint points to nullptr, a new hint will be |
103 | | // populated, which can be used in subsequent calls. |
104 | | // |
105 | | // It can be used to optimize workloads where there are multiple groups |
106 | | // of keys, and each key is likely to be inserted close to the last |
107 | | // inserted key in the same group. One example is sequential inserts. |
108 | | // |
109 | | // REQUIRES: nothing that compares equal to key is currently in the list. |
110 | | // REQUIRES: no concurrent calls to any insert methods. |
111 | | bool InsertWithHint(const char* key, void** hint); |
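For grouped workloads this might be used as below — a sketch assuming `list` and a per-group vector `group_keys` of keys already prepared via AllocateKey:

void* hint = nullptr;  // nullptr: a fresh hint is populated on first call
for (const char* key : group_keys) {
  list->InsertWithHint(key, &hint);  // reuses the hint across the group
}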
112 | | |
113 | | // Like InsertConcurrently, but with a hint |
114 | | // |
115 | | // REQUIRES: nothing that compares equal to key is currently in the list. |
116 | | // REQUIRES: no concurrent calls that use the same hint. |
117 | | bool InsertWithHintConcurrently(const char* key, void** hint); |
118 | | |
119 | | // Like Insert, but external synchronization is not required. |
120 | | bool InsertConcurrently(const char* key); |
121 | | |
122 | | // Inserts a node into the skip list. key must have been allocated by |
123 | | // AllocateKey and then filled in by the caller. If UseCAS is true, |
124 | | // then external synchronization is not required, otherwise this method |
125 | | // may not be called concurrently with any other insertions. |
126 | | // |
127 | | // Regardless of whether UseCAS is true, the splice must be owned |
128 | | // exclusively by the current thread. If allow_partial_splice_fix is |
129 | | // true, then the cost of insertion is amortized O(log D), where D is |
130 | | // the distance from the splice to the inserted key (measured as the |
131 | | // number of intervening nodes). Note that this bound is very good for |
132 | | // sequential insertions! If allow_partial_splice_fix is false then |
133 | | // the existing splice will be ignored unless the current key is being |
134 | | // inserted immediately after the splice. With allow_partial_splice_fix |
135 | | // == false the non-sequential case has worse running time, O(log N), |
136 | | // but a better constant factor. |
137 | | template <bool UseCAS> |
138 | | bool Insert(const char* key, Splice* splice, bool allow_partial_splice_fix); |
139 | | |
140 | | // Returns true iff an entry that compares equal to key is in the list. |
141 | | bool Contains(const char* key) const; |
142 | | |
143 | | // Return estimated number of entries smaller than `key`. |
144 | | uint64_t EstimateCount(const char* key) const; |
145 | | |
146 | | // Validate correctness of the skip-list. |
147 | | void TEST_Validate() const; |
148 | | |
149 | | // Iteration over the contents of a skip list |
150 | | class Iterator { |
151 | | public: |
152 | | // Initialize an iterator over the specified list. |
153 | | // The returned iterator is not valid. |
154 | | explicit Iterator(const InlineSkipList* list); |
155 | | |
156 | | // Change the underlying skiplist used for this iterator. |
157 | | // This enables reuse of the iterator object rather than deallocating |
158 | | // an old one and allocating a new one. |
159 | | void SetList(const InlineSkipList* list); |
160 | | |
161 | | // Returns true iff the iterator is positioned at a valid node. |
162 | | bool Valid() const; |
163 | | |
164 | | // Returns the key at the current position. |
165 | | // REQUIRES: Valid() |
166 | | const char* key() const; |
167 | | |
168 | | // Advances to the next position. |
169 | | // REQUIRES: Valid() |
170 | | void Next(); |
171 | | |
172 | | // Advances to the previous position. |
173 | | // REQUIRES: Valid() |
174 | | void Prev(); |
175 | | |
176 | | // Advance to the first entry with a key >= target |
177 | | void Seek(const char* target); |
178 | | |
179 | | // Retreat to the last entry with a key <= target |
180 | | void SeekForPrev(const char* target); |
181 | | |
182 | | // Advance to a random entry in the list. |
183 | | void RandomSeek(); |
184 | | |
185 | | // Position at the first entry in list. |
186 | | // Final state of iterator is Valid() iff list is not empty. |
187 | | void SeekToFirst(); |
188 | | |
189 | | // Position at the last entry in list. |
190 | | // Final state of iterator is Valid() iff list is not empty. |
191 | | void SeekToLast(); |
192 | | |
193 | | private: |
194 | | const InlineSkipList* list_; |
195 | | Node* node_; |
196 | | // Intentionally copyable |
197 | | }; |
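Typical read-side usage of the iterator — a sketch assuming the list outlives the scan (per the thread-safety notes at the top of the file) and a comparator type Cmp:

InlineSkipList<Cmp>::Iterator it(&list);
for (it.SeekToFirst(); it.Valid(); it.Next()) {
  const char* entry = it.key();  // encoded key bytes owned by the list
  // ... decode and consume `entry` ...
}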
198 | | |
199 | | private: |
200 | | const uint16_t kMaxHeight_; |
201 | | const uint16_t kBranching_; |
202 | | const uint32_t kScaledInverseBranching_; |
203 | | |
204 | | Allocator* const allocator_; // Allocator used for allocations of nodes |
205 | | // Immutable after construction |
206 | | Comparator const compare_; |
207 | | Node* const head_; |
208 | | |
209 | | // Modified only by Insert(). Read racily by readers, but stale |
210 | | // values are ok. |
211 | | std::atomic<int> max_height_; // Height of the entire list |
212 | | |
213 | | // seq_splice_ is a Splice used for insertions in the non-concurrent |
214 | | // case. It caches the prev and next found during the most recent |
215 | | // non-concurrent insertion. |
216 | | Splice* seq_splice_; |
217 | | |
218 | 23.9k | inline int GetMaxHeight() const { |
219 | 23.9k | return max_height_.load(std::memory_order_relaxed); |
220 | 23.9k | } |
221 | | |
222 | | int RandomHeight(); |
223 | | |
224 | | Node* AllocateNode(size_t key_size, int height); |
225 | | |
226 | 0 | bool Equal(const char* a, const char* b) const { |
227 | 0 | return (compare_(a, b) == 0); |
228 | 0 | } |
229 | | |
230 | 0 | bool LessThan(const char* a, const char* b) const { |
231 | 0 | return (compare_(a, b) < 0); |
232 | 0 | } |
233 | | |
234 | | // Return true if key is greater than the data stored in "n". Null n |
235 | | // is considered infinite. n should not be head_. |
236 | | bool KeyIsAfterNode(const char* key, Node* n) const; |
237 | | bool KeyIsAfterNode(const DecodedKey& key, Node* n) const; |
238 | | |
239 | | // Returns the earliest node with a key >= key. |
240 | | // Return nullptr if there is no such node. |
241 | | Node* FindGreaterOrEqual(const char* key) const; |
242 | | |
243 | | // Return the latest node with a key < key. |
244 | | // Return head_ if there is no such node. |
245 | | // Fills prev[level] with pointer to previous node at "level" for every |
246 | | // level in [0..max_height_-1], if prev is non-null. |
247 | | Node* FindLessThan(const char* key, Node** prev = nullptr) const; |
248 | | |
249 | | // Return the latest node with a key < key on bottom_level. Start searching |
250 | | // from root node on the level below top_level. |
251 | | // Fills prev[level] with pointer to previous node at "level" for every |
252 | | // level in [bottom_level..top_level-1], if prev is non-null. |
253 | | Node* FindLessThan(const char* key, Node** prev, Node* root, int top_level, |
254 | | int bottom_level) const; |
255 | | |
256 | | // Return the last node in the list. |
257 | | // Return head_ if list is empty. |
258 | | Node* FindLast() const; |
259 | | |
260 | | // Returns a random entry. |
261 | | Node* FindRandomEntry() const; |
262 | | |
263 | | // Traverses a single level of the list, setting *out_prev to the last |
264 | | // node before the key and *out_next to the first node after. Assumes |
265 | | // that the key is not present in the skip list. On entry, before should |
266 | | // point to a node that is before the key, and after should point to |
267 | | // a node that is after the key. after should be nullptr if a good after |
268 | | // node isn't conveniently available. |
269 | | template <bool prefetch_before> |
270 | | void FindSpliceForLevel(const DecodedKey& key, Node* before, Node* after, |
271 | | int level, Node** out_prev, Node** out_next); |
272 | | |
273 | | // Recomputes Splice levels from highest_level (inclusive) down to |
274 | | // lowest_level (inclusive). |
275 | | void RecomputeSpliceLevels(const DecodedKey& key, Splice* splice, |
276 | | int recompute_level); |
277 | | }; |
278 | | |
279 | | // Implementation details follow |
280 | | |
281 | | template <class Comparator> |
282 | | struct InlineSkipList<Comparator>::Splice { |
283 | | // The invariant of a Splice is that prev_[i+1].key <= prev_[i].key < |
284 | | // next_[i].key <= next_[i+1].key for all i. That means that if a |
285 | | // key is bracketed by prev_[i] and next_[i] then it is bracketed by |
286 | | // all higher levels. It is _not_ required that prev_[i]->Next(i) == |
287 | | // next_[i] (it probably did at some point in the past, but intervening |
288 | | // or concurrent operations might have inserted nodes in between). |
289 | | int height_ = 0; |
290 | | Node** prev_; |
291 | | Node** next_; |
292 | | }; |
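As a concrete picture of the invariant (hypothetical keys): a height-3 splice bracketing a key k might have prev_[2] = head_ and next_[2] = nullptr; prev_[1]->key = "b" and next_[1]->key = "q"; prev_[0]->key = "d" and next_[0]->key = "g"; with "b" <= "d" < k < "g" <= "q", each level brackets the key, and higher levels bracket ever wider ranges.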
293 | | |
294 | | // The Node data type is more of a pointer into custom-managed memory than |
295 | | // a traditional C++ struct. The key is stored in the bytes immediately |
296 | | // after the struct, and the next_ pointers for nodes with height > 1 are |
297 | | // stored immediately _before_ the struct. This avoids the need to include |
298 | | // any pointer or sizing data, which reduces per-node memory overheads. |
299 | | template <class Comparator> |
300 | | struct InlineSkipList<Comparator>::Node { |
301 | | // Stores the height of the node in the memory location normally used for |
302 | | // next_[0]. This is used for passing data from AllocateKey to Insert. |
303 | 2.66M | void StashHeight(const int height) { |
304 | 2.66M | assert(sizeof(int) <= sizeof(next_[0])); |
305 | 2.66M | memcpy(static_cast<void*>(&next_[0]), &height, sizeof(int)); |
306 | 2.66M | } |
307 | | |
308 | | // Retrieves the value passed to StashHeight. Undefined after a call |
309 | | // to SetNext or NoBarrier_SetNext. |
310 | 2.56M | int UnstashHeight() const { |
311 | 2.56M | int rv; |
312 | 2.56M | memcpy(&rv, &next_[0], sizeof(int)); |
313 | 2.56M | return rv; |
314 | 2.56M | } |
315 | | |
316 | 37.0M | const char* Key() const { return reinterpret_cast<const char*>(&next_[1]); } |
317 | | |
318 | | // Accessors/mutators for links. Wrapped in methods so we can add |
319 | | // the appropriate barriers as necessary, and perform the necessary |
320 | | // addressing trickery for storing links below the Node in memory. |
321 | 69.6M | Node* Next(int n) { |
322 | 69.6M | assert(n >= 0); |
323 | | // Use an 'acquire load' so that we observe a fully initialized |
324 | | // version of the returned Node. |
325 | 69.6M | return ((&next_[0] - n)->load(std::memory_order_acquire)); |
326 | 69.6M | } |
327 | | |
328 | 4.58M | void SetNext(int n, Node* x) { |
329 | 4.58M | assert(n >= 0); |
330 | | // Use a 'release store' so that anybody who reads through this |
331 | | // pointer observes a fully initialized version of the inserted node. |
332 | 4.58M | (&next_[0] - n)->store(x, std::memory_order_release); |
333 | 4.58M | } |
334 | | |
335 | 0 | bool CASNext(int n, Node* expected, Node* x) { |
336 | 0 | assert(n >= 0); |
337 | 0 | return (&next_[0] - n)->compare_exchange_strong(expected, x); |
338 | 0 | } |
339 | | |
340 | | // No-barrier variants that can be safely used in a few locations. |
341 | | Node* NoBarrier_Next(int n) { |
342 | | assert(n >= 0); |
343 | | return (&next_[0] - n)->load(std::memory_order_relaxed); |
344 | | } |
345 | | |
346 | 3.42M | void NoBarrier_SetNext(int n, Node* x) { |
347 | 3.42M | assert(n >= 0); |
348 | 3.42M | (&next_[0] - n)->store(x, std::memory_order_relaxed); |
349 | 3.42M | } |
350 | | |
351 | | // Insert node after prev on specific level. |
352 | | void InsertAfter(Node* prev, int level) { |
353 | | // NoBarrier_SetNext() suffices since we will add a barrier when |
354 | | // we publish a pointer to "this" in prev. |
355 | | NoBarrier_SetNext(level, prev->NoBarrier_Next(level)); |
356 | | prev->SetNext(level, this); |
357 | | } |
358 | | |
359 | | private: |
360 | | // next_[0] is the lowest level link (level 0). Higher levels are |
361 | | // stored _earlier_, so level 1 is at next_[-1]. |
362 | | std::atomic<Node*> next_[1]; |
363 | | }; |
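The acquire/release pairing in Next()/SetNext() is the standard publication idiom: a reader that observes the pointer also observes every write made to the node before it was published, including its key bytes. A self-contained sketch of the same idiom (not RocksDB code):

#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int*> slot{nullptr};

void writer() {
  int* p = new int(42);                      // initialize the payload
  slot.store(p, std::memory_order_release);  // publish, like SetNext()
}

void reader() {
  // Acquire load, like Next(): if we see p, we also see *p == 42.
  int* p = slot.load(std::memory_order_acquire);
  if (p != nullptr) {
    assert(*p == 42);
  }
}

int main() {
  std::thread w(writer), r(reader);
  w.join();
  r.join();
  delete slot.load(std::memory_order_relaxed);
  return 0;
}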
364 | | |
365 | | template <class Comparator> |
366 | | inline InlineSkipList<Comparator>::Iterator::Iterator( |
367 | 39.4k | const InlineSkipList* list) { |
368 | 39.4k | SetList(list); |
369 | 39.4k | } |
370 | | |
371 | | template <class Comparator> |
372 | | inline void InlineSkipList<Comparator>::Iterator::SetList( |
373 | 39.4k | const InlineSkipList* list) { |
374 | 39.4k | list_ = list; |
375 | 39.4k | node_ = nullptr; |
376 | 39.4k | } |
377 | | |
378 | | template <class Comparator> |
379 | 1.75M | inline bool InlineSkipList<Comparator>::Iterator::Valid() const { |
380 | 1.75M | return node_ != nullptr; |
381 | 1.75M | } |
382 | | |
383 | | template <class Comparator> |
384 | 4.28M | inline const char* InlineSkipList<Comparator>::Iterator::key() const { |
385 | 4.28M | assert(Valid()); |
386 | 4.28M | return node_->Key(); |
387 | 4.28M | } |
388 | | |
389 | | template <class Comparator> |
390 | 1.70M | inline void InlineSkipList<Comparator>::Iterator::Next() { |
391 | 1.70M | assert(Valid()); |
392 | 1.70M | node_ = node_->Next(0); |
393 | 1.70M | } |
394 | | |
395 | | template <class Comparator> |
396 | 5.32k | inline void InlineSkipList<Comparator>::Iterator::Prev() { |
397 | | // Instead of using explicit "prev" links, we just search for the |
398 | | // last node that falls before key. |
399 | 5.32k | assert(Valid()); |
400 | 5.32k | node_ = list_->FindLessThan(node_->Key()); |
401 | 5.32k | if (node_ == list_->head_) { |
402 | 1.86k | node_ = nullptr; |
403 | 1.86k | } |
404 | 5.32k | } |
405 | | |
406 | | template <class Comparator> |
407 | 15.1k | inline void InlineSkipList<Comparator>::Iterator::Seek(const char* target) { |
408 | 15.1k | node_ = list_->FindGreaterOrEqual(target); |
409 | 15.1k | } |
410 | | |
411 | | template <class Comparator> |
412 | | inline void InlineSkipList<Comparator>::Iterator::SeekForPrev( |
413 | 0 | const char* target) { |
414 | 0 | Seek(target); |
415 | 0 | if (!Valid()) { |
416 | 0 | SeekToLast(); |
417 | 0 | } |
418 | 0 | while (Valid() && list_->LessThan(target, key())) { |
419 | 0 | Prev(); |
420 | 0 | } |
421 | 0 | } |
422 | | |
423 | | template <class Comparator> |
424 | 0 | inline void InlineSkipList<Comparator>::Iterator::RandomSeek() { |
425 | 0 | node_ = list_->FindRandomEntry(); |
426 | 0 | } |
427 | | |
428 | | template <class Comparator> |
429 | 22.8k | inline void InlineSkipList<Comparator>::Iterator::SeekToFirst() { |
430 | 22.8k | node_ = list_->head_->Next(0); |
431 | 22.8k | } |
432 | | |
433 | | template <class Comparator> |
434 | 3.40k | inline void InlineSkipList<Comparator>::Iterator::SeekToLast() { |
435 | 3.40k | node_ = list_->FindLast(); |
436 | 3.40k | if (node_ == list_->head_) { |
437 | 1.73k | node_ = nullptr; |
438 | 1.73k | } |
439 | 3.40k | } |
440 | | |
441 | | template <class Comparator> |
442 | 2.56M | int InlineSkipList<Comparator>::RandomHeight() { |
443 | 2.56M | auto rnd = Random::GetTLSInstance(); |
444 | | |
445 | | // Increase height with probability 1 in kBranching |
446 | 2.56M | int height = 1; |
447 | 3.42M | while (height < kMaxHeight_ && height < kMaxPossibleHeight && |
448 | 3.42M | rnd->Next() < kScaledInverseBranching_) { |
449 | 856k | height++; |
450 | 856k | } |
451 | 2.56M | assert(height > 0); |
452 | 2.56M | assert(height <= kMaxHeight_); |
453 | 2.56M | assert(height <= kMaxPossibleHeight); |
454 | 2.56M | return height; |
455 | 2.56M | } |
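Since rnd->Next() < kScaledInverseBranching_ succeeds with probability 1/kBranching_ (assuming an unbiased generator), the height is geometrically distributed. Ignoring the kMaxHeight_ cap, with b = kBranching_:

P(height >= h) = (1/b)^(h-1),   E[height] = sum over h >= 1 of P(height >= h) = b/(b-1)

so with the default branching factor b = 4 a node carries 4/3 ≈ 1.33 next pointers on average.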
456 | | |
457 | | template <class Comparator> |
458 | | bool InlineSkipList<Comparator>::KeyIsAfterNode(const char* key, |
459 | | Node* n) const { |
460 | | // nullptr n is considered infinite |
461 | | assert(n != head_); |
462 | | return (n != nullptr) && (compare_(n->Key(), key) < 0); |
463 | | } |
464 | | |
465 | | template <class Comparator> |
466 | | bool InlineSkipList<Comparator>::KeyIsAfterNode(const DecodedKey& key, |
467 | 23.3M | Node* n) const { |
468 | | // nullptr n is considered infinite |
469 | 23.3M | assert(n != head_); |
470 | 23.3M | return (n != nullptr) && (compare_(n->Key(), key) < 0); |
471 | 23.3M | } |
472 | | |
473 | | template <class Comparator> |
474 | | typename InlineSkipList<Comparator>::Node* |
475 | 15.1k | InlineSkipList<Comparator>::FindGreaterOrEqual(const char* key) const { |
476 | | // Note: It looks like we could reduce duplication by implementing |
477 | | // this function as FindLessThan(key)->Next(0), but we wouldn't be able |
478 | | // to exit early on equality and the result wouldn't even be correct. |
479 | | // A concurrent insert might occur after FindLessThan(key) but before |
480 | | // we get a chance to call Next(0). |
481 | 15.1k | Node* x = head_; |
482 | 15.1k | int level = GetMaxHeight() - 1; |
483 | 15.1k | Node* last_bigger = nullptr; |
484 | 15.1k | const DecodedKey key_decoded = compare_.decode_key(key); |
485 | 23.5k | while (true) { |
486 | 23.5k | Node* next = x->Next(level); |
487 | 23.5k | if (next != nullptr) { |
488 | 16.3k | PREFETCH(next->Next(level), 0, 1); |
489 | 16.3k | } |
490 | | // Make sure the lists are sorted |
491 | 23.5k | assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x)); |
492 | | // Make sure we haven't overshot during our search |
493 | 23.5k | assert(x == head_ || KeyIsAfterNode(key_decoded, x)); |
494 | 23.5k | int cmp = (next == nullptr || next == last_bigger) |
495 | 23.5k | ? 1 |
496 | 23.5k | : compare_(next->Key(), key_decoded); |
497 | 23.5k | if (cmp == 0 || (cmp > 0 && level == 0)) { |
498 | 15.1k | return next; |
499 | 15.1k | } else if (cmp < 0) { |
500 | | // Keep searching in this list |
501 | 4.94k | x = next; |
502 | 4.94k | } else { |
503 | | // Switch to next list, reuse compare_() result |
504 | 3.44k | last_bigger = next; |
505 | 3.44k | level--; |
506 | 3.44k | } |
507 | 23.5k | } |
508 | 15.1k | } |
509 | | |
510 | | template <class Comparator> |
511 | | typename InlineSkipList<Comparator>::Node* |
512 | 5.32k | InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev) const { |
513 | 5.32k | return FindLessThan(key, prev, head_, GetMaxHeight(), 0); |
514 | 5.32k | } |
515 | | |
516 | | template <class Comparator> |
517 | | typename InlineSkipList<Comparator>::Node* |
518 | | InlineSkipList<Comparator>::FindLessThan(const char* key, Node** prev, |
519 | | Node* root, int top_level, |
520 | 5.32k | int bottom_level) const { |
521 | 5.32k | assert(top_level > bottom_level); |
522 | 5.32k | int level = top_level - 1; |
523 | 5.32k | Node* x = root; |
524 | | // KeyIsAfter(key, last_not_after) is definitely false |
525 | 5.32k | Node* last_not_after = nullptr; |
526 | 5.32k | const DecodedKey key_decoded = compare_.decode_key(key); |
527 | 21.7k | while (true) { |
528 | 21.7k | assert(x != nullptr); |
529 | 21.7k | Node* next = x->Next(level); |
530 | 21.7k | if (next != nullptr) { |
531 | 19.5k | PREFETCH(next->Next(level), 0, 1); |
532 | 19.5k | } |
533 | 21.7k | assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x)); |
534 | 21.7k | assert(x == head_ || KeyIsAfterNode(key_decoded, x)); |
535 | 21.7k | if (next != last_not_after && KeyIsAfterNode(key_decoded, next)) { |
536 | | // Keep searching in this list |
537 | 10.8k | assert(next != nullptr); |
538 | 10.8k | x = next; |
539 | 10.9k | } else { |
540 | 10.9k | if (prev != nullptr) { |
541 | 0 | prev[level] = x; |
542 | 0 | } |
543 | 10.9k | if (level == bottom_level) { |
544 | 5.32k | return x; |
545 | 5.64k | } else { |
546 | | // Switch to next list, reuse KeyIsAfterNode() result |
547 | 5.64k | last_not_after = next; |
548 | 5.64k | level--; |
549 | 5.64k | } |
550 | 10.9k | } |
551 | 21.7k | } |
552 | 5.32k | } |
553 | | |
554 | | template <class Comparator> |
555 | | typename InlineSkipList<Comparator>::Node* |
556 | 3.40k | InlineSkipList<Comparator>::FindLast() const { |
557 | 3.40k | Node* x = head_; |
558 | 3.40k | int level = GetMaxHeight() - 1; |
559 | 8.27k | while (true) { |
560 | 8.27k | Node* next = x->Next(level); |
561 | 8.27k | if (next == nullptr) { |
562 | 4.43k | if (level == 0) { |
563 | 3.40k | return x; |
564 | 3.40k | } else { |
565 | | // Switch to next list |
566 | 1.03k | level--; |
567 | 1.03k | } |
568 | 4.43k | } else { |
569 | 3.83k | x = next; |
570 | 3.83k | } |
571 | 8.27k | } |
572 | 3.40k | } |
573 | | |
574 | | template <class Comparator> |
575 | | typename InlineSkipList<Comparator>::Node* |
576 | 0 | InlineSkipList<Comparator>::FindRandomEntry() const { |
577 | | // TODO(bjlemaire): consider adding PREFETCH calls. |
578 | 0 | Node *x = head_, *scan_node = nullptr, *limit_node = nullptr; |
579 | | |
580 | | // We start at the max level. |
581 | | // For each level, we look at all the nodes at that level, and |
582 | | // we randomly pick one of them. Then decrement the level |
583 | | // and repeat the process. |
584 | | // e.g.: assume GetMaxHeight()=5, and there are 100 elements (nodes). |
585 | | // level 4 nodes: lvl_nodes={#1, #15, #67, #84}. Randomly pick #15. |
586 | | // We will consider all the nodes between #15 (inclusive) and #67 |
587 | | // (exclusive). #67 is called 'limit_node' here. |
588 | | // level 3 nodes: lvl_nodes={#15, #21, #45, #51}. Randomly choose |
589 | | // #51. #67 remains 'limit_node'. |
590 | | // [...] |
591 | | // level 0 nodes: lvl_nodes={#56,#57,#58,#59}. Randomly pick #57. |
592 | | // Return Node #57. |
593 | 0 | std::vector<Node*> lvl_nodes; |
594 | 0 | Random* rnd = Random::GetTLSInstance(); |
595 | 0 | int level = GetMaxHeight() - 1; |
596 | |
597 | 0 | while (level >= 0) { |
598 | 0 | lvl_nodes.clear(); |
599 | 0 | scan_node = x; |
600 | 0 | while (scan_node != limit_node) { |
601 | 0 | lvl_nodes.push_back(scan_node); |
602 | 0 | scan_node = scan_node->Next(level); |
603 | 0 | } |
604 | 0 | uint32_t rnd_idx = rnd->Next() % lvl_nodes.size(); |
605 | 0 | x = lvl_nodes[rnd_idx]; |
606 | 0 | if (rnd_idx + 1 < lvl_nodes.size()) { |
607 | 0 | limit_node = lvl_nodes[rnd_idx + 1]; |
608 | 0 | } |
609 | 0 | level--; |
610 | 0 | } |
611 | | // There is a special case where x could still be the head_ |
612 | | // (note that the head_ contains no key). |
613 | 0 | return x == head_ && head_ != nullptr ? head_->Next(0) : x; |
614 | 0 | } |
615 | | |
616 | | template <class Comparator> |
617 | 0 | uint64_t InlineSkipList<Comparator>::EstimateCount(const char* key) const { |
618 | 0 | uint64_t count = 0; |
619 | |
620 | 0 | Node* x = head_; |
621 | 0 | int level = GetMaxHeight() - 1; |
622 | 0 | const DecodedKey key_decoded = compare_.decode_key(key); |
623 | 0 | while (true) { |
624 | 0 | assert(x == head_ || compare_(x->Key(), key_decoded) < 0); |
625 | 0 | Node* next = x->Next(level); |
626 | 0 | if (next != nullptr) { |
627 | 0 | PREFETCH(next->Next(level), 0, 1); |
628 | 0 | } |
629 | 0 | if (next == nullptr || compare_(next->Key(), key_decoded) >= 0) { |
630 | 0 | if (level == 0) { |
631 | 0 | return count; |
632 | 0 | } else { |
633 | | // Switch to next list |
634 | 0 | count *= kBranching_; |
635 | 0 | level--; |
636 | 0 | } |
637 | 0 | } else { |
638 | 0 | x = next; |
639 | 0 | count++; |
640 | 0 | } |
641 | 0 | } |
642 | 0 | } |
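The returned estimate counts each rightward step at a given level as kBranching_ nodes at the level below. An illustrative trace (hypothetical numbers): with kBranching_ = 4, passing 2 nodes at the top level, then 1 at the next level, then 3 at level 0 yields ((2 * 4) + 1) * 4 + 3 = 39 estimated entries smaller than the key.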
643 | | |
644 | | template <class Comparator> |
645 | | InlineSkipList<Comparator>::InlineSkipList(const Comparator cmp, |
646 | | Allocator* allocator, |
647 | | int32_t max_height, |
648 | | int32_t branching_factor) |
649 | | : kMaxHeight_(static_cast<uint16_t>(max_height)), |
650 | | kBranching_(static_cast<uint16_t>(branching_factor)), |
651 | | kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_), |
652 | | allocator_(allocator), |
653 | | compare_(cmp), |
654 | | head_(AllocateNode(0, max_height)), |
655 | | max_height_(1), |
656 | 96.6k | seq_splice_(AllocateSplice()) { |
657 | 96.6k | assert(max_height > 0 && kMaxHeight_ == static_cast<uint32_t>(max_height)); |
658 | 96.6k | assert(branching_factor > 1 && |
659 | 96.6k | kBranching_ == static_cast<uint32_t>(branching_factor)); |
660 | 96.6k | assert(kScaledInverseBranching_ > 0); |
661 | | |
662 | 1.25M | for (int i = 0; i < kMaxHeight_; ++i) { |
663 | 1.15M | head_->SetNext(i, nullptr); |
664 | 1.15M | } |
665 | 96.6k | } |
666 | | |
667 | | template <class Comparator> |
668 | 2.56M | char* InlineSkipList<Comparator>::AllocateKey(size_t key_size) { |
669 | 2.56M | return const_cast<char*>(AllocateNode(key_size, RandomHeight())->Key()); |
670 | 2.56M | } |
671 | | |
672 | | template <class Comparator> |
673 | | typename InlineSkipList<Comparator>::Node* |
674 | 2.66M | InlineSkipList<Comparator>::AllocateNode(size_t key_size, int height) { |
675 | 2.66M | auto prefix = sizeof(std::atomic<Node*>) * (height - 1); |
676 | | |
677 | | // prefix is space for the height - 1 pointers that we store before |
678 | | // the Node instance (next_[-(height - 1) .. -1]). Node starts at |
679 | | // raw + prefix, and holds the bottom-most (level 0) skip list pointer |
680 | | // next_[0]. key_size is the bytes for the key, which comes just after |
681 | | // the Node. |
682 | 2.66M | char* raw = allocator_->AllocateAligned(prefix + sizeof(Node) + key_size); |
683 | 2.66M | Node* x = reinterpret_cast<Node*>(raw + prefix); |
684 | | |
685 | | // Once we've linked the node into the skip list we don't actually need |
686 | | // to know its height, because we can implicitly use the fact that we |
687 | | // traversed into a node at level h to know that h is a valid level |
688 | | // for that node. We need to convey the height to the Insert step, |
689 | | // however, so that it can perform the proper links. Since we're not |
690 | | // using the pointers at the moment, StashHeight temporarily borrows |
691 | | // storage from next_[0] for that purpose. |
692 | 2.66M | x->StashHeight(height); |
693 | 2.66M | return x; |
694 | 2.66M | } |
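A worked instance of the layout arithmetic above, assuming 8-byte pointers (FakeNode is an illustrative stand-in, not the real Node):

#include <atomic>
#include <cassert>
#include <cstddef>

struct FakeNode { std::atomic<FakeNode*> next_[1]; };

int main() {
  const int height = 3;
  const size_t key_size = 13;
  // Space for next_[-2] and next_[-1], stored before the struct.
  size_t prefix = sizeof(std::atomic<FakeNode*>) * (height - 1);
  // prefix (16) + next_[0] (8) + key bytes (13) = 37 bytes requested;
  // the Node would live at raw + prefix, the key at &next_[1].
  size_t total = prefix + sizeof(FakeNode) + key_size;
  assert(prefix == 16 && total == 37);  // on a 64-bit platform
  return 0;
}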
695 | | |
696 | | template <class Comparator> |
697 | | typename InlineSkipList<Comparator>::Splice* |
698 | 96.6k | InlineSkipList<Comparator>::AllocateSplice() { |
699 | | // size of prev_ and next_ |
700 | 96.6k | size_t array_size = sizeof(Node*) * (kMaxHeight_ + 1); |
701 | 96.6k | char* raw = allocator_->AllocateAligned(sizeof(Splice) + array_size * 2); |
702 | 96.6k | Splice* splice = reinterpret_cast<Splice*>(raw); |
703 | 96.6k | splice->height_ = 0; |
704 | 96.6k | splice->prev_ = reinterpret_cast<Node**>(raw + sizeof(Splice)); |
705 | 96.6k | splice->next_ = reinterpret_cast<Node**>(raw + sizeof(Splice) + array_size); |
706 | 96.6k | return splice; |
707 | 96.6k | } |
708 | | |
709 | | template <class Comparator> |
710 | | typename InlineSkipList<Comparator>::Splice* |
711 | 0 | InlineSkipList<Comparator>::AllocateSpliceOnHeap() { |
712 | 0 | size_t array_size = sizeof(Node*) * (kMaxHeight_ + 1); |
713 | 0 | char* raw = new char[sizeof(Splice) + array_size * 2]; |
714 | 0 | Splice* splice = reinterpret_cast<Splice*>(raw); |
715 | 0 | splice->height_ = 0; |
716 | 0 | splice->prev_ = reinterpret_cast<Node**>(raw + sizeof(Splice)); |
717 | 0 | splice->next_ = reinterpret_cast<Node**>(raw + sizeof(Splice) + array_size); |
718 | 0 | return splice; |
719 | 0 | } |
720 | | |
721 | | template <class Comparator> |
722 | 2.56M | bool InlineSkipList<Comparator>::Insert(const char* key) { |
723 | 2.56M | return Insert<false>(key, seq_splice_, false); |
724 | 2.56M | } |
725 | | |
726 | | template <class Comparator> |
727 | 0 | bool InlineSkipList<Comparator>::InsertConcurrently(const char* key) { |
728 | 0 | Node* prev[kMaxPossibleHeight]; |
729 | 0 | Node* next[kMaxPossibleHeight]; |
730 | 0 | Splice splice; |
731 | 0 | splice.prev_ = prev; |
732 | 0 | splice.next_ = next; |
733 | 0 | return Insert<true>(key, &splice, false); |
734 | 0 | } |
735 | | |
736 | | template <class Comparator> |
737 | 0 | bool InlineSkipList<Comparator>::InsertWithHint(const char* key, void** hint) { |
738 | 0 | assert(hint != nullptr); |
739 | 0 | Splice* splice = reinterpret_cast<Splice*>(*hint); |
740 | 0 | if (splice == nullptr) { |
741 | 0 | splice = AllocateSplice(); |
742 | 0 | *hint = splice; |
743 | 0 | } |
744 | 0 | return Insert<false>(key, splice, true); |
745 | 0 | } |
746 | | |
747 | | template <class Comparator> |
748 | | bool InlineSkipList<Comparator>::InsertWithHintConcurrently(const char* key, |
749 | 0 | void** hint) { |
750 | 0 | assert(hint != nullptr); |
751 | 0 | Splice* splice = reinterpret_cast<Splice*>(*hint); |
752 | 0 | if (splice == nullptr) { |
753 | 0 | splice = AllocateSpliceOnHeap(); |
754 | 0 | *hint = splice; |
755 | 0 | } |
756 | 0 | return Insert<true>(key, splice, true); |
757 | 0 | } |
758 | | |
759 | | template <class Comparator> |
760 | | template <bool prefetch_before> |
761 | | void InlineSkipList<Comparator>::FindSpliceForLevel(const DecodedKey& key, |
762 | | Node* before, Node* after, |
763 | | int level, Node** out_prev, |
764 | 13.3M | Node** out_next) { |
765 | 24.3M | while (true) { |
766 | 24.3M | Node* next = before->Next(level); |
767 | 24.3M | if (next != nullptr) { |
768 | 23.0M | PREFETCH(next->Next(level), 0, 1); |
769 | 23.0M | } |
770 | 24.3M | if (prefetch_before == true) { |
771 | 24.3M | if (next != nullptr && level > 0) { |
772 | 17.8M | PREFETCH(next->Next(level - 1), 0, 1); |
773 | 17.8M | } |
774 | 24.3M | } |
775 | 24.3M | assert(before == head_ || next == nullptr || |
776 | 24.3M | KeyIsAfterNode(next->Key(), before)); |
777 | 24.3M | assert(before == head_ || KeyIsAfterNode(key, before)); |
778 | 24.3M | if (next == after || !KeyIsAfterNode(key, next)) { |
779 | | // found it |
780 | 13.3M | *out_prev = before; |
781 | 13.3M | *out_next = next; |
782 | 13.3M | return; |
783 | 13.3M | } |
784 | 11.0M | before = next; |
785 | 11.0M | } |
786 | 13.3M | } |
Unexecuted instantiation: void rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::FindSpliceForLevel<false>(rocksdb::Slice const&, rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::Node*, rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::Node*, int, rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::Node**, rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::Node**) |
787 | | |
788 | | template <class Comparator> |
789 | | void InlineSkipList<Comparator>::RecomputeSpliceLevels(const DecodedKey& key, |
790 | | Splice* splice, |
791 | 2.55M | int recompute_level) { |
792 | 2.55M | assert(recompute_level > 0); |
793 | 2.55M | assert(recompute_level <= splice->height_); |
794 | 15.8M | for (int i = recompute_level - 1; i >= 0; --i) { |
795 | 13.3M | FindSpliceForLevel<true>(key, splice->prev_[i + 1], splice->next_[i + 1], i, |
796 | 13.3M | &splice->prev_[i], &splice->next_[i]); |
797 | 13.3M | } |
798 | 2.55M | } |
799 | | |
800 | | template <class Comparator> |
801 | | template <bool UseCAS> |
802 | | bool InlineSkipList<Comparator>::Insert(const char* key, Splice* splice, |
803 | 2.56M | bool allow_partial_splice_fix) { |
804 | 2.56M | Node* x = reinterpret_cast<Node*>(const_cast<char*>(key)) - 1; |
805 | 2.56M | const DecodedKey key_decoded = compare_.decode_key(key); |
806 | 2.56M | int height = x->UnstashHeight(); |
807 | 2.56M | assert(height >= 1 && height <= kMaxHeight_); |
808 | | |
809 | 2.56M | int max_height = max_height_.load(std::memory_order_relaxed); |
810 | 2.56M | while (height > max_height) { |
811 | 24.8k | if (max_height_.compare_exchange_weak(max_height, height)) { |
812 | | // successfully updated it |
813 | 24.8k | max_height = height; |
814 | 24.8k | break; |
815 | 24.8k | } |
816 | | // else retry, possibly exiting the loop because somebody else |
817 | | // increased it |
818 | 24.8k | } |
819 | 2.56M | assert(max_height <= kMaxPossibleHeight); |
820 | | |
821 | 2.56M | int recompute_height = 0; |
822 | 2.56M | if (splice->height_ < max_height) { |
823 | | // Either splice has never been used or max_height has grown since |
824 | | // last use. We could potentially fix it in the latter case, but |
825 | | // that is tricky. |
826 | 46.9k | splice->prev_[max_height] = head_; |
827 | 46.9k | splice->next_[max_height] = nullptr; |
828 | 46.9k | splice->height_ = max_height; |
829 | 46.9k | recompute_height = max_height; |
830 | 2.51M | } else { |
831 | | // Splice is a valid proper-height splice that brackets some |
832 | | // key, but does it bracket this one? We need to validate it and |
833 | | // recompute a portion of the splice (levels 0..recompute_height-1) |
834 | | // that is a superset of all levels that don't bracket the new key. |
835 | | // Several choices are reasonable, because we have to balance the work |
836 | | // saved against the extra comparisons required to validate the Splice. |
837 | | // |
838 | | // One strategy is just to recompute all of orig_splice_height if the |
839 | | // bottom level isn't bracketing. This pessimistically assumes that |
840 | | // we will either get a perfect Splice hit (increasing sequential |
841 | | // inserts) or have no locality. |
842 | | // |
843 | | // Another strategy is to walk up the Splice's levels until we find |
844 | | // a level that brackets the key. This strategy lets the Splice |
845 | | // hint help for other cases: it turns insertion from O(log N) into |
846 | | // O(log D), where D is the number of nodes in between the key that |
847 | | // produced the Splice and the current insert (insertion is aided |
848 | | // whether the new key is before or after the splice). If you have |
849 | | // a way of using a prefix of the key to map directly to the closest |
850 | | // Splice out of O(sqrt(N)) Splices and we make it so that splices |
851 | | // can also be used as hints during read, then we end up with Oshman's |
852 | | // and Shavit's SkipTrie, which has O(log log N) lookup and insertion |
853 | | // (compare to O(log N) for skip list). |
854 | | // |
855 | | // We control the pessimistic strategy with allow_partial_splice_fix. |
856 | | // A good strategy is probably to be pessimistic for seq_splice_, |
857 | | // optimistic if the caller actually went to the work of providing |
858 | | // a Splice. |
859 | 5.02M | while (recompute_height < max_height) { |
860 | 2.51M | if (splice->prev_[recompute_height]->Next(recompute_height) != |
861 | 2.51M | splice->next_[recompute_height]) { |
862 | | // splice isn't tight at this level, there must have been some inserts |
863 | | // to this |
864 | | // location that didn't update the splice. We might only be a little |
865 | | // stale, but if |
866 | | // the splice is very stale it would be O(N) to fix it. We haven't used |
867 | | // up any of |
868 | | // our budget of comparisons, so always move up even if we are |
869 | | // pessimistic about |
870 | | // our chances of success. |
871 | 0 | ++recompute_height; |
872 | 2.51M | } else if (splice->prev_[recompute_height] != head_ && |
873 | 2.51M | !KeyIsAfterNode(key_decoded, |
874 | 2.51M | splice->prev_[recompute_height])) { |
875 | | // key is from before splice |
876 | 1.80M | if (allow_partial_splice_fix) { |
877 | | // skip all levels with the same node without more comparisons |
878 | 0 | Node* bad = splice->prev_[recompute_height]; |
879 | 0 | while (splice->prev_[recompute_height] == bad) { |
880 | 0 | ++recompute_height; |
881 | 0 | } |
882 | 1.80M | } else { |
883 | | // we're pessimistic, recompute everything |
884 | 1.80M | recompute_height = max_height; |
885 | 1.80M | } |
886 | 1.80M | } else if (KeyIsAfterNode(key_decoded, splice->next_[recompute_height])) { |
887 | | // key is from after splice |
888 | 695k | if (allow_partial_splice_fix) { |
889 | 0 | Node* bad = splice->next_[recompute_height]; |
890 | 0 | while (splice->next_[recompute_height] == bad) { |
891 | 0 | ++recompute_height; |
892 | 0 | } |
893 | 695k | } else { |
894 | 695k | recompute_height = max_height; |
895 | 695k | } |
896 | 695k | } else { |
897 | | // this level brackets the key, we won! |
898 | 14.8k | break; |
899 | 14.8k | } |
900 | 2.51M | } |
901 | 2.51M | } |
902 | 2.56M | assert(recompute_height <= max_height); |
903 | 2.56M | if (recompute_height > 0) { |
904 | 2.55M | RecomputeSpliceLevels(key_decoded, splice, recompute_height); |
905 | 2.55M | } |
906 | | |
907 | 2.56M | bool splice_is_valid = true; |
908 | 2.56M | if (UseCAS) { |
909 | 0 | for (int i = 0; i < height; ++i) { |
910 | 0 | while (true) { |
911 | | // Checking for duplicate keys on level 0 is sufficient |
912 | 0 | if (UNLIKELY(i == 0 && splice->next_[i] != nullptr && |
913 | 0 | compare_(x->Key(), splice->next_[i]->Key()) >= 0)) { |
914 | | // duplicate key |
915 | 0 | return false; |
916 | 0 | } |
917 | 0 | if (UNLIKELY(i == 0 && splice->prev_[i] != head_ && |
918 | 0 | compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) { |
919 | | // duplicate key |
920 | 0 | return false; |
921 | 0 | } |
922 | 0 | assert(splice->next_[i] == nullptr || |
923 | 0 | compare_(x->Key(), splice->next_[i]->Key()) < 0); |
924 | 0 | assert(splice->prev_[i] == head_ || |
925 | 0 | compare_(splice->prev_[i]->Key(), x->Key()) < 0); |
926 | 0 | x->NoBarrier_SetNext(i, splice->next_[i]); |
927 | 0 | if (splice->prev_[i]->CASNext(i, splice->next_[i], x)) { |
928 | | // success |
929 | 0 | break; |
930 | 0 | } |
931 | | // CAS failed, we need to recompute prev and next. It is unlikely |
932 | | // to be helpful to try to use a different level as we redo the |
933 | | // search, because it should be unlikely that lots of nodes have |
934 | | // been inserted between prev[i] and next[i]. No point in using |
935 | | // next[i] as the after hint, because we know it is stale. |
936 | 0 | FindSpliceForLevel<false>(key_decoded, splice->prev_[i], nullptr, i, |
937 | 0 | &splice->prev_[i], &splice->next_[i]); |
938 | | |
939 | | // Since we've narrowed the bracket for level i, we might have |
940 | | // violated the Splice constraint between i and i-1. Make sure |
941 | | // we recompute the whole thing next time. |
942 | 0 | if (i > 0) { |
943 | 0 | splice_is_valid = false; |
944 | 0 | } |
945 | 0 | } |
946 | 0 | } |
947 | 2.56M | } else { |
948 | 5.98M | for (int i = 0; i < height; ++i) { |
949 | 3.42M | if (i >= recompute_height && |
950 | 3.42M | splice->prev_[i]->Next(i) != splice->next_[i]) { |
951 | 0 | FindSpliceForLevel<false>(key_decoded, splice->prev_[i], nullptr, i, |
952 | 0 | &splice->prev_[i], &splice->next_[i]); |
953 | 0 | } |
954 | | // Checking for duplicate keys on level 0 is sufficient |
955 | 3.42M | if (UNLIKELY(i == 0 && splice->next_[i] != nullptr && |
956 | 3.42M | compare_(x->Key(), splice->next_[i]->Key()) >= 0)) { |
957 | | // duplicate key |
958 | 0 | return false; |
959 | 0 | } |
960 | 3.42M | if (UNLIKELY(i == 0 && splice->prev_[i] != head_ && |
961 | 3.42M | compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) { |
962 | | // duplicate key |
963 | 0 | return false; |
964 | 0 | } |
965 | 3.42M | assert(splice->next_[i] == nullptr || |
966 | 3.42M | compare_(x->Key(), splice->next_[i]->Key()) < 0); |
967 | 3.42M | assert(splice->prev_[i] == head_ || |
968 | 3.42M | compare_(splice->prev_[i]->Key(), x->Key()) < 0); |
969 | 3.42M | assert(splice->prev_[i]->Next(i) == splice->next_[i]); |
970 | 3.42M | x->NoBarrier_SetNext(i, splice->next_[i]); |
971 | 3.42M | splice->prev_[i]->SetNext(i, x); |
972 | 3.42M | } |
973 | 2.56M | } |
974 | 2.56M | if (splice_is_valid) { |
975 | 5.98M | for (int i = 0; i < height; ++i) { |
976 | 3.42M | splice->prev_[i] = x; |
977 | 3.42M | } |
978 | 2.56M | assert(splice->prev_[splice->height_] == head_); |
979 | 2.56M | assert(splice->next_[splice->height_] == nullptr); |
980 | 15.9M | for (int i = 0; i < splice->height_; ++i) { |
981 | 13.3M | assert(splice->next_[i] == nullptr || |
982 | 13.3M | compare_(key, splice->next_[i]->Key()) < 0); |
983 | 13.3M | assert(splice->prev_[i] == head_ || |
984 | 13.3M | compare_(splice->prev_[i]->Key(), key) <= 0); |
985 | 13.3M | assert(splice->prev_[i + 1] == splice->prev_[i] || |
986 | 13.3M | splice->prev_[i + 1] == head_ || |
987 | 13.3M | compare_(splice->prev_[i + 1]->Key(), splice->prev_[i]->Key()) < |
988 | 13.3M | 0); |
989 | 13.3M | assert(splice->next_[i + 1] == splice->next_[i] || |
990 | 13.3M | splice->next_[i + 1] == nullptr || |
991 | 13.3M | compare_(splice->next_[i]->Key(), splice->next_[i + 1]->Key()) < |
992 | 13.3M | 0); |
993 | 13.3M | } |
994 | 2.56M | } else { |
995 | 0 | splice->height_ = 0; |
996 | 0 | } |
997 | 2.56M | return true; |
998 | 2.56M | } |
Unexecuted instantiation: bool rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::Insert<true>(char const*, rocksdb::InlineSkipList<rocksdb::MemTableRep::KeyComparator const&>::Splice*, bool) |
999 | | |
1000 | | template <class Comparator> |
1001 | 0 | bool InlineSkipList<Comparator>::Contains(const char* key) const { |
1002 | 0 | Node* x = FindGreaterOrEqual(key); |
1003 | 0 | if (x != nullptr && Equal(key, x->Key())) { |
1004 | 0 | return true; |
1005 | 0 | } else { |
1006 | 0 | return false; |
1007 | 0 | } |
1008 | 0 | } |
1009 | | |
1010 | | template <class Comparator> |
1011 | | void InlineSkipList<Comparator>::TEST_Validate() const { |
1012 | | // Iterate over all levels at the same time, and verify that nodes |
1013 | | // appear in the right order, and that nodes appearing in an upper |
1014 | | // level also appear in lower levels. |
1015 | | Node* nodes[kMaxPossibleHeight]; |
1016 | | int max_height = GetMaxHeight(); |
1017 | | assert(max_height > 0); |
1018 | | for (int i = 0; i < max_height; i++) { |
1019 | | nodes[i] = head_; |
1020 | | } |
1021 | | while (nodes[0] != nullptr) { |
1022 | | Node* l0_next = nodes[0]->Next(0); |
1023 | | if (l0_next == nullptr) { |
1024 | | break; |
1025 | | } |
1026 | | assert(nodes[0] == head_ || compare_(nodes[0]->Key(), l0_next->Key()) < 0); |
1027 | | nodes[0] = l0_next; |
1028 | | |
1029 | | int i = 1; |
1030 | | while (i < max_height) { |
1031 | | Node* next = nodes[i]->Next(i); |
1032 | | if (next == nullptr) { |
1033 | | break; |
1034 | | } |
1035 | | auto cmp = compare_(nodes[0]->Key(), next->Key()); |
1036 | | assert(cmp <= 0); |
1037 | | if (cmp == 0) { |
1038 | | assert(next == nodes[0]); |
1039 | | nodes[i] = next; |
1040 | | } else { |
1041 | | break; |
1042 | | } |
1043 | | i++; |
1044 | | } |
1045 | | } |
1046 | | for (int i = 1; i < max_height; i++) { |
1047 | | assert(nodes[i] != nullptr && nodes[i]->Next(i) == nullptr); |
1048 | | } |
1049 | | } |
1050 | | |
1051 | | } // namespace ROCKSDB_NAMESPACE |