/src/abseil-cpp/absl/container/internal/raw_hash_set.cc
Line | Count | Source |
1 | | // Copyright 2018 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/container/internal/raw_hash_set.h" |
16 | | |
17 | | #include <atomic> |
18 | | #include <cassert> |
19 | | #include <cstddef> |
20 | | #include <cstdint> |
21 | | #include <cstring> |
22 | | |
23 | | #include "absl/base/attributes.h" |
24 | | #include "absl/base/config.h" |
25 | | #include "absl/base/dynamic_annotations.h" |
26 | | #include "absl/base/internal/endian.h" |
27 | | #include "absl/base/optimization.h" |
28 | | #include "absl/container/internal/container_memory.h" |
29 | | #include "absl/container/internal/hashtablez_sampler.h" |
30 | | #include "absl/hash/hash.h" |
31 | | |
32 | | namespace absl { |
33 | | ABSL_NAMESPACE_BEGIN |
34 | | namespace container_internal { |
35 | | |
36 | | // Represents a control byte corresponding to a full slot with arbitrary hash. |
37 | 0 | constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); } |
38 | | |
39 | | // We reserve space for `growth_info` before a single block of control bytes
40 | | // that is shared by all tables without any slots allocated. Pointing empty
41 | | // tables at this block removes a branch from the hot path of find(). In order
42 | | // to ensure that the control bytes are aligned to 16, we have 16 bytes before
43 | | // the control bytes even though growth_info only needs 8.
44 | | alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = { |
45 | | ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), |
46 | | ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), |
47 | | ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), |
48 | | ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), |
49 | | ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, |
50 | | ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, |
51 | | ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, |
52 | | ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; |
53 | | |
54 | | // We need one full byte followed by a sentinel byte for iterator::operator++ to |
55 | | // work. We have a full group after kSentinel to be safe (in case operator++ is |
56 | | // changed to read a full group). |
57 | | ABSL_CONST_INIT ABSL_DLL const ctrl_t kSooControl[17] = { |
58 | | ZeroCtrlT(), ctrl_t::kSentinel, ZeroCtrlT(), ctrl_t::kEmpty, |
59 | | ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, |
60 | | ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, |
61 | | ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, |
62 | | ctrl_t::kEmpty}; |
63 | | static_assert(NumControlBytes(SooCapacity()) <= 17, |
64 | | "kSooControl capacity too small"); |
65 | | |
66 | | #ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL |
67 | | constexpr size_t Group::kWidth; |
68 | | #endif |
69 | | |
70 | | namespace { |
71 | | |
72 | | // Returns a "random" seed.
73 | 0 | inline size_t RandomSeed() { |
74 | 0 | #ifdef ABSL_HAVE_THREAD_LOCAL |
75 | 0 | static thread_local size_t counter = 0; |
76 | | // On Linux kernels >= 5.4, the MSAN runtime has a false positive when
77 | | // accessing thread-local storage data from loaded libraries
78 | | // (https://github.com/google/sanitizers/issues/1265); for this reason, counter
79 | | // needs to be annotated as initialized.
80 | 0 | ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(&counter, sizeof(size_t)); |
81 | 0 | size_t value = ++counter; |
82 | | #else // ABSL_HAVE_THREAD_LOCAL |
83 | | static std::atomic<size_t> counter(0); |
84 | | size_t value = counter.fetch_add(1, std::memory_order_relaxed); |
85 | | #endif // ABSL_HAVE_THREAD_LOCAL |
86 | 0 | return value ^ static_cast<size_t>(reinterpret_cast<uintptr_t>(&counter)); |
87 | 0 | } |
88 | | |
89 | 0 | bool ShouldRehashForBugDetection(const ctrl_t* ctrl, size_t capacity) { |
90 | | // Note: we can't use the abseil-random library because abseil-random |
91 | | // depends on swisstable. We want to return true with probability |
92 | | // `min(1, RehashProbabilityConstant() / capacity())`. In order to do this, |
93 | | // we probe based on a random hash and see if the offset is less than |
94 | | // RehashProbabilityConstant(). |
95 | 0 | return probe(ctrl, capacity, absl::HashOf(RandomSeed())).offset() < |
96 | 0 | RehashProbabilityConstant(); |
97 | 0 | } |
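// A rough sketch of why the probe check above matches the stated probability
// (assuming the initial probe position is roughly uniform over the table): the
// offset() of the probe start is uniform over [0, capacity], so it is less
// than RehashProbabilityConstant() with probability about
// RehashProbabilityConstant() / capacity(), clamped to 1 for tables no larger
// than the constant.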
98 | | |
99 | | } // namespace |
100 | | |
101 | 0 | GenerationType* EmptyGeneration() { |
102 | 0 | if (SwisstableGenerationsEnabled()) { |
103 | 0 | constexpr size_t kNumEmptyGenerations = 1024; |
104 | 0 | static constexpr GenerationType kEmptyGenerations[kNumEmptyGenerations]{}; |
105 | 0 | return const_cast<GenerationType*>( |
106 | 0 | &kEmptyGenerations[RandomSeed() % kNumEmptyGenerations]); |
107 | 0 | } |
108 | 0 | return nullptr; |
109 | 0 | } |
110 | | |
111 | | bool CommonFieldsGenerationInfoEnabled:: |
112 | | should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl, |
113 | 0 | size_t capacity) const { |
114 | 0 | if (reserved_growth_ == kReservedGrowthJustRanOut) return true; |
115 | 0 | if (reserved_growth_ > 0) return false; |
116 | 0 | return ShouldRehashForBugDetection(ctrl, capacity); |
117 | 0 | } |
118 | | |
119 | | bool CommonFieldsGenerationInfoEnabled::should_rehash_for_bug_detection_on_move( |
120 | 0 | const ctrl_t* ctrl, size_t capacity) const { |
121 | 0 | return ShouldRehashForBugDetection(ctrl, capacity); |
122 | 0 | } |
123 | | |
124 | | bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash, |
125 | 16 | const ctrl_t* ctrl) { |
126 | | // To avoid problems with weak hashes and single bit tests, we use % 13. |
127 | | // TODO(kfm,sbenza): revisit after we do unconditional mixing |
128 | 16 | return !is_small(capacity) && (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6; |
129 | 16 | } |
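// Illustrative arithmetic for the check above: (H1(hash, ctrl) ^ RandomSeed())
// % 13 yields a residue in [0, 12], and `> 6` holds for the 6 residues 7..12,
// so for tables that are not small the function returns true for roughly
// 6/13 (~46%) of hash/seed combinations.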
130 | | |
131 | | size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size, |
132 | 0 | CommonFields& common) { |
133 | 0 | assert(common.capacity() == NextCapacity(SooCapacity())); |
134 | | // After resizing from capacity 1 to 3, exactly one slot (the slot with
135 | | // index 1) is occupied, so we need to insert either at index 0 or index 2.
136 | 0 | assert(HashSetResizeHelper::SooSlotIndex() == 1); |
137 | 0 | PrepareInsertCommon(common); |
138 | 0 | const size_t offset = H1(hash, common.control()) & 2; |
139 | 0 | common.growth_info().OverwriteEmptyAsFull(); |
140 | 0 | SetCtrlInSingleGroupTable(common, offset, H2(hash), slot_size); |
141 | 0 | common.infoz().RecordInsert(hash, /*distance_from_desired=*/0); |
142 | 0 | return offset; |
143 | 0 | } |
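// Illustrative arithmetic for the offset computation above: with capacity 3,
// `H1(hash, common.control()) & 2` is either 0 or 2, i.e. one of the two slots
// that the former SOO element (now at index 1) does not occupy. For example,
// assuming H1 yields 0x2d, 0x2d & 2 == 0 and the element goes to index 0;
// assuming it yields 0x2f, 0x2f & 2 == 2 and the element goes to index 2.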
144 | | |
145 | 0 | void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) { |
146 | 0 | assert(ctrl[capacity] == ctrl_t::kSentinel); |
147 | 0 | assert(IsValidCapacity(capacity)); |
148 | 0 | for (ctrl_t* pos = ctrl; pos < ctrl + capacity; pos += Group::kWidth) { |
149 | 0 | Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos); |
150 | 0 | } |
151 | | // Copy the cloned ctrl bytes. |
152 | 0 | std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes()); |
153 | 0 | ctrl[capacity] = ctrl_t::kSentinel; |
154 | 0 | } |
155 | | // Extern template instantiation for inline function. |
156 | | template FindInfo find_first_non_full(const CommonFields&, size_t); |
157 | | |
158 | | FindInfo find_first_non_full_outofline(const CommonFields& common, |
159 | 0 | size_t hash) { |
160 | 0 | return find_first_non_full(common, hash); |
161 | 0 | } |
162 | | |
163 | | namespace { |
164 | | |
165 | | // Returns the address of the slot just after `slot`, assuming each slot has
166 | | // the specified size.
167 | 0 | static inline void* NextSlot(void* slot, size_t slot_size) { |
168 | 0 | return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) + slot_size); |
169 | 0 | } |
170 | | |
171 | | // Returns the address of the slot just before `slot`, assuming each slot has
172 | | // the specified size.
173 | 0 | static inline void* PrevSlot(void* slot, size_t slot_size) { |
174 | 0 | return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size); |
175 | 0 | } |
176 | | |
177 | | // Finds an empty slot, which is guaranteed to exist, starting from the given
178 | | // position. NOTE: this function is almost never triggered inside
179 | | // DropDeletesWithoutResize, so we keep it simple.
180 | | // The table is rather sparse, so an empty slot will be found very quickly.
181 | 0 | size_t FindEmptySlot(size_t start, size_t end, const ctrl_t* ctrl) { |
182 | 0 | for (size_t i = start; i < end; ++i) { |
183 | 0 | if (IsEmpty(ctrl[i])) { |
184 | 0 | return i; |
185 | 0 | } |
186 | 0 | } |
187 | 0 | assert(false && "no empty slot"); |
188 | 0 | return ~size_t{}; |
189 | 0 | } |
190 | | |
191 | | void DropDeletesWithoutResize(CommonFields& common, |
192 | 0 | const PolicyFunctions& policy) { |
193 | 0 | void* set = &common; |
194 | 0 | void* slot_array = common.slot_array(); |
195 | 0 | const size_t capacity = common.capacity(); |
196 | 0 | assert(IsValidCapacity(capacity)); |
197 | 0 | assert(!is_small(capacity)); |
198 | | // Algorithm: |
199 | | // - mark all DELETED slots as EMPTY |
200 | | // - mark all FULL slots as DELETED |
201 | | // - for each slot marked as DELETED |
202 | | // hash = Hash(element) |
203 | | // target = find_first_non_full(hash) |
204 | | // if target is in the same group |
205 | | // mark slot as FULL |
206 | | // else if target is EMPTY |
207 | | // transfer element to target |
208 | | // mark slot as EMPTY |
209 | | // mark target as FULL |
210 | | // else if target is DELETED |
211 | | // swap current element with target element |
212 | | // mark target as FULL |
213 | | // repeat procedure for current slot with moved from element (target) |
214 | 0 | ctrl_t* ctrl = common.control(); |
215 | 0 | ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity); |
216 | 0 | const void* hash_fn = policy.hash_fn(common); |
217 | 0 | auto hasher = policy.hash_slot; |
218 | 0 | auto transfer = policy.transfer; |
219 | 0 | const size_t slot_size = policy.slot_size; |
220 | |
221 | 0 | size_t total_probe_length = 0; |
222 | 0 | void* slot_ptr = SlotAddress(slot_array, 0, slot_size); |
223 | | |
224 | | // The index of an empty slot that can be used as temporary memory for |
225 | | // the swap operation. |
226 | 0 | constexpr size_t kUnknownId = ~size_t{}; |
227 | 0 | size_t tmp_space_id = kUnknownId; |
228 | |
229 | 0 | for (size_t i = 0; i != capacity; |
230 | 0 | ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) { |
231 | 0 | assert(slot_ptr == SlotAddress(slot_array, i, slot_size)); |
232 | 0 | if (IsEmpty(ctrl[i])) { |
233 | 0 | tmp_space_id = i; |
234 | 0 | continue; |
235 | 0 | } |
236 | 0 | if (!IsDeleted(ctrl[i])) continue; |
237 | 0 | const size_t hash = (*hasher)(hash_fn, slot_ptr); |
238 | 0 | const FindInfo target = find_first_non_full(common, hash); |
239 | 0 | const size_t new_i = target.offset; |
240 | 0 | total_probe_length += target.probe_length; |
241 | | |
242 | | // Check whether the old and new indices fall within the same group with
243 | | // respect to the hash. If they do, we don't need to move the object: it
244 | | // already sits in the best probe window we can reach.
245 | 0 | const size_t probe_offset = probe(common, hash).offset(); |
246 | 0 | const auto probe_index = [probe_offset, capacity](size_t pos) { |
247 | 0 | return ((pos - probe_offset) & capacity) / Group::kWidth; |
248 | 0 | }; |
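// Illustrative arithmetic for probe_index (assuming Group::kWidth == 16 and
// capacity == 63): with probe_offset == 20, positions 25 and 30 both map to
// ((pos - 20) & 63) / 16 == 0, so they count as the same group and the
// element is left in place; position 40 maps to (20 & 63) / 16 == 1, a
// different group, so the element would be moved.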
249 | | |
250 | | // Element doesn't move. |
251 | 0 | if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) { |
252 | 0 | SetCtrl(common, i, H2(hash), slot_size); |
253 | 0 | continue; |
254 | 0 | } |
255 | | |
256 | 0 | void* new_slot_ptr = SlotAddress(slot_array, new_i, slot_size); |
257 | 0 | if (IsEmpty(ctrl[new_i])) { |
258 | | // Transfer element to the empty spot. |
259 | | // SetCtrl poisons/unpoisons the slots so we have to call it at the |
260 | | // right time. |
261 | 0 | SetCtrl(common, new_i, H2(hash), slot_size); |
262 | 0 | (*transfer)(set, new_slot_ptr, slot_ptr); |
263 | 0 | SetCtrl(common, i, ctrl_t::kEmpty, slot_size); |
264 | | // Initialize or change empty space id. |
265 | 0 | tmp_space_id = i; |
266 | 0 | } else { |
267 | 0 | assert(IsDeleted(ctrl[new_i])); |
268 | 0 | SetCtrl(common, new_i, H2(hash), slot_size); |
269 | | // Until we are done rehashing, DELETED marks previously FULL slots. |
270 | |
271 | 0 | if (tmp_space_id == kUnknownId) { |
272 | 0 | tmp_space_id = FindEmptySlot(i + 1, capacity, ctrl); |
273 | 0 | } |
274 | 0 | void* tmp_space = SlotAddress(slot_array, tmp_space_id, slot_size); |
275 | 0 | SanitizerUnpoisonMemoryRegion(tmp_space, slot_size); |
276 | | |
277 | | // Swap i and new_i elements. |
278 | 0 | (*transfer)(set, tmp_space, new_slot_ptr); |
279 | 0 | (*transfer)(set, new_slot_ptr, slot_ptr); |
280 | 0 | (*transfer)(set, slot_ptr, tmp_space); |
281 | |
282 | 0 | SanitizerPoisonMemoryRegion(tmp_space, slot_size); |
283 | | |
284 | | // Repeat the processing of the i-th slot.
285 | 0 | --i; |
286 | 0 | slot_ptr = PrevSlot(slot_ptr, slot_size); |
287 | 0 | } |
288 | 0 | } |
289 | 0 | ResetGrowthLeft(common); |
290 | 0 | common.infoz().RecordRehash(total_probe_length); |
291 | 0 | } |
292 | | |
293 | 0 | static bool WasNeverFull(CommonFields& c, size_t index) { |
294 | 0 | if (is_single_group(c.capacity())) { |
295 | 0 | return true; |
296 | 0 | } |
297 | 0 | const size_t index_before = (index - Group::kWidth) & c.capacity(); |
298 | 0 | const auto empty_after = Group(c.control() + index).MaskEmpty(); |
299 | 0 | const auto empty_before = Group(c.control() + index_before).MaskEmpty(); |
300 | | |
301 | | // We count how many consecutive non-empty slots we have to the right and to
302 | | // the left of `it`. If the sum is >= kWidth, then there is at least one probe
303 | | // window that might have seen a full group.
304 | 0 | return empty_before && empty_after && |
305 | 0 | static_cast<size_t>(empty_after.TrailingZeros()) + |
306 | 0 | empty_before.LeadingZeros() < |
307 | 0 | Group::kWidth; |
308 | 0 | } |
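// Worked example for the window check above (assuming Group::kWidth == 16): if
// the 5 control bytes immediately before `index` and the 7 bytes starting at
// `index` are all non-empty, the longest run of non-empty bytes covering
// `index` is at most 5 + 7 = 12 < 16, so no 16-byte probe window over `index`
// was ever completely full, and the function returns true.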
309 | | |
310 | | } // namespace |
311 | | |
312 | 0 | void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { |
313 | 0 | assert(IsFull(c.control()[index]) && "erasing a dangling iterator"); |
314 | 0 | c.decrement_size(); |
315 | 0 | c.infoz().RecordErase(); |
316 | |
317 | 0 | if (WasNeverFull(c, index)) { |
318 | 0 | SetCtrl(c, index, ctrl_t::kEmpty, slot_size); |
319 | 0 | c.growth_info().OverwriteFullAsEmpty(); |
320 | 0 | return; |
321 | 0 | } |
322 | | |
323 | 0 | c.growth_info().OverwriteFullAsDeleted(); |
324 | 0 | SetCtrl(c, index, ctrl_t::kDeleted, slot_size); |
325 | 0 | } |
326 | | |
327 | | void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy, |
328 | 0 | bool reuse, bool soo_enabled) { |
329 | 0 | c.set_size(0); |
330 | 0 | if (reuse) { |
331 | 0 | assert(!soo_enabled || c.capacity() > SooCapacity()); |
332 | 0 | ResetCtrl(c, policy.slot_size); |
333 | 0 | ResetGrowthLeft(c); |
334 | 0 | c.infoz().RecordStorageChanged(0, c.capacity()); |
335 | 0 | } else { |
336 | | // We need to record infoz before calling dealloc, which will unregister |
337 | | // infoz. |
338 | 0 | c.infoz().RecordClearedReservation(); |
339 | 0 | c.infoz().RecordStorageChanged(0, soo_enabled ? SooCapacity() : 0); |
340 | 0 | (*policy.dealloc)(c, policy); |
341 | 0 | c = soo_enabled ? CommonFields{soo_tag_t{}} : CommonFields{non_soo_tag_t{}}; |
342 | 0 | } |
343 | 0 | } |
344 | | |
345 | | void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes( |
346 | 6 | ctrl_t* __restrict new_ctrl, size_t new_capacity) const { |
347 | 6 | assert(is_single_group(new_capacity)); |
348 | 6 | constexpr size_t kHalfWidth = Group::kWidth / 2; |
349 | 6 | constexpr size_t kQuarterWidth = Group::kWidth / 4; |
350 | 6 | assert(old_capacity_ < kHalfWidth); |
351 | 6 | static_assert(sizeof(uint64_t) >= kHalfWidth, |
352 | 6 | "Group size is too large. The ctrl bytes for half a group must " |
353 | 6 | "fit into a uint64_t for this implementation."); |
354 | 6 | static_assert(sizeof(uint64_t) <= Group::kWidth, |
355 | 6 | "Group size is too small. The ctrl bytes for a group must " |
356 | 6 | "cover a uint64_t for this implementation."); |
357 | | |
358 | 6 | const size_t half_old_capacity = old_capacity_ / 2; |
359 | | |
360 | | // NOTE: operations are done with a compile-time-known size = kHalfWidth.
361 | | // The compiler optimizes that into a single ASM operation.
362 | | |
363 | | // Load the bytes from half_old_capacity + 1. This contains the last half of |
364 | | // old_ctrl bytes, followed by the sentinel byte, and then the first half of |
365 | | // the cloned bytes. This effectively shuffles the control bytes. |
366 | 6 | uint64_t copied_bytes = 0; |
367 | 6 | copied_bytes = |
368 | 6 | absl::little_endian::Load64(old_ctrl() + half_old_capacity + 1); |
369 | | |
370 | | // We change the sentinel byte to kEmpty before storing to both the start of |
371 | | // the new_ctrl, and past the end of the new_ctrl later for the new cloned |
372 | | // bytes. Note that this is faster than setting the sentinel byte to kEmpty |
373 | | // after the copy directly in new_ctrl because we are limited on store |
374 | | // bandwidth. |
375 | 6 | constexpr uint64_t kEmptyXorSentinel = |
376 | 6 | static_cast<uint8_t>(ctrl_t::kEmpty) ^ |
377 | 6 | static_cast<uint8_t>(ctrl_t::kSentinel); |
378 | 6 | const uint64_t mask_convert_old_sentinel_to_empty = |
379 | 6 | kEmptyXorSentinel << (half_old_capacity * 8); |
380 | 6 | copied_bytes ^= mask_convert_old_sentinel_to_empty; |
381 | | |
382 | | // Copy the second half of the bytes to the beginning. This correctly sets the
383 | | // bytes [0, old_capacity]. We potentially copy more bytes in order to have a
384 | | // compile-time-known size. Mirrored bytes from old_ctrl() will also be copied.
385 | | // In case of old_capacity_ == 3, we will copy the 1st element twice.
386 | | // Examples: |
387 | | // (old capacity = 1) |
388 | | // old_ctrl = 0S0EEEEEEE... |
389 | | // new_ctrl = E0EEEEEE??... |
390 | | // |
391 | | // (old capacity = 3) |
392 | | // old_ctrl = 012S012EEEEE... |
393 | | // new_ctrl = 12E012EE????... |
394 | | // |
395 | | // (old capacity = 7) |
396 | | // old_ctrl = 0123456S0123456EE... |
397 | | // new_ctrl = 456E0123?????????... |
398 | 6 | absl::little_endian::Store64(new_ctrl, copied_bytes); |
399 | | |
400 | | // Set the space [old_capacity + 1, new_capacity] to empty as these bytes will |
401 | | // not be written again. This is safe because |
402 | | // NumControlBytes = new_capacity + kWidth and new_capacity >= |
403 | | // old_capacity + 1.
404 | | // Examples: |
405 | | // (old_capacity = 3, new_capacity = 15) |
406 | | // new_ctrl = 12E012EE?????????????...?? |
407 | | // *new_ctrl = 12E0EEEEEEEEEEEEEEEE?...?? |
408 | | // position / S |
409 | | // |
410 | | // (old_capacity = 7, new_capacity = 15) |
411 | | // new_ctrl = 456E0123?????????????????...?? |
412 | | // *new_ctrl = 456E0123EEEEEEEEEEEEEEEE?...?? |
413 | | // position / S |
414 | 6 | std::memset(new_ctrl + old_capacity_ + 1, static_cast<int8_t>(ctrl_t::kEmpty), |
415 | 6 | Group::kWidth); |
416 | | |
417 | | // Set the last kHalfWidth bytes to empty, to ensure the bytes all the way to |
418 | | // the end are initialized. |
419 | | // Examples: |
420 | | // new_ctrl = 12E0EEEEEEEEEEEEEEEE?...??????? |
421 | | // *new_ctrl = 12E0EEEEEEEEEEEEEEEE???EEEEEEEE |
422 | | // position S / |
423 | | // |
424 | | // new_ctrl = 456E0123EEEEEEEEEEEEEEEE??????? |
425 | | // *new_ctrl = 456E0123EEEEEEEEEEEEEEEEEEEEEEE |
426 | | // position S / |
427 | 6 | std::memset(new_ctrl + NumControlBytes(new_capacity) - kHalfWidth, |
428 | 6 | static_cast<int8_t>(ctrl_t::kEmpty), kHalfWidth); |
429 | | |
430 | | // Copy the first bytes to the end (starting at new_capacity + 1) to set the
431 | | // cloned bytes. Note that we use the already copied bytes from old_ctrl here |
432 | | // rather than copying from new_ctrl to avoid a Read-after-Write hazard, since |
433 | | // new_ctrl was just written to. The first old_capacity-1 bytes are set |
434 | | // correctly. Then there may be up to old_capacity bytes that need to be |
435 | | // overwritten, and any remaining bytes will be correctly set to empty. This |
436 | | // sets [new_capacity + 1, new_capacity + 1 + old_capacity] correctly.
437 | | // Examples: |
438 | | // new_ctrl = 12E0EEEEEEEEEEEEEEEE?...??????? |
439 | | // *new_ctrl = 12E0EEEEEEEEEEEE12E012EEEEEEEEE |
440 | | // position S/ |
441 | | // |
442 | | // new_ctrl = 456E0123EEEEEEEE?...???EEEEEEEE |
443 | | // *new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE |
444 | | // position S/ |
445 | 6 | absl::little_endian::Store64(new_ctrl + new_capacity + 1, copied_bytes); |
446 | | |
447 | | // Set the remaining bytes at the end, past the cloned bytes, to empty. The
448 | | // incorrectly set bytes are [new_capacity + old_capacity + 2,
449 | | // min(new_capacity + 1 + kHalfWidth, new_capacity + old_capacity + 2 +
450 | | // half_old_capacity)]. Taking the difference, we need to set min(kHalfWidth -
451 | | // (old_capacity + 1), half_old_capacity) bytes. Since old_capacity < kHalfWidth,
452 | | // half_old_capacity < kQuarterWidth, so we set kQuarterWidth bytes beginning at
453 | | // new_capacity + old_capacity + 2 to kEmpty.
454 | | // Examples: |
455 | | // new_ctrl = 12E0EEEEEEEEEEEE12E012EEEEEEEEE |
456 | | // *new_ctrl = 12E0EEEEEEEEEEEE12E0EEEEEEEEEEE |
457 | | // position S / |
458 | | // |
459 | | // new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE |
460 | | // *new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE (no change) |
461 | | // position S / |
462 | 6 | std::memset(new_ctrl + new_capacity + old_capacity_ + 2, |
463 | 6 | static_cast<int8_t>(ctrl_t::kEmpty), kQuarterWidth); |
464 | | |
465 | | // Finally, we set the new sentinel byte. |
466 | 6 | new_ctrl[new_capacity] = ctrl_t::kSentinel; |
467 | 6 | } |
468 | | |
469 | | void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2, |
470 | 0 | size_t new_capacity) { |
471 | 0 | assert(is_single_group(new_capacity)); |
472 | 0 | std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty), |
473 | 0 | NumControlBytes(new_capacity)); |
474 | 0 | assert(HashSetResizeHelper::SooSlotIndex() == 1); |
475 | | // This allows us to avoid branching on had_soo_slot_. |
476 | 0 | assert(had_soo_slot_ || h2 == ctrl_t::kEmpty); |
477 | 0 | new_ctrl[1] = new_ctrl[new_capacity + 2] = h2; |
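// (If had_soo_slot_ is false, h2 == ctrl_t::kEmpty per the assert above, so
// this store just rewrites bytes the memset already set to kEmpty.)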
478 | 0 | new_ctrl[new_capacity] = ctrl_t::kSentinel; |
479 | 0 | } |
480 | | |
481 | | void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots( |
482 | 6 | void* new_slots, size_t slot_size) const { |
483 | 6 | assert(old_capacity_ > 0); |
484 | 6 | const size_t half_old_capacity = old_capacity_ / 2; |
485 | | |
486 | 6 | SanitizerUnpoisonMemoryRegion(old_slots(), slot_size * old_capacity_); |
487 | 6 | std::memcpy(new_slots, |
488 | 6 | SlotAddress(old_slots(), half_old_capacity + 1, slot_size), |
489 | 6 | slot_size * half_old_capacity); |
490 | 6 | std::memcpy(SlotAddress(new_slots, half_old_capacity + 1, slot_size), |
491 | 6 | old_slots(), slot_size * (half_old_capacity + 1)); |
492 | 6 | } |
493 | | |
494 | | void HashSetResizeHelper::GrowSizeIntoSingleGroupTransferable( |
495 | 6 | CommonFields& c, size_t slot_size) { |
496 | 6 | assert(old_capacity_ < Group::kWidth / 2); |
497 | 6 | assert(is_single_group(c.capacity())); |
498 | 6 | assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity())); |
499 | | |
500 | 6 | GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity()); |
501 | 6 | GrowIntoSingleGroupShuffleTransferableSlots(c.slot_array(), slot_size); |
502 | | |
503 | | // We poison since GrowIntoSingleGroupShuffleTransferableSlots |
504 | | // may leave empty slots unpoisoned. |
505 | 6 | PoisonSingleGroupEmptySlots(c, slot_size); |
506 | 6 | } |
507 | | |
508 | | void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c, |
509 | 0 | size_t slot_size) { |
510 | 0 | assert(was_soo_); |
511 | 0 | assert(had_soo_slot_); |
512 | 0 | assert(is_single_group(c.capacity())); |
513 | 0 | std::memcpy(SlotAddress(c.slot_array(), SooSlotIndex(), slot_size), |
514 | 0 | old_soo_data(), slot_size); |
515 | 0 | PoisonSingleGroupEmptySlots(c, slot_size); |
516 | 0 | } |
517 | | |
518 | | namespace { |
519 | | |
520 | | // Called whenever the table needs to free up slots, either by removing
521 | | // tombstones via an in-place rehash or by growing the table.
522 | | ABSL_ATTRIBUTE_NOINLINE |
523 | | FindInfo FindInsertPositionWithGrowthOrRehash(CommonFields& common, size_t hash, |
524 | 0 | const PolicyFunctions& policy) { |
525 | 0 | const size_t cap = common.capacity(); |
526 | 0 | if (cap > Group::kWidth && |
527 | | // Do these calculations in 64-bit to avoid overflow. |
528 | 0 | common.size() * uint64_t{32} <= cap * uint64_t{25}) { |
529 | | // Squash DELETED without growing if there is enough capacity. |
530 | | // |
531 | | // Rehash in place if the current size is <= 25/32 of capacity. |
532 | | // Rationale for such a high factor: 1) DropDeletesWithoutResize() is |
533 | | // faster than resize, and 2) it takes quite a bit of work to add |
534 | | // tombstones. In the worst case, seems to take approximately 4 |
535 | | // tombstones. In the worst case, it seems to take approximately 4
536 | | // rehashing because of tombstones, we can afford to rehash-in-place as |
537 | | // long as we are reclaiming at least 1/8 the capacity without doing more |
538 | | // than 2X the work. (Where "work" is defined to be size() for rehashing |
539 | | // or rehashing in place, and 1 for an insert or erase.) But rehashing in |
540 | | // place is faster per operation than inserting or even doubling the size |
541 | | // of the table, so we can actually afford to reclaim even less space from a
542 | | // resize-in-place. The decision is to rehash in place if we can reclaim
543 | | // about 1/8th of the usable capacity (specifically 3/28 of the
544 | | // capacity) which means that the total cost of rehashing will be a small |
545 | | // fraction of the total work. |
546 | | // |
547 | | // Here is output of an experiment using the BM_CacheInSteadyState |
548 | | // benchmark running the old case (where we rehash-in-place only if we can |
549 | | // reclaim at least 7/16*capacity) vs. this code (which rehashes in place |
550 | | // if we can recover 3/32*capacity). |
551 | | // |
552 | | // Note that although in the worst case the number of rehashes jumped up from
553 | | // 15 to 190, the number of operations per second is almost the same.
554 | | // |
555 | | // Abridged output of running BM_CacheInSteadyState benchmark from |
556 | | // raw_hash_set_benchmark. N is the number of insert/erase operations. |
557 | | // |
558 | | // | OLD (recover >= 7/16) | NEW (recover >= 3/32)
559 | | // size | N/s LoadFactor NRehashes | N/s LoadFactor NRehashes |
560 | | // 448 | 145284 0.44 18 | 140118 0.44 19 |
561 | | // 493 | 152546 0.24 11 | 151417 0.48 28 |
562 | | // 538 | 151439 0.26 11 | 151152 0.53 38 |
563 | | // 583 | 151765 0.28 11 | 150572 0.57 50 |
564 | | // 628 | 150241 0.31 11 | 150853 0.61 66 |
565 | | // 672 | 149602 0.33 12 | 150110 0.66 90 |
566 | | // 717 | 149998 0.35 12 | 149531 0.70 129 |
567 | | // 762 | 149836 0.37 13 | 148559 0.74 190 |
568 | | // 807 | 149736 0.39 14 | 151107 0.39 14 |
569 | | // 852 | 150204 0.42 15 | 151019 0.42 15 |
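// Worked example of the 25/32 threshold above (illustrative numbers): with
// capacity() == 1023, `size() * 32 <= 1023 * 25 == 25575` holds for
// size() <= 799, i.e. the table is rehashed in place while it is at most
// ~78% full and is grown otherwise.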
570 | 0 | DropDeletesWithoutResize(common, policy); |
571 | 0 | } else { |
572 | | // Otherwise grow the container. |
573 | 0 | policy.resize(common, NextCapacity(cap), HashtablezInfoHandle{}); |
574 | 0 | } |
575 | | // This function is typically called with tables containing deleted slots.
576 | | // The table will be big, and `FindFirstNonFullAfterResize` would always fall
577 | | // back to `find_first_non_full`, so we use `find_first_non_full` directly.
578 | 0 | return find_first_non_full(common, hash); |
579 | 0 | } |
580 | | |
581 | | } // namespace |
582 | | |
583 | 0 | const void* GetHashRefForEmptyHasher(const CommonFields& common) { |
584 | | // The empty base optimization typically makes the empty base class address
585 | | // the same as the first address of the derived class object.
586 | | // But we generally assume that for an empty hasher we can return any valid
587 | | // pointer.
588 | 0 | return &common; |
589 | 0 | } |
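// A minimal sketch of the assumption above (hypothetical types, not part of
// this file):
//
//   struct StatelessHash {};                          // empty hasher
//   struct Table : StatelessHash { CommonFields common; };
//
// With the empty base optimization, the StatelessHash base subobject occupies
// no storage and shares its address with `common`, so returning &common also
// yields a valid address to use for a stateless hasher.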
590 | | |
591 | | size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target, |
592 | 16 | const PolicyFunctions& policy) { |
593 | | // When there are no deleted slots in the table |
594 | | // and growth_left is positive, we can insert at the first |
595 | | // empty slot in the probe sequence (target). |
596 | 16 | const bool use_target_hint = |
597 | | // Optimization is disabled when generations are enabled. |
598 | | // We have to rehash even sparse tables randomly in such mode. |
599 | 16 | !SwisstableGenerationsEnabled() && |
600 | 16 | common.growth_info().HasNoDeletedAndGrowthLeft(); |
601 | 16 | if (ABSL_PREDICT_FALSE(!use_target_hint)) { |
602 | | // Notes about optimized mode when generations are disabled: |
603 | | // We do not enter this branch if the table has no deleted slots
604 | | // and growth_left is positive. |
605 | | // We enter this branch in the following cases listed in decreasing |
606 | | // frequency: |
607 | | // 1. Table without deleted slots (>95% cases) that needs to be resized. |
608 | | // 2. Table with deleted slots that has space for the inserting element. |
609 | | // 3. Table with deleted slots that needs to be rehashed or resized. |
610 | 8 | if (ABSL_PREDICT_TRUE(common.growth_info().HasNoGrowthLeftAndNoDeleted())) { |
611 | 8 | const size_t old_capacity = common.capacity(); |
612 | 8 | policy.resize(common, NextCapacity(old_capacity), HashtablezInfoHandle{}); |
613 | 8 | target = HashSetResizeHelper::FindFirstNonFullAfterResize( |
614 | 8 | common, old_capacity, hash); |
615 | 8 | } else { |
616 | | // Note: the table may have no deleted slots here when generations |
617 | | // are enabled. |
618 | 0 | const bool rehash_for_bug_detection = |
619 | 0 | common.should_rehash_for_bug_detection_on_insert(); |
620 | 0 | if (rehash_for_bug_detection) { |
621 | | // Move to a different heap allocation in order to detect bugs. |
622 | 0 | const size_t cap = common.capacity(); |
623 | 0 | policy.resize(common, |
624 | 0 | common.growth_left() > 0 ? cap : NextCapacity(cap), |
625 | 0 | HashtablezInfoHandle{}); |
626 | 0 | } |
627 | 0 | if (ABSL_PREDICT_TRUE(common.growth_left() > 0)) { |
628 | 0 | target = find_first_non_full(common, hash); |
629 | 0 | } else { |
630 | 0 | target = FindInsertPositionWithGrowthOrRehash(common, hash, policy); |
631 | 0 | } |
632 | 0 | } |
633 | 8 | } |
634 | 16 | PrepareInsertCommon(common); |
635 | 16 | common.growth_info().OverwriteControlAsFull(common.control()[target.offset]); |
636 | 16 | SetCtrl(common, target.offset, H2(hash), policy.slot_size); |
637 | 16 | common.infoz().RecordInsert(hash, target.probe_length); |
638 | 16 | return target.offset; |
639 | 16 | } |
640 | | |
641 | | } // namespace container_internal |
642 | | ABSL_NAMESPACE_END |
643 | | } // namespace absl |