Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_COMPILER_PERSISTENT_MAP_H_
6 : #define V8_COMPILER_PERSISTENT_MAP_H_
7 :
8 : #include <array>
9 : #include <tuple>
10 :
11 : #include "src/base/functional.h"
12 : #include "src/zone/zone-containers.h"
13 :
14 : namespace v8 {
15 : namespace internal {
16 : namespace compiler {
17 :
18 : // PersistentMap is a persistent map datastructure based on hash trees (a binary
19 : // tree using the bits of a hash value as addresses). The map is a conceptually
20 : // infinite: All keys are initially mapped to a default value, values are
21 : // deleted by overwriting them with the default value. The iterators produce
22 : // exactly the keys that are not the default value. The hash values should have
23 : // high variance in their high bits, so dense integers are a bad choice.
24 : // Complexity:
25 : // - Copy and assignment: O(1)
26 : // - access: O(log n)
27 : // - update: O(log n) time and space
28 : // - iteration: amortized O(1) per step
29 : // - Zip: O(n)
30 : // - equality check: O(n)
31 : // TODO(tebbi): Cache map transitions to avoid re-allocation of the same map.
32 : // TODO(tebbi): Implement an O(1) equality check based on hash consing or
33 : // something similar.
34 : template <class Key, class Value, class Hasher = base::hash<Key>>
35 : class PersistentMap {
36 : public:
37 : using key_type = Key;
38 : using mapped_type = Value;
39 : using value_type = std::pair<Key, Value>;
40 :
41 : private:
42 : static constexpr size_t kHashBits = 32;
43 : enum Bit : int { kLeft = 0, kRight = 1 };
44 :
45 : // Access hash bits starting from the high bits and compare them according to
46 : // their unsigned value. This way, the order in the hash tree is compatible
47 : // with numeric hash comparisons.
48 : class HashValue;
49 :
50 : struct KeyValue : std::pair<Key, Value> {
51 1995 : const Key& key() const { return this->first; }
52 14440486 : const Value& value() const { return this->second; }
53 : using std::pair<Key, Value>::pair;
54 : };
55 :
56 : struct FocusedTree;
57 :
58 : public:
59 : // Depth of the last added element. This is a cheap estimate for the size of
60 : // the hash tree.
61 : size_t last_depth() const {
62 : if (tree_) {
63 : return tree_->length;
64 : } else {
65 : return 0;
66 : }
67 : }
68 :
69 13246788 : const Value& Get(const Key& key) const {
70 153443 : HashValue key_hash = HashValue(Hasher()(key));
71 13236115 : const FocusedTree* tree = FindHash(key_hash);
72 13288569 : return GetFocusedValue(tree, key);
73 : }
74 :
75 : // Add or overwrite an existing key-value pair.
76 : void Set(Key key, Value value);
77 :
78 64423264 : bool operator==(const PersistentMap& other) const {
79 64423264 : if (tree_ == other.tree_) return true;
80 13550409 : if (def_value_ != other.def_value_) return false;
81 20853831 : for (const std::tuple<Key, Value, Value>& triple : Zip(other)) {
82 20739175 : if (std::get<1>(triple) != std::get<2>(triple)) return false;
83 : }
84 114854 : return true;
85 : }
86 :
87 : bool operator!=(const PersistentMap& other) const {
88 64424895 : return !(*this == other);
89 : }
90 :
91 : // The iterator produces key-value pairs in the lexicographical order of
92 : // hash value and key. It produces exactly the key-value pairs where the value
93 : // is not the default value.
94 : class iterator;
95 :
96 : iterator begin() const {
97 27485869 : if (!tree_) return end();
98 15692100 : return iterator::begin(tree_, def_value_);
99 : }
100 : iterator end() const { return iterator::end(def_value_); }
101 :
102 : // Iterator to traverse two maps in lockstep, producing matching value pairs
103 : // for each key where at least one value is different from the respective
104 : // default.
105 : class double_iterator;
106 :
107 : // An iterable to iterate over the two maps in lockstep.
108 : struct ZipIterable {
109 : PersistentMap a;
110 : PersistentMap b;
111 27101412 : double_iterator begin() { return double_iterator(a.begin(), b.begin()); }
112 27101442 : double_iterator end() { return double_iterator(a.end(), b.end()); }
113 : };
114 :
115 13550417 : ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; }
116 :
117 : explicit PersistentMap(Zone* zone, Value def_value = Value())
118 : : PersistentMap(nullptr, zone, def_value) {}
119 :
120 : private:
121 : // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
122 : const FocusedTree* FindHash(HashValue hash) const;
123 :
124 : // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
125 : // Output the path to this {FocusedTree} and its length. If no such
126 : // {FocusedTree} exists, return {nullptr} and output the path to the last node
127 : // with a matching hash prefix. Note that {length} is the length of the found
128 : // path and may be less than the length of the found {FocusedTree}.
129 : const FocusedTree* FindHash(HashValue hash,
130 : std::array<const FocusedTree*, kHashBits>* path,
131 : int* length) const;
132 :
133 : // Load value from the leaf node on the focused path of {tree}.
134 : const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const;
135 :
136 : // Return the {FocusedTree} representing the left (bit==kLeft) or right
137 : // (bit==kRight) child of the node on the path of {tree} at tree level
138 : // {level}.
139 : static const FocusedTree* GetChild(const FocusedTree* tree, int level,
140 : Bit bit);
141 :
142 : // Find the leftmost path in the tree, starting at the node at tree level
143 : // {level} on the path of {start}. Output the level of the leaf to {level} and
144 : // the path to {path}.
145 : static const FocusedTree* FindLeftmost(
146 : const FocusedTree* start, int* level,
147 : std::array<const FocusedTree*, kHashBits>* path);
148 :
149 : PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)
150 33212466 : : tree_(tree), def_value_(def_value), zone_(zone) {}
151 :
152 : const FocusedTree* tree_;
153 : Value def_value_;
154 : Zone* zone_;
155 : };
156 :
157 : // This structure represents a hash tree with one focused path to a specific
158 : // leaf. For the focused leaf, it stores key, value and key hash. The path is
159 : // defined by the hash bits of the focused leaf. In a traditional tree
160 : // datastructure, the nodes of a path form a linked list with the values being
161 : // the pointers outside of this path. Instead of storing all of these nodes,
162 : // we store an array of the pointers pointing outside of the path. This is
163 : // similar to the stack used when doing DFS traversal of a tree. The hash of
164 : // the leaf is used to know if the pointers point to the left or the
165 : // right of the path. As there is no explicit representation of a tree node,
166 : // this structure also represents all the nodes on its path. The intended node
167 : // depends on the tree depth, which is always clear from the referencing
168 : // context. So the pointer to a {FocusedTree} stored in the
169 : // {PersistentMap.tree_} always references the root, while a pointer from a
170 : // focused node of another {FocusedTree} always references to one tree level
171 : // lower than before.
172 : template <class Key, class Value, class Hasher>
173 : struct PersistentMap<Key, Value, Hasher>::FocusedTree {
174 : KeyValue key_value;
175 : // The depth of the focused path, that is, the number of pointers stored in
176 : // this structure.
177 : int8_t length;
178 : HashValue key_hash;
179 : // Out-of-line storage for hash collisions.
180 : const ZoneMap<Key, Value>* more;
181 : using more_iterator = typename ZoneMap<Key, Value>::const_iterator;
182 : // {path_array} has to be the last member: To store an array inline, we
183 : // over-allocate memory for this structure and access memory beyond
184 : // {path_array}. This corresponds to a flexible array member as defined in
185 : // C99.
186 : const FocusedTree* path_array[1];
187 : const FocusedTree*& path(int i) {
188 : DCHECK(i < length);
189 : return reinterpret_cast<const FocusedTree**>(
190 15285384 : reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i];
191 : }
192 : const FocusedTree* path(int i) const {
193 : DCHECK(i < length);
194 : return reinterpret_cast<const FocusedTree* const*>(
195 : reinterpret_cast<const byte*>(this) +
196 1762392984 : offsetof(FocusedTree, path_array))[i];
197 : }
198 : };
199 :
200 : template <class Key, class Value, class Hasher>
201 : class PersistentMap<Key, Value, Hasher>::HashValue {
202 : public:
203 19500675 : explicit HashValue(size_t hash) : bits_(static_cast<uint32_t>(hash)) {}
204 :
205 : Bit operator[](int pos) const {
206 : DCHECK_LT(pos, kHashBits);
207 3853093608 : return bits_ & (static_cast<decltype(bits_)>(1) << (kHashBits - pos - 1))
208 : ? kRight
209 3930009444 : : kLeft;
210 : }
211 :
212 170382 : bool operator<(HashValue other) const { return bits_ < other.bits_; }
213 184650 : bool operator==(HashValue other) const { return bits_ == other.bits_; }
214 9193693 : bool operator!=(HashValue other) const { return bits_ != other.bits_; }
215 : HashValue operator^(HashValue other) const {
216 76915836 : return HashValue(bits_ ^ other.bits_);
217 : }
218 :
219 : private:
220 : static_assert(sizeof(uint32_t) * 8 == kHashBits, "wrong type for bits_");
221 : uint32_t bits_;
222 : };
223 :
224 : template <class Key, class Value, class Hasher>
225 : class PersistentMap<Key, Value, Hasher>::iterator {
226 : public:
227 : const value_type operator*() const {
228 1052792901 : if (current_->more) {
229 : return *more_iter_;
230 : } else {
231 1052694044 : return current_->key_value;
232 : }
233 : }
234 :
235 21605185 : iterator& operator++() {
236 982238388 : do {
237 982632476 : if (!current_) {
238 : // Iterator is past the end.
239 : return *this;
240 : }
241 982635076 : if (current_->more) {
242 : DCHECK(more_iter_ != current_->more->end());
243 : ++more_iter_;
244 72088 : if (more_iter_ != current_->more->end()) return *this;
245 : }
246 982603732 : if (level_ == 0) {
247 4507 : *this = end(def_value_);
248 4507 : return *this;
249 : }
250 982599225 : --level_;
251 6126876708 : while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) {
252 1387607456 : if (level_ == 0) {
253 381370 : *this = end(def_value_);
254 381370 : return *this;
255 : }
256 1387226086 : --level_;
257 : }
258 982217855 : const FocusedTree* first_right_alternative = path_[level_];
259 982217855 : level_++;
260 982217855 : current_ = FindLeftmost(first_right_alternative, &level_, &path_);
261 982238388 : if (current_->more) {
262 3986 : more_iter_ = current_->more->begin();
263 : }
264 982238388 : } while (!((**this).second != def_value()));
265 : return *this;
266 : }
267 :
268 65170544 : bool operator==(const iterator& other) const {
269 91016318 : if (is_end()) return other.is_end();
270 39324770 : if (other.is_end()) return false;
271 9193693 : if (current_->key_hash != other.current_->key_hash) {
272 : return false;
273 : } else {
274 15135 : return (**this).first == (*other).first;
275 : }
276 : }
277 30794959 : bool operator!=(const iterator& other) const { return !(*this == other); }
278 :
279 11750063 : bool operator<(const iterator& other) const {
280 11750063 : if (is_end()) return false;
281 5966620 : if (other.is_end()) return true;
282 184650 : if (current_->key_hash == other.current_->key_hash) {
283 14268 : return (**this).first < (*other).first;
284 : } else {
285 170382 : return current_->key_hash < other.current_->key_hash;
286 : }
287 : }
288 :
289 25845774 : bool is_end() const { return current_ == nullptr; }
290 :
291 : const Value& def_value() { return def_value_; }
292 :
293 15691347 : static iterator begin(const FocusedTree* tree, Value def_value) {
294 : iterator i(def_value);
295 15691347 : i.current_ = FindLeftmost(tree, &i.level_, &i.path_);
296 15692010 : if (i.current_->more) {
297 4 : i.more_iter_ = i.current_->more->begin();
298 : }
299 : // Skip entries with default value. PersistentMap iterators must never point
300 : // to a default value.
301 38166771 : while (!i.is_end() && !((*i).second != def_value)) ++i;
302 15692076 : return i;
303 : }
304 :
305 : static iterator end(Value def_value) { return iterator(def_value); }
306 :
307 : private:
308 : int level_;
309 : typename FocusedTree::more_iterator more_iter_;
310 : const FocusedTree* current_;
311 : std::array<const FocusedTree*, kHashBits> path_;
312 : Value def_value_;
313 :
314 : explicit iterator(Value def_value)
315 96391391 : : level_(0), current_(nullptr), def_value_(def_value) {}
316 : };
317 :
318 : template <class Key, class Value, class Hasher>
319 : class PersistentMap<Key, Value, Hasher>::double_iterator {
320 : public:
321 20756469 : std::tuple<Key, Value, Value> operator*() {
322 20756469 : if (first_current_) {
323 : auto pair = *first_;
324 : return std::make_tuple(
325 : pair.first, pair.second,
326 20588606 : second_current_ ? (*second_).second : second_.def_value());
327 : } else {
328 : DCHECK(second_current_);
329 : auto pair = *second_;
330 : return std::make_tuple(pair.first, first_.def_value(), pair.second);
331 : }
332 : }
333 :
334 7320345 : double_iterator& operator++() {
335 : #ifdef DEBUG
336 : iterator old_first = first_;
337 : iterator old_second = second_;
338 : #endif
339 7320345 : if (first_current_) {
340 7312358 : ++first_;
341 : DCHECK(old_first < first_);
342 : }
343 7320195 : if (second_current_) {
344 7311961 : ++second_;
345 : DCHECK(old_second < second_);
346 : }
347 7320513 : return *this = double_iterator(first_, second_);
348 : }
349 :
350 34403741 : double_iterator(iterator first, iterator second)
351 34403741 : : first_(first), second_(second) {
352 34403741 : if (first_ == second_) {
353 22672127 : first_current_ = second_current_ = true;
354 11750083 : } else if (first_ < second_) {
355 5791075 : first_current_ = true;
356 5791075 : second_current_ = false;
357 : } else {
358 : DCHECK(second_ < first_);
359 5959005 : first_current_ = false;
360 5959005 : second_current_ = true;
361 : }
362 34422207 : }
363 :
364 20872348 : bool operator!=(const double_iterator& other) {
365 47639849 : return first_ != other.first_ || second_ != other.second_;
366 : }
367 :
368 : bool is_end() const { return first_.is_end() && second_.is_end(); }
369 :
370 : private:
371 : iterator first_;
372 : iterator second_;
373 : bool first_current_;
374 : bool second_current_;
375 : };
376 :
377 : template <class Key, class Value, class Hasher>
378 6266993 : void PersistentMap<Key, Value, Hasher>::Set(Key key, Value value) {
379 79884 : HashValue key_hash = HashValue(Hasher()(key));
380 : std::array<const FocusedTree*, kHashBits> path;
381 6264560 : int length = 0;
382 6264560 : const FocusedTree* old = FindHash(key_hash, &path, &length);
383 : ZoneMap<Key, Value>* more = nullptr;
384 9363755 : if (!(GetFocusedValue(old, key) != value)) return;
385 4714430 : if (old && !(old->more == nullptr && old->key_value.key() == key)) {
386 54778 : more = new (zone_->New(sizeof(*more))) ZoneMap<Key, Value>(zone_);
387 27389 : if (old->more) {
388 : *more = *old->more;
389 : } else {
390 1995 : (*more)[old->key_value.key()] = old->key_value.value();
391 : }
392 27389 : (*more)[key] = value;
393 : }
394 : FocusedTree* tree =
395 3215448 : new (zone_->New(sizeof(FocusedTree) +
396 6430900 : std::max(0, length - 1) * sizeof(const FocusedTree*)))
397 : FocusedTree{KeyValue(std::move(key), std::move(value)),
398 : static_cast<int8_t>(length),
399 : key_hash,
400 : more,
401 6430885 : {}};
402 33786220 : for (int i = 0; i < length; ++i) {
403 15285384 : tree->path(i) = path[i];
404 : }
405 3215452 : *this = PersistentMap(tree, zone_, def_value_);
406 : }
407 :
408 : template <class Key, class Value, class Hasher>
409 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
410 13291892 : PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const {
411 13291892 : const FocusedTree* tree = tree_;
412 : int level = 0;
413 39446403 : while (tree && hash != tree->key_hash) {
414 47685242 : while ((hash ^ tree->key_hash)[level] == 0) {
415 21530731 : ++level;
416 : }
417 26154511 : tree = level < tree->length ? tree->path(level) : nullptr;
418 26154511 : ++level;
419 : }
420 13291892 : return tree;
421 : }
422 :
423 : template <class Key, class Value, class Hasher>
424 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
425 6290410 : PersistentMap<Key, Value, Hasher>::FindHash(
426 : HashValue hash, std::array<const FocusedTree*, kHashBits>* path,
427 : int* length) const {
428 6290410 : const FocusedTree* tree = tree_;
429 : int level = 0;
430 33938746 : while (tree && hash != tree->key_hash) {
431 13895407 : int map_length = tree->length;
432 44427223 : while ((hash ^ tree->key_hash)[level] == 0) {
433 30670374 : (*path)[level] = level < map_length ? tree->path(level) : nullptr;
434 15335187 : ++level;
435 : }
436 13895407 : (*path)[level] = tree;
437 13895407 : tree = level < tree->length ? tree->path(level) : nullptr;
438 13895407 : ++level;
439 : }
440 6290410 : if (tree) {
441 14450850 : while (level < tree->length) {
442 9889310 : (*path)[level] = tree->path(level);
443 4944655 : ++level;
444 : }
445 : }
446 6290410 : *length = level;
447 6290410 : return tree;
448 : }
449 :
450 : template <class Key, class Value, class Hasher>
451 19463456 : const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue(
452 : const FocusedTree* tree, const Key& key) const {
453 19463456 : if (!tree) {
454 4933747 : return def_value_;
455 : }
456 14529709 : if (tree->more) {
457 : auto it = tree->more->find(key);
458 85173 : if (it == tree->more->end())
459 44373 : return def_value_;
460 : else
461 40800 : return it->second;
462 : } else {
463 14444536 : if (key == tree->key_value.key()) {
464 14440486 : return tree->key_value.value();
465 : } else {
466 4050 : return def_value_;
467 : }
468 : }
469 : }
470 :
471 : template <class Key, class Value, class Hasher>
472 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
473 : PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level,
474 : Bit bit) {
475 3187409226 : if (tree->key_hash[level] == bit) {
476 : return tree;
477 1704178357 : } else if (level < tree->length) {
478 : return tree->path(level);
479 : } else {
480 : return nullptr;
481 : }
482 : }
483 :
484 : template <class Key, class Value, class Hasher>
485 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
486 997926608 : PersistentMap<Key, Value, Hasher>::FindLeftmost(
487 : const FocusedTree* start, int* level,
488 : std::array<const FocusedTree*, kHashBits>* path) {
489 : const FocusedTree* current = start;
490 2481194905 : while (*level < current->length) {
491 1483268297 : if (const FocusedTree* child = GetChild(current, *level, kLeft)) {
492 2524791330 : (*path)[*level] = GetChild(current, *level, kRight);
493 : current = child;
494 1262395665 : ++*level;
495 220872632 : } else if (const FocusedTree* child = GetChild(current, *level, kRight)) {
496 441745264 : (*path)[*level] = GetChild(current, *level, kLeft);
497 : current = child;
498 220872632 : ++*level;
499 : } else {
500 0 : UNREACHABLE();
501 : }
502 : }
503 997926608 : return current;
504 : }
505 :
506 : template <class Key, class Value, class Hasher>
507 : std::ostream& operator<<(std::ostream& os,
508 : const PersistentMap<Key, Value, Hasher>& map) {
509 : os << "{";
510 : bool first = true;
511 : for (auto pair : map) {
512 : if (!first) os << ", ";
513 : first = false;
514 : os << pair.first << ": " << pair.second;
515 : }
516 : return os << "}";
517 : }
518 :
519 : } // namespace compiler
520 : } // namespace internal
521 : } // namespace v8
522 :
523 : #endif // V8_COMPILER_PERSISTENT_MAP_H_
|