Line data Source code
1 : // Copyright 2017 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_COMPILER_PERSISTENT_H_
6 : #define V8_COMPILER_PERSISTENT_H_
7 :
8 : #include <array>
9 : #include <bitset>
10 : #include <tuple>
11 :
12 : #include "src/base/functional.h"
13 : #include "src/zone/zone-containers.h"
14 :
15 : namespace v8 {
16 : namespace internal {
17 : namespace compiler {
18 :
19 : // PersistentMap is a persistent map datastructure based on hash trees (a binary
20 : // tree using the bits of a hash value as addresses). The map is a conceptually
21 : // infinite: All keys are initially mapped to a default value, values are
22 : // deleted by overwriting them with the default value. The iterators produce
23 : // exactly the keys that are not the default value. The hash values should have
24 : // high variance in their high bits, so dense integers are a bad choice.
25 : // Complexity:
26 : // - Copy and assignment: O(1)
27 : // - access: O(log n)
28 : // - update: O(log n) time and space
29 : // - iteration: amortized O(1) per step
30 : // - Zip: O(n)
31 : // - equality check: O(n)
32 : // TODO(tebbi): Cache map transitions to avoid re-allocation of the same map.
33 : // TODO(tebbi): Implement an O(1) equality check based on hash consing or
34 : // something similar.
35 : template <class Key, class Value, class Hasher = base::hash<Key>>
36 : class PersistentMap {
37 : public:
38 : using key_type = Key;
39 : using mapped_type = Value;
40 : using value_type = std::pair<Key, Value>;
41 :
42 : private:
43 : static constexpr size_t kHashBits = 32;
44 : enum Bit : int { kLeft = 0, kRight = 1 };
45 :
46 : // Access hash bits starting from the high bits and compare them according to
47 : // their unsigned value. This way, the order in the hash tree is compatible
48 : // with numeric hash comparisons.
49 : class HashValue;
50 :
51 : struct KeyValue : std::pair<Key, Value> {
52 : const Key& key() const { return this->first; }
53 : const Value& value() const { return this->second; }
54 : using std::pair<Key, Value>::pair;
55 : };
56 :
57 : struct FocusedTree;
58 :
59 : public:
60 : // Depth of the last added element. This is a cheap estimate for the size of
61 : // the hash tree.
62 : size_t last_depth() const {
63 : if (tree_) {
64 : return tree_->length;
65 : } else {
66 : return 0;
67 : }
68 : }
69 :
70 15773318 : const Value& Get(const Key& key) const {
71 119123 : HashValue key_hash = HashValue(Hasher()(key));
72 7886659 : const FocusedTree* tree = FindHash(key_hash);
73 7886659 : return GetFocusedValue(tree, key);
74 : }
75 :
76 : // Add or overwrite an existing key-value pair.
77 : PersistentMap Add(Key key, Value value) const;
78 5491795 : void Set(Key key, Value value) { *this = Add(key, value); }
79 :
80 59791764 : bool operator==(const PersistentMap& other) const {
81 59791764 : if (tree_ == other.tree_) return true;
82 10762688 : if (def_value_ != other.def_value_) return false;
83 17515061 : for (const std::tuple<Key, Value, Value>& triple : Zip(other)) {
84 17396864 : if (std::get<1>(triple) != std::get<2>(triple)) return false;
85 : }
86 118216 : return true;
87 : }
88 :
89 : bool operator!=(const PersistentMap& other) const {
90 59792798 : return !(*this == other);
91 : }
92 :
93 : // The iterator produces key-value pairs in the lexicographical order of
94 : // hash value and key. It produces exactly the key-value pairs where the value
95 : // is not the default value.
96 : class iterator;
97 :
98 31346014 : iterator begin() const {
99 21920774 : if (!tree_) return end();
100 12495534 : return iterator::begin(tree_, def_value_);
101 : }
102 : iterator end() const { return iterator::end(def_value_); }
103 :
104 : // Iterator to traverse two maps in lockstep, producing matching value pairs
105 : // for each key where at least one value is different from the respective
106 : // default.
107 : class double_iterator;
108 :
109 : // An iterable to iterate over the two maps in lockstep.
110 : struct ZipIterable {
111 : PersistentMap a;
112 : PersistentMap b;
113 10762691 : double_iterator begin() { return double_iterator(a.begin(), b.begin()); }
114 32288142 : double_iterator end() { return double_iterator(a.end(), b.end()); }
115 : };
116 :
117 10762691 : ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; }
118 :
119 : explicit PersistentMap(Zone* zone, Value def_value = Value())
120 : : PersistentMap(nullptr, zone, def_value) {}
121 :
122 : private:
123 : // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
124 : const FocusedTree* FindHash(HashValue hash) const;
125 :
126 : // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
127 : // Output the path to this {FocusedTree} and its length. If no such
128 : // {FocusedTree} exists, return {nullptr} and output the path to the last node
129 : // with a matching hash prefix. Note that {length} is the length of the found
130 : // path and may be less than the length of the found {FocusedTree}.
131 : const FocusedTree* FindHash(HashValue hash,
132 : std::array<const FocusedTree*, kHashBits>* path,
133 : int* length) const;
134 :
135 : // Load value from the leaf node on the focused path of {tree}.
136 : const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const;
137 :
138 : // Return the {FocusedTree} representing the left (bit==kLeft) or right
139 : // (bit==kRight) child of the node on the path of {tree} at tree level
140 : // {level}.
141 : static const FocusedTree* GetChild(const FocusedTree* tree, int level,
142 : Bit bit);
143 :
144 : // Find the leftmost path in the tree, starting at the node at tree level
145 : // {level} on the path of {start}. Output the level of the leaf to {level} and
146 : // the path to {path}.
147 : static const FocusedTree* FindLeftmost(
148 : const FocusedTree* start, int* level,
149 : std::array<const FocusedTree*, kHashBits>* path);
150 :
151 : PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)
152 33619994 : : tree_(tree), def_value_(def_value), zone_(zone) {}
153 :
154 : const FocusedTree* tree_;
155 : Value def_value_;
156 : Zone* zone_;
157 : };
158 :
159 : // This structure represents a hash tree with one focused path to a specific
160 : // leaf. For the focused leaf, it stores key, value and key hash. The path is
161 : // defined by the hash bits of the focused leaf. In a traditional tree
162 : // datastructure, the nodes of a path form a linked list with the values being
163 : // the pointers outside of this path. Instead of storing all of these nodes,
164 : // we store an array of the pointers pointing outside of the path. This is
165 : // similar to the stack used when doing DFS traversal of a tree. The hash of
166 : // the leaf is used to know if the pointers point to the left or the
167 : // right of the path. As there is no explicit representation of a tree node,
168 : // this structure also represents all the nodes on its path. The intended node
169 : // depends on the tree depth, which is always clear from the referencing
170 : // context. So the pointer to a {FocusedTree} stored in the
171 : // {PersistentMap.tree_} always references the root, while a pointer from a
172 : // focused node of another {FocusedTree} always references to one tree level
173 : // lower than before.
174 : template <class Key, class Value, class Hasher>
175 : struct PersistentMap<Key, Value, Hasher>::FocusedTree {
176 : KeyValue key_value;
177 : // The depth of the focused path, that is, the number of pointers stored in
178 : // this structure.
179 : int8_t length;
180 : HashValue key_hash;
181 : // Out-of-line storage for hash collisions.
182 : const ZoneMap<Key, Value>* more;
183 : using more_iterator = typename ZoneMap<Key, Value>::const_iterator;
184 : // {path_array} has to be the last member: To store an array inline, we
185 : // over-allocate memory for this structure and access memory beyond
186 : // {path_array}. This corresponds to a flexible array member as defined in
187 : // C99.
188 : const FocusedTree* path_array[1];
189 : const FocusedTree*& path(int i) {
190 : DCHECK(i < length);
191 : return reinterpret_cast<const FocusedTree**>(
192 13084568 : reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i];
193 : }
194 : const FocusedTree* path(int i) const {
195 : DCHECK(i < length);
196 : return reinterpret_cast<const FocusedTree* const*>(
197 : reinterpret_cast<const byte*>(this) +
198 104537028 : offsetof(FocusedTree, path_array))[i];
199 : }
200 : };
201 :
202 : template <class Key, class Value, class Hasher>
203 : class PersistentMap<Key, Value, Hasher>::HashValue {
204 : public:
205 : explicit HashValue(size_t hash) : bits_(hash) {}
206 : explicit HashValue(std::bitset<kHashBits> hash) : bits_(hash) {}
207 :
208 : Bit operator[](int pos) const {
209 356039788 : return bits_[kHashBits - pos - 1] ? kRight : kLeft;
210 : }
211 :
212 : bool operator<(HashValue other) const {
213 : static_assert(sizeof(*this) <= sizeof(unsigned long), ""); // NOLINT
214 : return bits_.to_ulong() < other.bits_.to_ulong();
215 : }
216 : bool operator==(HashValue other) const { return bits_ == other.bits_; }
217 : bool operator!=(HashValue other) const { return bits_ != other.bits_; }
218 : HashValue operator^(HashValue other) const {
219 : return HashValue(bits_ ^ other.bits_);
220 : }
221 :
222 : private:
223 : std::bitset<kHashBits> bits_;
224 : };
225 :
226 : template <class Key, class Value, class Hasher>
227 : class PersistentMap<Key, Value, Hasher>::iterator {
228 : public:
229 : const value_type operator*() const {
230 60821942 : if (current_->more) {
231 : return *more_iter_;
232 : } else {
233 60723161 : return current_->key_value;
234 : }
235 : }
236 :
237 16402461 : iterator& operator++() {
238 16264792 : do {
239 16664017 : if (!current_) {
240 : // Iterator is past the end.
241 : return *this;
242 : }
243 16664017 : if (current_->more) {
244 : DCHECK(more_iter_ != current_->more->end());
245 : ++more_iter_;
246 72088 : if (more_iter_ != current_->more->end()) return *this;
247 : }
248 16631963 : if (level_ == 0) {
249 3382 : *this = end(def_value_);
250 3382 : return *this;
251 : }
252 16628581 : --level_;
253 84884971 : while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) {
254 17863404 : if (level_ == 0) {
255 363790 : *this = end(def_value_);
256 363790 : return *this;
257 : }
258 17499614 : --level_;
259 : }
260 16264791 : const FocusedTree* first_right_alternative = path_[level_];
261 16264791 : level_++;
262 16264791 : current_ = FindLeftmost(first_right_alternative, &level_, &path_);
263 16264792 : if (current_->more) {
264 3986 : more_iter_ = current_->more->begin();
265 : }
266 : } while ((**this).second == def_value());
267 : return *this;
268 : }
269 :
270 107683082 : bool operator==(const iterator& other) const {
271 74543330 : if (is_end()) return other.is_end();
272 33139752 : if (other.is_end()) return false;
273 8227590 : if (current_->key_hash != other.current_->key_hash) {
274 : return false;
275 : } else {
276 15141 : return (**this).first == (*other).first;
277 : }
278 : }
279 25546648 : bool operator!=(const iterator& other) const { return !(*this == other); }
280 :
281 14113726 : bool operator<(const iterator& other) const {
282 9354133 : if (is_end()) return false;
283 4759593 : if (other.is_end()) return true;
284 167679 : if (current_->key_hash < other.current_->key_hash) {
285 : return true;
286 166661 : } else if (current_->key_hash == other.current_->key_hash) {
287 14274 : return (**this).first < (*other).first;
288 : } else {
289 : return false;
290 : }
291 : }
292 :
293 20701789 : bool is_end() const { return current_ == nullptr; }
294 :
295 : const Value& def_value() { return def_value_; }
296 :
297 : static iterator begin(const FocusedTree* tree, Value def_value) {
298 : iterator i(def_value);
299 12495534 : i.current_ = FindLeftmost(tree, &i.level_, &i.path_);
300 12495560 : if (i.current_->more) {
301 4 : i.more_iter_ = i.current_->more->begin();
302 : }
303 : return i;
304 : }
305 :
306 : static iterator end(Value def_value) { return iterator(def_value); }
307 :
308 : private:
309 : int level_;
310 : typename FocusedTree::more_iterator more_iter_;
311 : const FocusedTree* current_;
312 : std::array<const FocusedTree*, kHashBits> path_;
313 : Value def_value_;
314 :
315 : explicit iterator(Value def_value)
316 87683148 : : level_(0), current_(nullptr), def_value_(def_value) {}
317 : };
318 :
319 : template <class Key, class Value, class Hasher>
320 : class PersistentMap<Key, Value, Hasher>::double_iterator {
321 : public:
322 17414031 : std::tuple<Key, Value, Value> operator*() {
323 17414031 : if (first_current_) {
324 : auto pair = *first_;
325 : return std::make_tuple(
326 : pair.first, pair.second,
327 17260208 : second_current_ ? (*second_).second : second_.def_value());
328 : } else {
329 : DCHECK(second_current_);
330 : auto pair = *second_;
331 : return std::make_tuple(pair.first, first_.def_value(), pair.second);
332 : }
333 : }
334 :
335 6769531 : double_iterator& operator++() {
336 6769531 : if (first_current_) ++first_;
337 6769530 : if (second_current_) ++second_;
338 6769531 : return *this = double_iterator(first_, second_);
339 : }
340 :
341 28294801 : double_iterator(iterator first, iterator second)
342 28294801 : : first_(first), second_(second) {
343 28294801 : if (first_ == second_) {
344 18940835 : first_current_ = second_current_ = true;
345 9354131 : } else if (first_ < second_) {
346 4600156 : first_current_ = true;
347 4600156 : second_current_ = false;
348 : } else {
349 4753979 : first_current_ = false;
350 4753979 : second_current_ = true;
351 : }
352 28294970 : }
353 :
354 17532219 : bool operator!=(const double_iterator& other) {
355 39777197 : return first_ != other.first_ || second_ != other.second_;
356 : }
357 :
358 : bool is_end() const { return first_.is_end() && second_.is_end(); }
359 :
360 : private:
361 : iterator first_;
362 : iterator second_;
363 : bool first_current_;
364 : bool second_current_;
365 : };
366 :
367 : template <class Key, class Value, class Hasher>
368 5491795 : PersistentMap<Key, Value, Hasher> PersistentMap<Key, Value, Hasher>::Add(
369 30000 : Key key, Value value) const {
370 79884 : HashValue key_hash = HashValue(Hasher()(key));
371 : std::array<const FocusedTree*, kHashBits> path;
372 5491794 : int length = 0;
373 5491794 : const FocusedTree* old = FindHash(key_hash, &path, &length);
374 : ZoneMap<Key, Value>* more = nullptr;
375 5491794 : if (GetFocusedValue(old, key) == value) return *this;
376 4144819 : if (old && !(old->more == nullptr && old->key_value.key() == key)) {
377 27052 : more = new (zone_->New(sizeof(*more))) ZoneMap<Key, Value>(zone_);
378 27052 : if (old->more) {
379 : *more = *old->more;
380 : } else {
381 1995 : (*more)[old->key_value.key()] = old->key_value.value();
382 : }
383 27052 : (*more)[key] = value;
384 : }
385 : FocusedTree* tree =
386 : new (zone_->New(sizeof(FocusedTree) +
387 8755299 : std::max(0, length - 1) * sizeof(const FocusedTree*)))
388 : FocusedTree{KeyValue(std::move(key), std::move(value)),
389 : static_cast<int8_t>(length),
390 : key_hash,
391 : more,
392 5836866 : {}};
393 16003001 : for (int i = 0; i < length; ++i) {
394 13084568 : tree->path(i) = path[i];
395 : }
396 2918433 : return PersistentMap(tree, zone_, def_value_);
397 : }
398 :
399 : template <class Key, class Value, class Hasher>
400 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
401 7886659 : PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const {
402 7886659 : const FocusedTree* tree = tree_;
403 : int level = 0;
404 21192085 : while (tree && hash != tree->key_hash) {
405 24884922 : while ((hash ^ tree->key_hash)[level] == 0) {
406 11579496 : ++level;
407 : }
408 13305426 : tree = level < tree->length ? tree->path(level) : nullptr;
409 13305426 : ++level;
410 : }
411 7886659 : return tree;
412 : }
413 :
414 : template <class Key, class Value, class Hasher>
415 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
416 5491794 : PersistentMap<Key, Value, Hasher>::FindHash(
417 : HashValue hash, std::array<const FocusedTree*, kHashBits>* path,
418 : int* length) const {
419 5491794 : const FocusedTree* tree = tree_;
420 : int level = 0;
421 22079406 : while (tree && hash != tree->key_hash) {
422 11125818 : int map_length = tree->length;
423 34882131 : while ((hash ^ tree->key_hash)[level] == 0) {
424 25551908 : (*path)[level] = level < map_length ? tree->path(level) : nullptr;
425 12775954 : ++level;
426 : }
427 11125818 : (*path)[level] = tree;
428 11125818 : tree = level < tree->length ? tree->path(level) : nullptr;
429 11125818 : ++level;
430 : }
431 5491794 : if (tree) {
432 7789622 : while (level < tree->length) {
433 7909490 : (*path)[level] = tree->path(level);
434 3954745 : ++level;
435 : }
436 : }
437 5491794 : *length = level;
438 5491794 : return tree;
439 : }
440 :
441 : template <class Key, class Value, class Hasher>
442 13378452 : const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue(
443 : const FocusedTree* tree, const Key& key) const {
444 13378452 : if (!tree) {
445 4551824 : return def_value_;
446 : }
447 8826628 : if (tree->more) {
448 : auto it = tree->more->find(key);
449 100472 : if (it == tree->more->end())
450 28136 : return def_value_;
451 : else
452 22100 : return it->second;
453 : } else {
454 8776392 : if (key == tree->key_value.key()) {
455 8772386 : return tree->key_value.value();
456 : } else {
457 4006 : return def_value_;
458 : }
459 : }
460 : }
461 :
462 : template <class Key, class Value, class Hasher>
463 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
464 : PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level,
465 : Bit bit) {
466 127834159 : if (tree->key_hash[level] == bit) {
467 : return tree;
468 65458368 : } else if (level < tree->length) {
469 : return tree->path(level);
470 : } else {
471 : return nullptr;
472 : }
473 : }
474 :
475 : template <class Key, class Value, class Hasher>
476 : const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
477 28760302 : PersistentMap<Key, Value, Hasher>::FindLeftmost(
478 : const FocusedTree* start, int* level,
479 : std::array<const FocusedTree*, kHashBits>* path) {
480 : const FocusedTree* current = start;
481 119896482 : while (*level < current->length) {
482 62375851 : if (const FocusedTree* child = GetChild(current, *level, kLeft)) {
483 59293421 : (*path)[*level] = GetChild(current, *level, kRight);
484 : current = child;
485 59293421 : ++*level;
486 3082430 : } else if (const FocusedTree* child = GetChild(current, *level, kRight)) {
487 3082457 : (*path)[*level] = GetChild(current, *level, kLeft);
488 : current = child;
489 3082457 : ++*level;
490 : } else {
491 0 : UNREACHABLE();
492 : }
493 : }
494 28760329 : return current;
495 : }
496 :
497 : template <class Key, class Value, class Hasher>
498 : std::ostream& operator<<(std::ostream& os,
499 : const PersistentMap<Key, Value, Hasher>& map) {
500 : os << "{";
501 : bool first = true;
502 : for (auto pair : map) {
503 : if (!first) os << ", ";
504 : first = false;
505 : os << pair.first << ": " << pair.second;
506 : }
507 : return os << "}";
508 : }
509 :
510 : } // namespace compiler
511 : } // namespace internal
512 : } // namespace v8
513 :
514 : #endif // V8_COMPILER_PERSISTENT_MAP_H_
|