/src/hermes/lib/VM/detail/IdentifierHashTable.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #include "hermes/VM/detail/IdentifierHashTable.h" |
9 | | |
10 | | #include "hermes/VM/StringPrimitive.h" |
11 | | |
12 | | using namespace hermes::vm::detail; |
13 | | // In GCC/CLANG, method definitions can refer to ancestor namespaces of |
14 | | // the namespace that the class is declared in without namespace qualifiers. |
15 | | // This is not allowed in MSVC. |
16 | | using hermes::vm::StringPrimitive; |
17 | | using hermes::vm::SymbolID; |
18 | | |
19 | | template <typename T> |
20 | | uint32_t IdentifierHashTable::lookupString( |
21 | | llvh::ArrayRef<T> str, |
22 | | uint32_t hash, |
23 | 558k | bool mustBeNew) const { |
24 | 558k | assert(identifierTable_ && "identifier table pointer is not initialized"); |
25 | | |
26 | 558k | auto cap = capacity(); |
27 | 558k | assert(llvh::isPowerOf2_32(cap) && "capacity must be power of 2"); |
28 | 558k | assert(size_ < cap && "The hash table can never be full"); |
29 | | |
30 | 558k | #ifdef HERMES_SLOW_DEBUG |
31 | 558k | assert(hash == hashString(str) && "invalid hash"); |
32 | 558k | #endif |
33 | 558k | uint32_t idx = hash & (cap - 1); |
34 | 558k | uint32_t base = 1; |
35 | | // deletedIndex tracks the index of a deleted entry found in the conflict |
36 | | // chain. If we could not find an entry that matches str, we would return |
37 | | // the deleted slot for insertion to be able to reuse deleted space. |
38 | 558k | OptValue<uint32_t> deletedIndex; |
39 | | // The loop will always terminate as long as the hash table is not full. |
40 | 1.03M | while (1) { |
41 | 1.03M | if (table_.isEmpty(idx)) { |
42 | | // Found an empty entry, meaning that str does not exist in the table. |
43 | | // If deletedIndex is available, return it, otherwise return idx. |
44 | 326k | return deletedIndex ? *deletedIndex : idx; |
45 | 712k | } else if (table_.isDeleted(idx)) { |
46 | 1 | assert( |
47 | 1 | !mustBeNew && |
48 | 1 | "mustBeNew should never be set if there are deleted entries"); |
49 | 1 | deletedIndex = idx; |
50 | 712k | } else if (!mustBeNew) { |
51 | | // If mustBeNew is set, we know this string does not exist in the table. |
52 | | // There is no need to compare. |
53 | | |
54 | 670k | auto &lookupTableEntry = |
55 | 670k | identifierTable_->getLookupTableEntry(table_.get(idx)); |
56 | 670k | if (lookupTableEntry.getHash() == hash) { |
57 | 232k | if (lookupTableEntry.isStringPrim()) { |
58 | 230k | const StringPrimitive *strPrim = lookupTableEntry.getStringPrim(); |
59 | 230k | if (strPrim->isASCII()) { |
60 | 230k | if (stringRefEquals(str, strPrim->castToASCIIRef())) { |
61 | 230k | return idx; |
62 | 230k | } |
63 | 230k | } else { |
64 | 1 | if (stringRefEquals(str, strPrim->castToUTF16Ref())) { |
65 | 1 | return idx; |
66 | 1 | } |
67 | 1 | } |
68 | 230k | } else if (lookupTableEntry.isLazyASCII()) { |
69 | | // Lazy ASCII string. |
70 | 1.44k | if (stringRefEquals(str, lookupTableEntry.getLazyASCIIRef())) { |
71 | 1.44k | return idx; |
72 | 1.44k | } |
73 | 1.44k | } else { |
74 | | // UTF16 string. |
75 | 0 | if (stringRefEquals(str, lookupTableEntry.getLazyUTF16Ref())) { |
76 | 0 | return idx; |
77 | 0 | } |
78 | 0 | } |
79 | 232k | } |
80 | 670k | } |
81 | | /// Use quadratic probing to find next probe index in the hash table. |
82 | | /// h(k, i) = (h(k) + 1/2 * i + 1/2 * i^2) mod m. |
83 | | /// This guarantees the values of h(k,i) for i in [0,m-1] are all distinct. |
84 | 480k | idx = (idx + base) & (cap - 1); |
85 | 480k | ++base; |
86 | 480k | } |
87 | 558k | } unsigned int hermes::vm::detail::IdentifierHashTable::lookupString<char>(llvh::ArrayRef<char>, unsigned int, bool) const Line | Count | Source | 23 | 558k | bool mustBeNew) const { | 24 | 558k | assert(identifierTable_ && "identifier table pointer is not initialized"); | 25 | | | 26 | 558k | auto cap = capacity(); | 27 | 558k | assert(llvh::isPowerOf2_32(cap) && "capacity must be power of 2"); | 28 | 558k | assert(size_ < cap && "The hash table can never be full"); | 29 | | | 30 | 558k | #ifdef HERMES_SLOW_DEBUG | 31 | 558k | assert(hash == hashString(str) && "invalid hash"); | 32 | 558k | #endif | 33 | 558k | uint32_t idx = hash & (cap - 1); | 34 | 558k | uint32_t base = 1; | 35 | | // deletedIndex tracks the index of a deleted entry found in the conflict | 36 | | // chain. If we could not find an entry that matches str, we would return | 37 | | // the deleted slot for insertion to be able to reuse deleted space. | 38 | 558k | OptValue<uint32_t> deletedIndex; | 39 | | // The loop will always terminate as long as the hash table is not full. | 40 | 1.03M | while (1) { | 41 | 1.03M | if (table_.isEmpty(idx)) { | 42 | | // Found an empty entry, meaning that str does not exist in the table. | 43 | | // If deletedIndex is available, return it, otherwise return idx. | 44 | 326k | return deletedIndex ? *deletedIndex : idx; | 45 | 712k | } else if (table_.isDeleted(idx)) { | 46 | 1 | assert( | 47 | 1 | !mustBeNew && | 48 | 1 | "mustBeNew should never be set if there are deleted entries"); | 49 | 1 | deletedIndex = idx; | 50 | 712k | } else if (!mustBeNew) { | 51 | | // If mustBeNew is set, we know this string does not exist in the table. | 52 | | // There is no need to compare. | 53 | | | 54 | 670k | auto &lookupTableEntry = | 55 | 670k | identifierTable_->getLookupTableEntry(table_.get(idx)); | 56 | 670k | if (lookupTableEntry.getHash() == hash) { | 57 | 232k | if (lookupTableEntry.isStringPrim()) { | 58 | 230k | const StringPrimitive *strPrim = lookupTableEntry.getStringPrim(); | 59 | 230k | if (strPrim->isASCII()) { | 60 | 230k | if (stringRefEquals(str, strPrim->castToASCIIRef())) { | 61 | 230k | return idx; | 62 | 230k | } | 63 | 230k | } else { | 64 | 0 | if (stringRefEquals(str, strPrim->castToUTF16Ref())) { | 65 | 0 | return idx; | 66 | 0 | } | 67 | 0 | } | 68 | 230k | } else if (lookupTableEntry.isLazyASCII()) { | 69 | | // Lazy ASCII string. | 70 | 1.44k | if (stringRefEquals(str, lookupTableEntry.getLazyASCIIRef())) { | 71 | 1.44k | return idx; | 72 | 1.44k | } | 73 | 1.44k | } else { | 74 | | // UTF16 string. | 75 | 0 | if (stringRefEquals(str, lookupTableEntry.getLazyUTF16Ref())) { | 76 | 0 | return idx; | 77 | 0 | } | 78 | 0 | } | 79 | 232k | } | 80 | 670k | } | 81 | | /// Use quadratic probing to find next probe index in the hash table. | 82 | | /// h(k, i) = (h(k) + 1/2 * i + 1/2 * i^2) mod m. | 83 | | /// This guarantees the values of h(k,i) for i in [0,m-1] are all distinct. | 84 | 480k | idx = (idx + base) & (cap - 1); | 85 | 480k | ++base; | 86 | 480k | } | 87 | 558k | } |
unsigned int hermes::vm::detail::IdentifierHashTable::lookupString<char16_t>(llvh::ArrayRef<char16_t>, unsigned int, bool) const Line | Count | Source | 23 | 395 | bool mustBeNew) const { | 24 | 395 | assert(identifierTable_ && "identifier table pointer is not initialized"); | 25 | | | 26 | 395 | auto cap = capacity(); | 27 | 395 | assert(llvh::isPowerOf2_32(cap) && "capacity must be power of 2"); | 28 | 395 | assert(size_ < cap && "The hash table can never be full"); | 29 | | | 30 | 395 | #ifdef HERMES_SLOW_DEBUG | 31 | 395 | assert(hash == hashString(str) && "invalid hash"); | 32 | 395 | #endif | 33 | 395 | uint32_t idx = hash & (cap - 1); | 34 | 395 | uint32_t base = 1; | 35 | | // deletedIndex tracks the index of a deleted entry found in the conflict | 36 | | // chain. If we could not find an entry that matches str, we would return | 37 | | // the deleted slot for insertion to be able to reuse deleted space. | 38 | 395 | OptValue<uint32_t> deletedIndex; | 39 | | // The loop will always terminate as long as the hash table is not full. | 40 | 575 | while (1) { | 41 | 575 | if (table_.isEmpty(idx)) { | 42 | | // Found an empty entry, meaning that str does not exist in the table. | 43 | | // If deletedIndex is available, return it, otherwise return idx. | 44 | 394 | return deletedIndex ? *deletedIndex : idx; | 45 | 394 | } else if (table_.isDeleted(idx)) { | 46 | 0 | assert( | 47 | 0 | !mustBeNew && | 48 | 0 | "mustBeNew should never be set if there are deleted entries"); | 49 | 0 | deletedIndex = idx; | 50 | 181 | } else if (!mustBeNew) { | 51 | | // If mustBeNew is set, we know this string does not exist in the table. | 52 | | // There is no need to compare. | 53 | | | 54 | 181 | auto &lookupTableEntry = | 55 | 181 | identifierTable_->getLookupTableEntry(table_.get(idx)); | 56 | 181 | if (lookupTableEntry.getHash() == hash) { | 57 | 1 | if (lookupTableEntry.isStringPrim()) { | 58 | 1 | const StringPrimitive *strPrim = lookupTableEntry.getStringPrim(); | 59 | 1 | if (strPrim->isASCII()) { | 60 | 0 | if (stringRefEquals(str, strPrim->castToASCIIRef())) { | 61 | 0 | return idx; | 62 | 0 | } | 63 | 1 | } else { | 64 | 1 | if (stringRefEquals(str, strPrim->castToUTF16Ref())) { | 65 | 1 | return idx; | 66 | 1 | } | 67 | 1 | } | 68 | 1 | } else if (lookupTableEntry.isLazyASCII()) { | 69 | | // Lazy ASCII string. | 70 | 0 | if (stringRefEquals(str, lookupTableEntry.getLazyASCIIRef())) { | 71 | 0 | return idx; | 72 | 0 | } | 73 | 0 | } else { | 74 | | // UTF16 string. | 75 | 0 | if (stringRefEquals(str, lookupTableEntry.getLazyUTF16Ref())) { | 76 | 0 | return idx; | 77 | 0 | } | 78 | 0 | } | 79 | 1 | } | 80 | 181 | } | 81 | | /// Use quadratic probing to find next probe index in the hash table. | 82 | | /// h(k, i) = (h(k) + 1/2 * i + 1/2 * i^2) mod m. | 83 | | /// This guarantees the values of h(k,i) for i in [0,m-1] are all distinct. | 84 | 180 | idx = (idx + base) & (cap - 1); | 85 | 180 | ++base; | 86 | 180 | } | 87 | 395 | } |
|
88 | | |
89 | | // Instantiate the templated method so it can be called from other files. |
90 | | |
91 | | template uint32_t IdentifierHashTable::lookupString( |
92 | | llvh::ArrayRef<char> str, |
93 | | uint32_t hash, |
94 | | bool mustBeNew) const; |
95 | | |
96 | | template uint32_t IdentifierHashTable::lookupString( |
97 | | llvh::ArrayRef<char16_t> str, |
98 | | uint32_t hash, |
99 | | bool mustBeNew) const; |
100 | | |
101 | | uint32_t IdentifierHashTable::lookupString( |
102 | | const StringPrimitive *str, |
103 | 0 | bool mustBeNew) const { |
104 | 0 | if (str->isASCII()) { |
105 | 0 | return lookupString(str->castToASCIIRef(), mustBeNew); |
106 | 0 | } else { |
107 | 0 | return lookupString(str->castToUTF16Ref(), mustBeNew); |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | uint32_t IdentifierHashTable::lookupString( |
112 | | const StringPrimitive *str, |
113 | | uint32_t hash, |
114 | 142k | bool mustBeNew) const { |
115 | 142k | if (str->isASCII()) { |
116 | 142k | return lookupString(str->castToASCIIRef(), hash, mustBeNew); |
117 | 142k | } else { |
118 | 78 | return lookupString(str->castToUTF16Ref(), hash, mustBeNew); |
119 | 78 | } |
120 | 142k | } |
121 | | |
122 | 177k | void IdentifierHashTable::insert(uint32_t idx, SymbolID id) { |
123 | 177k | table_.set(idx, id.unsafeGetIndex()); |
124 | 177k | ++size_; |
125 | 177k | ++nonEmptyEntryCount_; |
126 | | |
127 | 177k | if (shouldGrow()) { |
128 | 27 | growAndRehash(capacity() * 2); |
129 | 27 | } |
130 | 177k | } |
131 | | |
132 | 41 | void IdentifierHashTable::remove(const StringPrimitive *str) { |
133 | 41 | if (str->isASCII()) { |
134 | 40 | remove(str->castToASCIIRef()); |
135 | 40 | } else { |
136 | 1 | remove(str->castToUTF16Ref()); |
137 | 1 | } |
138 | 41 | } |
139 | | |
140 | 121 | void IdentifierHashTable::growAndRehash(uint32_t newCapacity) { |
141 | | // Guard against potential overflow in the calculation of new capacity. |
142 | 121 | if (LLVM_UNLIKELY(newCapacity <= capacity())) { |
143 | 0 | hermes_fatal("too many identifiers created"); |
144 | 0 | } |
145 | 121 | assert(llvh::isPowerOf2_32(newCapacity) && "capacity must be power of 2"); |
146 | 121 | CompactTable tmpTable(newCapacity, table_.getCurrentScale()); |
147 | 121 | tmpTable.swap(table_); |
148 | 295k | for (uint32_t oldIdx = 0; oldIdx < tmpTable.size(); ++oldIdx) { |
149 | 294k | if (!tmpTable.isValid(oldIdx)) { |
150 | 145k | continue; |
151 | 145k | } |
152 | | // Pass true as second argument as we know this string is not in the table. |
153 | 149k | uint32_t idx = 0; |
154 | 149k | uint32_t oldVal = tmpTable.get(oldIdx); |
155 | 149k | auto &lookupTableEntry = identifierTable_->getLookupTableEntry(oldVal); |
156 | 149k | uint32_t hash = lookupTableEntry.getHash(); |
157 | 149k | if (lookupTableEntry.isStringPrim()) { |
158 | 142k | idx = lookupString(lookupTableEntry.getStringPrim(), hash, true); |
159 | 142k | } else if (lookupTableEntry.isLazyASCII()) { |
160 | 6.53k | idx = lookupString(lookupTableEntry.getLazyASCIIRef(), hash, true); |
161 | 6.53k | } else if (lookupTableEntry.isLazyUTF16()) { |
162 | 0 | idx = lookupString(lookupTableEntry.getLazyUTF16Ref(), hash, true); |
163 | 0 | } |
164 | 149k | table_.set(idx, oldVal); |
165 | 149k | } |
166 | 121 | nonEmptyEntryCount_ = size_; |
167 | 121 | } |