/src/hermes/lib/BCGen/HBC/ConsecutiveStringStorage.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #define DEBUG_TYPE "stringstorage" |
9 | | #include "hermes/BCGen/HBC/ConsecutiveStringStorage.h" |
10 | | |
11 | | #include "hermes/BCGen/HBC/BytecodeDataProvider.h" |
12 | | #include "hermes/Support/HashString.h" |
13 | | #include "hermes/Support/JenkinsHash.h" |
14 | | #include "hermes/Support/Statistic.h" |
15 | | #include "hermes/Support/Timer.h" |
16 | | #include "hermes/Support/UTF8.h" |
17 | | |
18 | | #include "llvh/ADT/DenseMap.h" |
19 | | #include "llvh/Support/Endian.h" |
20 | | |
21 | | #include <algorithm> |
22 | | #include <climits> |
23 | | #include <deque> |
24 | | |
25 | | using namespace hermes; |
26 | | using llvh::ArrayRef; |
27 | | using llvh::MutableArrayRef; |
28 | | |
29 | | STATISTIC(StringTableSize, "Size of the packed String table"); |
30 | | STATISTIC( |
31 | | StringTableSavings, |
32 | | "Byte savings in length of String table versus naive packing"); |
33 | | |
34 | | namespace { |
35 | | |
36 | | /// Group Name for any timers we create |
37 | | constexpr const char *const StringTableTimersName = "StringTableBuilder Timers"; |
38 | | |
39 | | constexpr size_t npos = ~size_t(0); |
40 | | |
41 | | /// Longest string that we'll attempt to pack. |
42 | | constexpr size_t kMaximumPackableStringLength = 24 * 1024; |
43 | | |
44 | | /// A helper class responsible for deciding how to "pack" strings, that is, lay |
45 | | /// out strings in a linear array suitable for ConsecutiveStringStorage. It is |
46 | | /// templated on the character type (char or char16_t). |
47 | | template <typename CharT> |
48 | | class StringPacker { |
49 | | StringPacker() = default; |
50 | | |
51 | | public: |
52 | | /// Represents a single string to be packed. |
53 | | struct StringEntry { |
54 | | /// Index of the string in the original array. |
55 | | uint32_t stringID_; |
56 | | |
57 | | /// Text of the string. |
58 | | ArrayRef<CharT> chars_; |
59 | | |
60 | | /// Position of the string in the output storage. |
61 | | /// This is set by the packer. |
62 | | size_t offsetInStorage_ = npos; |
63 | | |
64 | | // The remaining fields are used only by the optimizing packer. |
65 | | |
66 | | /// If parent_ is set, then this string is fully contained within its |
67 | | /// parent_, at the given offset. |
68 | | StringEntry *parent_ = nullptr; |
69 | | size_t offsetInParent_ = npos; |
70 | | |
71 | | /// The string that must come after us, if any. |
72 | | StringEntry *next_ = nullptr; |
73 | | |
74 | | /// The string that must come before us, if any. |
75 | | StringEntry *prev_ = nullptr; |
76 | | |
77 | | /// The amount that our chars_ overlaps with prev_->chars_. |
78 | | size_t overlapAmount_ = 0; |
79 | | |
80 | | /// Entries that may not be set as our next_, because they would introduce |
81 | | /// a cycle. |
82 | | llvh::DenseSet<const StringEntry *> potentialCycles_; |
83 | | |
84 | | StringEntry(uint32_t stringID, ArrayRef<CharT> chars) |
85 | 2.21k | : stringID_(stringID), chars_(chars) {} ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::StringEntry::StringEntry(unsigned int, llvh::ArrayRef<unsigned char>) Line | Count | Source | 85 | 2.00k | : stringID_(stringID), chars_(chars) {} |
ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::StringEntry::StringEntry(unsigned int, llvh::ArrayRef<char16_t>) Line | Count | Source | 85 | 219 | : stringID_(stringID), chars_(chars) {} |
|
86 | | }; |
87 | | |
88 | | /// A Trigram represents three packed characters. |
89 | | /// Note that Trigrams are keys in an llvh::DenseSet, which reserves the |
90 | | /// values ~0 and ~0+1 as sentinel values. We must not use the MSB. |
91 | | using Trigram = |
92 | | typename std::conditional<sizeof(CharT) == 1, uint32_t, uint64_t>::type; |
93 | | |
94 | | static constexpr size_t TrigramCharCount = 3; |
95 | | |
96 | | /// \return a trigram representing the first three characters in \p chars. |
97 | 0 | static inline Trigram makeTrigram(const CharT chars[TrigramCharCount]) { |
98 | 0 | unsigned shift = CHAR_BIT * sizeof(CharT); |
99 | 0 | return (Trigram(chars[0]) << (2 * shift)) | |
100 | 0 | (Trigram(chars[1]) << (1 * shift)) | (Trigram(chars[2]) << (0 * shift)); |
101 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::makeTrigram(unsigned char const*) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::makeTrigram(char16_t const*) |
102 | | |
103 | | /// \return a set of all Trigrams of all 3 character prefixes of \p strings. |
104 | | static llvh::DenseSet<Trigram> buildPrefixTrigramSet( |
105 | 0 | ArrayRef<StringEntry> strings) { |
106 | | // A rough experiment showed 8 strings per prefix is a reasonable estimate. |
107 | 0 | llvh::DenseSet<Trigram> result(strings.size() / 8); |
108 | 0 | for (const StringEntry &entry : strings) { |
109 | 0 | ArrayRef<CharT> chars = entry.chars_; |
110 | 0 | if (chars.size() >= TrigramCharCount) { |
111 | 0 | result.insert(makeTrigram(chars.data())); |
112 | 0 | } |
113 | 0 | } |
114 | 0 | return result; |
115 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::buildPrefixTrigramSet(llvh::ArrayRef<(anonymous namespace)::StringPacker<unsigned char>::StringEntry>) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::buildPrefixTrigramSet(llvh::ArrayRef<(anonymous namespace)::StringPacker<char16_t>::StringEntry>) |
116 | | |
117 | | /// HashedSuffix is a model of DenseMapInfo suitable for hashing string |
118 | | /// suffixes. When iterating over the suffixes, it is N^2 to hash each suffix |
119 | | /// separately. We can do better by retaining the hash of the previous suffix |
120 | | /// and simply incorporating the next character into it. |
121 | | struct HashedSuffix { |
122 | | /// The string that has been hashed. |
123 | | ArrayRef<CharT> chars_; |
124 | | |
125 | | /// The hash value of chars_. |
126 | | JenkinsHash hash_; |
127 | | |
128 | | /// DenseMapInfo interface. \return the empty key, deferring to ArrayRef. |
129 | 0 | static HashedSuffix getEmptyKey() { |
130 | 0 | return {llvh::DenseMapInfo<ArrayRef<CharT>>::getEmptyKey(), 0}; |
131 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::HashedSuffix::getEmptyKey() Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::HashedSuffix::getEmptyKey() |
132 | | |
133 | | /// DenseMapInfo interface. \return the tombstone key, deferring to |
134 | | /// ArrayRef. |
135 | 0 | static HashedSuffix getTombstoneKey() { |
136 | 0 | return {llvh::DenseMapInfo<ArrayRef<CharT>>::getTombstoneKey(), 0}; |
137 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::HashedSuffix::getTombstoneKey() Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::HashedSuffix::getTombstoneKey() |
138 | | |
139 | | /// DenseMapInfo interface. \return the hash, which we have cached. |
140 | 0 | static unsigned getHashValue(const HashedSuffix &s) { |
141 | 0 | return s.hash_; |
142 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::HashedSuffix::getHashValue((anonymous namespace)::StringPacker<unsigned char>::HashedSuffix const&) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::HashedSuffix::getHashValue((anonymous namespace)::StringPacker<char16_t>::HashedSuffix const&) |
143 | | |
144 | | /// DenseMapInfo interface. \return whether two suffixes are equal. |
145 | 0 | static bool isEqual(const HashedSuffix &lhs, const HashedSuffix &rhs) { |
146 | 0 | return lhs.hash_ == rhs.hash_ && lhs.chars_ == rhs.chars_; |
147 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::HashedSuffix::isEqual((anonymous namespace)::StringPacker<unsigned char>::HashedSuffix const&, (anonymous namespace)::StringPacker<unsigned char>::HashedSuffix const&) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::HashedSuffix::isEqual((anonymous namespace)::StringPacker<char16_t>::HashedSuffix const&, (anonymous namespace)::StringPacker<char16_t>::HashedSuffix const&) |
148 | | }; |
149 | | |
150 | | /// Represents an entry in a generalized suffix array. |
151 | | struct SuffixArrayEntry { |
152 | | /// The suffix represented by this entry. |
153 | | ArrayRef<CharT> suffix_; |
154 | | |
155 | | /// The list of StringEntries that have this suffix. |
156 | | std::vector<StringEntry *> entries_; |
157 | | |
158 | | /// Convenience move constructor from a std::pair. |
159 | | /// This is used to extract SuffixArrayEntries from our uniquing map. |
160 | | SuffixArrayEntry(std::pair<HashedSuffix, std::vector<StringEntry *>> &&kv) |
161 | 0 | : suffix_(std::move(kv.first.chars_)), entries_(std::move(kv.second)) {} Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry::SuffixArrayEntry(std::__1::pair<(anonymous namespace)::StringPacker<unsigned char>::HashedSuffix, std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::StringEntry*, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::StringEntry*> > >&&) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry::SuffixArrayEntry(std::__1::pair<(anonymous namespace)::StringPacker<char16_t>::HashedSuffix, std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::StringEntry*, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::StringEntry*> > >&&) |
162 | | |
163 | | /// \return the character at index pos, or -1 if pos >= our length. |
164 | 0 | int extCharAt(size_t pos) const { |
165 | 0 | return pos >= suffix_.size() ? -1 : suffix_[pos]; |
166 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry::extCharAt(unsigned long) const Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry::extCharAt(unsigned long) const |
167 | | }; |
168 | | |
169 | | // Given pointers \p begin and \p end, sort the range [begin, end) starting at |
170 | | // the character index \p charIdx. This is a recursive function. |
171 | | static void radixQuicksort( |
172 | | SuffixArrayEntry *begin, |
173 | | SuffixArrayEntry *end, |
174 | 0 | size_t charIdx) { |
175 | 0 | for (;;) { |
176 | 0 | if (end - begin <= 1) { |
177 | | // Already sorted. |
178 | 0 | return; |
179 | 0 | } |
180 | | |
181 | | // Partition via Hoare scheme. Invariants are: |
182 | | // [begin, lower) < pivot |
183 | | // [upper, end) > pivot |
184 | | // Final state adds: |
185 | | // [lower, upper) == pivot |
186 | 0 | int pivotChar = begin->extCharAt(charIdx); |
187 | 0 | SuffixArrayEntry *lower = begin; |
188 | 0 | SuffixArrayEntry *upper = end; |
189 | 0 | for (SuffixArrayEntry *cursor = begin + 1; cursor < upper;) { |
190 | 0 | int testChar = cursor->extCharAt(charIdx); |
191 | 0 | if (testChar < pivotChar) { |
192 | 0 | std::swap(*lower++, *cursor++); |
193 | 0 | } else if (testChar > pivotChar) { |
194 | 0 | std::swap(*--upper, *cursor); |
195 | 0 | } else { |
196 | | // testChar == pivotChar |
197 | 0 | cursor++; |
198 | 0 | } |
199 | 0 | } |
200 | | // We have partitioned [begin, end) according to the character at charIdx. |
201 | | // Sort left and right, and then recurse on our equal range (unless we've |
202 | | // exhausted it). |
203 | 0 | radixQuicksort(begin, lower, charIdx); |
204 | 0 | radixQuicksort(upper, end, charIdx); |
205 | | |
206 | | // If we reached the end of the pivot, we're done. Otherwise allow |
207 | | // ourselves to loop again to inspect the next character. |
208 | | // Note this is effectively a tail call. |
209 | 0 | if (pivotChar == -1) |
210 | 0 | return; |
211 | | |
212 | 0 | begin = lower; |
213 | 0 | end = upper; |
214 | 0 | charIdx += 1; |
215 | 0 | } |
216 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::radixQuicksort((anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry*, (anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry*, unsigned long) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::radixQuicksort((anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry*, (anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry*, unsigned long) |
217 | | |
218 | | /// \return a generalized suffix array over the given \p strings. |
219 | | /// Only suffixes that begin with an element of \p prefixesOfInterest (or are |
220 | | /// shorter than TrigramCharCount) are included. |
221 | | static std::vector<SuffixArrayEntry> buildSuffixArray( |
222 | | MutableArrayRef<StringEntry> strings, |
223 | 0 | const llvh::DenseSet<Trigram> &prefixesOfInterest) { |
224 | | // Unique each suffix, that is, for each suffix that begins with |
225 | | // prefixOfInterest construct the list of strings containing it. A rough |
226 | | // test showed 8 suffixes per string is a reasonable initial capacity. |
227 | 0 | llvh::DenseMap<HashedSuffix, std::vector<StringEntry *>, HashedSuffix> |
228 | 0 | suffixMap(8 * strings.size()); |
229 | 0 | for (StringEntry &entry : strings) { |
230 | 0 | size_t charsSize = entry.chars_.size(); |
231 | | // Skip excessively long strings. |
232 | 0 | if (charsSize > kMaximumPackableStringLength) { |
233 | 0 | continue; |
234 | 0 | } |
235 | 0 | const CharT *chars = entry.chars_.data(); |
236 | 0 | JenkinsHash hash = JenkinsHashInit; |
237 | 0 | size_t i = charsSize; |
238 | 0 | while (i--) { |
239 | 0 | hash = updateJenkinsHash(hash, chars[i]); |
240 | 0 | if (i + TrigramCharCount <= charsSize && |
241 | 0 | !prefixesOfInterest.count(makeTrigram(&chars[i]))) |
242 | 0 | continue; |
243 | 0 | ArrayRef<CharT> suffix(&chars[i], charsSize - i); |
244 | 0 | suffixMap[HashedSuffix{suffix, hash}].push_back(&entry); |
245 | 0 | } |
246 | 0 | } |
247 | | |
248 | | // Move our suffixes to an array, and sort it. |
249 | 0 | std::vector<SuffixArrayEntry> result; |
250 | 0 | if (!suffixMap.empty()) { |
251 | 0 | result.reserve(suffixMap.size()); |
252 | 0 | std::move(suffixMap.begin(), suffixMap.end(), std::back_inserter(result)); |
253 | 0 | SuffixArrayEntry *entries = &result[0]; |
254 | 0 | radixQuicksort(entries, entries + result.size(), 0); |
255 | 0 | } |
256 | 0 | return result; |
257 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::buildSuffixArray(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<unsigned char>::StringEntry>, llvh::DenseSet<unsigned int, llvh::DenseMapInfo<unsigned int> > const&) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::buildSuffixArray(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<char16_t>::StringEntry>, llvh::DenseSet<unsigned long, llvh::DenseMapInfo<unsigned long> > const&) |
258 | | |
259 | | /// Overlap represents an overlap relationship. There is an overlap |
260 | | /// relationship between src and dst if some suffix of src is equal to a |
261 | | /// prefix of dst. For example, in the strings "splitpea" and "peasoup", we |
262 | | /// have src = "splitpea" and dst = "peasoup" with overlap amount 3. Note we |
263 | | /// don't track the amount of overlap here; that's stored externally. |
264 | | /// Also note overlap is directed: there is no overlap from "peasoup" to |
265 | | /// "splitpea" because no suffix of "peasoup" is a prefix of "splitpea". |
266 | | struct Overlap { |
267 | | ArrayRef<StringEntry *> srcs_; |
268 | | StringEntry *dst_; |
269 | | }; |
270 | | |
271 | | /// A list of Overlaps, indexed by the amount of overlap. |
272 | | /// For example, WeightIndexedOverlaps[3] is the list of Overlaps with |
273 | | /// overlap amount 3. |
274 | | using WeightIndexedOverlaps = std::vector<std::vector<Overlap>>; |
275 | | |
276 | | /// Given a list of StringEntries, determine overlapping strings as follows: |
277 | | /// 1. Set rightString.parent_ to an entry that contains rightString, if any. |
278 | | /// 2. If an entry leftString overlaps with \p rightString, add an Overlap |
279 | | /// leftString->rightString to \p overlaps |
280 | | static void computeOverlapsAndParentForEntry( |
281 | | StringEntry *rightString, |
282 | | ArrayRef<SuffixArrayEntry> suffixArray, |
283 | 0 | WeightIndexedOverlaps *overlaps) { |
284 | | // This is a subtle function. We want to compute Overlaps, indexed by |
285 | | // overlap amount, and simultaneously identify parents. Iterate over |
286 | | // rightString's prefixes. Perform a binary search on our suffix array to |
287 | | // find suffixes that have that prefix. If any suffix is exactly equal to |
288 | | // that prefix, we have Arcs (suffix owners, rightString). See the |
289 | | // documentation "StringPacking.md" for an extended discussion. |
290 | 0 | using std::partition_point; |
291 | 0 | ArrayRef<CharT> rightChars = rightString->chars_; |
292 | 0 | auto lower = suffixArray.begin(); |
293 | 0 | auto upper = suffixArray.end(); |
294 | 0 | for (size_t index = 0, rightCharsLen = rightChars.size(); |
295 | 0 | index < rightCharsLen; |
296 | 0 | index++) { |
297 | 0 | CharT testChar = rightChars[index]; |
298 | | // Binary search on [lower, upper) to find all suffixes that have |
299 | | // testChar at index. |
300 | 0 | lower = partition_point(lower, upper, [=](const SuffixArrayEntry &ent) { |
301 | 0 | return ent.extCharAt(index) < testChar; |
302 | 0 | }); Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::computeOverlapsAndParentForEntry((anonymous namespace)::StringPacker<unsigned char>::StringEntry*, llvh::ArrayRef<(anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry>, std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> > > >*)::{lambda((anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry const&)#1}::operator()((anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry const&) const Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::computeOverlapsAndParentForEntry((anonymous namespace)::StringPacker<char16_t>::StringEntry*, llvh::ArrayRef<(anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry>, std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> > > >*)::{lambda((anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry const&)#1}::operator()((anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry const&) const |
303 | 0 | upper = partition_point(lower, upper, [=](const SuffixArrayEntry &ent) { |
304 | 0 | return ent.extCharAt(index) == testChar; |
305 | 0 | }); Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::computeOverlapsAndParentForEntry((anonymous namespace)::StringPacker<unsigned char>::StringEntry*, llvh::ArrayRef<(anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry>, std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> > > >*)::{lambda((anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry const&)#2}::operator()((anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry const&) const Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::computeOverlapsAndParentForEntry((anonymous namespace)::StringPacker<char16_t>::StringEntry*, llvh::ArrayRef<(anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry>, std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> > > >*)::{lambda((anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry const&)#2}::operator()((anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry const&) const |
306 | 0 | assert(lower <= upper && "Binary search crossed the streams"); |
307 | 0 | if (lower == upper) { |
308 | | // No suffixes remaining. |
309 | 0 | break; |
310 | 0 | } |
311 | | |
312 | | // [lower, upper) is now the range of suffixes that start with |
313 | | // rightString.slice(0, overlapAmount). |
314 | 0 | size_t overlapAmount = index + 1; |
315 | 0 | if (overlapAmount < rightCharsLen) { |
316 | | // Not the last iteration. We are looking for a prefix match. |
317 | | // *lower is a suffix that shares a prefix of length overlapAmount with |
318 | | // rightString. If it is equal to that prefix, then we found a suffix |
319 | | // equal to the prefix we're looking for. It is sufficient to test for |
320 | | // equality by checking the length. |
321 | 0 | if (lower->suffix_.size() == overlapAmount) { |
322 | | // Add an Overlap at index overlapAmount. |
323 | 0 | if (overlaps->size() <= overlapAmount) { |
324 | 0 | overlaps->resize(overlapAmount + 1); |
325 | 0 | } |
326 | 0 | Overlap ov = {lower->entries_, rightString}; |
327 | 0 | (*overlaps)[overlapAmount].push_back(ov); |
328 | 0 | } |
329 | 0 | } else { |
330 | | // overlapAmount == rightCharsLen. Final iteration! [lower, upper) is |
331 | | // now the range of suffixes that have rightEntry as a prefix. |
332 | | // That means that rightEntry is wholly contained within some string. |
333 | | // Of course it is wholly contained within itself; if it's also |
334 | | // contained within some OTHER string, we found a parent. |
335 | | // For compressibility, choose the parent with the lowest stringID. |
336 | 0 | for (auto cursor = lower; cursor < upper; ++cursor) { |
337 | 0 | for (StringEntry *parent : cursor->entries_) { |
338 | | // Can't parent ourselves. |
339 | 0 | if (parent == rightString) |
340 | 0 | continue; |
341 | | |
342 | | // Don't parent if we have an existing parent with a lower ID. |
343 | | // This means that we prefer parents that tend to end up early in |
344 | | // the string table. |
345 | 0 | if (rightString->parent_ && |
346 | 0 | rightString->parent_->stringID_ < parent->stringID_) |
347 | 0 | continue; |
348 | | |
349 | | // We found a parent. |
350 | | // rightEntry is a prefix of one of parent's suffixes. |
351 | | // Therefore rightEntry appears in the parent at the same offset of |
352 | | // the suffix within the parent. |
353 | | // A parent should always be longer than its child; otherwise we |
354 | | // must have duplicate strings. |
355 | 0 | assert( |
356 | 0 | parent->chars_.size() > rightString->chars_.size() && |
357 | 0 | "non-unique strings passed to StringPacker"); |
358 | 0 | rightString->parent_ = parent; |
359 | 0 | rightString->offsetInParent_ = |
360 | 0 | parent->chars_.size() - cursor->suffix_.size(); |
361 | 0 | } |
362 | 0 | } |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::computeOverlapsAndParentForEntry((anonymous namespace)::StringPacker<unsigned char>::StringEntry*, llvh::ArrayRef<(anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry>, std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> > > >*) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::computeOverlapsAndParentForEntry((anonymous namespace)::StringPacker<char16_t>::StringEntry*, llvh::ArrayRef<(anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry>, std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> > > >*) |
366 | | |
367 | | /// For each entry, compute its overlaps and parent as per |
368 | | /// computeOverlapsOrParentForEntry. |
369 | | /// \p return the list of Overlaps indexed by weight (amount of overlap). |
370 | | static WeightIndexedOverlaps computeOverlapsAndParents( |
371 | | MutableArrayRef<StringEntry> stringEntries, |
372 | 0 | ArrayRef<SuffixArrayEntry> suffixArray) { |
373 | 0 | WeightIndexedOverlaps result; |
374 | 0 | for (StringEntry &entry : stringEntries) { |
375 | 0 | computeOverlapsAndParentForEntry(&entry, suffixArray, &result); |
376 | 0 | } |
377 | 0 | return result; |
378 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::computeOverlapsAndParents(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<unsigned char>::StringEntry>, llvh::ArrayRef<(anonymous namespace)::StringPacker<unsigned char>::SuffixArrayEntry>) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::computeOverlapsAndParents(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<char16_t>::StringEntry>, llvh::ArrayRef<(anonymous namespace)::StringPacker<char16_t>::SuffixArrayEntry>) |
379 | | |
380 | | /// Indicate if we can add the edge from \p src to \p dst to our Hamiltonian |
381 | | /// path, that is, whether we can take advantage of the overlap between src |
382 | | /// and dst by positioning dst to overlap a suffix of src. |
383 | 0 | static bool canOverlap(const StringEntry *src, const StringEntry *dst) { |
384 | | // Are we trying to overlap ourself? |
385 | 0 | if (src == dst) |
386 | 0 | return false; |
387 | | |
388 | | // Are src or dst going to be substrings of another string? |
389 | 0 | if (src->parent_ || dst->parent_) |
390 | 0 | return false; |
391 | | |
392 | | // Did we already pick a string to come after src or before dst? |
393 | 0 | if (src->next_ || dst->prev_) |
394 | 0 | return false; |
395 | | |
396 | | // Would forming src->dst create a cycle? |
397 | 0 | if (src->potentialCycles_.count(dst)) |
398 | 0 | return false; |
399 | | |
400 | | // This edge is OK! |
401 | 0 | return true; |
402 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::canOverlap((anonymous namespace)::StringPacker<unsigned char>::StringEntry const*, (anonymous namespace)::StringPacker<unsigned char>::StringEntry const*) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::canOverlap((anonymous namespace)::StringPacker<char16_t>::StringEntry const*, (anonymous namespace)::StringPacker<char16_t>::StringEntry const*) |
403 | | |
404 | | /// Plan how we are going to lay out our strings by applying the weightiest |
405 | | /// Overlaps. That is, for each entry, determine the (at most 1) entry |
406 | | /// to come before it, and the (at most 1) entry to come after it. |
407 | | /// This uses the greedy heuristic. Walk our Overlaps from greatest to least |
408 | | /// amount, and plan to layout src directly before dst if we can. |
409 | | /// We must be careful to not produce a cycle like `a->b->c->a`. |
410 | | /// This is equivalent to computing Hamiltonian path in the graph of strings |
411 | | /// while attempting to maximize the weight of its edges. |
412 | 0 | static void planLayout(const WeightIndexedOverlaps &overlapsByWeight) { |
413 | 0 | size_t overlapAmount = overlapsByWeight.size(); |
414 | 0 | while (overlapAmount--) { |
415 | 0 | for (const Overlap &overlap : overlapsByWeight[overlapAmount]) { |
416 | 0 | StringEntry *dst = overlap.dst_; |
417 | 0 | if (dst->prev_ || dst->parent_) { |
418 | | // dst is already spoken for, no need to consider it further. |
419 | 0 | continue; |
420 | 0 | } |
421 | 0 | for (StringEntry *src : overlap.srcs_) { |
422 | 0 | if (canOverlap(src, dst)) { |
423 | | // Apply the Overlap. |
424 | 0 | src->next_ = dst; |
425 | 0 | dst->prev_ = src; |
426 | 0 | dst->overlapAmount_ = overlapAmount; |
427 | | |
428 | | // Traverse to the start and end of our new chain, and mark the fact |
429 | | // that end->start is prohibited because it would produce a cycle. |
430 | 0 | StringEntry *end = dst; |
431 | 0 | while (end->next_) |
432 | 0 | end = end->next_; |
433 | 0 | StringEntry *start = src; |
434 | 0 | while (start->prev_) |
435 | 0 | start = start->prev_; |
436 | 0 | end->potentialCycles_.insert(start); |
437 | | |
438 | | // We picked an entry to come before dst, so we're done with dst. |
439 | 0 | break; |
440 | 0 | } |
441 | 0 | } |
442 | 0 | } |
443 | 0 | } |
444 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::planLayout(std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<unsigned char>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<unsigned char>::Overlap> > > > const&) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::planLayout(std::__1::vector<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> >, std::__1::allocator<std::__1::vector<(anonymous namespace)::StringPacker<char16_t>::Overlap, std::__1::allocator<(anonymous namespace)::StringPacker<char16_t>::Overlap> > > > const&) |
445 | | |
446 | | /// Given a string \p entry, lay it out if it has not already been laid out, |
447 | | /// appending any data necessaery to the \p storage blob. |
448 | | /// "Laying out" an entry means positioning it within the storage, by setting |
449 | | /// its offsetInStorage_ field. Laying out an entry may mean laying out other |
450 | | /// entries: its parent, or its prev and next entries. |
451 | 0 | static void layoutIfNeeded(StringEntry *entry, std::vector<CharT> *storage) { |
452 | 0 | if (entry->offsetInStorage_ != npos) { |
453 | 0 | return; // already layed out |
454 | 0 | } |
455 | | |
456 | | // Empty string is trivial |
457 | 0 | if (entry->chars_.empty()) { |
458 | 0 | entry->offsetInStorage_ = 0; |
459 | 0 | return; |
460 | 0 | } |
461 | | |
462 | | // If we have a parent, lay it out position ourself within it |
463 | 0 | if (entry->parent_) { |
464 | 0 | assert(entry->parent_ != entry && "entry cannot parent itself"); |
465 | 0 | assert(entry->offsetInParent_ != npos && "parent with no offset"); |
466 | 0 | assert( |
467 | 0 | entry->prev_ == nullptr && entry->next_ == nullptr && |
468 | 0 | "Cannot have a parent and be in the path"); |
469 | 0 | layoutIfNeeded(entry->parent_, storage); |
470 | 0 | entry->offsetInStorage_ = |
471 | 0 | entry->parent_->offsetInStorage_ + entry->offsetInParent_; |
472 | 0 | return; |
473 | 0 | } |
474 | | |
475 | | // Go to the beginning and then layout forwards |
476 | 0 | StringEntry *cursor = entry; |
477 | 0 | while (cursor->prev_) { |
478 | 0 | cursor = cursor->prev_; |
479 | 0 | } |
480 | 0 | assert(cursor->overlapAmount_ == 0 && "Cannot have overlap with no prev"); |
481 | 0 | while (cursor) { |
482 | 0 | const auto &str = cursor->chars_; |
483 | 0 | assert( |
484 | 0 | cursor->offsetInStorage_ == npos && |
485 | 0 | "Cannot have already laid out an entry in this path"); |
486 | 0 | assert( |
487 | 0 | cursor->overlapAmount_ <= str.size() && |
488 | 0 | "overlap cannot be larger than string size"); |
489 | 0 | assert( |
490 | 0 | cursor->overlapAmount_ <= storage->size() && |
491 | 0 | "overlap cannot exceed the storage laid out so far "); |
492 | 0 | cursor->offsetInStorage_ = storage->size() - cursor->overlapAmount_; |
493 | 0 | storage->insert( |
494 | 0 | storage->end(), str.begin() + cursor->overlapAmount_, str.end()); |
495 | 0 | cursor = cursor->next_; |
496 | 0 | } |
497 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::layoutIfNeeded((anonymous namespace)::StringPacker<unsigned char>::StringEntry*, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >*) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::layoutIfNeeded((anonymous namespace)::StringPacker<char16_t>::StringEntry*, std::__1::vector<char16_t, std::__1::allocator<char16_t> >*) |
498 | | |
499 | | /// Packs \p strings in an optimized way. |
500 | | /// \return a storage blob, with the property that each entry appears at its |
501 | | /// offsetInStorage_ within the blob. |
502 | | static std::vector<CharT> optimizingPackStrings( |
503 | 0 | MutableArrayRef<StringEntry> strings) { |
504 | 0 | auto prefixSet = buildPrefixTrigramSet(strings); |
505 | 0 | auto suffixes = buildSuffixArray(strings, prefixSet); |
506 | 0 | auto overlaps = computeOverlapsAndParents(strings, suffixes); |
507 | 0 | planLayout(overlaps); |
508 | 0 | std::vector<CharT> storage; |
509 | 0 | for (StringEntry &entry : strings) { |
510 | 0 | layoutIfNeeded(&entry, &storage); |
511 | 0 | } |
512 | 0 | return storage; // efficient move |
513 | 0 | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::optimizingPackStrings(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<unsigned char>::StringEntry>) Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::optimizingPackStrings(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<char16_t>::StringEntry>) |
514 | | |
515 | | /// Packs \p strings naively, in their original order. |
516 | | /// \return a storage blob with the property that each string appears at its |
517 | | /// offsetInStorage_ within the blob. |
518 | | static std::vector<CharT> fastPackStrings( |
519 | 2.34k | MutableArrayRef<StringEntry> strings) { |
520 | 2.34k | std::vector<CharT> result; |
521 | 2.34k | for (StringEntry &str : strings) { |
522 | 2.21k | str.offsetInStorage_ = result.size(); |
523 | 2.21k | result.insert(result.end(), str.chars_.begin(), str.chars_.end()); |
524 | 2.21k | } |
525 | | // Note efficient move-semantics on return. |
526 | 2.34k | return result; |
527 | 2.34k | } ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::fastPackStrings(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<unsigned char>::StringEntry>) Line | Count | Source | 519 | 1.17k | MutableArrayRef<StringEntry> strings) { | 520 | 1.17k | std::vector<CharT> result; | 521 | 2.00k | for (StringEntry &str : strings) { | 522 | 2.00k | str.offsetInStorage_ = result.size(); | 523 | 2.00k | result.insert(result.end(), str.chars_.begin(), str.chars_.end()); | 524 | 2.00k | } | 525 | | // Note efficient move-semantics on return. | 526 | 1.17k | return result; | 527 | 1.17k | } |
ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::fastPackStrings(llvh::MutableArrayRef<(anonymous namespace)::StringPacker<char16_t>::StringEntry>) Line | Count | Source | 519 | 1.17k | MutableArrayRef<StringEntry> strings) { | 520 | 1.17k | std::vector<CharT> result; | 521 | 1.17k | for (StringEntry &str : strings) { | 522 | 219 | str.offsetInStorage_ = result.size(); | 523 | 219 | result.insert(result.end(), str.chars_.begin(), str.chars_.end()); | 524 | 219 | } | 525 | | // Note efficient move-semantics on return. | 526 | 1.17k | return result; | 527 | 1.17k | } |
|
528 | | |
529 | | #ifndef NDEBUG |
530 | | /// Validate a string packing. asserts that each string in \p strings does |
531 | | /// indeed appear at its claimed offset within \p storage. |
532 | | static void validateStringPacking( |
533 | | ArrayRef<StringEntry> strings, |
534 | 2.34k | ArrayRef<CharT> storage) { |
535 | 2.34k | for (const auto &entry : strings) { |
536 | 2.21k | auto offset = entry.offsetInStorage_; |
537 | 2.21k | auto size = entry.chars_.size(); |
538 | 2.21k | assert( |
539 | 2.21k | offset + size >= offset && offset + size <= storage.size() && |
540 | 2.21k | "Invalid offset or size for string entry"); |
541 | 2.21k | assert( |
542 | 2.21k | entry.chars_ == storage.slice(offset, size) && |
543 | 2.21k | "String does not appear at claimed offset in storage"); |
544 | 2.21k | } |
545 | 2.34k | } ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<unsigned char>::validateStringPacking(llvh::ArrayRef<(anonymous namespace)::StringPacker<unsigned char>::StringEntry>, llvh::ArrayRef<unsigned char>) Line | Count | Source | 534 | 1.17k | ArrayRef<CharT> storage) { | 535 | 2.00k | for (const auto &entry : strings) { | 536 | 2.00k | auto offset = entry.offsetInStorage_; | 537 | 2.00k | auto size = entry.chars_.size(); | 538 | 2.00k | assert( | 539 | 2.00k | offset + size >= offset && offset + size <= storage.size() && | 540 | 2.00k | "Invalid offset or size for string entry"); | 541 | 2.00k | assert( | 542 | 2.00k | entry.chars_ == storage.slice(offset, size) && | 543 | 2.00k | "String does not appear at claimed offset in storage"); | 544 | 2.00k | } | 545 | 1.17k | } |
ConsecutiveStringStorage.cpp:(anonymous namespace)::StringPacker<char16_t>::validateStringPacking(llvh::ArrayRef<(anonymous namespace)::StringPacker<char16_t>::StringEntry>, llvh::ArrayRef<char16_t>) Line | Count | Source | 534 | 1.17k | ArrayRef<CharT> storage) { | 535 | 1.17k | for (const auto &entry : strings) { | 536 | 219 | auto offset = entry.offsetInStorage_; | 537 | 219 | auto size = entry.chars_.size(); | 538 | 219 | assert( | 539 | 219 | offset + size >= offset && offset + size <= storage.size() && | 540 | 219 | "Invalid offset or size for string entry"); | 541 | 219 | assert( | 542 | 219 | entry.chars_ == storage.slice(offset, size) && | 543 | 219 | "String does not appear at claimed offset in storage"); | 544 | 219 | } | 545 | 1.17k | } |
|
546 | | #endif |
547 | | }; |
548 | | |
549 | | /// Class responsible for building a string table and associated storage blob. |
550 | | /// This is constructed from a list of StringRefs. Strings which are in ASCII |
551 | | /// are separated from strings that require a Unicode representation. |
552 | | /// Internally we represent strings as StringEntries, which associates some |
553 | | /// metadata about the string for use by the StringPacker. |
554 | | class StringTableBuilder { |
555 | | private: |
556 | | /// We work in ArrayRefs, but we need something to own the storage for our u16 |
557 | | /// strings. Each u16 string is owned by a vector<char16_t>. |
558 | | /// Note this cannot be a vector-of-vectors because vector invalidates |
559 | | /// its contents (i.e. strings are moved). deque and list will both work since |
560 | | /// push_back() is guaranteed to not invalidate references to the elements. |
561 | | std::deque<std::vector<char16_t>> u16StringStorage_; |
562 | | |
563 | | public: |
564 | | // Entries which are in ASCII. |
565 | | std::vector<StringPacker<unsigned char>::StringEntry> asciiStrings_; |
566 | | |
567 | | // Entries which contain non-ASCII characters. |
568 | | std::vector<StringPacker<char16_t>::StringEntry> u16Strings_; |
569 | | |
570 | | /// Constructs from a list of strings, given as a pair of iterators: begin |
571 | | /// and end. Note that we do not always copy the underlying string data so |
572 | | /// the resulting builder must not outlive these strings. In delta |
573 | | /// optimizing mode, only new strings are added here and packed. |
574 | | template <typename I, typename Force8Bit> |
575 | 1.17k | StringTableBuilder(I begin, I end, Force8Bit) { |
576 | | // Generate and store a StringEntry for each string. |
577 | | // Remember the index of each string in our StringEntry, so that we can |
578 | | // later output the table in the correct order. |
579 | | // Place the entry in our ASCII entries if possible; otherwise we have to |
580 | | // convert to u16. |
581 | | // In delta optimizing mode, the ids of new strings in the string table |
582 | | // are adjusted to reflect that they appear after the old strings. |
583 | 1.17k | uint32_t index = 0; |
584 | 3.38k | for (auto it = begin; it != end; ++it) { |
585 | 2.21k | auto &str = *it; |
586 | 2.21k | static_assert(sizeof(str.data()[0]) == 1, "strings must be UTF8"); |
587 | 2.21k | const unsigned char *begin = (const unsigned char *)str.data(); |
588 | 2.21k | const unsigned char *end = begin + str.size(); |
589 | 2.21k | if (Force8Bit::value || isAllASCII(begin, end)) { |
590 | 2.00k | ArrayRef<unsigned char> astr(begin, end); |
591 | 2.00k | asciiStrings_.emplace_back(index, astr); |
592 | 2.00k | } else { |
593 | 219 | u16StringStorage_.emplace_back(); |
594 | 219 | std::vector<char16_t> &ustr = u16StringStorage_.back(); |
595 | 219 | convertUTF8WithSurrogatesToUTF16( |
596 | 219 | std::back_inserter(ustr), (const char *)begin, (const char *)end); |
597 | 219 | u16Strings_.emplace_back(index, ustr); |
598 | 219 | } |
599 | 2.21k | index++; |
600 | 2.21k | } |
601 | 1.17k | } Unexecuted instantiation: ConsecutiveStringStorage.cpp:(anonymous namespace)::StringTableBuilder::StringTableBuilder<std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, false> >(std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, false>) ConsecutiveStringStorage.cpp:(anonymous namespace)::StringTableBuilder::StringTableBuilder<std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >**, long, 170l>, std::__1::integral_constant<bool, false> >(std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >**, long, 170l>, std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >**, long, 170l>, std::__1::integral_constant<bool, false>) Line | Count | Source | 575 | 234 | StringTableBuilder(I begin, I end, Force8Bit) { | 576 | | // Generate and store a StringEntry for each string. | 577 | | // Remember the index of each string in our StringEntry, so that we can | 578 | | // later output the table in the correct order. | 579 | | // Place the entry in our ASCII entries if possible; otherwise we have to | 580 | | // convert to u16. | 581 | | // In delta optimizing mode, the ids of new strings in the string table | 582 | | // are adjusted to reflect that they appear after the old strings. | 583 | 234 | uint32_t index = 0; | 584 | 470 | for (auto it = begin; it != end; ++it) { | 585 | 236 | auto &str = *it; | 586 | 236 | static_assert(sizeof(str.data()[0]) == 1, "strings must be UTF8"); | 587 | 236 | const unsigned char *begin = (const unsigned char *)str.data(); | 588 | 236 | const unsigned char *end = begin + str.size(); | 589 | 236 | if (Force8Bit::value || isAllASCII(begin, end)) { | 590 | 234 | ArrayRef<unsigned char> astr(begin, end); | 591 | 234 | asciiStrings_.emplace_back(index, astr); | 592 | 234 | } else { | 593 | 2 | u16StringStorage_.emplace_back(); | 594 | 2 | std::vector<char16_t> &ustr = u16StringStorage_.back(); | 595 | 2 | convertUTF8WithSurrogatesToUTF16( | 596 | 2 | std::back_inserter(ustr), (const char *)begin, (const char *)end); | 597 | 2 | u16Strings_.emplace_back(index, ustr); | 598 | 2 | } | 599 | 236 | index++; | 600 | 236 | } | 601 | 234 | } |
ConsecutiveStringStorage.cpp:(anonymous namespace)::StringTableBuilder::StringTableBuilder<llvh::StringRef const*, std::__1::integral_constant<bool, false> >(llvh::StringRef const*, llvh::StringRef const*, std::__1::integral_constant<bool, false>) Line | Count | Source | 575 | 234 | StringTableBuilder(I begin, I end, Force8Bit) { | 576 | | // Generate and store a StringEntry for each string. | 577 | | // Remember the index of each string in our StringEntry, so that we can | 578 | | // later output the table in the correct order. | 579 | | // Place the entry in our ASCII entries if possible; otherwise we have to | 580 | | // convert to u16. | 581 | | // In delta optimizing mode, the ids of new strings in the string table | 582 | | // are adjusted to reflect that they appear after the old strings. | 583 | 234 | uint32_t index = 0; | 584 | 2.21k | for (auto it = begin; it != end; ++it) { | 585 | 1.97k | auto &str = *it; | 586 | 1.97k | static_assert(sizeof(str.data()[0]) == 1, "strings must be UTF8"); | 587 | 1.97k | const unsigned char *begin = (const unsigned char *)str.data(); | 588 | 1.97k | const unsigned char *end = begin + str.size(); | 589 | 1.97k | if (Force8Bit::value || isAllASCII(begin, end)) { | 590 | 1.76k | ArrayRef<unsigned char> astr(begin, end); | 591 | 1.76k | asciiStrings_.emplace_back(index, astr); | 592 | 1.76k | } else { | 593 | 217 | u16StringStorage_.emplace_back(); | 594 | 217 | std::vector<char16_t> &ustr = u16StringStorage_.back(); | 595 | 217 | convertUTF8WithSurrogatesToUTF16( | 596 | 217 | std::back_inserter(ustr), (const char *)begin, (const char *)end); | 597 | 217 | u16Strings_.emplace_back(index, ustr); | 598 | 217 | } | 599 | 1.97k | index++; | 600 | 1.97k | } | 601 | 234 | } |
ConsecutiveStringStorage.cpp:(anonymous namespace)::StringTableBuilder::StringTableBuilder<std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, true> >(std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, true>) Line | Count | Source | 575 | 702 | StringTableBuilder(I begin, I end, Force8Bit) { | 576 | | // Generate and store a StringEntry for each string. | 577 | | // Remember the index of each string in our StringEntry, so that we can | 578 | | // later output the table in the correct order. | 579 | | // Place the entry in our ASCII entries if possible; otherwise we have to | 580 | | // convert to u16. | 581 | | // In delta optimizing mode, the ids of new strings in the string table | 582 | | // are adjusted to reflect that they appear after the old strings. | 583 | 702 | uint32_t index = 0; | 584 | 708 | for (auto it = begin; it != end; ++it) { | 585 | 6 | auto &str = *it; | 586 | 6 | static_assert(sizeof(str.data()[0]) == 1, "strings must be UTF8"); | 587 | 6 | const unsigned char *begin = (const unsigned char *)str.data(); | 588 | 6 | const unsigned char *end = begin + str.size(); | 589 | 6 | if (Force8Bit::value || isAllASCII(begin, end)) { | 590 | 6 | ArrayRef<unsigned char> astr(begin, end); | 591 | 6 | asciiStrings_.emplace_back(index, astr); | 592 | 6 | } else { | 593 | 0 | u16StringStorage_.emplace_back(); | 594 | 0 | std::vector<char16_t> &ustr = u16StringStorage_.back(); | 595 | 0 | convertUTF8WithSurrogatesToUTF16( | 596 | 0 | std::back_inserter(ustr), (const char *)begin, (const char *)end); | 597 | 0 | u16Strings_.emplace_back(index, ustr); | 598 | 0 | } | 599 | 6 | index++; | 600 | 6 | } | 601 | 702 | } |
|
602 | | |
603 | | /// \return the size our strings would occupy with no packing applied. |
604 | 1.17k | size_t unpackedSize() const { |
605 | 1.17k | size_t result = 0; |
606 | 2.00k | for (const auto &entry : asciiStrings_) { |
607 | 2.00k | result += sizeof(char) * entry.chars_.size(); |
608 | 2.00k | } |
609 | 1.17k | for (const auto &entry : u16Strings_) { |
610 | 219 | result += sizeof(char16_t) * entry.chars_.size(); |
611 | 219 | } |
612 | 1.17k | return result; |
613 | 1.17k | } |
614 | | |
615 | | /// "Pack" our strings into the given storages. This means setting the |
616 | | /// offsetInStorage_ field of each string entry, and then generating a |
617 | | /// corresponding storage blob such that each string is at that offset in the |
618 | | /// blob. Returns packed \p asciiStorage and \p u16Storage, by reference. |
619 | | /// If \p optimize is set, attempt to find a more efficient packing by reusing |
620 | | /// substrings and prefix-suffix overlaps. |
621 | | void packIntoStorage( |
622 | | std::vector<unsigned char> *asciiStorage, |
623 | | std::vector<char16_t> *u16Storage, |
624 | 1.17k | bool optimize) { |
625 | 1.17k | NamedRegionTimer timer( |
626 | 1.17k | "", |
627 | 1.17k | optimize ? "optimizingPackStrings" : "fastPackStrings", |
628 | 1.17k | "", |
629 | 1.17k | StringTableTimersName, |
630 | 1.17k | AreStatisticsEnabled()); |
631 | | // Note these assignments use efficient move-assignment, not copying. |
632 | 1.17k | if (optimize) { |
633 | 0 | *asciiStorage = |
634 | 0 | StringPacker<unsigned char>::optimizingPackStrings(asciiStrings_); |
635 | 0 | *u16Storage = StringPacker<char16_t>::optimizingPackStrings(u16Strings_); |
636 | 1.17k | } else { |
637 | 1.17k | *asciiStorage = |
638 | 1.17k | StringPacker<unsigned char>::fastPackStrings(asciiStrings_); |
639 | 1.17k | *u16Storage = StringPacker<char16_t>::fastPackStrings(u16Strings_); |
640 | 1.17k | } |
641 | | |
642 | 1.17k | #ifndef NDEBUG |
643 | | // Ensure that our packing was correct. |
644 | 1.17k | StringPacker<unsigned char>::validateStringPacking( |
645 | 1.17k | asciiStrings_, *asciiStorage); |
646 | 1.17k | StringPacker<char16_t>::validateStringPacking(u16Strings_, *u16Storage); |
647 | 1.17k | #endif |
648 | 1.17k | } |
649 | | |
650 | | /// Given that our strings have been packed into some storage, builds a string |
651 | | /// table from our stored string entries. |
652 | | /// \return a string table representing the offset and length of each string. |
653 | | /// Offsets are taken from the offsetInStorage_ property of each string entry. |
654 | | /// u16 string offsets are adjusted by \p u16OffsetAdjust and have their size |
655 | | /// negated to indicate they are Unicode. If a string is in the |
656 | | /// \p identifiers, we also mark the second most significant bit. |
657 | | std::vector<StringTableEntry> generateStringTable( |
658 | | ArrayRef<unsigned char> storage, |
659 | 1.17k | size_t u16OffsetAdjust) { |
660 | | // Each of our StringEntries remembers its original index in the initial |
661 | | // array. Create a table large enough, and set the corresponding index in |
662 | | // the table. |
663 | 1.17k | std::vector<StringTableEntry> table; |
664 | 1.17k | table.resize(asciiStrings_.size() + u16Strings_.size()); |
665 | | |
666 | 2.00k | for (const auto &asciiStr : asciiStrings_) { |
667 | 2.00k | table.at(asciiStr.stringID_) = { |
668 | 2.00k | static_cast<uint32_t>(asciiStr.offsetInStorage_), |
669 | 2.00k | static_cast<uint32_t>(asciiStr.chars_.size()), |
670 | 2.00k | false /* isUTF16 */}; |
671 | 2.00k | } |
672 | 1.17k | for (const auto &u16Str : u16Strings_) { |
673 | 219 | table.at(u16Str.stringID_) = { |
674 | 219 | static_cast<uint32_t>( |
675 | 219 | u16Str.offsetInStorage_ * sizeof(char16_t) + u16OffsetAdjust), |
676 | 219 | static_cast<uint32_t>(u16Str.chars_.size()), |
677 | 219 | true /* isUTF16 */}; |
678 | 219 | } |
679 | 1.17k | return table; |
680 | 1.17k | } |
681 | | |
682 | | /// Appends \p u16Storage to the given byte array \p output. |
683 | | /// \return the offset of the u16 storage in that byte array. |
684 | | static size_t appendU16Storage( |
685 | | ArrayRef<char16_t> u16Storage, |
686 | 1.17k | std::vector<unsigned char> *output) { |
687 | 1.17k | using namespace llvh::support; |
688 | 1.17k | static_assert(sizeof(char16_t) == 2, "sizeof char16_t unexpectedly not 2"); |
689 | 1.17k | if (u16Storage.empty()) { |
690 | | // Nothing to do, don't even bother aligning. |
691 | 1.14k | return 0; |
692 | 1.14k | } |
693 | | |
694 | | // Ensure 2-byte alignment. |
695 | 29 | if (output->size() % sizeof(char16_t)) { |
696 | 13 | output->push_back('\0'); |
697 | 13 | } |
698 | | |
699 | | // Make space, and write as little endian. |
700 | 29 | size_t offset = output->size(); |
701 | 29 | output->resize(output->size() + sizeof(char16_t) * u16Storage.size()); |
702 | 29 | unsigned char *cursor = &output->at(offset); |
703 | 2.80M | for (char16_t s : u16Storage) { |
704 | 2.80M | endian::write<char16_t, little, 0>(cursor, s); |
705 | 2.80M | cursor += sizeof(s); |
706 | 2.80M | } |
707 | 29 | return offset; |
708 | 1.17k | } |
709 | | }; |
710 | | |
711 | | } // namespace |
712 | | |
713 | | namespace hermes { |
714 | | namespace hbc { |
715 | | |
716 | | template <typename I, typename Force8Bit> |
717 | | ConsecutiveStringStorage::ConsecutiveStringStorage( |
718 | | I begin, |
719 | | I end, |
720 | | Force8Bit, |
721 | 1.17k | bool optimize) { |
722 | | // Prepare to build our string table. |
723 | | // Generate storage for our ASCII and u16 strings. |
724 | 1.17k | StringTableBuilder builder(begin, end, Force8Bit{}); |
725 | 1.17k | std::vector<unsigned char> asciiStorage; |
726 | 1.17k | std::vector<char16_t> u16Storage; |
727 | 1.17k | builder.packIntoStorage(&asciiStorage, &u16Storage, optimize); |
728 | | |
729 | | // Append the u16 storage to the ASCII storage, to form our combined storage. |
730 | 1.17k | storage_.insert(storage_.end(), asciiStorage.begin(), asciiStorage.end()); |
731 | 1.17k | auto u16Offset = StringTableBuilder::appendU16Storage(u16Storage, &storage_); |
732 | | |
733 | | // Build our table over the storage. |
734 | 1.17k | strTable_ = builder.generateStringTable(storage_, u16Offset); |
735 | | |
736 | | // Record some stats. |
737 | 1.17k | size_t unpackedSize = builder.unpackedSize(); |
738 | 1.17k | StringTableSize += unpackedSize; |
739 | 1.17k | StringTableSavings += unpackedSize - storage_.size(); |
740 | 1.17k | } Unexecuted instantiation: hermes::hbc::ConsecutiveStringStorage::ConsecutiveStringStorage<std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, false> >(std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, false>, bool) hermes::hbc::ConsecutiveStringStorage::ConsecutiveStringStorage<std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >**, long, 170l>, std::__1::integral_constant<bool, false> >(std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >**, long, 170l>, std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >**, long, 170l>, std::__1::integral_constant<bool, false>, bool) Line | Count | Source | 721 | 234 | bool optimize) { | 722 | | // Prepare to build our string table. | 723 | | // Generate storage for our ASCII and u16 strings. | 724 | 234 | StringTableBuilder builder(begin, end, Force8Bit{}); | 725 | 234 | std::vector<unsigned char> asciiStorage; | 726 | 234 | std::vector<char16_t> u16Storage; | 727 | 234 | builder.packIntoStorage(&asciiStorage, &u16Storage, optimize); | 728 | | | 729 | | // Append the u16 storage to the ASCII storage, to form our combined storage. | 730 | 234 | storage_.insert(storage_.end(), asciiStorage.begin(), asciiStorage.end()); | 731 | 234 | auto u16Offset = StringTableBuilder::appendU16Storage(u16Storage, &storage_); | 732 | | | 733 | | // Build our table over the storage. | 734 | 234 | strTable_ = builder.generateStringTable(storage_, u16Offset); | 735 | | | 736 | | // Record some stats. | 737 | 234 | size_t unpackedSize = builder.unpackedSize(); | 738 | 234 | StringTableSize += unpackedSize; | 739 | 234 | StringTableSavings += unpackedSize - storage_.size(); | 740 | 234 | } |
hermes::hbc::ConsecutiveStringStorage::ConsecutiveStringStorage<llvh::StringRef const*, std::__1::integral_constant<bool, false> >(llvh::StringRef const*, llvh::StringRef const*, std::__1::integral_constant<bool, false>, bool) Line | Count | Source | 721 | 234 | bool optimize) { | 722 | | // Prepare to build our string table. | 723 | | // Generate storage for our ASCII and u16 strings. | 724 | 234 | StringTableBuilder builder(begin, end, Force8Bit{}); | 725 | 234 | std::vector<unsigned char> asciiStorage; | 726 | 234 | std::vector<char16_t> u16Storage; | 727 | 234 | builder.packIntoStorage(&asciiStorage, &u16Storage, optimize); | 728 | | | 729 | | // Append the u16 storage to the ASCII storage, to form our combined storage. | 730 | 234 | storage_.insert(storage_.end(), asciiStorage.begin(), asciiStorage.end()); | 731 | 234 | auto u16Offset = StringTableBuilder::appendU16Storage(u16Storage, &storage_); | 732 | | | 733 | | // Build our table over the storage. | 734 | 234 | strTable_ = builder.generateStringTable(storage_, u16Offset); | 735 | | | 736 | | // Record some stats. | 737 | 234 | size_t unpackedSize = builder.unpackedSize(); | 738 | 234 | StringTableSize += unpackedSize; | 739 | 234 | StringTableSavings += unpackedSize - storage_.size(); | 740 | 234 | } |
hermes::hbc::ConsecutiveStringStorage::ConsecutiveStringStorage<std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, true> >(std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::__deque_iterator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const* const*, long, 170l>, std::__1::integral_constant<bool, true>, bool) Line | Count | Source | 721 | 702 | bool optimize) { | 722 | | // Prepare to build our string table. | 723 | | // Generate storage for our ASCII and u16 strings. | 724 | 702 | StringTableBuilder builder(begin, end, Force8Bit{}); | 725 | 702 | std::vector<unsigned char> asciiStorage; | 726 | 702 | std::vector<char16_t> u16Storage; | 727 | 702 | builder.packIntoStorage(&asciiStorage, &u16Storage, optimize); | 728 | | | 729 | | // Append the u16 storage to the ASCII storage, to form our combined storage. | 730 | 702 | storage_.insert(storage_.end(), asciiStorage.begin(), asciiStorage.end()); | 731 | 702 | auto u16Offset = StringTableBuilder::appendU16Storage(u16Storage, &storage_); | 732 | | | 733 | | // Build our table over the storage. | 734 | 702 | strTable_ = builder.generateStringTable(storage_, u16Offset); | 735 | | | 736 | | // Record some stats. | 737 | 702 | size_t unpackedSize = builder.unpackedSize(); | 738 | 702 | StringTableSize += unpackedSize; | 739 | 702 | StringTableSavings += unpackedSize - storage_.size(); | 740 | 702 | } |
|
741 | | |
742 | | template ConsecutiveStringStorage::ConsecutiveStringStorage( |
743 | | StringSetVector::const_iterator begin, |
744 | | StringSetVector::const_iterator end, |
745 | | std::false_type, |
746 | | bool optimize); |
747 | | |
748 | | template ConsecutiveStringStorage::ConsecutiveStringStorage( |
749 | | StringSetVector::iterator begin, |
750 | | StringSetVector::iterator end, |
751 | | std::false_type, |
752 | | bool optimize); |
753 | | |
754 | | template ConsecutiveStringStorage::ConsecutiveStringStorage( |
755 | | ArrayRef<llvh::StringRef>::const_iterator begin, |
756 | | ArrayRef<llvh::StringRef>::const_iterator end, |
757 | | std::false_type, |
758 | | bool optimize); |
759 | | |
760 | | template ConsecutiveStringStorage::ConsecutiveStringStorage( |
761 | | StringSetVector::const_iterator begin, |
762 | | StringSetVector::const_iterator end, |
763 | | std::true_type, |
764 | | bool optimize); |
765 | | |
766 | 906 | uint32_t ConsecutiveStringStorage::getEntryHash(size_t i) const { |
767 | 906 | ensureTableValid(); |
768 | 906 | ensureStorageValid(); |
769 | | |
770 | 906 | auto &entry = strTable_[i]; |
771 | 906 | uint32_t length = entry.getLength(); |
772 | 906 | assert( |
773 | 906 | entry.getOffset() + (entry.isUTF16() ? length * 2 : length) <= |
774 | 906 | storage_.size() && |
775 | 906 | "entry past end"); |
776 | 906 | const unsigned char *data = storage_.data() + entry.getOffset(); |
777 | 906 | if (entry.isUTF16()) { |
778 | 64 | const char16_t *u16data = reinterpret_cast<const char16_t *>(data); |
779 | 64 | return hermes::hashString(ArrayRef<char16_t>{u16data, length}); |
780 | 842 | } else { |
781 | 842 | return hermes::hashString(ArrayRef<char>{(const char *)data, length}); |
782 | 842 | } |
783 | 906 | } |
784 | | |
785 | 234 | void ConsecutiveStringStorage::appendStorage(ConsecutiveStringStorage &&rhs) { |
786 | 234 | ensureTableValid(); |
787 | 234 | ensureStorageValid(); |
788 | | // If we have not yet been written, just acquire the rhs. |
789 | 234 | if (strTable_.empty()) { |
790 | 234 | *this = std::move(rhs); |
791 | 234 | return; |
792 | 234 | } |
793 | | // Offset incoming string entries by the size of our storage, and append the |
794 | | // incoming storage. Don't bother to offset if the string is empty; this |
795 | | // ensures that the empty string doesn't get pushed to strange places. |
796 | 0 | uint32_t storageDelta = storage_.size(); |
797 | 0 | strTable_.reserve(strTable_.size() + rhs.strTable_.size()); |
798 | 0 | for (const StringTableEntry &entry : rhs.strTable_) { |
799 | 0 | uint32_t length = entry.getLength(); |
800 | 0 | uint32_t offset = entry.getOffset() + (length ? storageDelta : 0); |
801 | 0 | strTable_.emplace_back(offset, length, entry.isUTF16()); |
802 | 0 | } |
803 | 0 | storage_.insert(storage_.end(), rhs.storage_.begin(), rhs.storage_.end()); |
804 | 0 | } |
805 | | |
806 | | llvh::StringRef ConsecutiveStringStorage::getStringAtIndex( |
807 | | uint32_t idx, |
808 | 1.97k | std::string &utf8ConversionStorage) const { |
809 | 1.97k | ensureTableValid(); |
810 | 1.97k | ensureStorageValid(); |
811 | 1.97k | assert(idx < strTable_.size() && "getStringAtIndex: invalid index"); |
812 | 1.97k | return getStringFromEntry(strTable_[idx], storage_, utf8ConversionStorage); |
813 | 1.97k | } |
814 | | |
815 | | llvh::StringRef getStringFromEntry( |
816 | | const StringTableEntry &entry, |
817 | | llvh::ArrayRef<unsigned char> storage, |
818 | 1.99k | std::string &utf8ConversionStorage) { |
819 | 1.99k | uint32_t offset = entry.getOffset(); |
820 | 1.99k | uint32_t length = entry.getLength(); |
821 | 1.99k | assert( |
822 | 1.99k | offset + length <= storage.size() && offset + length >= offset && |
823 | 1.99k | "Invalid entry"); |
824 | 1.99k | if (!entry.isUTF16()) { |
825 | 1.78k | return llvh::StringRef{(const char *)storage.data() + offset, length}; |
826 | 1.78k | } else { |
827 | 217 | const char16_t *s = |
828 | 217 | reinterpret_cast<const char16_t *>(storage.data() + offset); |
829 | 217 | llvh::ArrayRef<char16_t> u16String(s, length); |
830 | 217 | convertUTF16ToUTF8WithSingleSurrogates(utf8ConversionStorage, u16String); |
831 | 217 | return utf8ConversionStorage; |
832 | 217 | } |
833 | 1.99k | } |
834 | | |
835 | | } // namespace hbc |
836 | | } // namespace hermes |
837 | | |
838 | | #undef DEBUG_TYPE |