/proc/self/cwd/source/common/stats/symbol_table.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/common/stats/symbol_table.h" |
2 | | |
3 | | #include <algorithm> |
4 | | #include <iostream> |
5 | | #include <memory> |
6 | | #include <vector> |
7 | | |
8 | | #include "source/common/common/assert.h" |
9 | | #include "source/common/common/logger.h" |
10 | | #include "source/common/common/utility.h" |
11 | | |
12 | | #include "absl/strings/str_cat.h" |
13 | | |
14 | | namespace Envoy { |
15 | | namespace Stats { |
16 | | |
17 | | // Masks used for variable-length encoding of arbitrary-sized integers into a |
18 | | // uint8-array. The integers are typically small, so we try to store them in as |
19 | | // few bytes as possible. The bottom 7 bits hold values, and the top bit is used |
20 | | // to determine whether another byte is needed for more data. |
21 | | static constexpr uint32_t SpilloverMask = 0x80; |
22 | | static constexpr uint32_t Low7Bits = 0x7f; |
23 | | |
24 | | // When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol |
25 | | // that will decode into uint8_t array starting (and ending) with {0}. Thus we |
26 | | // can use a leading 0 in the first byte to indicate that what follows is |
27 | | // literal char data, rather than symbol-table entries. Literal char data is |
28 | | // used for dynamically discovered stat-name tokens where you don't want to take |
29 | | // a symbol table lock, and would rather pay extra memory overhead to store the |
30 | | // tokens as fully elaborated strings. |
31 | | static constexpr Symbol FirstValidSymbol = 1; |
32 | | static constexpr uint8_t LiteralStringIndicator = 0; |
33 | | |
34 | 4.40G | size_t StatName::dataSize() const { |
35 | 4.40G | if (size_and_data_ == nullptr) { |
36 | 75.3M | return 0; |
37 | 75.3M | } |
38 | 4.32G | return SymbolTable::Encoding::decodeNumber(size_and_data_).first; |
39 | 4.40G | } |
40 | | |
41 | | #ifndef ENVOY_CONFIG_COVERAGE |
42 | 0 | void StatName::debugPrint() { |
43 | | // TODO(jmarantz): capture this functionality (always prints regardless of |
44 | | // loglevel) in an ENVOY_LOG macro variant or similar, perhaps |
45 | | // ENVOY_LOG_MISC(stderr, ...); |
46 | 0 | if (size_and_data_ == nullptr) { |
47 | 0 | std::cerr << "Null StatName" << std::endl; |
48 | 0 | } else { |
49 | 0 | const size_t nbytes = dataSize(); |
50 | 0 | std::cerr << "dataSize=" << nbytes << ":"; |
51 | 0 | for (size_t i = 0; i < nbytes; ++i) { |
52 | 0 | std::cerr << " " << static_cast<uint64_t>(data()[i]); |
53 | 0 | } |
54 | 0 | const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this); |
55 | 0 | std::cerr << ", numSymbols=" << encoding.size() << ":"; |
56 | 0 | for (Symbol symbol : encoding) { |
57 | 0 | std::cerr << " " << symbol; |
58 | 0 | } |
59 | 0 | std::cerr << std::endl; |
60 | 0 | } |
61 | 0 | } |
62 | | #endif |
63 | | |
64 | 85.8M | SymbolTable::Encoding::~Encoding() { |
65 | | // Verifies that moveToMemBlock() was called on this encoding. Failure |
66 | | // to call moveToMemBlock() will result in leaks symbols. |
67 | 85.8M | ASSERT(mem_block_.capacity() == 0); |
68 | 85.8M | } |
69 | | |
70 | 2.45G | size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) { |
71 | 2.45G | size_t num_bytes = 0; |
72 | 2.56G | do { |
73 | 2.56G | ++num_bytes; |
74 | 2.56G | number >>= 7; |
75 | 2.56G | } while (number != 0); |
76 | 2.45G | return num_bytes; |
77 | 2.45G | } |
78 | | |
79 | 505M | void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) { |
80 | | // UTF-8-like encoding where a value 127 or less gets written as a single |
81 | | // byte. For higher values we write the low-order 7 bits with a 1 in |
82 | | // the high-order bit. Then we right-shift 7 bits and keep adding more bytes |
83 | | // until we have consumed all the non-zero bits in symbol. |
84 | | // |
85 | | // When decoding, we stop consuming uint8_t when we see a uint8_t with |
86 | | // high-order bit 0. |
87 | 607M | do { |
88 | 607M | if (number < (1 << 7)) { |
89 | 505M | mem_block.appendOne(number); // number <= 127 gets encoded in one byte. |
90 | 505M | } else { |
91 | 102M | mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes. |
92 | 102M | } |
93 | 607M | number >>= 7; |
94 | 607M | } while (number != 0); |
95 | 505M | } |
96 | | |
97 | 81.3M | void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) { |
98 | 81.3M | ASSERT(data_bytes_required_ == 0); |
99 | 224M | for (Symbol symbol : symbols) { |
100 | 224M | data_bytes_required_ += encodingSizeBytes(symbol); |
101 | 224M | } |
102 | 81.3M | mem_block_.setCapacity(data_bytes_required_); |
103 | 224M | for (Symbol symbol : symbols) { |
104 | 224M | appendEncoding(symbol, mem_block_); |
105 | 224M | } |
106 | 81.3M | } |
107 | | |
108 | 5.93G | std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) { |
109 | 5.93G | uint64_t number = 0; |
110 | 5.93G | uint64_t uc = SpilloverMask; |
111 | 5.93G | const uint8_t* start = encoding; |
112 | 12.4G | for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) { |
113 | 6.55G | uc = static_cast<uint32_t>(*encoding); |
114 | 6.55G | number |= (uc & Low7Bits) << shift; |
115 | 6.55G | } |
116 | 5.93G | return std::make_pair(number, encoding - start); |
117 | 5.93G | } |
118 | | |
119 | 309M | SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) { |
120 | 309M | SymbolVec symbol_vec; |
121 | 309M | symbol_vec.reserve(stat_name.dataSize()); |
122 | 309M | decodeTokens( |
123 | 923M | stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); }, |
124 | 309M | [](absl::string_view) {}); |
125 | 309M | return symbol_vec; |
126 | 309M | } |
127 | | |
128 | | // Iterator for decoding StatName tokens. This is not an STL-compatible |
129 | | // iterator; it is used only locally. Using this rather than decodeSymbols makes |
130 | | // it easier to early-exit out of a comparison once we know the answer. For |
131 | | // example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order |
132 | | // when you get to the "b" vs "x" and do not need to decode "c" vs "y". |
133 | | // |
134 | | // This is also helpful for prefix-matching. |
135 | | // |
136 | | // The array-version of decodeSymbols is still a good approach for converting a |
137 | | // StatName to a string as the intermediate array of string_view is needed to |
138 | | // allocate the right size for the joined string. |
139 | | class SymbolTable::Encoding::TokenIter { |
140 | | public: |
141 | | // The type of token reached. |
142 | | enum class TokenType { StringView, Symbol, End }; |
143 | | |
144 | 368M | TokenIter(StatName stat_name) { |
145 | 368M | const uint8_t* raw_data = stat_name.dataIncludingSize(); |
146 | 368M | if (raw_data == nullptr) { |
147 | 10.9M | size_ = 0; |
148 | 10.9M | array_ = nullptr; |
149 | 357M | } else { |
150 | 357M | std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data); |
151 | 357M | size_ = size_consumed.first; |
152 | 357M | array_ = raw_data + size_consumed.second; |
153 | 357M | } |
154 | 368M | } |
155 | | |
156 | | /** |
157 | | * Parses the next token. The token can then be retrieved by calling |
158 | | * stringView() or symbol(). |
159 | | * @return The type of token, TokenType::End if we have reached the end of the StatName. |
160 | | */ |
161 | 1.61G | TokenType next() { |
162 | 1.61G | if (size_ == 0) { |
163 | 368M | return TokenType::End; |
164 | 368M | } |
165 | 1.24G | if (*array_ == LiteralStringIndicator) { |
166 | | // To avoid scanning memory to find the literal size during decode, we |
167 | | // var-length encode the size of the literal string prior to the data. |
168 | 39.9M | ASSERT(size_ > 1); |
169 | 39.9M | ++array_; |
170 | 39.9M | --size_; |
171 | 39.9M | std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_); |
172 | 39.9M | uint64_t length = length_consumed.first; |
173 | 39.9M | array_ += length_consumed.second; |
174 | 39.9M | size_ -= length_consumed.second; |
175 | 39.9M | ASSERT(size_ >= length); |
176 | 39.9M | string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length); |
177 | 39.9M | size_ -= length; |
178 | 39.9M | array_ += length; |
179 | 39.9M | #ifndef NDEBUG |
180 | 39.9M | last_type_ = TokenType::StringView; |
181 | 39.9M | #endif |
182 | 39.9M | return TokenType::StringView; |
183 | 39.9M | } |
184 | 1.20G | std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_); |
185 | 1.20G | symbol_ = symbol_consumed.first; |
186 | 1.20G | size_ -= symbol_consumed.second; |
187 | 1.20G | array_ += symbol_consumed.second; |
188 | 1.20G | #ifndef NDEBUG |
189 | 1.20G | last_type_ = TokenType::Symbol; |
190 | 1.20G | #endif |
191 | 1.20G | return TokenType::Symbol; |
192 | 1.24G | } |
193 | | |
194 | | /** @return the current string_view -- only valid to call if next()==TokenType::StringView */ |
195 | 39.9M | absl::string_view stringView() const { |
196 | 39.9M | #ifndef NDEBUG |
197 | 39.9M | ASSERT(last_type_ == TokenType::StringView); |
198 | 39.9M | #endif |
199 | 39.9M | return string_view_; |
200 | 39.9M | } |
201 | | |
202 | | /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */ |
203 | 1.20G | Symbol symbol() const { |
204 | 1.20G | #ifndef NDEBUG |
205 | 1.20G | ASSERT(last_type_ == TokenType::Symbol); |
206 | 1.20G | #endif |
207 | 1.20G | return symbol_; |
208 | 1.20G | } |
209 | | |
210 | | private: |
211 | | const uint8_t* array_; |
212 | | size_t size_; |
213 | | absl::string_view string_view_; |
214 | | Symbol symbol_; |
215 | | #ifndef NDEBUG |
216 | | TokenType last_type_{TokenType::End}; |
217 | | #endif |
218 | | }; |
219 | | |
220 | | void SymbolTable::Encoding::decodeTokens( |
221 | | StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn, |
222 | 368M | const std::function<void(absl::string_view)>& string_view_token_fn) { |
223 | 368M | TokenIter iter(stat_name); |
224 | 368M | TokenIter::TokenType type; |
225 | 1.61G | while ((type = iter.next()) != TokenIter::TokenType::End) { |
226 | 1.24G | if (type == TokenIter::TokenType::StringView) { |
227 | 39.9M | string_view_token_fn(iter.stringView()); |
228 | 1.20G | } else { |
229 | 1.20G | ASSERT(type == TokenIter::TokenType::Symbol); |
230 | 1.20G | symbol_token_fn(iter.symbol()); |
231 | 1.20G | } |
232 | 1.24G | } |
233 | 368M | } |
234 | | |
235 | 0 | bool StatName::startsWith(StatName prefix) const { |
236 | 0 | using TokenIter = SymbolTable::Encoding::TokenIter; |
237 | 0 | TokenIter prefix_iter(prefix); |
238 | 0 | TokenIter this_iter(*this); |
239 | 0 | while (true) { |
240 | 0 | TokenIter::TokenType prefix_type = prefix_iter.next(); |
241 | 0 | TokenIter::TokenType this_type = this_iter.next(); |
242 | 0 | if (prefix_type == TokenIter::TokenType::End) { |
243 | 0 | return true; // "a.b.c" starts with "a.b" or "a.b.c" |
244 | 0 | } |
245 | 0 | if (this_type == TokenIter::TokenType::End) { |
246 | 0 | return false; // "a.b" does not start with "a.b.c" |
247 | 0 | } |
248 | 0 | if (prefix_type != TokenIter::TokenType::Symbol) { |
249 | | // Disallow dynamic components in the prefix. We don't have a current need |
250 | | // for prefixes to be expressed dynamically, and handling that case would |
251 | | // add complexity. In particular we'd need to take locks to decode to |
252 | | // strings. |
253 | 0 | return false; |
254 | 0 | } |
255 | 0 | if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) { |
256 | 0 | return false; |
257 | 0 | } |
258 | 0 | } |
259 | 0 | return true; // not reached |
260 | 0 | } |
261 | | |
262 | 59.4M | std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const { |
263 | 59.4M | std::vector<absl::string_view> strings; |
264 | 59.4M | Thread::LockGuard lock(lock_); |
265 | 59.4M | Encoding::decodeTokens( |
266 | 59.4M | stat_name, |
267 | 59.4M | [this, &strings](Symbol symbol) |
268 | 266M | ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); }, |
269 | 59.4M | [&strings](absl::string_view str) { strings.push_back(str); }); |
270 | 59.4M | return strings; |
271 | 59.4M | } |
272 | | |
273 | 85.8M | void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) { |
274 | 85.8M | appendEncoding(data_bytes_required_, mem_block); |
275 | 85.8M | mem_block.appendBlock(mem_block_); |
276 | 85.8M | mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. |
277 | 85.8M | } |
278 | | |
279 | | void SymbolTable::Encoding::appendToMemBlock(StatName stat_name, |
280 | 108M | MemBlockBuilder<uint8_t>& mem_block) { |
281 | 108M | const uint8_t* data = stat_name.dataIncludingSize(); |
282 | 108M | if (data == nullptr) { |
283 | 10.9M | mem_block.appendOne(0); |
284 | 97.5M | } else { |
285 | 97.5M | mem_block.appendData(absl::MakeSpan(data, stat_name.size())); |
286 | 97.5M | } |
287 | 108M | } |
288 | | |
289 | | SymbolTable::SymbolTable() |
290 | | // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro. |
291 | 157k | : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {} |
292 | | |
293 | 157k | SymbolTable::~SymbolTable() { |
294 | | // To avoid leaks into the symbol table, we expect all StatNames to be freed. |
295 | | // Note: this could potentially be short-circuited if we decide a fast exit |
296 | | // is needed in production. But it would be good to ensure clean up during |
297 | | // tests. |
298 | 157k | ASSERT(numSymbols() == 0); |
299 | 157k | } |
300 | | |
301 | | // TODO(ambuc): There is a possible performance optimization here for avoiding |
302 | | // the encoding of IPs / numbers if they appear in stat names. We don't want to |
303 | | // waste time symbolizing an integer as an integer, if we can help it. |
304 | 85.8M | void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) { |
305 | 85.8M | if (name.empty()) { |
306 | 4.54M | return; |
307 | 4.54M | } |
308 | | |
309 | | // We want to hold the lock for the minimum amount of time, so we do the |
310 | | // string-splitting and prepare a temp vector of Symbol first. |
311 | 81.3M | const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.'); |
312 | 81.3M | std::vector<Symbol> symbols; |
313 | 81.3M | symbols.reserve(tokens.size()); |
314 | | |
315 | | // Now take the lock and populate the Symbol objects, which involves bumping |
316 | | // ref-counts in this. |
317 | 81.3M | { |
318 | 81.3M | Thread::LockGuard lock(lock_); |
319 | 81.3M | recent_lookups_.lookup(name); |
320 | 224M | for (auto& token : tokens) { |
321 | | // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with |
322 | | // length below some threshold, say 4 bytes. It might be preferable not to |
323 | | // reserve Symbols for every 3 digit number found (for example) in ipv4 |
324 | | // addresses. |
325 | 224M | symbols.push_back(toSymbol(token)); |
326 | 224M | } |
327 | 81.3M | } |
328 | | |
329 | | // Now efficiently encode the array of 32-bit symbols into a uint8_t array. |
330 | 81.3M | encoding.addSymbols(symbols); |
331 | 81.3M | } |
332 | | |
333 | 157k | uint64_t SymbolTable::numSymbols() const { |
334 | 157k | Thread::LockGuard lock(lock_); |
335 | 157k | ASSERT(encode_map_.size() == decode_map_.size()); |
336 | 157k | return encode_map_.size(); |
337 | 157k | } |
338 | | |
339 | 59.4M | std::string SymbolTable::toString(const StatName& stat_name) const { |
340 | 59.4M | return absl::StrJoin(decodeStrings(stat_name), "."); |
341 | 59.4M | } |
342 | | |
343 | 111M | void SymbolTable::incRefCount(const StatName& stat_name) { |
344 | | // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. |
345 | 111M | const SymbolVec symbols = Encoding::decodeSymbols(stat_name); |
346 | | |
347 | 111M | Thread::LockGuard lock(lock_); |
348 | 349M | for (Symbol symbol : symbols) { |
349 | 349M | auto decode_search = decode_map_.find(symbol); |
350 | | |
351 | 349M | ASSERT(decode_search != decode_map_.end(), |
352 | 349M | "Please see " |
353 | 349M | "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#" |
354 | 349M | "debugging-symbol-table-assertions"); |
355 | 349M | auto encode_search = encode_map_.find(decode_search->second->toStringView()); |
356 | 349M | ASSERT(encode_search != encode_map_.end(), |
357 | 349M | "Please see " |
358 | 349M | "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#" |
359 | 349M | "debugging-symbol-table-assertions"); |
360 | | |
361 | 349M | ++encode_search->second.ref_count_; |
362 | 349M | } |
363 | 111M | } |
364 | | |
365 | 197M | void SymbolTable::free(const StatName& stat_name) { |
366 | | // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. |
367 | 197M | const SymbolVec symbols = Encoding::decodeSymbols(stat_name); |
368 | | |
369 | 197M | Thread::LockGuard lock(lock_); |
370 | 573M | for (Symbol symbol : symbols) { |
371 | 573M | auto decode_search = decode_map_.find(symbol); |
372 | 573M | ASSERT(decode_search != decode_map_.end()); |
373 | | |
374 | 573M | auto encode_search = encode_map_.find(decode_search->second->toStringView()); |
375 | 573M | ASSERT(encode_search != encode_map_.end()); |
376 | | |
377 | | // If that was the last remaining client usage of the symbol, erase the |
378 | | // current mappings and add the now-unused symbol to the reuse pool. |
379 | | // |
380 | | // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in |
381 | | // symbol_table_speed_test.cc, relative to breaking out the decrement into a |
382 | | // separate step, likely due to the non-trivial dereferences in EXPR. |
383 | 573M | if (--encode_search->second.ref_count_ == 0) { |
384 | 15.9M | decode_map_.erase(decode_search); |
385 | 15.9M | encode_map_.erase(encode_search); |
386 | 15.9M | pool_.push(symbol); |
387 | 15.9M | } |
388 | 573M | } |
389 | 197M | } |
390 | | |
391 | 6.43k | uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const { |
392 | 6.43k | uint64_t total = 0; |
393 | 6.43k | absl::flat_hash_map<std::string, uint64_t> name_count_map; |
394 | | |
395 | | // We don't want to hold lock_ while calling the iterator, but we need it to |
396 | | // access recent_lookups_, so we buffer in name_count_map. |
397 | 6.43k | { |
398 | 6.43k | Thread::LockGuard lock(lock_); |
399 | 6.43k | recent_lookups_.forEach( |
400 | 6.43k | [&name_count_map](absl::string_view str, uint64_t count) |
401 | 6.43k | ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; }); |
402 | 6.43k | total += recent_lookups_.total(); |
403 | 6.43k | } |
404 | | |
405 | | // Now we have the collated name-count map data: we need to vectorize and |
406 | | // sort. We define the pair with the count first as std::pair::operator< |
407 | | // prioritizes its first element over its second. |
408 | 6.43k | using LookupCount = std::pair<uint64_t, absl::string_view>; |
409 | 6.43k | std::vector<LookupCount> lookup_data; |
410 | 6.43k | lookup_data.reserve(name_count_map.size()); |
411 | 6.43k | for (const auto& iter : name_count_map) { |
412 | 0 | lookup_data.emplace_back(LookupCount(iter.second, iter.first)); |
413 | 0 | } |
414 | 6.43k | std::sort(lookup_data.begin(), lookup_data.end()); |
415 | 6.43k | for (const LookupCount& lookup_count : lookup_data) { |
416 | 0 | iter(lookup_count.second, lookup_count.first); |
417 | 0 | } |
418 | 6.43k | return total; |
419 | 6.43k | } |
420 | | |
421 | 821 | DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const { |
422 | 821 | DynamicSpans dynamic_spans; |
423 | | |
424 | 821 | uint32_t index = 0; |
425 | 11.8M | auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) { |
426 | 11.8M | DynamicSpan span; |
427 | 11.8M | span.first = index; |
428 | 11.8M | index += std::count(str.begin(), str.end(), '.'); |
429 | 11.8M | span.second = index; |
430 | 11.8M | ++index; |
431 | 11.8M | dynamic_spans.push_back(span); |
432 | 11.8M | }; |
433 | | |
434 | | // Use decodeTokens to suss out which components of stat_name are |
435 | | // symbolic vs dynamic. The lambda that takes a Symbol is called |
436 | | // for symbolic components. The lambda called with a string_view |
437 | | // is called for dynamic components. |
438 | | // |
439 | | // Note that with fake symbol tables, the Symbol lambda is called |
440 | | // once for each character in the string, and no dynamics will |
441 | | // be recorded. |
442 | 821 | Encoding::decodeTokens( |
443 | 15.8M | stat_name, [&index](Symbol) { ++index; }, record_dynamic); |
444 | 821 | return dynamic_spans; |
445 | 821 | } |
446 | | |
447 | 0 | void SymbolTable::setRecentLookupCapacity(uint64_t capacity) { |
448 | 0 | Thread::LockGuard lock(lock_); |
449 | 0 | recent_lookups_.setCapacity(capacity); |
450 | 0 | } |
451 | | |
452 | 0 | void SymbolTable::clearRecentLookups() { |
453 | 0 | Thread::LockGuard lock(lock_); |
454 | 0 | recent_lookups_.clear(); |
455 | 0 | } |
456 | | |
457 | 0 | uint64_t SymbolTable::recentLookupCapacity() const { |
458 | 0 | Thread::LockGuard lock(lock_); |
459 | 0 | return recent_lookups_.capacity(); |
460 | 0 | } |
461 | | |
462 | 5.97k | StatNameSetPtr SymbolTable::makeSet(absl::string_view name) { |
463 | | // make_unique does not work with private ctor, even though SymbolTable is a friend. |
464 | 5.97k | StatNameSetPtr stat_name_set(new StatNameSet(*this, name)); |
465 | 5.97k | return stat_name_set; |
466 | 5.97k | } |
467 | | |
468 | 224M | Symbol SymbolTable::toSymbol(absl::string_view sv) { |
469 | 224M | Symbol result; |
470 | 224M | auto encode_find = encode_map_.find(sv); |
471 | | // If the string segment doesn't already exist, |
472 | 224M | if (encode_find == encode_map_.end()) { |
473 | | // We create the actual string, place it in the decode_map_, and then insert |
474 | | // a string_view pointing to it in the encode_map_. This allows us to only |
475 | | // store the string once. We use unique_ptr so copies are not made as |
476 | | // flat_hash_map moves values around. |
477 | 15.9M | InlineStringPtr str = InlineString::create(sv); |
478 | 15.9M | auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)}); |
479 | 15.9M | ASSERT(encode_insert.second); |
480 | 15.9M | auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)}); |
481 | 15.9M | ASSERT(decode_insert.second); |
482 | | |
483 | 15.9M | result = next_symbol_; |
484 | 15.9M | newSymbol(); |
485 | 208M | } else { |
486 | | // If the insertion didn't take place, return the actual value at that location and up the |
487 | | // refcount at that location |
488 | 208M | result = encode_find->second.symbol_; |
489 | 208M | ++(encode_find->second.ref_count_); |
490 | 208M | } |
491 | 224M | return result; |
492 | 224M | } |
493 | | |
494 | | absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const |
495 | 266M | ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { |
496 | 266M | auto search = decode_map_.find(symbol); |
497 | 266M | RELEASE_ASSERT(search != decode_map_.end(), "no such symbol"); |
498 | 266M | return search->second->toStringView(); |
499 | 266M | } |
500 | | |
501 | 15.9M | void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { |
502 | 15.9M | if (pool_.empty()) { |
503 | 15.2M | next_symbol_ = ++monotonic_counter_; |
504 | 15.2M | } else { |
505 | 722k | next_symbol_ = pool_.top(); |
506 | 722k | pool_.pop(); |
507 | 722k | } |
508 | | // This should catch integer overflow for the new symbol. |
509 | 15.9M | ASSERT(monotonic_counter_ != 0); |
510 | 15.9M | } |
511 | | |
512 | 0 | bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { |
513 | | // Proactively take the table lock in anticipation that we'll need to |
514 | | // convert at least one symbol to a string_view, and it's easier not to |
515 | | // bother to lazily take the lock. |
516 | 0 | Thread::LockGuard lock(lock_); |
517 | 0 | return lessThanLockHeld(a, b); |
518 | 0 | } |
519 | | |
520 | | bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const |
521 | 0 | ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { |
522 | 0 | Encoding::TokenIter a_iter(a), b_iter(b); |
523 | 0 | while (true) { |
524 | 0 | Encoding::TokenIter::TokenType a_type = a_iter.next(); |
525 | 0 | Encoding::TokenIter::TokenType b_type = b_iter.next(); |
526 | 0 | if (b_type == Encoding::TokenIter::TokenType::End) { |
527 | 0 | return false; // "x.y.z" > "x.y", "x.y" == "x.y" |
528 | 0 | } |
529 | 0 | if (a_type == Encoding::TokenIter::TokenType::End) { |
530 | 0 | return true; // "x.y" < "x.y.z" |
531 | 0 | } |
532 | 0 | if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol && |
533 | 0 | a_iter.symbol() == b_iter.symbol()) { |
534 | 0 | continue; // matching symbols don't need to be decoded to strings |
535 | 0 | } |
536 | | |
537 | 0 | absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol |
538 | 0 | ? fromSymbol(a_iter.symbol()) |
539 | 0 | : a_iter.stringView(); |
540 | 0 | absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol |
541 | 0 | ? fromSymbol(b_iter.symbol()) |
542 | 0 | : b_iter.stringView(); |
543 | 0 | if (a_token != b_token) { |
544 | 0 | return a_token < b_token; |
545 | 0 | } |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | | #ifndef ENVOY_CONFIG_COVERAGE |
550 | 0 | void SymbolTable::debugPrint() const { |
551 | 0 | Thread::LockGuard lock(lock_); |
552 | 0 | std::vector<Symbol> symbols; |
553 | 0 | for (const auto& p : decode_map_) { |
554 | 0 | symbols.push_back(p.first); |
555 | 0 | } |
556 | 0 | std::sort(symbols.begin(), symbols.end()); |
557 | 0 | for (Symbol symbol : symbols) { |
558 | 0 | const InlineString& token = *decode_map_.find(symbol)->second; |
559 | 0 | const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second; |
560 | 0 | ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_); |
561 | 0 | } |
562 | 0 | } |
563 | | #endif |
564 | | |
565 | 85.8M | SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) { |
566 | 85.8M | name = StringUtil::removeTrailingCharacters(name, '.'); |
567 | 85.8M | Encoding encoding; |
568 | 85.8M | addTokensToEncoding(name, encoding); |
569 | 85.8M | MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); |
570 | 85.8M | encoding.moveToMemBlock(mem_block); |
571 | 85.8M | return mem_block.release(); |
572 | 85.8M | } |
573 | | |
574 | | StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) |
575 | 85.8M | : StatNameStorageBase(table.encode(name)) {} |
576 | | |
577 | 3.18M | StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { |
578 | 3.18M | const size_t size = src.size(); |
579 | 3.18M | MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t. |
580 | 3.18M | src.copyToMemBlock(storage); |
581 | 3.18M | setBytes(storage.release()); |
582 | 3.18M | table.incRefCount(statName()); |
583 | 3.18M | } |
584 | | |
585 | 26.4M | SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) { |
586 | 26.4M | name = StringUtil::removeTrailingCharacters(name, '.'); |
587 | | |
588 | | // For all StatName objects, we first have the total number of bytes in the |
589 | | // representation. But for inlined dynamic string StatName variants, we must |
590 | | // store the length of the payload separately, so that if this token gets |
591 | | // joined with others, we'll know much space it consumes until the next token. |
592 | | // So the layout is |
593 | | // [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ] |
594 | | // So we need to figure out how many bytes we need to represent length-of-name |
595 | | // and name. |
596 | | |
597 | | // payload_bytes is the total number of bytes needed to represent the |
598 | | // characters in name, plus their encoded size, plus the literal indicator. |
599 | 26.4M | const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1; |
600 | | |
601 | | // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and |
602 | | // the length of those. |
603 | 26.4M | const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes); |
604 | 26.4M | MemBlockBuilder<uint8_t> mem_block(total_bytes); |
605 | | |
606 | 26.4M | Encoding::appendEncoding(payload_bytes, mem_block); |
607 | 26.4M | mem_block.appendOne(LiteralStringIndicator); |
608 | 26.4M | Encoding::appendEncoding(name.size(), mem_block); |
609 | 26.4M | mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size())); |
610 | 26.4M | ASSERT(mem_block.capacityRemaining() == 0); |
611 | 26.4M | return mem_block.release(); |
612 | 26.4M | } |
613 | | |
614 | 256M | StatNameStorage::~StatNameStorage() { |
615 | | // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to |
616 | | // decrement the reference counts held by the SymbolTable on behalf of |
617 | | // this StatName. This saves 8 bytes of storage per stat, relative to |
618 | | // holding a SymbolTable& in each StatNameStorage object. |
619 | 256M | ASSERT(bytes() == nullptr); |
620 | 256M | } |
621 | | |
622 | 89.0M | void StatNameStorage::free(SymbolTable& table) { |
623 | 89.0M | table.free(statName()); |
624 | 89.0M | clear(); |
625 | 89.0M | } |
626 | | |
627 | 5.81M | void StatNamePool::clear() { |
628 | 78.8M | for (StatNameStorage& storage : storage_vector_) { |
629 | 78.8M | storage.free(symbol_table_); |
630 | 78.8M | } |
631 | 5.81M | storage_vector_.clear(); |
632 | 5.81M | } |
633 | | |
634 | 78.8M | const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) { |
635 | 78.8M | storage_vector_.push_back(Stats::StatNameStorage(str, symbol_table_)); |
636 | 78.8M | return storage_vector_.back().bytes(); |
637 | 78.8M | } |
638 | | |
639 | 78.7M | StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); } |
640 | | |
641 | 26.1M | StatName StatNameDynamicPool::add(absl::string_view str) { |
642 | 26.1M | storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_)); |
643 | 26.1M | return StatName(storage_vector_.back().bytes()); |
644 | 26.1M | } |
645 | | |
646 | 30.0k | StatNameStorageSet::~StatNameStorageSet() { |
647 | | // free() must be called before destructing StatNameStorageSet to decrement |
648 | | // references to all symbols. |
649 | 30.0k | ASSERT(hash_set_.empty()); |
650 | 30.0k | } |
651 | | |
652 | 30.0k | void StatNameStorageSet::free(SymbolTable& symbol_table) { |
653 | | // We must free() all symbols referenced in the set, otherwise the symbols |
654 | | // will leak when the flat_hash_map superclass is destructed. They cannot |
655 | | // self-destruct without an explicit free() as each individual StatNameStorage |
656 | | // object does not have a reference to the symbol table, which would waste 8 |
657 | | // bytes per stat-name. The easiest way to safely free all the contents of the |
658 | | // symbol table set is to use flat_hash_map::extract(), which removes and |
659 | | // returns an element from the set without destructing the element |
660 | | // immediately. This gives us a chance to call free() on each one before they |
661 | | // are destroyed. |
662 | | // |
663 | | // There's a performance risk here, if removing elements via |
664 | | // flat_hash_set::begin() is inefficient to use in a loop like this. One can |
665 | | // imagine a hash-table implementation where the performance of this |
666 | | // usage-model would be poor. However, tests with 100k elements appeared to |
667 | | // run quickly when compiled for optimization, so at present this is not a |
668 | | // performance issue. |
669 | | |
670 | 30.0k | while (!hash_set_.empty()) { |
671 | 0 | auto storage = hash_set_.extract(hash_set_.begin()); |
672 | 0 | storage.value().free(symbol_table); |
673 | 0 | } |
674 | 30.0k | } |
675 | | |
676 | 139M | SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const { |
677 | 139M | size_t num_bytes = 0; |
678 | 346M | for (StatName stat_name : stat_names) { |
679 | 346M | if (!stat_name.empty()) { |
680 | 196M | num_bytes += stat_name.dataSize(); |
681 | 196M | } |
682 | 346M | } |
683 | 139M | MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes)); |
684 | 139M | Encoding::appendEncoding(num_bytes, mem_block); |
685 | 346M | for (StatName stat_name : stat_names) { |
686 | 346M | stat_name.appendDataToMemBlock(mem_block); |
687 | 346M | } |
688 | 139M | ASSERT(mem_block.capacityRemaining() == 0); |
689 | 139M | return mem_block.release(); |
690 | 139M | } |
691 | | |
692 | 53.2M | void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) { |
693 | 53.2M | RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); |
694 | | |
695 | | // First encode all the names. |
696 | 53.2M | size_t total_size_bytes = 1; /* one byte for holding the number of names */ |
697 | | |
698 | 161M | for (uint32_t i = 0; i < num_names; ++i) { |
699 | 108M | total_size_bytes += names[i].size(); |
700 | 108M | } |
701 | | |
702 | | // Now allocate the exact number of bytes required and move the encodings |
703 | | // into storage. |
704 | 53.2M | MemBlockBuilder<uint8_t> mem_block(total_size_bytes); |
705 | 53.2M | mem_block.appendOne(num_names); |
706 | 161M | for (uint32_t i = 0; i < num_names; ++i) { |
707 | 108M | const StatName stat_name = names[i]; |
708 | 108M | Encoding::appendToMemBlock(stat_name, mem_block); |
709 | 108M | incRefCount(stat_name); |
710 | 108M | } |
711 | | |
712 | | // This assertion double-checks the arithmetic where we computed |
713 | | // total_size_bytes. After appending all the encoded data into the |
714 | | // allocated byte array, we should have exhausted all the memory |
715 | | // we though we needed. |
716 | 53.2M | ASSERT(mem_block.capacityRemaining() == 0); |
717 | 53.2M | list.moveStorageIntoList(mem_block.release()); |
718 | 53.2M | } |
719 | | |
720 | 53.2M | StatNameList::~StatNameList() { ASSERT(!populated()); } |
721 | | |
722 | 450M | void StatNameList::iterate(const std::function<bool(StatName)>& f) const { |
723 | 450M | const uint8_t* p = &storage_[0]; |
724 | 450M | const uint32_t num_elements = *p++; |
725 | 560M | for (uint32_t i = 0; i < num_elements; ++i) { |
726 | 506M | const StatName stat_name(p); |
727 | 506M | p += stat_name.size(); |
728 | 506M | if (!f(stat_name)) { |
729 | 397M | break; |
730 | 397M | } |
731 | 506M | } |
732 | 450M | } |
733 | | |
734 | 53.2M | void StatNameList::clear(SymbolTable& symbol_table) { |
735 | 108M | iterate([&symbol_table](StatName stat_name) -> bool { |
736 | 108M | symbol_table.free(stat_name); |
737 | 108M | return true; |
738 | 108M | }); |
739 | 53.2M | storage_.reset(); |
740 | 53.2M | } |
741 | | |
742 | | StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name) |
743 | 5.97k | : name_(std::string(name)), pool_(symbol_table) { |
744 | 5.97k | builtin_stat_names_[""] = StatName(); |
745 | 5.97k | } |
746 | | |
747 | 252k | void StatNameSet::rememberBuiltin(absl::string_view str) { |
748 | 252k | StatName stat_name; |
749 | 252k | { |
750 | 252k | absl::MutexLock lock(&mutex_); |
751 | 252k | stat_name = pool_.add(str); |
752 | 252k | } |
753 | 252k | builtin_stat_names_[str] = stat_name; |
754 | 252k | } |
755 | | |
756 | 976k | StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const { |
757 | | // If token was recorded as a built-in during initialization, we can |
758 | | // service this request lock-free. |
759 | 976k | const auto iter = builtin_stat_names_.find(token); |
760 | 976k | if (iter != builtin_stat_names_.end()) { |
761 | 593k | return iter->second; |
762 | 593k | } |
763 | 382k | return fallback; |
764 | 976k | } |
765 | | |
766 | | } // namespace Stats |
767 | | } // namespace Envoy |