Line data Source code
1 : #include "source/common/stats/symbol_table.h"
2 :
3 : #include <algorithm>
4 : #include <iostream>
5 : #include <memory>
6 : #include <vector>
7 :
8 : #include "source/common/common/assert.h"
9 : #include "source/common/common/logger.h"
10 : #include "source/common/common/utility.h"
11 :
12 : #include "absl/strings/str_cat.h"
13 :
14 : namespace Envoy {
15 : namespace Stats {
16 :
17 : // Masks used for variable-length encoding of arbitrary-sized integers into a
18 : // uint8-array. The integers are typically small, so we try to store them in as
19 : // few bytes as possible. The bottom 7 bits hold values, and the top bit is used
20 : // to determine whether another byte is needed for more data.
21 : static constexpr uint32_t SpilloverMask = 0x80;
22 : static constexpr uint32_t Low7Bits = 0x7f;
23 :
24 : // When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol
25 : // that will decode into uint8_t array starting (and ending) with {0}. Thus we
26 : // can use a leading 0 in the first byte to indicate that what follows is
27 : // literal char data, rather than symbol-table entries. Literal char data is
28 : // used for dynamically discovered stat-name tokens where you don't want to take
29 : // a symbol table lock, and would rather pay extra memory overhead to store the
30 : // tokens as fully elaborated strings.
31 : static constexpr Symbol FirstValidSymbol = 1;
32 : static constexpr uint8_t LiteralStringIndicator = 0;
33 :
34 25631677 : size_t StatName::dataSize() const {
35 25631677 : if (size_and_data_ == nullptr) {
36 529874 : return 0;
37 529874 : }
38 25101803 : return SymbolTable::Encoding::decodeNumber(size_and_data_).first;
39 25631677 : }
40 :
41 : #ifndef ENVOY_CONFIG_COVERAGE
42 : void StatName::debugPrint() {
43 : // TODO(jmarantz): capture this functionality (always prints regardless of
44 : // loglevel) in an ENVOY_LOG macro variant or similar, perhaps
45 : // ENVOY_LOG_MISC(stderr, ...);
46 : if (size_and_data_ == nullptr) {
47 : std::cerr << "Null StatName" << std::endl;
48 : } else {
49 : const size_t nbytes = dataSize();
50 : std::cerr << "dataSize=" << nbytes << ":";
51 : for (size_t i = 0; i < nbytes; ++i) {
52 : std::cerr << " " << static_cast<uint64_t>(data()[i]);
53 : }
54 : const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this);
55 : std::cerr << ", numSymbols=" << encoding.size() << ":";
56 : for (Symbol symbol : encoding) {
57 : std::cerr << " " << symbol;
58 : }
59 : std::cerr << std::endl;
60 : }
61 : }
62 : #endif
63 :
64 669257 : SymbolTable::Encoding::~Encoding() {
65 : // Verifies that moveToMemBlock() was called on this encoding. Failure
66 : // to call moveToMemBlock() will result in leaks symbols.
67 669257 : ASSERT(mem_block_.capacity() == 0);
68 669257 : }
69 :
70 14748576 : size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) {
71 14748576 : size_t num_bytes = 0;
72 15013948 : do {
73 15013948 : ++num_bytes;
74 15013948 : number >>= 7;
75 15013948 : } while (number != 0);
76 14748576 : return num_bytes;
77 14748576 : }
78 :
79 3163902 : void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) {
80 : // UTF-8-like encoding where a value 127 or less gets written as a single
81 : // byte. For higher values we write the low-order 7 bits with a 1 in
82 : // the high-order bit. Then we right-shift 7 bits and keep adding more bytes
83 : // until we have consumed all the non-zero bits in symbol.
84 : //
85 : // When decoding, we stop consuming uint8_t when we see a uint8_t with
86 : // high-order bit 0.
87 3427589 : do {
88 3427589 : if (number < (1 << 7)) {
89 3163904 : mem_block.appendOne(number); // number <= 127 gets encoded in one byte.
90 3163904 : } else {
91 263685 : mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes.
92 263685 : }
93 3427589 : number >>= 7;
94 3427589 : } while (number != 0);
95 3163902 : }
96 :
97 620598 : void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) {
98 620598 : ASSERT(data_bytes_required_ == 0);
99 878748 : for (Symbol symbol : symbols) {
100 878748 : data_bytes_required_ += encodingSizeBytes(symbol);
101 878748 : }
102 620598 : mem_block_.setCapacity(data_bytes_required_);
103 878749 : for (Symbol symbol : symbols) {
104 878749 : appendEncoding(symbol, mem_block_);
105 878749 : }
106 620598 : }
107 :
108 34708935 : std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) {
109 34708935 : uint64_t number = 0;
110 34708935 : uint64_t uc = SpilloverMask;
111 34708935 : const uint8_t* start = encoding;
112 71947816 : for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) {
113 37238881 : uc = static_cast<uint32_t>(*encoding);
114 37238881 : number |= (uc & Low7Bits) << shift;
115 37238881 : }
116 34708935 : return std::make_pair(number, encoding - start);
117 34708935 : }
118 :
119 2519011 : SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) {
120 2519011 : SymbolVec symbol_vec;
121 2519011 : symbol_vec.reserve(stat_name.dataSize());
122 2519011 : decodeTokens(
123 3401908 : stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); },
124 2526006 : [](absl::string_view) {});
125 2519011 : return symbol_vec;
126 2519011 : }
127 :
128 : // Iterator for decoding StatName tokens. This is not an STL-compatible
129 : // iterator; it is used only locally. Using this rather than decodeSymbols makes
130 : // it easier to early-exit out of a comparison once we know the answer. For
131 : // example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order
132 : // when you get to the "b" vs "x" and do not need to decode "c" vs "y".
133 : //
134 : // This is also helpful for prefix-matching.
135 : //
136 : // The array-version of decodeSymbols is still a good approach for converting a
137 : // StatName to a string as the intermediate array of string_view is needed to
138 : // allocate the right size for the joined string.
139 : class SymbolTable::Encoding::TokenIter {
140 : public:
141 : // The type of token reached.
142 : enum class TokenType { StringView, Symbol, End };
143 :
144 3566765 : TokenIter(StatName stat_name) {
145 3566765 : const uint8_t* raw_data = stat_name.dataIncludingSize();
146 3566765 : if (raw_data == nullptr) {
147 86084 : size_ = 0;
148 86084 : array_ = nullptr;
149 3480681 : } else {
150 3480681 : std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data);
151 3480681 : size_ = size_consumed.first;
152 3480681 : array_ = raw_data + size_consumed.second;
153 3480681 : }
154 3566765 : }
155 :
156 : /**
157 : * Parses the next token. The token can then be retrieved by calling
158 : * stringView() or symbol().
159 : * @return The type of token, TokenType::End if we have reached the end of the StatName.
160 : */
161 9645027 : TokenType next() {
162 9645027 : if (size_ == 0) {
163 3566774 : return TokenType::End;
164 3566774 : }
165 6078253 : if (*array_ == LiteralStringIndicator) {
166 : // To avoid scanning memory to find the literal size during decode, we
167 : // var-length encode the size of the literal string prior to the data.
168 232379 : ASSERT(size_ > 1);
169 232379 : ++array_;
170 232379 : --size_;
171 232379 : std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_);
172 232379 : uint64_t length = length_consumed.first;
173 232379 : array_ += length_consumed.second;
174 232379 : size_ -= length_consumed.second;
175 232379 : ASSERT(size_ >= length);
176 232379 : string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length);
177 232379 : size_ -= length;
178 232379 : array_ += length;
179 : #ifndef NDEBUG
180 : last_type_ = TokenType::StringView;
181 : #endif
182 232379 : return TokenType::StringView;
183 232379 : }
184 5845874 : std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_);
185 5845874 : symbol_ = symbol_consumed.first;
186 5845874 : size_ -= symbol_consumed.second;
187 5845874 : array_ += symbol_consumed.second;
188 : #ifndef NDEBUG
189 : last_type_ = TokenType::Symbol;
190 : #endif
191 5845874 : return TokenType::Symbol;
192 6078253 : }
193 :
194 : /** @return the current string_view -- only valid to call if next()==TokenType::StringView */
195 232379 : absl::string_view stringView() const {
196 : #ifndef NDEBUG
197 : ASSERT(last_type_ == TokenType::StringView);
198 : #endif
199 232379 : return string_view_;
200 232379 : }
201 :
202 : /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */
203 5845926 : Symbol symbol() const {
204 : #ifndef NDEBUG
205 : ASSERT(last_type_ == TokenType::Symbol);
206 : #endif
207 5845926 : return symbol_;
208 5845926 : }
209 :
210 : private:
211 : const uint8_t* array_;
212 : size_t size_;
213 : absl::string_view string_view_;
214 : Symbol symbol_;
215 : #ifndef NDEBUG
216 : TokenType last_type_{TokenType::End};
217 : #endif
218 : };
219 :
220 : void SymbolTable::Encoding::decodeTokens(
221 : StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
222 3566771 : const std::function<void(absl::string_view)>& string_view_token_fn) {
223 3566771 : TokenIter iter(stat_name);
224 3566771 : TokenIter::TokenType type;
225 9645086 : while ((type = iter.next()) != TokenIter::TokenType::End) {
226 6078315 : if (type == TokenIter::TokenType::StringView) {
227 232379 : string_view_token_fn(iter.stringView());
228 5908003 : } else {
229 5845936 : ASSERT(type == TokenIter::TokenType::Symbol);
230 5845936 : symbol_token_fn(iter.symbol());
231 5845936 : }
232 6078315 : }
233 3566771 : }
234 :
235 0 : bool StatName::startsWith(StatName prefix) const {
236 0 : using TokenIter = SymbolTable::Encoding::TokenIter;
237 0 : TokenIter prefix_iter(prefix);
238 0 : TokenIter this_iter(*this);
239 0 : while (true) {
240 0 : TokenIter::TokenType prefix_type = prefix_iter.next();
241 0 : TokenIter::TokenType this_type = this_iter.next();
242 0 : if (prefix_type == TokenIter::TokenType::End) {
243 0 : return true; // "a.b.c" starts with "a.b" or "a.b.c"
244 0 : }
245 0 : if (this_type == TokenIter::TokenType::End) {
246 0 : return false; // "a.b" does not start with "a.b.c"
247 0 : }
248 0 : if (prefix_type != TokenIter::TokenType::Symbol) {
249 : // Disallow dynamic components in the prefix. We don't have a current need
250 : // for prefixes to be expressed dynamically, and handling that case would
251 : // add complexity. In particular we'd need to take locks to decode to
252 : // strings.
253 0 : return false;
254 0 : }
255 0 : if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) {
256 0 : return false;
257 0 : }
258 0 : }
259 0 : return true; // not reached
260 0 : }
261 :
262 1047512 : std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const {
263 1047512 : std::vector<absl::string_view> strings;
264 1047512 : Thread::LockGuard lock(lock_);
265 1047512 : Encoding::decodeTokens(
266 1047512 : stat_name,
267 1047512 : [this, &strings](Symbol symbol)
268 2507448 : ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); },
269 1049766 : [&strings](absl::string_view str) { strings.push_back(str); });
270 1047512 : return strings;
271 1047512 : }
272 :
273 669259 : void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) {
274 669259 : appendEncoding(data_bytes_required_, mem_block);
275 669259 : mem_block.appendBlock(mem_block_);
276 669259 : mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct.
277 669259 : }
278 :
279 : void SymbolTable::Encoding::appendToMemBlock(StatName stat_name,
280 898720 : MemBlockBuilder<uint8_t>& mem_block) {
281 898720 : const uint8_t* data = stat_name.dataIncludingSize();
282 898720 : if (data == nullptr) {
283 86084 : mem_block.appendOne(0);
284 813004 : } else {
285 812636 : mem_block.appendData(absl::MakeSpan(data, stat_name.size()));
286 812636 : }
287 898720 : }
288 :
289 : SymbolTable::SymbolTable()
290 : // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro.
291 5788 : : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {}
292 :
293 5788 : SymbolTable::~SymbolTable() {
294 : // To avoid leaks into the symbol table, we expect all StatNames to be freed.
295 : // Note: this could potentially be short-circuited if we decide a fast exit
296 : // is needed in production. But it would be good to ensure clean up during
297 : // tests.
298 5788 : ASSERT(numSymbols() == 0);
299 5788 : }
300 :
301 : // TODO(ambuc): There is a possible performance optimization here for avoiding
302 : // the encoding of IPs / numbers if they appear in stat names. We don't want to
303 : // waste time symbolizing an integer as an integer, if we can help it.
304 669264 : void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) {
305 669264 : if (name.empty()) {
306 48665 : return;
307 48665 : }
308 :
309 : // We want to hold the lock for the minimum amount of time, so we do the
310 : // string-splitting and prepare a temp vector of Symbol first.
311 620599 : const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.');
312 620599 : std::vector<Symbol> symbols;
313 620599 : symbols.reserve(tokens.size());
314 :
315 : // Now take the lock and populate the Symbol objects, which involves bumping
316 : // ref-counts in this.
317 620599 : {
318 620599 : Thread::LockGuard lock(lock_);
319 620599 : recent_lookups_.lookup(name);
320 878747 : for (auto& token : tokens) {
321 : // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with
322 : // length below some threshold, say 4 bytes. It might be preferable not to
323 : // reserve Symbols for every 3 digit number found (for example) in ipv4
324 : // addresses.
325 878747 : symbols.push_back(toSymbol(token));
326 878747 : }
327 620599 : }
328 :
329 : // Now efficiently encode the array of 32-bit symbols into a uint8_t array.
330 620599 : encoding.addSymbols(symbols);
331 620599 : }
332 :
333 0 : uint64_t SymbolTable::numSymbols() const {
334 0 : Thread::LockGuard lock(lock_);
335 0 : ASSERT(encode_map_.size() == decode_map_.size());
336 0 : return encode_map_.size();
337 0 : }
338 :
339 1047508 : std::string SymbolTable::toString(const StatName& stat_name) const {
340 1047508 : return absl::StrJoin(decodeStrings(stat_name), ".");
341 1047508 : }
342 :
343 924876 : void SymbolTable::incRefCount(const StatName& stat_name) {
344 : // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
345 924876 : const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
346 :
347 924876 : Thread::LockGuard lock(lock_);
348 1255916 : for (Symbol symbol : symbols) {
349 1227890 : auto decode_search = decode_map_.find(symbol);
350 :
351 1227890 : ASSERT(decode_search != decode_map_.end(),
352 1227890 : "Please see "
353 1227890 : "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
354 1227890 : "debugging-symbol-table-assertions");
355 1227890 : auto encode_search = encode_map_.find(decode_search->second->toStringView());
356 1227890 : ASSERT(encode_search != encode_map_.end(),
357 1227890 : "Please see "
358 1227890 : "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
359 1227890 : "debugging-symbol-table-assertions");
360 :
361 1227890 : ++encode_search->second.ref_count_;
362 1227890 : }
363 924876 : }
364 :
365 1594138 : void SymbolTable::free(const StatName& stat_name) {
366 : // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
367 1594138 : const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
368 :
369 1594138 : Thread::LockGuard lock(lock_);
370 2146006 : for (Symbol symbol : symbols) {
371 2106635 : auto decode_search = decode_map_.find(symbol);
372 2106635 : ASSERT(decode_search != decode_map_.end());
373 :
374 2106635 : auto encode_search = encode_map_.find(decode_search->second->toStringView());
375 2106635 : ASSERT(encode_search != encode_map_.end());
376 :
377 : // If that was the last remaining client usage of the symbol, erase the
378 : // current mappings and add the now-unused symbol to the reuse pool.
379 : //
380 : // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in
381 : // symbol_table_speed_test.cc, relative to breaking out the decrement into a
382 : // separate step, likely due to the non-trivial dereferences in EXPR.
383 2106635 : if (--encode_search->second.ref_count_ == 0) {
384 247028 : decode_map_.erase(decode_search);
385 247028 : encode_map_.erase(encode_search);
386 247028 : pool_.push(symbol);
387 247028 : }
388 2106635 : }
389 1594138 : }
390 :
391 205 : uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const {
392 205 : uint64_t total = 0;
393 205 : absl::flat_hash_map<std::string, uint64_t> name_count_map;
394 :
395 : // We don't want to hold lock_ while calling the iterator, but we need it to
396 : // access recent_lookups_, so we buffer in name_count_map.
397 205 : {
398 205 : Thread::LockGuard lock(lock_);
399 205 : recent_lookups_.forEach(
400 205 : [&name_count_map](absl::string_view str, uint64_t count)
401 205 : ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; });
402 205 : total += recent_lookups_.total();
403 205 : }
404 :
405 : // Now we have the collated name-count map data: we need to vectorize and
406 : // sort. We define the pair with the count first as std::pair::operator<
407 : // prioritizes its first element over its second.
408 205 : using LookupCount = std::pair<uint64_t, absl::string_view>;
409 205 : std::vector<LookupCount> lookup_data;
410 205 : lookup_data.reserve(name_count_map.size());
411 205 : for (const auto& iter : name_count_map) {
412 0 : lookup_data.emplace_back(LookupCount(iter.second, iter.first));
413 0 : }
414 205 : std::sort(lookup_data.begin(), lookup_data.end());
415 205 : for (const LookupCount& lookup_count : lookup_data) {
416 0 : iter(lookup_count.second, lookup_count.first);
417 0 : }
418 205 : return total;
419 205 : }
420 :
421 247 : DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const {
422 247 : DynamicSpans dynamic_spans;
423 :
424 247 : uint32_t index = 0;
425 1374 : auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) {
426 1374 : DynamicSpan span;
427 1374 : span.first = index;
428 1374 : index += std::count(str.begin(), str.end(), '.');
429 1374 : span.second = index;
430 1374 : ++index;
431 1374 : dynamic_spans.push_back(span);
432 1374 : };
433 :
434 : // Use decodeTokens to suss out which components of stat_name are
435 : // symbolic vs dynamic. The lambda that takes a Symbol is called
436 : // for symbolic components. The lambda called with a string_view
437 : // is called for dynamic components.
438 : //
439 : // Note that with fake symbol tables, the Symbol lambda is called
440 : // once for each character in the string, and no dynamics will
441 : // be recorded.
442 247 : Encoding::decodeTokens(
443 4436 : stat_name, [&index](Symbol) { ++index; }, record_dynamic);
444 247 : return dynamic_spans;
445 247 : }
446 :
447 0 : void SymbolTable::setRecentLookupCapacity(uint64_t capacity) {
448 0 : Thread::LockGuard lock(lock_);
449 0 : recent_lookups_.setCapacity(capacity);
450 0 : }
451 :
452 0 : void SymbolTable::clearRecentLookups() {
453 0 : Thread::LockGuard lock(lock_);
454 0 : recent_lookups_.clear();
455 0 : }
456 :
457 0 : uint64_t SymbolTable::recentLookupCapacity() const {
458 0 : Thread::LockGuard lock(lock_);
459 0 : return recent_lookups_.capacity();
460 0 : }
461 :
462 188 : StatNameSetPtr SymbolTable::makeSet(absl::string_view name) {
463 : // make_unique does not work with private ctor, even though SymbolTable is a friend.
464 188 : StatNameSetPtr stat_name_set(new StatNameSet(*this, name));
465 188 : return stat_name_set;
466 188 : }
467 :
468 878749 : Symbol SymbolTable::toSymbol(absl::string_view sv) {
469 878749 : Symbol result;
470 878749 : auto encode_find = encode_map_.find(sv);
471 : // If the string segment doesn't already exist,
472 878749 : if (encode_find == encode_map_.end()) {
473 : // We create the actual string, place it in the decode_map_, and then insert
474 : // a string_view pointing to it in the encode_map_. This allows us to only
475 : // store the string once. We use unique_ptr so copies are not made as
476 : // flat_hash_map moves values around.
477 247028 : InlineStringPtr str = InlineString::create(sv);
478 247028 : auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)});
479 247028 : ASSERT(encode_insert.second);
480 247028 : auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
481 247028 : ASSERT(decode_insert.second);
482 :
483 247028 : result = next_symbol_;
484 247028 : newSymbol();
485 716317 : } else {
486 : // If the insertion didn't take place, return the actual value at that location and up the
487 : // refcount at that location
488 631721 : result = encode_find->second.symbol_;
489 631721 : ++(encode_find->second.ref_count_);
490 631721 : }
491 878749 : return result;
492 878749 : }
493 :
494 : absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
495 2507152 : ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
496 2507152 : auto search = decode_map_.find(symbol);
497 2507152 : RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
498 2507152 : return search->second->toStringView();
499 2507152 : }
500 :
501 247028 : void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
502 247028 : if (pool_.empty()) {
503 229390 : next_symbol_ = ++monotonic_counter_;
504 244133 : } else {
505 17638 : next_symbol_ = pool_.top();
506 17638 : pool_.pop();
507 17638 : }
508 : // This should catch integer overflow for the new symbol.
509 247028 : ASSERT(monotonic_counter_ != 0);
510 247028 : }
511 :
512 0 : bool SymbolTable::lessThan(const StatName& a, const StatName& b) const {
513 : // Proactively take the table lock in anticipation that we'll need to
514 : // convert at least one symbol to a string_view, and it's easier not to
515 : // bother to lazily take the lock.
516 0 : Thread::LockGuard lock(lock_);
517 0 : return lessThanLockHeld(a, b);
518 0 : }
519 :
520 : bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const
521 0 : ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
522 0 : Encoding::TokenIter a_iter(a), b_iter(b);
523 0 : while (true) {
524 0 : Encoding::TokenIter::TokenType a_type = a_iter.next();
525 0 : Encoding::TokenIter::TokenType b_type = b_iter.next();
526 0 : if (b_type == Encoding::TokenIter::TokenType::End) {
527 0 : return false; // "x.y.z" > "x.y", "x.y" == "x.y"
528 0 : }
529 0 : if (a_type == Encoding::TokenIter::TokenType::End) {
530 0 : return true; // "x.y" < "x.y.z"
531 0 : }
532 0 : if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol &&
533 0 : a_iter.symbol() == b_iter.symbol()) {
534 0 : continue; // matching symbols don't need to be decoded to strings
535 0 : }
536 :
537 0 : absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol
538 0 : ? fromSymbol(a_iter.symbol())
539 0 : : a_iter.stringView();
540 0 : absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol
541 0 : ? fromSymbol(b_iter.symbol())
542 0 : : b_iter.stringView();
543 0 : if (a_token != b_token) {
544 0 : return a_token < b_token;
545 0 : }
546 0 : }
547 0 : }
548 :
549 : #ifndef ENVOY_CONFIG_COVERAGE
550 : void SymbolTable::debugPrint() const {
551 : Thread::LockGuard lock(lock_);
552 : std::vector<Symbol> symbols;
553 : for (const auto& p : decode_map_) {
554 : symbols.push_back(p.first);
555 : }
556 : std::sort(symbols.begin(), symbols.end());
557 : for (Symbol symbol : symbols) {
558 : const InlineString& token = *decode_map_.find(symbol)->second;
559 : const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second;
560 : ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_);
561 : }
562 : }
563 : #endif
564 :
565 669264 : SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) {
566 669264 : name = StringUtil::removeTrailingCharacters(name, '.');
567 669264 : Encoding encoding;
568 669264 : addTokensToEncoding(name, encoding);
569 669264 : MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired()));
570 669264 : encoding.moveToMemBlock(mem_block);
571 669264 : return mem_block.release();
572 669264 : }
573 :
574 : StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table)
575 669264 : : StatNameStorageBase(table.encode(name)) {}
576 :
577 26156 : StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) {
578 26156 : const size_t size = src.size();
579 26156 : MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t.
580 26156 : src.copyToMemBlock(storage);
581 26156 : setBytes(storage.release());
582 26156 : table.incRefCount(statName());
583 26156 : }
584 :
585 258696 : SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) {
586 258696 : name = StringUtil::removeTrailingCharacters(name, '.');
587 :
588 : // For all StatName objects, we first have the total number of bytes in the
589 : // representation. But for inlined dynamic string StatName variants, we must
590 : // store the length of the payload separately, so that if this token gets
591 : // joined with others, we'll know much space it consumes until the next token.
592 : // So the layout is
593 : // [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ]
594 : // So we need to figure out how many bytes we need to represent length-of-name
595 : // and name.
596 :
597 : // payload_bytes is the total number of bytes needed to represent the
598 : // characters in name, plus their encoded size, plus the literal indicator.
599 258696 : const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1;
600 :
601 : // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and
602 : // the length of those.
603 258696 : const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes);
604 258696 : MemBlockBuilder<uint8_t> mem_block(total_bytes);
605 :
606 258696 : Encoding::appendEncoding(payload_bytes, mem_block);
607 258696 : mem_block.appendOne(LiteralStringIndicator);
608 258696 : Encoding::appendEncoding(name.size(), mem_block);
609 258696 : mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size()));
610 258696 : ASSERT(mem_block.capacityRemaining() == 0);
611 258696 : return mem_block.release();
612 258696 : }
613 :
614 1784778 : StatNameStorage::~StatNameStorage() {
615 : // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to
616 : // decrement the reference counts held by the SymbolTable on behalf of
617 : // this StatName. This saves 8 bytes of storage per stat, relative to
618 : // holding a SymbolTable& in each StatNameStorage object.
619 1784778 : ASSERT(bytes() == nullptr);
620 1784778 : }
621 :
622 695418 : void StatNameStorage::free(SymbolTable& table) {
623 695418 : table.free(statName());
624 695418 : clear();
625 695418 : }
626 :
627 85929 : void StatNamePool::clear() {
628 525608 : for (StatNameStorage& storage : storage_vector_) {
629 525056 : storage.free(symbol_table_);
630 525056 : }
631 85929 : storage_vector_.clear();
632 85929 : }
633 :
634 525057 : const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) {
635 525057 : storage_vector_.push_back(Stats::StatNameStorage(str, symbol_table_));
636 525057 : return storage_vector_.back().bytes();
637 525057 : }
638 :
639 520168 : StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); }
640 :
641 78299 : StatName StatNameDynamicPool::add(absl::string_view str) {
642 78299 : storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_));
643 78299 : return StatName(storage_vector_.back().bytes());
644 78299 : }
645 :
646 1443 : StatNameStorageSet::~StatNameStorageSet() {
647 : // free() must be called before destructing StatNameStorageSet to decrement
648 : // references to all symbols.
649 1443 : ASSERT(hash_set_.empty());
650 1443 : }
651 :
652 1443 : void StatNameStorageSet::free(SymbolTable& symbol_table) {
653 : // We must free() all symbols referenced in the set, otherwise the symbols
654 : // will leak when the flat_hash_map superclass is destructed. They cannot
655 : // self-destruct without an explicit free() as each individual StatNameStorage
656 : // object does not have a reference to the symbol table, which would waste 8
657 : // bytes per stat-name. The easiest way to safely free all the contents of the
658 : // symbol table set is to use flat_hash_map::extract(), which removes and
659 : // returns an element from the set without destructing the element
660 : // immediately. This gives us a chance to call free() on each one before they
661 : // are destroyed.
662 : //
663 : // There's a performance risk here, if removing elements via
664 : // flat_hash_set::begin() is inefficient to use in a loop like this. One can
665 : // imagine a hash-table implementation where the performance of this
666 : // usage-model would be poor. However, tests with 100k elements appeared to
667 : // run quickly when compiled for optimization, so at present this is not a
668 : // performance issue.
669 :
670 1443 : while (!hash_set_.empty()) {
671 0 : auto storage = hash_set_.extract(hash_set_.begin());
672 0 : storage.value().free(symbol_table);
673 0 : }
674 1443 : }
675 :
676 1049185 : SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const {
677 1049185 : size_t num_bytes = 0;
678 2400464 : for (StatName stat_name : stat_names) {
679 2400464 : if (!stat_name.empty()) {
680 1235490 : num_bytes += stat_name.dataSize();
681 1235490 : }
682 2400464 : }
683 1049185 : MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes));
684 1049185 : Encoding::appendEncoding(num_bytes, mem_block);
685 2400464 : for (StatName stat_name : stat_names) {
686 2400464 : stat_name.appendDataToMemBlock(mem_block);
687 2400464 : }
688 1049185 : ASSERT(mem_block.capacityRemaining() == 0);
689 1049185 : return mem_block.release();
690 1049185 : }
691 :
692 392190 : void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) {
693 392190 : RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded");
694 :
695 : // First encode all the names.
696 392190 : size_t total_size_bytes = 1; /* one byte for holding the number of names */
697 :
698 1290910 : for (uint32_t i = 0; i < num_names; ++i) {
699 898720 : total_size_bytes += names[i].size();
700 898720 : }
701 :
702 : // Now allocate the exact number of bytes required and move the encodings
703 : // into storage.
704 392190 : MemBlockBuilder<uint8_t> mem_block(total_size_bytes);
705 392190 : mem_block.appendOne(num_names);
706 1290910 : for (uint32_t i = 0; i < num_names; ++i) {
707 898720 : const StatName stat_name = names[i];
708 898720 : Encoding::appendToMemBlock(stat_name, mem_block);
709 898720 : incRefCount(stat_name);
710 898720 : }
711 :
712 : // This assertion double-checks the arithmetic where we computed
713 : // total_size_bytes. After appending all the encoded data into the
714 : // allocated byte array, we should have exhausted all the memory
715 : // we though we needed.
716 392190 : ASSERT(mem_block.capacityRemaining() == 0);
717 392190 : list.moveStorageIntoList(mem_block.release());
718 392190 : }
719 :
720 392190 : StatNameList::~StatNameList() { ASSERT(!populated()); }
721 :
722 2843332 : void StatNameList::iterate(const std::function<bool(StatName)>& f) const {
723 2843332 : const uint8_t* p = &storage_[0];
724 2843332 : const uint32_t num_elements = *p++;
725 3780628 : for (uint32_t i = 0; i < num_elements; ++i) {
726 3388434 : const StatName stat_name(p);
727 3388434 : p += stat_name.size();
728 3388434 : if (!f(stat_name)) {
729 2451138 : break;
730 2451138 : }
731 3388434 : }
732 2843332 : }
733 :
734 392190 : void StatNameList::clear(SymbolTable& symbol_table) {
735 898720 : iterate([&symbol_table](StatName stat_name) -> bool {
736 898720 : symbol_table.free(stat_name);
737 898720 : return true;
738 898720 : });
739 392190 : storage_.reset();
740 392190 : }
741 :
742 : StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name)
743 188 : : name_(std::string(name)), pool_(symbol_table) {
744 188 : builtin_stat_names_[""] = StatName();
745 188 : }
746 :
747 1927 : void StatNameSet::rememberBuiltin(absl::string_view str) {
748 1927 : StatName stat_name;
749 1927 : {
750 1927 : absl::MutexLock lock(&mutex_);
751 1927 : stat_name = pool_.add(str);
752 1927 : }
753 1927 : builtin_stat_names_[str] = stat_name;
754 1927 : }
755 :
756 45705 : StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const {
757 : // If token was recorded as a built-in during initialization, we can
758 : // service this request lock-free.
759 45705 : const auto iter = builtin_stat_names_.find(token);
760 45705 : if (iter != builtin_stat_names_.end()) {
761 27240 : return iter->second;
762 27240 : }
763 18465 : return fallback;
764 45705 : }
765 :
766 : } // namespace Stats
767 : } // namespace Envoy
|