LCOV - code coverage report
Current view: top level - source/common/stats - symbol_table.cc (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 378 454 83.3 %
Date: 2024-01-05 06:35:25 Functions: 54 62 87.1 %

          Line data    Source code
       1             : #include "source/common/stats/symbol_table.h"
       2             : 
       3             : #include <algorithm>
       4             : #include <iostream>
       5             : #include <memory>
       6             : #include <vector>
       7             : 
       8             : #include "source/common/common/assert.h"
       9             : #include "source/common/common/logger.h"
      10             : #include "source/common/common/utility.h"
      11             : 
      12             : #include "absl/strings/str_cat.h"
      13             : 
      14             : namespace Envoy {
      15             : namespace Stats {
      16             : 
      17             : // Masks used for variable-length encoding of arbitrary-sized integers into a
      18             : // uint8-array. The integers are typically small, so we try to store them in as
      19             : // few bytes as possible. The bottom 7 bits hold values, and the top bit is used
      20             : // to determine whether another byte is needed for more data.
      21             : static constexpr uint32_t SpilloverMask = 0x80;
      22             : static constexpr uint32_t Low7Bits = 0x7f;
      23             : 
      24             : // When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol
      25             : // that will decode into uint8_t array starting (and ending) with {0}. Thus we
      26             : // can use a leading 0 in the first byte to indicate that what follows is
      27             : // literal char data, rather than symbol-table entries. Literal char data is
      28             : // used for dynamically discovered stat-name tokens where you don't want to take
      29             : // a symbol table lock, and would rather pay extra memory overhead to store the
      30             : // tokens as fully elaborated strings.
      31             : static constexpr Symbol FirstValidSymbol = 1;
      32             : static constexpr uint8_t LiteralStringIndicator = 0;
      33             : 
      34    25631677 : size_t StatName::dataSize() const {
      35    25631677 :   if (size_and_data_ == nullptr) {
      36      529874 :     return 0;
      37      529874 :   }
      38    25101803 :   return SymbolTable::Encoding::decodeNumber(size_and_data_).first;
      39    25631677 : }
      40             : 
      41             : #ifndef ENVOY_CONFIG_COVERAGE
      42             : void StatName::debugPrint() {
      43             :   // TODO(jmarantz): capture this functionality (always prints regardless of
      44             :   // loglevel) in an ENVOY_LOG macro variant or similar, perhaps
      45             :   // ENVOY_LOG_MISC(stderr, ...);
      46             :   if (size_and_data_ == nullptr) {
      47             :     std::cerr << "Null StatName" << std::endl;
      48             :   } else {
      49             :     const size_t nbytes = dataSize();
      50             :     std::cerr << "dataSize=" << nbytes << ":";
      51             :     for (size_t i = 0; i < nbytes; ++i) {
      52             :       std::cerr << " " << static_cast<uint64_t>(data()[i]);
      53             :     }
      54             :     const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this);
      55             :     std::cerr << ", numSymbols=" << encoding.size() << ":";
      56             :     for (Symbol symbol : encoding) {
      57             :       std::cerr << " " << symbol;
      58             :     }
      59             :     std::cerr << std::endl;
      60             :   }
      61             : }
      62             : #endif
      63             : 
      64      669257 : SymbolTable::Encoding::~Encoding() {
      65             :   // Verifies that moveToMemBlock() was called on this encoding. Failure
      66             :   // to call moveToMemBlock() will result in leaks symbols.
      67      669257 :   ASSERT(mem_block_.capacity() == 0);
      68      669257 : }
      69             : 
      70    14748576 : size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) {
      71    14748576 :   size_t num_bytes = 0;
      72    15013948 :   do {
      73    15013948 :     ++num_bytes;
      74    15013948 :     number >>= 7;
      75    15013948 :   } while (number != 0);
      76    14748576 :   return num_bytes;
      77    14748576 : }
      78             : 
      79     3163902 : void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) {
      80             :   // UTF-8-like encoding where a value 127 or less gets written as a single
      81             :   // byte. For higher values we write the low-order 7 bits with a 1 in
      82             :   // the high-order bit. Then we right-shift 7 bits and keep adding more bytes
      83             :   // until we have consumed all the non-zero bits in symbol.
      84             :   //
      85             :   // When decoding, we stop consuming uint8_t when we see a uint8_t with
      86             :   // high-order bit 0.
      87     3427589 :   do {
      88     3427589 :     if (number < (1 << 7)) {
      89     3163904 :       mem_block.appendOne(number); // number <= 127 gets encoded in one byte.
      90     3163904 :     } else {
      91      263685 :       mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes.
      92      263685 :     }
      93     3427589 :     number >>= 7;
      94     3427589 :   } while (number != 0);
      95     3163902 : }
      96             : 
      97      620598 : void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) {
      98      620598 :   ASSERT(data_bytes_required_ == 0);
      99      878748 :   for (Symbol symbol : symbols) {
     100      878748 :     data_bytes_required_ += encodingSizeBytes(symbol);
     101      878748 :   }
     102      620598 :   mem_block_.setCapacity(data_bytes_required_);
     103      878749 :   for (Symbol symbol : symbols) {
     104      878749 :     appendEncoding(symbol, mem_block_);
     105      878749 :   }
     106      620598 : }
     107             : 
     108    34708935 : std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) {
     109    34708935 :   uint64_t number = 0;
     110    34708935 :   uint64_t uc = SpilloverMask;
     111    34708935 :   const uint8_t* start = encoding;
     112    71947816 :   for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) {
     113    37238881 :     uc = static_cast<uint32_t>(*encoding);
     114    37238881 :     number |= (uc & Low7Bits) << shift;
     115    37238881 :   }
     116    34708935 :   return std::make_pair(number, encoding - start);
     117    34708935 : }
     118             : 
     119     2519011 : SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) {
     120     2519011 :   SymbolVec symbol_vec;
     121     2519011 :   symbol_vec.reserve(stat_name.dataSize());
     122     2519011 :   decodeTokens(
     123     3401908 :       stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); },
     124     2526006 :       [](absl::string_view) {});
     125     2519011 :   return symbol_vec;
     126     2519011 : }
     127             : 
     128             : // Iterator for decoding StatName tokens. This is not an STL-compatible
     129             : // iterator; it is used only locally. Using this rather than decodeSymbols makes
     130             : // it easier to early-exit out of a comparison once we know the answer. For
     131             : // example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order
     132             : // when you get to the "b" vs "x" and do not need to decode "c" vs "y".
     133             : //
     134             : // This is also helpful for prefix-matching.
     135             : //
     136             : // The array-version of decodeSymbols is still a good approach for converting a
     137             : // StatName to a string as the intermediate array of string_view is needed to
     138             : // allocate the right size for the joined string.
     139             : class SymbolTable::Encoding::TokenIter {
     140             : public:
     141             :   // The type of token reached.
     142             :   enum class TokenType { StringView, Symbol, End };
     143             : 
     144     3566765 :   TokenIter(StatName stat_name) {
     145     3566765 :     const uint8_t* raw_data = stat_name.dataIncludingSize();
     146     3566765 :     if (raw_data == nullptr) {
     147       86084 :       size_ = 0;
     148       86084 :       array_ = nullptr;
     149     3480681 :     } else {
     150     3480681 :       std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data);
     151     3480681 :       size_ = size_consumed.first;
     152     3480681 :       array_ = raw_data + size_consumed.second;
     153     3480681 :     }
     154     3566765 :   }
     155             : 
     156             :   /**
     157             :    * Parses the next token. The token can then be retrieved by calling
     158             :    * stringView() or symbol().
     159             :    * @return The type of token, TokenType::End if we have reached the end of the StatName.
     160             :    */
     161     9645027 :   TokenType next() {
     162     9645027 :     if (size_ == 0) {
     163     3566774 :       return TokenType::End;
     164     3566774 :     }
     165     6078253 :     if (*array_ == LiteralStringIndicator) {
     166             :       // To avoid scanning memory to find the literal size during decode, we
     167             :       // var-length encode the size of the literal string prior to the data.
     168      232379 :       ASSERT(size_ > 1);
     169      232379 :       ++array_;
     170      232379 :       --size_;
     171      232379 :       std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_);
     172      232379 :       uint64_t length = length_consumed.first;
     173      232379 :       array_ += length_consumed.second;
     174      232379 :       size_ -= length_consumed.second;
     175      232379 :       ASSERT(size_ >= length);
     176      232379 :       string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length);
     177      232379 :       size_ -= length;
     178      232379 :       array_ += length;
     179             : #ifndef NDEBUG
     180             :       last_type_ = TokenType::StringView;
     181             : #endif
     182      232379 :       return TokenType::StringView;
     183      232379 :     }
     184     5845874 :     std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_);
     185     5845874 :     symbol_ = symbol_consumed.first;
     186     5845874 :     size_ -= symbol_consumed.second;
     187     5845874 :     array_ += symbol_consumed.second;
     188             : #ifndef NDEBUG
     189             :     last_type_ = TokenType::Symbol;
     190             : #endif
     191     5845874 :     return TokenType::Symbol;
     192     6078253 :   }
     193             : 
     194             :   /** @return the current string_view -- only valid to call if next()==TokenType::StringView */
     195      232379 :   absl::string_view stringView() const {
     196             : #ifndef NDEBUG
     197             :     ASSERT(last_type_ == TokenType::StringView);
     198             : #endif
     199      232379 :     return string_view_;
     200      232379 :   }
     201             : 
     202             :   /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */
     203     5845926 :   Symbol symbol() const {
     204             : #ifndef NDEBUG
     205             :     ASSERT(last_type_ == TokenType::Symbol);
     206             : #endif
     207     5845926 :     return symbol_;
     208     5845926 :   }
     209             : 
     210             : private:
     211             :   const uint8_t* array_;
     212             :   size_t size_;
     213             :   absl::string_view string_view_;
     214             :   Symbol symbol_;
     215             : #ifndef NDEBUG
     216             :   TokenType last_type_{TokenType::End};
     217             : #endif
     218             : };
     219             : 
     220             : void SymbolTable::Encoding::decodeTokens(
     221             :     StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
     222     3566771 :     const std::function<void(absl::string_view)>& string_view_token_fn) {
     223     3566771 :   TokenIter iter(stat_name);
     224     3566771 :   TokenIter::TokenType type;
     225     9645086 :   while ((type = iter.next()) != TokenIter::TokenType::End) {
     226     6078315 :     if (type == TokenIter::TokenType::StringView) {
     227      232379 :       string_view_token_fn(iter.stringView());
     228     5908003 :     } else {
     229     5845936 :       ASSERT(type == TokenIter::TokenType::Symbol);
     230     5845936 :       symbol_token_fn(iter.symbol());
     231     5845936 :     }
     232     6078315 :   }
     233     3566771 : }
     234             : 
     235           0 : bool StatName::startsWith(StatName prefix) const {
     236           0 :   using TokenIter = SymbolTable::Encoding::TokenIter;
     237           0 :   TokenIter prefix_iter(prefix);
     238           0 :   TokenIter this_iter(*this);
     239           0 :   while (true) {
     240           0 :     TokenIter::TokenType prefix_type = prefix_iter.next();
     241           0 :     TokenIter::TokenType this_type = this_iter.next();
     242           0 :     if (prefix_type == TokenIter::TokenType::End) {
     243           0 :       return true; // "a.b.c" starts with "a.b" or "a.b.c"
     244           0 :     }
     245           0 :     if (this_type == TokenIter::TokenType::End) {
     246           0 :       return false; // "a.b" does not start with "a.b.c"
     247           0 :     }
     248           0 :     if (prefix_type != TokenIter::TokenType::Symbol) {
     249             :       // Disallow dynamic components in the prefix. We don't have a current need
     250             :       // for prefixes to be expressed dynamically, and handling that case would
     251             :       // add complexity. In particular we'd need to take locks to decode to
     252             :       // strings.
     253           0 :       return false;
     254           0 :     }
     255           0 :     if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) {
     256           0 :       return false;
     257           0 :     }
     258           0 :   }
     259           0 :   return true; // not reached
     260           0 : }
     261             : 
     262     1047512 : std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const {
     263     1047512 :   std::vector<absl::string_view> strings;
     264     1047512 :   Thread::LockGuard lock(lock_);
     265     1047512 :   Encoding::decodeTokens(
     266     1047512 :       stat_name,
     267     1047512 :       [this, &strings](Symbol symbol)
     268     2507448 :           ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); },
     269     1049766 :       [&strings](absl::string_view str) { strings.push_back(str); });
     270     1047512 :   return strings;
     271     1047512 : }
     272             : 
     273      669259 : void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) {
     274      669259 :   appendEncoding(data_bytes_required_, mem_block);
     275      669259 :   mem_block.appendBlock(mem_block_);
     276      669259 :   mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct.
     277      669259 : }
     278             : 
     279             : void SymbolTable::Encoding::appendToMemBlock(StatName stat_name,
     280      898720 :                                              MemBlockBuilder<uint8_t>& mem_block) {
     281      898720 :   const uint8_t* data = stat_name.dataIncludingSize();
     282      898720 :   if (data == nullptr) {
     283       86084 :     mem_block.appendOne(0);
     284      813004 :   } else {
     285      812636 :     mem_block.appendData(absl::MakeSpan(data, stat_name.size()));
     286      812636 :   }
     287      898720 : }
     288             : 
     289             : SymbolTable::SymbolTable()
     290             :     // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro.
     291        5788 :     : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {}
     292             : 
     293        5788 : SymbolTable::~SymbolTable() {
     294             :   // To avoid leaks into the symbol table, we expect all StatNames to be freed.
     295             :   // Note: this could potentially be short-circuited if we decide a fast exit
     296             :   // is needed in production. But it would be good to ensure clean up during
     297             :   // tests.
     298        5788 :   ASSERT(numSymbols() == 0);
     299        5788 : }
     300             : 
     301             : // TODO(ambuc): There is a possible performance optimization here for avoiding
     302             : // the encoding of IPs / numbers if they appear in stat names. We don't want to
     303             : // waste time symbolizing an integer as an integer, if we can help it.
     304      669264 : void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) {
     305      669264 :   if (name.empty()) {
     306       48665 :     return;
     307       48665 :   }
     308             : 
     309             :   // We want to hold the lock for the minimum amount of time, so we do the
     310             :   // string-splitting and prepare a temp vector of Symbol first.
     311      620599 :   const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.');
     312      620599 :   std::vector<Symbol> symbols;
     313      620599 :   symbols.reserve(tokens.size());
     314             : 
     315             :   // Now take the lock and populate the Symbol objects, which involves bumping
     316             :   // ref-counts in this.
     317      620599 :   {
     318      620599 :     Thread::LockGuard lock(lock_);
     319      620599 :     recent_lookups_.lookup(name);
     320      878747 :     for (auto& token : tokens) {
     321             :       // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with
     322             :       // length below some threshold, say 4 bytes. It might be preferable not to
     323             :       // reserve Symbols for every 3 digit number found (for example) in ipv4
     324             :       // addresses.
     325      878747 :       symbols.push_back(toSymbol(token));
     326      878747 :     }
     327      620599 :   }
     328             : 
     329             :   // Now efficiently encode the array of 32-bit symbols into a uint8_t array.
     330      620599 :   encoding.addSymbols(symbols);
     331      620599 : }
     332             : 
     333           0 : uint64_t SymbolTable::numSymbols() const {
     334           0 :   Thread::LockGuard lock(lock_);
     335           0 :   ASSERT(encode_map_.size() == decode_map_.size());
     336           0 :   return encode_map_.size();
     337           0 : }
     338             : 
     339     1047508 : std::string SymbolTable::toString(const StatName& stat_name) const {
     340     1047508 :   return absl::StrJoin(decodeStrings(stat_name), ".");
     341     1047508 : }
     342             : 
     343      924876 : void SymbolTable::incRefCount(const StatName& stat_name) {
     344             :   // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
     345      924876 :   const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
     346             : 
     347      924876 :   Thread::LockGuard lock(lock_);
     348     1255916 :   for (Symbol symbol : symbols) {
     349     1227890 :     auto decode_search = decode_map_.find(symbol);
     350             : 
     351     1227890 :     ASSERT(decode_search != decode_map_.end(),
     352     1227890 :            "Please see "
     353     1227890 :            "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
     354     1227890 :            "debugging-symbol-table-assertions");
     355     1227890 :     auto encode_search = encode_map_.find(decode_search->second->toStringView());
     356     1227890 :     ASSERT(encode_search != encode_map_.end(),
     357     1227890 :            "Please see "
     358     1227890 :            "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
     359     1227890 :            "debugging-symbol-table-assertions");
     360             : 
     361     1227890 :     ++encode_search->second.ref_count_;
     362     1227890 :   }
     363      924876 : }
     364             : 
     365     1594138 : void SymbolTable::free(const StatName& stat_name) {
     366             :   // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
     367     1594138 :   const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
     368             : 
     369     1594138 :   Thread::LockGuard lock(lock_);
     370     2146006 :   for (Symbol symbol : symbols) {
     371     2106635 :     auto decode_search = decode_map_.find(symbol);
     372     2106635 :     ASSERT(decode_search != decode_map_.end());
     373             : 
     374     2106635 :     auto encode_search = encode_map_.find(decode_search->second->toStringView());
     375     2106635 :     ASSERT(encode_search != encode_map_.end());
     376             : 
     377             :     // If that was the last remaining client usage of the symbol, erase the
     378             :     // current mappings and add the now-unused symbol to the reuse pool.
     379             :     //
     380             :     // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in
     381             :     // symbol_table_speed_test.cc, relative to breaking out the decrement into a
     382             :     // separate step, likely due to the non-trivial dereferences in EXPR.
     383     2106635 :     if (--encode_search->second.ref_count_ == 0) {
     384      247028 :       decode_map_.erase(decode_search);
     385      247028 :       encode_map_.erase(encode_search);
     386      247028 :       pool_.push(symbol);
     387      247028 :     }
     388     2106635 :   }
     389     1594138 : }
     390             : 
     391         205 : uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const {
     392         205 :   uint64_t total = 0;
     393         205 :   absl::flat_hash_map<std::string, uint64_t> name_count_map;
     394             : 
     395             :   // We don't want to hold lock_ while calling the iterator, but we need it to
     396             :   // access recent_lookups_, so we buffer in name_count_map.
     397         205 :   {
     398         205 :     Thread::LockGuard lock(lock_);
     399         205 :     recent_lookups_.forEach(
     400         205 :         [&name_count_map](absl::string_view str, uint64_t count)
     401         205 :             ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; });
     402         205 :     total += recent_lookups_.total();
     403         205 :   }
     404             : 
     405             :   // Now we have the collated name-count map data: we need to vectorize and
     406             :   // sort. We define the pair with the count first as std::pair::operator<
     407             :   // prioritizes its first element over its second.
     408         205 :   using LookupCount = std::pair<uint64_t, absl::string_view>;
     409         205 :   std::vector<LookupCount> lookup_data;
     410         205 :   lookup_data.reserve(name_count_map.size());
     411         205 :   for (const auto& iter : name_count_map) {
     412           0 :     lookup_data.emplace_back(LookupCount(iter.second, iter.first));
     413           0 :   }
     414         205 :   std::sort(lookup_data.begin(), lookup_data.end());
     415         205 :   for (const LookupCount& lookup_count : lookup_data) {
     416           0 :     iter(lookup_count.second, lookup_count.first);
     417           0 :   }
     418         205 :   return total;
     419         205 : }
     420             : 
     421         247 : DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const {
     422         247 :   DynamicSpans dynamic_spans;
     423             : 
     424         247 :   uint32_t index = 0;
     425        1374 :   auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) {
     426        1374 :     DynamicSpan span;
     427        1374 :     span.first = index;
     428        1374 :     index += std::count(str.begin(), str.end(), '.');
     429        1374 :     span.second = index;
     430        1374 :     ++index;
     431        1374 :     dynamic_spans.push_back(span);
     432        1374 :   };
     433             : 
     434             :   // Use decodeTokens to suss out which components of stat_name are
     435             :   // symbolic vs dynamic. The lambda that takes a Symbol is called
     436             :   // for symbolic components. The lambda called with a string_view
     437             :   // is called for dynamic components.
     438             :   //
     439             :   // Note that with fake symbol tables, the Symbol lambda is called
     440             :   // once for each character in the string, and no dynamics will
     441             :   // be recorded.
     442         247 :   Encoding::decodeTokens(
     443        4436 :       stat_name, [&index](Symbol) { ++index; }, record_dynamic);
     444         247 :   return dynamic_spans;
     445         247 : }
     446             : 
     447           0 : void SymbolTable::setRecentLookupCapacity(uint64_t capacity) {
     448           0 :   Thread::LockGuard lock(lock_);
     449           0 :   recent_lookups_.setCapacity(capacity);
     450           0 : }
     451             : 
     452           0 : void SymbolTable::clearRecentLookups() {
     453           0 :   Thread::LockGuard lock(lock_);
     454           0 :   recent_lookups_.clear();
     455           0 : }
     456             : 
     457           0 : uint64_t SymbolTable::recentLookupCapacity() const {
     458           0 :   Thread::LockGuard lock(lock_);
     459           0 :   return recent_lookups_.capacity();
     460           0 : }
     461             : 
     462         188 : StatNameSetPtr SymbolTable::makeSet(absl::string_view name) {
     463             :   // make_unique does not work with private ctor, even though SymbolTable is a friend.
     464         188 :   StatNameSetPtr stat_name_set(new StatNameSet(*this, name));
     465         188 :   return stat_name_set;
     466         188 : }
     467             : 
     468      878749 : Symbol SymbolTable::toSymbol(absl::string_view sv) {
     469      878749 :   Symbol result;
     470      878749 :   auto encode_find = encode_map_.find(sv);
     471             :   // If the string segment doesn't already exist,
     472      878749 :   if (encode_find == encode_map_.end()) {
     473             :     // We create the actual string, place it in the decode_map_, and then insert
     474             :     // a string_view pointing to it in the encode_map_. This allows us to only
     475             :     // store the string once. We use unique_ptr so copies are not made as
     476             :     // flat_hash_map moves values around.
     477      247028 :     InlineStringPtr str = InlineString::create(sv);
     478      247028 :     auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)});
     479      247028 :     ASSERT(encode_insert.second);
     480      247028 :     auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
     481      247028 :     ASSERT(decode_insert.second);
     482             : 
     483      247028 :     result = next_symbol_;
     484      247028 :     newSymbol();
     485      716317 :   } else {
     486             :     // If the insertion didn't take place, return the actual value at that location and up the
     487             :     // refcount at that location
     488      631721 :     result = encode_find->second.symbol_;
     489      631721 :     ++(encode_find->second.ref_count_);
     490      631721 :   }
     491      878749 :   return result;
     492      878749 : }
     493             : 
     494             : absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
     495     2507152 :     ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
     496     2507152 :   auto search = decode_map_.find(symbol);
     497     2507152 :   RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
     498     2507152 :   return search->second->toStringView();
     499     2507152 : }
     500             : 
     501      247028 : void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
     502      247028 :   if (pool_.empty()) {
     503      229390 :     next_symbol_ = ++monotonic_counter_;
     504      244133 :   } else {
     505       17638 :     next_symbol_ = pool_.top();
     506       17638 :     pool_.pop();
     507       17638 :   }
     508             :   // This should catch integer overflow for the new symbol.
     509      247028 :   ASSERT(monotonic_counter_ != 0);
     510      247028 : }
     511             : 
     512           0 : bool SymbolTable::lessThan(const StatName& a, const StatName& b) const {
     513             :   // Proactively take the table lock in anticipation that we'll need to
     514             :   // convert at least one symbol to a string_view, and it's easier not to
     515             :   // bother to lazily take the lock.
     516           0 :   Thread::LockGuard lock(lock_);
     517           0 :   return lessThanLockHeld(a, b);
     518           0 : }
     519             : 
     520             : bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const
     521           0 :     ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
     522           0 :   Encoding::TokenIter a_iter(a), b_iter(b);
     523           0 :   while (true) {
     524           0 :     Encoding::TokenIter::TokenType a_type = a_iter.next();
     525           0 :     Encoding::TokenIter::TokenType b_type = b_iter.next();
     526           0 :     if (b_type == Encoding::TokenIter::TokenType::End) {
     527           0 :       return false; // "x.y.z" > "x.y", "x.y" == "x.y"
     528           0 :     }
     529           0 :     if (a_type == Encoding::TokenIter::TokenType::End) {
     530           0 :       return true; // "x.y" < "x.y.z"
     531           0 :     }
     532           0 :     if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol &&
     533           0 :         a_iter.symbol() == b_iter.symbol()) {
     534           0 :       continue; // matching symbols don't need to be decoded to strings
     535           0 :     }
     536             : 
     537           0 :     absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol
     538           0 :                                     ? fromSymbol(a_iter.symbol())
     539           0 :                                     : a_iter.stringView();
     540           0 :     absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol
     541           0 :                                     ? fromSymbol(b_iter.symbol())
     542           0 :                                     : b_iter.stringView();
     543           0 :     if (a_token != b_token) {
     544           0 :       return a_token < b_token;
     545           0 :     }
     546           0 :   }
     547           0 : }
     548             : 
     549             : #ifndef ENVOY_CONFIG_COVERAGE
     550             : void SymbolTable::debugPrint() const {
     551             :   Thread::LockGuard lock(lock_);
     552             :   std::vector<Symbol> symbols;
     553             :   for (const auto& p : decode_map_) {
     554             :     symbols.push_back(p.first);
     555             :   }
     556             :   std::sort(symbols.begin(), symbols.end());
     557             :   for (Symbol symbol : symbols) {
     558             :     const InlineString& token = *decode_map_.find(symbol)->second;
     559             :     const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second;
     560             :     ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_);
     561             :   }
     562             : }
     563             : #endif
     564             : 
     565      669264 : SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) {
     566      669264 :   name = StringUtil::removeTrailingCharacters(name, '.');
     567      669264 :   Encoding encoding;
     568      669264 :   addTokensToEncoding(name, encoding);
     569      669264 :   MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired()));
     570      669264 :   encoding.moveToMemBlock(mem_block);
     571      669264 :   return mem_block.release();
     572      669264 : }
     573             : 
     574             : StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table)
     575      669264 :     : StatNameStorageBase(table.encode(name)) {}
     576             : 
     577       26156 : StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) {
     578       26156 :   const size_t size = src.size();
     579       26156 :   MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t.
     580       26156 :   src.copyToMemBlock(storage);
     581       26156 :   setBytes(storage.release());
     582       26156 :   table.incRefCount(statName());
     583       26156 : }
     584             : 
     585      258696 : SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) {
     586      258696 :   name = StringUtil::removeTrailingCharacters(name, '.');
     587             : 
     588             :   // For all StatName objects, we first have the total number of bytes in the
     589             :   // representation. But for inlined dynamic string StatName variants, we must
     590             :   // store the length of the payload separately, so that if this token gets
     591             :   // joined with others, we'll know much space it consumes until the next token.
     592             :   // So the layout is
     593             :   //   [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ]
     594             :   // So we need to figure out how many bytes we need to represent length-of-name
     595             :   // and name.
     596             : 
     597             :   // payload_bytes is the total number of bytes needed to represent the
     598             :   // characters in name, plus their encoded size, plus the literal indicator.
     599      258696 :   const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1;
     600             : 
     601             :   // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and
     602             :   // the length of those.
     603      258696 :   const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes);
     604      258696 :   MemBlockBuilder<uint8_t> mem_block(total_bytes);
     605             : 
     606      258696 :   Encoding::appendEncoding(payload_bytes, mem_block);
     607      258696 :   mem_block.appendOne(LiteralStringIndicator);
     608      258696 :   Encoding::appendEncoding(name.size(), mem_block);
     609      258696 :   mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size()));
     610      258696 :   ASSERT(mem_block.capacityRemaining() == 0);
     611      258696 :   return mem_block.release();
     612      258696 : }
     613             : 
     614     1784778 : StatNameStorage::~StatNameStorage() {
     615             :   // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to
     616             :   // decrement the reference counts held by the SymbolTable on behalf of
     617             :   // this StatName. This saves 8 bytes of storage per stat, relative to
     618             :   // holding a SymbolTable& in each StatNameStorage object.
     619     1784778 :   ASSERT(bytes() == nullptr);
     620     1784778 : }
     621             : 
     622      695418 : void StatNameStorage::free(SymbolTable& table) {
     623      695418 :   table.free(statName());
     624      695418 :   clear();
     625      695418 : }
     626             : 
     627       85929 : void StatNamePool::clear() {
     628      525608 :   for (StatNameStorage& storage : storage_vector_) {
     629      525056 :     storage.free(symbol_table_);
     630      525056 :   }
     631       85929 :   storage_vector_.clear();
     632       85929 : }
     633             : 
     634      525057 : const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) {
     635      525057 :   storage_vector_.push_back(Stats::StatNameStorage(str, symbol_table_));
     636      525057 :   return storage_vector_.back().bytes();
     637      525057 : }
     638             : 
     639      520168 : StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); }
     640             : 
     641       78299 : StatName StatNameDynamicPool::add(absl::string_view str) {
     642       78299 :   storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_));
     643       78299 :   return StatName(storage_vector_.back().bytes());
     644       78299 : }
     645             : 
     646        1443 : StatNameStorageSet::~StatNameStorageSet() {
     647             :   // free() must be called before destructing StatNameStorageSet to decrement
     648             :   // references to all symbols.
     649        1443 :   ASSERT(hash_set_.empty());
     650        1443 : }
     651             : 
     652        1443 : void StatNameStorageSet::free(SymbolTable& symbol_table) {
     653             :   // We must free() all symbols referenced in the set, otherwise the symbols
     654             :   // will leak when the flat_hash_map superclass is destructed. They cannot
     655             :   // self-destruct without an explicit free() as each individual StatNameStorage
     656             :   // object does not have a reference to the symbol table, which would waste 8
     657             :   // bytes per stat-name. The easiest way to safely free all the contents of the
     658             :   // symbol table set is to use flat_hash_map::extract(), which removes and
     659             :   // returns an element from the set without destructing the element
     660             :   // immediately. This gives us a chance to call free() on each one before they
     661             :   // are destroyed.
     662             :   //
     663             :   // There's a performance risk here, if removing elements via
     664             :   // flat_hash_set::begin() is inefficient to use in a loop like this. One can
     665             :   // imagine a hash-table implementation where the performance of this
     666             :   // usage-model would be poor. However, tests with 100k elements appeared to
     667             :   // run quickly when compiled for optimization, so at present this is not a
     668             :   // performance issue.
     669             : 
     670        1443 :   while (!hash_set_.empty()) {
     671           0 :     auto storage = hash_set_.extract(hash_set_.begin());
     672           0 :     storage.value().free(symbol_table);
     673           0 :   }
     674        1443 : }
     675             : 
     676     1049185 : SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const {
     677     1049185 :   size_t num_bytes = 0;
     678     2400464 :   for (StatName stat_name : stat_names) {
     679     2400464 :     if (!stat_name.empty()) {
     680     1235490 :       num_bytes += stat_name.dataSize();
     681     1235490 :     }
     682     2400464 :   }
     683     1049185 :   MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes));
     684     1049185 :   Encoding::appendEncoding(num_bytes, mem_block);
     685     2400464 :   for (StatName stat_name : stat_names) {
     686     2400464 :     stat_name.appendDataToMemBlock(mem_block);
     687     2400464 :   }
     688     1049185 :   ASSERT(mem_block.capacityRemaining() == 0);
     689     1049185 :   return mem_block.release();
     690     1049185 : }
     691             : 
     692      392190 : void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) {
     693      392190 :   RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded");
     694             : 
     695             :   // First encode all the names.
     696      392190 :   size_t total_size_bytes = 1; /* one byte for holding the number of names */
     697             : 
     698     1290910 :   for (uint32_t i = 0; i < num_names; ++i) {
     699      898720 :     total_size_bytes += names[i].size();
     700      898720 :   }
     701             : 
     702             :   // Now allocate the exact number of bytes required and move the encodings
     703             :   // into storage.
     704      392190 :   MemBlockBuilder<uint8_t> mem_block(total_size_bytes);
     705      392190 :   mem_block.appendOne(num_names);
     706     1290910 :   for (uint32_t i = 0; i < num_names; ++i) {
     707      898720 :     const StatName stat_name = names[i];
     708      898720 :     Encoding::appendToMemBlock(stat_name, mem_block);
     709      898720 :     incRefCount(stat_name);
     710      898720 :   }
     711             : 
     712             :   // This assertion double-checks the arithmetic where we computed
     713             :   // total_size_bytes. After appending all the encoded data into the
     714             :   // allocated byte array, we should have exhausted all the memory
     715             :   // we though we needed.
     716      392190 :   ASSERT(mem_block.capacityRemaining() == 0);
     717      392190 :   list.moveStorageIntoList(mem_block.release());
     718      392190 : }
     719             : 
     720      392190 : StatNameList::~StatNameList() { ASSERT(!populated()); }
     721             : 
     722     2843332 : void StatNameList::iterate(const std::function<bool(StatName)>& f) const {
     723     2843332 :   const uint8_t* p = &storage_[0];
     724     2843332 :   const uint32_t num_elements = *p++;
     725     3780628 :   for (uint32_t i = 0; i < num_elements; ++i) {
     726     3388434 :     const StatName stat_name(p);
     727     3388434 :     p += stat_name.size();
     728     3388434 :     if (!f(stat_name)) {
     729     2451138 :       break;
     730     2451138 :     }
     731     3388434 :   }
     732     2843332 : }
     733             : 
     734      392190 : void StatNameList::clear(SymbolTable& symbol_table) {
     735      898720 :   iterate([&symbol_table](StatName stat_name) -> bool {
     736      898720 :     symbol_table.free(stat_name);
     737      898720 :     return true;
     738      898720 :   });
     739      392190 :   storage_.reset();
     740      392190 : }
     741             : 
     742             : StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name)
     743         188 :     : name_(std::string(name)), pool_(symbol_table) {
     744         188 :   builtin_stat_names_[""] = StatName();
     745         188 : }
     746             : 
     747        1927 : void StatNameSet::rememberBuiltin(absl::string_view str) {
     748        1927 :   StatName stat_name;
     749        1927 :   {
     750        1927 :     absl::MutexLock lock(&mutex_);
     751        1927 :     stat_name = pool_.add(str);
     752        1927 :   }
     753        1927 :   builtin_stat_names_[str] = stat_name;
     754        1927 : }
     755             : 
     756       45705 : StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const {
     757             :   // If token was recorded as a built-in during initialization, we can
     758             :   // service this request lock-free.
     759       45705 :   const auto iter = builtin_stat_names_.find(token);
     760       45705 :   if (iter != builtin_stat_names_.end()) {
     761       27240 :     return iter->second;
     762       27240 :   }
     763       18465 :   return fallback;
     764       45705 : }
     765             : 
     766             : } // namespace Stats
     767             : } // namespace Envoy

Generated by: LCOV version 1.15