LCOV - code coverage report
Current view: top level - source/common/stats - symbol_table.h (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 66 92 71.7 %
Date: 2024-01-05 06:35:25 Functions: 33 51 64.7 %

          Line data    Source code
       1             : #pragma once
       2             : 
       3             : #include <algorithm>
       4             : #include <memory>
       5             : #include <stack>
       6             : #include <string>
       7             : #include <vector>
       8             : 
       9             : #include "source/common/common/assert.h"
      10             : #include "source/common/common/lock_guard.h"
      11             : #include "source/common/common/mem_block_builder.h"
      12             : #include "source/common/common/non_copyable.h"
      13             : #include "source/common/common/thread.h"
      14             : #include "source/common/common/utility.h"
      15             : #include "source/common/stats/recent_lookups.h"
      16             : 
      17             : #include "absl/container/fixed_array.h"
      18             : #include "absl/container/flat_hash_map.h"
      19             : #include "absl/container/inlined_vector.h"
      20             : #include "absl/strings/str_join.h"
      21             : #include "absl/strings/str_split.h"
      22             : 
      23             : namespace Envoy {
      24             : namespace Stats {
      25             : 
      26             : class StatName;
      27             : using StatNameVec = absl::InlinedVector<StatName, 8>;
      28             : class StatNameList;
      29             : class StatNameSet;
      30             : using StatNameSetPtr = std::unique_ptr<StatNameSet>;
      31             : 
      32             : /**
      33             :  * Holds a range of indexes indicating which parts of a stat-name are
      34             :  * dynamic. This is used to transfer stats from hot-restart parent to child,
      35             :  * retaining the same name structure.
      36             :  */
      37             : using DynamicSpan = std::pair<uint32_t, uint32_t>;
      38             : using DynamicSpans = std::vector<DynamicSpan>;
      39             : 
      40             : /** A Symbol represents a string-token with a small index. */
      41             : using Symbol = uint32_t;
      42             : 
      43             : /** Transient representations of a vector of 32-bit symbols */
      44             : using SymbolVec = std::vector<Symbol>;
      45             : 
      46             : /**
      47             :  * SymbolTableImpl manages a namespace optimized for stats, which are typically
      48             :  * composed of arrays of "."-separated tokens, with a significant overlap
      49             :  * between the tokens. Each token is mapped to a Symbol (uint32_t) and
      50             :  * reference-counted so that no-longer-used symbols can be reclaimed.
      51             :  *
      52             :  * We use a uint8_t array to encode a "."-deliminated stat-name into arrays of
      53             :  * integer symbol IDs in order to conserve space, as in practice the
      54             :  * majority of token instances in stat names draw from a fairly small set of
      55             :  * common names, typically less than 100. The format is somewhat similar to
      56             :  * UTF-8, with a variable-length array of uint8_t. See the implementation for
      57             :  * details.
      58             :  *
      59             :  * StatNameStorage can be used to manage memory for the byte-encoding. Not all
      60             :  * StatNames are backed by StatNameStorage -- the storage may be inlined into
      61             :  * another object such as HeapStatData. StaNameStorage is not fully RAII --
      62             :  * instead the owner must call free(SymbolTable&) explicitly before
      63             :  * StatNameStorage is destructed. This saves 8 bytes of storage per stat,
      64             :  * relative to holding a SymbolTable& in each StatNameStorage object.
      65             :  *
      66             :  * A StatName is a copyable and assignable reference to this storage. It does
      67             :  * not own the storage or keep it alive via reference counts; the owner must
      68             :  * ensure the backing store lives as long as the StatName.
      69             :  *
      70             :  * The underlying Symbol / SymbolVec data structures are private to the
      71             :  * impl. One side effect of the non-monotonically-increasing symbol counter is
      72             :  * that if a string is encoded, the resulting stat is destroyed, and then that
      73             :  * same string is re-encoded, it may or may not encode to the same underlying
      74             :  * symbol.
      75             :  */
      76             : class SymbolTable final {
      77             : public:
      78             :   /**
      79             :    * Efficient byte-encoded storage of an array of tokens. The most common
      80             :    * tokens are typically < 127, and are represented directly. tokens >= 128
      81             :    * spill into the next byte, allowing for tokens of arbitrary numeric value to
      82             :    * be stored. As long as the most common tokens are low-valued, the
      83             :    * representation is space-efficient. This scheme is similar to UTF-8. The
      84             :    * token ordering is dependent on the order in which stat-names are encoded
      85             :    * into the SymbolTable, which will not be optimal, but in practice appears
      86             :    * to be pretty good.
      87             :    *
      88             :    * This is exposed in the interface for the benefit of join(), which is
      89             :    * used in the hot-path to append two stat-names into a temp without taking
      90             :    * locks. This is used then in thread-local cache lookup, so that once warm,
      91             :    * no locks are taken when looking up stats.
      92             :    */
      93             :   using Storage = uint8_t[];
      94             :   using StoragePtr = std::unique_ptr<Storage>;
      95             : 
      96             :   /**
      97             :    * Intermediate representation for a stat-name. This helps store multiple
      98             :    * names in a single packed allocation. First we encode each desired name,
      99             :    * then sum their sizes for the single packed allocation. This is used to
     100             :    * store MetricImpl's tags and tagExtractedName.
     101             :    */
     102             :   class Encoding {
     103             :   public:
     104             :     /**
     105             :      * Before destructing SymbolEncoding, you must call moveToMemBlock. This
     106             :      * transfers ownership, and in particular, the responsibility to call
     107             :      * SymbolTable::clear() on all referenced symbols. If we ever wanted to be
     108             :      * able to destruct a SymbolEncoding without transferring it we could add a
     109             :      * clear(SymbolTable&) method.
     110             :      */
     111             :     ~Encoding();
     112             : 
     113             :     /**
     114             :      * Encodes a token into the vec.
     115             :      *
     116             :      * @param symbol the symbol to encode.
     117             :      */
     118             :     void addSymbols(const SymbolVec& symbols);
     119             : 
     120             :     /**
     121             :      * Decodes a uint8_t array into a SymbolVec.
     122             :      */
     123             :     static SymbolVec decodeSymbols(StatName stat_name);
     124             : 
     125             :     /**
     126             :      * Decodes a uint8_t array into a sequence of symbols and literal strings.
     127             :      * There are distinct lambdas for these two options. Calls to these lambdas
     128             :      * will be interleaved based on the sequencing of literal strings and
     129             :      * symbols held in the data.
     130             :      *
     131             :      * @param array the StatName encoded as a uint8_t array.
     132             :      * @param size the size of the array in bytes.
     133             :      * @param symbol_token_fn a function to be called whenever a symbol is encountered in the array.
     134             :      * @param string_view_token_fn a function to be called whenever a string literal is encountered.
     135             :      */
     136             :     static void decodeTokens(StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
     137             :                              const std::function<void(absl::string_view)>& string_view_token_fn);
     138             : 
     139             :     /**
     140             :      * Returns the number of bytes required to represent StatName as a uint8_t
     141             :      * array, including the encoded size.
     142             :      */
     143      669263 :     size_t bytesRequired() const {
     144      669263 :       return data_bytes_required_ + encodingSizeBytes(data_bytes_required_);
     145      669263 :     }
     146             : 
     147             :     /**
     148             :      * Moves the contents of the vector into an allocated array. The array
     149             :      * must have been allocated with bytesRequired() bytes.
     150             :      *
     151             :      * @param mem_block_builder memory block to receive the encoded bytes.
     152             :      */
     153             :     void moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder);
     154             : 
     155             :     /**
     156             :      * @param number A number to encode in a variable length byte-array.
     157             :      * @return The number of bytes it would take to encode the number.
     158             :      */
     159             :     static size_t encodingSizeBytes(uint64_t number);
     160             : 
     161             :     /**
     162             :      * @param num_data_bytes The number of bytes in a data-block.
     163             :      * @return The total number of bytes required for the data-block and its encoded size.
     164             :      */
     165     7421501 :     static size_t totalSizeBytes(size_t num_data_bytes) {
     166     7421501 :       return encodingSizeBytes(num_data_bytes) + num_data_bytes;
     167     7421501 :     }
     168             : 
     169             :     /**
     170             :      * Saves the specified number into the byte array, returning the next byte.
     171             :      * There is no guarantee that bytes will be aligned, so we can't cast to a
     172             :      * uint16_t* and assign, but must individually copy the bytes.
     173             :      *
     174             :      * Requires that the buffer be sized to accommodate encodingSizeBytes(number).
     175             :      *
     176             :      * @param number the number to write.
     177             :      * @param mem_block the memory into which to append the number.
     178             :      */
     179             :     static void appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block);
     180             : 
     181             :     /**
     182             :      * Appends stat_name's bytes into mem_block, which must have been allocated to
     183             :      * allow for stat_name.size() bytes.
     184             :      *
     185             :      * @param stat_name the stat_name to append.
     186             :      * @param mem_block the block of memory to append to.
     187             :      */
     188             :     static void appendToMemBlock(StatName stat_name, MemBlockBuilder<uint8_t>& mem_block);
     189             : 
     190             :     /**
     191             :      * Decodes a byte-array containing a variable-length number.
     192             :      *
     193             :      * @param The encoded byte array, written previously by appendEncoding.
     194             :      * @return A pair containing the decoded number, and the number of bytes consumed from encoding.
     195             :      */
     196             :     static std::pair<uint64_t, size_t> decodeNumber(const uint8_t* encoding);
     197             : 
     198             :   private:
     199             :     friend class StatName;
     200             :     friend class SymbolTable;
     201             :     class TokenIter;
     202             : 
     203             :     size_t data_bytes_required_{0};
     204             :     MemBlockBuilder<uint8_t> mem_block_;
     205             :   };
     206             : 
     207             :   SymbolTable();
     208             :   ~SymbolTable();
     209             : 
     210             :   /**
     211             :    * Decodes a vector of symbols back into its period-delimited stat name. If
     212             :    * decoding fails on any part of the symbol_vec, we release_assert and crash,
     213             :    * since this should never happen, and we don't want to continue running
     214             :    * with a corrupt stats set.
     215             :    *
     216             :    * @param stat_name the stat name.
     217             :    * @return std::string stringified stat_name.
     218             :    */
     219             :   std::string toString(const StatName& stat_name) const;
     220             : 
     221             :   /**
     222             :    * @return uint64_t the number of symbols in the symbol table.
     223             :    */
     224             :   uint64_t numSymbols() const;
     225             : 
     226             :   /**
     227             :    * Determines whether one StatName lexically precedes another. Note that
     228             :    * the lexical order may not exactly match the lexical order of the
     229             :    * elaborated strings. For example, stat-name of "-.-" would lexically
     230             :    * sort after "---" but when encoded as a StatName would come lexically
     231             :    * earlier. In practice this is unlikely to matter as those are not
     232             :    * reasonable names for Envoy stats.
     233             :    *
     234             :    * Note that this operation has to be performed with the context of the
     235             :    * SymbolTable so that the individual Symbol objects can be converted
     236             :    * into strings for lexical comparison.
     237             :    *
     238             :    * @param a the first stat name
     239             :    * @param b the second stat name
     240             :    * @return bool true if a lexically precedes b.
     241             :    */
     242             :   bool lessThan(const StatName& a, const StatName& b) const;
     243             : 
     244             :   /**
     245             :    * Joins two or more StatNames. For example if we have StatNames for {"a.b",
     246             :    * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The
     247             :    * advantage of using this representation is that it avoids having to
     248             :    * decode/encode into the elaborated form, and does not require locking the
     249             :    * SymbolTable.
     250             :    *
     251             :    * Note that this method does not bump reference counts on the referenced
     252             :    * Symbols in the SymbolTable, so it's only valid as long for the lifetime of
     253             :    * the joined StatNames.
     254             :    *
     255             :    * This is intended for use doing cached name lookups of scoped stats, where
     256             :    * the scope prefix and the names to combine it with are already in StatName
     257             :    * form. Using this class, they can be combined without accessing the
     258             :    * SymbolTable or, in particular, taking its lock.
     259             :    *
     260             :    * @param stat_names the names to join.
     261             :    * @return Storage allocated for the joined name.
     262             :    */
     263             :   StoragePtr join(const StatNameVec& stat_names) const;
     264             : 
     265             :   /**
     266             :    * Populates a StatNameList from a list of encodings. This is not done at
     267             :    * construction time to enable StatNameList to be instantiated directly in
     268             :    * a class that doesn't have a live SymbolTable when it is constructed.
     269             :    *
     270             :    * @param names A pointer to the first name in an array, allocated by the caller.
     271             :    * @param num_names The number of names.
     272             :    * @param list The StatNameList representing the stat names.
     273             :    */
     274             :   void populateList(const StatName* names, uint32_t num_names, StatNameList& list);
     275             : 
     276             : #ifndef ENVOY_CONFIG_COVERAGE
     277             :   void debugPrint() const;
     278             : #endif
     279             : 
     280             :   /**
     281             :    * Creates a StatNameSet.
     282             :    *
     283             :    * @param name the name of the set.
     284             :    * @return the set.
     285             :    */
     286             :   StatNameSetPtr makeSet(absl::string_view name);
     287             : 
     288             :   using RecentLookupsFn = std::function<void(absl::string_view, uint64_t)>;
     289             : 
     290             :   /**
     291             :    * Calls the provided function with the name of the most recently looked-up
     292             :    * symbols, including lookups on any StatNameSets, and with a count of
     293             :    * the recent lookups on that symbol.
     294             :    *
     295             :    * @param iter the function to call for every recent item.
     296             :    */
     297             :   uint64_t getRecentLookups(const RecentLookupsFn&) const;
     298             : 
     299             :   /**
     300             :    * Clears the recent-lookups structures.
     301             :    */
     302             :   void clearRecentLookups();
     303             : 
     304             :   /**
     305             :    * Sets the recent-lookup capacity.
     306             :    */
     307             :   void setRecentLookupCapacity(uint64_t capacity);
     308             : 
     309             :   /**
     310             :    * @return The configured recent-lookup tracking capacity.
     311             :    */
     312             :   uint64_t recentLookupCapacity() const;
     313             : 
     314             :   /**
     315             :    * Identifies the dynamic components of a stat_name into an array of integer
     316             :    * pairs, indicating the begin/end of spans of tokens in the stat-name that
     317             :    * are created from StatNameDynamicStore or StatNameDynamicPool.
     318             :    *
     319             :    * This can be used to reconstruct the same exact StatNames in
     320             :    * StatNames::mergeStats(), to enable stat continuity across hot-restart.
     321             :    *
     322             :    * @param stat_name the input stat name.
     323             :    * @return the array of pairs indicating the bounds.
     324             :    */
     325             :   DynamicSpans getDynamicSpans(StatName stat_name) const;
     326             : 
     327             :   bool lessThanLockHeld(const StatName& a, const StatName& b) const;
     328             : 
     329             :   template <class GetStatName, class Obj> struct StatNameCompare {
     330             :     StatNameCompare(const SymbolTable& symbol_table, GetStatName getter)
     331             :         : symbol_table_(symbol_table), getter_(getter) {}
     332             : 
     333             :     bool operator()(const Obj& a, const Obj& b) const;
     334             : 
     335             :     const SymbolTable& symbol_table_;
     336             :     GetStatName getter_;
     337             :   };
     338             : 
     339             :   /**
     340             :    * Sorts a range by StatName. This API is more efficient than
     341             :    * calling std::sort directly as it takes a single lock for the
     342             :    * entire sort, rather than locking on each comparison.
     343             :    *
     344             :    * @param begin the beginning of the range to sort
     345             :    * @param end the end of the range to sort
     346             :    * @param get_stat_name a functor that takes an Obj and returns a StatName.
     347             :    */
     348             :   template <class Obj, class Iter, class GetStatName>
     349             :   void sortByStatNames(Iter begin, Iter end, GetStatName get_stat_name) const {
     350             :     // Grab the lock once before sorting begins, so we don't have to re-take
     351             :     // it on every comparison.
     352             :     Thread::LockGuard lock(lock_);
     353             :     StatNameCompare<GetStatName, Obj> compare(*this, get_stat_name);
     354             :     std::sort(begin, end, compare);
     355             :   }
     356             : 
     357             : private:
     358             :   friend class StatName;
     359             :   friend class StatNameTest;
     360             :   friend class StatNameDeathTest;
     361             :   friend class StatNameDynamicStorage;
     362             :   friend class StatNameList;
     363             :   friend class StatNameStorage;
     364             : 
     365             :   /**
     366             :    * Encodes 'name' into the symbol table. Bumps reference counts for referenced
     367             :    * symbols. The caller must manage the storage, and is responsible for calling
     368             :    * SymbolTable::free() to release the reference counts.
     369             :    *
     370             :    * @param name The name to encode.
     371             :    * @return The encoded name, transferring ownership to the caller.
     372             :    *
     373             :    */
     374             :   StoragePtr encode(absl::string_view name);
     375             :   StoragePtr makeDynamicStorage(absl::string_view name);
     376             : 
     377             :   /**
     378             :    * Since SymbolTable does manual reference counting, a client of SymbolTable
     379             :    * must manually call free(symbol_vec) when it is freeing the backing store
     380             :    * for a StatName. This way, the symbol table will grow and shrink
     381             :    * dynamically, instead of being write-only.
     382             :    *
     383             :    * @param stat_name the stat name.
     384             :    */
     385             :   void free(const StatName& stat_name);
     386             : 
     387             :   /**
     388             :    * StatName backing-store can be managed by callers in a variety of ways
     389             :    * to minimize overhead. But any persistent reference to a StatName needs
     390             :    * to hold onto its own reference-counts for all symbols. This method
     391             :    * helps callers ensure the symbol-storage is maintained for the lifetime
     392             :    * of a reference.
     393             :    *
     394             :    * @param stat_name the stat name.
     395             :    */
     396             :   void incRefCount(const StatName& stat_name);
     397             : 
     398             :   struct SharedSymbol {
     399      247028 :     SharedSymbol(Symbol symbol) : symbol_(symbol) {}
     400             : 
     401             :     Symbol symbol_;
     402             :     uint32_t ref_count_{1};
     403             :   };
     404             : 
     405             :   // This must be held during both encode() and free().
     406             :   mutable Thread::MutexBasicLockable lock_;
     407             : 
     408             :   /**
     409             :    * Decodes a uint8_t array into an array of period-delimited strings. Note
     410             :    * that some of the strings may have periods in them, in the case where
     411             :    * StatNameDynamicStorage was used.
     412             :    *
     413             :    * If decoding fails on any part of the encoding, we RELEASE_ASSERT and crash,
     414             :    * since this should never happen, and we don't want to continue running with
     415             :    * corrupt stat names.
     416             :    *
     417             :    * @param array the uint8_t array of encoded symbols and dynamic strings.
     418             :    * @param size the size of the array in bytes.
     419             :    * @return std::string the retrieved stat name.
     420             :    */
     421             :   std::vector<absl::string_view> decodeStrings(StatName stat_name) const;
     422             : 
     423             :   /**
     424             :    * Convenience function for encode(), symbolizing one string segment at a time.
     425             :    *
     426             :    * @param sv the individual string to be encoded as a symbol.
     427             :    * @return Symbol the encoded string.
     428             :    */
     429             :   Symbol toSymbol(absl::string_view sv) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
     430             : 
     431             :   /**
     432             :    * Convenience function for decode(), decoding one symbol at a time.
     433             :    *
     434             :    * @param symbol the individual symbol to be decoded.
     435             :    * @return absl::string_view the decoded string.
     436             :    */
     437             :   absl::string_view fromSymbol(Symbol symbol) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
     438             : 
     439             :   /**
     440             :    * Stages a new symbol for use. To be called after a successful insertion.
     441             :    */
     442             :   void newSymbol();
     443             : 
     444             :   /**
     445             :    * Tokenizes name, finds or allocates symbols for each token, and adds them
     446             :    * to encoding.
     447             :    *
     448             :    * @param name The name to tokenize.
     449             :    * @param encoding The encoding to write to.
     450             :    */
     451             :   void addTokensToEncoding(absl::string_view name, Encoding& encoding);
     452             : 
     453           0 :   Symbol monotonicCounter() {
     454           0 :     Thread::LockGuard lock(lock_);
     455           0 :     return monotonic_counter_;
     456           0 :   }
     457             : 
     458             :   // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so
     459             :   // that if insertion succeeds, the value written is the correct one.
     460             :   Symbol next_symbol_ ABSL_GUARDED_BY(lock_);
     461             : 
     462             :   // If the free pool is exhausted, we monotonically increase this counter.
     463             :   Symbol monotonic_counter_;
     464             : 
     465             :   // Bitmap implementation.
     466             :   // The encode map stores both the symbol and the ref count of that symbol.
     467             :   // Using absl::string_view lets us only store the complete string once, in the decode map.
     468             :   using EncodeMap = absl::flat_hash_map<absl::string_view, SharedSymbol>;
     469             :   using DecodeMap = absl::flat_hash_map<Symbol, InlineStringPtr>;
     470             :   EncodeMap encode_map_ ABSL_GUARDED_BY(lock_);
     471             :   DecodeMap decode_map_ ABSL_GUARDED_BY(lock_);
     472             : 
     473             :   // Free pool of symbols for re-use.
     474             :   // TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
     475             :   // using an Envoy::IntervalSet.
     476             :   std::stack<Symbol> pool_ ABSL_GUARDED_BY(lock_);
     477             :   RecentLookups recent_lookups_ ABSL_GUARDED_BY(lock_);
     478             : };
     479             : 
     480             : // Base class for holding the backing-storing for a StatName. The two derived
     481             : // classes, StatNameStorage and StatNameDynamicStorage, share a need to hold an
     482             : // array of bytes, but use different representations.
     483             : class StatNameStorageBase {
     484             : public:
     485      927955 :   StatNameStorageBase(SymbolTable::StoragePtr&& bytes) : bytes_(std::move(bytes)) {}
     486       26156 :   StatNameStorageBase() = default;
     487             : 
     488             :   /**
     489             :    * @return a reference to the owned storage.
     490             :    */
     491             :   inline StatName statName() const;
     492             : 
     493             :   /**
     494             :    * @return the encoded data as a const pointer.
     495             :    */
     496      603356 :   const uint8_t* bytes() const { return bytes_.get(); }
     497             : 
     498             : protected:
     499       26156 :   void setBytes(SymbolTable::StoragePtr&& bytes) { bytes_ = std::move(bytes); }
     500      695419 :   void clear() { bytes_.reset(); }
     501             : 
     502             : private:
     503             :   SymbolTable::StoragePtr bytes_;
     504             : };
     505             : 
     506             : /**
     507             :  * Holds backing storage for a StatName. Usage of this is not required, as some
     508             :  * applications may want to hold multiple StatName objects in one contiguous
     509             :  * uint8_t array, or embed the characters directly in another structure.
     510             :  *
     511             :  * This is intended for embedding in other data structures that have access
     512             :  * to a SymbolTable. StatNameStorage::free(symbol_table) must be called prior
     513             :  * to allowing the StatNameStorage object to be destructed, otherwise an assert
     514             :  * will fire to guard against symbol-table leaks.
     515             :  *
     516             :  * Thus this class is inconvenient to directly use as temp storage for building
     517             :  * a StatName from a string. Instead it should be used via StatNameManagedStorage.
     518             :  */
     519             : class StatNameStorage : public StatNameStorageBase {
     520             : public:
     521             :   // Basic constructor for when you have a name as a string, and need to
     522             :   // generate symbols for it.
     523             :   StatNameStorage(absl::string_view name, SymbolTable& table);
     524             : 
     525             :   // Move constructor; needed for using StatNameStorage as an
     526             :   // absl::flat_hash_map value.
     527     1089359 :   StatNameStorage(StatNameStorage&& src) noexcept : StatNameStorageBase(std::move(src)) {}
     528             : 
     529             :   // Obtains new backing storage for an already existing StatName. Used to
     530             :   // record a computed StatName held in a temp into a more persistent data
     531             :   // structure.
     532             :   StatNameStorage(StatName src, SymbolTable& table);
     533             : 
     534             :   /**
     535             :    * Before allowing a StatNameStorage to be destroyed, you must call free()
     536             :    * on it, to drop the references to the symbols, allowing the SymbolTable
     537             :    * to shrink.
     538             :    */
     539             :   ~StatNameStorage();
     540             : 
     541             :   /**
     542             :    * Decrements the reference counts in the SymbolTable.
     543             :    *
     544             :    * @param table the symbol table.
     545             :    */
     546             :   void free(SymbolTable& table);
     547             : };
     548             : 
     549             : /**
     550             :  * Efficiently represents a stat name using a variable-length array of uint8_t.
     551             :  * This class does not own the backing store for this array; the backing-store
     552             :  * can be held in StatNameStorage, or it can be packed more tightly into another
     553             :  * object.
     554             :  *
     555             :  * When Envoy is configured with a large numbers of clusters, there are a huge
     556             :  * number of StatNames, so avoiding extra per-stat pointers has a significant
     557             :  * memory impact.
     558             :  */
     559             : class StatName {
     560             : public:
     561             :   // Constructs a StatName object directly referencing the storage of another
     562             :   // StatName.
     563     7256925 :   explicit StatName(const SymbolTable::Storage size_and_data) : size_and_data_(size_and_data) {}
     564             : 
     565             :   // Constructs an empty StatName object.
     566     5041340 :   StatName() = default;
     567             : 
     568             :   /**
     569             :    * Defines default hash function so StatName can be used as a key in an absl
     570             :    * hash-table without specifying a functor. See
     571             :    * https://abseil.io/docs/cpp/guides/hash for details.
     572             :    */
     573     2769779 :   template <typename H> friend H AbslHashValue(H h, StatName stat_name) {
     574     2769779 :     if (stat_name.empty()) {
     575       11750 :       return H::combine(std::move(h), absl::string_view());
     576       11750 :     }
     577             : 
     578     2758029 :     return H::combine(std::move(h), stat_name.dataAsStringView());
     579     2769779 :   }
     580             : 
     581             :   /**
     582             :    * Note that this hash function will return a different hash than that of
     583             :    * the elaborated string.
     584             :    *
     585             :    * @return uint64_t a hash of the underlying representation.
     586             :    */
     587     1411645 :   uint64_t hash() const { return absl::Hash<StatName>()(*this); }
     588             : 
     589      481310 :   bool operator==(const StatName& rhs) const {
     590      481310 :     return dataAsStringView() == rhs.dataAsStringView();
     591      481310 :   }
     592           0 :   bool operator!=(const StatName& rhs) const { return !(*this == rhs); }
     593             : 
     594             :   /**
     595             :    * @return size_t the number of bytes in the symbol array, excluding the
     596             :    *                overhead for the size itself.
     597             :    */
     598             :   size_t dataSize() const;
     599             : 
     600             :   /**
     601             :    * @return size_t the number of bytes in the symbol array, including the
     602             :    *                  overhead for the size itself.
     603             :    */
     604     5152085 :   size_t size() const { return SymbolTable::Encoding::totalSizeBytes(dataSize()); }
     605             : 
     606             :   /**
     607             :    * Copies the entire StatName representation into a MemBlockBuilder, including
     608             :    * the length metadata at the beginning. The MemBlockBuilder must not have
     609             :    * any other data in it.
     610             :    *
     611             :    * @param mem_block_builder the builder to receive the storage.
     612             :    */
     613       26156 :   void copyToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder) {
     614       26156 :     ASSERT(mem_block_builder.size() == 0);
     615       26156 :     mem_block_builder.appendData(absl::MakeSpan(size_and_data_, size()));
     616       26156 :   }
     617             : 
     618             :   /**
     619             :    * Appends the data portion of the StatName representation into a
     620             :    * MemBlockBuilder, excluding the length metadata. This is appropriate for
     621             :    * join(), where several stat-names are combined, and we only need the
     622             :    * aggregated length metadata.
     623             :    *
     624             :    * @param mem_block_builder the builder to receive the storage.
     625             :    */
     626     2400464 :   void appendDataToMemBlock(MemBlockBuilder<uint8_t>& storage) {
     627     2400464 :     storage.appendData(absl::MakeSpan(data(), dataSize()));
     628     2400464 :   }
     629             : 
     630             : #ifndef ENVOY_CONFIG_COVERAGE
     631             :   void debugPrint();
     632             : #endif
     633             : 
     634             :   /**
     635             :    * @return A pointer to the first byte of data (skipping over size bytes).
     636             :    */
     637     6121114 :   const uint8_t* data() const {
     638     6121114 :     if (size_and_data_ == nullptr) {
     639      357706 :       return nullptr;
     640      357706 :     }
     641     5763408 :     return size_and_data_ + SymbolTable::Encoding::encodingSizeBytes(dataSize());
     642     6121114 :   }
     643             : 
     644     4465345 :   const uint8_t* dataIncludingSize() const { return size_and_data_; }
     645             : 
     646             :   /**
     647             :    * @return whether this is empty.
     648             :    */
     649     5217439 :   bool empty() const { return size_and_data_ == nullptr || dataSize() == 0; }
     650             : 
     651             :   /**
     652             :    * Determines whether this starts with the prefix. Note: dynamic segments
     653             :    * are not supported in the current implementation; this matching only works
     654             :    * for symbolic segments. However it OK for this to include dynamic segments
     655             :    * following the prefix.
     656             :    *
     657             :    * @param symbolic_prefix the prefix, which must not contain dynamic segments.
     658             :    */
     659             :   bool startsWith(StatName symbolic_prefix) const;
     660             : 
     661             : private:
     662             :   /**
     663             :    * Casts the raw data as a string_view. Note that this string_view will not
     664             :    * be in human-readable form, but it will be compatible with a string-view
     665             :    * hasher and comparator.
     666             :    */
     667     3720650 :   absl::string_view dataAsStringView() const {
     668     3720650 :     return {reinterpret_cast<const char*>(data()),
     669     3720650 :             static_cast<absl::string_view::size_type>(dataSize())};
     670     3720650 :   }
     671             : 
     672             :   const uint8_t* size_and_data_{nullptr};
     673             : };
     674             : 
     675     1543559 : StatName StatNameStorageBase::statName() const { return StatName(bytes_.get()); }
     676             : 
     677             : /**
     678             :  * Contains the backing store for a StatName and enough context so it can
     679             :  * self-delete through RAII. This works by augmenting StatNameStorage with a
     680             :  * reference to the SymbolTable&, so it has an extra 8 bytes of footprint. It
     681             :  * is intended to be used in cases where simplicity of implementation is more
     682             :  * important than byte-savings, for example:
     683             :  *   - outside the stats system
     684             :  *   - in tests
     685             :  *   - as a scoped temp in a function
     686             :  * Due to the extra 8 bytes per instance, scalability should be taken into
     687             :  * account before using this as (say) a value or key in a map. In those
     688             :  * scenarios, it would be better to store the SymbolTable reference once
     689             :  * for the entire map.
     690             :  *
     691             :  * In the stat structures, we generally use StatNameStorage to avoid the
     692             :  * per-stat overhead.
     693             :  */
     694             : class StatNameManagedStorage : public StatNameStorage {
     695             : public:
     696             :   // Basic constructor for when you have a name as a string, and need to
     697             :   // generate symbols for it.
     698             :   StatNameManagedStorage(absl::string_view name, SymbolTable& table)
     699      144207 :       : StatNameStorage(name, table), symbol_table_(table) {}
     700             :   StatNameManagedStorage(StatNameManagedStorage&& src) noexcept
     701           0 :       : StatNameStorage(std::move(src)), symbol_table_(src.symbol_table_) {}
     702             :   StatNameManagedStorage(StatName src, SymbolTable& table) noexcept
     703           0 :       : StatNameStorage(src, table), symbol_table_(table) {}
     704             : 
     705      144207 :   ~StatNameManagedStorage() { free(symbol_table_); } // NOLINT(clang-analyzer-unix.Malloc)
     706             : 
     707             : private:
     708             :   SymbolTable& symbol_table_;
     709             : };
     710             : 
     711             : /**
     712             :  * Holds backing-store for a dynamic stat, where are no global locks needed
     713             :  * to create a StatName from a string, but there is no sharing of token data
     714             :  * between names, so there may be more memory consumed.
     715             :  */
     716             : class StatNameDynamicStorage : public StatNameStorageBase {
     717             : public:
     718             :   // Basic constructor based on a name. Note the table is used for a call to
     719             :   // encode the name, but no locks are taken in either implementation of the
     720             :   // SymbolTable api.
     721             :   StatNameDynamicStorage(absl::string_view name, SymbolTable& table)
     722      258696 :       : StatNameStorageBase(table.makeDynamicStorage(name)) {}
     723             :   // Move constructor.
     724             :   StatNameDynamicStorage(StatNameDynamicStorage&& src) noexcept
     725      175229 :       : StatNameStorageBase(std::move(src)) {}
     726             : };
     727             : 
     728             : /**
     729             :  * Maintains storage for a collection of StatName objects. Like
     730             :  * StatNameManagedStorage, this has an RAII usage model, taking
     731             :  * care of decrementing ref-counts in the SymbolTable for all
     732             :  * contained StatNames on destruction or on clear();
     733             :  *
     734             :  * Example usage:
     735             :  *   StatNamePool pool(symbol_table);
     736             :  *   StatName name1 = pool.add("name1");
     737             :  *   StatName name2 = pool.add("name2");
     738             :  *   const uint8_t* storage = pool.addReturningStorage("name3");
     739             :  *   StatName name3(storage);
     740             :  */
     741             : class StatNamePool {
     742             : public:
     743       85929 :   explicit StatNamePool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
     744       85929 :   ~StatNamePool() { clear(); }
     745             : 
     746             :   /**
     747             :    * Removes all StatNames from the pool.
     748             :    */
     749             :   void clear();
     750             : 
     751             :   /**
     752             :    * @param name the name to add the container.
     753             :    * @return the StatName held in the container for this name.
     754             :    */
     755             :   StatName add(absl::string_view name);
     756             : 
     757             :   /**
     758             :    * Does essentially the same thing as add(), but returns the storage as a
     759             :    * pointer which can later be used to create a StatName. This can be used
     760             :    * to accumulate a vector of uint8_t* which can later be used to create
     761             :    * StatName objects on demand.
     762             :    *
     763             :    * The use-case for this is in source/common/http/codes.cc, where we have a
     764             :    * fixed sized array of atomic pointers, indexed by HTTP code. This API
     765             :    * enables storing the allocated stat-name in that array of atomics, which
     766             :    * enables content-avoidance when finding StatNames for frequently used HTTP
     767             :    * codes.
     768             :    *
     769             :    * @param name the name to add the container.
     770             :    * @return a pointer to the bytes held in the container for this name, suitable for
     771             :    *         using to construct a StatName.
     772             :    */
     773             :   const uint8_t* addReturningStorage(absl::string_view name);
     774             : 
     775             : private:
     776             :   // We keep the stat names in a vector of StatNameStorage, storing the
     777             :   // SymbolTable reference separately. This saves 8 bytes per StatName,
     778             :   // at the cost of having a destructor that calls clear().
     779             :   SymbolTable& symbol_table_;
     780             :   std::vector<StatNameStorage> storage_vector_;
     781             : };
     782             : 
     783             : /**
     784             :  * Maintains storage for a collection of StatName objects constructed from
     785             :  * dynamically discovered strings. Like StatNameDynamicStorage, this has an RAII
     786             :  * usage model. Creating StatNames with this interface do not incur a
     787             :  * SymbolTable lock, but tokens are not shared across StatNames.
     788             :  *
     789             :  * The SymbolTable is required as a constructor argument to assist in encoding
     790             :  * the stat-names.
     791             :  *
     792             :  * Example usage:
     793             :  *   StatNameDynamicPool pool(symbol_table);
     794             :  *   StatName name1 = pool.add("name1");
     795             :  *   StatName name2 = pool.add("name2");
     796             :  *
     797             :  * Note; StatNameDynamicPool::add("foo") != StatNamePool::add("foo"), even
     798             :  * though their string representations are identical. They also will not match
     799             :  * in map lookups. Tests for StatName with dynamic components must therefore
     800             :  * be looked up by string, via Stats::TestUtil::TestStore.
     801             :  */
     802             : class StatNameDynamicPool {
     803             : public:
     804       33208 :   explicit StatNameDynamicPool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
     805             : 
     806             :   /**
     807             :    * @param name the name to add the container.
     808             :    * @return the StatName held in the container for this name.
     809             :    */
     810             :   StatName add(absl::string_view name);
     811             : 
     812             : private:
     813             :   // We keep the stat names in a vector of StatNameStorage, storing the
     814             :   // SymbolTable reference separately. This saves 8 bytes per StatName,
     815             :   // at the cost of having a destructor that calls clear().
     816             :   SymbolTable& symbol_table_;
     817             :   std::vector<StatNameDynamicStorage> storage_vector_;
     818             : };
     819             : 
     820             : // Represents an ordered container of StatNames. The encoding for each StatName
     821             : // is byte-packed together, so this carries less overhead than allocating the
     822             : // storage separately. The trade-off is there is no random access; you can only
     823             : // iterate through the StatNames.
     824             : //
     825             : // The maximum size of the list is 255 elements, so the length can fit in a
     826             : // byte. It would not be difficult to increase this, but there does not appear
     827             : // to be a current need.
     828             : class StatNameList {
     829             : public:
     830             :   ~StatNameList();
     831             : 
     832             :   /**
     833             :    * @return true if populate() has been called on this list.
     834             :    */
     835           0 :   bool populated() const { return storage_ != nullptr; }
     836             : 
     837             :   /**
     838             :    * Iterates over each StatName in the list, calling f(StatName). f() should
     839             :    * return true to keep iterating, or false to end the iteration.
     840             :    *
     841             :    * @param f The function to call on each stat.
     842             :    */
     843             :   void iterate(const std::function<bool(StatName)>& f) const;
     844             : 
     845             :   /**
     846             :    * Frees each StatName in the list. Failure to call this before destruction
     847             :    * results in an ASSERT at destruction of the list and the SymbolTable.
     848             :    *
     849             :    * This is not done as part of destruction as the SymbolTable may already
     850             :    * be destroyed.
     851             :    *
     852             :    * @param symbol_table the symbol table.
     853             :    */
     854             :   void clear(SymbolTable& symbol_table);
     855             : 
     856             : private:
     857             :   friend class SymbolTable;
     858             : 
     859             :   /**
     860             :    * Moves the specified storage into the list. The storage format is an
     861             :    * array of bytes, organized like this:
     862             :    *
     863             :    * [0] The number of elements in the list (must be < 256).
     864             :    * [1] low order 8 bits of the number of symbols in the first element.
     865             :    * [2] high order 8 bits of the number of symbols in the first element.
     866             :    * [3...] the symbols in the first element.
     867             :    * ...
     868             :    *
     869             :    *
     870             :    * For SymbolTable, each symbol is 1 or more bytes, in a variable-length
     871             :    * encoding. See SymbolTable::Encoding::addSymbol for details.
     872             :    */
     873      392190 :   void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); }
     874             : 
     875             :   SymbolTable::StoragePtr storage_;
     876             : };
     877             : 
     878             : // Value-templatized hash-map with StatName key.
     879             : template <class T> using StatNameHashMap = absl::flat_hash_map<StatName, T>;
     880             : 
     881             : // Hash-set of StatNames
     882             : using StatNameHashSet = absl::flat_hash_set<StatName>;
     883             : 
     884             : // Helper class for sorting StatNames.
     885             : struct StatNameLessThan {
     886           0 :   StatNameLessThan(const SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
     887           0 :   bool operator()(const StatName& a, const StatName& b) const {
     888           0 :     return symbol_table_.lessThan(a, b);
     889           0 :   }
     890             : 
     891             :   const SymbolTable& symbol_table_;
     892             : };
     893             : 
     894             : struct HeterogeneousStatNameHash {
     895             :   // Specifying is_transparent indicates to the library infrastructure that
     896             :   // type-conversions should not be applied when calling find(), but instead
     897             :   // pass the actual types of the contained and searched-for objects directly to
     898             :   // these functors. See
     899             :   // https://en.cppreference.com/w/cpp/utility/functional/less_void for an
     900             :   // official reference, and https://abseil.io/tips/144 for a description of
     901             :   // using it in the context of absl.
     902             :   using is_transparent = void; // NOLINT(readability-identifier-naming)
     903             : 
     904           0 :   size_t operator()(StatName a) const { return a.hash(); }
     905           0 :   size_t operator()(const StatNameStorage& a) const { return a.statName().hash(); }
     906             : };
     907             : 
     908             : struct HeterogeneousStatNameEqual {
     909             :   // See description for HeterogeneousStatNameHash::is_transparent.
     910             :   using is_transparent = void; // NOLINT(readability-identifier-naming)
     911             : 
     912           0 :   size_t operator()(StatName a, StatName b) const { return a == b; }
     913           0 :   size_t operator()(const StatNameStorage& a, const StatNameStorage& b) const {
     914           0 :     return a.statName() == b.statName();
     915           0 :   }
     916           0 :   size_t operator()(StatName a, const StatNameStorage& b) const { return a == b.statName(); }
     917           0 :   size_t operator()(const StatNameStorage& a, StatName b) const { return a.statName() == b; }
     918             : };
     919             : 
     920             : // Encapsulates a set<StatNameStorage>. We use containment here rather than a
     921             : // 'using' alias because we need to ensure that when the set is destructed,
     922             : // StatNameStorage::free(symbol_table) is called on each entry. It is a little
     923             : // easier at the call-sites in thread_local_store.cc to implement this an
     924             : // explicit free() method, analogous to StatNameStorage::free(), compared to
     925             : // storing a SymbolTable reference in the class and doing the free in the
     926             : // destructor, like StatNameManagedStorage.
     927             : class StatNameStorageSet {
     928             : public:
     929             :   using HashSet =
     930             :       absl::flat_hash_set<StatNameStorage, HeterogeneousStatNameHash, HeterogeneousStatNameEqual>;
     931             :   using Iterator = HashSet::iterator;
     932             : 
     933             :   ~StatNameStorageSet();
     934             : 
     935             :   /**
     936             :    * Releases all symbols held in this set. Must be called prior to destruction.
     937             :    *
     938             :    * @param symbol_table The symbol table that owns the symbols.
     939             :    */
     940             :   void free(SymbolTable& symbol_table);
     941             : 
     942             :   /**
     943             :    * @param storage The StatNameStorage to add to the set.
     944             :    */
     945           0 :   std::pair<HashSet::iterator, bool> insert(StatNameStorage&& storage) {
     946           0 :     return hash_set_.insert(std::move(storage));
     947           0 :   }
     948             : 
     949             :   /**
     950             :    * @param stat_name The stat_name to find.
     951             :    * @return the iterator pointing to the stat_name, or end() if not found.
     952             :    */
     953           0 :   Iterator find(StatName stat_name) { return hash_set_.find(stat_name); }
     954             : 
     955             :   /**
     956             :    * @return the end-marker.
     957             :    */
     958           0 :   Iterator end() { return hash_set_.end(); }
     959             : 
     960             :   /**
     961             :    * @return the number of elements in the set.
     962             :    */
     963           0 :   size_t size() const { return hash_set_.size(); }
     964             : 
     965             : private:
     966             :   HashSet hash_set_;
     967             : };
     968             : 
     969             : // Captures StatNames for lookup by string, keeping a map of 'built-ins' that is
     970             : // expected to be populated during initialization.
     971             : //
     972             : // Ideally, builtins should be added during process initialization, in the
     973             : // outermost relevant context. And as the builtins map is not mutex protected,
     974             : // builtins must *not* be added to an existing StatNameSet in the request-path.
     975             : //
     976             : // It is fine to populate a new StatNameSet when (for example) an xDS
     977             : // message reveals a new set of names to be used as stats. The population must
     978             : // be completed prior to exposing the new StatNameSet to worker threads.
     979             : //
     980             : // To create stats using names discovered in the request path, dynamic stat
     981             : // names must be used (see StatNameDynamicStorage). Consider using helper
     982             : // methods such as Stats::Utility::counterFromElements in common/stats/utility.h
     983             : // to simplify the process of allocating and combining stat names and creating
     984             : // counters, gauges, and histograms from them.
     985             : class StatNameSet {
     986             : public:
     987             :   // This object must be instantiated via SymbolTable::makeSet(), thus constructor is private.
     988             : 
     989             :   /**
     990             :    * Adds a string to the builtin map, which is not mutex protected. This map is
     991             :    * always consulted first as a hit there means no lock is required.
     992             :    *
     993             :    * Builtins can only be added immediately after construction, as the builtins
     994             :    * map is not mutex-protected.
     995             :    */
     996             :   void rememberBuiltin(absl::string_view str);
     997             : 
     998             :   /**
     999             :    * Remembers every string in a container as builtins.
    1000             :    */
    1001          74 :   template <class StringContainer> void rememberBuiltins(const StringContainer& container) {
    1002         518 :     for (const auto& str : container) {
    1003         518 :       rememberBuiltin(str);
    1004         518 :     }
    1005          74 :   }
    1006          74 :   void rememberBuiltins(const std::vector<const char*>& container) {
    1007          74 :     rememberBuiltins<std::vector<const char*>>(container);
    1008          74 :   }
    1009             : 
    1010             :   /**
    1011             :    * Finds a builtin StatName by name. If the builtin has not been registered,
    1012             :    * then the fallback is returned.
    1013             :    *
    1014             :    * @return the StatName or fallback.
    1015             :    */
    1016             :   StatName getBuiltin(absl::string_view token, StatName fallback) const;
    1017             : 
    1018             :   /**
    1019             :    * Adds a StatName using the pool, but without remembering it in any maps.
    1020             :    *
    1021             :    * For convenience, StatNameSet offers pass-through thread-safe access to
    1022             :    * its mutex-protected pool. This is useful in constructor initializers, when
    1023             :    * StatNames are needed both from compile-time constants, as well as from
    1024             :    * other constructor args, e.g.
    1025             :    *    MyClass(const std::vector<absl::string_view>& strings, Stats::SymbolTable& symbol_table)
    1026             :    *        : stat_name_set_(symbol_table),
    1027             :    *          known_const_(stat_name_set_.add("known_const")) { // unmapped constants from pool
    1028             :    *      stat_name_set_.rememberBuiltins(strings); // mapped builtins.
    1029             :    *    }
    1030             :    * This avoids the need to make two different pools; one backing the
    1031             :    * StatNameSet mapped entries, and the other backing the set passed in via the
    1032             :    * constructor.
    1033             :    *
    1034             :    * @param str The string to add as a StatName
    1035             :    * @return The StatName for str.
    1036             :    */
    1037        2492 :   StatName add(absl::string_view str) {
    1038        2492 :     absl::MutexLock lock(&mutex_);
    1039        2492 :     return pool_.add(str);
    1040        2492 :   }
    1041             : 
    1042             : private:
    1043             :   friend class SymbolTable;
    1044             : 
    1045             :   StatNameSet(SymbolTable& symbol_table, absl::string_view name);
    1046             : 
    1047             :   const std::string name_;
    1048             :   Stats::StatNamePool pool_ ABSL_GUARDED_BY(mutex_);
    1049             :   mutable absl::Mutex mutex_;
    1050             :   using StringStatNameMap = absl::flat_hash_map<std::string, Stats::StatName>;
    1051             :   StringStatNameMap builtin_stat_names_;
    1052             : };
    1053             : 
    1054             : template <class GetStatName, class Obj>
    1055             : bool SymbolTable::StatNameCompare<GetStatName, Obj>::operator()(const Obj& a, const Obj& b) const {
    1056             :   StatName a_stat_name = getter_(a);
    1057             :   StatName b_stat_name = getter_(b);
    1058             :   return symbol_table_.lessThanLockHeld(a_stat_name, b_stat_name);
    1059             : }
    1060             : 
    1061             : using SymbolTablePtr = std::unique_ptr<SymbolTable>;
    1062             : 
    1063             : // TODO(jmarantz): rename all remaining ~47 occurrences of SymbolTableImpl in
    1064             : // the codebase to SymbolTable, and drop this alias.
    1065             : using SymbolTableImpl = SymbolTable;
    1066             : 
    1067             : } // namespace Stats
    1068             : } // namespace Envoy

Generated by: LCOV version 1.15