Coverage Report

Created: 2024-09-19 09:45

/proc/self/cwd/source/common/stats/symbol_table.h
Line
Count
Source (jump to first uncovered line)
1
#pragma once
2
3
#include <algorithm>
4
#include <memory>
5
#include <stack>
6
#include <string>
7
#include <vector>
8
9
#include "source/common/common/assert.h"
10
#include "source/common/common/lock_guard.h"
11
#include "source/common/common/mem_block_builder.h"
12
#include "source/common/common/non_copyable.h"
13
#include "source/common/common/thread.h"
14
#include "source/common/common/utility.h"
15
#include "source/common/stats/recent_lookups.h"
16
17
#include "absl/container/fixed_array.h"
18
#include "absl/container/flat_hash_map.h"
19
#include "absl/container/inlined_vector.h"
20
#include "absl/strings/str_join.h"
21
#include "absl/strings/str_split.h"
22
23
namespace Envoy {
24
namespace Stats {
25
26
class StatName;
27
using StatNameVec = absl::InlinedVector<StatName, 8>;
28
class StatNameList;
29
class StatNameSet;
30
using StatNameSetPtr = std::unique_ptr<StatNameSet>;
31
32
/**
33
 * Holds a range of indexes indicating which parts of a stat-name are
34
 * dynamic. This is used to transfer stats from hot-restart parent to child,
35
 * retaining the same name structure.
36
 */
37
using DynamicSpan = std::pair<uint32_t, uint32_t>;
38
using DynamicSpans = std::vector<DynamicSpan>;
39
40
/** A Symbol represents a string-token with a small index. */
41
using Symbol = uint32_t;
42
43
/** Transient representations of a vector of 32-bit symbols */
44
using SymbolVec = std::vector<Symbol>;
45
46
/**
47
 * SymbolTableImpl manages a namespace optimized for stats, which are typically
48
 * composed of arrays of "."-separated tokens, with a significant overlap
49
 * between the tokens. Each token is mapped to a Symbol (uint32_t) and
50
 * reference-counted so that no-longer-used symbols can be reclaimed.
51
 *
52
 * We use a uint8_t array to encode a "."-deliminated stat-name into arrays of
53
 * integer symbol IDs in order to conserve space, as in practice the
54
 * majority of token instances in stat names draw from a fairly small set of
55
 * common names, typically less than 100. The format is somewhat similar to
56
 * UTF-8, with a variable-length array of uint8_t. See the implementation for
57
 * details.
58
 *
59
 * StatNameStorage can be used to manage memory for the byte-encoding. Not all
60
 * StatNames are backed by StatNameStorage -- the storage may be inlined into
61
 * another object such as HeapStatData. StaNameStorage is not fully RAII --
62
 * instead the owner must call free(SymbolTable&) explicitly before
63
 * StatNameStorage is destructed. This saves 8 bytes of storage per stat,
64
 * relative to holding a SymbolTable& in each StatNameStorage object.
65
 *
66
 * A StatName is a copyable and assignable reference to this storage. It does
67
 * not own the storage or keep it alive via reference counts; the owner must
68
 * ensure the backing store lives as long as the StatName.
69
 *
70
 * The underlying Symbol / SymbolVec data structures are private to the
71
 * impl. One side effect of the non-monotonically-increasing symbol counter is
72
 * that if a string is encoded, the resulting stat is destroyed, and then that
73
 * same string is re-encoded, it may or may not encode to the same underlying
74
 * symbol.
75
 */
76
class SymbolTable final {
77
public:
78
  /**
79
   * Efficient byte-encoded storage of an array of tokens. The most common
80
   * tokens are typically < 127, and are represented directly. tokens >= 128
81
   * spill into the next byte, allowing for tokens of arbitrary numeric value to
82
   * be stored. As long as the most common tokens are low-valued, the
83
   * representation is space-efficient. This scheme is similar to UTF-8. The
84
   * token ordering is dependent on the order in which stat-names are encoded
85
   * into the SymbolTable, which will not be optimal, but in practice appears
86
   * to be pretty good.
87
   *
88
   * This is exposed in the interface for the benefit of join(), which is
89
   * used in the hot-path to append two stat-names into a temp without taking
90
   * locks. This is used then in thread-local cache lookup, so that once warm,
91
   * no locks are taken when looking up stats.
92
   */
93
  using Storage = uint8_t[];
94
  using StoragePtr = std::unique_ptr<Storage>;
95
96
  /**
97
   * Intermediate representation for a stat-name. This helps store multiple
98
   * names in a single packed allocation. First we encode each desired name,
99
   * then sum their sizes for the single packed allocation. This is used to
100
   * store MetricImpl's tags and tagExtractedName.
101
   */
102
  class Encoding {
103
  public:
104
    /**
105
     * Before destructing SymbolEncoding, you must call moveToMemBlock. This
106
     * transfers ownership, and in particular, the responsibility to call
107
     * SymbolTable::clear() on all referenced symbols. If we ever wanted to be
108
     * able to destruct a SymbolEncoding without transferring it we could add a
109
     * clear(SymbolTable&) method.
110
     */
111
    ~Encoding();
112
113
    /**
114
     * Encodes a token into the vec.
115
     *
116
     * @param symbol the symbol to encode.
117
     */
118
    void addSymbols(const SymbolVec& symbols);
119
120
    /**
121
     * Decodes a uint8_t array into a SymbolVec.
122
     */
123
    static SymbolVec decodeSymbols(StatName stat_name);
124
125
    /**
126
     * Decodes a uint8_t array into a sequence of symbols and literal strings.
127
     * There are distinct lambdas for these two options. Calls to these lambdas
128
     * will be interleaved based on the sequencing of literal strings and
129
     * symbols held in the data.
130
     *
131
     * @param array the StatName encoded as a uint8_t array.
132
     * @param size the size of the array in bytes.
133
     * @param symbol_token_fn a function to be called whenever a symbol is encountered in the array.
134
     * @param string_view_token_fn a function to be called whenever a string literal is encountered.
135
     */
136
    static void decodeTokens(StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
137
                             const std::function<void(absl::string_view)>& string_view_token_fn);
138
139
    /**
140
     * Returns the number of bytes required to represent StatName as a uint8_t
141
     * array, including the encoded size.
142
     */
143
84.0M
    size_t bytesRequired() const {
144
84.0M
      return data_bytes_required_ + encodingSizeBytes(data_bytes_required_);
145
84.0M
    }
146
147
    /**
148
     * Moves the contents of the vector into an allocated array. The array
149
     * must have been allocated with bytesRequired() bytes.
150
     *
151
     * @param mem_block_builder memory block to receive the encoded bytes.
152
     */
153
    void moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder);
154
155
    /**
156
     * @param number A number to encode in a variable length byte-array.
157
     * @return The number of bytes it would take to encode the number.
158
     */
159
    static size_t encodingSizeBytes(uint64_t number);
160
161
    /**
162
     * @param num_data_bytes The number of bytes in a data-block.
163
     * @return The total number of bytes required for the data-block and its encoded size.
164
     */
165
987M
    static size_t totalSizeBytes(size_t num_data_bytes) {
166
987M
      return encodingSizeBytes(num_data_bytes) + num_data_bytes;
167
987M
    }
168
169
    /**
170
     * Saves the specified number into the byte array, returning the next byte.
171
     * There is no guarantee that bytes will be aligned, so we can't cast to a
172
     * uint16_t* and assign, but must individually copy the bytes.
173
     *
174
     * Requires that the buffer be sized to accommodate encodingSizeBytes(number).
175
     *
176
     * @param number the number to write.
177
     * @param mem_block the memory into which to append the number.
178
     */
179
    static void appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block);
180
181
    /**
182
     * Appends stat_name's bytes into mem_block, which must have been allocated to
183
     * allow for stat_name.size() bytes.
184
     *
185
     * @param stat_name the stat_name to append.
186
     * @param mem_block the block of memory to append to.
187
     */
188
    static void appendToMemBlock(StatName stat_name, MemBlockBuilder<uint8_t>& mem_block);
189
190
    /**
191
     * Decodes a byte-array containing a variable-length number.
192
     *
193
     * @param The encoded byte array, written previously by appendEncoding.
194
     * @return A pair containing the decoded number, and the number of bytes consumed from encoding.
195
     */
196
    static std::pair<uint64_t, size_t> decodeNumber(const uint8_t* encoding);
197
198
  private:
199
    friend class StatName;
200
    friend class SymbolTable;
201
    class TokenIter;
202
203
    size_t data_bytes_required_{0};
204
    MemBlockBuilder<uint8_t> mem_block_;
205
  };
206
207
  SymbolTable();
208
  ~SymbolTable();
209
210
  /**
211
   * Decodes a vector of symbols back into its period-delimited stat name. If
212
   * decoding fails on any part of the symbol_vec, we release_assert and crash,
213
   * since this should never happen, and we don't want to continue running
214
   * with a corrupt stats set.
215
   *
216
   * @param stat_name the stat name.
217
   * @return std::string stringified stat_name.
218
   */
219
  std::string toString(const StatName& stat_name) const;
220
221
  /**
222
   * @return uint64_t the number of symbols in the symbol table.
223
   */
224
  uint64_t numSymbols() const;
225
226
  /**
227
   * Determines whether one StatName lexically precedes another. Note that
228
   * the lexical order may not exactly match the lexical order of the
229
   * elaborated strings. For example, stat-name of "-.-" would lexically
230
   * sort after "---" but when encoded as a StatName would come lexically
231
   * earlier. In practice this is unlikely to matter as those are not
232
   * reasonable names for Envoy stats.
233
   *
234
   * Note that this operation has to be performed with the context of the
235
   * SymbolTable so that the individual Symbol objects can be converted
236
   * into strings for lexical comparison.
237
   *
238
   * @param a the first stat name
239
   * @param b the second stat name
240
   * @return bool true if a lexically precedes b.
241
   */
242
  bool lessThan(const StatName& a, const StatName& b) const;
243
244
  /**
245
   * Joins two or more StatNames. For example if we have StatNames for {"a.b",
246
   * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The
247
   * advantage of using this representation is that it avoids having to
248
   * decode/encode into the elaborated form, and does not require locking the
249
   * SymbolTable.
250
   *
251
   * Note that this method does not bump reference counts on the referenced
252
   * Symbols in the SymbolTable, so it's only valid as long for the lifetime of
253
   * the joined StatNames.
254
   *
255
   * This is intended for use doing cached name lookups of scoped stats, where
256
   * the scope prefix and the names to combine it with are already in StatName
257
   * form. Using this class, they can be combined without accessing the
258
   * SymbolTable or, in particular, taking its lock.
259
   *
260
   * @param stat_names the names to join.
261
   * @return Storage allocated for the joined name.
262
   */
263
  StoragePtr join(const StatNameVec& stat_names) const;
264
265
  /**
266
   * Populates a StatNameList from a list of encodings. This is not done at
267
   * construction time to enable StatNameList to be instantiated directly in
268
   * a class that doesn't have a live SymbolTable when it is constructed.
269
   *
270
   * @param names A pointer to the first name in an array, allocated by the caller.
271
   * @param num_names The number of names.
272
   * @param list The StatNameList representing the stat names.
273
   */
274
  void populateList(const StatName* names, uint32_t num_names, StatNameList& list);
275
276
#ifndef ENVOY_CONFIG_COVERAGE
277
  void debugPrint() const;
278
#endif
279
280
  /**
281
   * Creates a StatNameSet.
282
   *
283
   * @param name the name of the set.
284
   * @return the set.
285
   */
286
  StatNameSetPtr makeSet(absl::string_view name);
287
288
  using RecentLookupsFn = std::function<void(absl::string_view, uint64_t)>;
289
290
  /**
291
   * Calls the provided function with the name of the most recently looked-up
292
   * symbols, including lookups on any StatNameSets, and with a count of
293
   * the recent lookups on that symbol.
294
   *
295
   * @param iter the function to call for every recent item.
296
   */
297
  uint64_t getRecentLookups(const RecentLookupsFn&) const;
298
299
  /**
300
   * Clears the recent-lookups structures.
301
   */
302
  void clearRecentLookups();
303
304
  /**
305
   * Sets the recent-lookup capacity.
306
   */
307
  void setRecentLookupCapacity(uint64_t capacity);
308
309
  /**
310
   * @return The configured recent-lookup tracking capacity.
311
   */
312
  uint64_t recentLookupCapacity() const;
313
314
  /**
315
   * Identifies the dynamic components of a stat_name into an array of integer
316
   * pairs, indicating the begin/end of spans of tokens in the stat-name that
317
   * are created from StatNameDynamicStore or StatNameDynamicPool.
318
   *
319
   * This can be used to reconstruct the same exact StatNames in
320
   * StatNames::mergeStats(), to enable stat continuity across hot-restart.
321
   *
322
   * @param stat_name the input stat name.
323
   * @return the array of pairs indicating the bounds.
324
   */
325
  DynamicSpans getDynamicSpans(StatName stat_name) const;
326
327
  bool lessThanLockHeld(const StatName& a, const StatName& b) const;
328
329
  template <class GetStatName, class Obj> struct StatNameCompare {
330
    StatNameCompare(const SymbolTable& symbol_table, GetStatName getter)
331
        : symbol_table_(symbol_table), getter_(getter) {}
332
333
    bool operator()(const Obj& a, const Obj& b) const;
334
335
    const SymbolTable& symbol_table_;
336
    GetStatName getter_;
337
  };
338
339
  /**
340
   * Sorts a range by StatName. This API is more efficient than
341
   * calling std::sort directly as it takes a single lock for the
342
   * entire sort, rather than locking on each comparison.
343
   *
344
   * @param begin the beginning of the range to sort
345
   * @param end the end of the range to sort
346
   * @param get_stat_name a functor that takes an Obj and returns a StatName.
347
   */
348
  template <class Obj, class Iter, class GetStatName>
349
  void sortByStatNames(Iter begin, Iter end, GetStatName get_stat_name) const {
350
    // Grab the lock once before sorting begins, so we don't have to re-take
351
    // it on every comparison.
352
    Thread::LockGuard lock(lock_);
353
    StatNameCompare<GetStatName, Obj> compare(*this, get_stat_name);
354
    std::sort(begin, end, compare);
355
  }
356
357
private:
358
  friend class StatName;
359
  friend class StatNameTest;
360
  friend class StatNameDeathTest;
361
  friend class StatNameDynamicStorage;
362
  friend class StatNameList;
363
  friend class StatNameStorage;
364
365
  /**
366
   * Encodes 'name' into the symbol table. Bumps reference counts for referenced
367
   * symbols. The caller must manage the storage, and is responsible for calling
368
   * SymbolTable::free() to release the reference counts.
369
   *
370
   * @param name The name to encode.
371
   * @return The encoded name, transferring ownership to the caller.
372
   *
373
   */
374
  StoragePtr encode(absl::string_view name);
375
  StoragePtr makeDynamicStorage(absl::string_view name);
376
377
  /**
378
   * Since SymbolTable does manual reference counting, a client of SymbolTable
379
   * must manually call free(symbol_vec) when it is freeing the backing store
380
   * for a StatName. This way, the symbol table will grow and shrink
381
   * dynamically, instead of being write-only.
382
   *
383
   * @param stat_name the stat name.
384
   */
385
  void free(const StatName& stat_name);
386
387
  /**
388
   * StatName backing-store can be managed by callers in a variety of ways
389
   * to minimize overhead. But any persistent reference to a StatName needs
390
   * to hold onto its own reference-counts for all symbols. This method
391
   * helps callers ensure the symbol-storage is maintained for the lifetime
392
   * of a reference.
393
   *
394
   * @param stat_name the stat name.
395
   */
396
  void incRefCount(const StatName& stat_name);
397
398
  struct SharedSymbol {
399
19.9M
    SharedSymbol(Symbol symbol) : symbol_(symbol) {}
400
401
    Symbol symbol_;
402
    uint32_t ref_count_{1};
403
  };
404
405
  // This must be held during both encode() and free().
406
  mutable Thread::MutexBasicLockable lock_;
407
408
  /**
409
   * Decodes a uint8_t array into an array of period-delimited strings. Note
410
   * that some of the strings may have periods in them, in the case where
411
   * StatNameDynamicStorage was used.
412
   *
413
   * If decoding fails on any part of the encoding, we RELEASE_ASSERT and crash,
414
   * since this should never happen, and we don't want to continue running with
415
   * corrupt stat names.
416
   *
417
   * @param array the uint8_t array of encoded symbols and dynamic strings.
418
   * @param size the size of the array in bytes.
419
   * @return std::string the retrieved stat name.
420
   */
421
  std::vector<absl::string_view> decodeStrings(StatName stat_name) const;
422
423
  /**
424
   * Convenience function for encode(), symbolizing one string segment at a time.
425
   *
426
   * @param sv the individual string to be encoded as a symbol.
427
   * @return Symbol the encoded string.
428
   */
429
  Symbol toSymbol(absl::string_view sv) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
430
431
  /**
432
   * Convenience function for decode(), decoding one symbol at a time.
433
   *
434
   * @param symbol the individual symbol to be decoded.
435
   * @return absl::string_view the decoded string.
436
   */
437
  absl::string_view fromSymbol(Symbol symbol) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
438
439
  /**
440
   * Stages a new symbol for use. To be called after a successful insertion.
441
   */
442
  void newSymbol();
443
444
  /**
445
   * Tokenizes name, finds or allocates symbols for each token, and adds them
446
   * to encoding.
447
   *
448
   * @param name The name to tokenize.
449
   * @param encoding The encoding to write to.
450
   */
451
  void addTokensToEncoding(absl::string_view name, Encoding& encoding);
452
453
0
  Symbol monotonicCounter() {
454
0
    Thread::LockGuard lock(lock_);
455
0
    return monotonic_counter_;
456
0
  }
457
458
  // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so
459
  // that if insertion succeeds, the value written is the correct one.
460
  Symbol next_symbol_ ABSL_GUARDED_BY(lock_);
461
462
  // If the free pool is exhausted, we monotonically increase this counter.
463
  Symbol monotonic_counter_;
464
465
  // Bitmap implementation.
466
  // The encode map stores both the symbol and the ref count of that symbol.
467
  // Using absl::string_view lets us only store the complete string once, in the decode map.
468
  using EncodeMap = absl::flat_hash_map<absl::string_view, SharedSymbol>;
469
  using DecodeMap = absl::flat_hash_map<Symbol, InlineStringPtr>;
470
  EncodeMap encode_map_ ABSL_GUARDED_BY(lock_);
471
  DecodeMap decode_map_ ABSL_GUARDED_BY(lock_);
472
473
  // Free pool of symbols for re-use.
474
  // TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
475
  // using an Envoy::IntervalSet.
476
  std::stack<Symbol> pool_ ABSL_GUARDED_BY(lock_);
477
  RecentLookups recent_lookups_ ABSL_GUARDED_BY(lock_);
478
};
479
480
// Base class for holding the backing-storing for a StatName. The two derived
481
// classes, StatNameStorage and StatNameDynamicStorage, share a need to hold an
482
// array of bytes, but use different representations.
483
class StatNameStorageBase {
484
public:
485
108M
  StatNameStorageBase(SymbolTable::StoragePtr&& bytes) : bytes_(std::move(bytes)) {}
486
3.18M
  StatNameStorageBase() = default;
487
488
  /**
489
   * @return a reference to the owned storage.
490
   */
491
  inline StatName statName() const;
492
493
  /**
494
   * @return the encoded data as a const pointer.
495
   */
496
275M
  const uint8_t* bytes() const { return bytes_.get(); }
497
498
protected:
499
3.18M
  void setBytes(SymbolTable::StoragePtr&& bytes) { bytes_ = std::move(bytes); }
500
87.2M
  void clear() { bytes_.reset(); }
501
502
private:
503
  SymbolTable::StoragePtr bytes_;
504
};
505
506
/**
507
 * Holds backing storage for a StatName. Usage of this is not required, as some
508
 * applications may want to hold multiple StatName objects in one contiguous
509
 * uint8_t array, or embed the characters directly in another structure.
510
 *
511
 * This is intended for embedding in other data structures that have access
512
 * to a SymbolTable. StatNameStorage::free(symbol_table) must be called prior
513
 * to allowing the StatNameStorage object to be destructed, otherwise an assert
514
 * will fire to guard against symbol-table leaks.
515
 *
516
 * Thus this class is inconvenient to directly use as temp storage for building
517
 * a StatName from a string. Instead it should be used via StatNameManagedStorage.
518
 */
519
class StatNameStorage : public StatNameStorageBase {
520
public:
521
  // Basic constructor for when you have a name as a string, and need to
522
  // generate symbols for it.
523
  StatNameStorage(absl::string_view name, SymbolTable& table);
524
525
  // Move constructor; needed for using StatNameStorage as an
526
  // absl::flat_hash_map value.
527
86.9M
  StatNameStorage(StatNameStorage&& src) noexcept : StatNameStorageBase(std::move(src)) {}
528
529
  // Obtains new backing storage for an already existing StatName. Used to
530
  // record a computed StatName held in a temp into a more persistent data
531
  // structure.
532
  StatNameStorage(StatName src, SymbolTable& table);
533
534
  /**
535
   * Before allowing a StatNameStorage to be destroyed, you must call free()
536
   * on it, to drop the references to the symbols, allowing the SymbolTable
537
   * to shrink.
538
   */
539
  ~StatNameStorage();
540
541
  /**
542
   * Decrements the reference counts in the SymbolTable.
543
   *
544
   * @param table the symbol table.
545
   */
546
  void free(SymbolTable& table);
547
};
548
549
/**
550
 * Efficiently represents a stat name using a variable-length array of uint8_t.
551
 * This class does not own the backing store for this array; the backing-store
552
 * can be held in StatNameStorage, or it can be packed more tightly into another
553
 * object.
554
 *
555
 * When Envoy is configured with a large numbers of clusters, there are a huge
556
 * number of StatNames, so avoiding extra per-stat pointers has a significant
557
 * memory impact.
558
 */
559
class StatName {
560
public:
561
  // Constructs a StatName object directly referencing the storage of another
562
  // StatName.
563
1.02G
  explicit StatName(const SymbolTable::Storage size_and_data) : size_and_data_(size_and_data) {}
564
565
  // Constructs an empty StatName object.
566
719M
  StatName() = default;
567
568
  /**
569
   * Defines default hash function so StatName can be used as a key in an absl
570
   * hash-table without specifying a functor. See
571
   * https://abseil.io/docs/cpp/guides/hash for details.
572
   */
573
530M
  template <typename H> friend H AbslHashValue(H h, StatName stat_name) {
574
530M
    if (stat_name.empty()) {
575
0
      return H::combine(std::move(h), absl::string_view());
576
0
    }
577
578
530M
    return H::combine(std::move(h), stat_name.dataAsStringView());
579
530M
  }
580
581
  /**
582
   * Note that this hash function will return a different hash than that of
583
   * the elaborated string.
584
   *
585
   * @return uint64_t a hash of the underlying representation.
586
   */
587
323M
  uint64_t hash() const { return absl::Hash<StatName>()(*this); }
588
589
162M
  bool operator==(const StatName& rhs) const {
590
162M
    return dataAsStringView() == rhs.dataAsStringView();
591
162M
  }
592
0
  bool operator!=(const StatName& rhs) const { return !(*this == rhs); }
593
594
  /**
595
   * @return size_t the number of bytes in the symbol array, excluding the
596
   *                overhead for the size itself.
597
   */
598
  size_t dataSize() const;
599
600
  /**
601
   * @return size_t the number of bytes in the symbol array, including the
602
   *                  overhead for the size itself.
603
   */
604
712M
  size_t size() const { return SymbolTable::Encoding::totalSizeBytes(dataSize()); }
605
606
  /**
607
   * Copies the entire StatName representation into a MemBlockBuilder, including
608
   * the length metadata at the beginning. The MemBlockBuilder must not have
609
   * any other data in it.
610
   *
611
   * @param mem_block_builder the builder to receive the storage.
612
   */
613
3.18M
  void copyToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder) {
614
3.18M
    ASSERT(mem_block_builder.size() == 0);
615
3.18M
    mem_block_builder.appendData(absl::MakeSpan(size_and_data_, size()));
616
3.18M
  }
617
618
  /**
619
   * Appends the data portion of the StatName representation into a
620
   * MemBlockBuilder, excluding the length metadata. This is appropriate for
621
   * join(), where several stat-names are combined, and we only need the
622
   * aggregated length metadata.
623
   *
624
   * @param mem_block_builder the builder to receive the storage.
625
   */
626
338M
  void appendDataToMemBlock(MemBlockBuilder<uint8_t>& storage) {
627
338M
    storage.appendData(absl::MakeSpan(data(), dataSize()));
628
338M
  }
629
630
#ifndef ENVOY_CONFIG_COVERAGE
631
  void debugPrint();
632
#endif
633
634
  /**
635
   * @return A pointer to the first byte of data (skipping over size bytes).
636
   */
637
1.19G
  const uint8_t* data() const {
638
1.19G
    if (size_and_data_ == nullptr) {
639
52.9M
      return nullptr;
640
52.9M
    }
641
1.14G
    return size_and_data_ + SymbolTable::Encoding::encodingSizeBytes(dataSize());
642
1.19G
  }
643
644
470M
  const uint8_t* dataIncludingSize() const { return size_and_data_; }
645
646
  /**
647
   * @return whether this is empty.
648
   */
649
869M
  bool empty() const { return size_and_data_ == nullptr || dataSize() == 0; }
650
651
  /**
652
   * Determines whether this starts with the prefix. Note: dynamic segments
653
   * are not supported in the current implementation; this matching only works
654
   * for symbolic segments. However it OK for this to include dynamic segments
655
   * following the prefix.
656
   *
657
   * @param symbolic_prefix the prefix, which must not contain dynamic segments.
658
   */
659
  bool startsWith(StatName symbolic_prefix) const;
660
661
private:
662
  /**
663
   * Casts the raw data as a string_view. Note that this string_view will not
664
   * be in human-readable form, but it will be compatible with a string-view
665
   * hasher and comparator.
666
   */
667
855M
  absl::string_view dataAsStringView() const {
668
855M
    return {reinterpret_cast<const char*>(data()),
669
855M
            static_cast<absl::string_view::size_type>(dataSize())};
670
855M
  }
671
672
  const uint8_t* size_and_data_{nullptr};
673
};
674
675
192M
StatName StatNameStorageBase::statName() const { return StatName(bytes_.get()); }
676
677
/**
678
 * Contains the backing store for a StatName and enough context so it can
679
 * self-delete through RAII. This works by augmenting StatNameStorage with a
680
 * reference to the SymbolTable&, so it has an extra 8 bytes of footprint. It
681
 * is intended to be used in cases where simplicity of implementation is more
682
 * important than byte-savings, for example:
683
 *   - outside the stats system
684
 *   - in tests
685
 *   - as a scoped temp in a function
686
 * Due to the extra 8 bytes per instance, scalability should be taken into
687
 * account before using this as (say) a value or key in a map. In those
688
 * scenarios, it would be better to store the SymbolTable reference once
689
 * for the entire map.
690
 *
691
 * In the stat structures, we generally use StatNameStorage to avoid the
692
 * per-stat overhead.
693
 */
694
class StatNameManagedStorage : public StatNameStorage {
695
public:
696
  // Basic constructor for when you have a name as a string, and need to
697
  // generate symbols for it.
698
  StatNameManagedStorage(absl::string_view name, SymbolTable& table)
699
6.94M
      : StatNameStorage(name, table), symbol_table_(table) {}
700
  StatNameManagedStorage(StatNameManagedStorage&& src) noexcept
701
17.3k
      : StatNameStorage(std::move(src)), symbol_table_(src.symbol_table_) {}
702
  StatNameManagedStorage(StatName src, SymbolTable& table) noexcept
703
0
      : StatNameStorage(src, table), symbol_table_(table) {}
704
705
6.96M
  ~StatNameManagedStorage() { free(symbol_table_); } // NOLINT(clang-analyzer-unix.Malloc)
706
707
private:
708
  SymbolTable& symbol_table_;
709
};
710
711
/**
712
 * Holds backing-store for a dynamic stat, where are no global locks needed
713
 * to create a StatName from a string, but there is no sharing of token data
714
 * between names, so there may be more memory consumed.
715
 */
716
class StatNameDynamicStorage : public StatNameStorageBase {
717
public:
718
  // Basic constructor based on a name. Note the table is used for a call to
719
  // encode the name, but no locks are taken in either implementation of the
720
  // SymbolTable api.
721
  StatNameDynamicStorage(absl::string_view name, SymbolTable& table)
722
24.2M
      : StatNameStorageBase(table.makeDynamicStorage(name)) {}
723
  // Move constructor.
724
  StatNameDynamicStorage(StatNameDynamicStorage&& src) noexcept
725
55.5M
      : StatNameStorageBase(std::move(src)) {}
726
};
727
728
/**
729
 * Maintains storage for a collection of StatName objects. Like
730
 * StatNameManagedStorage, this has an RAII usage model, taking
731
 * care of decrementing ref-counts in the SymbolTable for all
732
 * contained StatNames on destruction or on clear();
733
 *
734
 * Example usage:
735
 *   StatNamePool pool(symbol_table);
736
 *   StatName name1 = pool.add("name1");
737
 *   StatName name2 = pool.add("name2");
738
 *   const uint8_t* storage = pool.addReturningStorage("name3");
739
 *   StatName name3(storage);
740
 */
741
class StatNamePool {
742
public:
743
5.43M
  explicit StatNamePool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
744
5.43M
  ~StatNamePool() { clear(); }
745
746
  /**
747
   * Removes all StatNames from the pool.
748
   */
749
  void clear();
750
751
  /**
752
   * @param name the name to add the container.
753
   * @return the StatName held in the container for this name.
754
   */
755
  StatName add(absl::string_view name);
756
757
  /**
758
   * Adds the StatName to the pool preserving the representation.
759
   * This avoids stringifying if we already have a StatName object
760
   * and is useful if parts of the StatName are dynamically encoded.
761
   * @param name the stat name to add the container.
762
   * @return the StatName held in the container for this name.
763
   *
764
   */
765
  StatName add(StatName name);
766
767
  /**
768
   * Does essentially the same thing as add(), but returns the storage as a
769
   * pointer which can later be used to create a StatName. This can be used
770
   * to accumulate a vector of uint8_t* which can later be used to create
771
   * StatName objects on demand.
772
   *
773
   * The use-case for this is in source/common/http/codes.cc, where we have a
774
   * fixed sized array of atomic pointers, indexed by HTTP code. This API
775
   * enables storing the allocated stat-name in that array of atomics, which
776
   * enables content-avoidance when finding StatNames for frequently used HTTP
777
   * codes.
778
   *
779
   * @param name the name to add the container.
780
   * @return a pointer to the bytes held in the container for this name, suitable for
781
   *         using to construct a StatName.
782
   */
783
  const uint8_t* addReturningStorage(absl::string_view name);
784
785
private:
786
  // We keep the stat names in a vector of StatNameStorage, storing the
787
  // SymbolTable reference separately. This saves 8 bytes per StatName,
788
  // at the cost of having a destructor that calls clear().
789
  SymbolTable& symbol_table_;
790
  std::vector<StatNameStorage> storage_vector_;
791
};
792
793
/**
794
 * Maintains storage for a collection of StatName objects constructed from
795
 * dynamically discovered strings. Like StatNameDynamicStorage, this has an RAII
796
 * usage model. Creating StatNames with this interface do not incur a
797
 * SymbolTable lock, but tokens are not shared across StatNames.
798
 *
799
 * The SymbolTable is required as a constructor argument to assist in encoding
800
 * the stat-names.
801
 *
802
 * Example usage:
803
 *   StatNameDynamicPool pool(symbol_table);
804
 *   StatName name1 = pool.add("name1");
805
 *   StatName name2 = pool.add("name2");
806
 *
807
 * Note; StatNameDynamicPool::add("foo") != StatNamePool::add("foo"), even
808
 * though their string representations are identical. They also will not match
809
 * in map lookups. Tests for StatName with dynamic components must therefore
810
 * be looked up by string, via Stats::TestUtil::TestStore.
811
 */
812
class StatNameDynamicPool {
813
public:
814
4.06M
  explicit StatNameDynamicPool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
815
816
  /**
817
   * @param name the name to add the container.
818
   * @return the StatName held in the container for this name.
819
   */
820
  StatName add(absl::string_view name);
821
822
private:
823
  // We keep the stat names in a vector of StatNameStorage, storing the
824
  // SymbolTable reference separately. This saves 8 bytes per StatName,
825
  // at the cost of having a destructor that calls clear().
826
  SymbolTable& symbol_table_;
827
  std::vector<StatNameDynamicStorage> storage_vector_;
828
};
829
830
// Represents an ordered container of StatNames. The encoding for each StatName
831
// is byte-packed together, so this carries less overhead than allocating the
832
// storage separately. The trade-off is there is no random access; you can only
833
// iterate through the StatNames.
834
//
835
// The maximum size of the list is 255 elements, so the length can fit in a
836
// byte. It would not be difficult to increase this, but there does not appear
837
// to be a current need.
838
class StatNameList {
839
public:
840
  ~StatNameList();
841
842
  /**
843
   * @return true if populate() has been called on this list.
844
   */
845
105M
  bool populated() const { return storage_ != nullptr; }
846
847
  /**
848
   * Iterates over each StatName in the list, calling f(StatName). f() should
849
   * return true to keep iterating, or false to end the iteration.
850
   *
851
   * @param f The function to call on each stat.
852
   */
853
  void iterate(const std::function<bool(StatName)>& f) const;
854
855
  /**
856
   * Frees each StatName in the list. Failure to call this before destruction
857
   * results in an ASSERT at destruction of the list and the SymbolTable.
858
   *
859
   * This is not done as part of destruction as the SymbolTable may already
860
   * be destroyed.
861
   *
862
   * @param symbol_table the symbol table.
863
   */
864
  void clear(SymbolTable& symbol_table);
865
866
private:
867
  friend class SymbolTable;
868
869
  /**
870
   * Moves the specified storage into the list. The storage format is an
871
   * array of bytes, organized like this:
872
   *
873
   * [0] The number of elements in the list (must be < 256).
874
   * [1] low order 8 bits of the number of symbols in the first element.
875
   * [2] high order 8 bits of the number of symbols in the first element.
876
   * [3...] the symbols in the first element.
877
   * ...
878
   *
879
   *
880
   * For SymbolTable, each symbol is 1 or more bytes, in a variable-length
881
   * encoding. See SymbolTable::Encoding::addSymbol for details.
882
   */
883
52.9M
  void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); }
884
885
  SymbolTable::StoragePtr storage_;
886
};
887
888
// Value-templatized hash-map with StatName key.
889
template <class T> using StatNameHashMap = absl::flat_hash_map<StatName, T>;
890
891
// Hash-set of StatNames
892
using StatNameHashSet = absl::flat_hash_set<StatName>;
893
894
// Helper class for sorting StatNames.
895
struct StatNameLessThan {
896
0
  StatNameLessThan(const SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
897
0
  bool operator()(const StatName& a, const StatName& b) const {
898
0
    return symbol_table_.lessThan(a, b);
899
0
  }
900
901
  const SymbolTable& symbol_table_;
902
};
903
904
struct HeterogeneousStatNameHash {
905
  // Specifying is_transparent indicates to the library infrastructure that
906
  // type-conversions should not be applied when calling find(), but instead
907
  // pass the actual types of the contained and searched-for objects directly to
908
  // these functors. See
909
  // https://en.cppreference.com/w/cpp/utility/functional/less_void for an
910
  // official reference, and https://abseil.io/tips/144 for a description of
911
  // using it in the context of absl.
912
  using is_transparent = void; // NOLINT(readability-identifier-naming)
913
914
0
  size_t operator()(StatName a) const { return a.hash(); }
915
0
  size_t operator()(const StatNameStorage& a) const { return a.statName().hash(); }
916
};
917
918
struct HeterogeneousStatNameEqual {
919
  // See description for HeterogeneousStatNameHash::is_transparent.
920
  using is_transparent = void; // NOLINT(readability-identifier-naming)
921
922
0
  size_t operator()(StatName a, StatName b) const { return a == b; }
923
0
  size_t operator()(const StatNameStorage& a, const StatNameStorage& b) const {
924
0
    return a.statName() == b.statName();
925
0
  }
926
0
  size_t operator()(StatName a, const StatNameStorage& b) const { return a == b.statName(); }
927
0
  size_t operator()(const StatNameStorage& a, StatName b) const { return a.statName() == b; }
928
};
929
930
// Encapsulates a set<StatNameStorage>. We use containment here rather than a
931
// 'using' alias because we need to ensure that when the set is destructed,
932
// StatNameStorage::free(symbol_table) is called on each entry. It is a little
933
// easier at the call-sites in thread_local_store.cc to implement this an
934
// explicit free() method, analogous to StatNameStorage::free(), compared to
935
// storing a SymbolTable reference in the class and doing the free in the
936
// destructor, like StatNameManagedStorage.
937
class StatNameStorageSet {
938
public:
939
  using HashSet =
940
      absl::flat_hash_set<StatNameStorage, HeterogeneousStatNameHash, HeterogeneousStatNameEqual>;
941
  using Iterator = HashSet::iterator;
942
943
  ~StatNameStorageSet();
944
945
  /**
946
   * Releases all symbols held in this set. Must be called prior to destruction.
947
   *
948
   * @param symbol_table The symbol table that owns the symbols.
949
   */
950
  void free(SymbolTable& symbol_table);
951
952
  /**
953
   * @param storage The StatNameStorage to add to the set.
954
   */
955
0
  std::pair<HashSet::iterator, bool> insert(StatNameStorage&& storage) {
956
0
    return hash_set_.insert(std::move(storage));
957
0
  }
958
959
  /**
960
   * @param stat_name The stat_name to find.
961
   * @return the iterator pointing to the stat_name, or end() if not found.
962
   */
963
0
  Iterator find(StatName stat_name) { return hash_set_.find(stat_name); }
964
965
  /**
966
   * @return the end-marker.
967
   */
968
0
  Iterator end() { return hash_set_.end(); }
969
970
  /**
971
   * @return the number of elements in the set.
972
   */
973
0
  size_t size() const { return hash_set_.size(); }
974
975
private:
976
  HashSet hash_set_;
977
};
978
979
// Captures StatNames for lookup by string, keeping a map of 'built-ins' that is
980
// expected to be populated during initialization.
981
//
982
// Ideally, builtins should be added during process initialization, in the
983
// outermost relevant context. And as the builtins map is not mutex protected,
984
// builtins must *not* be added to an existing StatNameSet in the request-path.
985
//
986
// It is fine to populate a new StatNameSet when (for example) an xDS
987
// message reveals a new set of names to be used as stats. The population must
988
// be completed prior to exposing the new StatNameSet to worker threads.
989
//
990
// To create stats using names discovered in the request path, dynamic stat
991
// names must be used (see StatNameDynamicStorage). Consider using helper
992
// methods such as Stats::Utility::counterFromElements in common/stats/utility.h
993
// to simplify the process of allocating and combining stat names and creating
994
// counters, gauges, and histograms from them.
995
class StatNameSet {
996
public:
997
  // This object must be instantiated via SymbolTable::makeSet(), thus constructor is private.
998
999
  /**
1000
   * Adds a string to the builtin map, which is not mutex protected. This map is
1001
   * always consulted first as a hit there means no lock is required.
1002
   *
1003
   * Builtins can only be added immediately after construction, as the builtins
1004
   * map is not mutex-protected.
1005
   */
1006
  void rememberBuiltin(absl::string_view str);
1007
1008
  /**
1009
   * Remembers every string in a container as builtins.
1010
   */
1011
818
  template <class StringContainer> void rememberBuiltins(const StringContainer& container) {
1012
5.72k
    for (const auto& str : container) {
1013
5.72k
      rememberBuiltin(str);
1014
5.72k
    }
1015
818
  }
void Envoy::Stats::StatNameSet::rememberBuiltins<std::__1::vector<char const*, std::__1::allocator<char const*> > >(std::__1::vector<char const*, std::__1::allocator<char const*> > const&)
Line
Count
Source
1011
818
  template <class StringContainer> void rememberBuiltins(const StringContainer& container) {
1012
5.72k
    for (const auto& str : container) {
1013
5.72k
      rememberBuiltin(str);
1014
5.72k
    }
1015
818
  }
Unexecuted instantiation: void Envoy::Stats::StatNameSet::rememberBuiltins<absl::lts_20230802::flat_hash_set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20230802::container_internal::StringHash, absl::lts_20230802::container_internal::StringEq, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >(absl::lts_20230802::flat_hash_set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::lts_20230802::container_internal::StringHash, absl::lts_20230802::container_internal::StringEq, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&)
1016
818
  void rememberBuiltins(const std::vector<const char*>& container) {
1017
818
    rememberBuiltins<std::vector<const char*>>(container);
1018
818
  }
1019
1020
  /**
1021
   * Finds a builtin StatName by name. If the builtin has not been registered,
1022
   * then the fallback is returned.
1023
   *
1024
   * @return the StatName or fallback.
1025
   */
1026
  StatName getBuiltin(absl::string_view token, StatName fallback) const;
1027
1028
  /**
1029
   * Adds a StatName using the pool, but without remembering it in any maps.
1030
   *
1031
   * For convenience, StatNameSet offers pass-through thread-safe access to
1032
   * its mutex-protected pool. This is useful in constructor initializers, when
1033
   * StatNames are needed both from compile-time constants, as well as from
1034
   * other constructor args, e.g.
1035
   *    MyClass(const std::vector<absl::string_view>& strings, Stats::SymbolTable& symbol_table)
1036
   *        : stat_name_set_(symbol_table),
1037
   *          known_const_(stat_name_set_.add("known_const")) { // unmapped constants from pool
1038
   *      stat_name_set_.rememberBuiltins(strings); // mapped builtins.
1039
   *    }
1040
   * This avoids the need to make two different pools; one backing the
1041
   * StatNameSet mapped entries, and the other backing the set passed in via the
1042
   * constructor.
1043
   *
1044
   * @param str The string to add as a StatName
1045
   * @return The StatName for str.
1046
   */
1047
49.2k
  StatName add(absl::string_view str) {
1048
49.2k
    absl::MutexLock lock(&mutex_);
1049
49.2k
    return pool_.add(str);
1050
49.2k
  }
1051
1052
private:
1053
  friend class SymbolTable;
1054
1055
  StatNameSet(SymbolTable& symbol_table, absl::string_view name);
1056
1057
  const std::string name_;
1058
  Stats::StatNamePool pool_ ABSL_GUARDED_BY(mutex_);
1059
  mutable absl::Mutex mutex_;
1060
  using StringStatNameMap = absl::flat_hash_map<std::string, Stats::StatName>;
1061
  StringStatNameMap builtin_stat_names_;
1062
};
1063
1064
template <class GetStatName, class Obj>
1065
bool SymbolTable::StatNameCompare<GetStatName, Obj>::operator()(const Obj& a, const Obj& b) const {
1066
  StatName a_stat_name = getter_(a);
1067
  StatName b_stat_name = getter_(b);
1068
  return symbol_table_.lessThanLockHeld(a_stat_name, b_stat_name);
1069
}
1070
1071
using SymbolTablePtr = std::unique_ptr<SymbolTable>;
1072
1073
// TODO(jmarantz): rename all remaining ~47 occurrences of SymbolTableImpl in
1074
// the codebase to SymbolTable, and drop this alias.
1075
using SymbolTableImpl = SymbolTable;
1076
1077
} // namespace Stats
1078
} // namespace Envoy