Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/source/common/stats/symbol_table.h
Line
Count
Source (jump to first uncovered line)
1
#pragma once
2
3
#include <algorithm>
4
#include <memory>
5
#include <stack>
6
#include <string>
7
#include <vector>
8
9
#include "source/common/common/assert.h"
10
#include "source/common/common/lock_guard.h"
11
#include "source/common/common/mem_block_builder.h"
12
#include "source/common/common/non_copyable.h"
13
#include "source/common/common/thread.h"
14
#include "source/common/common/utility.h"
15
#include "source/common/stats/recent_lookups.h"
16
17
#include "absl/container/fixed_array.h"
18
#include "absl/container/flat_hash_map.h"
19
#include "absl/container/inlined_vector.h"
20
#include "absl/strings/str_join.h"
21
#include "absl/strings/str_split.h"
22
23
namespace Envoy {
24
namespace Stats {
25
26
class StatName;
27
using StatNameVec = absl::InlinedVector<StatName, 8>;
28
class StatNameList;
29
class StatNameSet;
30
using StatNameSetPtr = std::unique_ptr<StatNameSet>;
31
32
/**
33
 * Holds a range of indexes indicating which parts of a stat-name are
34
 * dynamic. This is used to transfer stats from hot-restart parent to child,
35
 * retaining the same name structure.
36
 */
37
using DynamicSpan = std::pair<uint32_t, uint32_t>;
38
using DynamicSpans = std::vector<DynamicSpan>;
39
40
/** A Symbol represents a string-token with a small index. */
41
using Symbol = uint32_t;
42
43
/** Transient representations of a vector of 32-bit symbols */
44
using SymbolVec = std::vector<Symbol>;
45
46
/**
47
 * SymbolTableImpl manages a namespace optimized for stats, which are typically
48
 * composed of arrays of "."-separated tokens, with a significant overlap
49
 * between the tokens. Each token is mapped to a Symbol (uint32_t) and
50
 * reference-counted so that no-longer-used symbols can be reclaimed.
51
 *
52
 * We use a uint8_t array to encode a "."-deliminated stat-name into arrays of
53
 * integer symbol IDs in order to conserve space, as in practice the
54
 * majority of token instances in stat names draw from a fairly small set of
55
 * common names, typically less than 100. The format is somewhat similar to
56
 * UTF-8, with a variable-length array of uint8_t. See the implementation for
57
 * details.
58
 *
59
 * StatNameStorage can be used to manage memory for the byte-encoding. Not all
60
 * StatNames are backed by StatNameStorage -- the storage may be inlined into
61
 * another object such as HeapStatData. StaNameStorage is not fully RAII --
62
 * instead the owner must call free(SymbolTable&) explicitly before
63
 * StatNameStorage is destructed. This saves 8 bytes of storage per stat,
64
 * relative to holding a SymbolTable& in each StatNameStorage object.
65
 *
66
 * A StatName is a copyable and assignable reference to this storage. It does
67
 * not own the storage or keep it alive via reference counts; the owner must
68
 * ensure the backing store lives as long as the StatName.
69
 *
70
 * The underlying Symbol / SymbolVec data structures are private to the
71
 * impl. One side effect of the non-monotonically-increasing symbol counter is
72
 * that if a string is encoded, the resulting stat is destroyed, and then that
73
 * same string is re-encoded, it may or may not encode to the same underlying
74
 * symbol.
75
 */
76
class SymbolTable final {
77
public:
78
  /**
79
   * Efficient byte-encoded storage of an array of tokens. The most common
80
   * tokens are typically < 127, and are represented directly. tokens >= 128
81
   * spill into the next byte, allowing for tokens of arbitrary numeric value to
82
   * be stored. As long as the most common tokens are low-valued, the
83
   * representation is space-efficient. This scheme is similar to UTF-8. The
84
   * token ordering is dependent on the order in which stat-names are encoded
85
   * into the SymbolTable, which will not be optimal, but in practice appears
86
   * to be pretty good.
87
   *
88
   * This is exposed in the interface for the benefit of join(), which is
89
   * used in the hot-path to append two stat-names into a temp without taking
90
   * locks. This is used then in thread-local cache lookup, so that once warm,
91
   * no locks are taken when looking up stats.
92
   */
93
  using Storage = uint8_t[];
94
  using StoragePtr = std::unique_ptr<Storage>;
95
96
  /**
97
   * Intermediate representation for a stat-name. This helps store multiple
98
   * names in a single packed allocation. First we encode each desired name,
99
   * then sum their sizes for the single packed allocation. This is used to
100
   * store MetricImpl's tags and tagExtractedName.
101
   */
102
  class Encoding {
103
  public:
104
    /**
105
     * Before destructing SymbolEncoding, you must call moveToMemBlock. This
106
     * transfers ownership, and in particular, the responsibility to call
107
     * SymbolTable::clear() on all referenced symbols. If we ever wanted to be
108
     * able to destruct a SymbolEncoding without transferring it we could add a
109
     * clear(SymbolTable&) method.
110
     */
111
    ~Encoding();
112
113
    /**
114
     * Encodes a token into the vec.
115
     *
116
     * @param symbol the symbol to encode.
117
     */
118
    void addSymbols(const SymbolVec& symbols);
119
120
    /**
121
     * Decodes a uint8_t array into a SymbolVec.
122
     */
123
    static SymbolVec decodeSymbols(StatName stat_name);
124
125
    /**
126
     * Decodes a uint8_t array into a sequence of symbols and literal strings.
127
     * There are distinct lambdas for these two options. Calls to these lambdas
128
     * will be interleaved based on the sequencing of literal strings and
129
     * symbols held in the data.
130
     *
131
     * @param array the StatName encoded as a uint8_t array.
132
     * @param size the size of the array in bytes.
133
     * @param symbol_token_fn a function to be called whenever a symbol is encountered in the array.
134
     * @param string_view_token_fn a function to be called whenever a string literal is encountered.
135
     */
136
    static void decodeTokens(StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
137
                             const std::function<void(absl::string_view)>& string_view_token_fn);
138
139
    /**
140
     * Returns the number of bytes required to represent StatName as a uint8_t
141
     * array, including the encoded size.
142
     */
143
85.8M
    size_t bytesRequired() const {
144
85.8M
      return data_bytes_required_ + encodingSizeBytes(data_bytes_required_);
145
85.8M
    }
146
147
    /**
148
     * Moves the contents of the vector into an allocated array. The array
149
     * must have been allocated with bytesRequired() bytes.
150
     *
151
     * @param mem_block_builder memory block to receive the encoded bytes.
152
     */
153
    void moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder);
154
155
    /**
156
     * @param number A number to encode in a variable length byte-array.
157
     * @return The number of bytes it would take to encode the number.
158
     */
159
    static size_t encodingSizeBytes(uint64_t number);
160
161
    /**
162
     * @param num_data_bytes The number of bytes in a data-block.
163
     * @return The total number of bytes required for the data-block and its encoded size.
164
     */
165
999M
    static size_t totalSizeBytes(size_t num_data_bytes) {
166
999M
      return encodingSizeBytes(num_data_bytes) + num_data_bytes;
167
999M
    }
168
169
    /**
170
     * Saves the specified number into the byte array, returning the next byte.
171
     * There is no guarantee that bytes will be aligned, so we can't cast to a
172
     * uint16_t* and assign, but must individually copy the bytes.
173
     *
174
     * Requires that the buffer be sized to accommodate encodingSizeBytes(number).
175
     *
176
     * @param number the number to write.
177
     * @param mem_block the memory into which to append the number.
178
     */
179
    static void appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block);
180
181
    /**
182
     * Appends stat_name's bytes into mem_block, which must have been allocated to
183
     * allow for stat_name.size() bytes.
184
     *
185
     * @param stat_name the stat_name to append.
186
     * @param mem_block the block of memory to append to.
187
     */
188
    static void appendToMemBlock(StatName stat_name, MemBlockBuilder<uint8_t>& mem_block);
189
190
    /**
191
     * Decodes a byte-array containing a variable-length number.
192
     *
193
     * @param The encoded byte array, written previously by appendEncoding.
194
     * @return A pair containing the decoded number, and the number of bytes consumed from encoding.
195
     */
196
    static std::pair<uint64_t, size_t> decodeNumber(const uint8_t* encoding);
197
198
  private:
199
    friend class StatName;
200
    friend class SymbolTable;
201
    class TokenIter;
202
203
    size_t data_bytes_required_{0};
204
    MemBlockBuilder<uint8_t> mem_block_;
205
  };
206
207
  SymbolTable();
208
  ~SymbolTable();
209
210
  /**
211
   * Decodes a vector of symbols back into its period-delimited stat name. If
212
   * decoding fails on any part of the symbol_vec, we release_assert and crash,
213
   * since this should never happen, and we don't want to continue running
214
   * with a corrupt stats set.
215
   *
216
   * @param stat_name the stat name.
217
   * @return std::string stringified stat_name.
218
   */
219
  std::string toString(const StatName& stat_name) const;
220
221
  /**
222
   * @return uint64_t the number of symbols in the symbol table.
223
   */
224
  uint64_t numSymbols() const;
225
226
  /**
227
   * Determines whether one StatName lexically precedes another. Note that
228
   * the lexical order may not exactly match the lexical order of the
229
   * elaborated strings. For example, stat-name of "-.-" would lexically
230
   * sort after "---" but when encoded as a StatName would come lexically
231
   * earlier. In practice this is unlikely to matter as those are not
232
   * reasonable names for Envoy stats.
233
   *
234
   * Note that this operation has to be performed with the context of the
235
   * SymbolTable so that the individual Symbol objects can be converted
236
   * into strings for lexical comparison.
237
   *
238
   * @param a the first stat name
239
   * @param b the second stat name
240
   * @return bool true if a lexically precedes b.
241
   */
242
  bool lessThan(const StatName& a, const StatName& b) const;
243
244
  /**
245
   * Joins two or more StatNames. For example if we have StatNames for {"a.b",
246
   * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The
247
   * advantage of using this representation is that it avoids having to
248
   * decode/encode into the elaborated form, and does not require locking the
249
   * SymbolTable.
250
   *
251
   * Note that this method does not bump reference counts on the referenced
252
   * Symbols in the SymbolTable, so it's only valid as long for the lifetime of
253
   * the joined StatNames.
254
   *
255
   * This is intended for use doing cached name lookups of scoped stats, where
256
   * the scope prefix and the names to combine it with are already in StatName
257
   * form. Using this class, they can be combined without accessing the
258
   * SymbolTable or, in particular, taking its lock.
259
   *
260
   * @param stat_names the names to join.
261
   * @return Storage allocated for the joined name.
262
   */
263
  StoragePtr join(const StatNameVec& stat_names) const;
264
265
  /**
266
   * Populates a StatNameList from a list of encodings. This is not done at
267
   * construction time to enable StatNameList to be instantiated directly in
268
   * a class that doesn't have a live SymbolTable when it is constructed.
269
   *
270
   * @param names A pointer to the first name in an array, allocated by the caller.
271
   * @param num_names The number of names.
272
   * @param list The StatNameList representing the stat names.
273
   */
274
  void populateList(const StatName* names, uint32_t num_names, StatNameList& list);
275
276
#ifndef ENVOY_CONFIG_COVERAGE
277
  void debugPrint() const;
278
#endif
279
280
  /**
281
   * Creates a StatNameSet.
282
   *
283
   * @param name the name of the set.
284
   * @return the set.
285
   */
286
  StatNameSetPtr makeSet(absl::string_view name);
287
288
  using RecentLookupsFn = std::function<void(absl::string_view, uint64_t)>;
289
290
  /**
291
   * Calls the provided function with the name of the most recently looked-up
292
   * symbols, including lookups on any StatNameSets, and with a count of
293
   * the recent lookups on that symbol.
294
   *
295
   * @param iter the function to call for every recent item.
296
   */
297
  uint64_t getRecentLookups(const RecentLookupsFn&) const;
298
299
  /**
300
   * Clears the recent-lookups structures.
301
   */
302
  void clearRecentLookups();
303
304
  /**
305
   * Sets the recent-lookup capacity.
306
   */
307
  void setRecentLookupCapacity(uint64_t capacity);
308
309
  /**
310
   * @return The configured recent-lookup tracking capacity.
311
   */
312
  uint64_t recentLookupCapacity() const;
313
314
  /**
315
   * Identifies the dynamic components of a stat_name into an array of integer
316
   * pairs, indicating the begin/end of spans of tokens in the stat-name that
317
   * are created from StatNameDynamicStore or StatNameDynamicPool.
318
   *
319
   * This can be used to reconstruct the same exact StatNames in
320
   * StatNames::mergeStats(), to enable stat continuity across hot-restart.
321
   *
322
   * @param stat_name the input stat name.
323
   * @return the array of pairs indicating the bounds.
324
   */
325
  DynamicSpans getDynamicSpans(StatName stat_name) const;
326
327
  bool lessThanLockHeld(const StatName& a, const StatName& b) const;
328
329
  template <class GetStatName, class Obj> struct StatNameCompare {
330
    StatNameCompare(const SymbolTable& symbol_table, GetStatName getter)
331
        : symbol_table_(symbol_table), getter_(getter) {}
332
333
    bool operator()(const Obj& a, const Obj& b) const;
334
335
    const SymbolTable& symbol_table_;
336
    GetStatName getter_;
337
  };
338
339
  /**
340
   * Sorts a range by StatName. This API is more efficient than
341
   * calling std::sort directly as it takes a single lock for the
342
   * entire sort, rather than locking on each comparison.
343
   *
344
   * @param begin the beginning of the range to sort
345
   * @param end the end of the range to sort
346
   * @param get_stat_name a functor that takes an Obj and returns a StatName.
347
   */
348
  template <class Obj, class Iter, class GetStatName>
349
  void sortByStatNames(Iter begin, Iter end, GetStatName get_stat_name) const {
350
    // Grab the lock once before sorting begins, so we don't have to re-take
351
    // it on every comparison.
352
    Thread::LockGuard lock(lock_);
353
    StatNameCompare<GetStatName, Obj> compare(*this, get_stat_name);
354
    std::sort(begin, end, compare);
355
  }
356
357
private:
358
  friend class StatName;
359
  friend class StatNameTest;
360
  friend class StatNameDeathTest;
361
  friend class StatNameDynamicStorage;
362
  friend class StatNameList;
363
  friend class StatNameStorage;
364
365
  /**
366
   * Encodes 'name' into the symbol table. Bumps reference counts for referenced
367
   * symbols. The caller must manage the storage, and is responsible for calling
368
   * SymbolTable::free() to release the reference counts.
369
   *
370
   * @param name The name to encode.
371
   * @return The encoded name, transferring ownership to the caller.
372
   *
373
   */
374
  StoragePtr encode(absl::string_view name);
375
  StoragePtr makeDynamicStorage(absl::string_view name);
376
377
  /**
378
   * Since SymbolTable does manual reference counting, a client of SymbolTable
379
   * must manually call free(symbol_vec) when it is freeing the backing store
380
   * for a StatName. This way, the symbol table will grow and shrink
381
   * dynamically, instead of being write-only.
382
   *
383
   * @param stat_name the stat name.
384
   */
385
  void free(const StatName& stat_name);
386
387
  /**
388
   * StatName backing-store can be managed by callers in a variety of ways
389
   * to minimize overhead. But any persistent reference to a StatName needs
390
   * to hold onto its own reference-counts for all symbols. This method
391
   * helps callers ensure the symbol-storage is maintained for the lifetime
392
   * of a reference.
393
   *
394
   * @param stat_name the stat name.
395
   */
396
  void incRefCount(const StatName& stat_name);
397
398
  struct SharedSymbol {
399
15.9M
    SharedSymbol(Symbol symbol) : symbol_(symbol) {}
400
401
    Symbol symbol_;
402
    uint32_t ref_count_{1};
403
  };
404
405
  // This must be held during both encode() and free().
406
  mutable Thread::MutexBasicLockable lock_;
407
408
  /**
409
   * Decodes a uint8_t array into an array of period-delimited strings. Note
410
   * that some of the strings may have periods in them, in the case where
411
   * StatNameDynamicStorage was used.
412
   *
413
   * If decoding fails on any part of the encoding, we RELEASE_ASSERT and crash,
414
   * since this should never happen, and we don't want to continue running with
415
   * corrupt stat names.
416
   *
417
   * @param array the uint8_t array of encoded symbols and dynamic strings.
418
   * @param size the size of the array in bytes.
419
   * @return std::string the retrieved stat name.
420
   */
421
  std::vector<absl::string_view> decodeStrings(StatName stat_name) const;
422
423
  /**
424
   * Convenience function for encode(), symbolizing one string segment at a time.
425
   *
426
   * @param sv the individual string to be encoded as a symbol.
427
   * @return Symbol the encoded string.
428
   */
429
  Symbol toSymbol(absl::string_view sv) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
430
431
  /**
432
   * Convenience function for decode(), decoding one symbol at a time.
433
   *
434
   * @param symbol the individual symbol to be decoded.
435
   * @return absl::string_view the decoded string.
436
   */
437
  absl::string_view fromSymbol(Symbol symbol) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
438
439
  /**
440
   * Stages a new symbol for use. To be called after a successful insertion.
441
   */
442
  void newSymbol();
443
444
  /**
445
   * Tokenizes name, finds or allocates symbols for each token, and adds them
446
   * to encoding.
447
   *
448
   * @param name The name to tokenize.
449
   * @param encoding The encoding to write to.
450
   */
451
  void addTokensToEncoding(absl::string_view name, Encoding& encoding);
452
453
0
  Symbol monotonicCounter() {
454
0
    Thread::LockGuard lock(lock_);
455
0
    return monotonic_counter_;
456
0
  }
457
458
  // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so
459
  // that if insertion succeeds, the value written is the correct one.
460
  Symbol next_symbol_ ABSL_GUARDED_BY(lock_);
461
462
  // If the free pool is exhausted, we monotonically increase this counter.
463
  Symbol monotonic_counter_;
464
465
  // Bitmap implementation.
466
  // The encode map stores both the symbol and the ref count of that symbol.
467
  // Using absl::string_view lets us only store the complete string once, in the decode map.
468
  using EncodeMap = absl::flat_hash_map<absl::string_view, SharedSymbol>;
469
  using DecodeMap = absl::flat_hash_map<Symbol, InlineStringPtr>;
470
  EncodeMap encode_map_ ABSL_GUARDED_BY(lock_);
471
  DecodeMap decode_map_ ABSL_GUARDED_BY(lock_);
472
473
  // Free pool of symbols for re-use.
474
  // TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
475
  // using an Envoy::IntervalSet.
476
  std::stack<Symbol> pool_ ABSL_GUARDED_BY(lock_);
477
  RecentLookups recent_lookups_ ABSL_GUARDED_BY(lock_);
478
};
479
480
// Base class for holding the backing-storing for a StatName. The two derived
481
// classes, StatNameStorage and StatNameDynamicStorage, share a need to hold an
482
// array of bytes, but use different representations.
483
class StatNameStorageBase {
484
public:
485
112M
  StatNameStorageBase(SymbolTable::StoragePtr&& bytes) : bytes_(std::move(bytes)) {}
486
3.18M
  StatNameStorageBase() = default;
487
488
  /**
489
   * @return a reference to the owned storage.
490
   */
491
  inline StatName statName() const;
492
493
  /**
494
   * @return the encoded data as a const pointer.
495
   */
496
361M
  const uint8_t* bytes() const { return bytes_.get(); }
497
498
protected:
499
3.18M
  void setBytes(SymbolTable::StoragePtr&& bytes) { bytes_ = std::move(bytes); }
500
89.0M
  void clear() { bytes_.reset(); }
501
502
private:
503
  SymbolTable::StoragePtr bytes_;
504
};
505
506
/**
507
 * Holds backing storage for a StatName. Usage of this is not required, as some
508
 * applications may want to hold multiple StatName objects in one contiguous
509
 * uint8_t array, or embed the characters directly in another structure.
510
 *
511
 * This is intended for embedding in other data structures that have access
512
 * to a SymbolTable. StatNameStorage::free(symbol_table) must be called prior
513
 * to allowing the StatNameStorage object to be destructed, otherwise an assert
514
 * will fire to guard against symbol-table leaks.
515
 *
516
 * Thus this class is inconvenient to directly use as temp storage for building
517
 * a StatName from a string. Instead it should be used via StatNameManagedStorage.
518
 */
519
class StatNameStorage : public StatNameStorageBase {
520
public:
521
  // Basic constructor for when you have a name as a string, and need to
522
  // generate symbols for it.
523
  StatNameStorage(absl::string_view name, SymbolTable& table);
524
525
  // Move constructor; needed for using StatNameStorage as an
526
  // absl::flat_hash_map value.
527
167M
  StatNameStorage(StatNameStorage&& src) noexcept : StatNameStorageBase(std::move(src)) {}
528
529
  // Obtains new backing storage for an already existing StatName. Used to
530
  // record a computed StatName held in a temp into a more persistent data
531
  // structure.
532
  StatNameStorage(StatName src, SymbolTable& table);
533
534
  /**
535
   * Before allowing a StatNameStorage to be destroyed, you must call free()
536
   * on it, to drop the references to the symbols, allowing the SymbolTable
537
   * to shrink.
538
   */
539
  ~StatNameStorage();
540
541
  /**
542
   * Decrements the reference counts in the SymbolTable.
543
   *
544
   * @param table the symbol table.
545
   */
546
  void free(SymbolTable& table);
547
};
548
549
/**
550
 * Efficiently represents a stat name using a variable-length array of uint8_t.
551
 * This class does not own the backing store for this array; the backing-store
552
 * can be held in StatNameStorage, or it can be packed more tightly into another
553
 * object.
554
 *
555
 * When Envoy is configured with a large numbers of clusters, there are a huge
556
 * number of StatNames, so avoiding extra per-stat pointers has a significant
557
 * memory impact.
558
 */
559
class StatName {
560
public:
561
  // Constructs a StatName object directly referencing the storage of another
562
  // StatName.
563
1.03G
  explicit StatName(const SymbolTable::Storage size_and_data) : size_and_data_(size_and_data) {}
564
565
  // Constructs an empty StatName object.
566
724M
  StatName() = default;
567
568
  /**
569
   * Defines default hash function so StatName can be used as a key in an absl
570
   * hash-table without specifying a functor. See
571
   * https://abseil.io/docs/cpp/guides/hash for details.
572
   */
573
533M
  template <typename H> friend H AbslHashValue(H h, StatName stat_name) {
574
533M
    if (stat_name.empty()) {
575
3.02k
      return H::combine(std::move(h), absl::string_view());
576
3.02k
    }
577
578
533M
    return H::combine(std::move(h), stat_name.dataAsStringView());
579
533M
  }
580
581
  /**
582
   * Note that this hash function will return a different hash than that of
583
   * the elaborated string.
584
   *
585
   * @return uint64_t a hash of the underlying representation.
586
   */
587
324M
  uint64_t hash() const { return absl::Hash<StatName>()(*this); }
588
589
160M
  bool operator==(const StatName& rhs) const {
590
160M
    return dataAsStringView() == rhs.dataAsStringView();
591
160M
  }
592
0
  bool operator!=(const StatName& rhs) const { return !(*this == rhs); }
593
594
  /**
595
   * @return size_t the number of bytes in the symbol array, excluding the
596
   *                overhead for the size itself.
597
   */
598
  size_t dataSize() const;
599
600
  /**
601
   * @return size_t the number of bytes in the symbol array, including the
602
   *                  overhead for the size itself.
603
   */
604
719M
  size_t size() const { return SymbolTable::Encoding::totalSizeBytes(dataSize()); }
605
606
  /**
607
   * Copies the entire StatName representation into a MemBlockBuilder, including
608
   * the length metadata at the beginning. The MemBlockBuilder must not have
609
   * any other data in it.
610
   *
611
   * @param mem_block_builder the builder to receive the storage.
612
   */
613
3.18M
  void copyToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder) {
614
3.18M
    ASSERT(mem_block_builder.size() == 0);
615
3.18M
    mem_block_builder.appendData(absl::MakeSpan(size_and_data_, size()));
616
3.18M
  }
617
618
  /**
619
   * Appends the data portion of the StatName representation into a
620
   * MemBlockBuilder, excluding the length metadata. This is appropriate for
621
   * join(), where several stat-names are combined, and we only need the
622
   * aggregated length metadata.
623
   *
624
   * @param mem_block_builder the builder to receive the storage.
625
   */
626
346M
  void appendDataToMemBlock(MemBlockBuilder<uint8_t>& storage) {
627
346M
    storage.appendData(absl::MakeSpan(data(), dataSize()));
628
346M
  }
629
630
#ifndef ENVOY_CONFIG_COVERAGE
631
  void debugPrint();
632
#endif
633
634
  /**
635
   * @return A pointer to the first byte of data (skipping over size bytes).
636
   */
637
1.20G
  const uint8_t* data() const {
638
1.20G
    if (size_and_data_ == nullptr) {
639
53.4M
      return nullptr;
640
53.4M
    }
641
1.14G
    return size_and_data_ + SymbolTable::Encoding::encodingSizeBytes(dataSize());
642
1.20G
  }
643
644
476M
  const uint8_t* dataIncludingSize() const { return size_and_data_; }
645
646
  /**
647
   * @return whether this is empty.
648
   */
649
881M
  bool empty() const { return size_and_data_ == nullptr || dataSize() == 0; }
650
651
  /**
652
   * Determines whether this starts with the prefix. Note: dynamic segments
653
   * are not supported in the current implementation; this matching only works
654
   * for symbolic segments. However it OK for this to include dynamic segments
655
   * following the prefix.
656
   *
657
   * @param symbolic_prefix the prefix, which must not contain dynamic segments.
658
   */
659
  bool startsWith(StatName symbolic_prefix) const;
660
661
private:
662
  /**
663
   * Casts the raw data as a string_view. Note that this string_view will not
664
   * be in human-readable form, but it will be compatible with a string-view
665
   * hasher and comparator.
666
   */
667
854M
  absl::string_view dataAsStringView() const {
668
854M
    return {reinterpret_cast<const char*>(data()),
669
854M
            static_cast<absl::string_view::size_type>(dataSize())};
670
854M
  }
671
672
  const uint8_t* size_and_data_{nullptr};
673
};
674
675
193M
StatName StatNameStorageBase::statName() const { return StatName(bytes_.get()); }
676
677
/**
678
 * Contains the backing store for a StatName and enough context so it can
679
 * self-delete through RAII. This works by augmenting StatNameStorage with a
680
 * reference to the SymbolTable&, so it has an extra 8 bytes of footprint. It
681
 * is intended to be used in cases where simplicity of implementation is more
682
 * important than byte-savings, for example:
683
 *   - outside the stats system
684
 *   - in tests
685
 *   - as a scoped temp in a function
686
 * Due to the extra 8 bytes per instance, scalability should be taken into
687
 * account before using this as (say) a value or key in a map. In those
688
 * scenarios, it would be better to store the SymbolTable reference once
689
 * for the entire map.
690
 *
691
 * In the stat structures, we generally use StatNameStorage to avoid the
692
 * per-stat overhead.
693
 */
694
class StatNameManagedStorage : public StatNameStorage {
695
public:
696
  // Basic constructor for when you have a name as a string, and need to
697
  // generate symbols for it.
698
  StatNameManagedStorage(absl::string_view name, SymbolTable& table)
699
7.01M
      : StatNameStorage(name, table), symbol_table_(table) {}
700
  StatNameManagedStorage(StatNameManagedStorage&& src) noexcept
701
5.25k
      : StatNameStorage(std::move(src)), symbol_table_(src.symbol_table_) {}
702
  StatNameManagedStorage(StatName src, SymbolTable& table) noexcept
703
0
      : StatNameStorage(src, table), symbol_table_(table) {}
704
705
7.02M
  ~StatNameManagedStorage() { free(symbol_table_); } // NOLINT(clang-analyzer-unix.Malloc)
706
707
private:
708
  SymbolTable& symbol_table_;
709
};
710
711
/**
712
 * Holds backing-store for a dynamic stat, where are no global locks needed
713
 * to create a StatName from a string, but there is no sharing of token data
714
 * between names, so there may be more memory consumed.
715
 */
716
class StatNameDynamicStorage : public StatNameStorageBase {
717
public:
718
  // Basic constructor based on a name. Note the table is used for a call to
719
  // encode the name, but no locks are taken in either implementation of the
720
  // SymbolTable api.
721
  StatNameDynamicStorage(absl::string_view name, SymbolTable& table)
722
26.4M
      : StatNameStorageBase(table.makeDynamicStorage(name)) {}
723
  // Move constructor.
724
  StatNameDynamicStorage(StatNameDynamicStorage&& src) noexcept
725
58.9M
      : StatNameStorageBase(std::move(src)) {}
726
};
727
728
/**
729
 * Maintains storage for a collection of StatName objects. Like
730
 * StatNameManagedStorage, this has an RAII usage model, taking
731
 * care of decrementing ref-counts in the SymbolTable for all
732
 * contained StatNames on destruction or on clear();
733
 *
734
 * Example usage:
735
 *   StatNamePool pool(symbol_table);
736
 *   StatName name1 = pool.add("name1");
737
 *   StatName name2 = pool.add("name2");
738
 *   const uint8_t* storage = pool.addReturningStorage("name3");
739
 *   StatName name3(storage);
740
 */
741
class StatNamePool {
742
public:
743
5.81M
  explicit StatNamePool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
744
5.81M
  ~StatNamePool() { clear(); }
745
746
  /**
747
   * Removes all StatNames from the pool.
748
   */
749
  void clear();
750
751
  /**
752
   * @param name the name to add the container.
753
   * @return the StatName held in the container for this name.
754
   */
755
  StatName add(absl::string_view name);
756
757
  /**
758
   * Does essentially the same thing as add(), but returns the storage as a
759
   * pointer which can later be used to create a StatName. This can be used
760
   * to accumulate a vector of uint8_t* which can later be used to create
761
   * StatName objects on demand.
762
   *
763
   * The use-case for this is in source/common/http/codes.cc, where we have a
764
   * fixed sized array of atomic pointers, indexed by HTTP code. This API
765
   * enables storing the allocated stat-name in that array of atomics, which
766
   * enables content-avoidance when finding StatNames for frequently used HTTP
767
   * codes.
768
   *
769
   * @param name the name to add the container.
770
   * @return a pointer to the bytes held in the container for this name, suitable for
771
   *         using to construct a StatName.
772
   */
773
  const uint8_t* addReturningStorage(absl::string_view name);
774
775
private:
776
  // We keep the stat names in a vector of StatNameStorage, storing the
777
  // SymbolTable reference separately. This saves 8 bytes per StatName,
778
  // at the cost of having a destructor that calls clear().
779
  SymbolTable& symbol_table_;
780
  std::vector<StatNameStorage> storage_vector_;
781
};
782
783
/**
784
 * Maintains storage for a collection of StatName objects constructed from
785
 * dynamically discovered strings. Like StatNameDynamicStorage, this has an RAII
786
 * usage model. Creating StatNames with this interface do not incur a
787
 * SymbolTable lock, but tokens are not shared across StatNames.
788
 *
789
 * The SymbolTable is required as a constructor argument to assist in encoding
790
 * the stat-names.
791
 *
792
 * Example usage:
793
 *   StatNameDynamicPool pool(symbol_table);
794
 *   StatName name1 = pool.add("name1");
795
 *   StatName name2 = pool.add("name2");
796
 *
797
 * Note; StatNameDynamicPool::add("foo") != StatNamePool::add("foo"), even
798
 * though their string representations are identical. They also will not match
799
 * in map lookups. Tests for StatName with dynamic components must therefore
800
 * be looked up by string, via Stats::TestUtil::TestStore.
801
 */
802
class StatNameDynamicPool {
803
public:
804
4.09M
  explicit StatNameDynamicPool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
805
806
  /**
807
   * @param name the name to add the container.
808
   * @return the StatName held in the container for this name.
809
   */
810
  StatName add(absl::string_view name);
811
812
private:
813
  // We keep the stat names in a vector of StatNameStorage, storing the
814
  // SymbolTable reference separately. This saves 8 bytes per StatName,
815
  // at the cost of having a destructor that calls clear().
816
  SymbolTable& symbol_table_;
817
  std::vector<StatNameDynamicStorage> storage_vector_;
818
};
819
820
// Represents an ordered container of StatNames. The encoding for each StatName
821
// is byte-packed together, so this carries less overhead than allocating the
822
// storage separately. The trade-off is there is no random access; you can only
823
// iterate through the StatNames.
824
//
825
// The maximum size of the list is 255 elements, so the length can fit in a
826
// byte. It would not be difficult to increase this, but there does not appear
827
// to be a current need.
828
class StatNameList {
829
public:
830
  ~StatNameList();
831
832
  /**
833
   * @return true if populate() has been called on this list.
834
   */
835
106M
  bool populated() const { return storage_ != nullptr; }
836
837
  /**
838
   * Iterates over each StatName in the list, calling f(StatName). f() should
839
   * return true to keep iterating, or false to end the iteration.
840
   *
841
   * @param f The function to call on each stat.
842
   */
843
  void iterate(const std::function<bool(StatName)>& f) const;
844
845
  /**
846
   * Frees each StatName in the list. Failure to call this before destruction
847
   * results in an ASSERT at destruction of the list and the SymbolTable.
848
   *
849
   * This is not done as part of destruction as the SymbolTable may already
850
   * be destroyed.
851
   *
852
   * @param symbol_table the symbol table.
853
   */
854
  void clear(SymbolTable& symbol_table);
855
856
private:
857
  friend class SymbolTable;
858
859
  /**
860
   * Moves the specified storage into the list. The storage format is an
861
   * array of bytes, organized like this:
862
   *
863
   * [0] The number of elements in the list (must be < 256).
864
   * [1] low order 8 bits of the number of symbols in the first element.
865
   * [2] high order 8 bits of the number of symbols in the first element.
866
   * [3...] the symbols in the first element.
867
   * ...
868
   *
869
   *
870
   * For SymbolTable, each symbol is 1 or more bytes, in a variable-length
871
   * encoding. See SymbolTable::Encoding::addSymbol for details.
872
   */
873
53.2M
  void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); }
874
875
  SymbolTable::StoragePtr storage_;
876
};
877
878
// Value-templatized hash-map with StatName key.
879
template <class T> using StatNameHashMap = absl::flat_hash_map<StatName, T>;
880
881
// Hash-set of StatNames
882
using StatNameHashSet = absl::flat_hash_set<StatName>;
883
884
// Helper class for sorting StatNames.
885
struct StatNameLessThan {
886
0
  StatNameLessThan(const SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
887
0
  bool operator()(const StatName& a, const StatName& b) const {
888
0
    return symbol_table_.lessThan(a, b);
889
0
  }
890
891
  const SymbolTable& symbol_table_;
892
};
893
894
struct HeterogeneousStatNameHash {
895
  // Specifying is_transparent indicates to the library infrastructure that
896
  // type-conversions should not be applied when calling find(), but instead
897
  // pass the actual types of the contained and searched-for objects directly to
898
  // these functors. See
899
  // https://en.cppreference.com/w/cpp/utility/functional/less_void for an
900
  // official reference, and https://abseil.io/tips/144 for a description of
901
  // using it in the context of absl.
902
  using is_transparent = void; // NOLINT(readability-identifier-naming)
903
904
0
  size_t operator()(StatName a) const { return a.hash(); }
905
0
  size_t operator()(const StatNameStorage& a) const { return a.statName().hash(); }
906
};
907
908
struct HeterogeneousStatNameEqual {
909
  // See description for HeterogeneousStatNameHash::is_transparent.
910
  using is_transparent = void; // NOLINT(readability-identifier-naming)
911
912
0
  size_t operator()(StatName a, StatName b) const { return a == b; }
913
0
  size_t operator()(const StatNameStorage& a, const StatNameStorage& b) const {
914
0
    return a.statName() == b.statName();
915
0
  }
916
0
  size_t operator()(StatName a, const StatNameStorage& b) const { return a == b.statName(); }
917
0
  size_t operator()(const StatNameStorage& a, StatName b) const { return a.statName() == b; }
918
};
919
920
// Encapsulates a set<StatNameStorage>. We use containment here rather than a
921
// 'using' alias because we need to ensure that when the set is destructed,
922
// StatNameStorage::free(symbol_table) is called on each entry. It is a little
923
// easier at the call-sites in thread_local_store.cc to implement this an
924
// explicit free() method, analogous to StatNameStorage::free(), compared to
925
// storing a SymbolTable reference in the class and doing the free in the
926
// destructor, like StatNameManagedStorage.
927
class StatNameStorageSet {
928
public:
929
  using HashSet =
930
      absl::flat_hash_set<StatNameStorage, HeterogeneousStatNameHash, HeterogeneousStatNameEqual>;
931
  using Iterator = HashSet::iterator;
932
933
  ~StatNameStorageSet();
934
935
  /**
936
   * Releases all symbols held in this set. Must be called prior to destruction.
937
   *
938
   * @param symbol_table The symbol table that owns the symbols.
939
   */
940
  void free(SymbolTable& symbol_table);
941
942
  /**
943
   * @param storage The StatNameStorage to add to the set.
944
   */
945
0
  std::pair<HashSet::iterator, bool> insert(StatNameStorage&& storage) {
946
0
    return hash_set_.insert(std::move(storage));
947
0
  }
948
949
  /**
950
   * @param stat_name The stat_name to find.
951
   * @return the iterator pointing to the stat_name, or end() if not found.
952
   */
953
0
  Iterator find(StatName stat_name) { return hash_set_.find(stat_name); }
954
955
  /**
956
   * @return the end-marker.
957
   */
958
0
  Iterator end() { return hash_set_.end(); }
959
960
  /**
961
   * @return the number of elements in the set.
962
   */
963
0
  size_t size() const { return hash_set_.size(); }
964
965
private:
966
  HashSet hash_set_;
967
};
968
969
// Captures StatNames for lookup by string, keeping a map of 'built-ins' that is
970
// expected to be populated during initialization.
971
//
972
// Ideally, builtins should be added during process initialization, in the
973
// outermost relevant context. And as the builtins map is not mutex protected,
974
// builtins must *not* be added to an existing StatNameSet in the request-path.
975
//
976
// It is fine to populate a new StatNameSet when (for example) an xDS
977
// message reveals a new set of names to be used as stats. The population must
978
// be completed prior to exposing the new StatNameSet to worker threads.
979
//
980
// To create stats using names discovered in the request path, dynamic stat
981
// names must be used (see StatNameDynamicStorage). Consider using helper
982
// methods such as Stats::Utility::counterFromElements in common/stats/utility.h
983
// to simplify the process of allocating and combining stat names and creating
984
// counters, gauges, and histograms from them.
985
class StatNameSet {
986
public:
987
  // This object must be instantiated via SymbolTable::makeSet(), thus constructor is private.
988
989
  /**
990
   * Adds a string to the builtin map, which is not mutex protected. This map is
991
   * always consulted first as a hit there means no lock is required.
992
   *
993
   * Builtins can only be added immediately after construction, as the builtins
994
   * map is not mutex-protected.
995
   */
996
  void rememberBuiltin(absl::string_view str);
997
998
  /**
999
   * Remembers every string in a container as builtins.
1000
   */
1001
624
  template <class StringContainer> void rememberBuiltins(const StringContainer& container) {
1002
4.36k
    for (const auto& str : container) {
1003
4.36k
      rememberBuiltin(str);
1004
4.36k
    }
1005
624
  }
void Envoy::Stats::StatNameSet::rememberBuiltins<std::__1::vector<char const*, std::__1::allocator<char const*> > >(std::__1::vector<char const*, std::__1::allocator<char const*> > const&)
Line
Count
Source
1001
624
  template <class StringContainer> void rememberBuiltins(const StringContainer& container) {
1002
4.36k
    for (const auto& str : container) {
1003
4.36k
      rememberBuiltin(str);
1004
4.36k
    }
1005
624
  }
Unexecuted instantiation: void Envoy::Stats::StatNameSet::rememberBuiltins<absl::flat_hash_set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::container_internal::StringHash, absl::container_internal::StringEq, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >(absl::flat_hash_set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, absl::container_internal::StringHash, absl::container_internal::StringEq, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > const&)
1006
624
  void rememberBuiltins(const std::vector<const char*>& container) {
1007
624
    rememberBuiltins<std::vector<const char*>>(container);
1008
624
  }
1009
1010
  /**
1011
   * Finds a builtin StatName by name. If the builtin has not been registered,
1012
   * then the fallback is returned.
1013
   *
1014
   * @return the StatName or fallback.
1015
   */
1016
  StatName getBuiltin(absl::string_view token, StatName fallback) const;
1017
1018
  /**
1019
   * Adds a StatName using the pool, but without remembering it in any maps.
1020
   *
1021
   * For convenience, StatNameSet offers pass-through thread-safe access to
1022
   * its mutex-protected pool. This is useful in constructor initializers, when
1023
   * StatNames are needed both from compile-time constants, as well as from
1024
   * other constructor args, e.g.
1025
   *    MyClass(const std::vector<absl::string_view>& strings, Stats::SymbolTable& symbol_table)
1026
   *        : stat_name_set_(symbol_table),
1027
   *          known_const_(stat_name_set_.add("known_const")) { // unmapped constants from pool
1028
   *      stat_name_set_.rememberBuiltins(strings); // mapped builtins.
1029
   *    }
1030
   * This avoids the need to make two different pools; one backing the
1031
   * StatNameSet mapped entries, and the other backing the set passed in via the
1032
   * constructor.
1033
   *
1034
   * @param str The string to add as a StatName
1035
   * @return The StatName for str.
1036
   */
1037
53.7k
  StatName add(absl::string_view str) {
1038
53.7k
    absl::MutexLock lock(&mutex_);
1039
53.7k
    return pool_.add(str);
1040
53.7k
  }
1041
1042
private:
1043
  friend class SymbolTable;
1044
1045
  StatNameSet(SymbolTable& symbol_table, absl::string_view name);
1046
1047
  const std::string name_;
1048
  Stats::StatNamePool pool_ ABSL_GUARDED_BY(mutex_);
1049
  mutable absl::Mutex mutex_;
1050
  using StringStatNameMap = absl::flat_hash_map<std::string, Stats::StatName>;
1051
  StringStatNameMap builtin_stat_names_;
1052
};
1053
1054
template <class GetStatName, class Obj>
1055
bool SymbolTable::StatNameCompare<GetStatName, Obj>::operator()(const Obj& a, const Obj& b) const {
1056
  StatName a_stat_name = getter_(a);
1057
  StatName b_stat_name = getter_(b);
1058
  return symbol_table_.lessThanLockHeld(a_stat_name, b_stat_name);
1059
}
1060
1061
using SymbolTablePtr = std::unique_ptr<SymbolTable>;
1062
1063
// TODO(jmarantz): rename all remaining ~47 occurrences of SymbolTableImpl in
1064
// the codebase to SymbolTable, and drop this alias.
1065
using SymbolTableImpl = SymbolTable;
1066
1067
} // namespace Stats
1068
} // namespace Envoy