Line data Source code
1 : #pragma once
2 :
3 : #include <algorithm>
4 : #include <memory>
5 : #include <stack>
6 : #include <string>
7 : #include <vector>
8 :
9 : #include "source/common/common/assert.h"
10 : #include "source/common/common/lock_guard.h"
11 : #include "source/common/common/mem_block_builder.h"
12 : #include "source/common/common/non_copyable.h"
13 : #include "source/common/common/thread.h"
14 : #include "source/common/common/utility.h"
15 : #include "source/common/stats/recent_lookups.h"
16 :
17 : #include "absl/container/fixed_array.h"
18 : #include "absl/container/flat_hash_map.h"
19 : #include "absl/container/inlined_vector.h"
20 : #include "absl/strings/str_join.h"
21 : #include "absl/strings/str_split.h"
22 :
23 : namespace Envoy {
24 : namespace Stats {
25 :
26 : class StatName;
27 : using StatNameVec = absl::InlinedVector<StatName, 8>;
28 : class StatNameList;
29 : class StatNameSet;
30 : using StatNameSetPtr = std::unique_ptr<StatNameSet>;
31 :
32 : /**
33 : * Holds a range of indexes indicating which parts of a stat-name are
34 : * dynamic. This is used to transfer stats from hot-restart parent to child,
35 : * retaining the same name structure.
36 : */
37 : using DynamicSpan = std::pair<uint32_t, uint32_t>;
38 : using DynamicSpans = std::vector<DynamicSpan>;
39 :
40 : /** A Symbol represents a string-token with a small index. */
41 : using Symbol = uint32_t;
42 :
43 : /** Transient representations of a vector of 32-bit symbols */
44 : using SymbolVec = std::vector<Symbol>;
45 :
46 : /**
47 : * SymbolTableImpl manages a namespace optimized for stats, which are typically
48 : * composed of arrays of "."-separated tokens, with a significant overlap
49 : * between the tokens. Each token is mapped to a Symbol (uint32_t) and
50 : * reference-counted so that no-longer-used symbols can be reclaimed.
51 : *
52 : * We use a uint8_t array to encode a "."-deliminated stat-name into arrays of
53 : * integer symbol IDs in order to conserve space, as in practice the
54 : * majority of token instances in stat names draw from a fairly small set of
55 : * common names, typically less than 100. The format is somewhat similar to
56 : * UTF-8, with a variable-length array of uint8_t. See the implementation for
57 : * details.
58 : *
59 : * StatNameStorage can be used to manage memory for the byte-encoding. Not all
60 : * StatNames are backed by StatNameStorage -- the storage may be inlined into
61 : * another object such as HeapStatData. StaNameStorage is not fully RAII --
62 : * instead the owner must call free(SymbolTable&) explicitly before
63 : * StatNameStorage is destructed. This saves 8 bytes of storage per stat,
64 : * relative to holding a SymbolTable& in each StatNameStorage object.
65 : *
66 : * A StatName is a copyable and assignable reference to this storage. It does
67 : * not own the storage or keep it alive via reference counts; the owner must
68 : * ensure the backing store lives as long as the StatName.
69 : *
70 : * The underlying Symbol / SymbolVec data structures are private to the
71 : * impl. One side effect of the non-monotonically-increasing symbol counter is
72 : * that if a string is encoded, the resulting stat is destroyed, and then that
73 : * same string is re-encoded, it may or may not encode to the same underlying
74 : * symbol.
75 : */
76 : class SymbolTable final {
77 : public:
78 : /**
79 : * Efficient byte-encoded storage of an array of tokens. The most common
80 : * tokens are typically < 127, and are represented directly. tokens >= 128
81 : * spill into the next byte, allowing for tokens of arbitrary numeric value to
82 : * be stored. As long as the most common tokens are low-valued, the
83 : * representation is space-efficient. This scheme is similar to UTF-8. The
84 : * token ordering is dependent on the order in which stat-names are encoded
85 : * into the SymbolTable, which will not be optimal, but in practice appears
86 : * to be pretty good.
87 : *
88 : * This is exposed in the interface for the benefit of join(), which is
89 : * used in the hot-path to append two stat-names into a temp without taking
90 : * locks. This is used then in thread-local cache lookup, so that once warm,
91 : * no locks are taken when looking up stats.
92 : */
93 : using Storage = uint8_t[];
94 : using StoragePtr = std::unique_ptr<Storage>;
95 :
96 : /**
97 : * Intermediate representation for a stat-name. This helps store multiple
98 : * names in a single packed allocation. First we encode each desired name,
99 : * then sum their sizes for the single packed allocation. This is used to
100 : * store MetricImpl's tags and tagExtractedName.
101 : */
102 : class Encoding {
103 : public:
104 : /**
105 : * Before destructing SymbolEncoding, you must call moveToMemBlock. This
106 : * transfers ownership, and in particular, the responsibility to call
107 : * SymbolTable::clear() on all referenced symbols. If we ever wanted to be
108 : * able to destruct a SymbolEncoding without transferring it we could add a
109 : * clear(SymbolTable&) method.
110 : */
111 : ~Encoding();
112 :
113 : /**
114 : * Encodes a token into the vec.
115 : *
116 : * @param symbol the symbol to encode.
117 : */
118 : void addSymbols(const SymbolVec& symbols);
119 :
120 : /**
121 : * Decodes a uint8_t array into a SymbolVec.
122 : */
123 : static SymbolVec decodeSymbols(StatName stat_name);
124 :
125 : /**
126 : * Decodes a uint8_t array into a sequence of symbols and literal strings.
127 : * There are distinct lambdas for these two options. Calls to these lambdas
128 : * will be interleaved based on the sequencing of literal strings and
129 : * symbols held in the data.
130 : *
131 : * @param array the StatName encoded as a uint8_t array.
132 : * @param size the size of the array in bytes.
133 : * @param symbol_token_fn a function to be called whenever a symbol is encountered in the array.
134 : * @param string_view_token_fn a function to be called whenever a string literal is encountered.
135 : */
136 : static void decodeTokens(StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
137 : const std::function<void(absl::string_view)>& string_view_token_fn);
138 :
139 : /**
140 : * Returns the number of bytes required to represent StatName as a uint8_t
141 : * array, including the encoded size.
142 : */
143 669263 : size_t bytesRequired() const {
144 669263 : return data_bytes_required_ + encodingSizeBytes(data_bytes_required_);
145 669263 : }
146 :
147 : /**
148 : * Moves the contents of the vector into an allocated array. The array
149 : * must have been allocated with bytesRequired() bytes.
150 : *
151 : * @param mem_block_builder memory block to receive the encoded bytes.
152 : */
153 : void moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder);
154 :
155 : /**
156 : * @param number A number to encode in a variable length byte-array.
157 : * @return The number of bytes it would take to encode the number.
158 : */
159 : static size_t encodingSizeBytes(uint64_t number);
160 :
161 : /**
162 : * @param num_data_bytes The number of bytes in a data-block.
163 : * @return The total number of bytes required for the data-block and its encoded size.
164 : */
165 7421501 : static size_t totalSizeBytes(size_t num_data_bytes) {
166 7421501 : return encodingSizeBytes(num_data_bytes) + num_data_bytes;
167 7421501 : }
168 :
169 : /**
170 : * Saves the specified number into the byte array, returning the next byte.
171 : * There is no guarantee that bytes will be aligned, so we can't cast to a
172 : * uint16_t* and assign, but must individually copy the bytes.
173 : *
174 : * Requires that the buffer be sized to accommodate encodingSizeBytes(number).
175 : *
176 : * @param number the number to write.
177 : * @param mem_block the memory into which to append the number.
178 : */
179 : static void appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block);
180 :
181 : /**
182 : * Appends stat_name's bytes into mem_block, which must have been allocated to
183 : * allow for stat_name.size() bytes.
184 : *
185 : * @param stat_name the stat_name to append.
186 : * @param mem_block the block of memory to append to.
187 : */
188 : static void appendToMemBlock(StatName stat_name, MemBlockBuilder<uint8_t>& mem_block);
189 :
190 : /**
191 : * Decodes a byte-array containing a variable-length number.
192 : *
193 : * @param The encoded byte array, written previously by appendEncoding.
194 : * @return A pair containing the decoded number, and the number of bytes consumed from encoding.
195 : */
196 : static std::pair<uint64_t, size_t> decodeNumber(const uint8_t* encoding);
197 :
198 : private:
199 : friend class StatName;
200 : friend class SymbolTable;
201 : class TokenIter;
202 :
203 : size_t data_bytes_required_{0};
204 : MemBlockBuilder<uint8_t> mem_block_;
205 : };
206 :
207 : SymbolTable();
208 : ~SymbolTable();
209 :
210 : /**
211 : * Decodes a vector of symbols back into its period-delimited stat name. If
212 : * decoding fails on any part of the symbol_vec, we release_assert and crash,
213 : * since this should never happen, and we don't want to continue running
214 : * with a corrupt stats set.
215 : *
216 : * @param stat_name the stat name.
217 : * @return std::string stringified stat_name.
218 : */
219 : std::string toString(const StatName& stat_name) const;
220 :
221 : /**
222 : * @return uint64_t the number of symbols in the symbol table.
223 : */
224 : uint64_t numSymbols() const;
225 :
226 : /**
227 : * Determines whether one StatName lexically precedes another. Note that
228 : * the lexical order may not exactly match the lexical order of the
229 : * elaborated strings. For example, stat-name of "-.-" would lexically
230 : * sort after "---" but when encoded as a StatName would come lexically
231 : * earlier. In practice this is unlikely to matter as those are not
232 : * reasonable names for Envoy stats.
233 : *
234 : * Note that this operation has to be performed with the context of the
235 : * SymbolTable so that the individual Symbol objects can be converted
236 : * into strings for lexical comparison.
237 : *
238 : * @param a the first stat name
239 : * @param b the second stat name
240 : * @return bool true if a lexically precedes b.
241 : */
242 : bool lessThan(const StatName& a, const StatName& b) const;
243 :
244 : /**
245 : * Joins two or more StatNames. For example if we have StatNames for {"a.b",
246 : * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The
247 : * advantage of using this representation is that it avoids having to
248 : * decode/encode into the elaborated form, and does not require locking the
249 : * SymbolTable.
250 : *
251 : * Note that this method does not bump reference counts on the referenced
252 : * Symbols in the SymbolTable, so it's only valid as long for the lifetime of
253 : * the joined StatNames.
254 : *
255 : * This is intended for use doing cached name lookups of scoped stats, where
256 : * the scope prefix and the names to combine it with are already in StatName
257 : * form. Using this class, they can be combined without accessing the
258 : * SymbolTable or, in particular, taking its lock.
259 : *
260 : * @param stat_names the names to join.
261 : * @return Storage allocated for the joined name.
262 : */
263 : StoragePtr join(const StatNameVec& stat_names) const;
264 :
265 : /**
266 : * Populates a StatNameList from a list of encodings. This is not done at
267 : * construction time to enable StatNameList to be instantiated directly in
268 : * a class that doesn't have a live SymbolTable when it is constructed.
269 : *
270 : * @param names A pointer to the first name in an array, allocated by the caller.
271 : * @param num_names The number of names.
272 : * @param list The StatNameList representing the stat names.
273 : */
274 : void populateList(const StatName* names, uint32_t num_names, StatNameList& list);
275 :
276 : #ifndef ENVOY_CONFIG_COVERAGE
277 : void debugPrint() const;
278 : #endif
279 :
280 : /**
281 : * Creates a StatNameSet.
282 : *
283 : * @param name the name of the set.
284 : * @return the set.
285 : */
286 : StatNameSetPtr makeSet(absl::string_view name);
287 :
288 : using RecentLookupsFn = std::function<void(absl::string_view, uint64_t)>;
289 :
290 : /**
291 : * Calls the provided function with the name of the most recently looked-up
292 : * symbols, including lookups on any StatNameSets, and with a count of
293 : * the recent lookups on that symbol.
294 : *
295 : * @param iter the function to call for every recent item.
296 : */
297 : uint64_t getRecentLookups(const RecentLookupsFn&) const;
298 :
299 : /**
300 : * Clears the recent-lookups structures.
301 : */
302 : void clearRecentLookups();
303 :
304 : /**
305 : * Sets the recent-lookup capacity.
306 : */
307 : void setRecentLookupCapacity(uint64_t capacity);
308 :
309 : /**
310 : * @return The configured recent-lookup tracking capacity.
311 : */
312 : uint64_t recentLookupCapacity() const;
313 :
314 : /**
315 : * Identifies the dynamic components of a stat_name into an array of integer
316 : * pairs, indicating the begin/end of spans of tokens in the stat-name that
317 : * are created from StatNameDynamicStore or StatNameDynamicPool.
318 : *
319 : * This can be used to reconstruct the same exact StatNames in
320 : * StatNames::mergeStats(), to enable stat continuity across hot-restart.
321 : *
322 : * @param stat_name the input stat name.
323 : * @return the array of pairs indicating the bounds.
324 : */
325 : DynamicSpans getDynamicSpans(StatName stat_name) const;
326 :
327 : bool lessThanLockHeld(const StatName& a, const StatName& b) const;
328 :
329 : template <class GetStatName, class Obj> struct StatNameCompare {
330 : StatNameCompare(const SymbolTable& symbol_table, GetStatName getter)
331 : : symbol_table_(symbol_table), getter_(getter) {}
332 :
333 : bool operator()(const Obj& a, const Obj& b) const;
334 :
335 : const SymbolTable& symbol_table_;
336 : GetStatName getter_;
337 : };
338 :
339 : /**
340 : * Sorts a range by StatName. This API is more efficient than
341 : * calling std::sort directly as it takes a single lock for the
342 : * entire sort, rather than locking on each comparison.
343 : *
344 : * @param begin the beginning of the range to sort
345 : * @param end the end of the range to sort
346 : * @param get_stat_name a functor that takes an Obj and returns a StatName.
347 : */
348 : template <class Obj, class Iter, class GetStatName>
349 : void sortByStatNames(Iter begin, Iter end, GetStatName get_stat_name) const {
350 : // Grab the lock once before sorting begins, so we don't have to re-take
351 : // it on every comparison.
352 : Thread::LockGuard lock(lock_);
353 : StatNameCompare<GetStatName, Obj> compare(*this, get_stat_name);
354 : std::sort(begin, end, compare);
355 : }
356 :
357 : private:
358 : friend class StatName;
359 : friend class StatNameTest;
360 : friend class StatNameDeathTest;
361 : friend class StatNameDynamicStorage;
362 : friend class StatNameList;
363 : friend class StatNameStorage;
364 :
365 : /**
366 : * Encodes 'name' into the symbol table. Bumps reference counts for referenced
367 : * symbols. The caller must manage the storage, and is responsible for calling
368 : * SymbolTable::free() to release the reference counts.
369 : *
370 : * @param name The name to encode.
371 : * @return The encoded name, transferring ownership to the caller.
372 : *
373 : */
374 : StoragePtr encode(absl::string_view name);
375 : StoragePtr makeDynamicStorage(absl::string_view name);
376 :
377 : /**
378 : * Since SymbolTable does manual reference counting, a client of SymbolTable
379 : * must manually call free(symbol_vec) when it is freeing the backing store
380 : * for a StatName. This way, the symbol table will grow and shrink
381 : * dynamically, instead of being write-only.
382 : *
383 : * @param stat_name the stat name.
384 : */
385 : void free(const StatName& stat_name);
386 :
387 : /**
388 : * StatName backing-store can be managed by callers in a variety of ways
389 : * to minimize overhead. But any persistent reference to a StatName needs
390 : * to hold onto its own reference-counts for all symbols. This method
391 : * helps callers ensure the symbol-storage is maintained for the lifetime
392 : * of a reference.
393 : *
394 : * @param stat_name the stat name.
395 : */
396 : void incRefCount(const StatName& stat_name);
397 :
398 : struct SharedSymbol {
399 247028 : SharedSymbol(Symbol symbol) : symbol_(symbol) {}
400 :
401 : Symbol symbol_;
402 : uint32_t ref_count_{1};
403 : };
404 :
405 : // This must be held during both encode() and free().
406 : mutable Thread::MutexBasicLockable lock_;
407 :
408 : /**
409 : * Decodes a uint8_t array into an array of period-delimited strings. Note
410 : * that some of the strings may have periods in them, in the case where
411 : * StatNameDynamicStorage was used.
412 : *
413 : * If decoding fails on any part of the encoding, we RELEASE_ASSERT and crash,
414 : * since this should never happen, and we don't want to continue running with
415 : * corrupt stat names.
416 : *
417 : * @param array the uint8_t array of encoded symbols and dynamic strings.
418 : * @param size the size of the array in bytes.
419 : * @return std::string the retrieved stat name.
420 : */
421 : std::vector<absl::string_view> decodeStrings(StatName stat_name) const;
422 :
423 : /**
424 : * Convenience function for encode(), symbolizing one string segment at a time.
425 : *
426 : * @param sv the individual string to be encoded as a symbol.
427 : * @return Symbol the encoded string.
428 : */
429 : Symbol toSymbol(absl::string_view sv) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
430 :
431 : /**
432 : * Convenience function for decode(), decoding one symbol at a time.
433 : *
434 : * @param symbol the individual symbol to be decoded.
435 : * @return absl::string_view the decoded string.
436 : */
437 : absl::string_view fromSymbol(Symbol symbol) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
438 :
439 : /**
440 : * Stages a new symbol for use. To be called after a successful insertion.
441 : */
442 : void newSymbol();
443 :
444 : /**
445 : * Tokenizes name, finds or allocates symbols for each token, and adds them
446 : * to encoding.
447 : *
448 : * @param name The name to tokenize.
449 : * @param encoding The encoding to write to.
450 : */
451 : void addTokensToEncoding(absl::string_view name, Encoding& encoding);
452 :
453 0 : Symbol monotonicCounter() {
454 0 : Thread::LockGuard lock(lock_);
455 0 : return monotonic_counter_;
456 0 : }
457 :
458 : // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so
459 : // that if insertion succeeds, the value written is the correct one.
460 : Symbol next_symbol_ ABSL_GUARDED_BY(lock_);
461 :
462 : // If the free pool is exhausted, we monotonically increase this counter.
463 : Symbol monotonic_counter_;
464 :
465 : // Bitmap implementation.
466 : // The encode map stores both the symbol and the ref count of that symbol.
467 : // Using absl::string_view lets us only store the complete string once, in the decode map.
468 : using EncodeMap = absl::flat_hash_map<absl::string_view, SharedSymbol>;
469 : using DecodeMap = absl::flat_hash_map<Symbol, InlineStringPtr>;
470 : EncodeMap encode_map_ ABSL_GUARDED_BY(lock_);
471 : DecodeMap decode_map_ ABSL_GUARDED_BY(lock_);
472 :
473 : // Free pool of symbols for re-use.
474 : // TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols
475 : // using an Envoy::IntervalSet.
476 : std::stack<Symbol> pool_ ABSL_GUARDED_BY(lock_);
477 : RecentLookups recent_lookups_ ABSL_GUARDED_BY(lock_);
478 : };
479 :
480 : // Base class for holding the backing-storing for a StatName. The two derived
481 : // classes, StatNameStorage and StatNameDynamicStorage, share a need to hold an
482 : // array of bytes, but use different representations.
483 : class StatNameStorageBase {
484 : public:
485 927955 : StatNameStorageBase(SymbolTable::StoragePtr&& bytes) : bytes_(std::move(bytes)) {}
486 26156 : StatNameStorageBase() = default;
487 :
488 : /**
489 : * @return a reference to the owned storage.
490 : */
491 : inline StatName statName() const;
492 :
493 : /**
494 : * @return the encoded data as a const pointer.
495 : */
496 603356 : const uint8_t* bytes() const { return bytes_.get(); }
497 :
498 : protected:
499 26156 : void setBytes(SymbolTable::StoragePtr&& bytes) { bytes_ = std::move(bytes); }
500 695419 : void clear() { bytes_.reset(); }
501 :
502 : private:
503 : SymbolTable::StoragePtr bytes_;
504 : };
505 :
506 : /**
507 : * Holds backing storage for a StatName. Usage of this is not required, as some
508 : * applications may want to hold multiple StatName objects in one contiguous
509 : * uint8_t array, or embed the characters directly in another structure.
510 : *
511 : * This is intended for embedding in other data structures that have access
512 : * to a SymbolTable. StatNameStorage::free(symbol_table) must be called prior
513 : * to allowing the StatNameStorage object to be destructed, otherwise an assert
514 : * will fire to guard against symbol-table leaks.
515 : *
516 : * Thus this class is inconvenient to directly use as temp storage for building
517 : * a StatName from a string. Instead it should be used via StatNameManagedStorage.
518 : */
519 : class StatNameStorage : public StatNameStorageBase {
520 : public:
521 : // Basic constructor for when you have a name as a string, and need to
522 : // generate symbols for it.
523 : StatNameStorage(absl::string_view name, SymbolTable& table);
524 :
525 : // Move constructor; needed for using StatNameStorage as an
526 : // absl::flat_hash_map value.
527 1089359 : StatNameStorage(StatNameStorage&& src) noexcept : StatNameStorageBase(std::move(src)) {}
528 :
529 : // Obtains new backing storage for an already existing StatName. Used to
530 : // record a computed StatName held in a temp into a more persistent data
531 : // structure.
532 : StatNameStorage(StatName src, SymbolTable& table);
533 :
534 : /**
535 : * Before allowing a StatNameStorage to be destroyed, you must call free()
536 : * on it, to drop the references to the symbols, allowing the SymbolTable
537 : * to shrink.
538 : */
539 : ~StatNameStorage();
540 :
541 : /**
542 : * Decrements the reference counts in the SymbolTable.
543 : *
544 : * @param table the symbol table.
545 : */
546 : void free(SymbolTable& table);
547 : };
548 :
549 : /**
550 : * Efficiently represents a stat name using a variable-length array of uint8_t.
551 : * This class does not own the backing store for this array; the backing-store
552 : * can be held in StatNameStorage, or it can be packed more tightly into another
553 : * object.
554 : *
555 : * When Envoy is configured with a large numbers of clusters, there are a huge
556 : * number of StatNames, so avoiding extra per-stat pointers has a significant
557 : * memory impact.
558 : */
559 : class StatName {
560 : public:
561 : // Constructs a StatName object directly referencing the storage of another
562 : // StatName.
563 7256925 : explicit StatName(const SymbolTable::Storage size_and_data) : size_and_data_(size_and_data) {}
564 :
565 : // Constructs an empty StatName object.
566 5041340 : StatName() = default;
567 :
568 : /**
569 : * Defines default hash function so StatName can be used as a key in an absl
570 : * hash-table without specifying a functor. See
571 : * https://abseil.io/docs/cpp/guides/hash for details.
572 : */
573 2769779 : template <typename H> friend H AbslHashValue(H h, StatName stat_name) {
574 2769779 : if (stat_name.empty()) {
575 11750 : return H::combine(std::move(h), absl::string_view());
576 11750 : }
577 :
578 2758029 : return H::combine(std::move(h), stat_name.dataAsStringView());
579 2769779 : }
580 :
581 : /**
582 : * Note that this hash function will return a different hash than that of
583 : * the elaborated string.
584 : *
585 : * @return uint64_t a hash of the underlying representation.
586 : */
587 1411645 : uint64_t hash() const { return absl::Hash<StatName>()(*this); }
588 :
589 481310 : bool operator==(const StatName& rhs) const {
590 481310 : return dataAsStringView() == rhs.dataAsStringView();
591 481310 : }
592 0 : bool operator!=(const StatName& rhs) const { return !(*this == rhs); }
593 :
594 : /**
595 : * @return size_t the number of bytes in the symbol array, excluding the
596 : * overhead for the size itself.
597 : */
598 : size_t dataSize() const;
599 :
600 : /**
601 : * @return size_t the number of bytes in the symbol array, including the
602 : * overhead for the size itself.
603 : */
604 5152085 : size_t size() const { return SymbolTable::Encoding::totalSizeBytes(dataSize()); }
605 :
606 : /**
607 : * Copies the entire StatName representation into a MemBlockBuilder, including
608 : * the length metadata at the beginning. The MemBlockBuilder must not have
609 : * any other data in it.
610 : *
611 : * @param mem_block_builder the builder to receive the storage.
612 : */
613 26156 : void copyToMemBlock(MemBlockBuilder<uint8_t>& mem_block_builder) {
614 26156 : ASSERT(mem_block_builder.size() == 0);
615 26156 : mem_block_builder.appendData(absl::MakeSpan(size_and_data_, size()));
616 26156 : }
617 :
618 : /**
619 : * Appends the data portion of the StatName representation into a
620 : * MemBlockBuilder, excluding the length metadata. This is appropriate for
621 : * join(), where several stat-names are combined, and we only need the
622 : * aggregated length metadata.
623 : *
624 : * @param mem_block_builder the builder to receive the storage.
625 : */
626 2400464 : void appendDataToMemBlock(MemBlockBuilder<uint8_t>& storage) {
627 2400464 : storage.appendData(absl::MakeSpan(data(), dataSize()));
628 2400464 : }
629 :
630 : #ifndef ENVOY_CONFIG_COVERAGE
631 : void debugPrint();
632 : #endif
633 :
634 : /**
635 : * @return A pointer to the first byte of data (skipping over size bytes).
636 : */
637 6121114 : const uint8_t* data() const {
638 6121114 : if (size_and_data_ == nullptr) {
639 357706 : return nullptr;
640 357706 : }
641 5763408 : return size_and_data_ + SymbolTable::Encoding::encodingSizeBytes(dataSize());
642 6121114 : }
643 :
644 4465345 : const uint8_t* dataIncludingSize() const { return size_and_data_; }
645 :
646 : /**
647 : * @return whether this is empty.
648 : */
649 5217439 : bool empty() const { return size_and_data_ == nullptr || dataSize() == 0; }
650 :
651 : /**
652 : * Determines whether this starts with the prefix. Note: dynamic segments
653 : * are not supported in the current implementation; this matching only works
654 : * for symbolic segments. However it OK for this to include dynamic segments
655 : * following the prefix.
656 : *
657 : * @param symbolic_prefix the prefix, which must not contain dynamic segments.
658 : */
659 : bool startsWith(StatName symbolic_prefix) const;
660 :
661 : private:
662 : /**
663 : * Casts the raw data as a string_view. Note that this string_view will not
664 : * be in human-readable form, but it will be compatible with a string-view
665 : * hasher and comparator.
666 : */
667 3720650 : absl::string_view dataAsStringView() const {
668 3720650 : return {reinterpret_cast<const char*>(data()),
669 3720650 : static_cast<absl::string_view::size_type>(dataSize())};
670 3720650 : }
671 :
672 : const uint8_t* size_and_data_{nullptr};
673 : };
674 :
675 1543559 : StatName StatNameStorageBase::statName() const { return StatName(bytes_.get()); }
676 :
677 : /**
678 : * Contains the backing store for a StatName and enough context so it can
679 : * self-delete through RAII. This works by augmenting StatNameStorage with a
680 : * reference to the SymbolTable&, so it has an extra 8 bytes of footprint. It
681 : * is intended to be used in cases where simplicity of implementation is more
682 : * important than byte-savings, for example:
683 : * - outside the stats system
684 : * - in tests
685 : * - as a scoped temp in a function
686 : * Due to the extra 8 bytes per instance, scalability should be taken into
687 : * account before using this as (say) a value or key in a map. In those
688 : * scenarios, it would be better to store the SymbolTable reference once
689 : * for the entire map.
690 : *
691 : * In the stat structures, we generally use StatNameStorage to avoid the
692 : * per-stat overhead.
693 : */
694 : class StatNameManagedStorage : public StatNameStorage {
695 : public:
696 : // Basic constructor for when you have a name as a string, and need to
697 : // generate symbols for it.
698 : StatNameManagedStorage(absl::string_view name, SymbolTable& table)
699 144207 : : StatNameStorage(name, table), symbol_table_(table) {}
700 : StatNameManagedStorage(StatNameManagedStorage&& src) noexcept
701 0 : : StatNameStorage(std::move(src)), symbol_table_(src.symbol_table_) {}
702 : StatNameManagedStorage(StatName src, SymbolTable& table) noexcept
703 0 : : StatNameStorage(src, table), symbol_table_(table) {}
704 :
705 144207 : ~StatNameManagedStorage() { free(symbol_table_); } // NOLINT(clang-analyzer-unix.Malloc)
706 :
707 : private:
708 : SymbolTable& symbol_table_;
709 : };
710 :
711 : /**
712 : * Holds backing-store for a dynamic stat, where are no global locks needed
713 : * to create a StatName from a string, but there is no sharing of token data
714 : * between names, so there may be more memory consumed.
715 : */
716 : class StatNameDynamicStorage : public StatNameStorageBase {
717 : public:
718 : // Basic constructor based on a name. Note the table is used for a call to
719 : // encode the name, but no locks are taken in either implementation of the
720 : // SymbolTable api.
721 : StatNameDynamicStorage(absl::string_view name, SymbolTable& table)
722 258696 : : StatNameStorageBase(table.makeDynamicStorage(name)) {}
723 : // Move constructor.
724 : StatNameDynamicStorage(StatNameDynamicStorage&& src) noexcept
725 175229 : : StatNameStorageBase(std::move(src)) {}
726 : };
727 :
728 : /**
729 : * Maintains storage for a collection of StatName objects. Like
730 : * StatNameManagedStorage, this has an RAII usage model, taking
731 : * care of decrementing ref-counts in the SymbolTable for all
732 : * contained StatNames on destruction or on clear();
733 : *
734 : * Example usage:
735 : * StatNamePool pool(symbol_table);
736 : * StatName name1 = pool.add("name1");
737 : * StatName name2 = pool.add("name2");
738 : * const uint8_t* storage = pool.addReturningStorage("name3");
739 : * StatName name3(storage);
740 : */
741 : class StatNamePool {
742 : public:
743 85929 : explicit StatNamePool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
744 85929 : ~StatNamePool() { clear(); }
745 :
746 : /**
747 : * Removes all StatNames from the pool.
748 : */
749 : void clear();
750 :
751 : /**
752 : * @param name the name to add the container.
753 : * @return the StatName held in the container for this name.
754 : */
755 : StatName add(absl::string_view name);
756 :
757 : /**
758 : * Does essentially the same thing as add(), but returns the storage as a
759 : * pointer which can later be used to create a StatName. This can be used
760 : * to accumulate a vector of uint8_t* which can later be used to create
761 : * StatName objects on demand.
762 : *
763 : * The use-case for this is in source/common/http/codes.cc, where we have a
764 : * fixed sized array of atomic pointers, indexed by HTTP code. This API
765 : * enables storing the allocated stat-name in that array of atomics, which
766 : * enables content-avoidance when finding StatNames for frequently used HTTP
767 : * codes.
768 : *
769 : * @param name the name to add the container.
770 : * @return a pointer to the bytes held in the container for this name, suitable for
771 : * using to construct a StatName.
772 : */
773 : const uint8_t* addReturningStorage(absl::string_view name);
774 :
775 : private:
776 : // We keep the stat names in a vector of StatNameStorage, storing the
777 : // SymbolTable reference separately. This saves 8 bytes per StatName,
778 : // at the cost of having a destructor that calls clear().
779 : SymbolTable& symbol_table_;
780 : std::vector<StatNameStorage> storage_vector_;
781 : };
782 :
783 : /**
784 : * Maintains storage for a collection of StatName objects constructed from
785 : * dynamically discovered strings. Like StatNameDynamicStorage, this has an RAII
786 : * usage model. Creating StatNames with this interface do not incur a
787 : * SymbolTable lock, but tokens are not shared across StatNames.
788 : *
789 : * The SymbolTable is required as a constructor argument to assist in encoding
790 : * the stat-names.
791 : *
792 : * Example usage:
793 : * StatNameDynamicPool pool(symbol_table);
794 : * StatName name1 = pool.add("name1");
795 : * StatName name2 = pool.add("name2");
796 : *
797 : * Note; StatNameDynamicPool::add("foo") != StatNamePool::add("foo"), even
798 : * though their string representations are identical. They also will not match
799 : * in map lookups. Tests for StatName with dynamic components must therefore
800 : * be looked up by string, via Stats::TestUtil::TestStore.
801 : */
802 : class StatNameDynamicPool {
803 : public:
804 33208 : explicit StatNameDynamicPool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
805 :
806 : /**
807 : * @param name the name to add the container.
808 : * @return the StatName held in the container for this name.
809 : */
810 : StatName add(absl::string_view name);
811 :
812 : private:
813 : // We keep the stat names in a vector of StatNameStorage, storing the
814 : // SymbolTable reference separately. This saves 8 bytes per StatName,
815 : // at the cost of having a destructor that calls clear().
816 : SymbolTable& symbol_table_;
817 : std::vector<StatNameDynamicStorage> storage_vector_;
818 : };
819 :
820 : // Represents an ordered container of StatNames. The encoding for each StatName
821 : // is byte-packed together, so this carries less overhead than allocating the
822 : // storage separately. The trade-off is there is no random access; you can only
823 : // iterate through the StatNames.
824 : //
825 : // The maximum size of the list is 255 elements, so the length can fit in a
826 : // byte. It would not be difficult to increase this, but there does not appear
827 : // to be a current need.
828 : class StatNameList {
829 : public:
830 : ~StatNameList();
831 :
832 : /**
833 : * @return true if populate() has been called on this list.
834 : */
835 0 : bool populated() const { return storage_ != nullptr; }
836 :
837 : /**
838 : * Iterates over each StatName in the list, calling f(StatName). f() should
839 : * return true to keep iterating, or false to end the iteration.
840 : *
841 : * @param f The function to call on each stat.
842 : */
843 : void iterate(const std::function<bool(StatName)>& f) const;
844 :
845 : /**
846 : * Frees each StatName in the list. Failure to call this before destruction
847 : * results in an ASSERT at destruction of the list and the SymbolTable.
848 : *
849 : * This is not done as part of destruction as the SymbolTable may already
850 : * be destroyed.
851 : *
852 : * @param symbol_table the symbol table.
853 : */
854 : void clear(SymbolTable& symbol_table);
855 :
856 : private:
857 : friend class SymbolTable;
858 :
859 : /**
860 : * Moves the specified storage into the list. The storage format is an
861 : * array of bytes, organized like this:
862 : *
863 : * [0] The number of elements in the list (must be < 256).
864 : * [1] low order 8 bits of the number of symbols in the first element.
865 : * [2] high order 8 bits of the number of symbols in the first element.
866 : * [3...] the symbols in the first element.
867 : * ...
868 : *
869 : *
870 : * For SymbolTable, each symbol is 1 or more bytes, in a variable-length
871 : * encoding. See SymbolTable::Encoding::addSymbol for details.
872 : */
873 392190 : void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); }
874 :
875 : SymbolTable::StoragePtr storage_;
876 : };
877 :
878 : // Value-templatized hash-map with StatName key.
879 : template <class T> using StatNameHashMap = absl::flat_hash_map<StatName, T>;
880 :
881 : // Hash-set of StatNames
882 : using StatNameHashSet = absl::flat_hash_set<StatName>;
883 :
884 : // Helper class for sorting StatNames.
885 : struct StatNameLessThan {
886 0 : StatNameLessThan(const SymbolTable& symbol_table) : symbol_table_(symbol_table) {}
887 0 : bool operator()(const StatName& a, const StatName& b) const {
888 0 : return symbol_table_.lessThan(a, b);
889 0 : }
890 :
891 : const SymbolTable& symbol_table_;
892 : };
893 :
894 : struct HeterogeneousStatNameHash {
895 : // Specifying is_transparent indicates to the library infrastructure that
896 : // type-conversions should not be applied when calling find(), but instead
897 : // pass the actual types of the contained and searched-for objects directly to
898 : // these functors. See
899 : // https://en.cppreference.com/w/cpp/utility/functional/less_void for an
900 : // official reference, and https://abseil.io/tips/144 for a description of
901 : // using it in the context of absl.
902 : using is_transparent = void; // NOLINT(readability-identifier-naming)
903 :
904 0 : size_t operator()(StatName a) const { return a.hash(); }
905 0 : size_t operator()(const StatNameStorage& a) const { return a.statName().hash(); }
906 : };
907 :
908 : struct HeterogeneousStatNameEqual {
909 : // See description for HeterogeneousStatNameHash::is_transparent.
910 : using is_transparent = void; // NOLINT(readability-identifier-naming)
911 :
912 0 : size_t operator()(StatName a, StatName b) const { return a == b; }
913 0 : size_t operator()(const StatNameStorage& a, const StatNameStorage& b) const {
914 0 : return a.statName() == b.statName();
915 0 : }
916 0 : size_t operator()(StatName a, const StatNameStorage& b) const { return a == b.statName(); }
917 0 : size_t operator()(const StatNameStorage& a, StatName b) const { return a.statName() == b; }
918 : };
919 :
920 : // Encapsulates a set<StatNameStorage>. We use containment here rather than a
921 : // 'using' alias because we need to ensure that when the set is destructed,
922 : // StatNameStorage::free(symbol_table) is called on each entry. It is a little
923 : // easier at the call-sites in thread_local_store.cc to implement this an
924 : // explicit free() method, analogous to StatNameStorage::free(), compared to
925 : // storing a SymbolTable reference in the class and doing the free in the
926 : // destructor, like StatNameManagedStorage.
927 : class StatNameStorageSet {
928 : public:
929 : using HashSet =
930 : absl::flat_hash_set<StatNameStorage, HeterogeneousStatNameHash, HeterogeneousStatNameEqual>;
931 : using Iterator = HashSet::iterator;
932 :
933 : ~StatNameStorageSet();
934 :
935 : /**
936 : * Releases all symbols held in this set. Must be called prior to destruction.
937 : *
938 : * @param symbol_table The symbol table that owns the symbols.
939 : */
940 : void free(SymbolTable& symbol_table);
941 :
942 : /**
943 : * @param storage The StatNameStorage to add to the set.
944 : */
945 0 : std::pair<HashSet::iterator, bool> insert(StatNameStorage&& storage) {
946 0 : return hash_set_.insert(std::move(storage));
947 0 : }
948 :
949 : /**
950 : * @param stat_name The stat_name to find.
951 : * @return the iterator pointing to the stat_name, or end() if not found.
952 : */
953 0 : Iterator find(StatName stat_name) { return hash_set_.find(stat_name); }
954 :
955 : /**
956 : * @return the end-marker.
957 : */
958 0 : Iterator end() { return hash_set_.end(); }
959 :
960 : /**
961 : * @return the number of elements in the set.
962 : */
963 0 : size_t size() const { return hash_set_.size(); }
964 :
965 : private:
966 : HashSet hash_set_;
967 : };
968 :
969 : // Captures StatNames for lookup by string, keeping a map of 'built-ins' that is
970 : // expected to be populated during initialization.
971 : //
972 : // Ideally, builtins should be added during process initialization, in the
973 : // outermost relevant context. And as the builtins map is not mutex protected,
974 : // builtins must *not* be added to an existing StatNameSet in the request-path.
975 : //
976 : // It is fine to populate a new StatNameSet when (for example) an xDS
977 : // message reveals a new set of names to be used as stats. The population must
978 : // be completed prior to exposing the new StatNameSet to worker threads.
979 : //
980 : // To create stats using names discovered in the request path, dynamic stat
981 : // names must be used (see StatNameDynamicStorage). Consider using helper
982 : // methods such as Stats::Utility::counterFromElements in common/stats/utility.h
983 : // to simplify the process of allocating and combining stat names and creating
984 : // counters, gauges, and histograms from them.
985 : class StatNameSet {
986 : public:
987 : // This object must be instantiated via SymbolTable::makeSet(), thus constructor is private.
988 :
989 : /**
990 : * Adds a string to the builtin map, which is not mutex protected. This map is
991 : * always consulted first as a hit there means no lock is required.
992 : *
993 : * Builtins can only be added immediately after construction, as the builtins
994 : * map is not mutex-protected.
995 : */
996 : void rememberBuiltin(absl::string_view str);
997 :
998 : /**
999 : * Remembers every string in a container as builtins.
1000 : */
1001 74 : template <class StringContainer> void rememberBuiltins(const StringContainer& container) {
1002 518 : for (const auto& str : container) {
1003 518 : rememberBuiltin(str);
1004 518 : }
1005 74 : }
1006 74 : void rememberBuiltins(const std::vector<const char*>& container) {
1007 74 : rememberBuiltins<std::vector<const char*>>(container);
1008 74 : }
1009 :
1010 : /**
1011 : * Finds a builtin StatName by name. If the builtin has not been registered,
1012 : * then the fallback is returned.
1013 : *
1014 : * @return the StatName or fallback.
1015 : */
1016 : StatName getBuiltin(absl::string_view token, StatName fallback) const;
1017 :
1018 : /**
1019 : * Adds a StatName using the pool, but without remembering it in any maps.
1020 : *
1021 : * For convenience, StatNameSet offers pass-through thread-safe access to
1022 : * its mutex-protected pool. This is useful in constructor initializers, when
1023 : * StatNames are needed both from compile-time constants, as well as from
1024 : * other constructor args, e.g.
1025 : * MyClass(const std::vector<absl::string_view>& strings, Stats::SymbolTable& symbol_table)
1026 : * : stat_name_set_(symbol_table),
1027 : * known_const_(stat_name_set_.add("known_const")) { // unmapped constants from pool
1028 : * stat_name_set_.rememberBuiltins(strings); // mapped builtins.
1029 : * }
1030 : * This avoids the need to make two different pools; one backing the
1031 : * StatNameSet mapped entries, and the other backing the set passed in via the
1032 : * constructor.
1033 : *
1034 : * @param str The string to add as a StatName
1035 : * @return The StatName for str.
1036 : */
1037 2492 : StatName add(absl::string_view str) {
1038 2492 : absl::MutexLock lock(&mutex_);
1039 2492 : return pool_.add(str);
1040 2492 : }
1041 :
1042 : private:
1043 : friend class SymbolTable;
1044 :
1045 : StatNameSet(SymbolTable& symbol_table, absl::string_view name);
1046 :
1047 : const std::string name_;
1048 : Stats::StatNamePool pool_ ABSL_GUARDED_BY(mutex_);
1049 : mutable absl::Mutex mutex_;
1050 : using StringStatNameMap = absl::flat_hash_map<std::string, Stats::StatName>;
1051 : StringStatNameMap builtin_stat_names_;
1052 : };
1053 :
1054 : template <class GetStatName, class Obj>
1055 : bool SymbolTable::StatNameCompare<GetStatName, Obj>::operator()(const Obj& a, const Obj& b) const {
1056 : StatName a_stat_name = getter_(a);
1057 : StatName b_stat_name = getter_(b);
1058 : return symbol_table_.lessThanLockHeld(a_stat_name, b_stat_name);
1059 : }
1060 :
1061 : using SymbolTablePtr = std::unique_ptr<SymbolTable>;
1062 :
1063 : // TODO(jmarantz): rename all remaining ~47 occurrences of SymbolTableImpl in
1064 : // the codebase to SymbolTable, and drop this alias.
1065 : using SymbolTableImpl = SymbolTable;
1066 :
1067 : } // namespace Stats
1068 : } // namespace Envoy
|