Coverage Report

Created: 2024-09-19 09:45

/proc/self/cwd/source/common/stats/symbol_table.cc
Line
Count
Source (jump to first uncovered line)
1
#include "source/common/stats/symbol_table.h"
2
3
#include <algorithm>
4
#include <iostream>
5
#include <memory>
6
#include <vector>
7
8
#include "source/common/common/assert.h"
9
#include "source/common/common/logger.h"
10
#include "source/common/common/utility.h"
11
12
#include "absl/strings/str_cat.h"
13
14
namespace Envoy {
15
namespace Stats {
16
17
// Masks used for variable-length encoding of arbitrary-sized integers into a
18
// uint8-array. The integers are typically small, so we try to store them in as
19
// few bytes as possible. The bottom 7 bits hold values, and the top bit is used
20
// to determine whether another byte is needed for more data.
21
static constexpr uint32_t SpilloverMask = 0x80;
22
static constexpr uint32_t Low7Bits = 0x7f;
23
24
// When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol
25
// that will decode into uint8_t array starting (and ending) with {0}. Thus we
26
// can use a leading 0 in the first byte to indicate that what follows is
27
// literal char data, rather than symbol-table entries. Literal char data is
28
// used for dynamically discovered stat-name tokens where you don't want to take
29
// a symbol table lock, and would rather pay extra memory overhead to store the
30
// tokens as fully elaborated strings.
31
static constexpr Symbol FirstValidSymbol = 1;
32
static constexpr uint8_t LiteralStringIndicator = 0;
33
34
4.35G
size_t StatName::dataSize() const {
35
4.35G
  if (size_and_data_ == nullptr) {
36
74.6M
    return 0;
37
74.6M
  }
38
4.28G
  return SymbolTable::Encoding::decodeNumber(size_and_data_).first;
39
4.35G
}
40
41
#ifndef ENVOY_CONFIG_COVERAGE
42
0
void StatName::debugPrint() {
43
  // TODO(jmarantz): capture this functionality (always prints regardless of
44
  // loglevel) in an ENVOY_LOG macro variant or similar, perhaps
45
  // ENVOY_LOG_MISC(stderr, ...);
46
0
  if (size_and_data_ == nullptr) {
47
0
    std::cerr << "Null StatName" << std::endl;
48
0
  } else {
49
0
    const size_t nbytes = dataSize();
50
0
    std::cerr << "dataSize=" << nbytes << ":";
51
0
    for (size_t i = 0; i < nbytes; ++i) {
52
0
      std::cerr << " " << static_cast<uint64_t>(data()[i]);
53
0
    }
54
0
    const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this);
55
0
    std::cerr << ", numSymbols=" << encoding.size() << ":";
56
0
    for (Symbol symbol : encoding) {
57
0
      std::cerr << " " << symbol;
58
0
    }
59
0
    std::cerr << std::endl;
60
0
  }
61
0
}
62
#endif
63
64
84.0M
SymbolTable::Encoding::~Encoding() {
65
  // Verifies that moveToMemBlock() was called on this encoding. Failure
66
  // to call moveToMemBlock() will result in leaks symbols.
67
84.0M
  ASSERT(mem_block_.capacity() == 0);
68
84.0M
}
69
70
2.60G
size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) {
71
2.60G
  size_t num_bytes = 0;
72
2.85G
  do {
73
2.85G
    ++num_bytes;
74
2.85G
    number >>= 7;
75
2.85G
  } while (number != 0);
76
2.60G
  return num_bytes;
77
2.60G
}
78
79
667M
void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) {
80
  // UTF-8-like encoding where a value 127 or less gets written as a single
81
  // byte. For higher values we write the low-order 7 bits with a 1 in
82
  // the high-order bit. Then we right-shift 7 bits and keep adding more bytes
83
  // until we have consumed all the non-zero bits in symbol.
84
  //
85
  // When decoding, we stop consuming uint8_t when we see a uint8_t with
86
  // high-order bit 0.
87
915M
  do {
88
915M
    if (number < (1 << 7)) {
89
667M
      mem_block.appendOne(number); // number <= 127 gets encoded in one byte.
90
667M
    } else {
91
247M
      mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes.
92
247M
    }
93
915M
    number >>= 7;
94
915M
  } while (number != 0);
95
667M
}
96
97
78.9M
void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) {
98
78.9M
  ASSERT(data_bytes_required_ == 0);
99
391M
  for (Symbol symbol : symbols) {
100
391M
    data_bytes_required_ += encodingSizeBytes(symbol);
101
391M
  }
102
78.9M
  mem_block_.setCapacity(data_bytes_required_);
103
391M
  for (Symbol symbol : symbols) {
104
391M
    appendEncoding(symbol, mem_block_);
105
391M
  }
106
78.9M
}
107
108
7.69G
std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) {
109
7.69G
  uint64_t number = 0;
110
7.69G
  uint64_t uc = SpilloverMask;
111
7.69G
  const uint8_t* start = encoding;
112
17.7G
  for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) {
113
10.0G
    uc = static_cast<uint32_t>(*encoding);
114
10.0G
    number |= (uc & Low7Bits) << shift;
115
10.0G
  }
116
7.69G
  return std::make_pair(number, encoding - start);
117
7.69G
}
118
119
305M
SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) {
120
305M
  SymbolVec symbol_vec;
121
305M
  symbol_vec.reserve(stat_name.dataSize());
122
305M
  decodeTokens(
123
2.18G
      stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); },
124
305M
      [](absl::string_view) {});
125
305M
  return symbol_vec;
126
305M
}
127
128
// Iterator for decoding StatName tokens. This is not an STL-compatible
129
// iterator; it is used only locally. Using this rather than decodeSymbols makes
130
// it easier to early-exit out of a comparison once we know the answer. For
131
// example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order
132
// when you get to the "b" vs "x" and do not need to decode "c" vs "y".
133
//
134
// This is also helpful for prefix-matching.
135
//
136
// The array-version of decodeSymbols is still a good approach for converting a
137
// StatName to a string as the intermediate array of string_view is needed to
138
// allocate the right size for the joined string.
139
class SymbolTable::Encoding::TokenIter {
140
public:
141
  // The type of token reached.
142
  enum class TokenType { StringView, Symbol, End };
143
144
362M
  TokenIter(StatName stat_name) {
145
362M
    const uint8_t* raw_data = stat_name.dataIncludingSize();
146
362M
    if (raw_data == nullptr) {
147
10.8M
      size_ = 0;
148
10.8M
      array_ = nullptr;
149
351M
    } else {
150
351M
      std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data);
151
351M
      size_ = size_consumed.first;
152
351M
      array_ = raw_data + size_consumed.second;
153
351M
    }
154
362M
  }
155
156
  /**
157
   * Parses the next token. The token can then be retrieved by calling
158
   * stringView() or symbol().
159
   * @return The type of token, TokenType::End if we have reached the end of the StatName.
160
   */
161
3.42G
  TokenType next() {
162
3.42G
    if (size_ == 0) {
163
362M
      return TokenType::End;
164
362M
    }
165
3.06G
    if (*array_ == LiteralStringIndicator) {
166
      // To avoid scanning memory to find the literal size during decode, we
167
      // var-length encode the size of the literal string prior to the data.
168
35.2M
      ASSERT(size_ > 1);
169
35.2M
      ++array_;
170
35.2M
      --size_;
171
35.2M
      std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_);
172
35.2M
      uint64_t length = length_consumed.first;
173
35.2M
      array_ += length_consumed.second;
174
35.2M
      size_ -= length_consumed.second;
175
35.2M
      ASSERT(size_ >= length);
176
35.2M
      string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length);
177
35.2M
      size_ -= length;
178
35.2M
      array_ += length;
179
35.2M
#ifndef NDEBUG
180
35.2M
      last_type_ = TokenType::StringView;
181
35.2M
#endif
182
35.2M
      return TokenType::StringView;
183
35.2M
    }
184
3.02G
    std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_);
185
3.02G
    symbol_ = symbol_consumed.first;
186
3.02G
    size_ -= symbol_consumed.second;
187
3.02G
    array_ += symbol_consumed.second;
188
3.02G
#ifndef NDEBUG
189
3.02G
    last_type_ = TokenType::Symbol;
190
3.02G
#endif
191
3.02G
    return TokenType::Symbol;
192
3.06G
  }
193
194
  /** @return the current string_view -- only valid to call if next()==TokenType::StringView */
195
35.2M
  absl::string_view stringView() const {
196
35.2M
#ifndef NDEBUG
197
35.2M
    ASSERT(last_type_ == TokenType::StringView);
198
35.2M
#endif
199
35.2M
    return string_view_;
200
35.2M
  }
201
202
  /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */
203
3.02G
  Symbol symbol() const {
204
3.02G
#ifndef NDEBUG
205
3.02G
    ASSERT(last_type_ == TokenType::Symbol);
206
3.02G
#endif
207
3.02G
    return symbol_;
208
3.02G
  }
209
210
private:
211
  const uint8_t* array_;
212
  size_t size_;
213
  absl::string_view string_view_;
214
  Symbol symbol_;
215
#ifndef NDEBUG
216
  TokenType last_type_{TokenType::End};
217
#endif
218
};
219
220
void SymbolTable::Encoding::decodeTokens(
221
    StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
222
362M
    const std::function<void(absl::string_view)>& string_view_token_fn) {
223
362M
  TokenIter iter(stat_name);
224
362M
  TokenIter::TokenType type;
225
3.42G
  while ((type = iter.next()) != TokenIter::TokenType::End) {
226
3.06G
    if (type == TokenIter::TokenType::StringView) {
227
35.2M
      string_view_token_fn(iter.stringView());
228
3.02G
    } else {
229
3.02G
      ASSERT(type == TokenIter::TokenType::Symbol);
230
3.02G
      symbol_token_fn(iter.symbol());
231
3.02G
    }
232
3.06G
  }
233
362M
}
234
235
0
bool StatName::startsWith(StatName prefix) const {
236
0
  using TokenIter = SymbolTable::Encoding::TokenIter;
237
0
  TokenIter prefix_iter(prefix);
238
0
  TokenIter this_iter(*this);
239
0
  while (true) {
240
0
    TokenIter::TokenType prefix_type = prefix_iter.next();
241
0
    TokenIter::TokenType this_type = this_iter.next();
242
0
    if (prefix_type == TokenIter::TokenType::End) {
243
0
      return true; // "a.b.c" starts with "a.b" or "a.b.c"
244
0
    }
245
0
    if (this_type == TokenIter::TokenType::End) {
246
0
      return false; // "a.b" does not start with "a.b.c"
247
0
    }
248
0
    if (prefix_type != TokenIter::TokenType::Symbol) {
249
      // Disallow dynamic components in the prefix. We don't have a current need
250
      // for prefixes to be expressed dynamically, and handling that case would
251
      // add complexity. In particular we'd need to take locks to decode to
252
      // strings.
253
0
      return false;
254
0
    }
255
0
    if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) {
256
0
      return false;
257
0
    }
258
0
  }
259
0
  return true; // not reached
260
0
}
261
262
57.6M
std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const {
263
57.6M
  std::vector<absl::string_view> strings;
264
57.6M
  Thread::LockGuard lock(lock_);
265
57.6M
  Encoding::decodeTokens(
266
57.6M
      stat_name,
267
57.6M
      [this, &strings](Symbol symbol)
268
824M
          ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); },
269
57.6M
      [&strings](absl::string_view str) { strings.push_back(str); });
270
57.6M
  return strings;
271
57.6M
}
272
273
84.0M
void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) {
274
84.0M
  appendEncoding(data_bytes_required_, mem_block);
275
84.0M
  mem_block.appendBlock(mem_block_);
276
84.0M
  mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct.
277
84.0M
}
278
279
void SymbolTable::Encoding::appendToMemBlock(StatName stat_name,
280
107M
                                             MemBlockBuilder<uint8_t>& mem_block) {
281
107M
  const uint8_t* data = stat_name.dataIncludingSize();
282
107M
  if (data == nullptr) {
283
10.8M
    mem_block.appendOne(0);
284
96.5M
  } else {
285
96.5M
    mem_block.appendData(absl::MakeSpan(data, stat_name.size()));
286
96.5M
  }
287
107M
}
288
289
SymbolTable::SymbolTable()
290
    // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro.
291
171k
    : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {}
292
293
171k
SymbolTable::~SymbolTable() {
294
  // To avoid leaks into the symbol table, we expect all StatNames to be freed.
295
  // Note: this could potentially be short-circuited if we decide a fast exit
296
  // is needed in production. But it would be good to ensure clean up during
297
  // tests.
298
171k
  ASSERT(numSymbols() == 0);
299
171k
}
300
301
// TODO(ambuc): There is a possible performance optimization here for avoiding
302
// the encoding of IPs / numbers if they appear in stat names. We don't want to
303
// waste time symbolizing an integer as an integer, if we can help it.
304
84.0M
void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) {
305
84.0M
  if (name.empty()) {
306
5.10M
    return;
307
5.10M
  }
308
309
  // We want to hold the lock for the minimum amount of time, so we do the
310
  // string-splitting and prepare a temp vector of Symbol first.
311
78.9M
  const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.');
312
78.9M
  std::vector<Symbol> symbols;
313
78.9M
  symbols.reserve(tokens.size());
314
315
  // Now take the lock and populate the Symbol objects, which involves bumping
316
  // ref-counts in this.
317
78.9M
  {
318
78.9M
    Thread::LockGuard lock(lock_);
319
78.9M
    recent_lookups_.lookup(name);
320
391M
    for (auto& token : tokens) {
321
      // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with
322
      // length below some threshold, say 4 bytes. It might be preferable not to
323
      // reserve Symbols for every 3 digit number found (for example) in ipv4
324
      // addresses.
325
391M
      symbols.push_back(toSymbol(token));
326
391M
    }
327
78.9M
  }
328
329
  // Now efficiently encode the array of 32-bit symbols into a uint8_t array.
330
78.9M
  encoding.addSymbols(symbols);
331
78.9M
}
332
333
171k
uint64_t SymbolTable::numSymbols() const {
334
171k
  Thread::LockGuard lock(lock_);
335
171k
  ASSERT(encode_map_.size() == decode_map_.size());
336
171k
  return encode_map_.size();
337
171k
}
338
339
57.6M
std::string SymbolTable::toString(const StatName& stat_name) const {
340
57.6M
  return absl::StrJoin(decodeStrings(stat_name), ".");
341
57.6M
}
342
343
110M
void SymbolTable::incRefCount(const StatName& stat_name) {
344
  // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
345
110M
  const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
346
347
110M
  Thread::LockGuard lock(lock_);
348
896M
  for (Symbol symbol : symbols) {
349
896M
    auto decode_search = decode_map_.find(symbol);
350
351
896M
    ASSERT(decode_search != decode_map_.end(),
352
896M
           "Please see "
353
896M
           "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
354
896M
           "debugging-symbol-table-assertions");
355
896M
    auto encode_search = encode_map_.find(decode_search->second->toStringView());
356
896M
    ASSERT(encode_search != encode_map_.end(),
357
896M
           "Please see "
358
896M
           "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
359
896M
           "debugging-symbol-table-assertions");
360
361
896M
    ++encode_search->second.ref_count_;
362
896M
  }
363
110M
}
364
365
194M
void SymbolTable::free(const StatName& stat_name) {
366
  // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
367
194M
  const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
368
369
194M
  Thread::LockGuard lock(lock_);
370
1.28G
  for (Symbol symbol : symbols) {
371
1.28G
    auto decode_search = decode_map_.find(symbol);
372
1.28G
    ASSERT(decode_search != decode_map_.end());
373
374
1.28G
    auto encode_search = encode_map_.find(decode_search->second->toStringView());
375
1.28G
    ASSERT(encode_search != encode_map_.end());
376
377
    // If that was the last remaining client usage of the symbol, erase the
378
    // current mappings and add the now-unused symbol to the reuse pool.
379
    //
380
    // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in
381
    // symbol_table_speed_test.cc, relative to breaking out the decrement into a
382
    // separate step, likely due to the non-trivial dereferences in EXPR.
383
1.28G
    if (--encode_search->second.ref_count_ == 0) {
384
19.9M
      decode_map_.erase(decode_search);
385
19.9M
      encode_map_.erase(encode_search);
386
19.9M
      pool_.push(symbol);
387
19.9M
    }
388
1.28G
  }
389
194M
}
390
391
5.00k
uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const {
392
5.00k
  uint64_t total = 0;
393
5.00k
  absl::flat_hash_map<std::string, uint64_t> name_count_map;
394
395
  // We don't want to hold lock_ while calling the iterator, but we need it to
396
  // access recent_lookups_, so we buffer in name_count_map.
397
5.00k
  {
398
5.00k
    Thread::LockGuard lock(lock_);
399
5.00k
    recent_lookups_.forEach(
400
5.00k
        [&name_count_map](absl::string_view str, uint64_t count)
401
5.00k
            ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; });
402
5.00k
    total += recent_lookups_.total();
403
5.00k
  }
404
405
  // Now we have the collated name-count map data: we need to vectorize and
406
  // sort. We define the pair with the count first as std::pair::operator<
407
  // prioritizes its first element over its second.
408
5.00k
  using LookupCount = std::pair<uint64_t, absl::string_view>;
409
5.00k
  std::vector<LookupCount> lookup_data;
410
5.00k
  lookup_data.reserve(name_count_map.size());
411
5.00k
  for (const auto& iter : name_count_map) {
412
0
    lookup_data.emplace_back(LookupCount(iter.second, iter.first));
413
0
  }
414
5.00k
  std::sort(lookup_data.begin(), lookup_data.end());
415
5.00k
  for (const LookupCount& lookup_count : lookup_data) {
416
0
    iter(lookup_count.second, lookup_count.first);
417
0
  }
418
5.00k
  return total;
419
5.00k
}
420
421
797
DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const {
422
797
  DynamicSpans dynamic_spans;
423
424
797
  uint32_t index = 0;
425
10.7M
  auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) {
426
10.7M
    DynamicSpan span;
427
10.7M
    span.first = index;
428
10.7M
    index += std::count(str.begin(), str.end(), '.');
429
10.7M
    span.second = index;
430
10.7M
    ++index;
431
10.7M
    dynamic_spans.push_back(span);
432
10.7M
  };
433
434
  // Use decodeTokens to suss out which components of stat_name are
435
  // symbolic vs dynamic. The lambda that takes a Symbol is called
436
  // for symbolic components. The lambda called with a string_view
437
  // is called for dynamic components.
438
  //
439
  // Note that with fake symbol tables, the Symbol lambda is called
440
  // once for each character in the string, and no dynamics will
441
  // be recorded.
442
797
  Encoding::decodeTokens(
443
14.9M
      stat_name, [&index](Symbol) { ++index; }, record_dynamic);
444
797
  return dynamic_spans;
445
797
}
446
447
0
void SymbolTable::setRecentLookupCapacity(uint64_t capacity) {
448
0
  Thread::LockGuard lock(lock_);
449
0
  recent_lookups_.setCapacity(capacity);
450
0
}
451
452
0
void SymbolTable::clearRecentLookups() {
453
0
  Thread::LockGuard lock(lock_);
454
0
  recent_lookups_.clear();
455
0
}
456
457
0
uint64_t SymbolTable::recentLookupCapacity() const {
458
0
  Thread::LockGuard lock(lock_);
459
0
  return recent_lookups_.capacity();
460
0
}
461
462
4.64k
StatNameSetPtr SymbolTable::makeSet(absl::string_view name) {
463
  // make_unique does not work with private ctor, even though SymbolTable is a friend.
464
4.64k
  StatNameSetPtr stat_name_set(new StatNameSet(*this, name));
465
4.64k
  return stat_name_set;
466
4.64k
}
467
468
391M
Symbol SymbolTable::toSymbol(absl::string_view sv) {
469
391M
  Symbol result;
470
391M
  auto encode_find = encode_map_.find(sv);
471
  // If the string segment doesn't already exist,
472
391M
  if (encode_find == encode_map_.end()) {
473
    // We create the actual string, place it in the decode_map_, and then insert
474
    // a string_view pointing to it in the encode_map_. This allows us to only
475
    // store the string once. We use unique_ptr so copies are not made as
476
    // flat_hash_map moves values around.
477
19.9M
    InlineStringPtr str = InlineString::create(sv);
478
19.9M
    auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)});
479
19.9M
    ASSERT(encode_insert.second);
480
19.9M
    auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
481
19.9M
    ASSERT(decode_insert.second);
482
483
19.9M
    result = next_symbol_;
484
19.9M
    newSymbol();
485
372M
  } else {
486
    // If the insertion didn't take place, return the actual value at that location and up the
487
    // refcount at that location
488
372M
    result = encode_find->second.symbol_;
489
372M
    ++(encode_find->second.ref_count_);
490
372M
  }
491
391M
  return result;
492
391M
}
493
494
absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
495
824M
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
496
824M
  auto search = decode_map_.find(symbol);
497
824M
  RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
498
824M
  return search->second->toStringView();
499
824M
}
500
501
19.9M
void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
502
19.9M
  if (pool_.empty()) {
503
18.8M
    next_symbol_ = ++monotonic_counter_;
504
18.8M
  } else {
505
1.00M
    next_symbol_ = pool_.top();
506
1.00M
    pool_.pop();
507
1.00M
  }
508
  // This should catch integer overflow for the new symbol.
509
19.9M
  ASSERT(monotonic_counter_ != 0);
510
19.9M
}
511
512
0
bool SymbolTable::lessThan(const StatName& a, const StatName& b) const {
513
  // Proactively take the table lock in anticipation that we'll need to
514
  // convert at least one symbol to a string_view, and it's easier not to
515
  // bother to lazily take the lock.
516
0
  Thread::LockGuard lock(lock_);
517
0
  return lessThanLockHeld(a, b);
518
0
}
519
520
bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const
521
0
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
522
0
  Encoding::TokenIter a_iter(a), b_iter(b);
523
0
  while (true) {
524
0
    Encoding::TokenIter::TokenType a_type = a_iter.next();
525
0
    Encoding::TokenIter::TokenType b_type = b_iter.next();
526
0
    if (b_type == Encoding::TokenIter::TokenType::End) {
527
0
      return false; // "x.y.z" > "x.y", "x.y" == "x.y"
528
0
    }
529
0
    if (a_type == Encoding::TokenIter::TokenType::End) {
530
0
      return true; // "x.y" < "x.y.z"
531
0
    }
532
0
    if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol &&
533
0
        a_iter.symbol() == b_iter.symbol()) {
534
0
      continue; // matching symbols don't need to be decoded to strings
535
0
    }
536
537
0
    absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol
538
0
                                    ? fromSymbol(a_iter.symbol())
539
0
                                    : a_iter.stringView();
540
0
    absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol
541
0
                                    ? fromSymbol(b_iter.symbol())
542
0
                                    : b_iter.stringView();
543
0
    if (a_token != b_token) {
544
0
      return a_token < b_token;
545
0
    }
546
0
  }
547
0
}
548
549
#ifndef ENVOY_CONFIG_COVERAGE
550
0
void SymbolTable::debugPrint() const {
551
0
  Thread::LockGuard lock(lock_);
552
0
  std::vector<Symbol> symbols;
553
0
  for (const auto& p : decode_map_) {
554
0
    symbols.push_back(p.first);
555
0
  }
556
0
  std::sort(symbols.begin(), symbols.end());
557
0
  for (Symbol symbol : symbols) {
558
0
    const InlineString& token = *decode_map_.find(symbol)->second;
559
0
    const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second;
560
0
    ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_);
561
0
  }
562
0
}
563
#endif
564
565
84.0M
SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) {
566
84.0M
  name = StringUtil::removeTrailingCharacters(name, '.');
567
84.0M
  Encoding encoding;
568
84.0M
  addTokensToEncoding(name, encoding);
569
84.0M
  MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired()));
570
84.0M
  encoding.moveToMemBlock(mem_block);
571
84.0M
  return mem_block.release();
572
84.0M
}
573
574
StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table)
575
84.0M
    : StatNameStorageBase(table.encode(name)) {}
576
577
3.18M
StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) {
578
3.18M
  const size_t size = src.size();
579
3.18M
  MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t.
580
3.18M
  src.copyToMemBlock(storage);
581
3.18M
  setBytes(storage.release());
582
3.18M
  table.incRefCount(statName());
583
3.18M
}
584
585
24.2M
SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) {
586
24.2M
  name = StringUtil::removeTrailingCharacters(name, '.');
587
588
  // For all StatName objects, we first have the total number of bytes in the
589
  // representation. But for inlined dynamic string StatName variants, we must
590
  // store the length of the payload separately, so that if this token gets
591
  // joined with others, we'll know much space it consumes until the next token.
592
  // So the layout is
593
  //   [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ]
594
  // So we need to figure out how many bytes we need to represent length-of-name
595
  // and name.
596
597
  // payload_bytes is the total number of bytes needed to represent the
598
  // characters in name, plus their encoded size, plus the literal indicator.
599
24.2M
  const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1;
600
601
  // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and
602
  // the length of those.
603
24.2M
  const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes);
604
24.2M
  MemBlockBuilder<uint8_t> mem_block(total_bytes);
605
606
24.2M
  Encoding::appendEncoding(payload_bytes, mem_block);
607
24.2M
  mem_block.appendOne(LiteralStringIndicator);
608
24.2M
  Encoding::appendEncoding(name.size(), mem_block);
609
24.2M
  mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size()));
610
24.2M
  ASSERT(mem_block.capacityRemaining() == 0);
611
24.2M
  return mem_block.release();
612
24.2M
}
613
614
174M
StatNameStorage::~StatNameStorage() {
615
  // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to
616
  // decrement the reference counts held by the SymbolTable on behalf of
617
  // this StatName. This saves 8 bytes of storage per stat, relative to
618
  // holding a SymbolTable& in each StatNameStorage object.
619
174M
  ASSERT(bytes() == nullptr);
620
174M
}
621
622
87.2M
void StatNameStorage::free(SymbolTable& table) {
623
  // nolint: https://github.com/llvm/llvm-project/issues/81597
624
  // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
625
87.2M
  table.free(statName());
626
87.2M
  clear();
627
87.2M
}
628
629
5.43M
void StatNamePool::clear() {
630
77.1M
  for (StatNameStorage& storage : storage_vector_) {
631
    // nolint: https://github.com/llvm/llvm-project/issues/81597
632
    // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
633
77.1M
    storage.free(symbol_table_);
634
77.1M
  }
635
5.43M
  storage_vector_.clear();
636
5.43M
}
637
638
77.1M
const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) {
639
77.1M
  storage_vector_.emplace_back(str, symbol_table_);
640
77.1M
  return storage_vector_.back().bytes();
641
77.1M
}
642
643
0
StatName StatNamePool::add(StatName name) {
644
0
  storage_vector_.emplace_back(name, symbol_table_);
645
0
  return storage_vector_.back().statName();
646
0
}
647
648
76.9M
StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); }
649
650
24.0M
StatName StatNameDynamicPool::add(absl::string_view str) {
651
24.0M
  storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_));
652
24.0M
  return StatName(storage_vector_.back().bytes());
653
24.0M
}
654
655
24.4k
StatNameStorageSet::~StatNameStorageSet() {
656
  // free() must be called before destructing StatNameStorageSet to decrement
657
  // references to all symbols.
658
24.4k
  ASSERT(hash_set_.empty());
659
24.4k
}
660
661
24.4k
void StatNameStorageSet::free(SymbolTable& symbol_table) {
662
  // We must free() all symbols referenced in the set, otherwise the symbols
663
  // will leak when the flat_hash_map superclass is destructed. They cannot
664
  // self-destruct without an explicit free() as each individual StatNameStorage
665
  // object does not have a reference to the symbol table, which would waste 8
666
  // bytes per stat-name. The easiest way to safely free all the contents of the
667
  // symbol table set is to use flat_hash_map::extract(), which removes and
668
  // returns an element from the set without destructing the element
669
  // immediately. This gives us a chance to call free() on each one before they
670
  // are destroyed.
671
  //
672
  // There's a performance risk here, if removing elements via
673
  // flat_hash_set::begin() is inefficient to use in a loop like this. One can
674
  // imagine a hash-table implementation where the performance of this
675
  // usage-model would be poor. However, tests with 100k elements appeared to
676
  // run quickly when compiled for optimization, so at present this is not a
677
  // performance issue.
678
679
24.4k
  while (!hash_set_.empty()) {
680
0
    auto storage = hash_set_.extract(hash_set_.begin());
681
0
    storage.value().free(symbol_table);
682
0
  }
683
24.4k
}
684
685
139M
SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const {
686
139M
  size_t num_bytes = 0;
687
338M
  for (StatName stat_name : stat_names) {
688
338M
    if (!stat_name.empty()) {
689
191M
      num_bytes += stat_name.dataSize();
690
191M
    }
691
338M
  }
692
139M
  MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes));
693
139M
  Encoding::appendEncoding(num_bytes, mem_block);
694
338M
  for (StatName stat_name : stat_names) {
695
338M
    stat_name.appendDataToMemBlock(mem_block);
696
338M
  }
697
139M
  ASSERT(mem_block.capacityRemaining() == 0);
698
139M
  return mem_block.release();
699
139M
}
700
701
52.9M
void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) {
702
52.9M
  RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded");
703
704
  // First encode all the names.
705
52.9M
  size_t total_size_bytes = 1; /* one byte for holding the number of names */
706
707
160M
  for (uint32_t i = 0; i < num_names; ++i) {
708
107M
    total_size_bytes += names[i].size();
709
107M
  }
710
711
  // Now allocate the exact number of bytes required and move the encodings
712
  // into storage.
713
52.9M
  MemBlockBuilder<uint8_t> mem_block(total_size_bytes);
714
52.9M
  mem_block.appendOne(num_names);
715
160M
  for (uint32_t i = 0; i < num_names; ++i) {
716
107M
    const StatName stat_name = names[i];
717
107M
    Encoding::appendToMemBlock(stat_name, mem_block);
718
107M
    incRefCount(stat_name);
719
107M
  }
720
721
  // This assertion double-checks the arithmetic where we computed
722
  // total_size_bytes. After appending all the encoded data into the
723
  // allocated byte array, we should have exhausted all the memory
724
  // we though we needed.
725
52.9M
  ASSERT(mem_block.capacityRemaining() == 0);
726
52.9M
  list.moveStorageIntoList(mem_block.release());
727
52.9M
}
728
729
52.9M
StatNameList::~StatNameList() { ASSERT(!populated()); }
730
731
447M
void StatNameList::iterate(const std::function<bool(StatName)>& f) const {
732
447M
  const uint8_t* p = &storage_[0];
733
447M
  const uint32_t num_elements = *p++;
734
555M
  for (uint32_t i = 0; i < num_elements; ++i) {
735
502M
    const StatName stat_name(p);
736
502M
    p += stat_name.size();
737
502M
    if (!f(stat_name)) {
738
394M
      break;
739
394M
    }
740
502M
  }
741
447M
}
742
743
52.9M
void StatNameList::clear(SymbolTable& symbol_table) {
744
107M
  iterate([&symbol_table](StatName stat_name) -> bool {
745
    // nolint: https://github.com/llvm/llvm-project/issues/81597
746
    // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
747
107M
    symbol_table.free(stat_name);
748
107M
    return true;
749
107M
  });
750
52.9M
  storage_.reset();
751
52.9M
}
752
753
StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name)
754
4.64k
    : name_(std::string(name)), pool_(symbol_table) {
755
4.64k
  builtin_stat_names_[""] = StatName();
756
4.64k
}
757
758
190k
void StatNameSet::rememberBuiltin(absl::string_view str) {
759
190k
  StatName stat_name;
760
190k
  {
761
190k
    absl::MutexLock lock(&mutex_);
762
190k
    stat_name = pool_.add(str);
763
190k
  }
764
190k
  builtin_stat_names_[str] = stat_name;
765
190k
}
766
767
729k
StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const {
768
  // If token was recorded as a built-in during initialization, we can
769
  // service this request lock-free.
770
729k
  const auto iter = builtin_stat_names_.find(token);
771
729k
  if (iter != builtin_stat_names_.end()) {
772
450k
    return iter->second;
773
450k
  }
774
279k
  return fallback;
775
729k
}
776
777
} // namespace Stats
778
} // namespace Envoy