Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/source/common/stats/symbol_table.cc
Line
Count
Source (jump to first uncovered line)
1
#include "source/common/stats/symbol_table.h"
2
3
#include <algorithm>
4
#include <iostream>
5
#include <memory>
6
#include <vector>
7
8
#include "source/common/common/assert.h"
9
#include "source/common/common/logger.h"
10
#include "source/common/common/utility.h"
11
12
#include "absl/strings/str_cat.h"
13
14
namespace Envoy {
15
namespace Stats {
16
17
// Masks used for variable-length encoding of arbitrary-sized integers into a
18
// uint8-array. The integers are typically small, so we try to store them in as
19
// few bytes as possible. The bottom 7 bits hold values, and the top bit is used
20
// to determine whether another byte is needed for more data.
21
static constexpr uint32_t SpilloverMask = 0x80;
22
static constexpr uint32_t Low7Bits = 0x7f;
23
24
// When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol
25
// that will decode into uint8_t array starting (and ending) with {0}. Thus we
26
// can use a leading 0 in the first byte to indicate that what follows is
27
// literal char data, rather than symbol-table entries. Literal char data is
28
// used for dynamically discovered stat-name tokens where you don't want to take
29
// a symbol table lock, and would rather pay extra memory overhead to store the
30
// tokens as fully elaborated strings.
31
static constexpr Symbol FirstValidSymbol = 1;
32
static constexpr uint8_t LiteralStringIndicator = 0;
33
34
4.40G
size_t StatName::dataSize() const {
35
4.40G
  if (size_and_data_ == nullptr) {
36
75.3M
    return 0;
37
75.3M
  }
38
4.32G
  return SymbolTable::Encoding::decodeNumber(size_and_data_).first;
39
4.40G
}
40
41
#ifndef ENVOY_CONFIG_COVERAGE
42
0
void StatName::debugPrint() {
43
  // TODO(jmarantz): capture this functionality (always prints regardless of
44
  // loglevel) in an ENVOY_LOG macro variant or similar, perhaps
45
  // ENVOY_LOG_MISC(stderr, ...);
46
0
  if (size_and_data_ == nullptr) {
47
0
    std::cerr << "Null StatName" << std::endl;
48
0
  } else {
49
0
    const size_t nbytes = dataSize();
50
0
    std::cerr << "dataSize=" << nbytes << ":";
51
0
    for (size_t i = 0; i < nbytes; ++i) {
52
0
      std::cerr << " " << static_cast<uint64_t>(data()[i]);
53
0
    }
54
0
    const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this);
55
0
    std::cerr << ", numSymbols=" << encoding.size() << ":";
56
0
    for (Symbol symbol : encoding) {
57
0
      std::cerr << " " << symbol;
58
0
    }
59
0
    std::cerr << std::endl;
60
0
  }
61
0
}
62
#endif
63
64
85.8M
SymbolTable::Encoding::~Encoding() {
65
  // Verifies that moveToMemBlock() was called on this encoding. Failure
66
  // to call moveToMemBlock() will result in leaks symbols.
67
85.8M
  ASSERT(mem_block_.capacity() == 0);
68
85.8M
}
69
70
2.45G
size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) {
71
2.45G
  size_t num_bytes = 0;
72
2.56G
  do {
73
2.56G
    ++num_bytes;
74
2.56G
    number >>= 7;
75
2.56G
  } while (number != 0);
76
2.45G
  return num_bytes;
77
2.45G
}
78
79
505M
void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) {
80
  // UTF-8-like encoding where a value 127 or less gets written as a single
81
  // byte. For higher values we write the low-order 7 bits with a 1 in
82
  // the high-order bit. Then we right-shift 7 bits and keep adding more bytes
83
  // until we have consumed all the non-zero bits in symbol.
84
  //
85
  // When decoding, we stop consuming uint8_t when we see a uint8_t with
86
  // high-order bit 0.
87
607M
  do {
88
607M
    if (number < (1 << 7)) {
89
505M
      mem_block.appendOne(number); // number <= 127 gets encoded in one byte.
90
505M
    } else {
91
102M
      mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes.
92
102M
    }
93
607M
    number >>= 7;
94
607M
  } while (number != 0);
95
505M
}
96
97
81.3M
void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) {
98
81.3M
  ASSERT(data_bytes_required_ == 0);
99
224M
  for (Symbol symbol : symbols) {
100
224M
    data_bytes_required_ += encodingSizeBytes(symbol);
101
224M
  }
102
81.3M
  mem_block_.setCapacity(data_bytes_required_);
103
224M
  for (Symbol symbol : symbols) {
104
224M
    appendEncoding(symbol, mem_block_);
105
224M
  }
106
81.3M
}
107
108
5.93G
std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) {
109
5.93G
  uint64_t number = 0;
110
5.93G
  uint64_t uc = SpilloverMask;
111
5.93G
  const uint8_t* start = encoding;
112
12.4G
  for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) {
113
6.55G
    uc = static_cast<uint32_t>(*encoding);
114
6.55G
    number |= (uc & Low7Bits) << shift;
115
6.55G
  }
116
5.93G
  return std::make_pair(number, encoding - start);
117
5.93G
}
118
119
309M
SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) {
120
309M
  SymbolVec symbol_vec;
121
309M
  symbol_vec.reserve(stat_name.dataSize());
122
309M
  decodeTokens(
123
923M
      stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); },
124
309M
      [](absl::string_view) {});
125
309M
  return symbol_vec;
126
309M
}
127
128
// Iterator for decoding StatName tokens. This is not an STL-compatible
129
// iterator; it is used only locally. Using this rather than decodeSymbols makes
130
// it easier to early-exit out of a comparison once we know the answer. For
131
// example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order
132
// when you get to the "b" vs "x" and do not need to decode "c" vs "y".
133
//
134
// This is also helpful for prefix-matching.
135
//
136
// The array-version of decodeSymbols is still a good approach for converting a
137
// StatName to a string as the intermediate array of string_view is needed to
138
// allocate the right size for the joined string.
139
class SymbolTable::Encoding::TokenIter {
140
public:
141
  // The type of token reached.
142
  enum class TokenType { StringView, Symbol, End };
143
144
368M
  TokenIter(StatName stat_name) {
145
368M
    const uint8_t* raw_data = stat_name.dataIncludingSize();
146
368M
    if (raw_data == nullptr) {
147
10.9M
      size_ = 0;
148
10.9M
      array_ = nullptr;
149
357M
    } else {
150
357M
      std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data);
151
357M
      size_ = size_consumed.first;
152
357M
      array_ = raw_data + size_consumed.second;
153
357M
    }
154
368M
  }
155
156
  /**
157
   * Parses the next token. The token can then be retrieved by calling
158
   * stringView() or symbol().
159
   * @return The type of token, TokenType::End if we have reached the end of the StatName.
160
   */
161
1.61G
  TokenType next() {
162
1.61G
    if (size_ == 0) {
163
368M
      return TokenType::End;
164
368M
    }
165
1.24G
    if (*array_ == LiteralStringIndicator) {
166
      // To avoid scanning memory to find the literal size during decode, we
167
      // var-length encode the size of the literal string prior to the data.
168
39.9M
      ASSERT(size_ > 1);
169
39.9M
      ++array_;
170
39.9M
      --size_;
171
39.9M
      std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_);
172
39.9M
      uint64_t length = length_consumed.first;
173
39.9M
      array_ += length_consumed.second;
174
39.9M
      size_ -= length_consumed.second;
175
39.9M
      ASSERT(size_ >= length);
176
39.9M
      string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length);
177
39.9M
      size_ -= length;
178
39.9M
      array_ += length;
179
39.9M
#ifndef NDEBUG
180
39.9M
      last_type_ = TokenType::StringView;
181
39.9M
#endif
182
39.9M
      return TokenType::StringView;
183
39.9M
    }
184
1.20G
    std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_);
185
1.20G
    symbol_ = symbol_consumed.first;
186
1.20G
    size_ -= symbol_consumed.second;
187
1.20G
    array_ += symbol_consumed.second;
188
1.20G
#ifndef NDEBUG
189
1.20G
    last_type_ = TokenType::Symbol;
190
1.20G
#endif
191
1.20G
    return TokenType::Symbol;
192
1.24G
  }
193
194
  /** @return the current string_view -- only valid to call if next()==TokenType::StringView */
195
39.9M
  absl::string_view stringView() const {
196
39.9M
#ifndef NDEBUG
197
39.9M
    ASSERT(last_type_ == TokenType::StringView);
198
39.9M
#endif
199
39.9M
    return string_view_;
200
39.9M
  }
201
202
  /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */
203
1.20G
  Symbol symbol() const {
204
1.20G
#ifndef NDEBUG
205
1.20G
    ASSERT(last_type_ == TokenType::Symbol);
206
1.20G
#endif
207
1.20G
    return symbol_;
208
1.20G
  }
209
210
private:
211
  const uint8_t* array_;
212
  size_t size_;
213
  absl::string_view string_view_;
214
  Symbol symbol_;
215
#ifndef NDEBUG
216
  TokenType last_type_{TokenType::End};
217
#endif
218
};
219
220
void SymbolTable::Encoding::decodeTokens(
221
    StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
222
368M
    const std::function<void(absl::string_view)>& string_view_token_fn) {
223
368M
  TokenIter iter(stat_name);
224
368M
  TokenIter::TokenType type;
225
1.61G
  while ((type = iter.next()) != TokenIter::TokenType::End) {
226
1.24G
    if (type == TokenIter::TokenType::StringView) {
227
39.9M
      string_view_token_fn(iter.stringView());
228
1.20G
    } else {
229
1.20G
      ASSERT(type == TokenIter::TokenType::Symbol);
230
1.20G
      symbol_token_fn(iter.symbol());
231
1.20G
    }
232
1.24G
  }
233
368M
}
234
235
0
bool StatName::startsWith(StatName prefix) const {
236
0
  using TokenIter = SymbolTable::Encoding::TokenIter;
237
0
  TokenIter prefix_iter(prefix);
238
0
  TokenIter this_iter(*this);
239
0
  while (true) {
240
0
    TokenIter::TokenType prefix_type = prefix_iter.next();
241
0
    TokenIter::TokenType this_type = this_iter.next();
242
0
    if (prefix_type == TokenIter::TokenType::End) {
243
0
      return true; // "a.b.c" starts with "a.b" or "a.b.c"
244
0
    }
245
0
    if (this_type == TokenIter::TokenType::End) {
246
0
      return false; // "a.b" does not start with "a.b.c"
247
0
    }
248
0
    if (prefix_type != TokenIter::TokenType::Symbol) {
249
      // Disallow dynamic components in the prefix. We don't have a current need
250
      // for prefixes to be expressed dynamically, and handling that case would
251
      // add complexity. In particular we'd need to take locks to decode to
252
      // strings.
253
0
      return false;
254
0
    }
255
0
    if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) {
256
0
      return false;
257
0
    }
258
0
  }
259
0
  return true; // not reached
260
0
}
261
262
59.4M
std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const {
263
59.4M
  std::vector<absl::string_view> strings;
264
59.4M
  Thread::LockGuard lock(lock_);
265
59.4M
  Encoding::decodeTokens(
266
59.4M
      stat_name,
267
59.4M
      [this, &strings](Symbol symbol)
268
266M
          ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); },
269
59.4M
      [&strings](absl::string_view str) { strings.push_back(str); });
270
59.4M
  return strings;
271
59.4M
}
272
273
85.8M
void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) {
274
85.8M
  appendEncoding(data_bytes_required_, mem_block);
275
85.8M
  mem_block.appendBlock(mem_block_);
276
85.8M
  mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct.
277
85.8M
}
278
279
void SymbolTable::Encoding::appendToMemBlock(StatName stat_name,
280
108M
                                             MemBlockBuilder<uint8_t>& mem_block) {
281
108M
  const uint8_t* data = stat_name.dataIncludingSize();
282
108M
  if (data == nullptr) {
283
10.9M
    mem_block.appendOne(0);
284
97.5M
  } else {
285
97.5M
    mem_block.appendData(absl::MakeSpan(data, stat_name.size()));
286
97.5M
  }
287
108M
}
288
289
SymbolTable::SymbolTable()
290
    // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro.
291
157k
    : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {}
292
293
157k
SymbolTable::~SymbolTable() {
294
  // To avoid leaks into the symbol table, we expect all StatNames to be freed.
295
  // Note: this could potentially be short-circuited if we decide a fast exit
296
  // is needed in production. But it would be good to ensure clean up during
297
  // tests.
298
157k
  ASSERT(numSymbols() == 0);
299
157k
}
300
301
// TODO(ambuc): There is a possible performance optimization here for avoiding
302
// the encoding of IPs / numbers if they appear in stat names. We don't want to
303
// waste time symbolizing an integer as an integer, if we can help it.
304
85.8M
void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) {
305
85.8M
  if (name.empty()) {
306
4.54M
    return;
307
4.54M
  }
308
309
  // We want to hold the lock for the minimum amount of time, so we do the
310
  // string-splitting and prepare a temp vector of Symbol first.
311
81.3M
  const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.');
312
81.3M
  std::vector<Symbol> symbols;
313
81.3M
  symbols.reserve(tokens.size());
314
315
  // Now take the lock and populate the Symbol objects, which involves bumping
316
  // ref-counts in this.
317
81.3M
  {
318
81.3M
    Thread::LockGuard lock(lock_);
319
81.3M
    recent_lookups_.lookup(name);
320
224M
    for (auto& token : tokens) {
321
      // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with
322
      // length below some threshold, say 4 bytes. It might be preferable not to
323
      // reserve Symbols for every 3 digit number found (for example) in ipv4
324
      // addresses.
325
224M
      symbols.push_back(toSymbol(token));
326
224M
    }
327
81.3M
  }
328
329
  // Now efficiently encode the array of 32-bit symbols into a uint8_t array.
330
81.3M
  encoding.addSymbols(symbols);
331
81.3M
}
332
333
157k
uint64_t SymbolTable::numSymbols() const {
334
157k
  Thread::LockGuard lock(lock_);
335
157k
  ASSERT(encode_map_.size() == decode_map_.size());
336
157k
  return encode_map_.size();
337
157k
}
338
339
59.4M
std::string SymbolTable::toString(const StatName& stat_name) const {
340
59.4M
  return absl::StrJoin(decodeStrings(stat_name), ".");
341
59.4M
}
342
343
111M
void SymbolTable::incRefCount(const StatName& stat_name) {
344
  // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
345
111M
  const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
346
347
111M
  Thread::LockGuard lock(lock_);
348
349M
  for (Symbol symbol : symbols) {
349
349M
    auto decode_search = decode_map_.find(symbol);
350
351
349M
    ASSERT(decode_search != decode_map_.end(),
352
349M
           "Please see "
353
349M
           "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
354
349M
           "debugging-symbol-table-assertions");
355
349M
    auto encode_search = encode_map_.find(decode_search->second->toStringView());
356
349M
    ASSERT(encode_search != encode_map_.end(),
357
349M
           "Please see "
358
349M
           "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
359
349M
           "debugging-symbol-table-assertions");
360
361
349M
    ++encode_search->second.ref_count_;
362
349M
  }
363
111M
}
364
365
197M
void SymbolTable::free(const StatName& stat_name) {
366
  // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
367
197M
  const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
368
369
197M
  Thread::LockGuard lock(lock_);
370
573M
  for (Symbol symbol : symbols) {
371
573M
    auto decode_search = decode_map_.find(symbol);
372
573M
    ASSERT(decode_search != decode_map_.end());
373
374
573M
    auto encode_search = encode_map_.find(decode_search->second->toStringView());
375
573M
    ASSERT(encode_search != encode_map_.end());
376
377
    // If that was the last remaining client usage of the symbol, erase the
378
    // current mappings and add the now-unused symbol to the reuse pool.
379
    //
380
    // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in
381
    // symbol_table_speed_test.cc, relative to breaking out the decrement into a
382
    // separate step, likely due to the non-trivial dereferences in EXPR.
383
573M
    if (--encode_search->second.ref_count_ == 0) {
384
15.9M
      decode_map_.erase(decode_search);
385
15.9M
      encode_map_.erase(encode_search);
386
15.9M
      pool_.push(symbol);
387
15.9M
    }
388
573M
  }
389
197M
}
390
391
6.43k
uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const {
392
6.43k
  uint64_t total = 0;
393
6.43k
  absl::flat_hash_map<std::string, uint64_t> name_count_map;
394
395
  // We don't want to hold lock_ while calling the iterator, but we need it to
396
  // access recent_lookups_, so we buffer in name_count_map.
397
6.43k
  {
398
6.43k
    Thread::LockGuard lock(lock_);
399
6.43k
    recent_lookups_.forEach(
400
6.43k
        [&name_count_map](absl::string_view str, uint64_t count)
401
6.43k
            ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; });
402
6.43k
    total += recent_lookups_.total();
403
6.43k
  }
404
405
  // Now we have the collated name-count map data: we need to vectorize and
406
  // sort. We define the pair with the count first as std::pair::operator<
407
  // prioritizes its first element over its second.
408
6.43k
  using LookupCount = std::pair<uint64_t, absl::string_view>;
409
6.43k
  std::vector<LookupCount> lookup_data;
410
6.43k
  lookup_data.reserve(name_count_map.size());
411
6.43k
  for (const auto& iter : name_count_map) {
412
0
    lookup_data.emplace_back(LookupCount(iter.second, iter.first));
413
0
  }
414
6.43k
  std::sort(lookup_data.begin(), lookup_data.end());
415
6.43k
  for (const LookupCount& lookup_count : lookup_data) {
416
0
    iter(lookup_count.second, lookup_count.first);
417
0
  }
418
6.43k
  return total;
419
6.43k
}
420
421
821
DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const {
422
821
  DynamicSpans dynamic_spans;
423
424
821
  uint32_t index = 0;
425
11.8M
  auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) {
426
11.8M
    DynamicSpan span;
427
11.8M
    span.first = index;
428
11.8M
    index += std::count(str.begin(), str.end(), '.');
429
11.8M
    span.second = index;
430
11.8M
    ++index;
431
11.8M
    dynamic_spans.push_back(span);
432
11.8M
  };
433
434
  // Use decodeTokens to suss out which components of stat_name are
435
  // symbolic vs dynamic. The lambda that takes a Symbol is called
436
  // for symbolic components. The lambda called with a string_view
437
  // is called for dynamic components.
438
  //
439
  // Note that with fake symbol tables, the Symbol lambda is called
440
  // once for each character in the string, and no dynamics will
441
  // be recorded.
442
821
  Encoding::decodeTokens(
443
15.8M
      stat_name, [&index](Symbol) { ++index; }, record_dynamic);
444
821
  return dynamic_spans;
445
821
}
446
447
0
void SymbolTable::setRecentLookupCapacity(uint64_t capacity) {
448
0
  Thread::LockGuard lock(lock_);
449
0
  recent_lookups_.setCapacity(capacity);
450
0
}
451
452
0
void SymbolTable::clearRecentLookups() {
453
0
  Thread::LockGuard lock(lock_);
454
0
  recent_lookups_.clear();
455
0
}
456
457
0
uint64_t SymbolTable::recentLookupCapacity() const {
458
0
  Thread::LockGuard lock(lock_);
459
0
  return recent_lookups_.capacity();
460
0
}
461
462
5.97k
StatNameSetPtr SymbolTable::makeSet(absl::string_view name) {
463
  // make_unique does not work with private ctor, even though SymbolTable is a friend.
464
5.97k
  StatNameSetPtr stat_name_set(new StatNameSet(*this, name));
465
5.97k
  return stat_name_set;
466
5.97k
}
467
468
224M
Symbol SymbolTable::toSymbol(absl::string_view sv) {
469
224M
  Symbol result;
470
224M
  auto encode_find = encode_map_.find(sv);
471
  // If the string segment doesn't already exist,
472
224M
  if (encode_find == encode_map_.end()) {
473
    // We create the actual string, place it in the decode_map_, and then insert
474
    // a string_view pointing to it in the encode_map_. This allows us to only
475
    // store the string once. We use unique_ptr so copies are not made as
476
    // flat_hash_map moves values around.
477
15.9M
    InlineStringPtr str = InlineString::create(sv);
478
15.9M
    auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)});
479
15.9M
    ASSERT(encode_insert.second);
480
15.9M
    auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
481
15.9M
    ASSERT(decode_insert.second);
482
483
15.9M
    result = next_symbol_;
484
15.9M
    newSymbol();
485
208M
  } else {
486
    // If the insertion didn't take place, return the actual value at that location and up the
487
    // refcount at that location
488
208M
    result = encode_find->second.symbol_;
489
208M
    ++(encode_find->second.ref_count_);
490
208M
  }
491
224M
  return result;
492
224M
}
493
494
absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
495
266M
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
496
266M
  auto search = decode_map_.find(symbol);
497
266M
  RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
498
266M
  return search->second->toStringView();
499
266M
}
500
501
15.9M
void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
502
15.9M
  if (pool_.empty()) {
503
15.2M
    next_symbol_ = ++monotonic_counter_;
504
15.2M
  } else {
505
722k
    next_symbol_ = pool_.top();
506
722k
    pool_.pop();
507
722k
  }
508
  // This should catch integer overflow for the new symbol.
509
15.9M
  ASSERT(monotonic_counter_ != 0);
510
15.9M
}
511
512
0
bool SymbolTable::lessThan(const StatName& a, const StatName& b) const {
513
  // Proactively take the table lock in anticipation that we'll need to
514
  // convert at least one symbol to a string_view, and it's easier not to
515
  // bother to lazily take the lock.
516
0
  Thread::LockGuard lock(lock_);
517
0
  return lessThanLockHeld(a, b);
518
0
}
519
520
bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const
521
0
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
522
0
  Encoding::TokenIter a_iter(a), b_iter(b);
523
0
  while (true) {
524
0
    Encoding::TokenIter::TokenType a_type = a_iter.next();
525
0
    Encoding::TokenIter::TokenType b_type = b_iter.next();
526
0
    if (b_type == Encoding::TokenIter::TokenType::End) {
527
0
      return false; // "x.y.z" > "x.y", "x.y" == "x.y"
528
0
    }
529
0
    if (a_type == Encoding::TokenIter::TokenType::End) {
530
0
      return true; // "x.y" < "x.y.z"
531
0
    }
532
0
    if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol &&
533
0
        a_iter.symbol() == b_iter.symbol()) {
534
0
      continue; // matching symbols don't need to be decoded to strings
535
0
    }
536
537
0
    absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol
538
0
                                    ? fromSymbol(a_iter.symbol())
539
0
                                    : a_iter.stringView();
540
0
    absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol
541
0
                                    ? fromSymbol(b_iter.symbol())
542
0
                                    : b_iter.stringView();
543
0
    if (a_token != b_token) {
544
0
      return a_token < b_token;
545
0
    }
546
0
  }
547
0
}
548
549
#ifndef ENVOY_CONFIG_COVERAGE
550
0
void SymbolTable::debugPrint() const {
551
0
  Thread::LockGuard lock(lock_);
552
0
  std::vector<Symbol> symbols;
553
0
  for (const auto& p : decode_map_) {
554
0
    symbols.push_back(p.first);
555
0
  }
556
0
  std::sort(symbols.begin(), symbols.end());
557
0
  for (Symbol symbol : symbols) {
558
0
    const InlineString& token = *decode_map_.find(symbol)->second;
559
0
    const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second;
560
0
    ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_);
561
0
  }
562
0
}
563
#endif
564
565
85.8M
SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) {
566
85.8M
  name = StringUtil::removeTrailingCharacters(name, '.');
567
85.8M
  Encoding encoding;
568
85.8M
  addTokensToEncoding(name, encoding);
569
85.8M
  MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired()));
570
85.8M
  encoding.moveToMemBlock(mem_block);
571
85.8M
  return mem_block.release();
572
85.8M
}
573
574
StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table)
575
85.8M
    : StatNameStorageBase(table.encode(name)) {}
576
577
3.18M
StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) {
578
3.18M
  const size_t size = src.size();
579
3.18M
  MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t.
580
3.18M
  src.copyToMemBlock(storage);
581
3.18M
  setBytes(storage.release());
582
3.18M
  table.incRefCount(statName());
583
3.18M
}
584
585
26.4M
SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) {
586
26.4M
  name = StringUtil::removeTrailingCharacters(name, '.');
587
588
  // For all StatName objects, we first have the total number of bytes in the
589
  // representation. But for inlined dynamic string StatName variants, we must
590
  // store the length of the payload separately, so that if this token gets
591
  // joined with others, we'll know much space it consumes until the next token.
592
  // So the layout is
593
  //   [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ]
594
  // So we need to figure out how many bytes we need to represent length-of-name
595
  // and name.
596
597
  // payload_bytes is the total number of bytes needed to represent the
598
  // characters in name, plus their encoded size, plus the literal indicator.
599
26.4M
  const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1;
600
601
  // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and
602
  // the length of those.
603
26.4M
  const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes);
604
26.4M
  MemBlockBuilder<uint8_t> mem_block(total_bytes);
605
606
26.4M
  Encoding::appendEncoding(payload_bytes, mem_block);
607
26.4M
  mem_block.appendOne(LiteralStringIndicator);
608
26.4M
  Encoding::appendEncoding(name.size(), mem_block);
609
26.4M
  mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size()));
610
26.4M
  ASSERT(mem_block.capacityRemaining() == 0);
611
26.4M
  return mem_block.release();
612
26.4M
}
613
614
256M
StatNameStorage::~StatNameStorage() {
615
  // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to
616
  // decrement the reference counts held by the SymbolTable on behalf of
617
  // this StatName. This saves 8 bytes of storage per stat, relative to
618
  // holding a SymbolTable& in each StatNameStorage object.
619
256M
  ASSERT(bytes() == nullptr);
620
256M
}
621
622
89.0M
void StatNameStorage::free(SymbolTable& table) {
623
89.0M
  table.free(statName());
624
89.0M
  clear();
625
89.0M
}
626
627
5.81M
void StatNamePool::clear() {
628
78.8M
  for (StatNameStorage& storage : storage_vector_) {
629
78.8M
    storage.free(symbol_table_);
630
78.8M
  }
631
5.81M
  storage_vector_.clear();
632
5.81M
}
633
634
78.8M
const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) {
635
78.8M
  storage_vector_.push_back(Stats::StatNameStorage(str, symbol_table_));
636
78.8M
  return storage_vector_.back().bytes();
637
78.8M
}
638
639
78.7M
StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); }
640
641
26.1M
StatName StatNameDynamicPool::add(absl::string_view str) {
642
26.1M
  storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_));
643
26.1M
  return StatName(storage_vector_.back().bytes());
644
26.1M
}
645
646
30.0k
StatNameStorageSet::~StatNameStorageSet() {
647
  // free() must be called before destructing StatNameStorageSet to decrement
648
  // references to all symbols.
649
30.0k
  ASSERT(hash_set_.empty());
650
30.0k
}
651
652
30.0k
void StatNameStorageSet::free(SymbolTable& symbol_table) {
653
  // We must free() all symbols referenced in the set, otherwise the symbols
654
  // will leak when the flat_hash_map superclass is destructed. They cannot
655
  // self-destruct without an explicit free() as each individual StatNameStorage
656
  // object does not have a reference to the symbol table, which would waste 8
657
  // bytes per stat-name. The easiest way to safely free all the contents of the
658
  // symbol table set is to use flat_hash_map::extract(), which removes and
659
  // returns an element from the set without destructing the element
660
  // immediately. This gives us a chance to call free() on each one before they
661
  // are destroyed.
662
  //
663
  // There's a performance risk here, if removing elements via
664
  // flat_hash_set::begin() is inefficient to use in a loop like this. One can
665
  // imagine a hash-table implementation where the performance of this
666
  // usage-model would be poor. However, tests with 100k elements appeared to
667
  // run quickly when compiled for optimization, so at present this is not a
668
  // performance issue.
669
670
30.0k
  while (!hash_set_.empty()) {
671
0
    auto storage = hash_set_.extract(hash_set_.begin());
672
0
    storage.value().free(symbol_table);
673
0
  }
674
30.0k
}
675
676
139M
SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const {
677
139M
  size_t num_bytes = 0;
678
346M
  for (StatName stat_name : stat_names) {
679
346M
    if (!stat_name.empty()) {
680
196M
      num_bytes += stat_name.dataSize();
681
196M
    }
682
346M
  }
683
139M
  MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes));
684
139M
  Encoding::appendEncoding(num_bytes, mem_block);
685
346M
  for (StatName stat_name : stat_names) {
686
346M
    stat_name.appendDataToMemBlock(mem_block);
687
346M
  }
688
139M
  ASSERT(mem_block.capacityRemaining() == 0);
689
139M
  return mem_block.release();
690
139M
}
691
692
53.2M
void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) {
693
53.2M
  RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded");
694
695
  // First encode all the names.
696
53.2M
  size_t total_size_bytes = 1; /* one byte for holding the number of names */
697
698
161M
  for (uint32_t i = 0; i < num_names; ++i) {
699
108M
    total_size_bytes += names[i].size();
700
108M
  }
701
702
  // Now allocate the exact number of bytes required and move the encodings
703
  // into storage.
704
53.2M
  MemBlockBuilder<uint8_t> mem_block(total_size_bytes);
705
53.2M
  mem_block.appendOne(num_names);
706
161M
  for (uint32_t i = 0; i < num_names; ++i) {
707
108M
    const StatName stat_name = names[i];
708
108M
    Encoding::appendToMemBlock(stat_name, mem_block);
709
108M
    incRefCount(stat_name);
710
108M
  }
711
712
  // This assertion double-checks the arithmetic where we computed
713
  // total_size_bytes. After appending all the encoded data into the
714
  // allocated byte array, we should have exhausted all the memory
715
  // we though we needed.
716
53.2M
  ASSERT(mem_block.capacityRemaining() == 0);
717
53.2M
  list.moveStorageIntoList(mem_block.release());
718
53.2M
}
719
720
53.2M
StatNameList::~StatNameList() { ASSERT(!populated()); }
721
722
450M
void StatNameList::iterate(const std::function<bool(StatName)>& f) const {
723
450M
  const uint8_t* p = &storage_[0];
724
450M
  const uint32_t num_elements = *p++;
725
560M
  for (uint32_t i = 0; i < num_elements; ++i) {
726
506M
    const StatName stat_name(p);
727
506M
    p += stat_name.size();
728
506M
    if (!f(stat_name)) {
729
397M
      break;
730
397M
    }
731
506M
  }
732
450M
}
733
734
53.2M
void StatNameList::clear(SymbolTable& symbol_table) {
735
108M
  iterate([&symbol_table](StatName stat_name) -> bool {
736
108M
    symbol_table.free(stat_name);
737
108M
    return true;
738
108M
  });
739
53.2M
  storage_.reset();
740
53.2M
}
741
742
StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name)
743
5.97k
    : name_(std::string(name)), pool_(symbol_table) {
744
5.97k
  builtin_stat_names_[""] = StatName();
745
5.97k
}
746
747
252k
void StatNameSet::rememberBuiltin(absl::string_view str) {
748
252k
  StatName stat_name;
749
252k
  {
750
252k
    absl::MutexLock lock(&mutex_);
751
252k
    stat_name = pool_.add(str);
752
252k
  }
753
252k
  builtin_stat_names_[str] = stat_name;
754
252k
}
755
756
976k
StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const {
757
  // If token was recorded as a built-in during initialization, we can
758
  // service this request lock-free.
759
976k
  const auto iter = builtin_stat_names_.find(token);
760
976k
  if (iter != builtin_stat_names_.end()) {
761
593k
    return iter->second;
762
593k
  }
763
382k
  return fallback;
764
976k
}
765
766
} // namespace Stats
767
} // namespace Envoy