1
#include "source/common/stats/symbol_table.h"
2

            
3
#include <algorithm>
4
#include <iostream>
5
#include <memory>
6
#include <vector>
7

            
8
#include "source/common/common/assert.h"
9
#include "source/common/common/logger.h"
10
#include "source/common/common/utility.h"
11

            
12
#include "absl/strings/str_cat.h"
13

            
14
namespace Envoy {
15
namespace Stats {
16

            
17
// Masks used for variable-length encoding of arbitrary-sized integers into a
18
// uint8-array. The integers are typically small, so we try to store them in as
19
// few bytes as possible. The bottom 7 bits hold values, and the top bit is used
20
// to determine whether another byte is needed for more data.
21
static constexpr uint32_t SpilloverMask = 0x80;
22
static constexpr uint32_t Low7Bits = 0x7f;
23

            
24
// When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol
25
// that will decode into uint8_t array starting (and ending) with {0}. Thus we
26
// can use a leading 0 in the first byte to indicate that what follows is
27
// literal char data, rather than symbol-table entries. Literal char data is
28
// used for dynamically discovered stat-name tokens where you don't want to take
29
// a symbol table lock, and would rather pay extra memory overhead to store the
30
// tokens as fully elaborated strings.
31
static constexpr Symbol FirstValidSymbol = 1;
32
static constexpr uint8_t LiteralStringIndicator = 0;
33

            
34
2161382449
size_t StatName::dataSize() const {
35
2161382449
  if (size_and_data_ == nullptr) {
36
62342264
    return 0;
37
62342264
  }
38
2099040185
  return SymbolTable::Encoding::decodeNumber(size_and_data_).first;
39
2161382449
}
40

            
41
#ifndef ENVOY_CONFIG_COVERAGE
42
void StatName::debugPrint() {
43
  // TODO(jmarantz): capture this functionality (always prints regardless of
44
  // loglevel) in an ENVOY_LOG macro variant or similar, perhaps
45
  // ENVOY_LOG_MISC(stderr, ...);
46
  if (size_and_data_ == nullptr) {
47
    std::cerr << "Null StatName" << std::endl;
48
  } else {
49
    const size_t nbytes = dataSize();
50
    std::cerr << "dataSize=" << nbytes << ":";
51
    for (size_t i = 0; i < nbytes; ++i) {
52
      std::cerr << " " << static_cast<uint64_t>(data()[i]);
53
    }
54
    const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this);
55
    std::cerr << ", numSymbols=" << encoding.size() << ":";
56
    for (Symbol symbol : encoding) {
57
      std::cerr << " " << symbol;
58
    }
59
    std::cerr << std::endl;
60
  }
61
}
62
#endif
63

            
64
58254346
SymbolTable::Encoding::~Encoding() {
65
  // Verifies that moveToMemBlock() was called on this encoding. Failure
66
  // to call moveToMemBlock() will result in leaks symbols.
67
58254346
  ASSERT(mem_block_.capacity() == 0);
68
58254346
}
69

            
70
1221014803
size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) {
71
1221014803
  size_t num_bytes = 0;
72
1249991937
  do {
73
1249991937
    ++num_bytes;
74
1249991937
    number >>= 7;
75
1249991937
  } while (number != 0);
76
1221014803
  return num_bytes;
77
1221014803
}
78

            
79
221384926
void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) {
80
  // UTF-8-like encoding where a value 127 or less gets written as a single
81
  // byte. For higher values we write the low-order 7 bits with a 1 in
82
  // the high-order bit. Then we right-shift 7 bits and keep adding more bytes
83
  // until we have consumed all the non-zero bits in symbol.
84
  //
85
  // When decoding, we stop consuming uint8_t when we see a uint8_t with
86
  // high-order bit 0.
87
250361692
  do {
88
250361692
    if (number < (1 << 7)) {
89
221383919
      mem_block.appendOne(number); // number <= 127 gets encoded in one byte.
90
221383919
    } else {
91
28977773
      mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes.
92
28977773
    }
93
250361692
    number >>= 7;
94
250361692
  } while (number != 0);
95
221384926
}
96

            
97
56569393
void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) {
98
56569393
  ASSERT(data_bytes_required_ == 0);
99
74361967
  for (Symbol symbol : symbols) {
100
74361967
    data_bytes_required_ += encodingSizeBytes(symbol);
101
74361967
  }
102
56569393
  mem_block_.setCapacity(data_bytes_required_);
103
74361959
  for (Symbol symbol : symbols) {
104
74361959
    appendEncoding(symbol, mem_block_);
105
74361959
  }
106
56569393
}
107

            
108
2934923257
std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) {
109
2934923257
  uint64_t number = 0;
110
2934923257
  uint64_t uc = SpilloverMask;
111
2934923257
  const uint8_t* start = encoding;
112
6118533809
  for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) {
113
3183610552
    uc = static_cast<uint32_t>(*encoding);
114
3183610552
    number |= (uc & Low7Bits) << shift;
115
3183610552
  }
116
2934923257
  return std::make_pair(number, encoding - start);
117
2934923257
}
118

            
119
240221012
SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) {
120
240221012
  SymbolVec symbol_vec;
121
240221012
  symbol_vec.reserve(stat_name.dataSize());
122
240221012
  decodeTokens(
123
306335168
      stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); },
124
240221012
      [](absl::string_view) {});
125
240221012
  return symbol_vec;
126
240221012
}
127

            
128
// Iterator for decoding StatName tokens. This is not an STL-compatible
129
// iterator; it is used only locally. Using this rather than decodeSymbols makes
130
// it easier to early-exit out of a comparison once we know the answer. For
131
// example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order
132
// when you get to the "b" vs "x" and do not need to decode "c" vs "y".
133
//
134
// This is also helpful for prefix-matching.
135
//
136
// The array-version of decodeSymbols is still a good approach for converting a
137
// StatName to a string as the intermediate array of string_view is needed to
138
// allocate the right size for the joined string.
139
class SymbolTable::Encoding::TokenIter {
140
public:
141
  // The type of token reached.
142
  enum class TokenType { StringView, Symbol, End };
143

            
144
327440115
  TokenIter(StatName stat_name) {
145
327440115
    const uint8_t* raw_data = stat_name.dataIncludingSize();
146
327440115
    if (raw_data == nullptr) {
147
14696992
      size_ = 0;
148
14696992
      array_ = nullptr;
149
312743123
    } else {
150
312743123
      std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data);
151
312743123
      size_ = size_consumed.first;
152
312743123
      array_ = raw_data + size_consumed.second;
153
312743123
    }
154
327440115
  }
155

            
156
  /**
157
   * Parses the next token. The token can then be retrieved by calling
158
   * stringView() or symbol().
159
   * @return The type of token, TokenType::End if we have reached the end of the StatName.
160
   */
161
850486546
  TokenType next() {
162
850486546
    if (size_ == 0) {
163
327301880
      return TokenType::End;
164
327301880
    }
165
523184666
    if (*array_ == LiteralStringIndicator) {
166
      // To avoid scanning memory to find the literal size during decode, we
167
      // var-length encode the size of the literal string prior to the data.
168
314494
      ASSERT(size_ > 1);
169
314494
      ++array_;
170
314494
      --size_;
171
314494
      std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_);
172
314494
      uint64_t length = length_consumed.first;
173
314494
      array_ += length_consumed.second;
174
314494
      size_ -= length_consumed.second;
175
314494
      ASSERT(size_ >= length);
176
314494
      string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length);
177
314494
      size_ -= length;
178
314494
      array_ += length;
179
#ifndef NDEBUG
180
      last_type_ = TokenType::StringView;
181
#endif
182
314494
      return TokenType::StringView;
183
314494
    }
184
522870172
    std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_);
185
522870172
    symbol_ = symbol_consumed.first;
186
522870172
    size_ -= symbol_consumed.second;
187
522870172
    array_ += symbol_consumed.second;
188
#ifndef NDEBUG
189
    last_type_ = TokenType::Symbol;
190
#endif
191
522870172
    return TokenType::Symbol;
192
523184666
  }
193

            
194
  /** @return the current string_view -- only valid to call if next()==TokenType::StringView */
195
314491
  absl::string_view stringView() const {
196
#ifndef NDEBUG
197
    ASSERT(last_type_ == TokenType::StringView);
198
#endif
199
314491
    return string_view_;
200
314491
  }
201

            
202
  /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */
203
522863562
  Symbol symbol() const {
204
#ifndef NDEBUG
205
    ASSERT(last_type_ == TokenType::Symbol);
206
#endif
207
522863562
    return symbol_;
208
522863562
  }
209

            
210
private:
211
  const uint8_t* array_;
212
  size_t size_;
213
  absl::string_view string_view_;
214
  Symbol symbol_;
215
#ifndef NDEBUG
216
  TokenType last_type_{TokenType::End};
217
#endif
218
};
219

            
220
void SymbolTable::Encoding::decodeTokens(
221
    StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn,
222
327229478
    const std::function<void(absl::string_view)>& string_view_token_fn) {
223
327229478
  TokenIter iter(stat_name);
224
327229478
  TokenIter::TokenType type;
225
850083934
  while ((type = iter.next()) != TokenIter::TokenType::End) {
226
522854456
    if (type == TokenIter::TokenType::StringView) {
227
314491
      string_view_token_fn(iter.stringView());
228
522539965
    } else {
229
522539965
      ASSERT(type == TokenIter::TokenType::Symbol);
230
522539965
      symbol_token_fn(iter.symbol());
231
522539965
    }
232
522854456
  }
233
327229478
}
234

            
235
70795
bool StatName::startsWith(StatName prefix) const {
236
70795
  using TokenIter = SymbolTable::Encoding::TokenIter;
237
70795
  TokenIter prefix_iter(prefix);
238
70795
  TokenIter this_iter(*this);
239
140036
  while (true) {
240
140036
    TokenIter::TokenType prefix_type = prefix_iter.next();
241
140036
    TokenIter::TokenType this_type = this_iter.next();
242
140036
    if (prefix_type == TokenIter::TokenType::End) {
243
69212
      return true; // "a.b.c" starts with "a.b" or "a.b.c"
244
69212
    }
245
70824
    if (this_type == TokenIter::TokenType::End) {
246
1
      return false; // "a.b" does not start with "a.b.c"
247
1
    }
248
70823
    if (prefix_type != TokenIter::TokenType::Symbol) {
249
      // Disallow dynamic components in the prefix. We don't have a current need
250
      // for prefixes to be expressed dynamically, and handling that case would
251
      // add complexity. In particular we'd need to take locks to decode to
252
      // strings.
253
1
      return false;
254
1
    }
255
70822
    if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) {
256
1581
      return false;
257
1581
    }
258
70822
  }
259
  return true; // not reached
260
70795
}
261

            
262
87007935
std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const {
263
87007935
  std::vector<absl::string_view> strings;
264
87007935
  Thread::LockGuard lock(lock_);
265
87007935
  Encoding::decodeTokens(
266
87007935
      stat_name,
267
87007935
      [this, &strings](Symbol symbol)
268
220540700
          ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); },
269
87007935
      [&strings](absl::string_view str) { strings.push_back(str); });
270
87007935
  return strings;
271
87007935
}
272

            
273
58254373
void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) {
274
58254373
  appendEncoding(data_bytes_required_, mem_block);
275
58254373
  mem_block.appendBlock(mem_block_);
276
58254373
  mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct.
277
58254373
}
278

            
279
void SymbolTable::Encoding::appendToMemBlock(StatName stat_name,
280
88790607
                                             MemBlockBuilder<uint8_t>& mem_block) {
281
88790607
  const uint8_t* data = stat_name.dataIncludingSize();
282
88790607
  if (data == nullptr) {
283
14696992
    mem_block.appendOne(0);
284
74121187
  } else {
285
74093615
    mem_block.appendData(absl::MakeSpan(data, stat_name.size()));
286
74093615
  }
287
88790607
}
288

            
289
SymbolTable::SymbolTable()
290
    // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro.
291
106144
    : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {}
292

            
293
106082
SymbolTable::~SymbolTable() {
294
  // To avoid leaks into the symbol table, we expect all StatNames to be freed.
295
  // Note: this could potentially be short-circuited if we decide a fast exit
296
  // is needed in production. But it would be good to ensure clean up during
297
  // tests.
298
106082
  ASSERT(numSymbols() == 0);
299
106082
}
300

            
301
// TODO(ambuc): There is a possible performance optimization here for avoiding
302
// the encoding of IPs / numbers if they appear in stat names. We don't want to
303
// waste time symbolizing an integer as an integer, if we can help it.
304
58254394
void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) {
305
58254394
  if (name.empty()) {
306
1684812
    return;
307
1684812
  }
308

            
309
  // We want to hold the lock for the minimum amount of time, so we do the
310
  // string-splitting and prepare a temp vector of Symbol first.
311
56569582
  const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.');
312
56569582
  std::vector<Symbol> symbols;
313
56569582
  symbols.reserve(tokens.size());
314

            
315
  // Now take the lock and populate the Symbol objects, which involves bumping
316
  // ref-counts in this.
317
56569582
  {
318
56569582
    Thread::LockGuard lock(lock_);
319
56569582
    recent_lookups_.lookup(name);
320
74361946
    for (auto& token : tokens) {
321
      // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with
322
      // length below some threshold, say 4 bytes. It might be preferable not to
323
      // reserve Symbols for every 3 digit number found (for example) in ipv4
324
      // addresses.
325
74361946
      symbols.push_back(toSymbol(token));
326
74361946
    }
327
56569582
  }
328

            
329
  // Now efficiently encode the array of 32-bit symbols into a uint8_t array.
330
56569582
  encoding.addSymbols(symbols);
331
56569582
}
332

            
333
171
uint64_t SymbolTable::numSymbols() const {
334
171
  Thread::LockGuard lock(lock_);
335
171
  ASSERT(encode_map_.size() == decode_map_.size());
336
171
  return encode_map_.size();
337
171
}
338

            
339
87007864
std::string SymbolTable::toString(const StatName& stat_name) const {
340
87007864
  return absl::StrJoin(decodeStrings(stat_name), ".");
341
87007864
}
342

            
343
90984925
void SymbolTable::incRefCount(const StatName& stat_name) {
344
  // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
345
90984925
  const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
346

            
347
90984925
  Thread::LockGuard lock(lock_);
348
115813532
  for (Symbol symbol : symbols) {
349
113824459
    auto decode_search = decode_map_.find(symbol);
350

            
351
113824459
    ASSERT(decode_search != decode_map_.end(),
352
113824459
           "Please see "
353
113824459
           "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
354
113824459
           "debugging-symbol-table-assertions");
355
113824459
    auto encode_search = encode_map_.find(decode_search->second->toStringView());
356
113824459
    ASSERT(encode_search != encode_map_.end(),
357
113824459
           "Please see "
358
113824459
           "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#"
359
113824459
           "debugging-symbol-table-assertions");
360

            
361
113824459
    ++encode_search->second.ref_count_;
362
113824459
  }
363
90984925
}
364

            
365
149236114
void SymbolTable::free(const StatName& stat_name) {
366
  // Before taking the lock, decode the array of symbols from the SymbolTable::Storage.
367
149236114
  const SymbolVec symbols = Encoding::decodeSymbols(stat_name);
368

            
369
149236114
  Thread::LockGuard lock(lock_);
370
190524783
  for (Symbol symbol : symbols) {
371
188181027
    auto decode_search = decode_map_.find(symbol);
372
188181027
    ASSERT(decode_search != decode_map_.end());
373

            
374
188181027
    auto encode_search = encode_map_.find(decode_search->second->toStringView());
375
188181027
    ASSERT(encode_search != encode_map_.end());
376

            
377
    // If that was the last remaining client usage of the symbol, erase the
378
    // current mappings and add the now-unused symbol to the reuse pool.
379
    //
380
    // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in
381
    // symbol_table_speed_test.cc, relative to breaking out the decrement into a
382
    // separate step, likely due to the non-trivial dereferences in EXPR.
383
188181027
    if (--encode_search->second.ref_count_ == 0) {
384
11880409
      decode_map_.erase(decode_search);
385
11880409
      encode_map_.erase(encode_search);
386
11880409
      pool_.push(symbol);
387
11880409
    }
388
188181027
  }
389
149236114
}
390

            
391
22381
uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const {
392
22381
  uint64_t total = 0;
393
22381
  absl::flat_hash_map<std::string, uint64_t> name_count_map;
394

            
395
  // We don't want to hold lock_ while calling the iterator, but we need it to
396
  // access recent_lookups_, so we buffer in name_count_map.
397
22381
  {
398
22381
    Thread::LockGuard lock(lock_);
399
22381
    recent_lookups_.forEach(
400
22381
        [&name_count_map](absl::string_view str, uint64_t count)
401
22382
            ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; });
402
22381
    total += recent_lookups_.total();
403
22381
  }
404

            
405
  // Now we have the collated name-count map data: we need to vectorize and
406
  // sort. We define the pair with the count first as std::pair::operator<
407
  // prioritizes its first element over its second.
408
22381
  using LookupCount = std::pair<uint64_t, absl::string_view>;
409
22381
  std::vector<LookupCount> lookup_data;
410
22381
  lookup_data.reserve(name_count_map.size());
411
22382
  for (const auto& iter : name_count_map) {
412
4
    lookup_data.emplace_back(LookupCount(iter.second, iter.first));
413
4
  }
414
22381
  std::sort(lookup_data.begin(), lookup_data.end());
415
22382
  for (const LookupCount& lookup_count : lookup_data) {
416
4
    iter(lookup_count.second, lookup_count.first);
417
4
  }
418
22381
  return total;
419
22381
}
420

            
421
546
DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const {
422
546
  DynamicSpans dynamic_spans;
423

            
424
546
  uint32_t index = 0;
425
546
  auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) {
426
277
    DynamicSpan span;
427
277
    span.first = index;
428
277
    index += std::count(str.begin(), str.end(), '.');
429
277
    span.second = index;
430
277
    ++index;
431
277
    dynamic_spans.push_back(span);
432
277
  };
433

            
434
  // Use decodeTokens to suss out which components of stat_name are
435
  // symbolic vs dynamic. The lambda that takes a Symbol is called
436
  // for symbolic components. The lambda called with a string_view
437
  // is called for dynamic components.
438
  //
439
  // Note that with fake symbol tables, the Symbol lambda is called
440
  // once for each character in the string, and no dynamics will
441
  // be recorded.
442
546
  Encoding::decodeTokens(stat_name, [&index](Symbol) { ++index; }, record_dynamic);
443
546
  return dynamic_spans;
444
546
}
445

            
446
11
void SymbolTable::setRecentLookupCapacity(uint64_t capacity) {
447
11
  Thread::LockGuard lock(lock_);
448
11
  recent_lookups_.setCapacity(capacity);
449
11
}
450

            
451
7
void SymbolTable::clearRecentLookups() {
452
7
  Thread::LockGuard lock(lock_);
453
7
  recent_lookups_.clear();
454
7
}
455

            
456
13
uint64_t SymbolTable::recentLookupCapacity() const {
457
13
  Thread::LockGuard lock(lock_);
458
13
  return recent_lookups_.capacity();
459
13
}
460

            
461
18876
StatNameSetPtr SymbolTable::makeSet(absl::string_view name) {
462
  // make_unique does not work with private ctor, even though SymbolTable is a friend.
463
18876
  StatNameSetPtr stat_name_set(new StatNameSet(*this, name));
464
18876
  return stat_name_set;
465
18876
}
466

            
467
74361944
Symbol SymbolTable::toSymbol(absl::string_view sv) {
468
74361944
  Symbol result;
469
74361944
  auto encode_find = encode_map_.find(sv);
470
  // If the string segment doesn't already exist,
471
74361944
  if (encode_find == encode_map_.end()) {
472
    // We create the actual string, place it in the decode_map_, and then insert
473
    // a string_view pointing to it in the encode_map_. This allows us to only
474
    // store the string once. We use unique_ptr so copies are not made as
475
    // flat_hash_map moves values around.
476
11882014
    InlineStringPtr str = InlineString::create(sv);
477
11882014
    auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)});
478
11882014
    ASSERT(encode_insert.second);
479
11882014
    auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)});
480
11882014
    ASSERT(decode_insert.second);
481

            
482
11882014
    result = next_symbol_;
483
11882014
    newSymbol();
484
62771585
  } else {
485
    // If the insertion didn't take place, return the actual value at that location and up the
486
    // refcount at that location
487
62479930
    result = encode_find->second.symbol_;
488
62479930
    ++(encode_find->second.ref_count_);
489
62479930
  }
490
74361944
  return result;
491
74361944
}
492

            
493
absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const
494
220606129
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
495
220606129
  auto search = decode_map_.find(symbol);
496
220606129
  RELEASE_ASSERT(search != decode_map_.end(), "no such symbol");
497
220606129
  return search->second->toStringView();
498
220606129
}
499

            
500
11882015
void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
501
11882015
  if (pool_.empty()) {
502
11658105
    next_symbol_ = ++monotonic_counter_;
503
11772421
  } else {
504
223910
    next_symbol_ = pool_.top();
505
223910
    pool_.pop();
506
223910
  }
507
  // This should catch integer overflow for the new symbol.
508
11882015
  ASSERT(monotonic_counter_ != 0);
509
11882015
}
510

            
511
34564
bool SymbolTable::lessThan(const StatName& a, const StatName& b) const {
512
  // Proactively take the table lock in anticipation that we'll need to
513
  // convert at least one symbol to a string_view, and it's easier not to
514
  // bother to lazily take the lock.
515
34564
  Thread::LockGuard lock(lock_);
516
34564
  return lessThanLockHeld(a, b);
517
34564
}
518

            
519
bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const
520
34580
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
521
34580
  Encoding::TokenIter a_iter(a), b_iter(b);
522
60865
  while (true) {
523
60865
    Encoding::TokenIter::TokenType a_type = a_iter.next();
524
60865
    Encoding::TokenIter::TokenType b_type = b_iter.next();
525
60865
    if (b_type == Encoding::TokenIter::TokenType::End) {
526
1585
      return false; // "x.y.z" > "x.y", "x.y" == "x.y"
527
1585
    }
528
59280
    if (a_type == Encoding::TokenIter::TokenType::End) {
529
2
      return true; // "x.y" < "x.y.z"
530
2
    }
531
59278
    if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol &&
532
59278
        a_iter.symbol() == b_iter.symbol()) {
533
26285
      continue; // matching symbols don't need to be decoded to strings
534
26285
    }
535

            
536
32993
    absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol
537
32993
                                    ? fromSymbol(a_iter.symbol())
538
32993
                                    : a_iter.stringView();
539
32993
    absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol
540
32993
                                    ? fromSymbol(b_iter.symbol())
541
32993
                                    : b_iter.stringView();
542
32993
    if (a_token != b_token) {
543
32993
      return a_token < b_token;
544
32993
    }
545
32993
  }
546
34580
}
547

            
548
#ifndef ENVOY_CONFIG_COVERAGE
549
void SymbolTable::debugPrint() const {
550
  Thread::LockGuard lock(lock_);
551
  std::vector<Symbol> symbols;
552
  for (const auto& p : decode_map_) {
553
    symbols.push_back(p.first);
554
  }
555
  std::sort(symbols.begin(), symbols.end());
556
  for (Symbol symbol : symbols) {
557
    const InlineString& token = *decode_map_.find(symbol)->second;
558
    const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second;
559
    ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_);
560
  }
561
}
562
#endif
563

            
564
58254409
SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) {
565
58254409
  name = StringUtil::removeTrailingCharacters(name, '.');
566
58254409
  Encoding encoding;
567
58254409
  addTokensToEncoding(name, encoding);
568
58254409
  MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired()));
569
58254409
  encoding.moveToMemBlock(mem_block);
570
58254409
  return mem_block.release();
571
58254409
}
572

            
573
StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table)
574
58254401
    : StatNameStorageBase(table.encode(name)) {}
575

            
576
2194313
StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) {
577
2194313
  const size_t size = src.size();
578
2194313
  MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t.
579
2194313
  src.copyToMemBlock(storage);
580
2194313
  setBytes(storage.release());
581
2194313
  table.incRefCount(statName());
582
2194313
}
583

            
584
61766
SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) {
585
61766
  name = StringUtil::removeTrailingCharacters(name, '.');
586

            
587
  // For all StatName objects, we first have the total number of bytes in the
588
  // representation. But for inlined dynamic string StatName variants, we must
589
  // store the length of the payload separately, so that if this token gets
590
  // joined with others, we'll know much space it consumes until the next token.
591
  // So the layout is
592
  //   [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ]
593
  // So we need to figure out how many bytes we need to represent length-of-name
594
  // and name.
595

            
596
  // payload_bytes is the total number of bytes needed to represent the
597
  // characters in name, plus their encoded size, plus the literal indicator.
598
61766
  const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1;
599

            
600
  // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and
601
  // the length of those.
602
61766
  const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes);
603
61766
  MemBlockBuilder<uint8_t> mem_block(total_bytes);
604

            
605
61766
  Encoding::appendEncoding(payload_bytes, mem_block);
606
61766
  mem_block.appendOne(LiteralStringIndicator);
607
61766
  Encoding::appendEncoding(name.size(), mem_block);
608
61766
  mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size()));
609
61766
  ASSERT(mem_block.capacityRemaining() == 0);
610
61766
  return mem_block.release();
611
61766
}
612

            
613
115472631
StatNameStorage::~StatNameStorage() {
614
  // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to
615
  // decrement the reference counts held by the SymbolTable on behalf of
616
  // this StatName. This saves 8 bytes of storage per stat, relative to
617
  // holding a SymbolTable& in each StatNameStorage object.
618
115472631
  ASSERT(bytes() == nullptr);
619
115472631
}
620

            
621
60448650
void StatNameStorage::free(SymbolTable& table) {
622
  // nolint: https://github.com/llvm/llvm-project/issues/81597
623
  // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
624
60448650
  table.free(statName());
625
60448650
  clear();
626
60448650
}
627

            
628
8773340
void StatNamePool::clear() {
629
51356545
  for (StatNameStorage& storage : storage_vector_) {
630
    // nolint: https://github.com/llvm/llvm-project/issues/81597
631
    // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
632
51355435
    storage.free(symbol_table_);
633
51355435
  }
634
8773340
  storage_vector_.clear();
635
8773340
}
636

            
637
51355433
const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) {
638
51355433
  storage_vector_.emplace_back(str, symbol_table_);
639
51355433
  return storage_vector_.back().bytes();
640
51355433
}
641

            
642
2
StatName StatNamePool::add(StatName name) {
643
2
  storage_vector_.emplace_back(name, symbol_table_);
644
2
  return storage_vector_.back().statName();
645
2
}
646

            
647
51020671
StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); }
648

            
649
12473
StatName StatNameDynamicPool::add(absl::string_view str) {
650
12473
  storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_));
651
12473
  return StatName(storage_vector_.back().bytes());
652
12473
}
653

            
654
254287
StatNameStorageSet::~StatNameStorageSet() {
655
  // free() must be called before destructing StatNameStorageSet to decrement
656
  // references to all symbols.
657
254287
  ASSERT(hash_set_.empty());
658
254287
}
659

            
660
254287
void StatNameStorageSet::free(SymbolTable& symbol_table) {
661
  // We must free() all symbols referenced in the set, otherwise the symbols
662
  // will leak when the flat_hash_map superclass is destructed. They cannot
663
  // self-destruct without an explicit free() as each individual StatNameStorage
664
  // object does not have a reference to the symbol table, which would waste 8
665
  // bytes per stat-name. The easiest way to safely free all the contents of the
666
  // symbol table set is to use flat_hash_map::extract(), which removes and
667
  // returns an element from the set without destructing the element
668
  // immediately. This gives us a chance to call free() on each one before they
669
  // are destroyed.
670
  //
671
  // There's a performance risk here, if removing elements via
672
  // flat_hash_set::begin() is inefficient to use in a loop like this. One can
673
  // imagine a hash-table implementation where the performance of this
674
  // usage-model would be poor. However, tests with 100k elements appeared to
675
  // run quickly when compiled for optimization, so at present this is not a
676
  // performance issue.
677

            
678
324081
  while (!hash_set_.empty()) {
679
69794
    auto storage = hash_set_.extract(hash_set_.begin());
680
69794
    storage.value().free(symbol_table);
681
69794
  }
682
254287
}
683

            
684
88628101
SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const {
685
88628101
  size_t num_bytes = 0;
686
187791577
  for (StatName stat_name : stat_names) {
687
187791577
    if (!stat_name.empty()) {
688
98798213
      num_bytes += stat_name.dataSize();
689
98798213
    }
690
187791577
  }
691
88628101
  MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes));
692
88628101
  Encoding::appendEncoding(num_bytes, mem_block);
693
187791564
  for (StatName stat_name : stat_names) {
694
187791564
    stat_name.appendDataToMemBlock(mem_block);
695
187791564
  }
696
88628101
  ASSERT(mem_block.capacityRemaining() == 0);
697
88628101
  return mem_block.release();
698
88628101
}
699

            
700
39889409
void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) {
701
39889409
  RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded");
702

            
703
  // First encode all the names.
704
39889409
  size_t total_size_bytes = 1; /* one byte for holding the number of names */
705

            
706
128680032
  for (uint32_t i = 0; i < num_names; ++i) {
707
88790623
    total_size_bytes += names[i].size();
708
88790623
  }
709

            
710
  // Now allocate the exact number of bytes required and move the encodings
711
  // into storage.
712
39889409
  MemBlockBuilder<uint8_t> mem_block(total_size_bytes);
713
39889409
  mem_block.appendOne(num_names);
714
128680023
  for (uint32_t i = 0; i < num_names; ++i) {
715
88790614
    const StatName stat_name = names[i];
716
88790614
    Encoding::appendToMemBlock(stat_name, mem_block);
717
88790614
    incRefCount(stat_name);
718
88790614
  }
719

            
720
  // This assertion double-checks the arithmetic where we computed
721
  // total_size_bytes. After appending all the encoded data into the
722
  // allocated byte array, we should have exhausted all the memory
723
  // we though we needed.
724
39889409
  ASSERT(mem_block.capacityRemaining() == 0);
725
39889409
  list.moveStorageIntoList(mem_block.release());
726
39889409
}
727

            
728
39887829
StatNameList::~StatNameList() { ASSERT(!populated()); }
729

            
730
259827412
void StatNameList::iterate(const std::function<bool(StatName)>& f) const {
731
259827412
  const uint8_t* p = &storage_[0];
732
259827412
  const uint32_t num_elements = *p++;
733
348644604
  for (uint32_t i = 0; i < num_elements; ++i) {
734
308751131
    const StatName stat_name(p);
735
308751131
    p += stat_name.size();
736
308751131
    if (!f(stat_name)) {
737
219933939
      break;
738
219933939
    }
739
308751131
  }
740
259827412
}
741

            
742
39887830
void StatNameList::clear(SymbolTable& symbol_table) {
743
88787470
  iterate([&symbol_table](StatName stat_name) -> bool {
744
    // nolint: https://github.com/llvm/llvm-project/issues/81597
745
    // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
746
88787470
    symbol_table.free(stat_name);
747
88787470
    return true;
748
88787470
  });
749
39887830
  storage_.reset();
750
39887830
}
751

            
752
StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name)
753
18876
    : name_(std::string(name)), pool_(symbol_table) {
754
18876
  builtin_stat_names_[""] = StatName();
755
18876
}
756

            
757
656587
void StatNameSet::rememberBuiltin(absl::string_view str) {
758
656587
  StatName stat_name;
759
656587
  {
760
656587
    absl::MutexLock lock(mutex_);
761
656587
    stat_name = pool_.add(str);
762
656587
  }
763
656587
  builtin_stat_names_[str] = stat_name;
764
656587
}
765

            
766
4575770
StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const {
767
  // If token was recorded as a built-in during initialization, we can
768
  // service this request lock-free.
769
4575770
  const auto iter = builtin_stat_names_.find(token);
770
4575770
  if (iter != builtin_stat_names_.end()) {
771
2350920
    return iter->second;
772
2350920
  }
773
2224850
  return fallback;
774
4575770
}
775

            
776
} // namespace Stats
777
} // namespace Envoy