/proc/self/cwd/source/common/stats/symbol_table.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include "source/common/stats/symbol_table.h" |
2 | | |
3 | | #include <algorithm> |
4 | | #include <iostream> |
5 | | #include <memory> |
6 | | #include <vector> |
7 | | |
8 | | #include "source/common/common/assert.h" |
9 | | #include "source/common/common/logger.h" |
10 | | #include "source/common/common/utility.h" |
11 | | |
12 | | #include "absl/strings/str_cat.h" |
13 | | |
14 | | namespace Envoy { |
15 | | namespace Stats { |
16 | | |
17 | | // Masks used for variable-length encoding of arbitrary-sized integers into a |
18 | | // uint8-array. The integers are typically small, so we try to store them in as |
19 | | // few bytes as possible. The bottom 7 bits hold values, and the top bit is used |
20 | | // to determine whether another byte is needed for more data. |
21 | | static constexpr uint32_t SpilloverMask = 0x80; |
22 | | static constexpr uint32_t Low7Bits = 0x7f; |
23 | | |
24 | | // When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol |
25 | | // that will decode into uint8_t array starting (and ending) with {0}. Thus we |
26 | | // can use a leading 0 in the first byte to indicate that what follows is |
27 | | // literal char data, rather than symbol-table entries. Literal char data is |
28 | | // used for dynamically discovered stat-name tokens where you don't want to take |
29 | | // a symbol table lock, and would rather pay extra memory overhead to store the |
30 | | // tokens as fully elaborated strings. |
31 | | static constexpr Symbol FirstValidSymbol = 1; |
32 | | static constexpr uint8_t LiteralStringIndicator = 0; |
33 | | |
34 | 4.35G | size_t StatName::dataSize() const { |
35 | 4.35G | if (size_and_data_ == nullptr) { |
36 | 74.6M | return 0; |
37 | 74.6M | } |
38 | 4.28G | return SymbolTable::Encoding::decodeNumber(size_and_data_).first; |
39 | 4.35G | } |
40 | | |
41 | | #ifndef ENVOY_CONFIG_COVERAGE |
42 | 0 | void StatName::debugPrint() { |
43 | | // TODO(jmarantz): capture this functionality (always prints regardless of |
44 | | // loglevel) in an ENVOY_LOG macro variant or similar, perhaps |
45 | | // ENVOY_LOG_MISC(stderr, ...); |
46 | 0 | if (size_and_data_ == nullptr) { |
47 | 0 | std::cerr << "Null StatName" << std::endl; |
48 | 0 | } else { |
49 | 0 | const size_t nbytes = dataSize(); |
50 | 0 | std::cerr << "dataSize=" << nbytes << ":"; |
51 | 0 | for (size_t i = 0; i < nbytes; ++i) { |
52 | 0 | std::cerr << " " << static_cast<uint64_t>(data()[i]); |
53 | 0 | } |
54 | 0 | const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this); |
55 | 0 | std::cerr << ", numSymbols=" << encoding.size() << ":"; |
56 | 0 | for (Symbol symbol : encoding) { |
57 | 0 | std::cerr << " " << symbol; |
58 | 0 | } |
59 | 0 | std::cerr << std::endl; |
60 | 0 | } |
61 | 0 | } |
62 | | #endif |
63 | | |
64 | 84.0M | SymbolTable::Encoding::~Encoding() { |
65 | | // Verifies that moveToMemBlock() was called on this encoding. Failure |
66 | | // to call moveToMemBlock() will result in leaks symbols. |
67 | 84.0M | ASSERT(mem_block_.capacity() == 0); |
68 | 84.0M | } |
69 | | |
70 | 2.60G | size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) { |
71 | 2.60G | size_t num_bytes = 0; |
72 | 2.85G | do { |
73 | 2.85G | ++num_bytes; |
74 | 2.85G | number >>= 7; |
75 | 2.85G | } while (number != 0); |
76 | 2.60G | return num_bytes; |
77 | 2.60G | } |
78 | | |
79 | 667M | void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder<uint8_t>& mem_block) { |
80 | | // UTF-8-like encoding where a value 127 or less gets written as a single |
81 | | // byte. For higher values we write the low-order 7 bits with a 1 in |
82 | | // the high-order bit. Then we right-shift 7 bits and keep adding more bytes |
83 | | // until we have consumed all the non-zero bits in symbol. |
84 | | // |
85 | | // When decoding, we stop consuming uint8_t when we see a uint8_t with |
86 | | // high-order bit 0. |
87 | 915M | do { |
88 | 915M | if (number < (1 << 7)) { |
89 | 667M | mem_block.appendOne(number); // number <= 127 gets encoded in one byte. |
90 | 667M | } else { |
91 | 247M | mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes. |
92 | 247M | } |
93 | 915M | number >>= 7; |
94 | 915M | } while (number != 0); |
95 | 667M | } |
96 | | |
97 | 78.9M | void SymbolTable::Encoding::addSymbols(const std::vector<Symbol>& symbols) { |
98 | 78.9M | ASSERT(data_bytes_required_ == 0); |
99 | 391M | for (Symbol symbol : symbols) { |
100 | 391M | data_bytes_required_ += encodingSizeBytes(symbol); |
101 | 391M | } |
102 | 78.9M | mem_block_.setCapacity(data_bytes_required_); |
103 | 391M | for (Symbol symbol : symbols) { |
104 | 391M | appendEncoding(symbol, mem_block_); |
105 | 391M | } |
106 | 78.9M | } |
107 | | |
108 | 7.69G | std::pair<uint64_t, size_t> SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) { |
109 | 7.69G | uint64_t number = 0; |
110 | 7.69G | uint64_t uc = SpilloverMask; |
111 | 7.69G | const uint8_t* start = encoding; |
112 | 17.7G | for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) { |
113 | 10.0G | uc = static_cast<uint32_t>(*encoding); |
114 | 10.0G | number |= (uc & Low7Bits) << shift; |
115 | 10.0G | } |
116 | 7.69G | return std::make_pair(number, encoding - start); |
117 | 7.69G | } |
118 | | |
119 | 305M | SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) { |
120 | 305M | SymbolVec symbol_vec; |
121 | 305M | symbol_vec.reserve(stat_name.dataSize()); |
122 | 305M | decodeTokens( |
123 | 2.18G | stat_name, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); }, |
124 | 305M | [](absl::string_view) {}); |
125 | 305M | return symbol_vec; |
126 | 305M | } |
127 | | |
128 | | // Iterator for decoding StatName tokens. This is not an STL-compatible |
129 | | // iterator; it is used only locally. Using this rather than decodeSymbols makes |
130 | | // it easier to early-exit out of a comparison once we know the answer. For |
131 | | // example, if you are ordering "a.b.c.d" against "a.x.y.z" you know the order |
132 | | // when you get to the "b" vs "x" and do not need to decode "c" vs "y". |
133 | | // |
134 | | // This is also helpful for prefix-matching. |
135 | | // |
136 | | // The array-version of decodeSymbols is still a good approach for converting a |
137 | | // StatName to a string as the intermediate array of string_view is needed to |
138 | | // allocate the right size for the joined string. |
139 | | class SymbolTable::Encoding::TokenIter { |
140 | | public: |
141 | | // The type of token reached. |
142 | | enum class TokenType { StringView, Symbol, End }; |
143 | | |
144 | 362M | TokenIter(StatName stat_name) { |
145 | 362M | const uint8_t* raw_data = stat_name.dataIncludingSize(); |
146 | 362M | if (raw_data == nullptr) { |
147 | 10.8M | size_ = 0; |
148 | 10.8M | array_ = nullptr; |
149 | 351M | } else { |
150 | 351M | std::pair<uint64_t, size_t> size_consumed = decodeNumber(raw_data); |
151 | 351M | size_ = size_consumed.first; |
152 | 351M | array_ = raw_data + size_consumed.second; |
153 | 351M | } |
154 | 362M | } |
155 | | |
156 | | /** |
157 | | * Parses the next token. The token can then be retrieved by calling |
158 | | * stringView() or symbol(). |
159 | | * @return The type of token, TokenType::End if we have reached the end of the StatName. |
160 | | */ |
161 | 3.42G | TokenType next() { |
162 | 3.42G | if (size_ == 0) { |
163 | 362M | return TokenType::End; |
164 | 362M | } |
165 | 3.06G | if (*array_ == LiteralStringIndicator) { |
166 | | // To avoid scanning memory to find the literal size during decode, we |
167 | | // var-length encode the size of the literal string prior to the data. |
168 | 35.2M | ASSERT(size_ > 1); |
169 | 35.2M | ++array_; |
170 | 35.2M | --size_; |
171 | 35.2M | std::pair<uint64_t, size_t> length_consumed = decodeNumber(array_); |
172 | 35.2M | uint64_t length = length_consumed.first; |
173 | 35.2M | array_ += length_consumed.second; |
174 | 35.2M | size_ -= length_consumed.second; |
175 | 35.2M | ASSERT(size_ >= length); |
176 | 35.2M | string_view_ = absl::string_view(reinterpret_cast<const char*>(array_), length); |
177 | 35.2M | size_ -= length; |
178 | 35.2M | array_ += length; |
179 | 35.2M | #ifndef NDEBUG |
180 | 35.2M | last_type_ = TokenType::StringView; |
181 | 35.2M | #endif |
182 | 35.2M | return TokenType::StringView; |
183 | 35.2M | } |
184 | 3.02G | std::pair<uint64_t, size_t> symbol_consumed = decodeNumber(array_); |
185 | 3.02G | symbol_ = symbol_consumed.first; |
186 | 3.02G | size_ -= symbol_consumed.second; |
187 | 3.02G | array_ += symbol_consumed.second; |
188 | 3.02G | #ifndef NDEBUG |
189 | 3.02G | last_type_ = TokenType::Symbol; |
190 | 3.02G | #endif |
191 | 3.02G | return TokenType::Symbol; |
192 | 3.06G | } |
193 | | |
194 | | /** @return the current string_view -- only valid to call if next()==TokenType::StringView */ |
195 | 35.2M | absl::string_view stringView() const { |
196 | 35.2M | #ifndef NDEBUG |
197 | 35.2M | ASSERT(last_type_ == TokenType::StringView); |
198 | 35.2M | #endif |
199 | 35.2M | return string_view_; |
200 | 35.2M | } |
201 | | |
202 | | /** @return the current symbol -- only valid to call if next()==TokenType::Symbol */ |
203 | 3.02G | Symbol symbol() const { |
204 | 3.02G | #ifndef NDEBUG |
205 | 3.02G | ASSERT(last_type_ == TokenType::Symbol); |
206 | 3.02G | #endif |
207 | 3.02G | return symbol_; |
208 | 3.02G | } |
209 | | |
210 | | private: |
211 | | const uint8_t* array_; |
212 | | size_t size_; |
213 | | absl::string_view string_view_; |
214 | | Symbol symbol_; |
215 | | #ifndef NDEBUG |
216 | | TokenType last_type_{TokenType::End}; |
217 | | #endif |
218 | | }; |
219 | | |
220 | | void SymbolTable::Encoding::decodeTokens( |
221 | | StatName stat_name, const std::function<void(Symbol)>& symbol_token_fn, |
222 | 362M | const std::function<void(absl::string_view)>& string_view_token_fn) { |
223 | 362M | TokenIter iter(stat_name); |
224 | 362M | TokenIter::TokenType type; |
225 | 3.42G | while ((type = iter.next()) != TokenIter::TokenType::End) { |
226 | 3.06G | if (type == TokenIter::TokenType::StringView) { |
227 | 35.2M | string_view_token_fn(iter.stringView()); |
228 | 3.02G | } else { |
229 | 3.02G | ASSERT(type == TokenIter::TokenType::Symbol); |
230 | 3.02G | symbol_token_fn(iter.symbol()); |
231 | 3.02G | } |
232 | 3.06G | } |
233 | 362M | } |
234 | | |
235 | 0 | bool StatName::startsWith(StatName prefix) const { |
236 | 0 | using TokenIter = SymbolTable::Encoding::TokenIter; |
237 | 0 | TokenIter prefix_iter(prefix); |
238 | 0 | TokenIter this_iter(*this); |
239 | 0 | while (true) { |
240 | 0 | TokenIter::TokenType prefix_type = prefix_iter.next(); |
241 | 0 | TokenIter::TokenType this_type = this_iter.next(); |
242 | 0 | if (prefix_type == TokenIter::TokenType::End) { |
243 | 0 | return true; // "a.b.c" starts with "a.b" or "a.b.c" |
244 | 0 | } |
245 | 0 | if (this_type == TokenIter::TokenType::End) { |
246 | 0 | return false; // "a.b" does not start with "a.b.c" |
247 | 0 | } |
248 | 0 | if (prefix_type != TokenIter::TokenType::Symbol) { |
249 | | // Disallow dynamic components in the prefix. We don't have a current need |
250 | | // for prefixes to be expressed dynamically, and handling that case would |
251 | | // add complexity. In particular we'd need to take locks to decode to |
252 | | // strings. |
253 | 0 | return false; |
254 | 0 | } |
255 | 0 | if (this_type != TokenIter::TokenType::Symbol || this_iter.symbol() != prefix_iter.symbol()) { |
256 | 0 | return false; |
257 | 0 | } |
258 | 0 | } |
259 | 0 | return true; // not reached |
260 | 0 | } |
261 | | |
262 | 57.6M | std::vector<absl::string_view> SymbolTable::decodeStrings(StatName stat_name) const { |
263 | 57.6M | std::vector<absl::string_view> strings; |
264 | 57.6M | Thread::LockGuard lock(lock_); |
265 | 57.6M | Encoding::decodeTokens( |
266 | 57.6M | stat_name, |
267 | 57.6M | [this, &strings](Symbol symbol) |
268 | 824M | ABSL_NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); }, |
269 | 57.6M | [&strings](absl::string_view str) { strings.push_back(str); }); |
270 | 57.6M | return strings; |
271 | 57.6M | } |
272 | | |
273 | 84.0M | void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder<uint8_t>& mem_block) { |
274 | 84.0M | appendEncoding(data_bytes_required_, mem_block); |
275 | 84.0M | mem_block.appendBlock(mem_block_); |
276 | 84.0M | mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. |
277 | 84.0M | } |
278 | | |
279 | | void SymbolTable::Encoding::appendToMemBlock(StatName stat_name, |
280 | 107M | MemBlockBuilder<uint8_t>& mem_block) { |
281 | 107M | const uint8_t* data = stat_name.dataIncludingSize(); |
282 | 107M | if (data == nullptr) { |
283 | 10.8M | mem_block.appendOne(0); |
284 | 96.5M | } else { |
285 | 96.5M | mem_block.appendData(absl::MakeSpan(data, stat_name.size())); |
286 | 96.5M | } |
287 | 107M | } |
288 | | |
289 | | SymbolTable::SymbolTable() |
290 | | // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro. |
291 | 171k | : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {} |
292 | | |
293 | 171k | SymbolTable::~SymbolTable() { |
294 | | // To avoid leaks into the symbol table, we expect all StatNames to be freed. |
295 | | // Note: this could potentially be short-circuited if we decide a fast exit |
296 | | // is needed in production. But it would be good to ensure clean up during |
297 | | // tests. |
298 | 171k | ASSERT(numSymbols() == 0); |
299 | 171k | } |
300 | | |
301 | | // TODO(ambuc): There is a possible performance optimization here for avoiding |
302 | | // the encoding of IPs / numbers if they appear in stat names. We don't want to |
303 | | // waste time symbolizing an integer as an integer, if we can help it. |
304 | 84.0M | void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) { |
305 | 84.0M | if (name.empty()) { |
306 | 5.10M | return; |
307 | 5.10M | } |
308 | | |
309 | | // We want to hold the lock for the minimum amount of time, so we do the |
310 | | // string-splitting and prepare a temp vector of Symbol first. |
311 | 78.9M | const std::vector<absl::string_view> tokens = absl::StrSplit(name, '.'); |
312 | 78.9M | std::vector<Symbol> symbols; |
313 | 78.9M | symbols.reserve(tokens.size()); |
314 | | |
315 | | // Now take the lock and populate the Symbol objects, which involves bumping |
316 | | // ref-counts in this. |
317 | 78.9M | { |
318 | 78.9M | Thread::LockGuard lock(lock_); |
319 | 78.9M | recent_lookups_.lookup(name); |
320 | 391M | for (auto& token : tokens) { |
321 | | // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with |
322 | | // length below some threshold, say 4 bytes. It might be preferable not to |
323 | | // reserve Symbols for every 3 digit number found (for example) in ipv4 |
324 | | // addresses. |
325 | 391M | symbols.push_back(toSymbol(token)); |
326 | 391M | } |
327 | 78.9M | } |
328 | | |
329 | | // Now efficiently encode the array of 32-bit symbols into a uint8_t array. |
330 | 78.9M | encoding.addSymbols(symbols); |
331 | 78.9M | } |
332 | | |
333 | 171k | uint64_t SymbolTable::numSymbols() const { |
334 | 171k | Thread::LockGuard lock(lock_); |
335 | 171k | ASSERT(encode_map_.size() == decode_map_.size()); |
336 | 171k | return encode_map_.size(); |
337 | 171k | } |
338 | | |
339 | 57.6M | std::string SymbolTable::toString(const StatName& stat_name) const { |
340 | 57.6M | return absl::StrJoin(decodeStrings(stat_name), "."); |
341 | 57.6M | } |
342 | | |
343 | 110M | void SymbolTable::incRefCount(const StatName& stat_name) { |
344 | | // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. |
345 | 110M | const SymbolVec symbols = Encoding::decodeSymbols(stat_name); |
346 | | |
347 | 110M | Thread::LockGuard lock(lock_); |
348 | 896M | for (Symbol symbol : symbols) { |
349 | 896M | auto decode_search = decode_map_.find(symbol); |
350 | | |
351 | 896M | ASSERT(decode_search != decode_map_.end(), |
352 | 896M | "Please see " |
353 | 896M | "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#" |
354 | 896M | "debugging-symbol-table-assertions"); |
355 | 896M | auto encode_search = encode_map_.find(decode_search->second->toStringView()); |
356 | 896M | ASSERT(encode_search != encode_map_.end(), |
357 | 896M | "Please see " |
358 | 896M | "https://github.com/envoyproxy/envoy/blob/main/source/docs/stats.md#" |
359 | 896M | "debugging-symbol-table-assertions"); |
360 | | |
361 | 896M | ++encode_search->second.ref_count_; |
362 | 896M | } |
363 | 110M | } |
364 | | |
365 | 194M | void SymbolTable::free(const StatName& stat_name) { |
366 | | // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. |
367 | 194M | const SymbolVec symbols = Encoding::decodeSymbols(stat_name); |
368 | | |
369 | 194M | Thread::LockGuard lock(lock_); |
370 | 1.28G | for (Symbol symbol : symbols) { |
371 | 1.28G | auto decode_search = decode_map_.find(symbol); |
372 | 1.28G | ASSERT(decode_search != decode_map_.end()); |
373 | | |
374 | 1.28G | auto encode_search = encode_map_.find(decode_search->second->toStringView()); |
375 | 1.28G | ASSERT(encode_search != encode_map_.end()); |
376 | | |
377 | | // If that was the last remaining client usage of the symbol, erase the |
378 | | // current mappings and add the now-unused symbol to the reuse pool. |
379 | | // |
380 | | // The "if (--EXPR.ref_count_)" pattern speeds up BM_CreateRace by 20% in |
381 | | // symbol_table_speed_test.cc, relative to breaking out the decrement into a |
382 | | // separate step, likely due to the non-trivial dereferences in EXPR. |
383 | 1.28G | if (--encode_search->second.ref_count_ == 0) { |
384 | 19.9M | decode_map_.erase(decode_search); |
385 | 19.9M | encode_map_.erase(encode_search); |
386 | 19.9M | pool_.push(symbol); |
387 | 19.9M | } |
388 | 1.28G | } |
389 | 194M | } |
390 | | |
391 | 5.00k | uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const { |
392 | 5.00k | uint64_t total = 0; |
393 | 5.00k | absl::flat_hash_map<std::string, uint64_t> name_count_map; |
394 | | |
395 | | // We don't want to hold lock_ while calling the iterator, but we need it to |
396 | | // access recent_lookups_, so we buffer in name_count_map. |
397 | 5.00k | { |
398 | 5.00k | Thread::LockGuard lock(lock_); |
399 | 5.00k | recent_lookups_.forEach( |
400 | 5.00k | [&name_count_map](absl::string_view str, uint64_t count) |
401 | 5.00k | ABSL_NO_THREAD_SAFETY_ANALYSIS { name_count_map[std::string(str)] += count; }); |
402 | 5.00k | total += recent_lookups_.total(); |
403 | 5.00k | } |
404 | | |
405 | | // Now we have the collated name-count map data: we need to vectorize and |
406 | | // sort. We define the pair with the count first as std::pair::operator< |
407 | | // prioritizes its first element over its second. |
408 | 5.00k | using LookupCount = std::pair<uint64_t, absl::string_view>; |
409 | 5.00k | std::vector<LookupCount> lookup_data; |
410 | 5.00k | lookup_data.reserve(name_count_map.size()); |
411 | 5.00k | for (const auto& iter : name_count_map) { |
412 | 0 | lookup_data.emplace_back(LookupCount(iter.second, iter.first)); |
413 | 0 | } |
414 | 5.00k | std::sort(lookup_data.begin(), lookup_data.end()); |
415 | 5.00k | for (const LookupCount& lookup_count : lookup_data) { |
416 | 0 | iter(lookup_count.second, lookup_count.first); |
417 | 0 | } |
418 | 5.00k | return total; |
419 | 5.00k | } |
420 | | |
421 | 797 | DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const { |
422 | 797 | DynamicSpans dynamic_spans; |
423 | | |
424 | 797 | uint32_t index = 0; |
425 | 10.7M | auto record_dynamic = [&dynamic_spans, &index](absl::string_view str) { |
426 | 10.7M | DynamicSpan span; |
427 | 10.7M | span.first = index; |
428 | 10.7M | index += std::count(str.begin(), str.end(), '.'); |
429 | 10.7M | span.second = index; |
430 | 10.7M | ++index; |
431 | 10.7M | dynamic_spans.push_back(span); |
432 | 10.7M | }; |
433 | | |
434 | | // Use decodeTokens to suss out which components of stat_name are |
435 | | // symbolic vs dynamic. The lambda that takes a Symbol is called |
436 | | // for symbolic components. The lambda called with a string_view |
437 | | // is called for dynamic components. |
438 | | // |
439 | | // Note that with fake symbol tables, the Symbol lambda is called |
440 | | // once for each character in the string, and no dynamics will |
441 | | // be recorded. |
442 | 797 | Encoding::decodeTokens( |
443 | 14.9M | stat_name, [&index](Symbol) { ++index; }, record_dynamic); |
444 | 797 | return dynamic_spans; |
445 | 797 | } |
446 | | |
447 | 0 | void SymbolTable::setRecentLookupCapacity(uint64_t capacity) { |
448 | 0 | Thread::LockGuard lock(lock_); |
449 | 0 | recent_lookups_.setCapacity(capacity); |
450 | 0 | } |
451 | | |
452 | 0 | void SymbolTable::clearRecentLookups() { |
453 | 0 | Thread::LockGuard lock(lock_); |
454 | 0 | recent_lookups_.clear(); |
455 | 0 | } |
456 | | |
457 | 0 | uint64_t SymbolTable::recentLookupCapacity() const { |
458 | 0 | Thread::LockGuard lock(lock_); |
459 | 0 | return recent_lookups_.capacity(); |
460 | 0 | } |
461 | | |
462 | 4.64k | StatNameSetPtr SymbolTable::makeSet(absl::string_view name) { |
463 | | // make_unique does not work with private ctor, even though SymbolTable is a friend. |
464 | 4.64k | StatNameSetPtr stat_name_set(new StatNameSet(*this, name)); |
465 | 4.64k | return stat_name_set; |
466 | 4.64k | } |
467 | | |
468 | 391M | Symbol SymbolTable::toSymbol(absl::string_view sv) { |
469 | 391M | Symbol result; |
470 | 391M | auto encode_find = encode_map_.find(sv); |
471 | | // If the string segment doesn't already exist, |
472 | 391M | if (encode_find == encode_map_.end()) { |
473 | | // We create the actual string, place it in the decode_map_, and then insert |
474 | | // a string_view pointing to it in the encode_map_. This allows us to only |
475 | | // store the string once. We use unique_ptr so copies are not made as |
476 | | // flat_hash_map moves values around. |
477 | 19.9M | InlineStringPtr str = InlineString::create(sv); |
478 | 19.9M | auto encode_insert = encode_map_.insert({str->toStringView(), SharedSymbol(next_symbol_)}); |
479 | 19.9M | ASSERT(encode_insert.second); |
480 | 19.9M | auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)}); |
481 | 19.9M | ASSERT(decode_insert.second); |
482 | | |
483 | 19.9M | result = next_symbol_; |
484 | 19.9M | newSymbol(); |
485 | 372M | } else { |
486 | | // If the insertion didn't take place, return the actual value at that location and up the |
487 | | // refcount at that location |
488 | 372M | result = encode_find->second.symbol_; |
489 | 372M | ++(encode_find->second.ref_count_); |
490 | 372M | } |
491 | 391M | return result; |
492 | 391M | } |
493 | | |
494 | | absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const |
495 | 824M | ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { |
496 | 824M | auto search = decode_map_.find(symbol); |
497 | 824M | RELEASE_ASSERT(search != decode_map_.end(), "no such symbol"); |
498 | 824M | return search->second->toStringView(); |
499 | 824M | } |
500 | | |
501 | 19.9M | void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { |
502 | 19.9M | if (pool_.empty()) { |
503 | 18.8M | next_symbol_ = ++monotonic_counter_; |
504 | 18.8M | } else { |
505 | 1.00M | next_symbol_ = pool_.top(); |
506 | 1.00M | pool_.pop(); |
507 | 1.00M | } |
508 | | // This should catch integer overflow for the new symbol. |
509 | 19.9M | ASSERT(monotonic_counter_ != 0); |
510 | 19.9M | } |
511 | | |
512 | 0 | bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { |
513 | | // Proactively take the table lock in anticipation that we'll need to |
514 | | // convert at least one symbol to a string_view, and it's easier not to |
515 | | // bother to lazily take the lock. |
516 | 0 | Thread::LockGuard lock(lock_); |
517 | 0 | return lessThanLockHeld(a, b); |
518 | 0 | } |
519 | | |
520 | | bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const |
521 | 0 | ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { |
522 | 0 | Encoding::TokenIter a_iter(a), b_iter(b); |
523 | 0 | while (true) { |
524 | 0 | Encoding::TokenIter::TokenType a_type = a_iter.next(); |
525 | 0 | Encoding::TokenIter::TokenType b_type = b_iter.next(); |
526 | 0 | if (b_type == Encoding::TokenIter::TokenType::End) { |
527 | 0 | return false; // "x.y.z" > "x.y", "x.y" == "x.y" |
528 | 0 | } |
529 | 0 | if (a_type == Encoding::TokenIter::TokenType::End) { |
530 | 0 | return true; // "x.y" < "x.y.z" |
531 | 0 | } |
532 | 0 | if (a_type == b_type && a_type == Encoding::TokenIter::TokenType::Symbol && |
533 | 0 | a_iter.symbol() == b_iter.symbol()) { |
534 | 0 | continue; // matching symbols don't need to be decoded to strings |
535 | 0 | } |
536 | | |
537 | 0 | absl::string_view a_token = a_type == Encoding::TokenIter::TokenType::Symbol |
538 | 0 | ? fromSymbol(a_iter.symbol()) |
539 | 0 | : a_iter.stringView(); |
540 | 0 | absl::string_view b_token = b_type == Encoding::TokenIter::TokenType::Symbol |
541 | 0 | ? fromSymbol(b_iter.symbol()) |
542 | 0 | : b_iter.stringView(); |
543 | 0 | if (a_token != b_token) { |
544 | 0 | return a_token < b_token; |
545 | 0 | } |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | | #ifndef ENVOY_CONFIG_COVERAGE |
550 | 0 | void SymbolTable::debugPrint() const { |
551 | 0 | Thread::LockGuard lock(lock_); |
552 | 0 | std::vector<Symbol> symbols; |
553 | 0 | for (const auto& p : decode_map_) { |
554 | 0 | symbols.push_back(p.first); |
555 | 0 | } |
556 | 0 | std::sort(symbols.begin(), symbols.end()); |
557 | 0 | for (Symbol symbol : symbols) { |
558 | 0 | const InlineString& token = *decode_map_.find(symbol)->second; |
559 | 0 | const SharedSymbol& shared_symbol = encode_map_.find(token.toStringView())->second; |
560 | 0 | ENVOY_LOG_MISC(info, "{}: '{}' ({})", symbol, token.toStringView(), shared_symbol.ref_count_); |
561 | 0 | } |
562 | 0 | } |
563 | | #endif |
564 | | |
565 | 84.0M | SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) { |
566 | 84.0M | name = StringUtil::removeTrailingCharacters(name, '.'); |
567 | 84.0M | Encoding encoding; |
568 | 84.0M | addTokensToEncoding(name, encoding); |
569 | 84.0M | MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); |
570 | 84.0M | encoding.moveToMemBlock(mem_block); |
571 | 84.0M | return mem_block.release(); |
572 | 84.0M | } |
573 | | |
574 | | StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) |
575 | 84.0M | : StatNameStorageBase(table.encode(name)) {} |
576 | | |
577 | 3.18M | StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { |
578 | 3.18M | const size_t size = src.size(); |
579 | 3.18M | MemBlockBuilder<uint8_t> storage(size); // Note: MemBlockBuilder takes uint64_t. |
580 | 3.18M | src.copyToMemBlock(storage); |
581 | 3.18M | setBytes(storage.release()); |
582 | 3.18M | table.incRefCount(statName()); |
583 | 3.18M | } |
584 | | |
585 | 24.2M | SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) { |
586 | 24.2M | name = StringUtil::removeTrailingCharacters(name, '.'); |
587 | | |
588 | | // For all StatName objects, we first have the total number of bytes in the |
589 | | // representation. But for inlined dynamic string StatName variants, we must |
590 | | // store the length of the payload separately, so that if this token gets |
591 | | // joined with others, we'll know much space it consumes until the next token. |
592 | | // So the layout is |
593 | | // [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ] |
594 | | // So we need to figure out how many bytes we need to represent length-of-name |
595 | | // and name. |
596 | | |
597 | | // payload_bytes is the total number of bytes needed to represent the |
598 | | // characters in name, plus their encoded size, plus the literal indicator. |
599 | 24.2M | const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1; |
600 | | |
601 | | // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and |
602 | | // the length of those. |
603 | 24.2M | const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes); |
604 | 24.2M | MemBlockBuilder<uint8_t> mem_block(total_bytes); |
605 | | |
606 | 24.2M | Encoding::appendEncoding(payload_bytes, mem_block); |
607 | 24.2M | mem_block.appendOne(LiteralStringIndicator); |
608 | 24.2M | Encoding::appendEncoding(name.size(), mem_block); |
609 | 24.2M | mem_block.appendData(absl::MakeSpan(reinterpret_cast<const uint8_t*>(name.data()), name.size())); |
610 | 24.2M | ASSERT(mem_block.capacityRemaining() == 0); |
611 | 24.2M | return mem_block.release(); |
612 | 24.2M | } |
613 | | |
614 | 174M | StatNameStorage::~StatNameStorage() { |
615 | | // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to |
616 | | // decrement the reference counts held by the SymbolTable on behalf of |
617 | | // this StatName. This saves 8 bytes of storage per stat, relative to |
618 | | // holding a SymbolTable& in each StatNameStorage object. |
619 | 174M | ASSERT(bytes() == nullptr); |
620 | 174M | } |
621 | | |
622 | 87.2M | void StatNameStorage::free(SymbolTable& table) { |
623 | | // nolint: https://github.com/llvm/llvm-project/issues/81597 |
624 | | // NOLINTNEXTLINE(clang-analyzer-unix.Malloc) |
625 | 87.2M | table.free(statName()); |
626 | 87.2M | clear(); |
627 | 87.2M | } |
628 | | |
629 | 5.43M | void StatNamePool::clear() { |
630 | 77.1M | for (StatNameStorage& storage : storage_vector_) { |
631 | | // nolint: https://github.com/llvm/llvm-project/issues/81597 |
632 | | // NOLINTNEXTLINE(clang-analyzer-unix.Malloc) |
633 | 77.1M | storage.free(symbol_table_); |
634 | 77.1M | } |
635 | 5.43M | storage_vector_.clear(); |
636 | 5.43M | } |
637 | | |
638 | 77.1M | const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) { |
639 | 77.1M | storage_vector_.emplace_back(str, symbol_table_); |
640 | 77.1M | return storage_vector_.back().bytes(); |
641 | 77.1M | } |
642 | | |
643 | 0 | StatName StatNamePool::add(StatName name) { |
644 | 0 | storage_vector_.emplace_back(name, symbol_table_); |
645 | 0 | return storage_vector_.back().statName(); |
646 | 0 | } |
647 | | |
648 | 76.9M | StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); } |
649 | | |
650 | 24.0M | StatName StatNameDynamicPool::add(absl::string_view str) { |
651 | 24.0M | storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_)); |
652 | 24.0M | return StatName(storage_vector_.back().bytes()); |
653 | 24.0M | } |
654 | | |
655 | 24.4k | StatNameStorageSet::~StatNameStorageSet() { |
656 | | // free() must be called before destructing StatNameStorageSet to decrement |
657 | | // references to all symbols. |
658 | 24.4k | ASSERT(hash_set_.empty()); |
659 | 24.4k | } |
660 | | |
661 | 24.4k | void StatNameStorageSet::free(SymbolTable& symbol_table) { |
662 | | // We must free() all symbols referenced in the set, otherwise the symbols |
663 | | // will leak when the flat_hash_map superclass is destructed. They cannot |
664 | | // self-destruct without an explicit free() as each individual StatNameStorage |
665 | | // object does not have a reference to the symbol table, which would waste 8 |
666 | | // bytes per stat-name. The easiest way to safely free all the contents of the |
667 | | // symbol table set is to use flat_hash_map::extract(), which removes and |
668 | | // returns an element from the set without destructing the element |
669 | | // immediately. This gives us a chance to call free() on each one before they |
670 | | // are destroyed. |
671 | | // |
672 | | // There's a performance risk here, if removing elements via |
673 | | // flat_hash_set::begin() is inefficient to use in a loop like this. One can |
674 | | // imagine a hash-table implementation where the performance of this |
675 | | // usage-model would be poor. However, tests with 100k elements appeared to |
676 | | // run quickly when compiled for optimization, so at present this is not a |
677 | | // performance issue. |
678 | | |
679 | 24.4k | while (!hash_set_.empty()) { |
680 | 0 | auto storage = hash_set_.extract(hash_set_.begin()); |
681 | 0 | storage.value().free(symbol_table); |
682 | 0 | } |
683 | 24.4k | } |
684 | | |
685 | 139M | SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const { |
686 | 139M | size_t num_bytes = 0; |
687 | 338M | for (StatName stat_name : stat_names) { |
688 | 338M | if (!stat_name.empty()) { |
689 | 191M | num_bytes += stat_name.dataSize(); |
690 | 191M | } |
691 | 338M | } |
692 | 139M | MemBlockBuilder<uint8_t> mem_block(Encoding::totalSizeBytes(num_bytes)); |
693 | 139M | Encoding::appendEncoding(num_bytes, mem_block); |
694 | 338M | for (StatName stat_name : stat_names) { |
695 | 338M | stat_name.appendDataToMemBlock(mem_block); |
696 | 338M | } |
697 | 139M | ASSERT(mem_block.capacityRemaining() == 0); |
698 | 139M | return mem_block.release(); |
699 | 139M | } |
700 | | |
701 | 52.9M | void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) { |
702 | 52.9M | RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); |
703 | | |
704 | | // First encode all the names. |
705 | 52.9M | size_t total_size_bytes = 1; /* one byte for holding the number of names */ |
706 | | |
707 | 160M | for (uint32_t i = 0; i < num_names; ++i) { |
708 | 107M | total_size_bytes += names[i].size(); |
709 | 107M | } |
710 | | |
711 | | // Now allocate the exact number of bytes required and move the encodings |
712 | | // into storage. |
713 | 52.9M | MemBlockBuilder<uint8_t> mem_block(total_size_bytes); |
714 | 52.9M | mem_block.appendOne(num_names); |
715 | 160M | for (uint32_t i = 0; i < num_names; ++i) { |
716 | 107M | const StatName stat_name = names[i]; |
717 | 107M | Encoding::appendToMemBlock(stat_name, mem_block); |
718 | 107M | incRefCount(stat_name); |
719 | 107M | } |
720 | | |
721 | | // This assertion double-checks the arithmetic where we computed |
722 | | // total_size_bytes. After appending all the encoded data into the |
723 | | // allocated byte array, we should have exhausted all the memory |
724 | | // we though we needed. |
725 | 52.9M | ASSERT(mem_block.capacityRemaining() == 0); |
726 | 52.9M | list.moveStorageIntoList(mem_block.release()); |
727 | 52.9M | } |
728 | | |
729 | 52.9M | StatNameList::~StatNameList() { ASSERT(!populated()); } |
730 | | |
731 | 447M | void StatNameList::iterate(const std::function<bool(StatName)>& f) const { |
732 | 447M | const uint8_t* p = &storage_[0]; |
733 | 447M | const uint32_t num_elements = *p++; |
734 | 555M | for (uint32_t i = 0; i < num_elements; ++i) { |
735 | 502M | const StatName stat_name(p); |
736 | 502M | p += stat_name.size(); |
737 | 502M | if (!f(stat_name)) { |
738 | 394M | break; |
739 | 394M | } |
740 | 502M | } |
741 | 447M | } |
742 | | |
743 | 52.9M | void StatNameList::clear(SymbolTable& symbol_table) { |
744 | 107M | iterate([&symbol_table](StatName stat_name) -> bool { |
745 | | // nolint: https://github.com/llvm/llvm-project/issues/81597 |
746 | | // NOLINTNEXTLINE(clang-analyzer-unix.Malloc) |
747 | 107M | symbol_table.free(stat_name); |
748 | 107M | return true; |
749 | 107M | }); |
750 | 52.9M | storage_.reset(); |
751 | 52.9M | } |
752 | | |
753 | | StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name) |
754 | 4.64k | : name_(std::string(name)), pool_(symbol_table) { |
755 | 4.64k | builtin_stat_names_[""] = StatName(); |
756 | 4.64k | } |
757 | | |
758 | 190k | void StatNameSet::rememberBuiltin(absl::string_view str) { |
759 | 190k | StatName stat_name; |
760 | 190k | { |
761 | 190k | absl::MutexLock lock(&mutex_); |
762 | 190k | stat_name = pool_.add(str); |
763 | 190k | } |
764 | 190k | builtin_stat_names_[str] = stat_name; |
765 | 190k | } |
766 | | |
767 | 729k | StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) const { |
768 | | // If token was recorded as a built-in during initialization, we can |
769 | | // service this request lock-free. |
770 | 729k | const auto iter = builtin_stat_names_.find(token); |
771 | 729k | if (iter != builtin_stat_names_.end()) { |
772 | 450k | return iter->second; |
773 | 450k | } |
774 | 279k | return fallback; |
775 | 729k | } |
776 | | |
777 | | } // namespace Stats |
778 | | } // namespace Envoy |