Line data Source code
1 : #include "source/extensions/load_balancing_policies/maglev/maglev_lb.h"
2 :
3 : #include "envoy/config/cluster/v3/cluster.pb.h"
4 :
5 : #include "source/common/runtime/runtime_features.h"
6 :
7 : namespace Envoy {
8 : namespace Upstream {
9 : namespace {
10 0 : bool shouldUseCompactTable(size_t num_hosts, uint64_t table_size) {
11 : // Don't use compact maglev on 32-bit platforms.
12 0 : if constexpr (!(ENVOY_BIT_ARRAY_SUPPORTED)) {
13 0 : return false;
14 0 : }
15 :
16 : #ifdef MAGLEV_LB_FORCE_ORIGINAL_IMPL
17 : return false;
18 : #endif
19 :
20 0 : if (num_hosts > MaglevTable::MaxNumberOfHostsForCompactMaglev) {
21 0 : return false;
22 0 : }
23 :
24 0 : constexpr size_t shared_ptr_size = sizeof(HostConstSharedPtr);
25 0 : const uint64_t original_maglev_cost = shared_ptr_size * table_size;
26 : // We might be off by a byte e.g. due to rounding down when going from bits to
27 : // bytes.
28 0 : const uint64_t compact_maglev_cost =
29 0 : shared_ptr_size * num_hosts + ((absl::bit_width(num_hosts) * table_size) / 8);
30 0 : return compact_maglev_cost < original_maglev_cost;
31 0 : }
32 :
33 : /**
34 : * Factory for creating the optimal Maglev table instance for the given parameters.
35 : */
36 : class MaglevFactory : private Logger::Loggable<Logger::Id::upstream> {
37 : public:
38 : static MaglevTableSharedPtr
39 : createMaglevTable(const NormalizedHostWeightVector& normalized_host_weights,
40 : double max_normalized_weight, uint64_t table_size,
41 0 : bool use_hostname_for_hashing, MaglevLoadBalancerStats& stats) {
42 :
43 0 : MaglevTableSharedPtr maglev_table;
44 0 : if (shouldUseCompactTable(normalized_host_weights.size(), table_size)) {
45 0 : maglev_table =
46 0 : std::make_shared<CompactMaglevTable>(normalized_host_weights, max_normalized_weight,
47 0 : table_size, use_hostname_for_hashing, stats);
48 0 : ENVOY_LOG(debug, "creating compact maglev table given table size {} and number of hosts {}",
49 0 : table_size, normalized_host_weights.size());
50 0 : } else {
51 0 : maglev_table =
52 0 : std::make_shared<OriginalMaglevTable>(normalized_host_weights, max_normalized_weight,
53 0 : table_size, use_hostname_for_hashing, stats);
54 0 : ENVOY_LOG(debug, "creating original maglev table given table size {} and number of hosts {}",
55 0 : table_size, normalized_host_weights.size());
56 0 : }
57 :
58 0 : return maglev_table;
59 0 : }
60 : };
61 :
62 : } // namespace
63 :
64 0 : LegacyMaglevLbConfig::LegacyMaglevLbConfig(const ClusterProto& cluster) {
65 0 : if (cluster.has_maglev_lb_config()) {
66 0 : lb_config_ = cluster.maglev_lb_config();
67 0 : }
68 0 : }
69 :
70 0 : TypedMaglevLbConfig::TypedMaglevLbConfig(const MaglevLbProto& lb_config) : lb_config_(lb_config) {}
71 :
72 : ThreadAwareLoadBalancerBase::HashingLoadBalancerSharedPtr
73 : MaglevLoadBalancer::createLoadBalancer(const NormalizedHostWeightVector& normalized_host_weights,
74 : double /* min_normalized_weight */,
75 0 : double max_normalized_weight) {
76 0 : HashingLoadBalancerSharedPtr maglev_lb =
77 0 : MaglevFactory::createMaglevTable(normalized_host_weights, max_normalized_weight, table_size_,
78 0 : use_hostname_for_hashing_, stats_);
79 :
80 0 : if (hash_balance_factor_ == 0) {
81 0 : return maglev_lb;
82 0 : }
83 :
84 0 : return std::make_shared<BoundedLoadHashingLoadBalancer>(
85 0 : maglev_lb, std::move(normalized_host_weights), hash_balance_factor_);
86 0 : }
87 :
// Shared table-construction driver: sorts hosts into a deterministic order,
// derives each host's (offset, skip) permutation parameters from its hash key,
// delegates slot population to the subclass, then records per-host stats.
void MaglevTable::constructMaglevTableInternal(
    const NormalizedHostWeightVector& normalized_host_weights, double max_normalized_weight,
    bool use_hostname_for_hashing) {
  // We can't do anything sensible with no hosts.
  if (normalized_host_weights.empty()) {
    ENVOY_LOG(debug, "maglev: normalized hosts weights is empty, skipping building table");
    return;
  }

  // Prepare stable (sorted) vector of host_weight.
  // Maglev requires stable order of table_build_entries because the hash table will be filled in
  // the order. Unstable table_build_entries results the change of backend assignment.
  std::vector<std::tuple<absl::string_view, HostConstSharedPtr, double>> sorted_host_weights;
  sorted_host_weights.reserve(normalized_host_weights.size());
  for (const auto& host_weight : normalized_host_weights) {
    const auto& host = host_weight.first;
    // Key is the hostname or the address, depending on configuration.
    const absl::string_view key_to_hash = hashKey(host, use_hostname_for_hashing);
    ASSERT(!key_to_hash.empty());

    sorted_host_weights.emplace_back(key_to_hash, host_weight.first, host_weight.second);
  }

  // Sort by key so the build order is independent of host-set iteration order.
  std::sort(sorted_host_weights.begin(), sorted_host_weights.end());

  // Implementation of pseudocode listing 1 in the paper (see header file for more info).
  std::vector<TableBuildEntry> table_build_entries;
  table_build_entries.reserve(normalized_host_weights.size());
  for (const auto& sorted_host_weight : sorted_host_weights) {
    const auto& key_to_hash = std::get<0>(sorted_host_weight);
    const auto& host = std::get<1>(sorted_host_weight);
    const auto& weight = std::get<2>(sorted_host_weight);

    // offset = h1(key) mod M; skip = (h2(key) mod (M - 1)) + 1, so skip is
    // never zero and (with M prime, enforced by the LB ctor) the permutation
    // visits every slot.
    table_build_entries.emplace_back(host, HashUtil::xxHash64(key_to_hash) % table_size_,
                                     (HashUtil::xxHash64(key_to_hash, 1) % (table_size_ - 1)) + 1,
                                     weight);
  }

  constructImplementationInternals(table_build_entries, max_normalized_weight);

  // Update Stats: min/max number of table slots owned by any single host.
  uint64_t min_entries_per_host = table_size_;
  uint64_t max_entries_per_host = 0;
  for (const auto& entry : table_build_entries) {
    min_entries_per_host = std::min(entry.count_, min_entries_per_host);
    max_entries_per_host = std::max(entry.count_, max_entries_per_host);
  }
  stats_.min_entries_per_host_.set(min_entries_per_host);
  stats_.max_entries_per_host_.set(max_entries_per_host);

  // Dumping the whole table is expensive; only do it when trace logging is on.
  if (ENVOY_LOG_CHECK_LEVEL(trace)) {
    logMaglevTable(use_hostname_for_hashing);
  }
}
141 :
// Fills table_ (one HostConstSharedPtr per slot) using the population loop of
// the Maglev paper: on each round, every eligible host claims the next
// unoccupied slot from its permutation sequence until the table is full.
void OriginalMaglevTable::constructImplementationInternals(
    std::vector<TableBuildEntry>& table_build_entries, double max_normalized_weight) {
  // Size internal representation for maglev table correctly.
  table_.resize(table_size_);

  // Iterate through the table build entries as many times as it takes to fill up the table.
  uint64_t table_index = 0;
  for (uint32_t iteration = 1; table_index < table_size_; ++iteration) {
    for (uint64_t i = 0; i < table_build_entries.size() && table_index < table_size_; i++) {
      TableBuildEntry& entry = table_build_entries[i];
      // To understand how target_weight_ and weight_ are used below, consider a host with weight
      // equal to max_normalized_weight. This would be picked on every single iteration. If it had
      // weight equal to max_normalized_weight / 3, then it would only be picked every 3 iterations,
      // etc.
      if (iteration * entry.weight_ < entry.target_weight_) {
        continue;
      }
      entry.target_weight_ += max_normalized_weight;
      // Probe along this host's permutation sequence until a free slot is found.
      uint64_t c = permutation(entry);
      while (table_[c] != nullptr) {
        entry.next_++;
        c = permutation(entry);
      }

      table_[c] = entry.host_;
      entry.next_++;
      entry.count_++; // Per-host slot count; consumed by min/max stats.
      table_index++;
    }
  }
}
// Compact table constructor: the bit array stores a host *index* per slot,
// sized at bit_width(num_hosts) bits per entry, rather than a shared_ptr.
CompactMaglevTable::CompactMaglevTable(const NormalizedHostWeightVector& normalized_host_weights,
                                       double max_normalized_weight, uint64_t table_size,
                                       bool use_hostname_for_hashing,
                                       MaglevLoadBalancerStats& stats)
    : MaglevTable(table_size, stats),
      table_(absl::bit_width(normalized_host_weights.size()), table_size) {
  constructMaglevTableInternal(normalized_host_weights, max_normalized_weight,
                               use_hostname_for_hashing);
}
182 :
// Fills the compact table. Same population loop as the original
// implementation, but the table stores fixed-width host indices into
// host_table_, with a side vector tracking slot occupancy.
void CompactMaglevTable::constructImplementationInternals(
    std::vector<TableBuildEntry>& table_build_entries, double max_normalized_weight) {
  // Populate the host table. Index into table_build_entries[i] will align with
  // the host here.
  host_table_.reserve(table_build_entries.size());
  for (const auto& entry : table_build_entries) {
    host_table_.emplace_back(entry.host_);
  }
  host_table_.shrink_to_fit();

  // Vector to track whether or not a given fixed width bit is set in the
  // BitArray used as the maglev table.
  std::vector<bool> occupied(table_size_, false);

  // Iterate through the table build entries as many times as it takes to fill up the table.
  uint64_t table_index = 0;
  for (uint32_t iteration = 1; table_index < table_size_; ++iteration) {
    for (uint32_t i = 0; i < table_build_entries.size() && table_index < table_size_; i++) {
      TableBuildEntry& entry = table_build_entries[i];
      // To understand how target_weight_ and weight_ are used below, consider a host with weight
      // equal to max_normalized_weight. This would be picked on every single iteration. If it had
      // weight equal to max_normalized_weight / 3, then it would only be picked every 3 iterations,
      // etc.
      if (iteration * entry.weight_ < entry.target_weight_) {
        continue;
      }
      entry.target_weight_ += max_normalized_weight;
      // As we're using the compact implementation, our table size is limited to
      // 32-bit, hence static_cast here should be safe.
      uint32_t c = static_cast<uint32_t>(permutation(entry));
      // Probe along this host's permutation sequence until a free slot is found.
      while (occupied[c]) {
        entry.next_++;
        c = static_cast<uint32_t>(permutation(entry));
      }

      // Record the index of the given host.
      table_.set(c, i);
      occupied[c] = true;

      entry.next_++;
      entry.count_++; // Per-host slot count; consumed by min/max stats.
      table_index++;
    }
  }
}
228 :
229 0 : void OriginalMaglevTable::logMaglevTable(bool use_hostname_for_hashing) const {
230 0 : for (uint64_t i = 0; i < table_.size(); ++i) {
231 0 : const absl::string_view key_to_hash = hashKey(table_[i], use_hostname_for_hashing);
232 0 : ENVOY_LOG(trace, "maglev: i={} address={} host={}", i, table_[i]->address()->asString(),
233 0 : key_to_hash);
234 0 : }
235 0 : }
236 :
237 0 : void CompactMaglevTable::logMaglevTable(bool use_hostname_for_hashing) const {
238 0 : for (uint64_t i = 0; i < table_size_; ++i) {
239 :
240 0 : const uint32_t index = table_.get(i);
241 0 : ASSERT(index < host_table_.size(), "Compact MaglevTable index into host table out of range");
242 0 : const auto& host = host_table_[index];
243 :
244 0 : const absl::string_view key_to_hash = hashKey(host, use_hostname_for_hashing);
245 0 : ENVOY_LOG(trace, "maglev: i={} address={} host={}", i, host->address()->asString(),
246 0 : key_to_hash);
247 0 : }
248 0 : }
249 :
// Base-class state: the table size (validated as prime by the LB constructor)
// and the stats sink updated during table construction.
MaglevTable::MaglevTable(uint64_t table_size, MaglevLoadBalancerStats& stats)
    : table_size_(table_size), stats_(stats) {}
252 :
253 0 : HostConstSharedPtr OriginalMaglevTable::chooseHost(uint64_t hash, uint32_t attempt) const {
254 0 : if (table_.empty()) {
255 0 : return nullptr;
256 0 : }
257 :
258 0 : if (attempt > 0) {
259 : // If a retry host predicate is being applied, mutate the hash to choose an alternate host.
260 : // By using value with most bits set for the retry attempts, we achieve a larger change in
261 : // the hash, thereby reducing the likelihood that all retries are directed to a single host.
262 0 : hash ^= ~0ULL - attempt + 1;
263 0 : }
264 :
265 0 : return table_[hash % table_size_];
266 0 : }
267 :
268 0 : HostConstSharedPtr CompactMaglevTable::chooseHost(uint64_t hash, uint32_t attempt) const {
269 0 : if (host_table_.empty()) {
270 0 : return nullptr;
271 0 : }
272 :
273 0 : if (attempt > 0) {
274 : // If a retry host predicate is being applied, mutate the hash to choose an alternate host.
275 : // By using value with most bits set for the retry attempts, we achieve a larger change in
276 : // the hash, thereby reducing the likelihood that all retries are directed to a single host.
277 0 : hash ^= ~0ULL - attempt + 1;
278 0 : }
279 :
280 0 : const uint32_t index = table_.get(hash % table_size_);
281 0 : ASSERT(index < host_table_.size(), "Compact MaglevTable index into host table out of range");
282 0 : return host_table_[index];
283 0 : }
284 :
285 0 : uint64_t MaglevTable::permutation(const TableBuildEntry& entry) {
286 0 : return (entry.offset_ + (entry.skip_ * entry.next_)) % table_size_;
287 0 : }
288 :
// Legacy constructor: configuration is read from the cluster proto's
// MaglevLbConfig / CommonLbConfig. Throws if the table size is not prime.
MaglevLoadBalancer::MaglevLoadBalancer(
    const PrioritySet& priority_set, ClusterLbStats& stats, Stats::Scope& scope,
    Runtime::Loader& runtime, Random::RandomGenerator& random,
    OptRef<const envoy::config::cluster::v3::Cluster::MaglevLbConfig> config,
    const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config)
    : ThreadAwareLoadBalancerBase(priority_set, stats, runtime, random,
                                  PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(
                                      common_config, healthy_panic_threshold, 100, 50),
                                  common_config.has_locality_weighted_lb_config()),
      scope_(scope.createScope("maglev_lb.")), stats_(generateStats(*scope_)),
      // Fall back to the default table size when no maglev config is present.
      table_size_(config ? PROTOBUF_GET_WRAPPED_OR_DEFAULT(config.ref(), table_size,
                                                           MaglevTable::DefaultTableSize)
                         : MaglevTable::DefaultTableSize),
      use_hostname_for_hashing_(
          common_config.has_consistent_hashing_lb_config()
              ? common_config.consistent_hashing_lb_config().use_hostname_for_hashing()
              : false),
      // 0 (the proto default) disables the bounded-load wrapper.
      hash_balance_factor_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(
          common_config.consistent_hashing_lb_config(), hash_balance_factor, 0)) {
  ENVOY_LOG(debug, "maglev table size: {}", table_size_);
  // The table size must be prime number.
  if (!Primes::isPrime(table_size_)) {
    throw EnvoyException("The table size of maglev must be prime number");
  }
}
314 :
// Typed constructor: configuration comes from the dedicated Maglev
// load-balancing-policy proto. Throws if the table size is not prime.
MaglevLoadBalancer::MaglevLoadBalancer(
    const PrioritySet& priority_set, ClusterLbStats& stats, Stats::Scope& scope,
    Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
    const envoy::extensions::load_balancing_policies::maglev::v3::Maglev& config)
    : ThreadAwareLoadBalancerBase(priority_set, stats, runtime, random, healthy_panic_threshold,
                                  config.has_locality_weighted_lb_config()),
      scope_(scope.createScope("maglev_lb.")), stats_(generateStats(*scope_)),
      table_size_(
          PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, table_size, MaglevTable::DefaultTableSize)),
      use_hostname_for_hashing_(
          config.has_consistent_hashing_lb_config()
              ? config.consistent_hashing_lb_config().use_hostname_for_hashing()
              : false),
      // 0 (the proto default) disables the bounded-load wrapper.
      hash_balance_factor_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(config.consistent_hashing_lb_config(),
                                                           hash_balance_factor, 0)) {
  ENVOY_LOG(debug, "maglev table size: {}", table_size_);
  // The table size must be prime number.
  if (!Primes::isPrime(table_size_)) {
    throw EnvoyException("The table size of maglev must be prime number");
  }
}
336 :
// Instantiates the maglev-specific gauges (e.g. min/max entries per host) in
// the given stats scope.
MaglevLoadBalancerStats MaglevLoadBalancer::generateStats(Stats::Scope& scope) {
  return {ALL_MAGLEV_LOAD_BALANCER_STATS(POOL_GAUGE(scope))};
}
340 :
341 : } // namespace Upstream
342 : } // namespace Envoy
|