Line data Source code
1 : #pragma once
2 :
3 : #include <bitset>
4 : #include <cmath>
5 : #include <cstdint>
6 : #include <memory>
7 : #include <queue>
8 : #include <set>
9 : #include <vector>
10 :
11 : #include "envoy/common/callback.h"
12 : #include "envoy/common/random_generator.h"
13 : #include "envoy/config/cluster/v3/cluster.pb.h"
14 : #include "envoy/extensions/load_balancing_policies/least_request/v3/least_request.pb.h"
15 : #include "envoy/extensions/load_balancing_policies/random/v3/random.pb.h"
16 : #include "envoy/extensions/load_balancing_policies/round_robin/v3/round_robin.pb.h"
17 : #include "envoy/runtime/runtime.h"
18 : #include "envoy/stream_info/stream_info.h"
19 : #include "envoy/upstream/load_balancer.h"
20 : #include "envoy/upstream/upstream.h"
21 :
22 : #include "source/common/protobuf/utility.h"
23 : #include "source/common/runtime/runtime_protos.h"
24 : #include "source/common/upstream/edf_scheduler.h"
25 : #include "source/common/upstream/subset_lb_config.h"
26 :
27 : namespace Envoy {
28 : namespace Upstream {
29 :
30 : // Priority levels and localities are considered overprovisioned with this factor (a percentage, i.e. 140 == 1.4x).
31 : static constexpr uint32_t kDefaultOverProvisioningFactor = 140;
32 :
33 : class LoadBalancerConfigHelper {
34 : public:
35 : template <class LegacyProto>
36 : static absl::optional<envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig>
37 171 : slowStartConfigFromLegacyProto(const LegacyProto& proto_config) {
38 171 : if (!proto_config.has_slow_start_config()) {
39 135 : return {};
40 135 : }
41 0 :
42 36 : envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig slow_start_config;
43 36 : const auto& legacy_slow_start_config = proto_config.slow_start_config();
44 36 : if (legacy_slow_start_config.has_slow_start_window()) {
45 30 : *slow_start_config.mutable_slow_start_window() = legacy_slow_start_config.slow_start_window();
46 30 : }
47 36 : if (legacy_slow_start_config.has_aggression()) {
48 29 : *slow_start_config.mutable_aggression() = legacy_slow_start_config.aggression();
49 29 : }
50 36 : if (legacy_slow_start_config.has_min_weight_percent()) {
51 5 : *slow_start_config.mutable_min_weight_percent() =
52 5 : legacy_slow_start_config.min_weight_percent();
53 5 : }
54 36 : return slow_start_config;
55 171 : }
56 :
57 : template <class Proto>
58 : static absl::optional<envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig>
59 0 : slowStartConfigFromProto(const Proto& proto_config) {
60 0 : if (!proto_config.has_slow_start_config()) {
61 0 : return {};
62 0 : }
63 0 : return proto_config.slow_start_config();
64 0 : }
65 :
66 : static absl::optional<envoy::extensions::load_balancing_policies::common::v3::LocalityLbConfig>
67 : localityLbConfigFromCommonLbConfig(
68 : const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config);
69 :
70 : template <class Proto>
71 : static absl::optional<envoy::extensions::load_balancing_policies::common::v3::LocalityLbConfig>
72 0 : localityLbConfigFromProto(const Proto& proto_config) {
73 0 : if (!proto_config.has_locality_lb_config()) {
74 0 : return {};
75 0 : }
76 0 : return proto_config.locality_lb_config();
77 0 : }
78 : };
79 :
80 : /**
81 : * Base class for all LB implementations.
82 : */
83 : class LoadBalancerBase : public LoadBalancer, protected Logger::Loggable<Logger::Id::upstream> {
84 : public:
85 : enum class HostAvailability { Healthy, Degraded };
86 :
87 : // A utility function to choose a priority level based on a precomputed hash and
88 : // two PriorityLoad vectors, one for healthy load and one for degraded.
89 : //
90 : // Returns the priority, a number between 0 and per_priority_load.size() - 1, as well as
91 : // which host availability level was chosen.
92 : static std::pair<uint32_t, HostAvailability>
93 : choosePriority(uint64_t hash, const HealthyLoad& healthy_per_priority_load,
94 : const DegradedLoad& degraded_per_priority_load);
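// For intuition, a minimal sketch of the selection described above, assuming the healthy and
// degraded loads are percentages that together sum to 100 (the real implementation lives in the
// corresponding .cc file and may differ in detail):
//
//   std::pair<uint32_t, HostAvailability>
//   choosePrioritySketch(uint64_t hash, const std::vector<uint32_t>& healthy,
//                        const std::vector<uint32_t>& degraded) {
//     const uint64_t point = hash % 100; // Map the hash onto [0, 100).
//     uint64_t cumulative = 0;
//     // Walk the cumulative healthy loads first.
//     for (uint32_t p = 0; p < healthy.size(); ++p) {
//       cumulative += healthy[p];
//       if (point < cumulative) {
//         return {p, HostAvailability::Healthy};
//       }
//     }
//     // Spill over into the degraded loads.
//     for (uint32_t p = 0; p < degraded.size(); ++p) {
//       cumulative += degraded[p];
//       if (point < cumulative) {
//         return {p, HostAvailability::Degraded};
//       }
//     }
//     return {0, HostAvailability::Healthy}; // Unreachable when the loads sum to 100.
//   }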
95 :
96 : // Pool selection not implemented.
97 : absl::optional<Upstream::SelectedPoolAndConnection>
98 : selectExistingConnection(Upstream::LoadBalancerContext* /*context*/,
99 : const Upstream::Host& /*host*/,
100 0 : std::vector<uint8_t>& /*hash_key*/) override {
101 0 : return absl::nullopt;
102 0 : }
103 : // Lifetime tracking not implemented.
104 0 : OptRef<Envoy::Http::ConnectionPool::ConnectionLifetimeCallbacks> lifetimeCallbacks() override {
105 0 : return {};
106 0 : }
107 :
108 : protected:
109 : /**
110 : * For the given host_set, @return whether we should be in panic mode or not. For example, if
111 : * the majority of hosts are unhealthy we will likely be in panic mode. In that case we route
112 : * requests to hosts regardless of whether they are healthy or not.
113 : */
114 : bool isHostSetInPanic(const HostSet& host_set) const;
115 :
116 : /**
117 : * Called when all host sets are in panic mode.
118 : * In that state the load is distributed based on the number of hosts
119 : * in each priority, regardless of their health.
120 : */
121 : void recalculateLoadInTotalPanic();
122 :
123 : LoadBalancerBase(const PrioritySet& priority_set, ClusterLbStats& stats, Runtime::Loader& runtime,
124 : Random::RandomGenerator& random, uint32_t healthy_panic_threshold);
125 :
126 : // Choose a host set randomly, based on the per-priority healthy and degraded loads stored in
127 : // per_priority_load_. The healthy load is consulted first, spilling over to the degraded load
128 : // if necessary. When a host set is selected based on its degraded load, only degraded hosts
129 : // should be selected from that host set.
130 : //
131 : // @return host set to use and which availability to target.
132 : std::pair<HostSet&, HostAvailability> chooseHostSet(LoadBalancerContext* context,
133 : uint64_t hash) const;
134 :
135 0 : uint32_t percentageLoad(uint32_t priority) const {
136 0 : return per_priority_load_.healthy_priority_load_.get()[priority];
137 0 : }
138 0 : uint32_t percentageDegradedLoad(uint32_t priority) const {
139 0 : return per_priority_load_.degraded_priority_load_.get()[priority];
140 0 : }
141 0 : bool isInPanic(uint32_t priority) const { return per_priority_panic_[priority]; }
142 977 : uint64_t random(bool peeking) {
143 977 : if (peeking) {
144 198 : stashed_random_.push_back(random_.random());
145 198 : return stashed_random_.back();
146 779 : } else {
147 779 : if (!stashed_random_.empty()) {
148 170 : auto random = stashed_random_.front();
149 170 : stashed_random_.pop_front();
150 170 : return random;
151 609 : } else {
152 609 : return random_.random();
153 609 : }
154 779 : }
155 977 : }
156 :
157 : ClusterLbStats& stats_;
158 : Runtime::Loader& runtime_;
159 : std::deque<uint64_t> stashed_random_;
160 : Random::RandomGenerator& random_;
161 : const uint32_t default_healthy_panic_percent_;
162 : // The priority-ordered set of hosts to use for load balancing.
163 : const PrioritySet& priority_set_;
164 :
165 : public:
166 : // Called when a host set at the given priority level is updated. This updates
167 : // per_priority_health for that priority level, and may update per_priority_load for all
168 : // priority levels.
169 : static void recalculatePerPriorityState(uint32_t priority, const PrioritySet& priority_set,
170 : HealthyAndDegradedLoad& priority_load,
171 : HealthyAvailability& per_priority_health,
172 : DegradedAvailability& per_priority_degraded,
173 : uint32_t& total_healthy_hosts);
174 : void recalculatePerPriorityPanic();
175 :
176 : protected:
177 : // Calculates the normalized total availability.
178 : //
179 : // The availability of a priority is the ratio of available (healthy/degraded) hosts over the
180 : // total number of hosts, multiplied by 100 and by the overprovisioning factor. The total
181 : // availability is the sum of the availability of each priority, up to a maximum of 100.
182 : //
183 : // For example, using the default overprovisioning factor of 1.4, if priority A has 4 hosts,
184 : // of which 1 is degraded and 1 is healthy, it will have an availability of 2/4 * 100 * 1.4 = 70.
185 : //
186 : // Assuming two priorities with availability 60 and 70, the total availability would be 100.
187 : static uint32_t
188 : calculateNormalizedTotalAvailability(HealthyAvailability& per_priority_health,
189 2741 : DegradedAvailability& per_priority_degraded) {
190 2741 : const auto health =
191 2741 : std::accumulate(per_priority_health.get().begin(), per_priority_health.get().end(), 0);
192 2741 : const auto degraded =
193 2741 : std::accumulate(per_priority_degraded.get().begin(), per_priority_degraded.get().end(), 0);
194 :
195 2741 : return std::min<uint32_t>(health + degraded, 100);
196 2741 : }
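// To illustrate the spillover described above: a simplified sketch of how per-priority
// availability could be turned into the healthy priority load, where each priority absorbs as
// much of the remaining 100% as its availability allows. The real recalculation also
// renormalizes when total availability is below 100 and splits healthy vs. degraded load, so
// treat this only as intuition.
//
//   // Given availabilities {60, 70} this yields loads {60, 40}: priority 0 takes 60%, and
//   // priority 1 absorbs the remaining 40% even though it could handle 70%.
//   std::vector<uint32_t> spillLoadSketch(const std::vector<uint32_t>& availability) {
//     std::vector<uint32_t> load(availability.size(), 0);
//     uint32_t remaining = 100;
//     for (size_t i = 0; i < availability.size() && remaining > 0; ++i) {
//       load[i] = std::min(availability[i], remaining);
//       remaining -= load[i];
//     }
//     return load;
//   }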
197 :
198 : // The percentage load (0-100) for each priority level when targeting healthy hosts and
199 : // the percentage load (0-100) for each priority level when targeting degraded hosts.
200 : HealthyAndDegradedLoad per_priority_load_;
201 : // The health percentage (0-100) for each priority level.
202 : HealthyAvailability per_priority_health_;
203 : // The degraded percentage (0-100) for each priority level.
204 : DegradedAvailability per_priority_degraded_;
205 : // Levels which are in panic mode.
206 : std::vector<bool> per_priority_panic_;
207 : // The total count of healthy hosts across all priority levels.
208 : uint32_t total_healthy_hosts_;
209 :
210 : private:
211 : Common::CallbackHandlePtr priority_update_cb_;
212 : };
213 :
214 : class LoadBalancerContextBase : public LoadBalancerContext {
215 : public:
216 0 : absl::optional<uint64_t> computeHashKey() override { return {}; }
217 :
218 0 : const Network::Connection* downstreamConnection() const override { return nullptr; }
219 :
220 0 : const Router::MetadataMatchCriteria* metadataMatchCriteria() override { return nullptr; }
221 :
222 0 : const StreamInfo::StreamInfo* requestStreamInfo() const override { return nullptr; }
223 :
224 0 : const Http::RequestHeaderMap* downstreamHeaders() const override { return nullptr; }
225 :
226 : const HealthyAndDegradedLoad&
227 : determinePriorityLoad(const PrioritySet&, const HealthyAndDegradedLoad& original_priority_load,
228 0 : const Upstream::RetryPriority::PriorityMappingFunc&) override {
229 0 : return original_priority_load;
230 0 : }
231 :
232 0 : bool shouldSelectAnotherHost(const Host&) override { return false; }
233 :
234 0 : uint32_t hostSelectionRetryCount() const override { return 1; }
235 :
236 0 : Network::Socket::OptionsSharedPtr upstreamSocketOptions() const override { return nullptr; }
237 :
238 0 : Network::TransportSocketOptionsConstSharedPtr upstreamTransportSocketOptions() const override {
239 0 : return nullptr;
240 0 : }
241 :
242 0 : absl::optional<OverrideHost> overrideHostToSelect() const override { return {}; }
243 : };
244 :
245 : /**
246 : * Base class for zone aware load balancers
247 : */
248 : class ZoneAwareLoadBalancerBase : public LoadBalancerBase {
249 : public:
250 : using LocalityLbConfig = envoy::extensions::load_balancing_policies::common::v3::LocalityLbConfig;
251 :
252 : HostConstSharedPtr chooseHost(LoadBalancerContext* context) override;
253 :
254 : protected:
255 : // Both priority_set and local_priority_set (if non-null) must have at least one host set.
256 : ZoneAwareLoadBalancerBase(const PrioritySet& priority_set, const PrioritySet* local_priority_set,
257 : ClusterLbStats& stats, Runtime::Loader& runtime,
258 : Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
259 : const absl::optional<LocalityLbConfig> locality_config);
260 :
261 : // When making an LB decision, we need to know how to index into the
262 : // priority_set. This priority_set cursor is used by ZoneAwareLoadBalancerBase subclasses, e.g.
263 : // RoundRobinLoadBalancer, to index into auxiliary data structures specific to the LB for
264 : // a given host set selection.
265 : struct HostsSource {
266 : enum class SourceType : uint8_t {
267 : // All hosts in the host set.
268 : AllHosts,
269 : // All healthy hosts in the host set.
270 : HealthyHosts,
271 : // All degraded hosts in the host set.
272 : DegradedHosts,
273 : // Healthy hosts for locality @ locality_index.
274 : LocalityHealthyHosts,
275 : // Degraded hosts for locality @ locality_index.
276 : LocalityDegradedHosts,
277 : };
278 :
279 977 : HostsSource() = default;
280 :
281 : // TODO(kbaichoo): plumb the priority parameter as uint8_t all the way from
282 : // the config.
283 : HostsSource(uint32_t priority, SourceType source_type)
284 3222 : : priority_(static_cast<uint8_t>(priority)), source_type_(source_type) {
285 3222 : ASSERT(priority <= 128, "Priority out of bounds.");
286 3222 : ASSERT(source_type == SourceType::AllHosts || source_type == SourceType::HealthyHosts ||
287 3222 : source_type == SourceType::DegradedHosts);
288 3222 : }
289 :
290 : // TODO(kbaichoo): plumb the priority parameter as uint8_t all the way from
291 : // the config.
292 : HostsSource(uint32_t priority, SourceType source_type, uint32_t locality_index)
293 : : priority_(static_cast<uint8_t>(priority)), source_type_(source_type),
294 1560 : locality_index_(locality_index) {
295 1560 : ASSERT(priority <= 128, "Priority out of bounds.");
296 1560 : ASSERT(source_type == SourceType::LocalityHealthyHosts ||
297 1560 : source_type == SourceType::LocalityDegradedHosts);
298 1560 : }
299 :
300 : // Priority in PrioritySet.
301 : uint8_t priority_{};
302 :
303 : // How to index into HostSet for a given priority.
304 : SourceType source_type_{};
305 :
306 : // Locality index into HostsPerLocality for SourceType::LocalityHealthyHosts.
307 : uint32_t locality_index_{};
308 :
309 3803 : bool operator==(const HostsSource& other) const {
310 3803 : return priority_ == other.priority_ && source_type_ == other.source_type_ &&
311 3803 : locality_index_ == other.locality_index_;
312 3803 : }
313 : };
314 :
315 : struct HostsSourceHash {
316 13680 : size_t operator()(const HostsSource& hs) const {
317 : // This is only used for absl::flat_hash_map keys, so we don't need a deterministic hash.
318 13680 : size_t hash = std::hash<uint32_t>()(hs.priority_);
319 13680 : hash = 37 * hash + std::hash<size_t>()(static_cast<std::size_t>(hs.source_type_));
320 13680 : hash = 37 * hash + std::hash<uint32_t>()(hs.locality_index_);
321 13680 : return hash;
322 13680 : }
323 : };
324 :
325 : /**
326 : * By implementing this method instead of chooseHost, host selection will
327 : * be subject to host filters specified by LoadBalancerContext.
328 : *
329 : * Host selection will be retried up to the number specified by
330 : * hostSelectionRetryCount on LoadBalancerContext, and if no hosts are found
331 : * within the allowed attempts, the host that was selected during the last
332 : * attempt will be returned.
333 : *
334 : * If host selection is idempotent (i.e. retrying will not change the outcome),
335 : * subclasses should override chooseHost to avoid the unnecessary overhead of
336 : * retrying host selection.
337 : */
338 : virtual HostConstSharedPtr chooseHostOnce(LoadBalancerContext* context) PURE;
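// A rough sketch of the retry behavior described above, assuming the retry count is treated as
// the maximum number of attempts (the actual loop is implemented by chooseHost in the
// corresponding .cc file):
//
//   HostConstSharedPtr chooseWithRetriesSketch(LoadBalancerContext* context) {
//     HostConstSharedPtr host;
//     const uint32_t attempts = context != nullptr ? context->hostSelectionRetryCount() : 1;
//     for (uint32_t i = 0; i < std::max<uint32_t>(attempts, 1); ++i) {
//       host = chooseHostOnce(context);
//       // Stop as soon as the context accepts the host; otherwise keep the last pick.
//       if (host == nullptr || context == nullptr || !context->shouldSelectAnotherHost(*host)) {
//         break;
//       }
//     }
//     return host;
//   }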
339 :
340 : /**
341 : * Pick the host source to use, doing zone aware routing when the hosts are sufficiently healthy.
342 : * If no host is chosen (due to fail_traffic_on_panic being set), return absl::nullopt.
343 : */
344 : absl::optional<HostsSource> hostSourceToUse(LoadBalancerContext* context, uint64_t hash) const;
345 :
346 : /**
347 : * Index into priority_set via hosts source descriptor.
348 : */
349 : const HostVector& hostSourceToHosts(HostsSource hosts_source) const;
350 :
351 : private:
352 : enum class LocalityRoutingState {
353 : // Locality based routing is off.
354 : NoLocalityRouting,
355 : // All queries can be routed to the local locality.
356 : LocalityDirect,
357 : // The local locality can not handle the anticipated load. Residual load will be spread across
358 : // various other localities.
359 : LocalityResidual
360 : };
361 :
362 : struct LocalityPercentages {
363 : // The percentage of local hosts in a specific locality.
364 : // The percentage is stored as an integer, scaled by a 10000 multiplier for better precision.
365 : // If upstream_percentage is 0, local_percentage may not be representative
366 : // of the actual percentage and will be set to 0.
367 : uint64_t local_percentage;
368 : // The percentage of upstream hosts in a specific locality.
369 : // The percentage is stored as an integer, scaled by a 10000 multiplier for better precision.
370 : uint64_t upstream_percentage;
371 : };
372 :
373 : /**
374 : * Increase per_priority_state_ to at least priority_set.hostSetsPerPriority().size()
375 : */
376 : void resizePerPriorityState();
377 :
378 : /**
379 : * @return decision on quick exit from locality aware routing based on cluster configuration.
380 : * This gets recalculated on update callback.
381 : */
382 : bool earlyExitNonLocalityRoutingNew();
383 :
384 : /**
385 : * @return decision on quick exit from locality aware routing based on cluster configuration.
386 : * This gets recalculated on update callback.
387 : *
388 : * This is the legacy version of the function from previous versions of Envoy, kept temporarily
389 : * as an alternate code-path to reduce the risk of changes.
390 : */
391 : bool earlyExitNonLocalityRouting();
392 :
393 : /**
394 : * Try to select upstream hosts from the same locality.
395 : * @param host_set the last host set returned by chooseHostSet()
396 : */
397 : uint32_t tryChooseLocalLocalityHosts(const HostSet& host_set) const;
398 :
399 : /**
400 : * @return combined per-locality information about percentages of local/upstream hosts in each
401 : * upstream locality. See LocalityPercentages for more details. The ordering of localities
402 : * matches the ordering of upstream localities in the input upstream_hosts_per_locality.
403 : */
404 : absl::FixedArray<LocalityPercentages>
405 : calculateLocalityPercentagesNew(const HostsPerLocality& local_hosts_per_locality,
406 : const HostsPerLocality& upstream_hosts_per_locality);
407 :
408 : /**
409 : * @return (number of hosts in a given locality) / (total number of hosts) via the `ret` param.
410 : * The result is stored as an integer, scaled by a 10000 multiplier for better precision.
411 : * The caller is responsible for allocation/de-allocation of `ret`.
412 : *
413 : * This is the legacy version of the function from previous versions of Envoy, kept temporarily
414 : * as an alternate code-path to reduce the risk of changes.
415 : */
416 : void calculateLocalityPercentage(const HostsPerLocality& hosts_per_locality, uint64_t* ret);
417 :
418 : /**
419 : * Regenerate locality aware routing structures for fast decisions on upstream locality selection.
420 : */
421 : void regenerateLocalityRoutingStructuresNew();
422 :
423 : /**
424 : * Regenerate locality aware routing structures for fast decisions on upstream locality selection.
425 : *
426 : * This is the legacy version of the function from previous versions of Envoy, kept temporarily
427 : * as an alternate code-path to reduce the risk of changes.
428 : */
429 : void regenerateLocalityRoutingStructures();
430 :
431 164 : HostSet& localHostSet() const { return *local_priority_set_->hostSetsPerPriority()[0]; }
432 :
433 : static absl::optional<HostsSource::SourceType>
434 0 : localitySourceType(HostAvailability host_availability) {
435 0 : switch (host_availability) {
436 0 : case HostAvailability::Healthy:
437 0 : return absl::make_optional<HostsSource::SourceType>(
438 0 : HostsSource::SourceType::LocalityHealthyHosts);
439 0 : case HostAvailability::Degraded:
440 0 : return absl::make_optional<HostsSource::SourceType>(
441 0 : HostsSource::SourceType::LocalityDegradedHosts);
442 0 : }
443 0 : IS_ENVOY_BUG("unexpected locality source type enum");
444 0 : return absl::nullopt;
445 0 : }
446 :
447 611 : static absl::optional<HostsSource::SourceType> sourceType(HostAvailability host_availability) {
448 611 : switch (host_availability) {
449 572 : case HostAvailability::Healthy:
450 572 : return absl::make_optional<HostsSource::SourceType>(HostsSource::SourceType::HealthyHosts);
451 39 : case HostAvailability::Degraded:
452 39 : return absl::make_optional<HostsSource::SourceType>(HostsSource::SourceType::DegradedHosts);
453 611 : }
454 :
455 0 : IS_ENVOY_BUG("unexpected source type enum");
456 0 : return absl::nullopt;
457 611 : }
458 :
459 : // The set of local Envoy instances which are load balancing across priority_set_.
460 : const PrioritySet* local_priority_set_;
461 :
462 : struct PerPriorityState {
463 : // The percent of requests which can be routed to the local locality.
464 : uint64_t local_percent_to_route_{};
465 : // Tracks the current state of locality based routing.
466 : LocalityRoutingState locality_routing_state_{LocalityRoutingState::NoLocalityRouting};
467 : // When locality_routing_state_ == LocalityResidual, this tracks the capacity
468 : // for each of the non-local localities to determine what traffic should be
469 : // routed where.
470 : std::vector<uint64_t> residual_capacity_;
471 : };
472 : using PerPriorityStatePtr = std::unique_ptr<PerPriorityState>;
473 : // Routing state broken out for each priority level in priority_set_.
474 : std::vector<PerPriorityStatePtr> per_priority_state_;
475 : Common::CallbackHandlePtr priority_update_cb_;
476 : Common::CallbackHandlePtr local_priority_set_member_update_cb_handle_;
477 :
478 : // Config for zone aware routing.
479 : const uint64_t min_cluster_size_;
480 : // Keep small members (bools and enums) at the end of class, to reduce alignment overhead.
481 : const uint32_t routing_enabled_;
482 : const bool fail_traffic_on_panic_ : 1;
483 : const bool use_new_locality_routing_ : 1;
484 :
485 : // If locality weight aware routing is enabled.
486 : const bool locality_weighted_balancing_ : 1;
487 :
488 : friend class TestZoneAwareLoadBalancer;
489 : };
490 :
491 : /**
492 : * Base implementation of LoadBalancer that performs weighted RR selection across the hosts in the
493 : * cluster. This scheduler respects host weighting and utilizes an EdfScheduler to achieve O(log
494 : * n) pick and insertion time complexity, O(n) memory use. The key insight is that if we schedule
495 : * with 1 / weight deadline, we will achieve the desired pick frequency for weighted RR in a given
496 : * interval. Naive implementations of weighted RR are either O(n) pick time or O(m * n) memory use,
497 : * where m is the weight range. We also explicitly check for the unweighted special case and use a
498 : * simple index to achieve O(1) scheduling in that case.
499 : * TODO(htuch): We use EDF at Google, but the EDF scheduler may be overkill if we don't want to
500 : * support large ranges of weights or arbitrary-precision floating-point weights; we could instead
501 : * construct an explicit schedule, since m will be a small constant factor in O(m * n). This
502 : * could also be done in a thread-aware LB, avoiding the creation of multiple EDF
503 : * instances.
504 : *
505 : * This base class also supports unweighted selection which derived classes can use to customize
506 : * behavior. Derived classes can also override how host weight is determined when in weighted mode.
507 : */
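// For intuition on the 1 / weight deadlines mentioned above, a toy (non-Envoy) earliest-deadline
// pick loop: the entry with the smallest deadline is picked, and its deadline then advances by
// 1 / weight, so an entry with weight 2 is picked twice as often as an entry with weight 1.
//
//   struct ToyEntry {
//     double weight;
//     double deadline; // Time at which this entry is next eligible; starts at 1 / weight.
//   };
//
//   size_t toyEdfPick(std::vector<ToyEntry>& entries) {
//     size_t best = 0;
//     for (size_t i = 1; i < entries.size(); ++i) {
//       if (entries[i].deadline < entries[best].deadline) {
//         best = i;
//       }
//     }
//     entries[best].deadline += 1.0 / entries[best].weight;
//     return best;
//   }
//
// Over a long window, entry i is picked in proportion to weight_i / sum(weights), which is the
// weighted round robin frequency; the real EdfScheduler achieves O(log n) picks by keeping the
// entries in a priority queue instead of scanning linearly.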
508 : class EdfLoadBalancerBase : public ZoneAwareLoadBalancerBase {
509 : public:
510 : using SlowStartConfig = envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig;
511 :
512 : EdfLoadBalancerBase(const PrioritySet& priority_set, const PrioritySet* local_priority_set,
513 : ClusterLbStats& stats, Runtime::Loader& runtime,
514 : Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
515 : const absl::optional<LocalityLbConfig> locality_config,
516 : const absl::optional<SlowStartConfig> slow_start_config,
517 : TimeSource& time_source);
518 :
519 : // Upstream::ZoneAwareLoadBalancerBase
520 : HostConstSharedPtr peekAnotherHost(LoadBalancerContext* context) override;
521 : HostConstSharedPtr chooseHostOnce(LoadBalancerContext* context) override;
522 :
523 : protected:
524 : struct Scheduler {
525 : // EdfScheduler for weighted LB. The edf_ is only created when the original
526 : // host weights of 2 or more hosts differ. When not present, the
527 : // implementation of chooseHostOnce falls back to unweightedHostPick.
528 : std::unique_ptr<EdfScheduler<const Host>> edf_;
529 : };
530 :
531 : void initialize();
532 :
533 : virtual void refresh(uint32_t priority);
534 :
535 : bool isSlowStartEnabled() const;
536 : bool noHostsAreInSlowStart() const;
537 :
538 : virtual void recalculateHostsInSlowStart(const HostVector& hosts_added);
539 :
540 : // Seed to allow us to desynchronize load balancers across a fleet. If we don't
541 : // do this, multiple Envoys that receive an update at the same time (or even
542 : // multiple load balancers on the same host) will send requests to
543 : // backends in roughly lock step, causing significant imbalance and potential
544 : // overload.
545 : const uint64_t seed_;
546 :
547 : double applySlowStartFactor(double host_weight, const Host& host) const;
548 :
549 : private:
550 : friend class EdfLoadBalancerBasePeer;
551 : virtual void refreshHostSource(const HostsSource& source) PURE;
552 : virtual double hostWeight(const Host& host) const PURE;
553 : virtual HostConstSharedPtr unweightedHostPeek(const HostVector& hosts_to_use,
554 : const HostsSource& source) PURE;
555 : virtual HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
556 : const HostsSource& source) PURE;
557 :
558 : // Scheduler for each valid HostsSource.
559 : absl::flat_hash_map<HostsSource, Scheduler, HostsSourceHash> scheduler_;
560 : Common::CallbackHandlePtr priority_update_cb_;
561 : Common::CallbackHandlePtr member_update_cb_;
562 :
563 : protected:
564 : // Slow start related config
565 : const std::chrono::milliseconds slow_start_window_;
566 : const absl::optional<Runtime::Double> aggression_runtime_;
567 : TimeSource& time_source_;
568 : MonotonicTime latest_host_added_time_;
569 : const double slow_start_min_weight_percent_;
570 : };
571 :
572 : /**
573 : * A round robin load balancer. When in weighted mode, EDF scheduling is used. When not in
574 : * weighted mode, simple RR index selection is used.
575 : */
576 : class RoundRobinLoadBalancer : public EdfLoadBalancerBase {
577 : public:
578 : RoundRobinLoadBalancer(
579 : const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
580 : Runtime::Loader& runtime, Random::RandomGenerator& random,
581 : const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config,
582 : OptRef<const envoy::config::cluster::v3::Cluster::RoundRobinLbConfig> round_robin_config,
583 : TimeSource& time_source)
584 : : EdfLoadBalancerBase(
585 : priority_set, local_priority_set, stats, runtime, random,
586 : PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(common_config, healthy_panic_threshold,
587 : 100, 50),
588 : LoadBalancerConfigHelper::localityLbConfigFromCommonLbConfig(common_config),
589 : round_robin_config.has_value()
590 : ? LoadBalancerConfigHelper::slowStartConfigFromLegacyProto(round_robin_config.ref())
591 : : absl::nullopt,
592 396 : time_source) {
593 396 : initialize();
594 396 : }
595 :
596 : RoundRobinLoadBalancer(
597 : const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
598 : Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
599 : const envoy::extensions::load_balancing_policies::round_robin::v3::RoundRobin&
600 : round_robin_config,
601 : TimeSource& time_source)
602 : : EdfLoadBalancerBase(
603 : priority_set, local_priority_set, stats, runtime, random, healthy_panic_threshold,
604 : LoadBalancerConfigHelper::localityLbConfigFromProto(round_robin_config),
605 0 : LoadBalancerConfigHelper::slowStartConfigFromProto(round_robin_config), time_source) {
606 0 : initialize();
607 0 : }
608 :
609 : private:
610 3312 : void refreshHostSource(const HostsSource& source) override {
611 0 : // insert() is used here on purpose so that we don't overwrite the index if the host source
612 0 : // already exists. Note that host sources will never be removed, but given how uncommon this
613 0 : // is it probably doesn't matter.
614 3312 : rr_indexes_.insert({source, seed_});
615 0 : // If the list of hosts changes, the order of picks change. Discard the
616 0 : // index.
617 3312 : peekahead_index_ = 0;
618 3312 : }
619 127544 : double hostWeight(const Host& host) const override {
620 127544 : if (!noHostsAreInSlowStart()) {
621 336 : return applySlowStartFactor(host.weight(), host);
622 336 : }
623 127208 : return host.weight();
624 127544 : }
625 :
626 : HostConstSharedPtr unweightedHostPeek(const HostVector& hosts_to_use,
627 60 : const HostsSource& source) override {
628 60 : auto i = rr_indexes_.find(source);
629 60 : if (i == rr_indexes_.end()) {
630 0 : return nullptr;
631 0 : }
632 60 : return hosts_to_use[(i->second + (peekahead_index_)++) % hosts_to_use.size()];
633 60 : }
634 :
635 : HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
636 340 : const HostsSource& source) override {
637 340 : if (peekahead_index_ > 0) {
638 54 : --peekahead_index_;
639 54 : }
640 0 : // To avoid storing the RR index in the base class, we end up using a second map here with
641 0 : // host source as the key. This means that each LB decision will require two map lookups in
642 0 : // the unweighted case. We might consider trying to optimize this in the future.
643 340 : ASSERT(rr_indexes_.find(source) != rr_indexes_.end());
644 340 : return hosts_to_use[rr_indexes_[source]++ % hosts_to_use.size()];
645 340 : }
646 :
647 : uint64_t peekahead_index_{};
648 : absl::flat_hash_map<HostsSource, uint64_t, HostsSourceHash> rr_indexes_;
649 : };
650 :
651 : /**
652 : * Weighted Least Request load balancer.
653 : *
654 : * In a normal setup, when all hosts have the same weight, it randomly picks N healthy hosts
655 : * (where N is specified in the LB configuration) and compares their number of active requests.
656 : * The technique is based on http://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf and is
657 : * known as P2C (power of two choices).
658 : *
659 : * When hosts have different weights, an RR EDF schedule is used. Host weight is scaled
660 : * by the number of active requests at pick/insert time. Thus, hosts will never fully drain as
661 : * they would in normal P2C, though they will get picked less and less often. In the future, we
662 : * can consider two alternate algorithms:
663 : * 1) Expand out all hosts by weight (using more memory) and do standard P2C.
664 : * 2) Use a weighted Maglev table, and perform P2C on two random hosts selected from the table.
665 : * The benefit of the Maglev table is at the expense of resolution, memory usage is capped.
666 : * Additionally, the Maglev table can be shared amongst all threads.
667 : */
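// A simplified sketch of the unweighted P2C pick described above, assuming two random candidate
// indexes are drawn (possibly equal) from the host vector; the configured choice_count
// generalizes this to N candidates, and the weighted path uses the EDF scheduler instead:
//
//   HostConstSharedPtr p2cSketch(const HostVector& hosts, Random::RandomGenerator& random) {
//     if (hosts.empty()) {
//       return nullptr;
//     }
//     const auto& first = hosts[random.random() % hosts.size()];
//     const auto& second = hosts[random.random() % hosts.size()];
//     // Prefer the candidate with fewer active requests.
//     return first->stats().rq_active_.value() <= second->stats().rq_active_.value() ? first
//                                                                                    : second;
//   }
//
// In the weighted path, a host's effective weight shrinks as its active request count grows, with
// the exact shape controlled by active_request_bias; see hostWeight() in the corresponding .cc
// file.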
668 : class LeastRequestLoadBalancer : public EdfLoadBalancerBase {
669 : public:
670 : LeastRequestLoadBalancer(
671 : const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
672 : Runtime::Loader& runtime, Random::RandomGenerator& random,
673 : const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config,
674 : OptRef<const envoy::config::cluster::v3::Cluster::LeastRequestLbConfig> least_request_config,
675 : TimeSource& time_source)
676 : : EdfLoadBalancerBase(
677 : priority_set, local_priority_set, stats, runtime, random,
678 : PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(common_config, healthy_panic_threshold,
679 : 100, 50),
680 : LoadBalancerConfigHelper::localityLbConfigFromCommonLbConfig(common_config),
681 : least_request_config.has_value()
682 : ? LoadBalancerConfigHelper::slowStartConfigFromLegacyProto(
683 : least_request_config.ref())
684 : : absl::nullopt,
685 : time_source),
686 : choice_count_(
687 : least_request_config.has_value()
688 : ? PROTOBUF_GET_WRAPPED_OR_DEFAULT(least_request_config.ref(), choice_count, 2)
689 : : 2),
690 : active_request_bias_runtime_(
691 : least_request_config.has_value() && least_request_config->has_active_request_bias()
692 : ? absl::optional<Runtime::Double>(
693 : {least_request_config->active_request_bias(), runtime})
694 115 : : absl::nullopt) {
695 115 : initialize();
696 115 : }
697 :
698 : LeastRequestLoadBalancer(
699 : const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
700 : Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
701 : const envoy::extensions::load_balancing_policies::least_request::v3::LeastRequest&
702 : least_request_config,
703 : TimeSource& time_source)
704 : : EdfLoadBalancerBase(
705 : priority_set, local_priority_set, stats, runtime, random, healthy_panic_threshold,
706 : LoadBalancerConfigHelper::localityLbConfigFromProto(least_request_config),
707 : LoadBalancerConfigHelper::slowStartConfigFromProto(least_request_config), time_source),
708 : choice_count_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(least_request_config, choice_count, 2)),
709 : active_request_bias_runtime_(
710 : least_request_config.has_active_request_bias()
711 : ? absl::optional<Runtime::Double>(
712 : {least_request_config.active_request_bias(), runtime})
713 0 : : absl::nullopt) {
714 0 : initialize();
715 0 : }
716 :
717 : protected:
718 290 : void refresh(uint32_t priority) override {
719 290 : active_request_bias_ = active_request_bias_runtime_ != absl::nullopt
720 290 : ? active_request_bias_runtime_.value().value()
721 290 : : 1.0;
722 :
723 290 : if (active_request_bias_ < 0.0 || std::isnan(active_request_bias_)) {
724 0 : ENVOY_LOG_MISC(warn,
725 0 : "upstream: invalid active request bias supplied (runtime key {}), using 1.0",
726 0 : active_request_bias_runtime_->runtimeKey());
727 0 : active_request_bias_ = 1.0;
728 0 : }
729 :
730 290 : EdfLoadBalancerBase::refresh(priority);
731 290 : }
732 :
733 : private:
734 1470 : void refreshHostSource(const HostsSource&) override {}
735 : double hostWeight(const Host& host) const override;
736 : HostConstSharedPtr unweightedHostPeek(const HostVector& hosts_to_use,
737 : const HostsSource& source) override;
738 : HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
739 : const HostsSource& source) override;
740 :
741 : const uint32_t choice_count_;
742 :
743 : // The exponent used to calculate host weights can be configured via runtime. We cache it for
744 : // performance reasons and refresh it in `LeastRequestLoadBalancer::refresh(uint32_t priority)`
745 : // whenever a `HostSet` is updated.
746 : double active_request_bias_{};
747 :
748 : const absl::optional<Runtime::Double> active_request_bias_runtime_;
749 : };
750 :
751 : /**
752 : * Random load balancer that picks a random host out of all hosts.
753 : */
754 : class RandomLoadBalancer : public ZoneAwareLoadBalancerBase {
755 : public:
756 : RandomLoadBalancer(const PrioritySet& priority_set, const PrioritySet* local_priority_set,
757 : ClusterLbStats& stats, Runtime::Loader& runtime,
758 : Random::RandomGenerator& random,
759 : const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config)
760 : : ZoneAwareLoadBalancerBase(
761 : priority_set, local_priority_set, stats, runtime, random,
762 : PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(common_config, healthy_panic_threshold,
763 : 100, 50),
764 136 : LoadBalancerConfigHelper::localityLbConfigFromCommonLbConfig(common_config)) {}
765 :
766 : RandomLoadBalancer(
767 : const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
768 : Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
769 : const envoy::extensions::load_balancing_policies::random::v3::Random& random_config)
770 : : ZoneAwareLoadBalancerBase(
771 : priority_set, local_priority_set, stats, runtime, random, healthy_panic_threshold,
772 0 : LoadBalancerConfigHelper::localityLbConfigFromProto(random_config)) {}
773 :
774 : // Upstream::ZoneAwareLoadBalancerBase
775 : HostConstSharedPtr chooseHostOnce(LoadBalancerContext* context) override;
776 : HostConstSharedPtr peekAnotherHost(LoadBalancerContext* context) override;
777 :
778 : protected:
779 : HostConstSharedPtr peekOrChoose(LoadBalancerContext* context, bool peek);
780 : };
781 :
782 : } // namespace Upstream
783 : } // namespace Envoy
|