LCOV - code coverage report
Current view: top level - source/common/upstream - load_balancer_impl.h (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 106 183 57.9 %
Date: 2024-01-05 06:35:25 Functions: 20 45 44.4 %

          Line data    Source code
       1             : #pragma once
       2             : 
       3             : #include <bitset>
       4             : #include <cmath>
       5             : #include <cstdint>
       6             : #include <memory>
       7             : #include <queue>
       8             : #include <set>
       9             : #include <vector>
      10             : 
      11             : #include "envoy/common/callback.h"
      12             : #include "envoy/common/random_generator.h"
      13             : #include "envoy/config/cluster/v3/cluster.pb.h"
      14             : #include "envoy/extensions/load_balancing_policies/least_request/v3/least_request.pb.h"
      15             : #include "envoy/extensions/load_balancing_policies/random/v3/random.pb.h"
      16             : #include "envoy/extensions/load_balancing_policies/round_robin/v3/round_robin.pb.h"
      17             : #include "envoy/runtime/runtime.h"
      18             : #include "envoy/stream_info/stream_info.h"
      19             : #include "envoy/upstream/load_balancer.h"
      20             : #include "envoy/upstream/upstream.h"
      21             : 
      22             : #include "source/common/protobuf/utility.h"
      23             : #include "source/common/runtime/runtime_protos.h"
      24             : #include "source/common/upstream/edf_scheduler.h"
      25             : #include "source/common/upstream/subset_lb_config.h"
      26             : 
      27             : namespace Envoy {
      28             : namespace Upstream {
      29             : 
      30             : // Priority levels and localities are considered overprovisioned with this factor.
      31             : static constexpr uint32_t kDefaultOverProvisioningFactor = 140;
      32             : 
      33             : class LoadBalancerConfigHelper {
      34             : public:
      35             :   template <class LegacyProto>
      36             :   static absl::optional<envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig>
      37         171 :   slowStartConfigFromLegacyProto(const LegacyProto& proto_config) {
      38         171 :     if (!proto_config.has_slow_start_config()) {
      39         135 :       return {};
      40         135 :     }
      41           0 : 
      42          36 :     envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig slow_start_config;
      43          36 :     const auto& legacy_slow_start_config = proto_config.slow_start_config();
      44          36 :     if (legacy_slow_start_config.has_slow_start_window()) {
      45          30 :       *slow_start_config.mutable_slow_start_window() = legacy_slow_start_config.slow_start_window();
      46          30 :     }
      47          36 :     if (legacy_slow_start_config.has_aggression()) {
      48          29 :       *slow_start_config.mutable_aggression() = legacy_slow_start_config.aggression();
      49          29 :     }
      50          36 :     if (legacy_slow_start_config.has_min_weight_percent()) {
      51           5 :       *slow_start_config.mutable_min_weight_percent() =
      52           5 :           legacy_slow_start_config.min_weight_percent();
      53           5 :     }
      54          36 :     return slow_start_config;
      55         171 :   }
      56             : 
      57             :   template <class Proto>
      58             :   static absl::optional<envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig>
      59           0 :   slowStartConfigFromProto(const Proto& proto_config) {
      60           0 :     if (!proto_config.has_slow_start_config()) {
      61           0 :       return {};
      62           0 :     }
      63           0 :     return proto_config.slow_start_config();
      64           0 :   }
      65             : 
      66             :   static absl::optional<envoy::extensions::load_balancing_policies::common::v3::LocalityLbConfig>
      67             :   localityLbConfigFromCommonLbConfig(
      68             :       const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config);
      69             : 
      70             :   template <class Proto>
      71             :   static absl::optional<envoy::extensions::load_balancing_policies::common::v3::LocalityLbConfig>
      72           0 :   localityLbConfigFromProto(const Proto& proto_config) {
      73           0 :     if (!proto_config.has_locality_lb_config()) {
      74           0 :       return {};
      75           0 :     }
      76           0 :     return proto_config.locality_lb_config();
      77           0 :   }
      78             : };
      79             : 
      80             : /**
      81             :  * Base class for all LB implementations.
      82             :  */
      83             : class LoadBalancerBase : public LoadBalancer, protected Logger::Loggable<Logger::Id::upstream> {
      84             : public:
      85             :   enum class HostAvailability { Healthy, Degraded };
      86             : 
      87             :   // A utility function to chose a priority level based on a precomputed hash and
      88             :   // two PriorityLoad vectors, one for healthy load and one for degraded.
      89             :   //
      90             :   // Returns the priority, a number between 0 and per_priority_load.size()-1 as well as which host
      91             :   // availability level was chosen.
      92             :   static std::pair<uint32_t, HostAvailability>
      93             :   choosePriority(uint64_t hash, const HealthyLoad& healthy_per_priority_load,
      94             :                  const DegradedLoad& degraded_per_priority_load);
      95             : 
      96             :   // Pool selection not implemented.
      97             :   absl::optional<Upstream::SelectedPoolAndConnection>
      98             :   selectExistingConnection(Upstream::LoadBalancerContext* /*context*/,
      99             :                            const Upstream::Host& /*host*/,
     100           0 :                            std::vector<uint8_t>& /*hash_key*/) override {
     101           0 :     return absl::nullopt;
     102           0 :   }
     103             :   // Lifetime tracking not implemented.
     104           0 :   OptRef<Envoy::Http::ConnectionPool::ConnectionLifetimeCallbacks> lifetimeCallbacks() override {
     105           0 :     return {};
     106           0 :   }
     107             : 
     108             : protected:
     109             :   /**
     110             :    * For the given host_set @return if we should be in a panic mode or not. For example, if the
     111             :    * majority of hosts are unhealthy we'll be likely in a panic mode. In this case we'll route
     112             :    * requests to hosts regardless of whether they are healthy or not.
     113             :    */
     114             :   bool isHostSetInPanic(const HostSet& host_set) const;
     115             : 
     116             :   /**
     117             :    * Method is called when all host sets are in panic mode.
     118             :    * In such state the load is distributed based on the number of hosts
     119             :    * in given priority regardless of their health.
     120             :    */
     121             :   void recalculateLoadInTotalPanic();
     122             : 
     123             :   LoadBalancerBase(const PrioritySet& priority_set, ClusterLbStats& stats, Runtime::Loader& runtime,
     124             :                    Random::RandomGenerator& random, uint32_t healthy_panic_threshold);
     125             : 
     126             :   // Choose host set randomly, based on the healthy_per_priority_load_ and
     127             :   // degraded_per_priority_load_. per_priority_load_ is consulted first, spilling over to
     128             :   // degraded_per_priority_load_ if necessary. When a host set is selected based on
     129             :   // degraded_per_priority_load_, only degraded hosts should be selected from that host set.
     130             :   //
     131             :   // @return host set to use and which availability to target.
     132             :   std::pair<HostSet&, HostAvailability> chooseHostSet(LoadBalancerContext* context,
     133             :                                                       uint64_t hash) const;
     134             : 
     135           0 :   uint32_t percentageLoad(uint32_t priority) const {
     136           0 :     return per_priority_load_.healthy_priority_load_.get()[priority];
     137           0 :   }
     138           0 :   uint32_t percentageDegradedLoad(uint32_t priority) const {
     139           0 :     return per_priority_load_.degraded_priority_load_.get()[priority];
     140           0 :   }
     141           0 :   bool isInPanic(uint32_t priority) const { return per_priority_panic_[priority]; }
     142         977 :   uint64_t random(bool peeking) {
     143         977 :     if (peeking) {
     144         198 :       stashed_random_.push_back(random_.random());
     145         198 :       return stashed_random_.back();
     146         779 :     } else {
     147         779 :       if (!stashed_random_.empty()) {
     148         170 :         auto random = stashed_random_.front();
     149         170 :         stashed_random_.pop_front();
     150         170 :         return random;
     151         609 :       } else {
     152         609 :         return random_.random();
     153         609 :       }
     154         779 :     }
     155         977 :   }
     156             : 
     157             :   ClusterLbStats& stats_;
     158             :   Runtime::Loader& runtime_;
     159             :   std::deque<uint64_t> stashed_random_;
     160             :   Random::RandomGenerator& random_;
     161             :   const uint32_t default_healthy_panic_percent_;
     162             :   // The priority-ordered set of hosts to use for load balancing.
     163             :   const PrioritySet& priority_set_;
     164             : 
     165             : public:
     166             :   // Called when a host set at the given priority level is updated. This updates
     167             :   // per_priority_health for that priority level, and may update per_priority_load for all
     168             :   // priority levels.
     169             :   void static recalculatePerPriorityState(uint32_t priority, const PrioritySet& priority_set,
     170             :                                           HealthyAndDegradedLoad& priority_load,
     171             :                                           HealthyAvailability& per_priority_health,
     172             :                                           DegradedAvailability& per_priority_degraded,
     173             :                                           uint32_t& total_healthy_hosts);
     174             :   void recalculatePerPriorityPanic();
     175             : 
     176             : protected:
     177             :   // Method calculates normalized total availability.
     178             :   //
     179             :   // The availability of a priority is ratio of available (healthy/degraded) hosts over the total
     180             :   // number of hosts multiplied by 100 and the overprovisioning factor. The total availability is
     181             :   // the sum of the availability of each priority, up to a maximum of 100.
     182             :   //
     183             :   // For example, using the default overprovisioning factor of 1.4, a if priority A has 4 hosts,
     184             :   // of which 1 is degraded and 1 is healthy, it will have availability of 2/4 * 100 * 1.4 = 70.
     185             :   //
     186             :   // Assuming two priorities with availability 60 and 70, the total availability would be 100.
     187             :   static uint32_t
     188             :   calculateNormalizedTotalAvailability(HealthyAvailability& per_priority_health,
     189        2741 :                                        DegradedAvailability& per_priority_degraded) {
     190        2741 :     const auto health =
     191        2741 :         std::accumulate(per_priority_health.get().begin(), per_priority_health.get().end(), 0);
     192        2741 :     const auto degraded =
     193        2741 :         std::accumulate(per_priority_degraded.get().begin(), per_priority_degraded.get().end(), 0);
     194             : 
     195        2741 :     return std::min<uint32_t>(health + degraded, 100);
     196        2741 :   }
     197             : 
     198             :   // The percentage load (0-100) for each priority level when targeting healthy hosts and
     199             :   // the percentage load (0-100) for each priority level when targeting degraded hosts.
     200             :   HealthyAndDegradedLoad per_priority_load_;
     201             :   // The health percentage (0-100) for each priority level.
     202             :   HealthyAvailability per_priority_health_;
     203             :   // The degraded percentage (0-100) for each priority level.
     204             :   DegradedAvailability per_priority_degraded_;
     205             :   // Levels which are in panic
     206             :   std::vector<bool> per_priority_panic_;
     207             :   // The total count of healthy hosts across all priority levels.
     208             :   uint32_t total_healthy_hosts_;
     209             : 
     210             : private:
     211             :   Common::CallbackHandlePtr priority_update_cb_;
     212             : };
     213             : 
     214             : class LoadBalancerContextBase : public LoadBalancerContext {
     215             : public:
     216           0 :   absl::optional<uint64_t> computeHashKey() override { return {}; }
     217             : 
     218           0 :   const Network::Connection* downstreamConnection() const override { return nullptr; }
     219             : 
     220           0 :   const Router::MetadataMatchCriteria* metadataMatchCriteria() override { return nullptr; }
     221             : 
     222           0 :   const StreamInfo::StreamInfo* requestStreamInfo() const override { return nullptr; }
     223             : 
     224           0 :   const Http::RequestHeaderMap* downstreamHeaders() const override { return nullptr; }
     225             : 
     226             :   const HealthyAndDegradedLoad&
     227             :   determinePriorityLoad(const PrioritySet&, const HealthyAndDegradedLoad& original_priority_load,
     228           0 :                         const Upstream::RetryPriority::PriorityMappingFunc&) override {
     229           0 :     return original_priority_load;
     230           0 :   }
     231             : 
     232           0 :   bool shouldSelectAnotherHost(const Host&) override { return false; }
     233             : 
     234           0 :   uint32_t hostSelectionRetryCount() const override { return 1; }
     235             : 
     236           0 :   Network::Socket::OptionsSharedPtr upstreamSocketOptions() const override { return nullptr; }
     237             : 
     238           0 :   Network::TransportSocketOptionsConstSharedPtr upstreamTransportSocketOptions() const override {
     239           0 :     return nullptr;
     240           0 :   }
     241             : 
     242           0 :   absl::optional<OverrideHost> overrideHostToSelect() const override { return {}; }
     243             : };
     244             : 
     245             : /**
     246             :  * Base class for zone aware load balancers
     247             :  */
     248             : class ZoneAwareLoadBalancerBase : public LoadBalancerBase {
     249             : public:
     250             :   using LocalityLbConfig = envoy::extensions::load_balancing_policies::common::v3::LocalityLbConfig;
     251             : 
     252             :   HostConstSharedPtr chooseHost(LoadBalancerContext* context) override;
     253             : 
     254             : protected:
     255             :   // Both priority_set and local_priority_set if non-null must have at least one host set.
     256             :   ZoneAwareLoadBalancerBase(const PrioritySet& priority_set, const PrioritySet* local_priority_set,
     257             :                             ClusterLbStats& stats, Runtime::Loader& runtime,
     258             :                             Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
     259             :                             const absl::optional<LocalityLbConfig> locality_config);
     260             : 
     261             :   // When deciding which hosts to use on an LB decision, we need to know how to index into the
     262             :   // priority_set. This priority_set cursor is used by ZoneAwareLoadBalancerBase subclasses, e.g.
     263             :   // RoundRobinLoadBalancer, to index into auxiliary data structures specific to the LB for
     264             :   // a given host set selection.
     265             :   struct HostsSource {
     266             :     enum class SourceType : uint8_t {
     267             :       // All hosts in the host set.
     268             :       AllHosts,
     269             :       // All healthy hosts in the host set.
     270             :       HealthyHosts,
     271             :       // All degraded hosts in the host set.
     272             :       DegradedHosts,
     273             :       // Healthy hosts for locality @ locality_index.
     274             :       LocalityHealthyHosts,
     275             :       // Degraded hosts for locality @ locality_index.
     276             :       LocalityDegradedHosts,
     277             :     };
     278             : 
     279         977 :     HostsSource() = default;
     280             : 
     281             :     // TODO(kbaichoo): plumb the priority parameter as uint8_t all the way from
     282             :     // the config.
     283             :     HostsSource(uint32_t priority, SourceType source_type)
     284        3222 :         : priority_(static_cast<uint8_t>(priority)), source_type_(source_type) {
     285        3222 :       ASSERT(priority <= 128, "Priority out of bounds.");
     286        3222 :       ASSERT(source_type == SourceType::AllHosts || source_type == SourceType::HealthyHosts ||
     287        3222 :              source_type == SourceType::DegradedHosts);
     288        3222 :     }
     289             : 
     290             :     // TODO(kbaichoo): plumb the priority parameter as uint8_t all the way from
     291             :     // the config.
     292             :     HostsSource(uint32_t priority, SourceType source_type, uint32_t locality_index)
     293             :         : priority_(static_cast<uint8_t>(priority)), source_type_(source_type),
     294        1560 :           locality_index_(locality_index) {
     295        1560 :       ASSERT(priority <= 128, "Priority out of bounds.");
     296        1560 :       ASSERT(source_type == SourceType::LocalityHealthyHosts ||
     297        1560 :              source_type == SourceType::LocalityDegradedHosts);
     298        1560 :     }
     299             : 
     300             :     // Priority in PrioritySet.
     301             :     uint8_t priority_{};
     302             : 
     303             :     // How to index into HostSet for a given priority.
     304             :     SourceType source_type_{};
     305             : 
     306             :     // Locality index into HostsPerLocality for SourceType::LocalityHealthyHosts.
     307             :     uint32_t locality_index_{};
     308             : 
     309        3803 :     bool operator==(const HostsSource& other) const {
     310        3803 :       return priority_ == other.priority_ && source_type_ == other.source_type_ &&
     311        3803 :              locality_index_ == other.locality_index_;
     312        3803 :     }
     313             :   };
     314             : 
     315             :   struct HostsSourceHash {
     316       13680 :     size_t operator()(const HostsSource& hs) const {
     317             :       // This is only used for absl::flat_hash_map keys, so we don't need a deterministic hash.
     318       13680 :       size_t hash = std::hash<uint32_t>()(hs.priority_);
     319       13680 :       hash = 37 * hash + std::hash<size_t>()(static_cast<std::size_t>(hs.source_type_));
     320       13680 :       hash = 37 * hash + std::hash<uint32_t>()(hs.locality_index_);
     321       13680 :       return hash;
     322       13680 :     }
     323             :   };
     324             : 
     325             :   /**
     326             :    * By implementing this method instead of chooseHost, host selection will
     327             :    * be subject to host filters specified by LoadBalancerContext.
     328             :    *
     329             :    * Host selection will be retried up to the number specified by
     330             :    * hostSelectionRetryCount on LoadBalancerContext, and if no hosts are found
     331             :    * within the allowed attempts, the host that was selected during the last
     332             :    * attempt will be returned.
     333             :    *
     334             :    * If host selection is idempotent (i.e. retrying will not change the outcome),
     335             :    * sub classes should override chooseHost to avoid the unnecessary overhead of
     336             :    * retrying host selection.
     337             :    */
     338             :   virtual HostConstSharedPtr chooseHostOnce(LoadBalancerContext* context) PURE;
     339             : 
     340             :   /**
     341             :    * Pick the host source to use, doing zone aware routing when the hosts are sufficiently healthy.
     342             :    * If no host is chosen (due to fail_traffic_on_panic being set), return absl::nullopt.
     343             :    */
     344             :   absl::optional<HostsSource> hostSourceToUse(LoadBalancerContext* context, uint64_t hash) const;
     345             : 
     346             :   /**
     347             :    * Index into priority_set via hosts source descriptor.
     348             :    */
     349             :   const HostVector& hostSourceToHosts(HostsSource hosts_source) const;
     350             : 
     351             : private:
     352             :   enum class LocalityRoutingState {
     353             :     // Locality based routing is off.
     354             :     NoLocalityRouting,
     355             :     // All queries can be routed to the local locality.
     356             :     LocalityDirect,
     357             :     // The local locality can not handle the anticipated load. Residual load will be spread across
     358             :     // various other localities.
     359             :     LocalityResidual
     360             :   };
     361             : 
     362             :   struct LocalityPercentages {
     363             :     // The percentage of local hosts in a specific locality.
     364             :     // Percentage is stored as integer number and scaled by 10000 multiplier for better precision.
     365             :     // If upstream_percentage is 0, local_percentage may not be representative
     366             :     // of the actual percentage and will be set to 0.
     367             :     uint64_t local_percentage;
     368             :     // The percentage of upstream hosts in a specific locality.
     369             :     // Percentage is stored as integer number and scaled by 10000 multiplier for better precision.
     370             :     uint64_t upstream_percentage;
     371             :   };
     372             : 
     373             :   /**
     374             :    * Increase per_priority_state_ to at least priority_set.hostSetsPerPriority().size()
     375             :    */
     376             :   void resizePerPriorityState();
     377             : 
     378             :   /**
     379             :    * @return decision on quick exit from locality aware routing based on cluster configuration.
     380             :    * This gets recalculated on update callback.
     381             :    */
     382             :   bool earlyExitNonLocalityRoutingNew();
     383             : 
     384             :   /**
     385             :    * @return decision on quick exit from locality aware routing based on cluster configuration.
     386             :    * This gets recalculated on update callback.
     387             :    *
     388             :    * This is the legacy version of the function from previous versions of Envoy, kept temporarily
     389             :    * as an alternate code-path to reduce the risk of changes.
     390             :    */
     391             :   bool earlyExitNonLocalityRouting();
     392             : 
     393             :   /**
     394             :    * Try to select upstream hosts from the same locality.
     395             :    * @param host_set the last host set returned by chooseHostSet()
     396             :    */
     397             :   uint32_t tryChooseLocalLocalityHosts(const HostSet& host_set) const;
     398             : 
     399             :   /**
     400             :    * @return combined per-locality information about percentages of local/upstream hosts in each
     401             :    * upstream locality. See LocalityPercentages for more details. The ordering of localities
     402             :    * matches the ordering of upstream localities in the input upstream_hosts_per_locality.
     403             :    */
     404             :   absl::FixedArray<LocalityPercentages>
     405             :   calculateLocalityPercentagesNew(const HostsPerLocality& local_hosts_per_locality,
     406             :                                   const HostsPerLocality& upstream_hosts_per_locality);
     407             : 
     408             :   /**
     409             :    * @return (number of hosts in a given locality)/(total number of hosts) in `ret` param.
     410             :    * The result is stored as integer number and scaled by 10000 multiplier for better precision.
     411             :    * Caller is responsible for allocation/de-allocation of `ret`.
     412             :    *
     413             :    * This is the legacy version of the function from previous versions of Envoy, kept temporarily
     414             :    * as an alternate code-path to reduce the risk of changes.
     415             :    */
     416             :   void calculateLocalityPercentage(const HostsPerLocality& hosts_per_locality, uint64_t* ret);
     417             : 
     418             :   /**
     419             :    * Regenerate locality aware routing structures for fast decisions on upstream locality selection.
     420             :    */
     421             :   void regenerateLocalityRoutingStructuresNew();
     422             : 
     423             :   /**
     424             :    * Regenerate locality aware routing structures for fast decisions on upstream locality selection.
     425             :    *
     426             :    * This is the legacy version of the function from previous versions of Envoy, kept temporarily
     427             :    * as an alternate code-path to reduce the risk of changes.
     428             :    */
     429             :   void regenerateLocalityRoutingStructures();
     430             : 
     431         164 :   HostSet& localHostSet() const { return *local_priority_set_->hostSetsPerPriority()[0]; }
     432             : 
     433             :   static absl::optional<HostsSource::SourceType>
     434           0 :   localitySourceType(HostAvailability host_availability) {
     435           0 :     switch (host_availability) {
     436           0 :     case HostAvailability::Healthy:
     437           0 :       return absl::make_optional<HostsSource::SourceType>(
     438           0 :           HostsSource::SourceType::LocalityHealthyHosts);
     439           0 :     case HostAvailability::Degraded:
     440           0 :       return absl::make_optional<HostsSource::SourceType>(
     441           0 :           HostsSource::SourceType::LocalityDegradedHosts);
     442           0 :     }
     443           0 :     IS_ENVOY_BUG("unexpected locality source type enum");
     444           0 :     return absl::nullopt;
     445           0 :   }
     446             : 
     447         611 :   static absl::optional<HostsSource::SourceType> sourceType(HostAvailability host_availability) {
     448         611 :     switch (host_availability) {
     449         572 :     case HostAvailability::Healthy:
     450         572 :       return absl::make_optional<HostsSource::SourceType>(HostsSource::SourceType::HealthyHosts);
     451          39 :     case HostAvailability::Degraded:
     452          39 :       return absl::make_optional<HostsSource::SourceType>(HostsSource::SourceType::DegradedHosts);
     453         611 :     }
     454             : 
     455           0 :     IS_ENVOY_BUG("unexpected source type enum");
     456           0 :     return absl::nullopt;
     457         611 :   }
     458             : 
     459             :   // The set of local Envoy instances which are load balancing across priority_set_.
     460             :   const PrioritySet* local_priority_set_;
     461             : 
     462             :   struct PerPriorityState {
     463             :     // The percent of requests which can be routed to the local locality.
     464             :     uint64_t local_percent_to_route_{};
     465             :     // Tracks the current state of locality based routing.
     466             :     LocalityRoutingState locality_routing_state_{LocalityRoutingState::NoLocalityRouting};
     467             :     // When locality_routing_state_ == LocalityResidual this tracks the capacity
     468             :     // for each of the non-local localities to determine what traffic should be
     469             :     // routed where.
     470             :     std::vector<uint64_t> residual_capacity_;
     471             :   };
     472             :   using PerPriorityStatePtr = std::unique_ptr<PerPriorityState>;
     473             :   // Routing state broken out for each priority level in priority_set_.
     474             :   std::vector<PerPriorityStatePtr> per_priority_state_;
     475             :   Common::CallbackHandlePtr priority_update_cb_;
     476             :   Common::CallbackHandlePtr local_priority_set_member_update_cb_handle_;
     477             : 
     478             :   // Config for zone aware routing.
     479             :   const uint64_t min_cluster_size_;
     480             :   // Keep small members (bools and enums) at the end of class, to reduce alignment overhead.
     481             :   const uint32_t routing_enabled_;
     482             :   const bool fail_traffic_on_panic_ : 1;
     483             :   const bool use_new_locality_routing_ : 1;
     484             : 
     485             :   // If locality weight aware routing is enabled.
     486             :   const bool locality_weighted_balancing_ : 1;
     487             : 
     488             :   friend class TestZoneAwareLoadBalancer;
     489             : };
     490             : 
     491             : /**
     492             :  * Base implementation of LoadBalancer that performs weighted RR selection across the hosts in the
     493             :  * cluster. This scheduler respects host weighting and utilizes an EdfScheduler to achieve O(log
     494             :  * n) pick and insertion time complexity, O(n) memory use. The key insight is that if we schedule
     495             :  * with 1 / weight deadline, we will achieve the desired pick frequency for weighted RR in a given
     496             :  * interval. Naive implementations of weighted RR are either O(n) pick time or O(m * n) memory use,
     497             :  * where m is the weight range. We also explicitly check for the unweighted special case and use a
     498             :  * simple index to achieve O(1) scheduling in that case.
     499             :  * TODO(htuch): We use EDF at Google, but the EDF scheduler may be overkill if we don't want to
     500             :  * support large ranges of weights or arbitrary precision floating weights, we could construct an
     501             :  * explicit schedule, since m will be a small constant factor in O(m * n). This
     502             :  * could also be done on a thread aware LB, avoiding creating multiple EDF
     503             :  * instances.
     504             :  *
     505             :  * This base class also supports unweighted selection which derived classes can use to customize
     506             :  * behavior. Derived classes can also override how host weight is determined when in weighted mode.
     507             :  */
     508             : class EdfLoadBalancerBase : public ZoneAwareLoadBalancerBase {
     509             : public:
     510             :   using SlowStartConfig = envoy::extensions::load_balancing_policies::common::v3::SlowStartConfig;
     511             : 
     512             :   EdfLoadBalancerBase(const PrioritySet& priority_set, const PrioritySet* local_priority_set,
     513             :                       ClusterLbStats& stats, Runtime::Loader& runtime,
     514             :                       Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
     515             :                       const absl::optional<LocalityLbConfig> locality_config,
     516             :                       const absl::optional<SlowStartConfig> slow_start_config,
     517             :                       TimeSource& time_source);
     518             : 
     519             :   // Upstream::ZoneAwareLoadBalancerBase
     520             :   HostConstSharedPtr peekAnotherHost(LoadBalancerContext* context) override;
     521             :   HostConstSharedPtr chooseHostOnce(LoadBalancerContext* context) override;
     522             : 
     523             : protected:
     524             :   struct Scheduler {
     525             :     // EdfScheduler for weighted LB. The edf_ is only created when the original
     526             :     // host weights of 2 or more hosts differ. When not present, the
     527             :     // implementation of chooseHostOnce falls back to unweightedHostPick.
     528             :     std::unique_ptr<EdfScheduler<const Host>> edf_;
     529             :   };
     530             : 
     531             :   void initialize();
     532             : 
     533             :   virtual void refresh(uint32_t priority);
     534             : 
     535             :   bool isSlowStartEnabled() const;
     536             :   bool noHostsAreInSlowStart() const;
     537             : 
     538             :   virtual void recalculateHostsInSlowStart(const HostVector& hosts_added);
     539             : 
     540             :   // Seed to allow us to desynchronize load balancers across a fleet. If we don't
     541             :   // do this, multiple Envoys that receive an update at the same time (or even
     542             :   // multiple load balancers on the same host) will send requests to
     543             :   // backends in roughly lock step, causing significant imbalance and potential
     544             :   // overload.
     545             :   const uint64_t seed_;
     546             : 
     547             :   double applySlowStartFactor(double host_weight, const Host& host) const;
     548             : 
     549             : private:
     550             :   friend class EdfLoadBalancerBasePeer;
     551             :   virtual void refreshHostSource(const HostsSource& source) PURE;
     552             :   virtual double hostWeight(const Host& host) const PURE;
     553             :   virtual HostConstSharedPtr unweightedHostPeek(const HostVector& hosts_to_use,
     554             :                                                 const HostsSource& source) PURE;
     555             :   virtual HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
     556             :                                                 const HostsSource& source) PURE;
     557             : 
     558             :   // Scheduler for each valid HostsSource.
     559             :   absl::flat_hash_map<HostsSource, Scheduler, HostsSourceHash> scheduler_;
     560             :   Common::CallbackHandlePtr priority_update_cb_;
     561             :   Common::CallbackHandlePtr member_update_cb_;
     562             : 
     563             : protected:
     564             :   // Slow start related config
     565             :   const std::chrono::milliseconds slow_start_window_;
     566             :   const absl::optional<Runtime::Double> aggression_runtime_;
     567             :   TimeSource& time_source_;
     568             :   MonotonicTime latest_host_added_time_;
     569             :   const double slow_start_min_weight_percent_;
     570             : };
     571             : 
     572             : /**
     573             :  * A round robin load balancer. When in weighted mode, EDF scheduling is used. When in not
     574             :  * weighted mode, simple RR index selection is used.
     575             :  */
     576             : class RoundRobinLoadBalancer : public EdfLoadBalancerBase {
     577             : public:
     578             :   RoundRobinLoadBalancer(
     579             :       const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
     580             :       Runtime::Loader& runtime, Random::RandomGenerator& random,
     581             :       const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config,
     582             :       OptRef<const envoy::config::cluster::v3::Cluster::RoundRobinLbConfig> round_robin_config,
     583             :       TimeSource& time_source)
     584             :       : EdfLoadBalancerBase(
     585             :             priority_set, local_priority_set, stats, runtime, random,
     586             :             PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(common_config, healthy_panic_threshold,
     587             :                                                            100, 50),
     588             :             LoadBalancerConfigHelper::localityLbConfigFromCommonLbConfig(common_config),
     589             :             round_robin_config.has_value()
     590             :                 ? LoadBalancerConfigHelper::slowStartConfigFromLegacyProto(round_robin_config.ref())
     591             :                 : absl::nullopt,
     592         396 :             time_source) {
     593         396 :     initialize();
     594         396 :   }
     595             : 
     596             :   RoundRobinLoadBalancer(
     597             :       const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
     598             :       Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
     599             :       const envoy::extensions::load_balancing_policies::round_robin::v3::RoundRobin&
     600             :           round_robin_config,
     601             :       TimeSource& time_source)
     602             :       : EdfLoadBalancerBase(
     603             :             priority_set, local_priority_set, stats, runtime, random, healthy_panic_threshold,
     604             :             LoadBalancerConfigHelper::localityLbConfigFromProto(round_robin_config),
     605           0 :             LoadBalancerConfigHelper::slowStartConfigFromProto(round_robin_config), time_source) {
     606           0 :     initialize();
     607           0 :   }
     608             : 
     609             : private:
     610        3312 :   void refreshHostSource(const HostsSource& source) override {
     611           0 :     // insert() is used here on purpose so that we don't overwrite the index if the host source
     612           0 :     // already exists. Note that host sources will never be removed, but given how uncommon this
     613           0 :     // is it probably doesn't matter.
     614        3312 :     rr_indexes_.insert({source, seed_});
     615           0 :     // If the list of hosts changes, the order of picks change. Discard the
     616           0 :     // index.
     617        3312 :     peekahead_index_ = 0;
     618        3312 :   }
     619      127544 :   double hostWeight(const Host& host) const override {
     620      127544 :     if (!noHostsAreInSlowStart()) {
     621         336 :       return applySlowStartFactor(host.weight(), host);
     622         336 :     }
     623      127208 :     return host.weight();
     624      127544 :   }
     625             : 
     626             :   HostConstSharedPtr unweightedHostPeek(const HostVector& hosts_to_use,
     627          60 :                                         const HostsSource& source) override {
     628          60 :     auto i = rr_indexes_.find(source);
     629          60 :     if (i == rr_indexes_.end()) {
     630           0 :       return nullptr;
     631           0 :     }
     632          60 :     return hosts_to_use[(i->second + (peekahead_index_)++) % hosts_to_use.size()];
     633          60 :   }
     634             : 
     635             :   HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
     636         340 :                                         const HostsSource& source) override {
     637         340 :     if (peekahead_index_ > 0) {
     638          54 :       --peekahead_index_;
     639          54 :     }
     640           0 :     // To avoid storing the RR index in the base class, we end up using a second map here with
     641           0 :     // host source as the key. This means that each LB decision will require two map lookups in
     642           0 :     // the unweighted case. We might consider trying to optimize this in the future.
     643         340 :     ASSERT(rr_indexes_.find(source) != rr_indexes_.end());
     644         340 :     return hosts_to_use[rr_indexes_[source]++ % hosts_to_use.size()];
     645         340 :   }
     646             : 
     647             :   uint64_t peekahead_index_{};
     648             :   absl::flat_hash_map<HostsSource, uint64_t, HostsSourceHash> rr_indexes_;
     649             : };
     650             : 
     651             : /**
     652             :  * Weighted Least Request load balancer.
     653             :  *
     654             :  * In a normal setup when all hosts have the same weight it randomly picks up N healthy hosts
     655             :  * (where N is specified in the LB configuration) and compares number of active requests. Technique
     656             :  * is based on http://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf and is known as P2C
     657             :  * (power of two choices).
     658             :  *
     659             :  * When hosts have different weights, an RR EDF schedule is used. Host weight is scaled
     660             :  * by the number of active requests at pick/insert time. Thus, hosts will never fully drain as
     661             :  * they would in normal P2C, though they will get picked less and less often. In the future, we
     662             :  * can consider two alternate algorithms:
     663             :  * 1) Expand out all hosts by weight (using more memory) and do standard P2C.
     664             :  * 2) Use a weighted Maglev table, and perform P2C on two random hosts selected from the table.
     665             :  *    The benefit of the Maglev table is at the expense of resolution, memory usage is capped.
     666             :  *    Additionally, the Maglev table can be shared amongst all threads.
     667             :  */
     668             : class LeastRequestLoadBalancer : public EdfLoadBalancerBase {
     669             : public:
     670             :   LeastRequestLoadBalancer(
     671             :       const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
     672             :       Runtime::Loader& runtime, Random::RandomGenerator& random,
     673             :       const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config,
     674             :       OptRef<const envoy::config::cluster::v3::Cluster::LeastRequestLbConfig> least_request_config,
     675             :       TimeSource& time_source)
     676             :       : EdfLoadBalancerBase(
     677             :             priority_set, local_priority_set, stats, runtime, random,
     678             :             PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(common_config, healthy_panic_threshold,
     679             :                                                            100, 50),
     680             :             LoadBalancerConfigHelper::localityLbConfigFromCommonLbConfig(common_config),
     681             :             least_request_config.has_value()
     682             :                 ? LoadBalancerConfigHelper::slowStartConfigFromLegacyProto(
     683             :                       least_request_config.ref())
     684             :                 : absl::nullopt,
     685             :             time_source),
     686             :         choice_count_(
     687             :             least_request_config.has_value()
     688             :                 ? PROTOBUF_GET_WRAPPED_OR_DEFAULT(least_request_config.ref(), choice_count, 2)
     689             :                 : 2),
     690             :         active_request_bias_runtime_(
     691             :             least_request_config.has_value() && least_request_config->has_active_request_bias()
     692             :                 ? absl::optional<Runtime::Double>(
     693             :                       {least_request_config->active_request_bias(), runtime})
     694         115 :                 : absl::nullopt) {
     695         115 :     initialize();
     696         115 :   }
     697             : 
     698             :   LeastRequestLoadBalancer(
     699             :       const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
     700             :       Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
     701             :       const envoy::extensions::load_balancing_policies::least_request::v3::LeastRequest&
     702             :           least_request_config,
     703             :       TimeSource& time_source)
     704             :       : EdfLoadBalancerBase(
     705             :             priority_set, local_priority_set, stats, runtime, random, healthy_panic_threshold,
     706             :             LoadBalancerConfigHelper::localityLbConfigFromProto(least_request_config),
     707             :             LoadBalancerConfigHelper::slowStartConfigFromProto(least_request_config), time_source),
     708             :         choice_count_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(least_request_config, choice_count, 2)),
     709             :         active_request_bias_runtime_(
     710             :             least_request_config.has_active_request_bias()
     711             :                 ? absl::optional<Runtime::Double>(
     712             :                       {least_request_config.active_request_bias(), runtime})
     713           0 :                 : absl::nullopt) {
     714           0 :     initialize();
     715           0 :   }
     716             : 
     717             : protected:
     718         290 :   void refresh(uint32_t priority) override {
     719         290 :     active_request_bias_ = active_request_bias_runtime_ != absl::nullopt
     720         290 :                                ? active_request_bias_runtime_.value().value()
     721         290 :                                : 1.0;
     722             : 
     723         290 :     if (active_request_bias_ < 0.0 || std::isnan(active_request_bias_)) {
     724           0 :       ENVOY_LOG_MISC(warn,
     725           0 :                      "upstream: invalid active request bias supplied (runtime key {}), using 1.0",
     726           0 :                      active_request_bias_runtime_->runtimeKey());
     727           0 :       active_request_bias_ = 1.0;
     728           0 :     }
     729             : 
     730         290 :     EdfLoadBalancerBase::refresh(priority);
     731         290 :   }
     732             : 
     733             : private:
     734        1470 :   void refreshHostSource(const HostsSource&) override {}
     735             :   double hostWeight(const Host& host) const override;
     736             :   HostConstSharedPtr unweightedHostPeek(const HostVector& hosts_to_use,
     737             :                                         const HostsSource& source) override;
     738             :   HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
     739             :                                         const HostsSource& source) override;
     740             : 
     741             :   const uint32_t choice_count_;
     742             : 
     743             :   // The exponent used to calculate host weights can be configured via runtime. We cache it for
     744             :   // performance reasons and refresh it in `LeastRequestLoadBalancer::refresh(uint32_t priority)`
     745             :   // whenever a `HostSet` is updated.
     746             :   double active_request_bias_{};
     747             : 
     748             :   const absl::optional<Runtime::Double> active_request_bias_runtime_;
     749             : };
     750             : 
     751             : /**
     752             :  * Random load balancer that picks a random host out of all hosts.
     753             :  */
     754             : class RandomLoadBalancer : public ZoneAwareLoadBalancerBase {
     755             : public:
     756             :   RandomLoadBalancer(const PrioritySet& priority_set, const PrioritySet* local_priority_set,
     757             :                      ClusterLbStats& stats, Runtime::Loader& runtime,
     758             :                      Random::RandomGenerator& random,
     759             :                      const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config)
     760             :       : ZoneAwareLoadBalancerBase(
     761             :             priority_set, local_priority_set, stats, runtime, random,
     762             :             PROTOBUF_PERCENT_TO_ROUNDED_INTEGER_OR_DEFAULT(common_config, healthy_panic_threshold,
     763             :                                                            100, 50),
     764         136 :             LoadBalancerConfigHelper::localityLbConfigFromCommonLbConfig(common_config)) {}
     765             : 
     766             :   RandomLoadBalancer(
     767             :       const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterLbStats& stats,
     768             :       Runtime::Loader& runtime, Random::RandomGenerator& random, uint32_t healthy_panic_threshold,
     769             :       const envoy::extensions::load_balancing_policies::random::v3::Random& random_config)
     770             :       : ZoneAwareLoadBalancerBase(
     771             :             priority_set, local_priority_set, stats, runtime, random, healthy_panic_threshold,
     772           0 :             LoadBalancerConfigHelper::localityLbConfigFromProto(random_config)) {}
     773             : 
     774             :   // Upstream::ZoneAwareLoadBalancerBase
     775             :   HostConstSharedPtr chooseHostOnce(LoadBalancerContext* context) override;
     776             :   HostConstSharedPtr peekAnotherHost(LoadBalancerContext* context) override;
     777             : 
     778             : protected:
     779             :   HostConstSharedPtr peekOrChoose(LoadBalancerContext* context, bool peek);
     780             : };
     781             : 
     782             : } // namespace Upstream
     783             : } // namespace Envoy

Generated by: LCOV version 1.15