LCOV - code coverage report
Current view: top level - source/common/upstream - outlier_detection_impl.h (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 0 126 0.0 %
Date: 2024-01-05 06:35:25 Functions: 0 60 0.0 %

          Line data    Source code
       1             : #pragma once
       2             : 
       3             : #include <atomic>
       4             : #include <chrono>
       5             : #include <cstdint>
       6             : #include <list>
       7             : #include <memory>
       8             : #include <string>
       9             : #include <vector>
      10             : 
      11             : #include "envoy/access_log/access_log.h"
      12             : #include "envoy/common/callback.h"
      13             : #include "envoy/common/time.h"
      14             : #include "envoy/config/cluster/v3/cluster.pb.h"
      15             : #include "envoy/config/cluster/v3/outlier_detection.pb.h"
      16             : #include "envoy/data/cluster/v3/outlier_detection_event.pb.h"
      17             : #include "envoy/event/timer.h"
      18             : #include "envoy/http/codes.h"
      19             : #include "envoy/runtime/runtime.h"
      20             : #include "envoy/stats/scope.h"
      21             : #include "envoy/stats/stats.h"
      22             : #include "envoy/upstream/outlier_detection.h"
      23             : 
      24             : #include "source/common/upstream/upstream_impl.h"
      25             : 
      26             : #include "absl/container/node_hash_map.h"
      27             : 
      28             : namespace Envoy {
      29             : namespace Upstream {
      30             : namespace Outlier {
      31             : 
      32             : /**
      33             :  * Factory for creating a detector for a cluster from the cluster's proto configuration.
      34             :  */
      35             : class DetectorImplFactory {
      36             : public:
      37             :   static absl::StatusOr<DetectorSharedPtr> // outcome is wrapped in StatusOr; the failure conditions live in the .cc (not visible here)
      38             :   createForCluster(Cluster& cluster, const envoy::config::cluster::v3::Cluster& cluster_config,
      39             :                    Event::Dispatcher& dispatcher, Runtime::Loader& runtime,
      40             :                    EventLoggerSharedPtr event_logger, Random::RandomGenerator& random);
      41             : };
      42             : 
      43             : /**
      44             :  * Thin struct pairing a host with its success rate, to facilitate calculations for success rate outlier detection.
      45             :  */
      46             : struct HostSuccessRatePair {
      47             :   HostSuccessRatePair(HostSharedPtr host, double success_rate)
      48           0 :       : host_(host), success_rate_(success_rate) {} // stores both arguments unchanged
      49             :   HostSharedPtr host_; // shared ownership of the host the rate belongs to
      50             :   double success_rate_; // units and valid range are not established in this header
      51             : };
      52             : 
      53             : struct SuccessRateAccumulatorBucket { // Pair of atomic request counters. No in-class initializers; the creation sites visible in this header use `new SuccessRateAccumulatorBucket()`.
      54             :   std::atomic<uint64_t> success_request_counter_; // incremented by SuccessRateMonitor::incSuccessReqCounter()
      55             :   std::atomic<uint64_t> total_request_counter_; // incremented by SuccessRateMonitor::incTotalReqCounter()
      56             : };
      57             : 
      58             : /**
      59             :  * The SuccessRateAccumulator uses the SuccessRateAccumulatorBucket to get per host success rate
      60             :  * stats. This implementation has a fixed window size of time, and thus only needs a
      61             :  * bucket to write to, and a bucket to accumulate/run stats over.
      62             :  */
      63             : class SuccessRateAccumulator {
      64             : public:
      65             :   SuccessRateAccumulator() // allocates both buckets up front; the unique_ptr members own them
      66             :       : current_success_rate_bucket_(new SuccessRateAccumulatorBucket()),
      67           0 :         backup_success_rate_bucket_(new SuccessRateAccumulatorBucket()) {}
      68             : 
      69             :   /**
      70             :    * This function updates the bucket to write data to.
      71             :    * @return a raw, non-owning pointer to the SuccessRateAccumulatorBucket to write to (presumably one of the two buckets owned by this object; confirm in the .cc).
      72             :    */
      73             :   SuccessRateAccumulatorBucket* updateCurrentWriter();
      74             :   /**
      75             :    * This function returns the success rate of a host over a window of time together with the
      76             :    * request volume. The underlying window of time could be dynamically adjusted. In the current
      77             :    * implementation it is a fixed time window.
      78             :    * The function takes no parameters, so any request volume threshold has to be applied by the
      79             :    * caller to the returned volume (presumably; confirm against the callers in the .cc).
      80             :    * @return an absl::optional holding a (success rate, request volume) pair. Presumably the
      81             :    * optional is empty when no requests were recorded; confirm in the .cc.
      82             :    */
      83             :   absl::optional<std::pair<double, uint64_t>> getSuccessRateAndVolume();
      84             : 
      85             : private:
      86             :   std::unique_ptr<SuccessRateAccumulatorBucket> current_success_rate_bucket_; // presumably the bucket being written to; confirm in the .cc
      87             :   std::unique_ptr<SuccessRateAccumulatorBucket> backup_success_rate_bucket_; // presumably the bucket stats are computed over; confirm in the .cc
      88             : };
      89             : 
      90             : class SuccessRateMonitor { // Bundles a SuccessRateAccumulator, an atomic pointer to the bucket being written, the last stored success rate and an ejection type.
      91             : public:
      92             :   SuccessRateMonitor(envoy::data::cluster::v3::OutlierEjectionType ejection_type)
      93           0 :       : ejection_type_(ejection_type) {
      94           0 :     // Point the success_rate_accumulator_bucket_ pointer to a bucket.
      95           0 :     updateCurrentSuccessRateBucket();
      96           0 :   }
      97           0 :   double getSuccessRate() const { return success_rate_; } // -1 until setSuccessRate() has been called
      98           0 :   SuccessRateAccumulator& successRateAccumulator() { return success_rate_accumulator_; }
      99           0 :   void setSuccessRate(double new_success_rate) { success_rate_ = new_success_rate; }
     100           0 :   void updateCurrentSuccessRateBucket() { // atomically stores the bucket pointer returned by the accumulator
     101           0 :     success_rate_accumulator_bucket_.store(success_rate_accumulator_.updateCurrentWriter());
     102           0 :   }
     103           0 :   void incTotalReqCounter() { success_rate_accumulator_bucket_.load()->total_request_counter_++; } // atomic load of the pointer, then atomic increment
     104           0 :   void incSuccessReqCounter() { // same pattern as incTotalReqCounter(), for the success counter
     105           0 :     success_rate_accumulator_bucket_.load()->success_request_counter_++;
     106           0 :   }
     107             : 
     108           0 :   envoy::data::cluster::v3::OutlierEjectionType getEjectionType() const { return ejection_type_; }
     109             : 
     110             : private:
     111             :   SuccessRateAccumulator success_rate_accumulator_; // declared before the pointer below, which the constructor fills from it
     112             :   std::atomic<SuccessRateAccumulatorBucket*> success_rate_accumulator_bucket_; // non-owning; set by updateCurrentSuccessRateBucket()
     113             :   envoy::data::cluster::v3::OutlierEjectionType ejection_type_; // fixed at construction, returned by getEjectionType()
     114             :   double success_rate_{-1};
     115             : };
     116             : 
     117             : class DetectorImpl;
     118             : 
     119             : /**
     120             :  * Implementation of DetectorHostMonitor for the generic detector. Holds per-host ejection bookkeeping, consecutive-failure counters and two SuccessRateMonitor instances.
     121             :  */
     122             : class DetectorHostMonitorImpl : public DetectorHostMonitor {
     123             : public:
     124             :   DetectorHostMonitorImpl(std::shared_ptr<DetectorImpl> detector, HostSharedPtr host);
     125             : 
     126             :   void eject(MonotonicTime ejection_time);
     127             :   void uneject(MonotonicTime ejection_time); // NOTE(review): the parameter is named ejection_time here as well; presumably it is the unejection time. Confirm in the .cc.
     128             : 
     129           0 :   uint32_t& ejectTimeBackoff() { return eject_time_backoff_; } // hands out a mutable reference so the backoff can be changed in place
     130             : 
     131           0 :   void resetConsecutive5xx() { consecutive_5xx_ = 0; } // each reset below stores 0 into the matching atomic counter
     132           0 :   void resetConsecutiveGatewayFailure() { consecutive_gateway_failure_ = 0; }
     133           0 :   void resetConsecutiveLocalOriginFailure() { consecutive_local_origin_failure_ = 0; }
     134             :   static absl::optional<Http::Code> resultToHttpCode(Result result);
     135             : 
     136             :   // Upstream::Outlier::DetectorHostMonitor
     137           0 :   uint32_t numEjections() override { return num_ejections_; }
     138             :   void putHttpResponseCode(uint64_t response_code) override;
     139             :   void putResult(Result result, absl::optional<uint64_t> code) override;
     140           0 :   void putResponseTime(std::chrono::milliseconds) override {} // empty body: the response time argument is ignored here
     141           0 :   const absl::optional<MonotonicTime>& lastEjectionTime() override { return last_ejection_time_; }
     142           0 :   const absl::optional<MonotonicTime>& lastUnejectionTime() override {
     143           0 :     return last_unejection_time_;
     144           0 :   }
     145             : 
     146           0 :   const SuccessRateMonitor& getSRMonitor(SuccessRateMonitorType type) const { // ExternalOrigin selects the external monitor; any other value selects the local one
     147           0 :     return (SuccessRateMonitorType::ExternalOrigin == type) ? external_origin_sr_monitor_
     148           0 :                                                             : local_origin_sr_monitor_;
     149           0 :   }
     150             : 
     151           0 :   SuccessRateMonitor& getSRMonitor(SuccessRateMonitorType type) {
     152             :     // Call const version of the same method and cast the constness away again; this is only reached on a non-const object.
     153           0 :     return const_cast<SuccessRateMonitor&>(
     154           0 :         const_cast<const DetectorHostMonitorImpl*>(this)->getSRMonitor(type));
     155           0 :   }
     156             : 
     157           0 :   double successRate(SuccessRateMonitorType type) const override { // getter: last rate stored in the selected monitor
     158           0 :     return getSRMonitor(type).getSuccessRate();
     159           0 :   }
     160             :   void updateCurrentSuccessRateBucket();
     161           0 :   void successRate(SuccessRateMonitorType type, double new_success_rate) { // setter overload: forwards to SuccessRateMonitor::setSuccessRate()
     162           0 :     getSRMonitor(type).setSuccessRate(new_success_rate);
     163           0 :   }
     164             : 
     165             :   // handlers for reporting local origin errors
     166             :   void localOriginFailure();
     167             :   void localOriginNoFailure();
     168             : 
     169             :   // handlers for setting and getting jitter, used to add a random value
     170             :   // to outlier eject time in order to prevent a connection storm when
     171             :   // hosts are unejected
     172           0 :   void setJitter(const std::chrono::milliseconds jitter) { jitter_ = jitter; }
     173           0 :   std::chrono::milliseconds getJitter() const { return jitter_; }
     174             : 
     175             : private:
     176             :   std::weak_ptr<DetectorImpl> detector_; // weak reference: this monitor does not keep the detector alive
     177             :   std::weak_ptr<Host> host_; // weak reference: this monitor does not keep the host alive
     178             :   absl::optional<MonotonicTime> last_ejection_time_;
     179             :   absl::optional<MonotonicTime> last_unejection_time_;
     180             :   uint32_t num_ejections_{};
     181             :   // Determines ejection time. Each time a node is ejected,
     182             :   // the eject_time_backoff is incremented. The value is decremented
     183             :   // each time the node was healthy and not ejected.
     184             :   uint32_t eject_time_backoff_{};
     185             : 
     186             :   // counters for externally generated failures
     187             :   std::atomic<uint32_t> consecutive_5xx_{0};
     188             :   std::atomic<uint32_t> consecutive_gateway_failure_{0};
     189             : 
     190             :   // counters for local origin failures
     191             :   std::atomic<uint32_t> consecutive_local_origin_failure_{0};
     192             : 
     193             :   // jitter for outlier ejection time
     194             :   std::chrono::milliseconds jitter_; // NOTE(review): no in-class initializer, unlike the neighbouring members; verify the constructor (in the .cc) or setJitter() sets it before getJitter() is read
     195             : 
     196             :   // success rate monitors:
     197             :   // - external_origin: for all events when external/local are not split
     198             :   //   and for external origin failures when external/local events are split
     199             :   // - local origin: for local events when external/local events are split and
     200             :   //   not used when external/local events are not split.
     201             :   SuccessRateMonitor external_origin_sr_monitor_;
     202             :   SuccessRateMonitor local_origin_sr_monitor_;
     203             : 
     204             :   void putResultNoLocalExternalSplit(Result result, absl::optional<uint64_t> code);
     205             :   void putResultWithLocalExternalSplit(Result result, absl::optional<uint64_t> code);
     206             :   std::function<void(DetectorHostMonitorImpl*, Result, absl::optional<uint64_t> code)> // presumably bound to one of the two putResult*Split methods above; confirm in the .cc. NOTE(review): <functional> is not among this header's direct includes.
     207             :       put_result_func_;
     208             : };
     209             : 
     210             : /**
     211             :  * All outlier detection stats: COUNTER is applied to each of the nineteen counter names and GAUGE to the single gauge, ejections_active (declared with Accumulate). @see stats_macros.h
     212             :  */
     213             : #define ALL_OUTLIER_DETECTION_STATS(COUNTER, GAUGE)                                                \
     214           0 :   COUNTER(ejections_consecutive_5xx)                                                               \
     215           0 :   COUNTER(ejections_detected_consecutive_5xx)                                                      \
     216           0 :   COUNTER(ejections_detected_consecutive_gateway_failure)                                          \
     217           0 :   COUNTER(ejections_detected_success_rate)                                                         \
     218           0 :   COUNTER(ejections_detected_failure_percentage)                                                   \
     219           0 :   COUNTER(ejections_enforced_consecutive_5xx)                                                      \
     220           0 :   COUNTER(ejections_enforced_consecutive_gateway_failure)                                          \
     221           0 :   COUNTER(ejections_enforced_success_rate)                                                         \
     222           0 :   COUNTER(ejections_enforced_failure_percentage)                                                   \
     223           0 :   COUNTER(ejections_detected_consecutive_local_origin_failure)                                     \
     224           0 :   COUNTER(ejections_enforced_consecutive_local_origin_failure)                                     \
     225           0 :   COUNTER(ejections_detected_local_origin_success_rate)                                            \
     226           0 :   COUNTER(ejections_enforced_local_origin_success_rate)                                            \
     227           0 :   COUNTER(ejections_detected_local_origin_failure_percentage)                                      \
     228           0 :   COUNTER(ejections_enforced_local_origin_failure_percentage)                                      \
     229           0 :   COUNTER(ejections_enforced_total)                                                                \
     230           0 :   COUNTER(ejections_overflow)                                                                      \
     231           0 :   COUNTER(ejections_success_rate)                                                                  \
     232           0 :   COUNTER(ejections_total)                                                                         \
     233           0 :   GAUGE(ejections_active, Accumulate)
     234             : 
     235             : /**
     236             :  * Struct definition for all outlier detection stats, produced by expanding ALL_OUTLIER_DETECTION_STATS with GENERATE_COUNTER_STRUCT and GENERATE_GAUGE_STRUCT. Members carry a trailing underscore (DetectorImpl reads stats_.ejections_active_). @see stats_macros.h
     237             :  */
     238             : struct DetectionStats {
     239             :   ALL_OUTLIER_DETECTION_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT)
     240             : };
     241             : 
     242             : // Names used in runtime configuration. Every key lives under the "outlier_detection." prefix.
     243             : constexpr absl::string_view MaxEjectionPercentRuntime = "outlier_detection.max_ejection_percent";
     244             : constexpr absl::string_view ConsecutiveGatewayFailureRuntime =
     245             :     "outlier_detection.consecutive_gateway_failure";
     246             : constexpr absl::string_view Consecutive5xxRuntime = "outlier_detection.consecutive_5xx";
     247             : constexpr absl::string_view ConsecutiveLocalOriginFailureRuntime =
     248             :     "outlier_detection.consecutive_local_origin_failure";
     249             : constexpr absl::string_view IntervalMsRuntime = "outlier_detection.interval_ms";
     250             : constexpr absl::string_view BaseEjectionTimeMsRuntime = "outlier_detection.base_ejection_time_ms";
     251             : constexpr absl::string_view MaxEjectionTimeMsRuntime = "outlier_detection.max_ejection_time_ms";
     252             : constexpr absl::string_view EnforcingConsecutive5xxRuntime =
     253             :     "outlier_detection.enforcing_consecutive_5xx";
     254             : constexpr absl::string_view EnforcingConsecutiveGatewayFailureRuntime =
     255             :     "outlier_detection.enforcing_consecutive_gateway_failure";
     256             : constexpr absl::string_view EnforcingSuccessRateRuntime =
     257             :     "outlier_detection.enforcing_success_rate";
     258             : constexpr absl::string_view EnforcingConsecutiveLocalOriginFailureRuntime =
     259             :     "outlier_detection.enforcing_consecutive_local_origin_failure";
     260             : constexpr absl::string_view EnforcingLocalOriginSuccessRateRuntime =
     261             :     "outlier_detection.enforcing_local_origin_success_rate";
     262             : constexpr absl::string_view EnforcingFailurePercentageRuntime =
     263             :     "outlier_detection.enforcing_failure_percentage";
     264             : constexpr absl::string_view EnforcingFailurePercentageLocalOriginRuntime =
     265             :     "outlier_detection.enforcing_failure_percentage_local_origin";
     266             : constexpr absl::string_view SuccessRateMinimumHostsRuntime =
     267             :     "outlier_detection.success_rate_minimum_hosts";
     268             : constexpr absl::string_view SuccessRateRequestVolumeRuntime =
     269             :     "outlier_detection.success_rate_request_volume";
     270             : constexpr absl::string_view FailurePercentageMinimumHostsRuntime =
     271             :     "outlier_detection.failure_percentage_minimum_hosts";
     272             : constexpr absl::string_view FailurePercentageRequestVolumeRuntime =
     273             :     "outlier_detection.failure_percentage_request_volume";
     274             : constexpr absl::string_view SuccessRateStdevFactorRuntime =
     275             :     "outlier_detection.success_rate_stdev_factor";
     276             : constexpr absl::string_view FailurePercentageThresholdRuntime =
     277             :     "outlier_detection.failure_percentage_threshold";
     278             : constexpr absl::string_view MaxEjectionTimeJitterMsRuntime =
     279             :     "outlier_detection.max_ejection_time_jitter_ms";
     280             : 
     281             : /**
     282             :  * Configuration for the outlier detection: all-const values built from an OutlierDetection proto and exposed through read-only accessors. The DEFAULT_* constants below are presumably the fallbacks for unset proto fields (confirm in the .cc).
     283             :  */
     284             : class DetectorConfig {
     285             : public:
     286             :   DetectorConfig(const envoy::config::cluster::v3::OutlierDetection& config); // NOTE(review): single-argument and not explicit, so the proto converts implicitly to DetectorConfig
     287             : 
     288           0 :   uint64_t intervalMs() const { return interval_ms_; }
     289           0 :   uint64_t baseEjectionTimeMs() const { return base_ejection_time_ms_; }
     290           0 :   uint64_t consecutive5xx() const { return consecutive_5xx_; }
     291           0 :   uint64_t consecutiveGatewayFailure() const { return consecutive_gateway_failure_; }
     292           0 :   uint64_t maxEjectionPercent() const { return max_ejection_percent_; }
     293           0 :   uint64_t successRateMinimumHosts() const { return success_rate_minimum_hosts_; }
     294           0 :   uint64_t successRateRequestVolume() const { return success_rate_request_volume_; }
     295           0 :   uint64_t successRateStdevFactor() const { return success_rate_stdev_factor_; }
     296           0 :   uint64_t failurePercentageThreshold() const { return failure_percentage_threshold_; }
     297           0 :   uint64_t failurePercentageMinimumHosts() const { return failure_percentage_minimum_hosts_; }
     298           0 :   uint64_t failurePercentageRequestVolume() const { return failure_percentage_request_volume_; }
     299           0 :   uint64_t enforcingConsecutive5xx() const { return enforcing_consecutive_5xx_; }
     300           0 :   uint64_t enforcingConsecutiveGatewayFailure() const {
     301           0 :     return enforcing_consecutive_gateway_failure_;
     302           0 :   }
     303           0 :   uint64_t enforcingSuccessRate() const { return enforcing_success_rate_; }
     304           0 :   uint64_t enforcingFailurePercentage() const { return enforcing_failure_percentage_; }
     305           0 :   uint64_t enforcingFailurePercentageLocalOrigin() const {
     306           0 :     return enforcing_failure_percentage_local_origin_;
     307           0 :   }
     308           0 :   bool splitExternalLocalOriginErrors() const { return split_external_local_origin_errors_; }
     309           0 :   uint64_t consecutiveLocalOriginFailure() const { return consecutive_local_origin_failure_; }
     310           0 :   uint64_t enforcingConsecutiveLocalOriginFailure() const {
     311           0 :     return enforcing_consecutive_local_origin_failure_;
     312           0 :   }
     313           0 :   uint64_t enforcingLocalOriginSuccessRate() const { return enforcing_local_origin_success_rate_; }
     314           0 :   uint64_t maxEjectionTimeMs() const { return max_ejection_time_ms_; }
     315           0 :   uint64_t maxEjectionTimeJitterMs() const { return max_ejection_time_jitter_ms_; }
     316           0 :   bool successfulActiveHealthCheckUnejectHost() const {
     317           0 :     return successful_active_health_check_uneject_host_;
     318           0 :   }
     319             : 
     320             : private:
     321             :   const uint64_t interval_ms_; // all members are const: a DetectorConfig cannot change after construction
     322             :   const uint64_t base_ejection_time_ms_;
     323             :   const uint64_t consecutive_5xx_;
     324             :   const uint64_t consecutive_gateway_failure_;
     325             :   const uint64_t max_ejection_percent_;
     326             :   const uint64_t success_rate_minimum_hosts_;
     327             :   const uint64_t success_rate_request_volume_;
     328             :   const uint64_t success_rate_stdev_factor_;
     329             :   const uint64_t failure_percentage_threshold_;
     330             :   const uint64_t failure_percentage_minimum_hosts_;
     331             :   const uint64_t failure_percentage_request_volume_;
     332             :   const uint64_t enforcing_consecutive_5xx_;
     333             :   const uint64_t enforcing_consecutive_gateway_failure_;
     334             :   const uint64_t enforcing_success_rate_;
     335             :   const uint64_t enforcing_failure_percentage_;
     336             :   const uint64_t enforcing_failure_percentage_local_origin_;
     337             :   const bool split_external_local_origin_errors_; // no DEFAULT_* constant is declared for this flag
     338             :   const uint64_t consecutive_local_origin_failure_;
     339             :   const uint64_t enforcing_consecutive_local_origin_failure_;
     340             :   const uint64_t enforcing_local_origin_success_rate_;
     341             :   const uint64_t max_ejection_time_ms_;
     342             :   const uint64_t max_ejection_time_jitter_ms_;
     343             :   const bool successful_active_health_check_uneject_host_; // no DEFAULT_* constant is declared for this flag
     344             : 
     345             :   static constexpr uint64_t DEFAULT_INTERVAL_MS = 10000;
     346             :   static constexpr uint64_t DEFAULT_BASE_EJECTION_TIME_MS = 30000;
     347             :   static constexpr uint64_t DEFAULT_CONSECUTIVE_5XX = 5;
     348             :   static constexpr uint64_t DEFAULT_CONSECUTIVE_GATEWAY_FAILURE = 5;
     349             :   static constexpr uint64_t DEFAULT_MAX_EJECTION_PERCENT = 10;
     350             :   static constexpr uint64_t DEFAULT_SUCCESS_RATE_MINIMUM_HOSTS = 5;
     351             :   static constexpr uint64_t DEFAULT_SUCCESS_RATE_REQUEST_VOLUME = 100;
     352             :   static constexpr uint64_t DEFAULT_SUCCESS_RATE_STDEV_FACTOR = 1900; // NOTE(review): scale of this factor is not visible in this header; confirm where it is consumed
     353             :   static constexpr uint64_t DEFAULT_FAILURE_PERCENTAGE_THRESHOLD = 85;
     354             :   static constexpr uint64_t DEFAULT_FAILURE_PERCENTAGE_MINIMUM_HOSTS = 5;
     355             :   static constexpr uint64_t DEFAULT_FAILURE_PERCENTAGE_REQUEST_VOLUME = 50;
     356             :   static constexpr uint64_t DEFAULT_ENFORCING_CONSECUTIVE_5XX = 100;
     357             :   static constexpr uint64_t DEFAULT_ENFORCING_CONSECUTIVE_GATEWAY_FAILURE = 0;
     358             :   static constexpr uint64_t DEFAULT_ENFORCING_SUCCESS_RATE = 100;
     359             :   static constexpr uint64_t DEFAULT_ENFORCING_FAILURE_PERCENTAGE = 0;
     360             :   static constexpr uint64_t DEFAULT_ENFORCING_FAILURE_PERCENTAGE_LOCAL_ORIGIN = 0;
     361             :   static constexpr uint64_t DEFAULT_CONSECUTIVE_LOCAL_ORIGIN_FAILURE = 5;
     362             :   static constexpr uint64_t DEFAULT_ENFORCING_CONSECUTIVE_LOCAL_ORIGIN_FAILURE = 100;
     363             :   static constexpr uint64_t DEFAULT_ENFORCING_LOCAL_ORIGIN_SUCCESS_RATE = 100;
     364             :   static constexpr uint64_t DEFAULT_MAX_EJECTION_TIME_MS = 10 * DEFAULT_BASE_EJECTION_TIME_MS; // evaluates to 300000
     365             :   static constexpr uint64_t DEFAULT_MAX_EJECTION_TIME_JITTER_MS = 0;
     366             : };
     367             : 
     368             : /**
     369             :  * An implementation of an outlier detector. In the future we may support multiple outlier detection
     370             :  * implementations with different configuration. For now, as we iterate everything is contained
     371             :  * within this implementation.
     372             :  */
     373             : class DetectorImpl : public Detector, public std::enable_shared_from_this<DetectorImpl> {
     374             : public:
     375             :   static absl::StatusOr<std::shared_ptr<DetectorImpl>>
     376             :   create(Cluster& cluster, const envoy::config::cluster::v3::OutlierDetection& config,
     377             :          Event::Dispatcher& dispatcher, Runtime::Loader& runtime, TimeSource& time_source,
     378             :          EventLoggerSharedPtr event_logger, Random::RandomGenerator& random);
     379             :   ~DetectorImpl() override;
     380             : 
     381             :   void onConsecutive5xx(HostSharedPtr host);
     382             :   void onConsecutiveGatewayFailure(HostSharedPtr host);
     383             :   void onConsecutiveLocalOriginFailure(HostSharedPtr host);
     384           0 :   Runtime::Loader& runtime() { return runtime_; }
     385           0 :   DetectorConfig& config() { return config_; }
     386             :   void unejectHost(HostSharedPtr host);
     387             : 
     388             :   // Upstream::Outlier::Detector
     389           0 :   void addChangedStateCb(ChangeStateCb cb) override { callbacks_.push_back(cb); }
     390             :   double
     391           0 :   successRateAverage(DetectorHostMonitor::SuccessRateMonitorType monitor_type) const override {
     392           0 :     return getSRNums(monitor_type).success_rate_average_;
     393           0 :   }
     394             :   double successRateEjectionThreshold(
     395           0 :       DetectorHostMonitor::SuccessRateMonitorType monitor_type) const override {
     396           0 :     return getSRNums(monitor_type).ejection_threshold_;
     397           0 :   }
     398             : 
     399             :   /**
     400             :    * Result type of the static successRateEjectionThreshold() overload declared after this struct,
     401             :    * used for success rate outlier detection. It holds the average success rate of all valid hosts
     402             :    * in the cluster and the ejection threshold. If a host's success rate is under this threshold, the host is an outlier.
     403             :    * @param success_rate_sum is the sum of the data in the valid_success_rate_hosts vector.
     404             :    * @param valid_success_rate_hosts is the vector containing the individual success rate data points.
     405             :    * @param success_rate_stdev_factor is the standard deviation factor applied when deriving the threshold.
     406             :    * @return EjectionPair
     407             :    */
     408             :   struct EjectionPair {
     409             :     double success_rate_average_; // average success rate of all valid hosts in the cluster
     410             :     double ejection_threshold_;   // ejection threshold for the cluster
     411             :   };
     412             :   static EjectionPair
     413             :   successRateEjectionThreshold(double success_rate_sum,
     414             :                                const std::vector<HostSuccessRatePair>& valid_success_rate_hosts,
     415             :                                double success_rate_stdev_factor);
     416             : 
     417           0 :   const absl::node_hash_map<HostSharedPtr, DetectorHostMonitorImpl*>& getHostMonitors() {
     418           0 :     return host_monitors_;
     419           0 :   }
     420             : 
     421             : private:
     422             :   DetectorImpl(const Cluster& cluster, const envoy::config::cluster::v3::OutlierDetection& config,
     423             :                Event::Dispatcher& dispatcher, Runtime::Loader& runtime, TimeSource& time_source,
     424             :                EventLoggerSharedPtr event_logger, Random::RandomGenerator& random);
     425             : 
     426             :   void addHostMonitor(HostSharedPtr host);
     427             :   void armIntervalTimer();
     428             :   void checkHostForUneject(HostSharedPtr host, DetectorHostMonitorImpl* monitor, MonotonicTime now);
     429             :   void ejectHost(HostSharedPtr host, envoy::data::cluster::v3::OutlierEjectionType type);
     430             :   static DetectionStats generateStats(Stats::Scope& scope);
     431             :   void initialize(Cluster& cluster);
     432             :   void onConsecutiveErrorWorker(HostSharedPtr host,
     433             :                                 envoy::data::cluster::v3::OutlierEjectionType type);
     434             :   void notifyMainThreadConsecutiveError(HostSharedPtr host,
     435             :                                         envoy::data::cluster::v3::OutlierEjectionType type);
     436             :   void onIntervalTimer();
     437             :   void runCallbacks(HostSharedPtr host);
     438             :   bool enforceEjection(envoy::data::cluster::v3::OutlierEjectionType type);
     439             :   void updateEnforcedEjectionStats(envoy::data::cluster::v3::OutlierEjectionType type);
     440             :   void updateDetectedEjectionStats(envoy::data::cluster::v3::OutlierEjectionType type);
     441             :   void processSuccessRateEjections(DetectorHostMonitor::SuccessRateMonitorType monitor_type);
     442             : 
     443             :   // The helper to double write value and gauge. The gauge could be null value because any
     444             :   // stat might be deactivated.
     445             :   class EjectionsActiveHelper { // Keeps a stats gauge and a local atomic counter in step.
     446             :   public:
     447           0 :     EjectionsActiveHelper(Envoy::Stats::Gauge& gauge) : ejections_active_ref_(gauge) {} // Binds to the gauge by reference; the local counter starts at 0.
     448           0 :     void inc() { // Increments the gauge, then the local counter.
     449           0 :       ejections_active_ref_.inc();
     450           0 :       ++ejections_active_value_;
     451           0 :     }
     452           0 :     void dec() { // Decrements the gauge, then the local counter.
     453           0 :       ejections_active_ref_.dec();
     454           0 :       --ejections_active_value_;
     455           0 :     }
     456           0 :     uint64_t value() { return ejections_active_value_.load(); } // Reads the local counter, not the gauge.
     457             :     Envoy::Stats::Gauge& ejections_active_ref_; // Reference to the gauge passed to the constructor. NOTE(review): declared under `public:`.
     458             :     std::atomic<uint64_t> ejections_active_value_{0}; // Local count changed by inc()/dec(); this is what value() returns.
     459             :   };
     460             :   DetectorConfig config_;
     461             :   Event::Dispatcher& dispatcher_;
     462             :   Runtime::Loader& runtime_;
     463             :   TimeSource& time_source_;
     464             :   DetectionStats stats_;
     465             :   EjectionsActiveHelper ejections_active_helper_{stats_.ejections_active_};
     466             :   Event::TimerPtr interval_timer_;
     467             :   std::list<ChangeStateCb> callbacks_;
     468             :   absl::node_hash_map<HostSharedPtr, DetectorHostMonitorImpl*> host_monitors_;
     469             :   EventLoggerSharedPtr event_logger_;
     470             :   Common::CallbackHandlePtr member_update_cb_;
     471             :   Random::RandomGenerator& random_generator_;
     472             : 
     473             :   // EjectionPair for external and local origin events.
     474             :   // When external/local origin events are not split, external_origin_sr_num_ is used for
     475             :   // both types of events: external and local. local_origin_sr_num_ is not used.
     476             :   // When external/local origin events are split, external_origin_sr_num_ is used only
     477             :   // for external events and local_origin_sr_num_ is used for local origin events.
     478             :   EjectionPair external_origin_sr_num_;
     479             :   EjectionPair local_origin_sr_num_;
     480             : 
     481           0 :   const EjectionPair& getSRNums(DetectorHostMonitor::SuccessRateMonitorType monitor_type) const { // Selects the EjectionPair that matches monitor_type.
     482           0 :     return (DetectorHostMonitor::SuccessRateMonitorType::ExternalOrigin == monitor_type)
     483           0 :                ? external_origin_sr_num_ // chosen for ExternalOrigin
     484           0 :                : local_origin_sr_num_; // chosen for every other monitor type
     485           0 :   }
     486           0 :   EjectionPair& getSRNums(DetectorHostMonitor::SuccessRateMonitorType monitor_type) { // Mutable overload; reuses the const overload's selection logic.
     487           0 :     return const_cast<EjectionPair&>( // Removes the const that the cast below introduced; *this is non-const in this overload.
     488           0 :         static_cast<const DetectorImpl&>(*this).getSRNums(monitor_type)); // Casting *this to const makes overload resolution pick the const version.
     489           0 :   }
     490             : };
     491             : 
     492             : class EventLoggerImpl : public EventLogger { // EventLogger implementation that holds an access-log file and a time source.
     493             : public:
     494             :   EventLoggerImpl(AccessLog::AccessLogManager& log_manager, const std::string& file_name,
     495             :                   TimeSource& time_source)
     496             :       : file_(log_manager.createAccessLog( // Obtains the log file from log_manager for a File destination at file_name.
     497             :             Filesystem::FilePathAndType{Filesystem::DestinationType::File, file_name})),
     498           0 :         time_source_(time_source) {} // time_source is kept by reference, not copied.
     499             : 
     500             :   // Upstream::Outlier::EventLogger
     501             :   void logEject(const HostDescriptionConstSharedPtr& host, Detector& detector,
     502             :                 envoy::data::cluster::v3::OutlierEjectionType type, bool enforced) override;
     503             : 
     504             :   void logUneject(const HostDescriptionConstSharedPtr& host) override;
     505             : 
     506             : private:
     507             :   void setCommonEventParams(envoy::data::cluster::v3::OutlierDetectionEvent& event, // event is taken by non-const reference.
     508             :                             const HostDescriptionConstSharedPtr& host,
     509             :                             absl::optional<MonotonicTime> time); // time is optional; handling of the empty case is not visible in this header.
     510             : 
     511             :   AccessLog::AccessLogFileSharedPtr file_; // Set in the constructor's initializer list from createAccessLog().
     512             :   TimeSource& time_source_; // Reference bound in the constructor.
     513             : };
     514             : 
     515             : } // namespace Outlier
     516             : } // namespace Upstream
     517             : } // namespace Envoy

Generated by: LCOV version 1.15