Line data Source code
1 : #pragma once 2 : 3 : #include <atomic> 4 : #include <chrono> 5 : #include <cstdint> 6 : #include <list> 7 : #include <memory> 8 : #include <string> 9 : #include <vector> 10 : 11 : #include "envoy/access_log/access_log.h" 12 : #include "envoy/common/callback.h" 13 : #include "envoy/common/time.h" 14 : #include "envoy/config/cluster/v3/cluster.pb.h" 15 : #include "envoy/config/cluster/v3/outlier_detection.pb.h" 16 : #include "envoy/data/cluster/v3/outlier_detection_event.pb.h" 17 : #include "envoy/event/timer.h" 18 : #include "envoy/http/codes.h" 19 : #include "envoy/runtime/runtime.h" 20 : #include "envoy/stats/scope.h" 21 : #include "envoy/stats/stats.h" 22 : #include "envoy/upstream/outlier_detection.h" 23 : 24 : #include "source/common/upstream/upstream_impl.h" 25 : 26 : #include "absl/container/node_hash_map.h" 27 : 28 : namespace Envoy { 29 : namespace Upstream { 30 : namespace Outlier { 31 : 32 : /** 33 : * Factory for creating a detector from a proto configuration. 34 : */ 35 : class DetectorImplFactory { 36 : public: 37 : static absl::StatusOr<DetectorSharedPtr> 38 : createForCluster(Cluster& cluster, const envoy::config::cluster::v3::Cluster& cluster_config, 39 : Event::Dispatcher& dispatcher, Runtime::Loader& runtime, 40 : EventLoggerSharedPtr event_logger, Random::RandomGenerator& random); 41 : }; 42 : 43 : /** 44 : * Thin struct to facilitate calculations for success rate outlier detection. 45 : */ 46 : struct HostSuccessRatePair { 47 : HostSuccessRatePair(HostSharedPtr host, double success_rate) 48 0 : : host_(host), success_rate_(success_rate) {} 49 : HostSharedPtr host_; 50 : double success_rate_; 51 : }; 52 : 53 : struct SuccessRateAccumulatorBucket { 54 : std::atomic<uint64_t> success_request_counter_; 55 : std::atomic<uint64_t> total_request_counter_; 56 : }; 57 : 58 : /** 59 : * The SuccessRateAccumulator uses the SuccessRateAccumulatorBucket to get per host success rate 60 : * stats. This implementation has a fixed window size of time, and thus only needs a 61 : * bucket to write to, and a bucket to accumulate/run stats over. 62 : */ 63 : class SuccessRateAccumulator { 64 : public: 65 : SuccessRateAccumulator() 66 : : current_success_rate_bucket_(new SuccessRateAccumulatorBucket()), 67 0 : backup_success_rate_bucket_(new SuccessRateAccumulatorBucket()) {} 68 : 69 : /** 70 : * This function updates the bucket to write data to. 71 : * @return a pointer to the SuccessRateAccumulatorBucket. 72 : */ 73 : SuccessRateAccumulatorBucket* updateCurrentWriter(); 74 : /** 75 : * This function returns the success rate of a host over a window of time if the request volume is 76 : * high enough. The underlying window of time could be dynamically adjusted. In the current 77 : * implementation it is a fixed time window. 78 : * @param success_rate_request_volume the threshold of requests an accumulator has to have in 79 : * order to be able to return a significant success rate value. 80 : * @return a valid absl::optional<double> with the success rate. If there were not enough 81 : * requests, an invalid absl::optional<double> is returned. 82 : */ 83 : absl::optional<std::pair<double, uint64_t>> getSuccessRateAndVolume(); 84 : 85 : private: 86 : std::unique_ptr<SuccessRateAccumulatorBucket> current_success_rate_bucket_; 87 : std::unique_ptr<SuccessRateAccumulatorBucket> backup_success_rate_bucket_; 88 : }; 89 : 90 : class SuccessRateMonitor { 91 : public: 92 : SuccessRateMonitor(envoy::data::cluster::v3::OutlierEjectionType ejection_type) 93 0 : : ejection_type_(ejection_type) { 94 0 : // Point the success_rate_accumulator_bucket_ pointer to a bucket. 95 0 : updateCurrentSuccessRateBucket(); 96 0 : } 97 0 : double getSuccessRate() const { return success_rate_; } 98 0 : SuccessRateAccumulator& successRateAccumulator() { return success_rate_accumulator_; } 99 0 : void setSuccessRate(double new_success_rate) { success_rate_ = new_success_rate; } 100 0 : void updateCurrentSuccessRateBucket() { 101 0 : success_rate_accumulator_bucket_.store(success_rate_accumulator_.updateCurrentWriter()); 102 0 : } 103 0 : void incTotalReqCounter() { success_rate_accumulator_bucket_.load()->total_request_counter_++; } 104 0 : void incSuccessReqCounter() { 105 0 : success_rate_accumulator_bucket_.load()->success_request_counter_++; 106 0 : } 107 : 108 0 : envoy::data::cluster::v3::OutlierEjectionType getEjectionType() const { return ejection_type_; } 109 : 110 : private: 111 : SuccessRateAccumulator success_rate_accumulator_; 112 : std::atomic<SuccessRateAccumulatorBucket*> success_rate_accumulator_bucket_; 113 : envoy::data::cluster::v3::OutlierEjectionType ejection_type_; 114 : double success_rate_{-1}; 115 : }; 116 : 117 : class DetectorImpl; 118 : 119 : /** 120 : * Implementation of DetectorHostMonitor for the generic detector. 121 : */ 122 : class DetectorHostMonitorImpl : public DetectorHostMonitor { 123 : public: 124 : DetectorHostMonitorImpl(std::shared_ptr<DetectorImpl> detector, HostSharedPtr host); 125 : 126 : void eject(MonotonicTime ejection_time); 127 : void uneject(MonotonicTime ejection_time); 128 : 129 0 : uint32_t& ejectTimeBackoff() { return eject_time_backoff_; } 130 : 131 0 : void resetConsecutive5xx() { consecutive_5xx_ = 0; } 132 0 : void resetConsecutiveGatewayFailure() { consecutive_gateway_failure_ = 0; } 133 0 : void resetConsecutiveLocalOriginFailure() { consecutive_local_origin_failure_ = 0; } 134 : static absl::optional<Http::Code> resultToHttpCode(Result result); 135 : 136 : // Upstream::Outlier::DetectorHostMonitor 137 0 : uint32_t numEjections() override { return num_ejections_; } 138 : void putHttpResponseCode(uint64_t response_code) override; 139 : void putResult(Result result, absl::optional<uint64_t> code) override; 140 0 : void putResponseTime(std::chrono::milliseconds) override {} 141 0 : const absl::optional<MonotonicTime>& lastEjectionTime() override { return last_ejection_time_; } 142 0 : const absl::optional<MonotonicTime>& lastUnejectionTime() override { 143 0 : return last_unejection_time_; 144 0 : } 145 : 146 0 : const SuccessRateMonitor& getSRMonitor(SuccessRateMonitorType type) const { 147 0 : return (SuccessRateMonitorType::ExternalOrigin == type) ? external_origin_sr_monitor_ 148 0 : : local_origin_sr_monitor_; 149 0 : } 150 : 151 0 : SuccessRateMonitor& getSRMonitor(SuccessRateMonitorType type) { 152 : // Call const version of the same method 153 0 : return const_cast<SuccessRateMonitor&>( 154 0 : const_cast<const DetectorHostMonitorImpl*>(this)->getSRMonitor(type)); 155 0 : } 156 : 157 0 : double successRate(SuccessRateMonitorType type) const override { 158 0 : return getSRMonitor(type).getSuccessRate(); 159 0 : } 160 : void updateCurrentSuccessRateBucket(); 161 0 : void successRate(SuccessRateMonitorType type, double new_success_rate) { 162 0 : getSRMonitor(type).setSuccessRate(new_success_rate); 163 0 : } 164 : 165 : // handlers for reporting local origin errors 166 : void localOriginFailure(); 167 : void localOriginNoFailure(); 168 : 169 : // handlers for setting and getting jitter, used to add a random value 170 : // to outlier eject time in order to prevent a connection storm when 171 : // hosts are unejected 172 0 : void setJitter(const std::chrono::milliseconds jitter) { jitter_ = jitter; } 173 0 : std::chrono::milliseconds getJitter() const { return jitter_; } 174 : 175 : private: 176 : std::weak_ptr<DetectorImpl> detector_; 177 : std::weak_ptr<Host> host_; 178 : absl::optional<MonotonicTime> last_ejection_time_; 179 : absl::optional<MonotonicTime> last_unejection_time_; 180 : uint32_t num_ejections_{}; 181 : // Determines ejection time. Each time a node is ejected, 182 : // the eject_time_backoff is incremented. The value is decremented 183 : // each time the node was healthy and not ejected. 184 : uint32_t eject_time_backoff_{}; 185 : 186 : // counters for externally generated failures 187 : std::atomic<uint32_t> consecutive_5xx_{0}; 188 : std::atomic<uint32_t> consecutive_gateway_failure_{0}; 189 : 190 : // counters for local origin failures 191 : std::atomic<uint32_t> consecutive_local_origin_failure_{0}; 192 : 193 : // jitter for outlier ejection time 194 : std::chrono::milliseconds jitter_; 195 : 196 : // success rate monitors: 197 : // - external_origin: for all events when external/local are not split 198 : // and for external origin failures when external/local events are split 199 : // - local origin: for local events when external/local events are split and 200 : // not used when external/local events are not split. 201 : SuccessRateMonitor external_origin_sr_monitor_; 202 : SuccessRateMonitor local_origin_sr_monitor_; 203 : 204 : void putResultNoLocalExternalSplit(Result result, absl::optional<uint64_t> code); 205 : void putResultWithLocalExternalSplit(Result result, absl::optional<uint64_t> code); 206 : std::function<void(DetectorHostMonitorImpl*, Result, absl::optional<uint64_t> code)> 207 : put_result_func_; 208 : }; 209 : 210 : /** 211 : * All outlier detection stats. @see stats_macros.h 212 : */ 213 : #define ALL_OUTLIER_DETECTION_STATS(COUNTER, GAUGE) \ 214 0 : COUNTER(ejections_consecutive_5xx) \ 215 0 : COUNTER(ejections_detected_consecutive_5xx) \ 216 0 : COUNTER(ejections_detected_consecutive_gateway_failure) \ 217 0 : COUNTER(ejections_detected_success_rate) \ 218 0 : COUNTER(ejections_detected_failure_percentage) \ 219 0 : COUNTER(ejections_enforced_consecutive_5xx) \ 220 0 : COUNTER(ejections_enforced_consecutive_gateway_failure) \ 221 0 : COUNTER(ejections_enforced_success_rate) \ 222 0 : COUNTER(ejections_enforced_failure_percentage) \ 223 0 : COUNTER(ejections_detected_consecutive_local_origin_failure) \ 224 0 : COUNTER(ejections_enforced_consecutive_local_origin_failure) \ 225 0 : COUNTER(ejections_detected_local_origin_success_rate) \ 226 0 : COUNTER(ejections_enforced_local_origin_success_rate) \ 227 0 : COUNTER(ejections_detected_local_origin_failure_percentage) \ 228 0 : COUNTER(ejections_enforced_local_origin_failure_percentage) \ 229 0 : COUNTER(ejections_enforced_total) \ 230 0 : COUNTER(ejections_overflow) \ 231 0 : COUNTER(ejections_success_rate) \ 232 0 : COUNTER(ejections_total) \ 233 0 : GAUGE(ejections_active, Accumulate) 234 : 235 : /** 236 : * Struct definition for all outlier detection stats. @see stats_macros.h 237 : */ 238 : struct DetectionStats { 239 : ALL_OUTLIER_DETECTION_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT) 240 : }; 241 : 242 : // Names used in runtime configuration. 243 : constexpr absl::string_view MaxEjectionPercentRuntime = "outlier_detection.max_ejection_percent"; 244 : constexpr absl::string_view ConsecutiveGatewayFailureRuntime = 245 : "outlier_detection.consecutive_gateway_failure"; 246 : constexpr absl::string_view Consecutive5xxRuntime = "outlier_detection.consecutive_5xx"; 247 : constexpr absl::string_view ConsecutiveLocalOriginFailureRuntime = 248 : "outlier_detection.consecutive_local_origin_failure"; 249 : constexpr absl::string_view IntervalMsRuntime = "outlier_detection.interval_ms"; 250 : constexpr absl::string_view BaseEjectionTimeMsRuntime = "outlier_detection.base_ejection_time_ms"; 251 : constexpr absl::string_view MaxEjectionTimeMsRuntime = "outlier_detection.max_ejection_time_ms"; 252 : constexpr absl::string_view EnforcingConsecutive5xxRuntime = 253 : "outlier_detection.enforcing_consecutive_5xx"; 254 : constexpr absl::string_view EnforcingConsecutiveGatewayFailureRuntime = 255 : "outlier_detection.enforcing_consecutive_gateway_failure"; 256 : constexpr absl::string_view EnforcingSuccessRateRuntime = 257 : "outlier_detection.enforcing_success_rate"; 258 : constexpr absl::string_view EnforcingConsecutiveLocalOriginFailureRuntime = 259 : "outlier_detection.enforcing_consecutive_local_origin_failure"; 260 : constexpr absl::string_view EnforcingLocalOriginSuccessRateRuntime = 261 : "outlier_detection.enforcing_local_origin_success_rate"; 262 : constexpr absl::string_view EnforcingFailurePercentageRuntime = 263 : "outlier_detection.enforcing_failure_percentage"; 264 : constexpr absl::string_view EnforcingFailurePercentageLocalOriginRuntime = 265 : "outlier_detection.enforcing_failure_percentage_local_origin"; 266 : constexpr absl::string_view SuccessRateMinimumHostsRuntime = 267 : "outlier_detection.success_rate_minimum_hosts"; 268 : constexpr absl::string_view SuccessRateRequestVolumeRuntime = 269 : "outlier_detection.success_rate_request_volume"; 270 : constexpr absl::string_view FailurePercentageMinimumHostsRuntime = 271 : "outlier_detection.failure_percentage_minimum_hosts"; 272 : constexpr absl::string_view FailurePercentageRequestVolumeRuntime = 273 : "outlier_detection.failure_percentage_request_volume"; 274 : constexpr absl::string_view SuccessRateStdevFactorRuntime = 275 : "outlier_detection.success_rate_stdev_factor"; 276 : constexpr absl::string_view FailurePercentageThresholdRuntime = 277 : "outlier_detection.failure_percentage_threshold"; 278 : constexpr absl::string_view MaxEjectionTimeJitterMsRuntime = 279 : "outlier_detection.max_ejection_time_jitter_ms"; 280 : 281 : /** 282 : * Configuration for the outlier detection. 283 : */ 284 : class DetectorConfig { 285 : public: 286 : DetectorConfig(const envoy::config::cluster::v3::OutlierDetection& config); 287 : 288 0 : uint64_t intervalMs() const { return interval_ms_; } 289 0 : uint64_t baseEjectionTimeMs() const { return base_ejection_time_ms_; } 290 0 : uint64_t consecutive5xx() const { return consecutive_5xx_; } 291 0 : uint64_t consecutiveGatewayFailure() const { return consecutive_gateway_failure_; } 292 0 : uint64_t maxEjectionPercent() const { return max_ejection_percent_; } 293 0 : uint64_t successRateMinimumHosts() const { return success_rate_minimum_hosts_; } 294 0 : uint64_t successRateRequestVolume() const { return success_rate_request_volume_; } 295 0 : uint64_t successRateStdevFactor() const { return success_rate_stdev_factor_; } 296 0 : uint64_t failurePercentageThreshold() const { return failure_percentage_threshold_; } 297 0 : uint64_t failurePercentageMinimumHosts() const { return failure_percentage_minimum_hosts_; } 298 0 : uint64_t failurePercentageRequestVolume() const { return failure_percentage_request_volume_; } 299 0 : uint64_t enforcingConsecutive5xx() const { return enforcing_consecutive_5xx_; } 300 0 : uint64_t enforcingConsecutiveGatewayFailure() const { 301 0 : return enforcing_consecutive_gateway_failure_; 302 0 : } 303 0 : uint64_t enforcingSuccessRate() const { return enforcing_success_rate_; } 304 0 : uint64_t enforcingFailurePercentage() const { return enforcing_failure_percentage_; } 305 0 : uint64_t enforcingFailurePercentageLocalOrigin() const { 306 0 : return enforcing_failure_percentage_local_origin_; 307 0 : } 308 0 : bool splitExternalLocalOriginErrors() const { return split_external_local_origin_errors_; } 309 0 : uint64_t consecutiveLocalOriginFailure() const { return consecutive_local_origin_failure_; } 310 0 : uint64_t enforcingConsecutiveLocalOriginFailure() const { 311 0 : return enforcing_consecutive_local_origin_failure_; 312 0 : } 313 0 : uint64_t enforcingLocalOriginSuccessRate() const { return enforcing_local_origin_success_rate_; } 314 0 : uint64_t maxEjectionTimeMs() const { return max_ejection_time_ms_; } 315 0 : uint64_t maxEjectionTimeJitterMs() const { return max_ejection_time_jitter_ms_; } 316 0 : bool successfulActiveHealthCheckUnejectHost() const { 317 0 : return successful_active_health_check_uneject_host_; 318 0 : } 319 : 320 : private: 321 : const uint64_t interval_ms_; 322 : const uint64_t base_ejection_time_ms_; 323 : const uint64_t consecutive_5xx_; 324 : const uint64_t consecutive_gateway_failure_; 325 : const uint64_t max_ejection_percent_; 326 : const uint64_t success_rate_minimum_hosts_; 327 : const uint64_t success_rate_request_volume_; 328 : const uint64_t success_rate_stdev_factor_; 329 : const uint64_t failure_percentage_threshold_; 330 : const uint64_t failure_percentage_minimum_hosts_; 331 : const uint64_t failure_percentage_request_volume_; 332 : const uint64_t enforcing_consecutive_5xx_; 333 : const uint64_t enforcing_consecutive_gateway_failure_; 334 : const uint64_t enforcing_success_rate_; 335 : const uint64_t enforcing_failure_percentage_; 336 : const uint64_t enforcing_failure_percentage_local_origin_; 337 : const bool split_external_local_origin_errors_; 338 : const uint64_t consecutive_local_origin_failure_; 339 : const uint64_t enforcing_consecutive_local_origin_failure_; 340 : const uint64_t enforcing_local_origin_success_rate_; 341 : const uint64_t max_ejection_time_ms_; 342 : const uint64_t max_ejection_time_jitter_ms_; 343 : const bool successful_active_health_check_uneject_host_; 344 : 345 : static constexpr uint64_t DEFAULT_INTERVAL_MS = 10000; 346 : static constexpr uint64_t DEFAULT_BASE_EJECTION_TIME_MS = 30000; 347 : static constexpr uint64_t DEFAULT_CONSECUTIVE_5XX = 5; 348 : static constexpr uint64_t DEFAULT_CONSECUTIVE_GATEWAY_FAILURE = 5; 349 : static constexpr uint64_t DEFAULT_MAX_EJECTION_PERCENT = 10; 350 : static constexpr uint64_t DEFAULT_SUCCESS_RATE_MINIMUM_HOSTS = 5; 351 : static constexpr uint64_t DEFAULT_SUCCESS_RATE_REQUEST_VOLUME = 100; 352 : static constexpr uint64_t DEFAULT_SUCCESS_RATE_STDEV_FACTOR = 1900; 353 : static constexpr uint64_t DEFAULT_FAILURE_PERCENTAGE_THRESHOLD = 85; 354 : static constexpr uint64_t DEFAULT_FAILURE_PERCENTAGE_MINIMUM_HOSTS = 5; 355 : static constexpr uint64_t DEFAULT_FAILURE_PERCENTAGE_REQUEST_VOLUME = 50; 356 : static constexpr uint64_t DEFAULT_ENFORCING_CONSECUTIVE_5XX = 100; 357 : static constexpr uint64_t DEFAULT_ENFORCING_CONSECUTIVE_GATEWAY_FAILURE = 0; 358 : static constexpr uint64_t DEFAULT_ENFORCING_SUCCESS_RATE = 100; 359 : static constexpr uint64_t DEFAULT_ENFORCING_FAILURE_PERCENTAGE = 0; 360 : static constexpr uint64_t DEFAULT_ENFORCING_FAILURE_PERCENTAGE_LOCAL_ORIGIN = 0; 361 : static constexpr uint64_t DEFAULT_CONSECUTIVE_LOCAL_ORIGIN_FAILURE = 5; 362 : static constexpr uint64_t DEFAULT_ENFORCING_CONSECUTIVE_LOCAL_ORIGIN_FAILURE = 100; 363 : static constexpr uint64_t DEFAULT_ENFORCING_LOCAL_ORIGIN_SUCCESS_RATE = 100; 364 : static constexpr uint64_t DEFAULT_MAX_EJECTION_TIME_MS = 10 * DEFAULT_BASE_EJECTION_TIME_MS; 365 : static constexpr uint64_t DEFAULT_MAX_EJECTION_TIME_JITTER_MS = 0; 366 : }; 367 : 368 : /** 369 : * An implementation of an outlier detector. In the future we may support multiple outlier detection 370 : * implementations with different configuration. For now, as we iterate everything is contained 371 : * within this implementation. 372 : */ 373 : class DetectorImpl : public Detector, public std::enable_shared_from_this<DetectorImpl> { 374 : public: 375 : static absl::StatusOr<std::shared_ptr<DetectorImpl>> 376 : create(Cluster& cluster, const envoy::config::cluster::v3::OutlierDetection& config, 377 : Event::Dispatcher& dispatcher, Runtime::Loader& runtime, TimeSource& time_source, 378 : EventLoggerSharedPtr event_logger, Random::RandomGenerator& random); 379 : ~DetectorImpl() override; 380 : 381 : void onConsecutive5xx(HostSharedPtr host); 382 : void onConsecutiveGatewayFailure(HostSharedPtr host); 383 : void onConsecutiveLocalOriginFailure(HostSharedPtr host); 384 0 : Runtime::Loader& runtime() { return runtime_; } 385 0 : DetectorConfig& config() { return config_; } 386 : void unejectHost(HostSharedPtr host); 387 : 388 : // Upstream::Outlier::Detector 389 0 : void addChangedStateCb(ChangeStateCb cb) override { callbacks_.push_back(cb); } 390 : double 391 0 : successRateAverage(DetectorHostMonitor::SuccessRateMonitorType monitor_type) const override { 392 0 : return getSRNums(monitor_type).success_rate_average_; 393 0 : } 394 : double successRateEjectionThreshold( 395 0 : DetectorHostMonitor::SuccessRateMonitorType monitor_type) const override { 396 0 : return getSRNums(monitor_type).ejection_threshold_; 397 0 : } 398 : 399 : /** 400 : * This function returns pair of double values for success rate outlier detection. The pair 401 : * contains the average success rate of all valid hosts in the cluster and the ejection threshold. 402 : * If a host's success rate is under this threshold, the host is an outlier. 403 : * @param success_rate_sum is the sum of the data in the success_rate_data vector. 404 : * @param valid_success_rate_hosts is the vector containing the individual success rate data 405 : * points. 406 : * @return EjectionPair 407 : */ 408 : struct EjectionPair { 409 : double success_rate_average_; // average success rate of all valid hosts in the cluster 410 : double ejection_threshold_; // ejection threshold for the cluster 411 : }; 412 : static EjectionPair 413 : successRateEjectionThreshold(double success_rate_sum, 414 : const std::vector<HostSuccessRatePair>& valid_success_rate_hosts, 415 : double success_rate_stdev_factor); 416 : 417 0 : const absl::node_hash_map<HostSharedPtr, DetectorHostMonitorImpl*>& getHostMonitors() { 418 0 : return host_monitors_; 419 0 : } 420 : 421 : private: 422 : DetectorImpl(const Cluster& cluster, const envoy::config::cluster::v3::OutlierDetection& config, 423 : Event::Dispatcher& dispatcher, Runtime::Loader& runtime, TimeSource& time_source, 424 : EventLoggerSharedPtr event_logger, Random::RandomGenerator& random); 425 : 426 : void addHostMonitor(HostSharedPtr host); 427 : void armIntervalTimer(); 428 : void checkHostForUneject(HostSharedPtr host, DetectorHostMonitorImpl* monitor, MonotonicTime now); 429 : void ejectHost(HostSharedPtr host, envoy::data::cluster::v3::OutlierEjectionType type); 430 : static DetectionStats generateStats(Stats::Scope& scope); 431 : void initialize(Cluster& cluster); 432 : void onConsecutiveErrorWorker(HostSharedPtr host, 433 : envoy::data::cluster::v3::OutlierEjectionType type); 434 : void notifyMainThreadConsecutiveError(HostSharedPtr host, 435 : envoy::data::cluster::v3::OutlierEjectionType type); 436 : void onIntervalTimer(); 437 : void runCallbacks(HostSharedPtr host); 438 : bool enforceEjection(envoy::data::cluster::v3::OutlierEjectionType type); 439 : void updateEnforcedEjectionStats(envoy::data::cluster::v3::OutlierEjectionType type); 440 : void updateDetectedEjectionStats(envoy::data::cluster::v3::OutlierEjectionType type); 441 : void processSuccessRateEjections(DetectorHostMonitor::SuccessRateMonitorType monitor_type); 442 : 443 : // The helper to double write value and gauge. The gauge could be null value since because any 444 : // stat might be deactivated. 445 : class EjectionsActiveHelper { 446 : public: 447 0 : EjectionsActiveHelper(Envoy::Stats::Gauge& gauge) : ejections_active_ref_(gauge) {} 448 0 : void inc() { 449 0 : ejections_active_ref_.inc(); 450 0 : ++ejections_active_value_; 451 0 : } 452 0 : void dec() { 453 0 : ejections_active_ref_.dec(); 454 0 : --ejections_active_value_; 455 0 : } 456 0 : uint64_t value() { return ejections_active_value_.load(); } 457 : Envoy::Stats::Gauge& ejections_active_ref_; 458 : std::atomic<uint64_t> ejections_active_value_{0}; 459 : }; 460 : DetectorConfig config_; 461 : Event::Dispatcher& dispatcher_; 462 : Runtime::Loader& runtime_; 463 : TimeSource& time_source_; 464 : DetectionStats stats_; 465 : EjectionsActiveHelper ejections_active_helper_{stats_.ejections_active_}; 466 : Event::TimerPtr interval_timer_; 467 : std::list<ChangeStateCb> callbacks_; 468 : absl::node_hash_map<HostSharedPtr, DetectorHostMonitorImpl*> host_monitors_; 469 : EventLoggerSharedPtr event_logger_; 470 : Common::CallbackHandlePtr member_update_cb_; 471 : Random::RandomGenerator& random_generator_; 472 : 473 : // EjectionPair for external and local origin events. 474 : // When external/local origin events are not split, external_origin_sr_num_ are used for 475 : // both types of events: external and local. local_origin_sr_num_ is not used. 476 : // When external/local origin events are split, external_origin_sr_num_ are used only 477 : // for external events and local_origin_sr_num_ is used for local origin events. 478 : EjectionPair external_origin_sr_num_; 479 : EjectionPair local_origin_sr_num_; 480 : 481 0 : const EjectionPair& getSRNums(DetectorHostMonitor::SuccessRateMonitorType monitor_type) const { 482 0 : return (DetectorHostMonitor::SuccessRateMonitorType::ExternalOrigin == monitor_type) 483 0 : ? external_origin_sr_num_ 484 0 : : local_origin_sr_num_; 485 0 : } 486 0 : EjectionPair& getSRNums(DetectorHostMonitor::SuccessRateMonitorType monitor_type) { 487 0 : return const_cast<EjectionPair&>( 488 0 : static_cast<const DetectorImpl&>(*this).getSRNums(monitor_type)); 489 0 : } 490 : }; 491 : 492 : class EventLoggerImpl : public EventLogger { 493 : public: 494 : EventLoggerImpl(AccessLog::AccessLogManager& log_manager, const std::string& file_name, 495 : TimeSource& time_source) 496 : : file_(log_manager.createAccessLog( 497 : Filesystem::FilePathAndType{Filesystem::DestinationType::File, file_name})), 498 0 : time_source_(time_source) {} 499 : 500 : // Upstream::Outlier::EventLogger 501 : void logEject(const HostDescriptionConstSharedPtr& host, Detector& detector, 502 : envoy::data::cluster::v3::OutlierEjectionType type, bool enforced) override; 503 : 504 : void logUneject(const HostDescriptionConstSharedPtr& host) override; 505 : 506 : private: 507 : void setCommonEventParams(envoy::data::cluster::v3::OutlierDetectionEvent& event, 508 : const HostDescriptionConstSharedPtr& host, 509 : absl::optional<MonotonicTime> time); 510 : 511 : AccessLog::AccessLogFileSharedPtr file_; 512 : TimeSource& time_source_; 513 : }; 514 : 515 : } // namespace Outlier 516 : } // namespace Upstream 517 : } // namespace Envoy