1
#pragma once
2

            
3
#include <chrono>
4
#include <cstdint>
5
#include <functional>
6
#include <memory>
7

            
8
#include "envoy/common/pure.h"
9
#include "envoy/common/time.h"
10
#include "envoy/data/cluster/v3/outlier_detection_event.pb.h"
11

            
12
#include "absl/types/optional.h"
13

            
14
namespace Envoy {
15
namespace Upstream {
16

            
17
// Forward declaration: this header only refers to Host through a shared pointer.
class Host;
using HostSharedPtr = std::shared_ptr<Host>;

// Forward declaration: this header only refers to HostDescription through a shared pointer to
// const.
class HostDescription;
using HostDescriptionConstSharedPtr = std::shared_ptr<const HostDescription>;
22

            
23
namespace Outlier {
24

            
25
/**
 * Non-HTTP result of requests/operations.
 *
 * The first group of values describes local origin events detected by Envoy itself. The second
 * group describes external origin events and only makes sense when Envoy understands
 * requests/responses for the protocol being proxied.
 */
enum class Result {
  // Local origin errors detected by Envoy.
  LocalOriginTimeout,             // Timed out while connecting or executing a request.
  LocalOriginConnectFailed,       // Remote host rejected the connection.
  LocalOriginConnectSuccess,      // Successfully established a connection to upstream host.
                                  // Use this code when there is another protocol on top of
                                  // transport protocol. For example HTTP runs on top of tcp.
                                  // The same for redis. It first establishes TCP and then runs
                                  // a transaction.
  LocalOriginConnectSuccessFinal, // Successfully established a connection to upstream host.
                                  // Use this code when there is no other protocol on top of the
                                  // protocol used by a filter. For example tcp_proxy filter
                                  // serves only tcp level. There is no other protocol on top of
                                  // tcp which the tcp_proxy filter is aware of.

  // The entries below only make sense when Envoy understands requests/responses for the
  // protocol being proxied. They do not make sense for TcpProxy, for example.
  // External origin errors.
  ExtOriginRequestFailed,  // The server indicated it cannot process a request.
  ExtOriginRequestSuccess, // Request was completed successfully.
  ExtOriginRequestDegraded // The server is degraded.
};
50

            
51
/**
52
 * Monitor for per host data. Proxy filters should send pertinent data when available.
53
 */
54
class DetectorHostMonitor {
55
public:
56
  // Types of Success Rate monitors.
57
  enum class SuccessRateMonitorType { ExternalOrigin, LocalOrigin };
58

            
59
152731
  virtual ~DetectorHostMonitor() = default;
60

            
61
  /**
62
   * @return the number of times this host has been ejected.
63
   */
64
  virtual uint32_t numEjections() PURE;
65

            
66
  /**
67
   * Add a non-HTTP result for a host.
68
   * Some non-HTTP codes like TIMEOUT may require special mapping to HTTP code
69
   * and such code may be passed as optional parameter.
70
   */
71
  virtual void putResult(Result result, absl::optional<uint64_t> code) PURE;
72

            
73
  /**
74
   * Wrapper around putResult with 2 params when mapping to HTTP code is not
75
   * required.
76
   */
77
53654
  void putResult(Result result) { putResult(result, absl::nullopt); }
78

            
79
  /**
80
   * Add a response time for a host (in this case response time is generic and might be used for
81
   * different operations including HTTP, Mongo, Redis, etc.).
82
   */
83
  virtual void putResponseTime(std::chrono::milliseconds time) PURE;
84

            
85
  /**
86
   * Get the time of last ejection.
87
   * @return the last time this host was ejected, if the host has been ejected previously.
88
   */
89
  virtual const absl::optional<MonotonicTime>& lastEjectionTime() PURE;
90

            
91
  /**
92
   * Get the time of last unejection.
93
   * @return the last time this host was unejected, if the host has been unejected previously.
94
   */
95
  virtual const absl::optional<MonotonicTime>& lastUnejectionTime() PURE;
96

            
97
  /**
98
   * @return the success rate of the host in the last calculated interval, in the range 0-100.
99
   *         -1 means that the host did not have enough request volume to calculate success rate
100
   *         or the cluster did not have enough hosts to run through success rate outlier ejection.
101
   * @param type specifies for which Success Rate Monitor the success rate value should be returned.
102
   *         If the outlier detector is configured not to split external and local origin errors,
103
   *         ExternalOrigin type returns success rate for all types of errors: external and local
104
   * origin and LocalOrigin type returns -1. If the outlier detector is configured to split external
105
   * and local origin errors, ExternalOrigin type returns success rate for external origin errors
106
   * and LocalOrigin type returns success rate for local origin errors.
107
   */
108
  virtual double successRate(SuccessRateMonitorType type) const PURE;
109
};
110

            
111
using DetectorHostMonitorPtr = std::unique_ptr<DetectorHostMonitor>;
112

            
113
/**
114
 * Interface for an outlier detection engine. Uses per host data to determine which hosts in a
115
 * cluster are outliers and should be ejected.
116
 */
117
class Detector {
118
public:
119
118
  virtual ~Detector() = default;
120

            
121
  /**
122
   * Outlier detection change state callback.
123
   */
124
  using ChangeStateCb = std::function<void(const HostSharedPtr& host)>;
125

            
126
  /**
127
   * Add a changed state callback to the detector. The callback will be called whenever any host
128
   * changes state (either ejected or brought back in) due to outlier status.
129
   */
130
  virtual void addChangedStateCb(ChangeStateCb cb) PURE;
131

            
132
  /**
133
   * Returns the average success rate of the hosts in the Detector for the last aggregation
134
   * interval.
135
   * @return the average success rate, or -1 if there were not enough hosts with enough request
136
   *         volume to proceed with success rate based outlier ejection.
137
   * @param type - see DetectorHostMonitor::successRate.
138
   */
139
  virtual double successRateAverage(DetectorHostMonitor::SuccessRateMonitorType) const PURE;
140

            
141
  /**
142
   * Returns the success rate threshold used in the last interval. The threshold is used to eject
143
   * hosts based on their success rate.
144
   * @return the threshold, or -1 if there were not enough hosts with enough request volume to
145
   *         proceed with success rate based outlier ejection.
146
   */
147
  virtual double
148
      successRateEjectionThreshold(DetectorHostMonitor::SuccessRateMonitorType) const PURE;
149
};
150

            
151
using DetectorSharedPtr = std::shared_ptr<Detector>;
152

            
153
/**
154
 * Sink for outlier detection event logs.
155
 */
156
class EventLogger {
157
public:
158
60
  virtual ~EventLogger() = default;
159

            
160
  /**
161
   * Log an ejection event.
162
   * @param host supplies the host that generated the event.
163
   * @param detector supplies the detector that is doing the ejection.
164
   * @param type supplies the type of the event.
165
   * @param enforced is true if the ejection took place; false, if only logging took place.
166
   */
167
  virtual void logEject(const HostDescriptionConstSharedPtr& host, Detector& detector,
168
                        envoy::data::cluster::v3::OutlierEjectionType type, bool enforced) PURE;
169

            
170
  /**
171
   * Log an unejection event.
172
   * @param host supplies the host that generated the event.
173
   */
174
  virtual void logUneject(const HostDescriptionConstSharedPtr& host) PURE;
175
};
176

            
177
using EventLoggerSharedPtr = std::shared_ptr<EventLogger>;
178

            
179
} // namespace Outlier
180
} // namespace Upstream
181
} // namespace Envoy