/proc/self/cwd/source/server/overload_manager_impl.h
Line | Count | Source (jump to first uncovered line) |
1 | | #pragma once |
2 | | |
3 | | #include <chrono> |
4 | | #include <vector> |
5 | | |
6 | | #include "envoy/api/api.h" |
7 | | #include "envoy/config/overload/v3/overload.pb.h" |
8 | | #include "envoy/event/dispatcher.h" |
9 | | #include "envoy/event/scaled_range_timer_manager.h" |
10 | | #include "envoy/protobuf/message_validator.h" |
11 | | #include "envoy/server/options.h" |
12 | | #include "envoy/server/overload/overload_manager.h" |
13 | | #include "envoy/server/resource_monitor.h" |
14 | | #include "envoy/stats/scope.h" |
15 | | #include "envoy/stats/stats.h" |
16 | | #include "envoy/thread_local/thread_local.h" |
17 | | |
18 | | #include "source/common/common/logger.h" |
19 | | #include "source/common/event/scaled_range_timer_manager_impl.h" |
20 | | |
21 | | #include "absl/container/node_hash_map.h" |
22 | | #include "absl/container/node_hash_set.h" |
23 | | |
24 | | namespace Envoy { |
25 | | namespace Server { |
26 | | |
27 | | /** |
28 | | * Trigger encapsulates translating resource pressure into the corresponding |
29 | | * OverloadActionState. |
   | | * |
   | | * This class only declares the interface: both member functions below are marked |
   | | * PURE (presumably the project macro for a pure virtual function - confirm), so the |
   | | * translation itself is supplied by classes deriving from Trigger. |
30 | | */ |
31 | | class Trigger { |
32 | | public: |
33 | 0 | virtual ~Trigger() = default; |
34 | | |
35 | | // Feeds the latest value of the metric to the trigger and returns whether the trigger has changed state as a result. |
36 | | virtual bool updateValue(double value) PURE; |
37 | | |
38 | | // Returns the action state currently reported by the trigger. Declared const. |
39 | | virtual OverloadActionState actionState() const PURE; |
40 | | }; |
41 | | |
42 | | class OverloadAction { |
43 | | public: |
44 | | static absl::StatusOr<std::unique_ptr<OverloadAction>> |
45 | | create(const envoy::config::overload::v3::OverloadAction& config, Stats::Scope& stats_scope); |
46 | | |
47 | | // Records the latest pressure reading for the resource identified by name and returns whether |
48 | | // that update changed the state of this action (true) or left it as it was (false). |
49 | | bool updateResourcePressure(const std::string& name, double pressure); |
50 | | |
51 | | // Returns the current state of the action, which is the max state across all registered triggers. |
52 | | OverloadActionState getState() const; |
53 | | |
54 | | private: |
55 | | OverloadAction(const envoy::config::overload::v3::OverloadAction& config, |
56 | | Stats::Scope& stats_scope, absl::Status& creation_status); |
57 | | |
58 | | using TriggerPtr = std::unique_ptr<Trigger>; |
59 | | absl::node_hash_map<std::string, TriggerPtr> triggers_; |
60 | | OverloadActionState state_; |
61 | | Stats::Gauge& active_gauge_; |
62 | | Stats::Gauge& scale_percent_gauge_; |
63 | | }; |
64 | | |
65 | | /** |
66 | | * Implements a LoadShedPoint, which is a particular point in the connection / |
67 | | * request lifecycle where we can either abort or continue the given work. |
   | | * |
   | | * Instances are built through the static create() factory, which returns an |
   | | * absl::StatusOr. The constructor itself is private and additionally takes a |
   | | * non-const absl::Status& creation_status argument. Copy construction and copy |
   | | * assignment are deleted. |
   | | * |
   | | * shouldShedLoad() is the override of the LoadShedPoint interface; the shed |
   | | * probability is kept in a std::atomic<float> member that is initialised to 0. |
68 | | */ |
69 | | class LoadShedPointImpl : public LoadShedPoint { |
70 | | public: |
71 | | static absl::StatusOr<std::unique_ptr<LoadShedPointImpl>> |
72 | | create(const envoy::config::overload::v3::LoadShedPoint& config, Stats::Scope& stats_scope, |
73 | | Random::RandomGenerator& random_generator); |
74 | | LoadShedPointImpl(const LoadShedPointImpl&) = delete; |
75 | | LoadShedPointImpl& operator=(const LoadShedPointImpl&) = delete; |
76 | | |
77 | | bool shouldShedLoad() override; |
78 | | |
79 | | /** |
80 | | * Provide resource updates for the LoadShedPoint. It will update the |
81 | | * probability of shedding load at this point atomically if needed. |
82 | | * @param resource_name - the name of the resource that has been updated. |
83 | | * @param resource_utilization - utilization of this resource. A ratio of the |
84 | | * current usage / the resource limit. |
85 | | */ |
86 | | void updateResource(absl::string_view resource_name, double resource_utilization); |
87 | | |
88 | | private: |
89 | | LoadShedPointImpl(const envoy::config::overload::v3::LoadShedPoint& config, |
90 | | Stats::Scope& stats_scope, Random::RandomGenerator& random_generator, |
91 | | absl::Status& creation_status); |
92 | | using TriggerPtr = std::unique_ptr<Trigger>; |
93 | | |
94 | | // Helper to handle updating the probability to shed load given the triggers. NOTE(review): presumably writes probability_shed_load_; the definition is not in this header - confirm. |
95 | | void updateProbabilityShedLoad(); |
96 | | |
97 | | absl::flat_hash_map<std::string, TriggerPtr> triggers_; |
98 | | std::atomic<float> probability_shed_load_{0}; |
99 | | Stats::Gauge& scale_percent_; |
100 | | Stats::Counter& shed_load_counter_; |
101 | | Random::RandomGenerator& random_generator_; |
102 | | }; |
103 | | |
104 | | // Simple table that converts strings into Symbol instances and symbols back into names. Symbols are |
105 | | // guaranteed to start at 0 and be indexed sequentially. |
106 | | class NamedOverloadActionSymbolTable { |
107 | | public: |
108 | | class Symbol { |
109 | | public: |
110 | | // Allow copy construction everywhere; construction from a raw index is private (see the private section of this class). |
111 | | Symbol(const Symbol&) = default; |
112 | | |
113 | | // Returns the index of the symbol in the table. |
114 | 0 | size_t index() const { return index_; } |
115 | | |
116 | | // Support use as a map key: two symbols compare equal exactly when their indices are equal. |
117 | 0 | bool operator==(const Symbol& other) const { return other.index_ == index_; } |
118 | | |
119 | | // Support absl::Hash: the hash state is combined with index_ only. |
120 | | template <typename H> |
121 | 22.2k | friend H AbslHashValue(H h, const Symbol& s) { // NOLINT(readability-identifier-naming) |
122 | 22.2k | return H::combine(std::move(h), s.index_); |
123 | 22.2k | } |
124 | | |
125 | | private: |
126 | | friend class NamedOverloadActionSymbolTable; |
127 | | // Only the symbol table class can create Symbol instances from indices (it is the sole friend declared above). |
128 | 22.2k | explicit Symbol(size_t index) : index_(index) {} |
129 | | |
130 | | size_t index_; |
131 | | }; |
132 | | |
133 | | // Finds the existing entry for the given name or adds a new one, and returns its symbol. Not const, since it may add. |
134 | | Symbol get(absl::string_view name); |
135 | | |
136 | | // Returns the symbol for the name if there is one, otherwise nullopt. Declared const, unlike get(). |
137 | | absl::optional<Symbol> lookup(absl::string_view string) const; |
138 | | |
139 | | // Translates a symbol back into a name. NOTE(review): the result is a string_view, presumably into storage owned by this table (names_) - confirm its lifetime in the definition. |
140 | | const absl::string_view name(Symbol symbol) const; |
141 | | |
142 | | // Returns the number of symbols in the table, taken from table_.size(). All symbols are guaranteed to |
143 | | // have an index less than size(). |
144 | 3.95k | size_t size() const { return table_.size(); } |
145 | | |
146 | | private: |
147 | | absl::flat_hash_map<std::string, size_t> table_; |
148 | | std::vector<std::string> names_; |
149 | | }; |
150 | | |
151 | | class ThreadLocalOverloadStateImpl; |
152 | | |
153 | | class OverloadManagerImpl : Logger::Loggable<Logger::Id::main>, public OverloadManager { |
154 | | public: |
155 | | static absl::StatusOr<std::unique_ptr<OverloadManagerImpl>> |
156 | | create(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope, |
157 | | ThreadLocal::SlotAllocator& slot_allocator, |
158 | | const envoy::config::overload::v3::OverloadManager& config, |
159 | | ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api, |
160 | | const Server::Options& options); |
161 | | |
162 | | // Server::OverloadManager interface; every function in this group is marked override. |
163 | | void start() override; |
164 | | bool registerForAction(const std::string& action, Event::Dispatcher& dispatcher, |
165 | | OverloadActionCb callback) override; |
166 | | ThreadLocalOverloadState& getThreadLocalOverloadState() override; |
167 | | LoadShedPoint* getLoadShedPoint(absl::string_view point_name) override; |
168 | | Event::ScaledRangeTimerManagerFactory scaledTimerFactory() override; |
169 | | void stop() override; |
170 | | |
171 | | protected: |
172 | | OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope, |
173 | | ThreadLocal::SlotAllocator& slot_allocator, |
174 | | const envoy::config::overload::v3::OverloadManager& config, |
175 | | ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api, |
176 | | const Server::Options& options, absl::Status& creation_status); |
177 | | |
178 | | // Factory for timer managers. It is virtual and protected, which allows test-only subclasses to inject a mock implementation. |
179 | | virtual Event::ScaledRangeTimerManagerPtr createScaledRangeTimerManager( |
180 | | Event::Dispatcher& dispatcher, |
181 | | const Event::ScaledTimerTypeMapConstSharedPtr& timer_minimums) const; |
182 | | |
183 | | private: |
184 | | using FlushEpochId = uint64_t; |
185 | | class Resource : public ResourceUpdateCallbacks { |
186 | | public: |
187 | | Resource(const std::string& name, ResourceMonitorPtr monitor, OverloadManagerImpl& manager, |
188 | | Stats::Scope& stats_scope); |
189 | | |
190 | | // ResourceMonitor::ResourceUpdateCallbacks: the success and failure notifications, both marked override. |
191 | | void onSuccess(const ResourceUsage& usage) override; |
192 | | void onFailure(const EnvoyException& error) override; |
193 | | |
194 | | void update(FlushEpochId flush_epoch); |
195 | | |
196 | | private: |
197 | | const std::string name_; |
198 | | ResourceMonitorPtr monitor_; |
199 | | OverloadManagerImpl& manager_; |
200 | | bool pending_update_{false}; |
201 | | FlushEpochId flush_epoch_; |
202 | | Stats::Gauge& pressure_gauge_; |
203 | | Stats::Counter& failed_updates_counter_; |
204 | | Stats::Counter& skipped_updates_counter_; |
205 | | }; |
206 | | |
207 | | struct ActionCallback { |
208 | | ActionCallback(Event::Dispatcher& dispatcher, OverloadActionCb callback) |
209 | 0 | : dispatcher_(dispatcher), callback_(callback) {} |
210 | | Event::Dispatcher& dispatcher_; |
211 | | OverloadActionCb callback_; |
212 | | }; |
213 | | |
214 | | void updateResourcePressure(const std::string& resource, double pressure, |
215 | | FlushEpochId flush_epoch); |
216 | | // Flushes any enqueued action state updates to all worker threads. |
217 | | void flushResourceUpdates(); |
218 | | |
219 | | bool started_{false}; |
220 | | Event::Dispatcher& dispatcher_; |
221 | | TimeSource& time_source_; |
222 | | ThreadLocal::TypedSlot<ThreadLocalOverloadStateImpl> tls_; |
223 | | NamedOverloadActionSymbolTable action_symbol_table_; |
224 | | const std::chrono::milliseconds refresh_interval_; |
225 | | // Tracks the latency between resource refresh updates. |
226 | | Stats::Histogram& refresh_interval_delays_; |
227 | | // Tracks when we last ran the resource monitor refresh loop. |
228 | | // For the first measurement, we use the time when the Overload Manager first starts. |
229 | | MonotonicTime time_resources_last_measured_; |
230 | | Event::TimerPtr timer_; |
231 | | absl::node_hash_map<std::string, Resource> resources_; |
232 | | std::shared_ptr<absl::node_hash_map<OverloadProactiveResourceName, ProactiveResource>> |
233 | | proactive_resources_; |
234 | | |
235 | | absl::node_hash_map<NamedOverloadActionSymbolTable::Symbol, std::unique_ptr<OverloadAction>> |
236 | | actions_; |
237 | | |
238 | | absl::flat_hash_map<std::string, std::unique_ptr<LoadShedPointImpl>> loadshed_points_; |
239 | | |
240 | | Event::ScaledTimerTypeMapConstSharedPtr timer_minimums_; |
241 | | |
242 | | absl::flat_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadActionState> |
243 | | state_updates_to_flush_; |
244 | | absl::flat_hash_map<ActionCallback*, OverloadActionState> callbacks_to_flush_; |
245 | | FlushEpochId flush_epoch_ = 0; |
246 | | uint64_t flush_awaiting_updates_ = 0; |
247 | | |
248 | | using ResourceToActionMap = |
249 | | std::unordered_multimap<std::string, NamedOverloadActionSymbolTable::Symbol>; |
250 | | ResourceToActionMap resource_to_actions_; |
251 | | |
252 | | using ActionToCallbackMap = |
253 | | std::unordered_multimap<NamedOverloadActionSymbolTable::Symbol, ActionCallback, |
254 | | absl::Hash<NamedOverloadActionSymbolTable::Symbol>>; |
255 | | ActionToCallbackMap action_to_callbacks_; |
256 | | }; |
257 | | |
258 | | } // namespace Server |
259 | | } // namespace Envoy |