/proc/self/cwd/source/server/overload_manager_impl.h
Line | Count | Source (jump to first uncovered line) |
1 | | #pragma once |
2 | | |
3 | | #include <chrono> |
4 | | #include <vector> |
5 | | |
6 | | #include "envoy/api/api.h" |
7 | | #include "envoy/config/overload/v3/overload.pb.h" |
8 | | #include "envoy/event/dispatcher.h" |
9 | | #include "envoy/event/scaled_range_timer_manager.h" |
10 | | #include "envoy/protobuf/message_validator.h" |
11 | | #include "envoy/server/options.h" |
12 | | #include "envoy/server/overload/overload_manager.h" |
13 | | #include "envoy/server/resource_monitor.h" |
14 | | #include "envoy/stats/scope.h" |
15 | | #include "envoy/stats/stats.h" |
16 | | #include "envoy/thread_local/thread_local.h" |
17 | | |
18 | | #include "source/common/common/logger.h" |
19 | | #include "source/common/event/scaled_range_timer_manager_impl.h" |
20 | | |
21 | | #include "absl/container/node_hash_map.h" |
22 | | #include "absl/container/node_hash_set.h" |
23 | | |
24 | | namespace Envoy { |
25 | | namespace Server { |
26 | | |
27 | | /** |
28 | | * Trigger encapsulates translating resource pressure into the corresponding |
29 | | * OverloadActionState. This is an interface: both methods below are declared PURE. |
30 | | */ |
31 | | class Trigger { |
32 | | public: |
33 | 0 | virtual ~Trigger() = default; |
34 | | |
35 | | // Feeds the latest value of the metric to the trigger and returns whether the trigger has changed state. |
36 | | virtual bool updateValue(double value) PURE; |
37 | | |
38 | | // Returns the action state for the trigger. Declared const. |
39 | | virtual OverloadActionState actionState() const PURE; |
40 | | }; |
41 | | |
42 | | class OverloadAction { |
43 | | public: |
44 | | OverloadAction(const envoy::config::overload::v3::OverloadAction& config, |
45 | | Stats::Scope& stats_scope); |
46 | | |
47 | | // Updates the current pressure for the resource identified by name and returns whether the action |
48 | | // has changed state as a result. |
49 | | bool updateResourcePressure(const std::string& name, double pressure); |
50 | | |
51 | | // Returns the current action state, i.e. the max state across all registered triggers. |
52 | | OverloadActionState getState() const; |
53 | | |
54 | | private: |
55 | | using TriggerPtr = std::unique_ptr<Trigger>; |
56 | | absl::node_hash_map<std::string, TriggerPtr> triggers_; |
57 | | OverloadActionState state_; |
58 | | Stats::Gauge& active_gauge_; |
59 | | Stats::Gauge& scale_percent_gauge_; |
60 | | }; |
61 | | |
62 | | /** |
63 | | * Implements LoadShedPoint, which is a particular point in the connection / |
64 | | * request lifecycle where we can either abort or continue the given work. Instances are not copyable: copy construction and copy assignment are deleted. |
65 | | */ |
66 | | class LoadShedPointImpl : public LoadShedPoint { |
67 | | public: |
68 | | LoadShedPointImpl(const envoy::config::overload::v3::LoadShedPoint& config, |
69 | | Stats::Scope& stats_scope, Random::RandomGenerator& random_generator); |
70 | | LoadShedPointImpl(const LoadShedPointImpl&) = delete; |
71 | | LoadShedPointImpl& operator=(const LoadShedPointImpl&) = delete; |
72 | | |
73 | | bool shouldShedLoad() override; |
74 | | |
75 | | /** |
76 | | * Provides a resource update to the LoadShedPoint. If needed, it will update the |
77 | | * probability of shedding load at this point atomically. |
78 | | * @param resource_name - the name of the resource that has been updated. |
79 | | * @param resource_utilization - utilization of this resource, expressed as the ratio of |
80 | | * current usage to the resource limit. |
81 | | */ |
82 | | void updateResource(absl::string_view resource_name, double resource_utilization); |
83 | | |
84 | | private: |
85 | | using TriggerPtr = std::unique_ptr<Trigger>; |
86 | | |
87 | | // Helper that handles updating the probability of shedding load given the triggers. |
88 | | void updateProbabilityShedLoad(); |
89 | | |
90 | | absl::flat_hash_map<std::string, TriggerPtr> triggers_; |
91 | | std::atomic<float> probability_shed_load_{0}; |
92 | | Stats::Gauge& scale_percent_; |
93 | | Random::RandomGenerator& random_generator_; |
94 | | }; |
95 | | |
96 | | // Simple table that converts strings into Symbol instances and back. Symbols are guaranteed to start at 0 |
97 | | // and be indexed sequentially. |
98 | | class NamedOverloadActionSymbolTable { |
99 | | public: |
100 | | class Symbol { |
101 | | public: |
102 | | // Allow copy construction everywhere. |
103 | | Symbol(const Symbol&) = default; |
104 | | |
105 | | // Returns the index of the symbol in the table. |
106 | 0 | size_t index() const { return index_; } |
107 | | |
108 | | // Support use as a map key: two symbols are equal when their indices are equal. |
109 | 0 | bool operator==(const Symbol& other) const { return other.index_ == index_; } |
110 | | |
111 | | // Support absl::Hash by combining the symbol's index into the hash state. |
112 | | template <typename H> |
113 | 26.6k | friend H AbslHashValue(H h, const Symbol& s) { // NOLINT(readability-identifier-naming) |
114 | 26.6k | return H::combine(std::move(h), s.index_); |
115 | 26.6k | } |
116 | | |
117 | | private: |
118 | | friend class NamedOverloadActionSymbolTable; |
119 | | // Only the symbol table class (a friend, see above) can create Symbol instances from indices. |
120 | 26.6k | explicit Symbol(size_t index) : index_(index) {} |
121 | | |
122 | | size_t index_; |
123 | | }; |
124 | | |
125 | | // Finds the existing entry for the given name, or adds a new one if there is none. |
126 | | Symbol get(absl::string_view name); |
127 | | |
128 | | // Returns the symbol for the name if there is one, otherwise nullopt. Declared const. |
129 | | absl::optional<Symbol> lookup(absl::string_view string) const; |
130 | | |
131 | | // Translates a symbol back into a name. |
132 | | const absl::string_view name(Symbol symbol) const; |
133 | | |
134 | | // Returns the number of symbols in the table (the size of table_). All symbols are guaranteed to have an index less |
135 | | // than size(). |
136 | 5.29k | size_t size() const { return table_.size(); } |
137 | | |
138 | | private: |
139 | | absl::flat_hash_map<std::string, size_t> table_; |
140 | | std::vector<std::string> names_; |
141 | | }; |
142 | | |
143 | | class ThreadLocalOverloadStateImpl; |
144 | | |
145 | | class OverloadManagerImpl : Logger::Loggable<Logger::Id::main>, public OverloadManager { |
146 | | public: |
147 | | OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope, |
148 | | ThreadLocal::SlotAllocator& slot_allocator, |
149 | | const envoy::config::overload::v3::OverloadManager& config, |
150 | | ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api, |
151 | | const Server::Options& options); |
152 | | |
153 | | // Server::OverloadManager |
154 | | void start() override; |
155 | | bool registerForAction(const std::string& action, Event::Dispatcher& dispatcher, |
156 | | OverloadActionCb callback) override; |
157 | | ThreadLocalOverloadState& getThreadLocalOverloadState() override; |
158 | | LoadShedPoint* getLoadShedPoint(absl::string_view point_name) override; |
159 | | Event::ScaledRangeTimerManagerFactory scaledTimerFactory() override; |
160 | | |
161 | | // Stop the overload manager timer and wait for any pending resource updates to complete. |
162 | | // After this returns, overload manager clients should not receive any more callbacks |
163 | | // about overload state changes. |
164 | | void stop(); |
165 | | |
166 | | protected: |
167 | | // Factory for timer managers. Virtual and protected so that test-only subclasses can inject a mock implementation. |
168 | | virtual Event::ScaledRangeTimerManagerPtr createScaledRangeTimerManager( |
169 | | Event::Dispatcher& dispatcher, |
170 | | const Event::ScaledTimerTypeMapConstSharedPtr& timer_minimums) const; |
171 | | |
172 | | private: |
173 | | using FlushEpochId = uint64_t; |
174 | | class Resource : public ResourceUpdateCallbacks { |
175 | | public: |
176 | | Resource(const std::string& name, ResourceMonitorPtr monitor, OverloadManagerImpl& manager, |
177 | | Stats::Scope& stats_scope); |
178 | | |
179 | | // ResourceUpdateCallbacks |
180 | | void onSuccess(const ResourceUsage& usage) override; |
181 | | void onFailure(const EnvoyException& error) override; |
182 | | |
183 | | void update(FlushEpochId flush_epoch); |
184 | | |
185 | | private: |
186 | | const std::string name_; |
187 | | ResourceMonitorPtr monitor_; |
188 | | OverloadManagerImpl& manager_; |
189 | | bool pending_update_{false}; |
190 | | FlushEpochId flush_epoch_; |
191 | | Stats::Gauge& pressure_gauge_; |
192 | | Stats::Counter& failed_updates_counter_; |
193 | | Stats::Counter& skipped_updates_counter_; |
194 | | }; |
195 | | |
196 | | struct ActionCallback { |
197 | | ActionCallback(Event::Dispatcher& dispatcher, OverloadActionCb callback) |
198 | 0 | : dispatcher_(dispatcher), callback_(callback) {} |
199 | | Event::Dispatcher& dispatcher_; |
200 | | OverloadActionCb callback_; |
201 | | }; |
202 | | |
203 | | void updateResourcePressure(const std::string& resource, double pressure, |
204 | | FlushEpochId flush_epoch); |
205 | | // Flushes any enqueued action state updates to all worker threads. |
206 | | void flushResourceUpdates(); |
207 | | |
208 | | bool started_{false}; |
209 | | Event::Dispatcher& dispatcher_; |
210 | | TimeSource& time_source_; |
211 | | ThreadLocal::TypedSlot<ThreadLocalOverloadStateImpl> tls_; |
212 | | NamedOverloadActionSymbolTable action_symbol_table_; |
213 | | const std::chrono::milliseconds refresh_interval_; |
214 | | // Tracks the latency between resource refresh updates. |
215 | | Stats::Histogram& refresh_interval_delays_; |
216 | | // Tracks when we last ran the resource monitor refresh loop. |
217 | | // For the first measurement, we use the time when the Overload Manager first starts. |
218 | | MonotonicTime time_resources_last_measured_; |
219 | | Event::TimerPtr timer_; |
220 | | absl::node_hash_map<std::string, Resource> resources_; |
221 | | std::shared_ptr<absl::node_hash_map<OverloadProactiveResourceName, ProactiveResource>> |
222 | | proactive_resources_; |
223 | | |
224 | | absl::node_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadAction> actions_; |
225 | | |
226 | | absl::flat_hash_map<std::string, std::unique_ptr<LoadShedPointImpl>> loadshed_points_; |
227 | | |
228 | | Event::ScaledTimerTypeMapConstSharedPtr timer_minimums_; |
229 | | |
230 | | absl::flat_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadActionState> |
231 | | state_updates_to_flush_; |
232 | | absl::flat_hash_map<ActionCallback*, OverloadActionState> callbacks_to_flush_; |
233 | | FlushEpochId flush_epoch_ = 0; |
234 | | uint64_t flush_awaiting_updates_ = 0; |
235 | | |
236 | | using ResourceToActionMap = |
237 | | std::unordered_multimap<std::string, NamedOverloadActionSymbolTable::Symbol>; |
238 | | ResourceToActionMap resource_to_actions_; |
239 | | |
240 | | using ActionToCallbackMap = |
241 | | std::unordered_multimap<NamedOverloadActionSymbolTable::Symbol, ActionCallback, |
242 | | absl::Hash<NamedOverloadActionSymbolTable::Symbol>>; |
243 | | ActionToCallbackMap action_to_callbacks_; |
244 | | }; |
245 | | |
246 | | } // namespace Server |
247 | | } // namespace Envoy |