Line data Source code
1 : #pragma once 2 : 3 : #include <chrono> 4 : #include <vector> 5 : 6 : #include "envoy/api/api.h" 7 : #include "envoy/config/overload/v3/overload.pb.h" 8 : #include "envoy/event/dispatcher.h" 9 : #include "envoy/event/scaled_range_timer_manager.h" 10 : #include "envoy/protobuf/message_validator.h" 11 : #include "envoy/server/options.h" 12 : #include "envoy/server/overload/overload_manager.h" 13 : #include "envoy/server/resource_monitor.h" 14 : #include "envoy/stats/scope.h" 15 : #include "envoy/stats/stats.h" 16 : #include "envoy/thread_local/thread_local.h" 17 : 18 : #include "source/common/common/logger.h" 19 : #include "source/common/event/scaled_range_timer_manager_impl.h" 20 : 21 : #include "absl/container/node_hash_map.h" 22 : #include "absl/container/node_hash_set.h" 23 : 24 : namespace Envoy { 25 : namespace Server { 26 : 27 : /** 28 : * Trigger encapsulates translating resource pressure into the corresponding 29 : * OverloadActionState. 30 : */ 31 : class Trigger { 32 : public: 33 0 : virtual ~Trigger() = default; 34 : 35 : // Updates the current value of the metric and returns whether the trigger has changed state. 36 : virtual bool updateValue(double value) PURE; 37 : 38 : // Returns the action state for the trigger. 39 : virtual OverloadActionState actionState() const PURE; 40 : }; 41 : 42 : class OverloadAction { 43 : public: 44 : OverloadAction(const envoy::config::overload::v3::OverloadAction& config, 45 : Stats::Scope& stats_scope); 46 : 47 : // Updates the current pressure for the given resource and returns whether the action 48 : // has changed state. 49 : bool updateResourcePressure(const std::string& name, double pressure); 50 : 51 : // Returns the current action state, which is the max state across all registered triggers. 52 : OverloadActionState getState() const; 53 : 54 : private: 55 : using TriggerPtr = std::unique_ptr<Trigger>; 56 : absl::node_hash_map<std::string, TriggerPtr> triggers_; 57 : OverloadActionState state_; 58 : Stats::Gauge& active_gauge_; 59 : Stats::Gauge& scale_percent_gauge_; 60 : }; 61 : 62 : /** 63 : * Implement a LoadShedPoint which is a particular point in the connection / 64 : * request lifecycle where we can either abort or continue the given work. 65 : */ 66 : class LoadShedPointImpl : public LoadShedPoint { 67 : public: 68 : LoadShedPointImpl(const envoy::config::overload::v3::LoadShedPoint& config, 69 : Stats::Scope& stats_scope, Random::RandomGenerator& random_generator); 70 : LoadShedPointImpl(const LoadShedPointImpl&) = delete; 71 : LoadShedPointImpl& operator=(const LoadShedPointImpl&) = delete; 72 : 73 : bool shouldShedLoad() override; 74 : 75 : /** 76 : * Provide resource updates for the LoadShedPoint. It will update the 77 : * probability of shedding load at this point atomically if needed. 78 : * @param resource_name - the name of the resource that has been updated. 79 : * @param resource_utilization - utilization of this resource. A ratio of the 80 : * current usage / the resource limit. 81 : */ 82 : void updateResource(absl::string_view resource_name, double resource_utilization); 83 : 84 : private: 85 : using TriggerPtr = std::unique_ptr<Trigger>; 86 : 87 : // Helper to handle updating the probability to shed load given the triggers. 88 : void updateProbabilityShedLoad(); 89 : 90 : absl::flat_hash_map<std::string, TriggerPtr> triggers_; 91 : std::atomic<float> probability_shed_load_{0}; 92 : Stats::Gauge& scale_percent_; 93 : Random::RandomGenerator& random_generator_; 94 : }; 95 : 96 : // Simple table that converts strings into Symbol instances. Symbols are guaranteed to start at 0 97 : // and be indexed sequentially. 98 : class NamedOverloadActionSymbolTable { 99 : public: 100 : class Symbol { 101 : public: 102 : // Allow copy construction everywhere. 103 : Symbol(const Symbol&) = default; 104 : 105 : // Returns the index of the symbol in the table. 106 0 : size_t index() const { return index_; } 107 : 108 : // Support use as a map key. 109 0 : bool operator==(const Symbol& other) const { return other.index_ == index_; } 110 : 111 : // Support absl::Hash. 112 : template <typename H> 113 656 : friend H AbslHashValue(H h, const Symbol& s) { // NOLINT(readability-identifier-naming) 114 656 : return H::combine(std::move(h), s.index_); 115 656 : } 116 : 117 : private: 118 : friend class NamedOverloadActionSymbolTable; 119 : // Only the symbol table class can create Symbol instances from indices. 120 656 : explicit Symbol(size_t index) : index_(index) {} 121 : 122 : size_t index_; 123 : }; 124 : 125 : // Finds an existing or adds a new entry for the given name. 126 : Symbol get(absl::string_view name); 127 : 128 : // Returns the symbol for the name if there is one, otherwise nullopt. 129 : absl::optional<Symbol> lookup(absl::string_view string) const; 130 : 131 : // Translates a symbol back into a name. 132 : const absl::string_view name(Symbol symbol) const; 133 : 134 : // Returns the number of symbols in the table. All symbols are guaranteed to have an index less 135 : // than size(). 136 192 : size_t size() const { return table_.size(); } 137 : 138 : private: 139 : absl::flat_hash_map<std::string, size_t> table_; 140 : std::vector<std::string> names_; 141 : }; 142 : 143 : class ThreadLocalOverloadStateImpl; 144 : 145 : class OverloadManagerImpl : Logger::Loggable<Logger::Id::main>, public OverloadManager { 146 : public: 147 : OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope, 148 : ThreadLocal::SlotAllocator& slot_allocator, 149 : const envoy::config::overload::v3::OverloadManager& config, 150 : ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api, 151 : const Server::Options& options); 152 : 153 : // Server::OverloadManager 154 : void start() override; 155 : bool registerForAction(const std::string& action, Event::Dispatcher& dispatcher, 156 : OverloadActionCb callback) override; 157 : ThreadLocalOverloadState& getThreadLocalOverloadState() override; 158 : LoadShedPoint* getLoadShedPoint(absl::string_view point_name) override; 159 : Event::ScaledRangeTimerManagerFactory scaledTimerFactory() override; 160 : void stop() override; 161 : 162 : protected: 163 : // Factory for timer managers. This allows test-only subclasses to inject a mock implementation. 164 : virtual Event::ScaledRangeTimerManagerPtr createScaledRangeTimerManager( 165 : Event::Dispatcher& dispatcher, 166 : const Event::ScaledTimerTypeMapConstSharedPtr& timer_minimums) const; 167 : 168 : private: 169 : using FlushEpochId = uint64_t; 170 : class Resource : public ResourceUpdateCallbacks { 171 : public: 172 : Resource(const std::string& name, ResourceMonitorPtr monitor, OverloadManagerImpl& manager, 173 : Stats::Scope& stats_scope); 174 : 175 : // ResourceMonitor::ResourceUpdateCallbacks 176 : void onSuccess(const ResourceUsage& usage) override; 177 : void onFailure(const EnvoyException& error) override; 178 : 179 : void update(FlushEpochId flush_epoch); 180 : 181 : private: 182 : const std::string name_; 183 : ResourceMonitorPtr monitor_; 184 : OverloadManagerImpl& manager_; 185 : bool pending_update_{false}; 186 : FlushEpochId flush_epoch_; 187 : Stats::Gauge& pressure_gauge_; 188 : Stats::Counter& failed_updates_counter_; 189 : Stats::Counter& skipped_updates_counter_; 190 : }; 191 : 192 : struct ActionCallback { 193 : ActionCallback(Event::Dispatcher& dispatcher, OverloadActionCb callback) 194 0 : : dispatcher_(dispatcher), callback_(callback) {} 195 : Event::Dispatcher& dispatcher_; 196 : OverloadActionCb callback_; 197 : }; 198 : 199 : void updateResourcePressure(const std::string& resource, double pressure, 200 : FlushEpochId flush_epoch); 201 : // Flushes any enqueued action state updates to all worker threads. 202 : void flushResourceUpdates(); 203 : 204 : bool started_{false}; 205 : Event::Dispatcher& dispatcher_; 206 : TimeSource& time_source_; 207 : ThreadLocal::TypedSlot<ThreadLocalOverloadStateImpl> tls_; 208 : NamedOverloadActionSymbolTable action_symbol_table_; 209 : const std::chrono::milliseconds refresh_interval_; 210 : // Tracks the latency between resource refresh updates. 211 : Stats::Histogram& refresh_interval_delays_; 212 : // Tracks when we last ran the resource monitor refresh loop. 213 : // For the first measurement, we use the time when When the Overload Manager first starts. 214 : MonotonicTime time_resources_last_measured_; 215 : Event::TimerPtr timer_; 216 : absl::node_hash_map<std::string, Resource> resources_; 217 : std::shared_ptr<absl::node_hash_map<OverloadProactiveResourceName, ProactiveResource>> 218 : proactive_resources_; 219 : 220 : absl::node_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadAction> actions_; 221 : 222 : absl::flat_hash_map<std::string, std::unique_ptr<LoadShedPointImpl>> loadshed_points_; 223 : 224 : Event::ScaledTimerTypeMapConstSharedPtr timer_minimums_; 225 : 226 : absl::flat_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadActionState> 227 : state_updates_to_flush_; 228 : absl::flat_hash_map<ActionCallback*, OverloadActionState> callbacks_to_flush_; 229 : FlushEpochId flush_epoch_ = 0; 230 : uint64_t flush_awaiting_updates_ = 0; 231 : 232 : using ResourceToActionMap = 233 : std::unordered_multimap<std::string, NamedOverloadActionSymbolTable::Symbol>; 234 : ResourceToActionMap resource_to_actions_; 235 : 236 : using ActionToCallbackMap = 237 : std::unordered_multimap<NamedOverloadActionSymbolTable::Symbol, ActionCallback, 238 : absl::Hash<NamedOverloadActionSymbolTable::Symbol>>; 239 : ActionToCallbackMap action_to_callbacks_; 240 : }; 241 : 242 : } // namespace Server 243 : } // namespace Envoy