Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/source/server/overload_manager_impl.h
Line
Count
Source (jump to first uncovered line)
1
#pragma once
2
3
#include <chrono>
4
#include <vector>
5
6
#include "envoy/api/api.h"
7
#include "envoy/config/overload/v3/overload.pb.h"
8
#include "envoy/event/dispatcher.h"
9
#include "envoy/event/scaled_range_timer_manager.h"
10
#include "envoy/protobuf/message_validator.h"
11
#include "envoy/server/options.h"
12
#include "envoy/server/overload/overload_manager.h"
13
#include "envoy/server/resource_monitor.h"
14
#include "envoy/stats/scope.h"
15
#include "envoy/stats/stats.h"
16
#include "envoy/thread_local/thread_local.h"
17
18
#include "source/common/common/logger.h"
19
#include "source/common/event/scaled_range_timer_manager_impl.h"
20
21
#include "absl/container/node_hash_map.h"
22
#include "absl/container/node_hash_set.h"
23
24
namespace Envoy {
25
namespace Server {
26
27
/**
28
 * Trigger encapsulates translating resource pressure into the corresponding
29
 * OverloadActionState. This header declares only the interface; no concrete trigger is visible here.
30
 */
31
class Trigger {
32
public:
33
0
  virtual ~Trigger() = default; // Virtual: triggers are held and destroyed via std::unique_ptr<Trigger>.
34
35
  // Updates the current value of the metric and returns whether the trigger has changed state.
36
  virtual bool updateValue(double value) PURE; // NOTE(review): units/range of `value` are not shown here.
37
38
  // Returns the action state for the trigger.
39
  virtual OverloadActionState actionState() const PURE;
40
};
41
42
// OverloadAction holds the state of one overload action: a set of owned Triggers, the action's
// OverloadActionState, and two stats gauges. Method bodies live outside this header.
class OverloadAction {
43
public:
44
  // Builds the action from its proto config.
  // NOTE(review): the gauges below are presumably created from `stats_scope` - confirm in the .cc.
  OverloadAction(const envoy::config::overload::v3::OverloadAction& config,
45
                 Stats::Scope& stats_scope);
46
47
  // Updates the current pressure for the given resource and returns whether the action
48
  // has changed state.
49
  bool updateResourcePressure(const std::string& name, double pressure);
50
51
  // Returns the current action state, which is the max state across all registered triggers.
52
  OverloadActionState getState() const;
53
54
private:
55
  using TriggerPtr = std::unique_ptr<Trigger>;
56
  // Owned triggers keyed by string.
  // NOTE(review): the key is presumably the resource name passed to updateResourcePressure().
  absl::node_hash_map<std::string, TriggerPtr> triggers_;
57
  // NOTE(review): presumably the cached value reported by getState() - confirm in the .cc.
  OverloadActionState state_;
58
  // Gauges are held by reference, so the referenced stats must outlive this object.
  Stats::Gauge& active_gauge_;
59
  Stats::Gauge& scale_percent_gauge_;
60
};
61
62
/**
63
 * Implement a LoadShedPoint which is a particular point in the connection /
64
 * request lifecycle where we can either abort or continue the given work.
65
 */
66
class LoadShedPointImpl : public LoadShedPoint {
67
public:
68
  // Builds the load shed point from its proto config. `random_generator` is stored by reference
  // (see random_generator_), so it must outlive this object.
  LoadShedPointImpl(const envoy::config::overload::v3::LoadShedPoint& config,
69
                    Stats::Scope& stats_scope, Random::RandomGenerator& random_generator);
70
  // Non-copyable.
  LoadShedPointImpl(const LoadShedPointImpl&) = delete;
71
  LoadShedPointImpl& operator=(const LoadShedPointImpl&) = delete;
72
73
  // LoadShedPoint
  // NOTE(review): presumably decides using probability_shed_load_ and random_generator_ - confirm
  // in the .cc.
  bool shouldShedLoad() override;
74
75
  /**
76
   * Provide resource updates for the LoadShedPoint. It will update the
77
   * probability of shedding load at this point atomically if needed.
78
   * @param resource_name - the name of the resource that has been updated.
79
   * @param resource_utilization - utilization of this resource. A ratio of the
80
   *  current usage / the resource limit.
81
   */
82
  void updateResource(absl::string_view resource_name, double resource_utilization);
83
84
private:
85
  using TriggerPtr = std::unique_ptr<Trigger>;
86
87
  // Helper to handle updating the probability to shed load given the triggers.
88
  void updateProbabilityShedLoad();
89
90
  // Owned triggers keyed by string.
  // NOTE(review): the key is presumably the resource name given to updateResource().
  absl::flat_hash_map<std::string, TriggerPtr> triggers_;
91
  // Current probability of shedding load; stored in a std::atomic and initialized to 0.
  std::atomic<float> probability_shed_load_{0};
92
  // Held by reference; the referenced gauge and generator must outlive this object.
  Stats::Gauge& scale_percent_;
93
  Random::RandomGenerator& random_generator_;
94
};
95
96
// Simple table that converts strings into Symbol instances. Symbols are guaranteed to start at 0
97
// and be indexed sequentially.
98
class NamedOverloadActionSymbolTable {
99
public:
100
  // Handle wrapping a single size_t index. It is copyable, equality-comparable and hashable, but
  // can only be constructed from an index by the enclosing table (private ctor + friend).
  class Symbol {
101
  public:
102
    // Allow copy construction everywhere.
103
    Symbol(const Symbol&) = default;
104
105
    // Returns the index of the symbol in the table.
106
0
    size_t index() const { return index_; }
107
108
    // Support use as a map key. Two symbols are equal when their indices are equal.
109
0
    bool operator==(const Symbol& other) const { return other.index_ == index_; }
110
111
    // Support absl::Hash. Only index_ is mixed into the hash state.
112
    template <typename H>
113
26.6k
    friend H AbslHashValue(H h, const Symbol& s) { // NOLINT(readability-identifier-naming)
114
26.6k
      return H::combine(std::move(h), s.index_);
115
26.6k
    }
116
117
  private:
118
    friend class NamedOverloadActionSymbolTable;
119
    // Only the symbol table class can create Symbol instances from indices.
120
26.6k
    explicit Symbol(size_t index) : index_(index) {}
121
122
    // Position of this symbol in the owning table.
    size_t index_;
123
  };
124
125
  // Finds an existing or adds a new entry for the given name.
126
  Symbol get(absl::string_view name);
127
128
  // Returns the symbol for the name if there is one, otherwise nullopt.
129
  absl::optional<Symbol> lookup(absl::string_view string) const;
130
131
  // Translates a symbol back into a name.
  // NOTE(review): the returned view presumably refers to storage owned by this table - confirm
  // its lifetime in the .cc.
132
  const absl::string_view name(Symbol symbol) const;
133
134
  // Returns the number of symbols in the table. All symbols are guaranteed to have an index less
135
  // than size().
136
5.29k
  size_t size() const { return table_.size(); }
137
138
private:
139
  // NOTE(review): presumably maps a name to its symbol index; size() reports this map's size.
  absl::flat_hash_map<std::string, size_t> table_;
140
  // NOTE(review): presumably the reverse mapping (index -> name) used by name() - confirm.
  std::vector<std::string> names_;
141
};
142
143
class ThreadLocalOverloadStateImpl;
144
145
// OverloadManagerImpl implements the OverloadManager interface. It holds the resource monitors
// (resources_), the overload actions (actions_), the load shed points (loadshed_points_) and the
// bookkeeping for action state updates that are waiting to be flushed. Only declarations are
// visible in this header; behaviour notes marked NOTE(review) should be confirmed in the .cc.
class OverloadManagerImpl : Logger::Loggable<Logger::Id::main>, public OverloadManager {
146
public:
147
  // Builds the manager from the OverloadManager proto config and the supplied server facilities.
  OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope,
148
                      ThreadLocal::SlotAllocator& slot_allocator,
149
                      const envoy::config::overload::v3::OverloadManager& config,
150
                      ProtobufMessage::ValidationVisitor& validation_visitor, Api::Api& api,
151
                      const Server::Options& options);
152
153
  // Server::OverloadManager
154
  void start() override;
155
  bool registerForAction(const std::string& action, Event::Dispatcher& dispatcher,
156
                         OverloadActionCb callback) override;
157
  ThreadLocalOverloadState& getThreadLocalOverloadState() override;
158
  LoadShedPoint* getLoadShedPoint(absl::string_view point_name) override;
159
  Event::ScaledRangeTimerManagerFactory scaledTimerFactory() override;
160
161
  // Stop the overload manager timer and wait for any pending resource updates to complete.
162
  // After this returns, overload manager clients should not receive any more callbacks
163
  // about overload state changes.
164
  void stop();
165
166
protected:
167
  // Factory for timer managers. This allows test-only subclasses to inject a mock implementation.
168
  virtual Event::ScaledRangeTimerManagerPtr createScaledRangeTimerManager(
169
      Event::Dispatcher& dispatcher,
170
      const Event::ScaledTimerTypeMapConstSharedPtr& timer_minimums) const;
171
172
private:
173
  // Identifier type for a flush epoch (see flush_epoch_).
  using FlushEpochId = uint64_t;
174
  // Pairs one named ResourceMonitor with the callbacks that receive its results and with
  // per-resource stats.
  class Resource : public ResourceUpdateCallbacks {
175
  public:
176
    Resource(const std::string& name, ResourceMonitorPtr monitor, OverloadManagerImpl& manager,
177
             Stats::Scope& stats_scope);
178
179
    // ResourceMonitor::ResourceUpdateCallbacks
180
    void onSuccess(const ResourceUsage& usage) override;
181
    void onFailure(const EnvoyException& error) override;
182
183
    // NOTE(review): presumably requests a fresh reading from monitor_ tagged with `flush_epoch`.
    void update(FlushEpochId flush_epoch);
184
185
  private:
186
    const std::string name_;
187
    ResourceMonitorPtr monitor_;
188
    // Back-reference to the owning manager; the manager must outlive this Resource.
    OverloadManagerImpl& manager_;
189
    // Starts false. NOTE(review): presumably true while an update is still outstanding.
    bool pending_update_{false};
190
    // No in-class initializer; NOTE(review): presumably set by update().
    FlushEpochId flush_epoch_;
191
    Stats::Gauge& pressure_gauge_;
192
    Stats::Counter& failed_updates_counter_;
193
    Stats::Counter& skipped_updates_counter_;
194
  };
195
196
  // Bundles a callback with the dispatcher it was registered with. The dispatcher is held by
  // reference; the callback is stored by value.
  struct ActionCallback {
197
    ActionCallback(Event::Dispatcher& dispatcher, OverloadActionCb callback)
198
0
        : dispatcher_(dispatcher), callback_(callback) {}
199
    Event::Dispatcher& dispatcher_;
200
    OverloadActionCb callback_;
201
  };
202
203
  // NOTE(review): presumably called with a resource's new pressure reading and the epoch it
  // belongs to - confirm the caller in the .cc.
  void updateResourcePressure(const std::string& resource, double pressure,
204
                              FlushEpochId flush_epoch);
205
  // Flushes any enqueued action state updates to all worker threads.
206
  void flushResourceUpdates();
207
208
  bool started_{false};
209
  Event::Dispatcher& dispatcher_;
210
  TimeSource& time_source_;
211
  ThreadLocal::TypedSlot<ThreadLocalOverloadStateImpl> tls_;
212
  NamedOverloadActionSymbolTable action_symbol_table_;
213
  const std::chrono::milliseconds refresh_interval_;
214
  // Tracks the latency between resource refresh updates.
215
  Stats::Histogram& refresh_interval_delays_;
216
  // Tracks when we last ran the resource monitor refresh loop.
217
  // For the first measurement, we use the time when the Overload Manager first starts.
218
  MonotonicTime time_resources_last_measured_;
219
  Event::TimerPtr timer_;
220
  // Resources stored by value, keyed by string (NOTE(review): presumably the resource name).
  absl::node_hash_map<std::string, Resource> resources_;
221
  std::shared_ptr<absl::node_hash_map<OverloadProactiveResourceName, ProactiveResource>>
222
      proactive_resources_;
223
224
  // Actions keyed by their symbol from action_symbol_table_.
  absl::node_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadAction> actions_;
225
226
  // Owned load shed points keyed by string (NOTE(review): presumably the point name).
  absl::flat_hash_map<std::string, std::unique_ptr<LoadShedPointImpl>> loadshed_points_;
227
228
  Event::ScaledTimerTypeMapConstSharedPtr timer_minimums_;
229
230
  // Pending per-action states that have not been flushed yet.
  absl::flat_hash_map<NamedOverloadActionSymbolTable::Symbol, OverloadActionState>
231
      state_updates_to_flush_;
232
  // Keys are non-owning pointers.
  // NOTE(review): presumably they point at entries of action_to_callbacks_ - confirm.
  absl::flat_hash_map<ActionCallback*, OverloadActionState> callbacks_to_flush_;
233
  // Both counters start at 0. NOTE(review): exact update rules are in the .cc.
  FlushEpochId flush_epoch_ = 0;
234
  uint64_t flush_awaiting_updates_ = 0;
235
236
  // Multimap from a string key (NOTE(review): presumably the resource name) to action symbols.
  using ResourceToActionMap =
237
      std::unordered_multimap<std::string, NamedOverloadActionSymbolTable::Symbol>;
238
  ResourceToActionMap resource_to_actions_;
239
240
  // Multimap from an action symbol to its callbacks; hashes symbols with absl::Hash.
  using ActionToCallbackMap =
241
      std::unordered_multimap<NamedOverloadActionSymbolTable::Symbol, ActionCallback,
242
                              absl::Hash<NamedOverloadActionSymbolTable::Symbol>>;
243
  ActionToCallbackMap action_to_callbacks_;
244
};
245
246
} // namespace Server
247
} // namespace Envoy