/proc/self/cwd/source/server/guarddog_impl.h
Line | Count | Source (jump to first uncovered line) |
1 | | #pragma once |
2 | | |
3 | | #include <chrono> |
4 | | #include <vector> |
5 | | |
6 | | #include "envoy/api/api.h" |
7 | | #include "envoy/config/bootstrap/v3/bootstrap.pb.h" |
8 | | #include "envoy/event/timer.h" |
9 | | #include "envoy/server/configuration.h" |
10 | | #include "envoy/server/guarddog.h" |
11 | | #include "envoy/server/guarddog_config.h" |
12 | | #include "envoy/server/watchdog.h" |
13 | | #include "envoy/stats/scope.h" |
14 | | #include "envoy/stats/stats.h" |
15 | | |
16 | | #include "source/common/common/lock_guard.h" |
17 | | #include "source/common/common/logger.h" |
18 | | #include "source/common/common/thread.h" |
19 | | #include "source/common/event/libevent.h" |
20 | | #include "source/server/watchdog_impl.h" |
21 | | |
22 | | #include "absl/types/optional.h" |
23 | | |
24 | | namespace Envoy { |
25 | | namespace Server { |
26 | | |
27 | | /** |
28 | | * This feature performs deadlock detection, stats collection, and enforcement. |
29 | | * |
30 | | * It launches a thread that scans at an interval equal to the minimum of the |
31 | | * configured intervals. If it finds starved threads or suspected deadlocks, it takes |
32 | | * the appropriate action depending on the config parameters described below. |
33 | | * |
34 | | * Thread lifetime is tied to GuardDog object lifetime (RAII style). |
35 | | */ |
36 | | class GuardDogImpl : public GuardDog { |
37 | | public: |
38 | | /** |
39 | | * Test interlock hook that lets tests synchronize with the guard-dog's |
40 | | * execution so they can probe current counter values. The default |
41 | | * implementation, which runs in production, has empty (no-op) methods; they are |
42 | | * overridden in the implementation used during tests. |
43 | | */ |
44 | | class TestInterlockHook { |
45 | | public: |
46 | 7.62k | virtual ~TestInterlockHook() = default; |
47 | | |
48 | | /** |
49 | | * Called from GuardDogImpl to indicate that it has evaluated all watch-dogs up to a particular |
50 | | * point in time. Called while the GuardDog mutex is held. This default implementation does nothing. |
51 | | */ |
52 | 14.6k | virtual void signalFromImpl() {} |
53 | | |
54 | | /** |
55 | | * Called from GuardDog tests to block until the implementation has reached the desired |
56 | | * condition. Called while the GuardDog mutex is held. This default implementation returns immediately. |
57 | | * @param mutex The GuardDog's mutex for use by Thread::CondVar::wait. |
58 | | */ |
59 | 0 | virtual void waitFromTest(Thread::MutexBasicLockable& /*mutex*/) {} |
60 | | }; |
61 | | |
62 | | /** |
63 | | * @param stats_scope Statistics scope to write watchdog_miss and |
64 | | * watchdog_mega_miss events into. |
65 | | * @param config Configuration object. |
66 | | * @param api API object. |
67 | | * @param name Name given to this GuardDog (its use is not visible in this header; see the .cc file). |
68 | | * @param test_interlock a hook for enabling interlock with unit tests (first overload only). |
69 | | * See the configuration documentation for details on the timeout settings. |
70 | | */ |
71 | | GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config, |
72 | | Api::Api& api, absl::string_view name, |
73 | | std::unique_ptr<TestInterlockHook>&& test_interlock); |
74 | | GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config, |
75 | | Api::Api& api, absl::string_view name); |
76 | | ~GuardDogImpl() override; |
77 | | |
78 | | /** |
79 | | * Returns loop_interval_ by value. Exposed for testing purposes only (but harmless to call). |
80 | | */ |
81 | 0 | const std::chrono::milliseconds loopIntervalForTest() const { return loop_interval_; } |
82 | | |
83 | | /** |
84 | | * Test hook to force a step() to catch up with the current watchdog state and simulated time: with mutex_ held, it posts |
85 | | * a zero-delay loop_timer_ enable to dispatcher_, then calls waitFromTest(). Inlined so it need not be in the production binary. |
86 | | */ |
87 | 0 | void forceCheckForTest() { |
88 | 0 | Thread::LockGuard guard(mutex_); |
89 | 0 | dispatcher_->post([this]() { loop_timer_->enableTimer(std::chrono::milliseconds(0)); }); |
90 | 0 | test_interlock_hook_->waitFromTest(mutex_); |
91 | 0 | } |
92 | | |
93 | | // Server::GuardDog interface overrides. |
94 | | WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id, const std::string& thread_name, |
95 | | Event::Dispatcher& dispatcher) override; |
96 | | void stopWatching(WatchDogSharedPtr wd) override; |
97 | | |
98 | | private: |
99 | | void start(Api::Api& api); |
100 | | void step(); |
101 | | void stop(); |
102 | | // Members are initialized in declaration order, so per the C++ standard these helpers may be used in the |
103 | | // ctor initializer list only for members declared after kill_timeout_ and multi_kill_timeout_. |
104 | 7.62k | bool killEnabled() const { return kill_timeout_ > std::chrono::milliseconds(0); } |
105 | 7.62k | bool multikillEnabled() const { return multi_kill_timeout_ > std::chrono::milliseconds(0); } |
106 | | |
107 | | using WatchDogAction = envoy::config::bootstrap::v3::Watchdog::WatchdogAction; |
108 | | // Helper function to invoke all the GuardDogActions registered for an Event. The pairs vector is taken by value. |
109 | | void invokeGuardDogActions( |
110 | | WatchDogAction::WatchdogEvent event, |
111 | | std::vector<std::pair<Thread::ThreadId, MonotonicTime>> thread_last_checkin_pairs, |
112 | | MonotonicTime now); |
113 | | |
114 | | using WatchDogImplSharedPtr = std::shared_ptr<WatchDogImpl>; |
115 | | struct WatchedDog { /* Bookkeeping for one watched WatchDogImpl: check-in/alert times, alert flags, miss counters. */ |
116 | | WatchedDog(Stats::Scope& stats_scope, const std::string& thread_name, |
117 | | const WatchDogImplSharedPtr& watch_dog); |
118 | | |
119 | | const WatchDogImplSharedPtr dog_; |
120 | | MonotonicTime last_checkin_; |
121 | | absl::optional<MonotonicTime> last_alert_time_; |
122 | | bool miss_alerted_{}; |
123 | | bool megamiss_alerted_{}; |
124 | | Stats::Counter& miss_counter_; |
125 | | Stats::Counter& megamiss_counter_; |
126 | | }; |
127 | | using WatchedDogPtr = std::unique_ptr<WatchedDog>; |
128 | | |
129 | | std::unique_ptr<TestInterlockHook> test_interlock_hook_; |
130 | | Stats::Scope& stats_scope_; |
131 | | TimeSource& time_source_; |
132 | | const std::chrono::milliseconds miss_timeout_; |
133 | | const std::chrono::milliseconds megamiss_timeout_; |
134 | | const std::chrono::milliseconds kill_timeout_; |
135 | | const std::chrono::milliseconds multi_kill_timeout_; |
136 | | const double multi_kill_fraction_; |
137 | | const std::chrono::milliseconds loop_interval_; |
138 | | Stats::Counter& watchdog_miss_counter_; |
139 | | Stats::Counter& watchdog_megamiss_counter_; |
140 | | std::vector<WatchedDogPtr> watched_dogs_ ABSL_GUARDED_BY(wd_lock_); |
141 | | Thread::MutexBasicLockable wd_lock_; /* Guards watched_dogs_. */ |
142 | | Thread::ThreadPtr thread_; |
143 | | Event::DispatcherPtr dispatcher_; |
144 | | Event::TimerPtr loop_timer_; |
145 | | using EventToActionsMap = absl::flat_hash_map<WatchDogAction::WatchdogEvent, |
146 | | std::vector<Configuration::GuardDogActionPtr>>; |
147 | | EventToActionsMap events_to_actions_; |
148 | | Thread::MutexBasicLockable mutex_; /* Guards run_thread_; held and passed to the interlock hook in forceCheckForTest(). */ |
149 | | bool run_thread_ ABSL_GUARDED_BY(mutex_); |
150 | | }; |
151 | | |
152 | | } // namespace Server |
153 | | } // namespace Envoy |