Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/source/server/guarddog_impl.h
Line
Count
Source (jump to first uncovered line)
1
#pragma once
2
3
#include <chrono>
4
#include <vector>
5
6
#include "envoy/api/api.h"
7
#include "envoy/config/bootstrap/v3/bootstrap.pb.h"
8
#include "envoy/event/timer.h"
9
#include "envoy/server/configuration.h"
10
#include "envoy/server/guarddog.h"
11
#include "envoy/server/guarddog_config.h"
12
#include "envoy/server/watchdog.h"
13
#include "envoy/stats/scope.h"
14
#include "envoy/stats/stats.h"
15
16
#include "source/common/common/lock_guard.h"
17
#include "source/common/common/logger.h"
18
#include "source/common/common/thread.h"
19
#include "source/common/event/libevent.h"
20
#include "source/server/watchdog_impl.h"
21
22
#include "absl/types/optional.h"
23
24
namespace Envoy {
25
namespace Server {
26
27
/**
28
 * This feature performs deadlock detection, stats collection, and enforcement.
29
 *
30
 * It launches a thread that scans at an interval equal to the minimum of the configured
31
 * intervals. If it finds starved threads or suspected deadlocks it will take
32
 * the appropriate action depending on the config parameters described below.
33
 *
34
 * Thread lifetime is tied to GuardDog object lifetime (RAII style).
35
 */
36
class GuardDogImpl : public GuardDog {
37
public:
38
  /**
39
   * Defines a test interlock hook to enable tests to synchronize the guard-dog
40
   * execution so they can probe current counter values. The default
41
   * implementation that runs in production has empty methods, which are
42
   * overridden in the implementation used during tests.
43
   */
44
  class TestInterlockHook {
45
  public:
46
7.62k
    virtual ~TestInterlockHook() = default;
47
48
    /**
49
     * Called from GuardDogImpl to indicate that it has evaluated all watch-dogs up to a particular
50
     * point in time. Called while the GuardDog mutex is held.
51
     */
52
14.6k
    virtual void signalFromImpl() {}
53
54
    /**
55
     * Called from GuardDog tests to block until the implementation has reached the desired
56
     * condition. Called while the GuardDog mutex is held.
57
     * @param mutex The GuardDog's mutex for use by Thread::CondVar::wait.
58
     */
59
0
    virtual void waitFromTest(Thread::MutexBasicLockable& /*mutex*/) {}
60
  };
61
62
  /**
63
   * @param stats_scope Statistics scope to write watchdog_miss and
64
   * watchdog_mega_miss events into.
65
   * @param config Configuration object.
66
   * @param api API object.
67
   * @param test_interlock a hook for enabling interlock with unit tests.
68
   *
69
   * See the configuration documentation for details on the timeout settings.
70
   */
71
  GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config,
72
               Api::Api& api, absl::string_view name,
73
               std::unique_ptr<TestInterlockHook>&& test_interlock);
74
  GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config,
75
               Api::Api& api, absl::string_view name);
76
  ~GuardDogImpl() override;
77
78
  /**
79
   * Exposed for testing purposes only (but harmless to call):
80
   */
81
0
  const std::chrono::milliseconds loopIntervalForTest() const { return loop_interval_; }
82
83
  /**
84
   * Test hook to force a step() to catch up with the current watchdog state and simulated time.
85
   * This is inlined so that it does not need to be present in the production binary.
86
   */
87
0
  void forceCheckForTest() {
88
0
    Thread::LockGuard guard(mutex_); // waitFromTest() is documented as called with mutex_ held.
89
0
    // Hand the dispatcher a callback that arms the loop timer with a zero delay.
    dispatcher_->post([this]() { loop_timer_->enableTimer(std::chrono::milliseconds(0)); });
90
0
    test_interlock_hook_->waitFromTest(mutex_); // Empty in the default hook; tests override it.
91
0
  }
92
93
  // Server::GuardDog
94
  WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id, const std::string& thread_name,
95
                                   Event::Dispatcher& dispatcher) override;
96
  void stopWatching(WatchDogSharedPtr wd) override;
97
98
private:
99
  void start(Api::Api& api);
100
  void step();
101
  void stop();
102
  // Per the C++ standard it is OK to use these in ctor initializer as long as
103
  // they are called after the kill and multikill timeout members are initialized.
104
7.62k
  bool killEnabled() const { return kill_timeout_ > std::chrono::milliseconds::zero(); }
105
7.62k
  bool multikillEnabled() const { return multi_kill_timeout_ > std::chrono::milliseconds::zero(); }
106
107
  using WatchDogAction = envoy::config::bootstrap::v3::Watchdog::WatchdogAction;
108
  // Helper function to invoke all the GuardDogActions registered for an Event.
109
  void invokeGuardDogActions(
110
      WatchDogAction::WatchdogEvent event,
111
      std::vector<std::pair<Thread::ThreadId, MonotonicTime>> thread_last_checkin_pairs,
112
      MonotonicTime now);
113
114
  using WatchDogImplSharedPtr = std::shared_ptr<WatchDogImpl>;
115
  struct WatchedDog {
116
    WatchedDog(Stats::Scope& stats_scope, const std::string& thread_name,
117
               const WatchDogImplSharedPtr& watch_dog);
118
119
    const WatchDogImplSharedPtr dog_;
120
    MonotonicTime last_checkin_;
121
    absl::optional<MonotonicTime> last_alert_time_;
122
    bool miss_alerted_{};
123
    bool megamiss_alerted_{};
124
    Stats::Counter& miss_counter_;
125
    Stats::Counter& megamiss_counter_;
126
  };
127
  using WatchedDogPtr = std::unique_ptr<WatchedDog>;
128
129
  std::unique_ptr<TestInterlockHook> test_interlock_hook_;
130
  Stats::Scope& stats_scope_;
131
  TimeSource& time_source_;
132
  const std::chrono::milliseconds miss_timeout_;
133
  const std::chrono::milliseconds megamiss_timeout_;
134
  const std::chrono::milliseconds kill_timeout_;
135
  const std::chrono::milliseconds multi_kill_timeout_;
136
  const double multi_kill_fraction_;
137
  const std::chrono::milliseconds loop_interval_;
138
  Stats::Counter& watchdog_miss_counter_;
139
  Stats::Counter& watchdog_megamiss_counter_;
140
  std::vector<WatchedDogPtr> watched_dogs_ ABSL_GUARDED_BY(wd_lock_);
141
  Thread::MutexBasicLockable wd_lock_;
142
  Thread::ThreadPtr thread_;
143
  Event::DispatcherPtr dispatcher_;
144
  Event::TimerPtr loop_timer_;
145
  using EventToActionsMap = absl::flat_hash_map<WatchDogAction::WatchdogEvent,
146
                                                std::vector<Configuration::GuardDogActionPtr>>;
147
  EventToActionsMap events_to_actions_;
148
  Thread::MutexBasicLockable mutex_;
149
  bool run_thread_ ABSL_GUARDED_BY(mutex_);
150
};
151
152
} // namespace Server
153
} // namespace Envoy