LCOV - code coverage report
Current view: top level - source/server - guarddog_impl.h (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 4 11 36.4 %
Date: 2024-01-05 06:35:25 Functions: 4 7 57.1 %

          Line data    Source code
       1             : #pragma once
       2             : 
       3             : #include <chrono>
       4             : #include <vector>
       5             : 
       6             : #include "envoy/api/api.h"
       7             : #include "envoy/config/bootstrap/v3/bootstrap.pb.h"
       8             : #include "envoy/event/timer.h"
       9             : #include "envoy/server/configuration.h"
      10             : #include "envoy/server/guarddog.h"
      11             : #include "envoy/server/guarddog_config.h"
      12             : #include "envoy/server/watchdog.h"
      13             : #include "envoy/stats/scope.h"
      14             : #include "envoy/stats/stats.h"
      15             : 
      16             : #include "source/common/common/lock_guard.h"
      17             : #include "source/common/common/logger.h"
      18             : #include "source/common/common/thread.h"
      19             : #include "source/common/event/libevent.h"
      20             : #include "source/server/watchdog_impl.h"
      21             : 
      22             : #include "absl/types/optional.h"
      23             : 
      24             : namespace Envoy {
      25             : namespace Server {
      26             : 
      27             : /**
      28             :  * This feature performs deadlock detection, stats collection, and enforcement.
      29             :  *
      30             :  * It launches a thread that scans at an interval equal to the minimum of the
      31             :  * configured intervals. If it finds starved threads or suspected deadlocks, it
      32             :  * will take the appropriate action depending on the config parameters described below.
      33             :  *
      34             :  * Thread lifetime is tied to GuardDog object lifetime (RAII style).
      35             :  */
      36             : class GuardDogImpl : public GuardDog {
      37             : public:
      38             :   /**
      39             :    * Defines a test interlock hook to enable tests to synchronize the guard-dog
      40             :    * execution so they can probe current counter values. The default
      41             :    * implementation that runs in production has empty methods, which are
      42             :    * overridden in the implementation used during tests.
      43             :    */
      44             :   class TestInterlockHook {
      45             :   public:
      46         222 :     virtual ~TestInterlockHook() = default;
      47             : 
      48             :     /**
      49             :      * Called from GuardDogImpl to indicate that it has evaluated all watch-dogs up to a particular
      50             :      * point in time. Called while the GuardDog mutex is held.
      51             :      */
      52         522 :     virtual void signalFromImpl() {}
      53             : 
      54             :     /**
      55             :      * Called from GuardDog tests to block until the implementation has reached the desired
      56             :      * condition. Called while the GuardDog mutex is held.
      57             :      * @param mutex The GuardDog's mutex for use by Thread::CondVar::wait.
      58             :      */
      59           0 :     virtual void waitFromTest(Thread::MutexBasicLockable& /*mutex*/) {}
      60             :   };
      61             : 
      62             :   /**
      63             :    * @param stats_scope Statistics scope to write watchdog_miss and
      64             :    * watchdog_mega_miss events into.
      65             :    * @param config Configuration object.
      66             :    * @param api API object.
      67             :    * @param test_interlock a hook for enabling interlock with unit tests.
      68             :    * @param name name for this GuardDog; NOTE(review): its use is not visible in this header.
      69             :    * See the configuration documentation for details on the timeout settings.
      70             :    */
      71             :   GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config,
      72             :                Api::Api& api, absl::string_view name,
      73             :                std::unique_ptr<TestInterlockHook>&& test_interlock);
      74             :   GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuration::Watchdog& config,
      75             :                Api::Api& api, absl::string_view name);
      76             :   ~GuardDogImpl() override;
      77             : 
      78             :   /**
      79             :    * @return the value of loop_interval_. Exposed for testing purposes only (but harmless to call).
      80             :    */
      81           0 :   const std::chrono::milliseconds loopIntervalForTest() const { return loop_interval_; }
      82             : 
      83             :   /**
      84             :    * Test hook to force a step() to catch up with the current watchdog state and simulated time.
      85             :    * This is inlined so that it does not need to be present in the production binary.
      86             :    */
      87           0 :   void forceCheckForTest() {
      88           0 :     Thread::LockGuard guard(mutex_); // Held for the post and the wait below.
      89           0 :     dispatcher_->post([this]() { loop_timer_->enableTimer(std::chrono::milliseconds(0)); });
      90           0 :     test_interlock_hook_->waitFromTest(mutex_); // No-op with the default (empty) hook.
      91           0 :   }
      92             : 
      93             :   // Server::GuardDog
      94             :   WatchDogSharedPtr createWatchDog(Thread::ThreadId thread_id, const std::string& thread_name,
      95             :                                    Event::Dispatcher& dispatcher) override;
      96             :   void stopWatching(WatchDogSharedPtr wd) override;
      97             : 
      98             : private:
      99             :   void start(Api::Api& api);
     100             :   void step();
     101             :   void stop();
     102             :   // Per the C++ standard it is OK to use these in the ctor initializer list as long as
     103             :   // the use comes after the kill and multikill timeout values are initialized.
     104         222 :   bool killEnabled() const { return kill_timeout_ > std::chrono::milliseconds(0); }
     105         222 :   bool multikillEnabled() const { return multi_kill_timeout_ > std::chrono::milliseconds(0); }
     106             : 
     107             :   using WatchDogAction = envoy::config::bootstrap::v3::Watchdog::WatchdogAction;
     108             :   // Helper function to invoke all the GuardDogActions registered for an Event.
     109             :   void invokeGuardDogActions(
     110             :       WatchDogAction::WatchdogEvent event,
     111             :       std::vector<std::pair<Thread::ThreadId, MonotonicTime>> thread_last_checkin_pairs,
     112             :       MonotonicTime now);
     113             : 
     114             :   using WatchDogImplSharedPtr = std::shared_ptr<WatchDogImpl>;
     115             :   struct WatchedDog { // Per-watchdog bookkeeping: check-in time, alert state and counters.
     116             :     WatchedDog(Stats::Scope& stats_scope, const std::string& thread_name,
     117             :                const WatchDogImplSharedPtr& watch_dog);
     118             : 
     119             :     const WatchDogImplSharedPtr dog_;
     120             :     MonotonicTime last_checkin_;
     121             :     absl::optional<MonotonicTime> last_alert_time_;
     122             :     bool miss_alerted_{};
     123             :     bool megamiss_alerted_{};
     124             :     Stats::Counter& miss_counter_;
     125             :     Stats::Counter& megamiss_counter_;
     126             :   };
     127             :   using WatchedDogPtr = std::unique_ptr<WatchedDog>;
     128             : 
     129             :   std::unique_ptr<TestInterlockHook> test_interlock_hook_;
     130             :   Stats::Scope& stats_scope_;
     131             :   TimeSource& time_source_;
     132             :   const std::chrono::milliseconds miss_timeout_;
     133             :   const std::chrono::milliseconds megamiss_timeout_;
     134             :   const std::chrono::milliseconds kill_timeout_;
     135             :   const std::chrono::milliseconds multi_kill_timeout_;
     136             :   const double multi_kill_fraction_;
     137             :   const std::chrono::milliseconds loop_interval_;
     138             :   Stats::Counter& watchdog_miss_counter_;
     139             :   Stats::Counter& watchdog_megamiss_counter_;
     140             :   std::vector<WatchedDogPtr> watched_dogs_ ABSL_GUARDED_BY(wd_lock_);
     141             :   Thread::MutexBasicLockable wd_lock_; // Guards watched_dogs_.
     142             :   Thread::ThreadPtr thread_;
     143             :   Event::DispatcherPtr dispatcher_;
     144             :   Event::TimerPtr loop_timer_;
     145             :   using EventToActionsMap = absl::flat_hash_map<WatchDogAction::WatchdogEvent,
     146             :                                                 std::vector<Configuration::GuardDogActionPtr>>;
     147             :   EventToActionsMap events_to_actions_;
     148             :   Thread::MutexBasicLockable mutex_; // Guards run_thread_; also handed to the test interlock hook.
     149             :   bool run_thread_ ABSL_GUARDED_BY(mutex_);
     150             : };
     151             : 
     152             : } // namespace Server
     153             : } // namespace Envoy

Generated by: LCOV version 1.15