LCOV - code coverage report
Current view: top level - source/common/signal - fatal_error_handler.cc (source / functions) Hit Total Coverage
Test: coverage.dat Lines: 34 88 38.6 %
Date: 2024-01-05 06:35:25 Functions: 4 10 40.0 %

          Line data    Source code
       1             : #include "source/common/signal/fatal_error_handler.h"
       2             : 
       3             : #include <atomic>
       4             : #include <list>
       5             : 
       6             : #include "envoy/event/dispatcher.h"
       7             : 
       8             : #include "source/common/common/assert.h"
       9             : #include "source/common/common/macros.h"
      10             : #include "source/common/signal/fatal_action.h"
      11             : 
      12             : #include "absl/base/attributes.h"
      13             : #include "absl/synchronization/mutex.h"
      14             : 
      15             : namespace Envoy {
      16             : namespace FatalErrorHandler {
      17             : 
      18             : namespace {
      19             : 
      20             : // The type of Fatal Actions.
      21             : enum class FatalActionType {
      22             :   Safe,
      23             :   Unsafe,
      24             : };
      25             : 
      26             : ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit);
      27             : // We can't grab the failure mutex on fatal error (taking locks while crashing
      28             : // can deadlock), so the handler list is accessed via an atomic operation,
      29             : // which is async-signal-safe. If the crash handler runs at the same time as
      30             : // another thread tries to modify the list, one of them will get the list and
      31             : // the other will get nullptr instead. If the crash handler loses the race and
      32             : // gets nullptr, it won't run any of the registered error handlers.
      33             : using FailureFunctionList = std::list<const FatalErrorHandlerInterface*>;
      34             : ABSL_CONST_INIT std::atomic<FailureFunctionList*> fatal_error_handlers{nullptr};
      35             : 
      36             : // Use an atomic operation since on fatal error we'll consume the
      37             : // fatal_action_manager and don't want to have any locks as they aren't
      38             : // async-signal-safe.
      39             : ABSL_CONST_INIT std::atomic<FatalAction::FatalActionManager*> fatal_action_manager{nullptr};
      40             : ABSL_CONST_INIT std::atomic<int64_t> failure_tid{-1};
      41             : 
      42             : // Executes the Fatal Actions provided.
      43           0 : void runFatalActionsInternal(const FatalAction::FatalActionPtrList& actions) {
      44             :   // Exchange the fatal_error_handlers pointer so other functions cannot
      45             :   // concurrently access the list.
      46           0 :   FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
      47           0 :   if (list == nullptr) {
      48           0 :     return;
      49           0 :   }
      50             : 
      51             :   // Have each registered handler run the actions on its tracked object.
      52           0 :   for (auto* handler : *list) {
      53           0 :     handler->runFatalActionsOnTrackedObject(actions);
      54           0 :   }
      55             : 
      56             :   // Restore the fatal_error_handlers pointer so subsequent calls using the list
      57             :   // can succeed.
      58           0 :   fatal_error_handlers.store(list);
      59           0 : }
      60             : 
      61             : // Helper function to run exclusively either safe or unsafe actions depending on
      62             : // the provided action_type.
      63             : // Returns a FatalAction status corresponding to our attempt to run the
      64             : // action_type.
      65           0 : FatalAction::Status runFatalActions(FatalActionType action_type) {
      66             :   // Check that registerFatalActions has already been called.
      67           0 :   FatalAction::FatalActionManager* action_manager = fatal_action_manager.load();
      68             : 
      69           0 :   if (action_manager == nullptr) {
      70           0 :     return FatalAction::Status::ActionManagerUnset;
      71           0 :   }
      72             : 
      73           0 :   int64_t my_tid = action_manager->getThreadFactory().currentThreadId().getId();
      74             : 
      75           0 :   if (action_type == FatalActionType::Safe) {
      76             :     // Try to run safe actions
      77           0 :     int64_t expected_tid = -1;
      78             : 
      79           0 :     if (failure_tid.compare_exchange_strong(expected_tid, my_tid)) {
      80             :       // Run the actions
      81           0 :       runFatalActionsInternal(action_manager->getSafeActions());
      82           0 :       return FatalAction::Status::Success;
      83           0 :     } else if (expected_tid == my_tid) {
      84           0 :       return FatalAction::Status::AlreadyRanOnThisThread;
      85           0 :     }
      86             : 
      87           0 :   } else {
      88             :     // Try to run unsafe actions
      89           0 :     int64_t failing_tid = failure_tid.load();
      90             : 
      91           0 :     ASSERT(failing_tid != -1);
      92             : 
      93           0 :     if (my_tid == failing_tid) {
      94           0 :       runFatalActionsInternal(action_manager->getUnsafeActions());
      95           0 :       return FatalAction::Status::Success;
      96           0 :     }
      97           0 :   }
      98             : 
      99           0 :   return FatalAction::Status::RunningOnAnotherThread;
     100           0 : }
     101             : 
     102             : } // namespace
     103             : 
     104        1498 : void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
     105        1498 : #ifdef ENVOY_OBJECT_TRACE_ON_DUMP
     106        1498 :   absl::MutexLock l(&failure_mutex);
     107        1498 :   FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
     108        1498 :   if (list == nullptr) {
     109         809 :     list = new FailureFunctionList;
     110         809 :   }
     111        1498 :   list->push_back(&handler);
     112             :   // Store the fatal_error_handlers pointer now that the list is updated.
     113        1498 :   fatal_error_handlers.store(list);
     114             : #else
     115             :   UNREFERENCED_PARAMETER(handler);
     116             : #endif
     117        1498 : }
     118             : 
     119        1498 : void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
     120        1498 : #ifdef ENVOY_OBJECT_TRACE_ON_DUMP
     121        1498 :   absl::MutexLock l(&failure_mutex);
     122        1498 :   FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
     123        1498 :   if (list == nullptr) {
     124             :     // removeFatalErrorHandler() may see a null list of fatal error handlers
     125             :     // if it's called at the same time as callFatalErrorHandlers(). In that case
     126             :     // Envoy is in the middle of crashing anyway, but don't add a segfault on
     127             :     // top of the crash.
     128           0 :     return;
     129           0 :   }
     130        1498 :   list->remove(&handler);
     131        1498 :   if (list->empty()) {
     132         809 :     delete list;
     133        1377 :   } else {
     134         689 :     fatal_error_handlers.store(list);
     135         689 :   }
     136             : #else
     137             :   UNREFERENCED_PARAMETER(handler);
     138             : #endif
     139        1498 : }
     140             : 
     141           0 : void callFatalErrorHandlers(std::ostream& os) {
     142           0 :   FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
     143           0 :   if (list != nullptr) {
     144           0 :     for (const auto* handler : *list) {
     145           0 :       handler->onFatalError(os);
     146           0 :     }
     147             : 
     148           0 :     fatal_error_handlers.store(list);
     149           0 :   }
     150           0 : }
     151             : 
     152             : void registerFatalActions(FatalAction::FatalActionPtrList safe_actions,
     153             :                           FatalAction::FatalActionPtrList unsafe_actions,
     154         131 :                           Thread::ThreadFactory& thread_factory) {
     155             :   // Create a FatalActionManager and store it.
     156         131 :   if (!fatal_action_manager) {
     157         131 :     fatal_action_manager.exchange(new FatalAction::FatalActionManager(
     158         131 :         std::move(safe_actions), std::move(unsafe_actions), thread_factory));
     159         131 :   }
     160         131 : }
     161             : 
     162           0 : FatalAction::Status runSafeActions() { return runFatalActions(FatalActionType::Safe); }
     163             : 
     164           0 : FatalAction::Status runUnsafeActions() { return runFatalActions(FatalActionType::Unsafe); }
     165             : 
     166         135 : void clearFatalActionsOnTerminate() {
     167         135 :   auto* raw_ptr = fatal_action_manager.exchange(nullptr);
     168         135 :   if (raw_ptr != nullptr) {
     169         131 :     delete raw_ptr;
     170         131 :   }
     171         135 : }
     172             : 
     173             : // This resets the internal state of Fatal Action for the module.
     174             : // This is necessary as it allows us to have multiple test cases invoke the
     175             : // fatal actions without state from other tests leaking in.
     176           0 : void resetFatalActionStateForTest() {
     177             :   // Free the memory of the Fatal Action, since it's not managed by a smart
     178             :   // pointer. This prevents memory leaks in tests.
     179           0 :   auto* raw_ptr = fatal_action_manager.exchange(nullptr);
     180           0 :   if (raw_ptr != nullptr) {
     181           0 :     delete raw_ptr;
     182           0 :   }
     183           0 :   failure_tid.store(-1);
     184           0 : }
     185             : 
     186             : } // namespace FatalErrorHandler
     187             : } // namespace Envoy

Generated by: LCOV version 1.15