Line data Source code
1 : #include "source/common/signal/fatal_error_handler.h" 2 : 3 : #include <atomic> 4 : #include <list> 5 : 6 : #include "envoy/event/dispatcher.h" 7 : 8 : #include "source/common/common/assert.h" 9 : #include "source/common/common/macros.h" 10 : #include "source/common/signal/fatal_action.h" 11 : 12 : #include "absl/base/attributes.h" 13 : #include "absl/synchronization/mutex.h" 14 : 15 : namespace Envoy { 16 : namespace FatalErrorHandler { 17 : 18 : namespace { 19 : 20 : // The type of Fatal Actions. 21 : enum class FatalActionType { 22 : Safe, 23 : Unsafe, 24 : }; 25 : 26 : ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit); 27 : // Since we can't grab the failure mutex on fatal error (snagging locks under 28 : // fatal crash causing potential deadlocks) access the handler list as an atomic 29 : // operation, which is async-signal-safe. If the crash handler runs at the same 30 : // time as another thread tries to modify the list, one of them will get the 31 : // list and the other will get nullptr instead. If the crash handler loses the 32 : // race and gets nullptr, it won't run any of the registered error handlers. 33 : using FailureFunctionList = std::list<const FatalErrorHandlerInterface*>; 34 : ABSL_CONST_INIT std::atomic<FailureFunctionList*> fatal_error_handlers{nullptr}; 35 : 36 : // Use an atomic operation since on fatal error we'll consume the 37 : // fatal_action_manager and don't want to have any locks as they aren't 38 : // async-signal-safe. 39 : ABSL_CONST_INIT std::atomic<FatalAction::FatalActionManager*> fatal_action_manager{nullptr}; 40 : ABSL_CONST_INIT std::atomic<int64_t> failure_tid{-1}; 41 : 42 : // Executes the Fatal Actions provided. 43 0 : void runFatalActionsInternal(const FatalAction::FatalActionPtrList& actions) { 44 : // Exchange the fatal_error_handlers pointer so other functions cannot 45 : // concurrently access the list. 46 0 : FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); 47 0 : if (list == nullptr) { 48 0 : return; 49 0 : } 50 : 51 : // Get the dispatcher and its tracked object. 52 0 : for (auto* handler : *list) { 53 0 : handler->runFatalActionsOnTrackedObject(actions); 54 0 : } 55 : 56 : // Restore the fatal_error_handlers pointer so subsequent calls using the list 57 : // can succeed. 58 0 : fatal_error_handlers.store(list); 59 0 : } 60 : 61 : // Helper function to run exclusively either safe or unsafe actions depending on 62 : // the provided action_type. 63 : // Returns a FatalAction status corresponding to our attempt to run the 64 : // action_type. 65 0 : FatalAction::Status runFatalActions(FatalActionType action_type) { 66 : // Check that registerFatalActions has already been called. 67 0 : FatalAction::FatalActionManager* action_manager = fatal_action_manager.load(); 68 : 69 0 : if (action_manager == nullptr) { 70 0 : return FatalAction::Status::ActionManagerUnset; 71 0 : } 72 : 73 0 : int64_t my_tid = action_manager->getThreadFactory().currentThreadId().getId(); 74 : 75 0 : if (action_type == FatalActionType::Safe) { 76 : // Try to run safe actions 77 0 : int64_t expected_tid = -1; 78 : 79 0 : if (failure_tid.compare_exchange_strong(expected_tid, my_tid)) { 80 : // Run the actions 81 0 : runFatalActionsInternal(action_manager->getSafeActions()); 82 0 : return FatalAction::Status::Success; 83 0 : } else if (expected_tid == my_tid) { 84 0 : return FatalAction::Status::AlreadyRanOnThisThread; 85 0 : } 86 : 87 0 : } else { 88 : // Try to run unsafe actions 89 0 : int64_t failing_tid = failure_tid.load(); 90 : 91 0 : ASSERT(failing_tid != -1); 92 : 93 0 : if (my_tid == failing_tid) { 94 0 : runFatalActionsInternal(action_manager->getUnsafeActions()); 95 0 : return FatalAction::Status::Success; 96 0 : } 97 0 : } 98 : 99 0 : return FatalAction::Status::RunningOnAnotherThread; 100 0 : } 101 : 102 : } // namespace 103 : 104 1498 : void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) { 105 1498 : #ifdef ENVOY_OBJECT_TRACE_ON_DUMP 106 1498 : absl::MutexLock l(&failure_mutex); 107 1498 : FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); 108 1498 : if (list == nullptr) { 109 809 : list = new FailureFunctionList; 110 809 : } 111 1498 : list->push_back(&handler); 112 : // Store the fatal_error_handlers pointer now that the list is updated. 113 1498 : fatal_error_handlers.store(list); 114 : #else 115 : UNREFERENCED_PARAMETER(handler); 116 : #endif 117 1498 : } 118 : 119 1498 : void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) { 120 1498 : #ifdef ENVOY_OBJECT_TRACE_ON_DUMP 121 1498 : absl::MutexLock l(&failure_mutex); 122 1498 : FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); 123 1498 : if (list == nullptr) { 124 : // removeFatalErrorHandler() may see an empty list of fatal error handlers 125 : // if it's called at the same time as callFatalErrorHandlers(). In that case 126 : // Envoy is in the middle of crashing anyway, but don't add a segfault on 127 : // top of the crash. 128 0 : return; 129 0 : } 130 1498 : list->remove(&handler); 131 1498 : if (list->empty()) { 132 809 : delete list; 133 1377 : } else { 134 689 : fatal_error_handlers.store(list); 135 689 : } 136 : #else 137 : UNREFERENCED_PARAMETER(handler); 138 : #endif 139 1498 : } 140 : 141 0 : void callFatalErrorHandlers(std::ostream& os) { 142 0 : FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); 143 0 : if (list != nullptr) { 144 0 : for (const auto* handler : *list) { 145 0 : handler->onFatalError(os); 146 0 : } 147 : 148 0 : fatal_error_handlers.store(list); 149 0 : } 150 0 : } 151 : 152 : void registerFatalActions(FatalAction::FatalActionPtrList safe_actions, 153 : FatalAction::FatalActionPtrList unsafe_actions, 154 131 : Thread::ThreadFactory& thread_factory) { 155 : // Create a FatalActionManager and store it. 156 131 : if (!fatal_action_manager) { 157 131 : fatal_action_manager.exchange(new FatalAction::FatalActionManager( 158 131 : std::move(safe_actions), std::move(unsafe_actions), thread_factory)); 159 131 : } 160 131 : } 161 : 162 0 : FatalAction::Status runSafeActions() { return runFatalActions(FatalActionType::Safe); } 163 : 164 0 : FatalAction::Status runUnsafeActions() { return runFatalActions(FatalActionType::Unsafe); } 165 : 166 135 : void clearFatalActionsOnTerminate() { 167 135 : auto* raw_ptr = fatal_action_manager.exchange(nullptr); 168 135 : if (raw_ptr != nullptr) { 169 131 : delete raw_ptr; 170 131 : } 171 135 : } 172 : 173 : // This resets the internal state of Fatal Action for the module. 174 : // This is necessary as it allows us to have multiple test cases invoke the 175 : // fatal actions without state from other tests leaking in. 176 0 : void resetFatalActionStateForTest() { 177 : // Free the memory of the Fatal Action, since it's not managed by a smart 178 : // pointer. This prevents memory leaks in tests. 179 0 : auto* raw_ptr = fatal_action_manager.exchange(nullptr); 180 0 : if (raw_ptr != nullptr) { 181 0 : delete raw_ptr; 182 0 : } 183 0 : failure_tid.store(-1); 184 0 : } 185 : 186 : } // namespace FatalErrorHandler 187 : } // namespace Envoy