/src/perfetto/src/base/watchdog_posix.cc
Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) 2018 The Android Open Source Project |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include "perfetto/ext/base/platform.h" |
18 | | #include "perfetto/ext/base/watchdog.h" |
19 | | |
20 | | #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG) |
21 | | |
22 | | #include <fcntl.h> |
23 | | #include <poll.h> |
24 | | #include <signal.h> |
25 | | #include <stdint.h> |
26 | | #include <stdlib.h> |
27 | | #include <sys/syscall.h> |
28 | | #include <sys/timerfd.h> |
29 | | #include <unistd.h> |
30 | | |
31 | | #include <algorithm> |
32 | | #include <cinttypes> |
33 | | #include <fstream> |
34 | | #include <thread> |
35 | | |
36 | | #include "perfetto/base/build_config.h" |
37 | | #include "perfetto/base/logging.h" |
38 | | #include "perfetto/base/thread_utils.h" |
39 | | #include "perfetto/base/time.h" |
40 | | #include "perfetto/ext/base/crash_keys.h" |
41 | | #include "perfetto/ext/base/file_utils.h" |
42 | | #include "perfetto/ext/base/scoped_file.h" |
43 | | #include "perfetto/ext/base/utils.h" |
44 | | |
45 | | namespace perfetto { |
46 | | namespace base { |
47 | | |
48 | | namespace { |
49 | | |
// Polling interval, in milliseconds, handed to the Watchdog constructor when
// the singleton is created by GetInstance().
constexpr uint32_t kDefaultPollingInterval = 30 * 1000;

// Crash key that SerializeLogsAndKillThread() sets to the integer value of the
// WatchdogCrashReason right before killing the process.
base::CrashKey g_crash_key_reason("wdog_reason");
53 | | |
// Returns true iff |number| is a non-zero whole multiple of |divisor|.
// Zero is deliberately not considered a multiple of anything, and a zero
// |divisor| never divides anything (guarding it also avoids the undefined
// behaviour of a modulo-by-zero).
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
  if (divisor == 0)
    return false;
  return number >= divisor && number % divisor == 0;
}
57 | | |
// Returns the arithmetic mean of the first |size| elements of |array|.
// The division is carried out in floating point so the fractional part of the
// mean is preserved. An empty array has a mean of 0.
double MeanForArray(const uint64_t array[], size_t size) {
  if (size == 0)
    return 0.0;
  uint64_t total = 0;
  for (size_t i = 0; i < size; i++) {
    total += array[i];
  }
  return static_cast<double>(total) / static_cast<double>(size);
}
65 | | |
66 | | } // namespace |
67 | | |
68 | 0 | bool ReadProcStat(int fd, ProcStat* out) { |
69 | 0 | char c[512]; |
70 | 0 | size_t c_pos = 0; |
71 | 0 | while (c_pos < sizeof(c) - 1) { |
72 | 0 | ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos)); |
73 | 0 | if (rd < 0) { |
74 | 0 | PERFETTO_ELOG("Failed to read stat file to enforce resource limits."); |
75 | 0 | return false; |
76 | 0 | } |
77 | 0 | if (rd == 0) |
78 | 0 | break; |
79 | 0 | c_pos += static_cast<size_t>(rd); |
80 | 0 | } |
81 | 0 | PERFETTO_CHECK(c_pos < sizeof(c)); |
82 | 0 | c[c_pos] = '\0'; |
83 | |
|
84 | 0 | if (sscanf(c, |
85 | 0 | "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu " |
86 | 0 | "%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld", |
87 | 0 | &out->utime, &out->stime, &out->rss_pages) != 3) { |
88 | 0 | PERFETTO_ELOG("Invalid stat format: %s", c); |
89 | 0 | return false; |
90 | 0 | } |
91 | 0 | return true; |
92 | 0 | } |
93 | | |
// Stores the polling interval (in milliseconds). The constructor does nothing
// else: the timerfd and the watchdog thread are only created by Start().
Watchdog::Watchdog(uint32_t polling_interval_ms)
    : polling_interval_ms_(polling_interval_ms) {}
96 | | |
97 | 0 | Watchdog::~Watchdog() { |
98 | 0 | if (!thread_.joinable()) { |
99 | 0 | PERFETTO_DCHECK(!enabled_); |
100 | 0 | return; |
101 | 0 | } |
102 | 0 | PERFETTO_DCHECK(enabled_); |
103 | 0 | enabled_ = false; |
104 | | |
105 | | // Rearm the timer to 1ns from now. This will cause the watchdog thread to |
106 | | // wakeup from the poll() and see |enabled_| == false. |
107 | | // This code path is used only in tests. In production code the watchdog is |
108 | | // a singleton and is never destroyed. |
109 | 0 | struct itimerspec ts {}; |
110 | 0 | ts.it_value.tv_sec = 0; |
111 | 0 | ts.it_value.tv_nsec = 1; |
112 | 0 | timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr); |
113 | |
|
114 | 0 | thread_.join(); |
115 | 0 | } |
116 | | |
117 | 18.8k | Watchdog* Watchdog::GetInstance() { |
118 | 18.8k | static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval); |
119 | 18.8k | return watchdog; |
120 | 18.8k | } |
121 | | |
122 | | // Can be called from any thread. |
123 | | Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms, |
124 | 17.9k | WatchdogCrashReason crash_reason) { |
125 | 17.9k | if (!enabled_.load(std::memory_order_relaxed)) |
126 | 17.9k | return Watchdog::Timer(this, 0, crash_reason); |
127 | | |
128 | 0 | return Watchdog::Timer(this, ms, crash_reason); |
129 | 17.9k | } |
130 | | |
131 | | // Can be called from any thread. |
132 | 0 | void Watchdog::AddFatalTimer(TimerData timer) { |
133 | 0 | std::lock_guard<std::mutex> guard(mutex_); |
134 | 0 | timers_.emplace_back(std::move(timer)); |
135 | 0 | RearmTimerFd_Locked(); |
136 | 0 | } |
137 | | |
138 | | // Can be called from any thread. |
139 | 0 | void Watchdog::RemoveFatalTimer(TimerData timer) { |
140 | 0 | std::lock_guard<std::mutex> guard(mutex_); |
141 | 0 | for (auto it = timers_.begin(); it != timers_.end(); it++) { |
142 | 0 | if (*it == timer) { |
143 | 0 | timers_.erase(it); |
144 | 0 | break; // Remove only one. Doesn't matter which one. |
145 | 0 | } |
146 | 0 | } |
147 | 0 | RearmTimerFd_Locked(); |
148 | 0 | } |
149 | | |
150 | 0 | void Watchdog::RearmTimerFd_Locked() { |
151 | 0 | if (!enabled_) |
152 | 0 | return; |
153 | 0 | auto it = std::min_element(timers_.begin(), timers_.end()); |
154 | | |
155 | | // We use one timerfd to handle all the oustanding |timers_|. Keep it armed |
156 | | // to the task expiring soonest. |
157 | 0 | struct itimerspec ts {}; |
158 | 0 | if (it != timers_.end()) { |
159 | 0 | ts.it_value = ToPosixTimespec(it->deadline); |
160 | 0 | } |
161 | | // If |timers_| is empty (it == end()) |ts.it_value| will remain |
162 | | // zero-initialized and that will disarm the timer in the call below. |
163 | 0 | int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr); |
164 | 0 | PERFETTO_DCHECK(res == 0); |
165 | 0 | } |
166 | | |
167 | 0 | void Watchdog::Start() { |
168 | 0 | std::lock_guard<std::mutex> guard(mutex_); |
169 | 0 | if (thread_.joinable()) { |
170 | 0 | PERFETTO_DCHECK(enabled_); |
171 | 0 | } else { |
172 | 0 | PERFETTO_DCHECK(!enabled_); |
173 | |
|
174 | 0 | #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \ |
175 | 0 | PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) |
176 | | // Kick the thread to start running but only on Android or Linux. |
177 | 0 | timer_fd_.reset( |
178 | 0 | timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK)); |
179 | 0 | if (!timer_fd_) { |
180 | 0 | PERFETTO_PLOG( |
181 | 0 | "timerfd_create failed, the Perfetto watchdog is not available"); |
182 | 0 | return; |
183 | 0 | } |
184 | 0 | enabled_ = true; |
185 | 0 | RearmTimerFd_Locked(); // Deal with timers created before Start(). |
186 | 0 | thread_ = std::thread(&Watchdog::ThreadMain, this); |
187 | 0 | #endif |
188 | 0 | } |
189 | 0 | } |
190 | | |
191 | 836 | void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) { |
192 | | // Update the fields under the lock. |
193 | 836 | std::lock_guard<std::mutex> guard(mutex_); |
194 | | |
195 | 836 | PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0); |
196 | | |
197 | 836 | size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1; |
198 | 836 | memory_window_bytes_.Reset(size); |
199 | 836 | memory_limit_bytes_ = bytes; |
200 | 836 | } |
201 | | |
202 | 0 | void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) { |
203 | 0 | std::lock_guard<std::mutex> guard(mutex_); |
204 | |
|
205 | 0 | PERFETTO_CHECK(percentage <= 100); |
206 | 0 | PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || |
207 | 0 | percentage == 0); |
208 | | |
209 | 0 | size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1; |
210 | 0 | cpu_window_time_ticks_.Reset(size); |
211 | 0 | cpu_limit_percentage_ = percentage; |
212 | 0 | } |
213 | | |
// Body of the watchdog thread spawned by Start(). Loops forever waiting on the
// timerfd with a timeout of |polling_interval_ms_| and, on each wakeup:
//  - kills the process if any registered fatal timer has expired;
//  - samples /proc/self/stat and kills the process if the memory or CPU
//    guardrail is exceeded.
// Returns only if the stat file cannot be opened or once |enabled_| is cleared
// (which is what the destructor does).
void Watchdog::ThreadMain() {
  // Register crash keys explicitly to avoid running out of slots at crash time.
  g_crash_key_reason.Register();

  // Kept open for the whole lifetime of the thread and rewound before each
  // read (see the lseek() below).
  base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
  if (!stat_fd) {
    PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
    return;
  }

  PERFETTO_DCHECK(timer_fd_);

  constexpr uint8_t kFdCount = 1;
  struct pollfd fds[kFdCount]{};
  fds[0].fd = *timer_fd_;
  fds[0].events = POLLIN;

  for (;;) {
    // We use the poll() timeout to drive the periodic ticks for the cpu/memory
    // checks. The only other case when the poll() unblocks is when we crash
    // (or have to quit via enabled_ == false, but that happens only in tests).
    platform::BeforeMaybeBlockingSyscall();
    auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
    platform::AfterMaybeBlockingSyscall();
    // The destructor clears |enabled_| and then fires the timerfd to get us
    // out of the poll() above.
    if (!enabled_)
      return;
    if (ret < 0) {
      if (errno == ENOMEM || errno == EINTR) {
        // Should happen extremely rarely.
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        continue;
      }
      PERFETTO_FATAL("watchdog poll() failed");
    }

    // If we get here either:
    // 1. poll() timed out, in which case we should process cpu/mem guardrails.
    // 2. A timer expired, in which case we shall crash.

    // Drain the timerfd. The fd is created with TFD_NONBLOCK in Start(), so in
    // case 1 (nothing to read) the read() fails with EAGAIN rather than
    // blocking.
    uint64_t expired = 0;  // Must be exactly 8 bytes.
    auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
    PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
                    (res == sizeof(expired) && expired > 0));
    const auto now = GetWallTimeMs();

    // Check if any of the timers expired.
    // Only the first expired timer found is acted upon: one is enough to
    // bring the process down.
    int tid_to_kill = 0;
    WatchdogCrashReason crash_reason{};
    {
      std::lock_guard<std::mutex> guard(mutex_);
      for (const auto& timer : timers_) {
        if (now >= timer.deadline) {
          tid_to_kill = timer.thread_id;
          crash_reason = timer.crash_reason;
          break;
        }
      }
    }

    // The kill is issued after releasing |mutex_|.
    if (tid_to_kill)
      SerializeLogsAndKillThread(tid_to_kill, crash_reason);

    // Check CPU and memory guardrails (if enabled).
    // Rewind the stat file: ReadProcStat() reads from the current offset.
    lseek(stat_fd.get(), 0, SEEK_SET);
    ProcStat stat;
    if (!ReadProcStat(stat_fd.get(), &stat))
      continue;
    uint64_t cpu_time = stat.utime + stat.stime;
    uint64_t rss_bytes =
        static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();

    bool threshold_exceeded = false;
    {
      std::lock_guard<std::mutex> guard(mutex_);
      // The memory sample is always recorded, but an excess is ignored while
      // IsSyncMemoryTaggingEnabled() returns true. The CPU guardrail is
      // evaluated only when the memory one did not trigger.
      if (CheckMemory_Locked(rss_bytes) && !IsSyncMemoryTaggingEnabled()) {
        threshold_exceeded = true;
        crash_reason = WatchdogCrashReason::kMemGuardrail;
      } else if (CheckCpu_Locked(cpu_time)) {
        threshold_exceeded = true;
        crash_reason = WatchdogCrashReason::kCpuGuardrail;
      }
    }

    // For guardrails there is no offending thread: the pid is passed as the
    // tid to signal.
    if (threshold_exceeded)
      SerializeLogsAndKillThread(getpid(), crash_reason);
  }
}
301 | | |
302 | | void Watchdog::SerializeLogsAndKillThread(int tid, |
303 | 0 | WatchdogCrashReason crash_reason) { |
304 | 0 | g_crash_key_reason.Set(static_cast<int>(crash_reason)); |
305 | | |
306 | | // We are about to die. Serialize the logs into the crash buffer so the |
307 | | // debuggerd crash handler picks them up and attaches to the bugreport. |
308 | | // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h. |
309 | | // But in the watchdog case, we don't hit that codepath and must do ourselves. |
310 | 0 | MaybeSerializeLastLogsForCrashReporting(); |
311 | | |
312 | | // Send a SIGABRT to the thread that armed the timer. This is to see the |
313 | | // callstack of the thread that is stuck in a long task rather than the |
314 | | // watchdog thread. |
315 | 0 | if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) { |
316 | | // At this point the process must die. If for any reason the tgkill doesn't |
317 | | // work (e.g. the thread has disappeared), force a crash from here. |
318 | 0 | abort(); |
319 | 0 | } |
320 | | |
321 | 0 | if (disable_kill_failsafe_for_testing_) |
322 | 0 | return; |
323 | | |
324 | | // The tgkill() above will take some milliseconds to cause a crash, as it |
325 | | // involves the kernel to queue the SIGABRT on the target thread (often the |
326 | | // main thread, which is != watchdog thread) and do a scheduling round. |
327 | | // If something goes wrong though (the target thread has signals masked or |
328 | | // is stuck in an uninterruptible+wakekill syscall) force quit from this |
329 | | // thread. |
330 | 0 | std::this_thread::sleep_for(std::chrono::seconds(10)); |
331 | 0 | abort(); |
332 | 0 | } |
333 | | |
334 | 0 | bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) { |
335 | 0 | if (memory_limit_bytes_ == 0) |
336 | 0 | return false; |
337 | | |
338 | | // Add the current stat value to the ring buffer and check that the mean |
339 | | // remains under our threshold. |
340 | 0 | if (memory_window_bytes_.Push(rss_bytes)) { |
341 | 0 | if (memory_window_bytes_.Mean() > |
342 | 0 | static_cast<double>(memory_limit_bytes_)) { |
343 | 0 | PERFETTO_ELOG( |
344 | 0 | "Memory watchdog trigger. Memory window of %f bytes is above the " |
345 | 0 | "%" PRIu64 " bytes limit.", |
346 | 0 | memory_window_bytes_.Mean(), memory_limit_bytes_); |
347 | 0 | return true; |
348 | 0 | } |
349 | 0 | } |
350 | 0 | return false; |
351 | 0 | } |
352 | | |
353 | 0 | bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) { |
354 | 0 | if (cpu_limit_percentage_ == 0) |
355 | 0 | return false; |
356 | | |
357 | | // Add the cpu time to the ring buffer. |
358 | 0 | if (cpu_window_time_ticks_.Push(cpu_time)) { |
359 | | // Compute the percentage over the whole window and check that it remains |
360 | | // under the threshold. |
361 | 0 | uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() - |
362 | 0 | cpu_window_time_ticks_.OldestWhenFull(); |
363 | 0 | double window_interval_ticks = |
364 | 0 | (static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) / |
365 | 0 | 1000.0) * |
366 | 0 | static_cast<double>(sysconf(_SC_CLK_TCK)); |
367 | 0 | double percentage = static_cast<double>(difference_ticks) / |
368 | 0 | static_cast<double>(window_interval_ticks) * 100; |
369 | 0 | if (percentage > cpu_limit_percentage_) { |
370 | 0 | PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32 |
371 | 0 | "%% CPU limit.", |
372 | 0 | percentage, cpu_limit_percentage_); |
373 | 0 | return true; |
374 | 0 | } |
375 | 0 | } |
376 | 0 | return false; |
377 | 0 | } |
378 | | |
379 | 0 | uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) { |
380 | 0 | return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_; |
381 | 0 | } |
382 | | |
383 | 0 | bool Watchdog::WindowedInterval::Push(uint64_t sample) { |
384 | | // Add the sample to the current position in the ring buffer. |
385 | 0 | buffer_[position_] = sample; |
386 | | |
387 | | // Update the position with next one circularily. |
388 | 0 | position_ = (position_ + 1) % size_; |
389 | | |
390 | | // Set the filled flag the first time we wrap. |
391 | 0 | filled_ = filled_ || position_ == 0; |
392 | 0 | return filled_; |
393 | 0 | } |
394 | | |
395 | 0 | double Watchdog::WindowedInterval::Mean() const { |
396 | 0 | return MeanForArray(buffer_.get(), size_); |
397 | 0 | } |
398 | | |
399 | 0 | void Watchdog::WindowedInterval::Clear() { |
400 | 0 | position_ = 0; |
401 | 0 | buffer_.reset(new uint64_t[size_]()); |
402 | 0 | } |
403 | | |
404 | 836 | void Watchdog::WindowedInterval::Reset(size_t new_size) { |
405 | 836 | position_ = 0; |
406 | 836 | size_ = new_size; |
407 | 836 | buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]()); |
408 | 836 | } |
409 | | |
410 | | Watchdog::Timer::Timer(Watchdog* watchdog, |
411 | | uint32_t ms, |
412 | | WatchdogCrashReason crash_reason) |
413 | 17.9k | : watchdog_(watchdog) { |
414 | 17.9k | if (!ms) |
415 | 17.9k | return; // No-op timer created when the watchdog is disabled. |
416 | 11 | timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms); |
417 | 11 | timer_data_.thread_id = GetThreadId(); |
418 | 11 | timer_data_.crash_reason = crash_reason; |
419 | 11 | PERFETTO_DCHECK(watchdog_); |
420 | 11 | watchdog_->AddFatalTimer(timer_data_); |
421 | 11 | } |
422 | | |
423 | 18.0k | Watchdog::Timer::~Timer() { |
424 | 18.0k | if (timer_data_.deadline.count()) |
425 | 0 | watchdog_->RemoveFatalTimer(timer_data_); |
426 | 18.0k | } |
427 | | |
428 | 0 | Watchdog::Timer::Timer(Timer&& other) noexcept { |
429 | 0 | watchdog_ = std::move(other.watchdog_); |
430 | 0 | other.watchdog_ = nullptr; |
431 | 0 | timer_data_ = std::move(other.timer_data_); |
432 | 0 | other.timer_data_ = TimerData(); |
433 | 0 | } |
434 | | |
435 | | } // namespace base |
436 | | } // namespace perfetto |
437 | | |
438 | | #endif // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG) |