/src/mozilla-central/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "mozilla/ArrayUtils.h" |
8 | | #include "mozilla/BackgroundHangMonitor.h" |
9 | | #include "mozilla/CPUUsageWatcher.h" |
10 | | #include "mozilla/LinkedList.h" |
11 | | #include "mozilla/Monitor.h" |
12 | | #include "mozilla/Move.h" |
13 | | #include "mozilla/Preferences.h" |
14 | | #include "mozilla/StaticPtr.h" |
15 | | #include "mozilla/Telemetry.h" |
16 | | #include "mozilla/ThreadLocal.h" |
17 | | #include "mozilla/SystemGroup.h" |
18 | | #include "mozilla/Unused.h" |
19 | | |
20 | | #include "prinrval.h" |
21 | | #include "prthread.h" |
22 | | #include "ThreadStackHelper.h" |
23 | | #include "nsIObserverService.h" |
24 | | #include "nsIObserver.h" |
25 | | #include "mozilla/Services.h" |
26 | | #include "nsThreadUtils.h" |
27 | | #include "nsXULAppAPI.h" |
28 | | #include "GeckoProfiler.h" |
29 | | #include "HangDetails.h" |
30 | | |
31 | | #ifdef MOZ_GECKO_PROFILER |
32 | | #include "ProfilerMarkerPayload.h" |
33 | | #endif |
34 | | |
35 | | #include <algorithm> |
36 | | |
// Activate BHR for only one in every BHR_BETA_MOD users.
// We're experimenting with collecting a lot more data from BHR, and
// don't want to enable it for beta users at the moment. We can scale this up in
// the future.
// NOTE: deliberately no trailing semicolon, so the macro can be used inside
// larger expressions as well as at the end of a statement.
#define BHR_BETA_MOD INT32_MAX
42 | | |
// Upper bound on the call stack depth of reported thread hangs. The value is
// the 99.9th percentile of the hang stack depths reported by Telemetry.
static constexpr size_t kMaxThreadHangStackDepth = 30;

// How often (in milliseconds) the global and per-process CPU usage is sampled
// to decide whether there is high external CPU usage.
static constexpr int32_t kCheckCPUIntervalMilliseconds = 2000;
50 | | |
// A utility predicate intended for std::unique: two call stack entries are
// treated as duplicates only when they are equal AND are one of the
// "(chrome script)" / "(content script)" pseudo-entries, so that runs of
// those entries collapse while other repeated frames are kept.
bool StackScriptEntriesCollapser(const char* aStackEntry, const char *aAnotherStackEntry)
{
  if (strcmp(aStackEntry, aAnotherStackEntry) != 0) {
    // Different entries are never collapsed.
    return false;
  }
  return strcmp(aStackEntry, "(chrome script)") == 0 ||
         strcmp(aStackEntry, "(content script)") == 0;
}
58 | | |
59 | | namespace mozilla { |
60 | | |
61 | | /** |
62 | | * BackgroundHangManager is the global object that |
63 | | * manages all instances of BackgroundHangThread. |
64 | | */ |
65 | | class BackgroundHangManager : public nsIObserver |
66 | | { |
67 | | private: |
68 | | // Background hang monitor thread function |
69 | | static void MonitorThread(void* aData) |
70 | 3 | { |
71 | 3 | AUTO_PROFILER_REGISTER_THREAD("BgHangMonitor"); |
72 | 3 | NS_SetCurrentThreadName("BHMgr Monitor"); |
73 | 3 | |
74 | 3 | /* We do not hold a reference to BackgroundHangManager here |
75 | 3 | because the monitor thread only exists as long as the |
76 | 3 | BackgroundHangManager instance exists. We stop the monitor |
77 | 3 | thread in the BackgroundHangManager destructor, and we can |
78 | 3 | only get to the destructor if we don't hold a reference here. */ |
79 | 3 | static_cast<BackgroundHangManager*>(aData)->RunMonitorThread(); |
80 | 3 | } |
81 | | |
82 | | // Hang monitor thread |
83 | | PRThread* mHangMonitorThread; |
84 | | // Stop hang monitoring |
85 | | bool mShutdown; |
86 | | |
87 | | BackgroundHangManager(const BackgroundHangManager&); |
88 | | BackgroundHangManager& operator=(const BackgroundHangManager&); |
89 | | void RunMonitorThread(); |
90 | | |
91 | | public: |
92 | | NS_DECL_THREADSAFE_ISUPPORTS |
93 | | NS_DECL_NSIOBSERVER |
94 | | static StaticRefPtr<BackgroundHangManager> sInstance; |
95 | | static bool sDisabled; |
96 | | |
97 | | // Lock for access to members of this class |
98 | | Monitor mLock; |
99 | | // Current time as seen by hang monitors |
100 | | TimeStamp mNow; |
101 | | // List of BackgroundHangThread instances associated with each thread |
102 | | LinkedList<BackgroundHangThread> mHangThreads; |
103 | | |
104 | | // Unwinding and reporting of hangs is despatched to this thread. |
105 | | nsCOMPtr<nsIThread> mHangProcessingThread; |
106 | | |
107 | | // Allows us to watch CPU usage and annotate hangs when the system is |
108 | | // under high external load. |
109 | | CPUUsageWatcher mCPUUsageWatcher; |
110 | | |
111 | | void Shutdown() |
112 | 0 | { |
113 | 0 | MonitorAutoLock autoLock(mLock); |
114 | 0 | mShutdown = true; |
115 | 0 | autoLock.Notify(); |
116 | 0 | } |
117 | | |
118 | | // Attempt to wakeup the hang monitor thread. |
119 | | void Wakeup() |
120 | 0 | { |
121 | 0 | mLock.AssertCurrentThreadOwns(); |
122 | 0 | mLock.NotifyAll(); |
123 | 0 | } |
124 | | |
125 | | BackgroundHangManager(); |
126 | | private: |
127 | | virtual ~BackgroundHangManager(); |
128 | | }; |
129 | | |
NS_IMPL_ISUPPORTS(BackgroundHangManager, nsIObserver)

/**
 * nsIObserver callback. Only the "profile-after-change" topic is accepted;
 * any other topic yields NS_ERROR_UNEXPECTED. On that topic the beta
 * disabling check is run and this object then removes itself as an observer,
 * so the check happens at most once per registration.
 */
NS_IMETHODIMP
BackgroundHangManager::Observe(nsISupports* aSubject, const char* aTopic, const char16_t* aData) {
  NS_ENSURE_TRUE(!strcmp(aTopic, "profile-after-change"), NS_ERROR_UNEXPECTED);
  // May shut the monitor down (parent process) or mark it as disabled.
  BackgroundHangMonitor::DisableOnBeta();

  nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
  MOZ_ASSERT(observerService);
  observerService->RemoveObserver(this, "profile-after-change");

  return NS_OK;
}
143 | | |
144 | | /** |
145 | | * BackgroundHangThread is a per-thread object that is used |
146 | | * by all instances of BackgroundHangMonitor to monitor hangs. |
147 | | */ |
148 | | class BackgroundHangThread : public LinkedListElement<BackgroundHangThread> |
149 | | { |
150 | | private: |
151 | | static MOZ_THREAD_LOCAL(BackgroundHangThread*) sTlsKey; |
152 | | static bool sTlsKeyInitialized; |
153 | | |
154 | | BackgroundHangThread(const BackgroundHangThread&); |
155 | | BackgroundHangThread& operator=(const BackgroundHangThread&); |
156 | | ~BackgroundHangThread(); |
157 | | |
158 | | /* Keep a reference to the manager, so we can keep going even |
159 | | after BackgroundHangManager::Shutdown is called. */ |
160 | | const RefPtr<BackgroundHangManager> mManager; |
161 | | // Unique thread ID for identification |
162 | | const PRThread* mThreadID; |
163 | | |
164 | | void Update(); |
165 | | |
166 | | public: |
167 | | NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread) |
168 | | /** |
169 | | * Returns the BackgroundHangThread associated with the |
170 | | * running thread. Note that this will not find private |
171 | | * BackgroundHangThread threads. |
172 | | * |
173 | | * @return BackgroundHangThread*, or nullptr if no thread |
174 | | * is found. |
175 | | */ |
176 | | static BackgroundHangThread* FindThread(); |
177 | | |
178 | | static void Startup() |
179 | 3 | { |
180 | 3 | /* We can tolerate init() failing. */ |
181 | 3 | sTlsKeyInitialized = sTlsKey.init(); |
182 | 3 | } |
183 | | |
184 | | // Hang timeout |
185 | | const TimeDuration mTimeout; |
186 | | // PermaHang timeout |
187 | | const TimeDuration mMaxTimeout; |
188 | | // Time at last activity |
189 | | TimeStamp mLastActivity; |
190 | | // Time when a hang started |
191 | | TimeStamp mHangStart; |
192 | | // Is the thread in a hang |
193 | | bool mHanging; |
194 | | // Is the thread in a waiting state |
195 | | bool mWaiting; |
196 | | // Is the thread dedicated to a single BackgroundHangMonitor |
197 | | BackgroundHangMonitor::ThreadType mThreadType; |
198 | | #ifdef MOZ_GECKO_PROFILER |
199 | | // Platform-specific helper to get hang stacks |
200 | | ThreadStackHelper mStackHelper; |
201 | | #endif |
202 | | // Stack of current hang |
203 | | HangStack mHangStack; |
204 | | // Annotations for the current hang |
205 | | BackgroundHangAnnotations mAnnotations; |
206 | | // Annotators registered for this thread |
207 | | BackgroundHangAnnotators mAnnotators; |
208 | | // The name of the runnable which is hanging the current process |
209 | | nsCString mRunnableName; |
210 | | // The name of the thread which is being monitored |
211 | | nsCString mThreadName; |
212 | | |
213 | | BackgroundHangThread(const char* aName, |
214 | | uint32_t aTimeoutMs, |
215 | | uint32_t aMaxTimeoutMs, |
216 | | BackgroundHangMonitor::ThreadType aThreadType = BackgroundHangMonitor::THREAD_SHARED); |
217 | | |
218 | | // Report a hang; aManager->mLock IS locked. The hang will be processed |
219 | | // off-main-thread, and will then be submitted back. |
220 | | void ReportHang(TimeDuration aHangTime); |
221 | | // Report a permanent hang; aManager->mLock IS locked |
222 | | void ReportPermaHang(); |
223 | | // Called by BackgroundHangMonitor::NotifyActivity |
224 | | void NotifyActivity() |
225 | 0 | { |
226 | 0 | MonitorAutoLock autoLock(mManager->mLock); |
227 | 0 | Update(); |
228 | 0 | } |
229 | | // Called by BackgroundHangMonitor::NotifyWait |
230 | | void NotifyWait() |
231 | 0 | { |
232 | 0 | MonitorAutoLock autoLock(mManager->mLock); |
233 | 0 |
|
234 | 0 | if (mWaiting) { |
235 | 0 | return; |
236 | 0 | } |
237 | 0 | |
238 | 0 | Update(); |
239 | 0 | if (mHanging) { |
240 | 0 | // We were hanging! We're done with that now, so let's report it. |
241 | 0 | // ReportHang() doesn't do much work on the current thread, and is |
242 | 0 | // safe to call from any thread as long as we're holding the lock. |
243 | 0 | ReportHang(mLastActivity - mHangStart); |
244 | 0 | mHanging = false; |
245 | 0 | } |
246 | 0 | mWaiting = true; |
247 | 0 | } |
248 | | |
249 | | // Returns true if this thread is (or might be) shared between other |
250 | | // BackgroundHangMonitors for the monitored thread. |
251 | 3 | bool IsShared() { |
252 | 3 | return mThreadType == BackgroundHangMonitor::THREAD_SHARED; |
253 | 3 | } |
254 | | }; |
255 | | |
256 | | StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance; |
257 | | bool BackgroundHangManager::sDisabled = false; |
258 | | |
259 | | MOZ_THREAD_LOCAL(BackgroundHangThread*) BackgroundHangThread::sTlsKey; |
260 | | bool BackgroundHangThread::sTlsKeyInitialized; |
261 | | |
262 | | BackgroundHangManager::BackgroundHangManager() |
263 | | : mShutdown(false) |
264 | | , mLock("BackgroundHangManager") |
265 | 3 | { |
266 | 3 | // Lock so we don't race against the new monitor thread |
267 | 3 | MonitorAutoLock autoLock(mLock); |
268 | 3 | |
269 | 3 | mHangMonitorThread = PR_CreateThread( |
270 | 3 | PR_USER_THREAD, MonitorThread, this, |
271 | 3 | PR_PRIORITY_LOW, PR_GLOBAL_THREAD, PR_JOINABLE_THREAD, |
272 | 3 | nsIThreadManager::DEFAULT_STACK_SIZE); |
273 | 3 | |
274 | 3 | MOZ_ASSERT(mHangMonitorThread, "Failed to create BHR monitor thread"); |
275 | 3 | |
276 | 3 | DebugOnly<nsresult> rv |
277 | 3 | = NS_NewNamedThread("BHMgr Processor", |
278 | 3 | getter_AddRefs(mHangProcessingThread)); |
279 | 3 | MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangProcessingThread, |
280 | 3 | "Failed to create BHR processing thread"); |
281 | 3 | } |
282 | | |
/**
 * Tears the manager down: joins the monitor thread and shuts down the hang
 * processing thread, each only if it was actually created. Shutdown() is
 * expected to have been called and all monitors to have been unregistered
 * beforehand (asserted below).
 */
BackgroundHangManager::~BackgroundHangManager()
{
  MOZ_ASSERT(mShutdown, "Destruction without Shutdown call");
  MOZ_ASSERT(mHangThreads.isEmpty(), "Destruction with outstanding monitors");
  MOZ_ASSERT(mHangMonitorThread, "No monitor thread");
  MOZ_ASSERT(mHangProcessingThread, "No processing thread");

  // PR_CreateThread could have failed above due to resource limitation
  if (mHangMonitorThread) {
    // The monitor thread can only live as long as the instance lives
    PR_JoinThread(mHangMonitorThread);
  }

  // Similarly, NS_NewNamedThread above could have failed.
  if (mHangProcessingThread) {
    mHangProcessingThread->Shutdown();
  }
}
301 | | |
/**
 * Main loop of the hang monitor thread. Holds mLock for its whole lifetime
 * except while waiting on it. Each iteration advances mNow, periodically
 * samples CPU usage, and walks mHangThreads to detect the start of a hang,
 * the end of a hang, or a permahang, recomputing how long to sleep next.
 * After mShutdown is set it waits until every monitor has unregistered.
 */
void
BackgroundHangManager::RunMonitorThread()
{
  // Keep us locked except when waiting
  MonitorAutoLock autoLock(mLock);

  /* mNow is updated at various intervals determined by waitTime.
     However, if an update latency is too long (due to CPU scheduling, system
     sleep, etc.), we don't update mNow at all. This is done so that
     long latencies in our timing are not detected as hangs. systemTime is
     used to track TimeStamp::Now() and determine our latency. */

  TimeStamp systemTime = TimeStamp::Now();
  // Default values for the first iteration of thread loop
  TimeDuration waitTime;
  TimeDuration recheckTimeout;
  TimeStamp lastCheckedCPUUsage = systemTime;
  TimeDuration checkCPUUsageInterval =
    TimeDuration::FromMilliseconds(kCheckCPUIntervalMilliseconds);

  while (!mShutdown) {
    autoLock.Wait(waitTime);

    // Measure how much real time passed since the previous iteration.
    TimeStamp newTime = TimeStamp::Now();
    TimeDuration systemInterval = newTime - systemTime;
    systemTime = newTime;

    // Periodic CPU usage sample; a failure is only warned about.
    if (systemTime - lastCheckedCPUUsage > checkCPUUsageInterval) {
      Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
      lastCheckedCPUUsage = systemTime;
    }

    /* waitTime is a quarter of the shortest timeout value; If our timing
       latency is low enough (less than half the shortest timeout value),
       we can update mNow. */
    if (MOZ_LIKELY(waitTime != TimeDuration::Forever() &&
                   systemInterval < waitTime * 2)) {
      mNow += systemInterval;
    }

    /* If it's before the next recheck timeout, and our wait did not get
       interrupted, we can keep the current waitTime and skip iterating
       through hang monitors. */
    if (MOZ_LIKELY(systemInterval < recheckTimeout &&
                   systemInterval >= waitTime)) {
      recheckTimeout -= systemInterval;
      continue;
    }

    /* We are in one of the following scenarios,
     - Hang or permahang recheck timeout
     - Thread added/removed
     - Thread wait or hang ended
     In all cases, we want to go through our list of hang
     monitors and update waitTime and recheckTimeout. */
    waitTime = TimeDuration::Forever();
    recheckTimeout = TimeDuration::Forever();

    // Locally hold mNow
    TimeStamp now = mNow;

    // iterate through hang monitors
    for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
         currentThread; currentThread = currentThread->getNext()) {

      if (currentThread->mWaiting) {
        // Thread is waiting, not hanging
        continue;
      }
      TimeStamp lastActivity = currentThread->mLastActivity;
      TimeDuration hangTime = now - lastActivity;
      if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
        // A permahang started
        // Skip subsequent iterations and tolerate a race on mWaiting here
        currentThread->mWaiting = true;
        currentThread->mHanging = false;
        currentThread->ReportPermaHang();
        continue;
      }

      if (MOZ_LIKELY(!currentThread->mHanging)) {
        if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
#ifdef MOZ_GECKO_PROFILER
          // A hang started, collect a stack
          currentThread->mStackHelper.GetStack(
            currentThread->mHangStack,
            currentThread->mRunnableName,
            true);
#endif

          // If we hang immediately on waking, then the most recently collected
          // CPU usage is going to be an average across the whole time we were
          // sleeping. Accordingly, we want to make sure that when we hang, we
          // collect a fresh value.
          if (systemTime != lastCheckedCPUUsage) {
            Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
            lastCheckedCPUUsage = systemTime;
          }

          // Remember when the hang began and snapshot the annotations.
          currentThread->mHangStart = lastActivity;
          currentThread->mHanging = true;
          currentThread->mAnnotations =
            currentThread->mAnnotators.GatherAnnotations();
        }
      } else {
        // mLastActivity moved since the hang was recorded, so the thread
        // has made progress again.
        if (MOZ_LIKELY(lastActivity != currentThread->mHangStart)) {
          // A hang ended
          currentThread->ReportHang(now - currentThread->mHangStart);
          currentThread->mHanging = false;
        }
      }

      /* If we are hanging, the next time we check for hang status is when
         the hang turns into a permahang. If we're not hanging, the next
         recheck timeout is when we may be entering a hang. */
      TimeDuration nextRecheck;
      if (currentThread->mHanging) {
        nextRecheck = currentThread->mMaxTimeout;
      } else {
        nextRecheck = currentThread->mTimeout;
      }
      recheckTimeout = TimeDuration::Min(recheckTimeout, nextRecheck - hangTime);

      if (currentThread->mTimeout != TimeDuration::Forever()) {
        /* We wait for a quarter of the shortest timeout
           value to give mNow enough granularity. */
        waitTime = TimeDuration::Min(waitTime, currentThread->mTimeout / (int64_t) 4);
      }
    }
  }

  /* We are shutting down now.
     Wait for all outstanding monitors to unregister. */
  while (!mHangThreads.isEmpty()) {
    autoLock.Wait();
  }
}
439 | | |
440 | | |
441 | | BackgroundHangThread::BackgroundHangThread(const char* aName, |
442 | | uint32_t aTimeoutMs, |
443 | | uint32_t aMaxTimeoutMs, |
444 | | BackgroundHangMonitor::ThreadType aThreadType) |
445 | | : mManager(BackgroundHangManager::sInstance) |
446 | | , mThreadID(PR_GetCurrentThread()) |
447 | | , mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout |
448 | | ? TimeDuration::Forever() |
449 | | : TimeDuration::FromMilliseconds(aTimeoutMs)) |
450 | | , mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout |
451 | | ? TimeDuration::Forever() |
452 | | : TimeDuration::FromMilliseconds(aMaxTimeoutMs)) |
453 | | , mLastActivity(mManager->mNow) |
454 | | , mHangStart(mLastActivity) |
455 | | , mHanging(false) |
456 | | , mWaiting(true) |
457 | | , mThreadType(aThreadType) |
458 | | , mThreadName(aName) |
459 | 3 | { |
460 | 3 | if (sTlsKeyInitialized && IsShared()) { |
461 | 3 | sTlsKey.set(this); |
462 | 3 | } |
463 | 3 | // Lock here because LinkedList is not thread-safe |
464 | 3 | MonitorAutoLock autoLock(mManager->mLock); |
465 | 3 | // Add to thread list |
466 | 3 | mManager->mHangThreads.insertBack(this); |
467 | 3 | // Wake up monitor thread to process new thread |
468 | 3 | autoLock.Notify(); |
469 | 3 | } |
470 | | |
471 | | BackgroundHangThread::~BackgroundHangThread() |
472 | 0 | { |
473 | 0 | // Lock here because LinkedList is not thread-safe |
474 | 0 | MonitorAutoLock autoLock(mManager->mLock); |
475 | 0 | // Remove from thread list |
476 | 0 | remove(); |
477 | 0 | // Wake up monitor thread to process removed thread |
478 | 0 | autoLock.Notify(); |
479 | 0 |
|
480 | 0 | // We no longer have a thread |
481 | 0 | if (sTlsKeyInitialized && IsShared()) { |
482 | 0 | sTlsKey.set(nullptr); |
483 | 0 | } |
484 | 0 | } |
485 | | |
486 | | void |
487 | | BackgroundHangThread::ReportHang(TimeDuration aHangTime) |
488 | 0 | { |
489 | 0 | // Recovered from a hang; called on the monitor thread |
490 | 0 | // mManager->mLock IS locked |
491 | 0 |
|
492 | 0 | HangDetails hangDetails( |
493 | 0 | aHangTime, |
494 | 0 | nsDependentCString(XRE_ChildProcessTypeToString(XRE_GetProcessType())), |
495 | 0 | VoidString(), |
496 | 0 | mThreadName, |
497 | 0 | mRunnableName, |
498 | 0 | std::move(mHangStack), |
499 | 0 | std::move(mAnnotations) |
500 | 0 | ); |
501 | 0 |
|
502 | 0 | // If the hang processing thread exists, we can process the native stack |
503 | 0 | // on it. Otherwise, we are unable to report a native stack, so we just |
504 | 0 | // report without one. |
505 | 0 | if (mManager->mHangProcessingThread) { |
506 | 0 | nsCOMPtr<nsIRunnable> processHangStackRunnable = |
507 | 0 | new ProcessHangStackRunnable(std::move(hangDetails)); |
508 | 0 | mManager->mHangProcessingThread |
509 | 0 | ->Dispatch(processHangStackRunnable.forget()); |
510 | 0 | } else { |
511 | 0 | NS_WARNING("Unable to report native stack without a BHR processing thread"); |
512 | 0 | RefPtr<nsHangDetails> hd = new nsHangDetails(std::move(hangDetails)); |
513 | 0 | hd->Submit(); |
514 | 0 | } |
515 | 0 |
|
516 | 0 | // If the profiler is enabled, add a marker. |
517 | 0 | #ifdef MOZ_GECKO_PROFILER |
518 | 0 | if (profiler_is_active()) { |
519 | 0 | TimeStamp endTime = TimeStamp::Now(); |
520 | 0 | TimeStamp startTime = endTime - aHangTime; |
521 | 0 | profiler_add_marker_for_thread( |
522 | 0 | mStackHelper.GetThreadId(), |
523 | 0 | "BHR-detected hang", |
524 | 0 | MakeUnique<HangMarkerPayload>(startTime, endTime)); |
525 | 0 | } |
526 | 0 | #endif |
527 | 0 | } |
528 | | |
529 | | void |
530 | | BackgroundHangThread::ReportPermaHang() |
531 | 0 | { |
532 | 0 | // Permanently hanged; called on the monitor thread |
533 | 0 | // mManager->mLock IS locked |
534 | 0 |
|
535 | 0 | // NOTE: We used to capture a native stack in this situation if one had not |
536 | 0 | // already been captured, but with the new ReportHang design that is less |
537 | 0 | // practical. |
538 | 0 | // |
539 | 0 | // We currently don't look at hang reports outside of nightly, and already |
540 | 0 | // collect native stacks eagerly on nightly, so this should be OK. |
541 | 0 | ReportHang(mMaxTimeout); |
542 | 0 | } |
543 | | |
544 | | MOZ_ALWAYS_INLINE void |
545 | | BackgroundHangThread::Update() |
546 | 0 | { |
547 | 0 | TimeStamp now = mManager->mNow; |
548 | 0 | if (mWaiting) { |
549 | 0 | mLastActivity = now; |
550 | 0 | mWaiting = false; |
551 | 0 | /* We have to wake up the manager thread because when all threads |
552 | 0 | are waiting, the manager thread waits indefinitely as well. */ |
553 | 0 | mManager->Wakeup(); |
554 | 0 | } else { |
555 | 0 | TimeDuration duration = now - mLastActivity; |
556 | 0 | if (MOZ_UNLIKELY(duration >= mTimeout)) { |
557 | 0 | /* Wake up the manager thread to tell it that a hang ended */ |
558 | 0 | mManager->Wakeup(); |
559 | 0 | } |
560 | 0 | mLastActivity = now; |
561 | 0 | } |
562 | 0 | } |
563 | | |
564 | | BackgroundHangThread* |
565 | | BackgroundHangThread::FindThread() |
566 | 3 | { |
567 | 3 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
568 | 3 | if (BackgroundHangManager::sInstance == nullptr) { |
569 | 0 | MOZ_ASSERT(BackgroundHangManager::sDisabled, |
570 | 0 | "BackgroundHandleManager is not initialized"); |
571 | 0 | return nullptr; |
572 | 0 | } |
573 | 3 | |
574 | 3 | if (sTlsKeyInitialized) { |
575 | 3 | // Use TLS if available |
576 | 3 | return sTlsKey.get(); |
577 | 3 | } |
578 | 0 | // If TLS is unavailable, we can search through the thread list |
579 | 0 | RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance); |
580 | 0 | MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown"); |
581 | 0 |
|
582 | 0 | PRThread* threadID = PR_GetCurrentThread(); |
583 | 0 | // Lock thread list for traversal |
584 | 0 | MonitorAutoLock autoLock(manager->mLock); |
585 | 0 | for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); |
586 | 0 | thread; thread = thread->getNext()) { |
587 | 0 | if (thread->mThreadID == threadID && thread->IsShared()) { |
588 | 0 | return thread; |
589 | 0 | } |
590 | 0 | } |
591 | 0 | #endif |
592 | 0 | // Current thread is not initialized |
593 | 0 | return nullptr; |
594 | 0 | } |
595 | | |
596 | | bool |
597 | 0 | BackgroundHangMonitor::ShouldDisableOnBeta(const nsCString &clientID) { |
598 | 0 | MOZ_ASSERT(clientID.Length() == 36, "clientID is invalid"); |
599 | 0 | const char *suffix = clientID.get() + clientID.Length() - 4; |
600 | 0 | return strtol(suffix, NULL, 16) % BHR_BETA_MOD; |
601 | 0 | } |
602 | | |
603 | | bool |
604 | 0 | BackgroundHangMonitor::IsDisabled() { |
605 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
606 | 0 | return BackgroundHangManager::sDisabled; |
607 | | #else |
608 | | return true; |
609 | | #endif |
610 | | } |
611 | | |
612 | | bool |
613 | 0 | BackgroundHangMonitor::DisableOnBeta() { |
614 | 0 | nsAutoCString clientID; |
615 | 0 | nsresult rv = |
616 | 0 | Preferences::GetCString("toolkit.telemetry.cachedClientID", clientID); |
617 | 0 | bool telemetryEnabled = Telemetry::CanRecordPrereleaseData(); |
618 | 0 |
|
619 | 0 | if (!telemetryEnabled || NS_FAILED(rv) || |
620 | 0 | BackgroundHangMonitor::ShouldDisableOnBeta(clientID)) { |
621 | 0 | if (XRE_IsParentProcess()) { |
622 | 0 | BackgroundHangMonitor::Shutdown(); |
623 | 0 | } else { |
624 | 0 | BackgroundHangManager::sDisabled = true; |
625 | 0 | } |
626 | 0 | return true; |
627 | 0 | } |
628 | 0 |
|
629 | 0 | return false; |
630 | 0 | } |
631 | | |
632 | | void |
633 | | BackgroundHangMonitor::Startup() |
634 | 3 | { |
635 | 3 | MOZ_RELEASE_ASSERT(NS_IsMainThread()); |
636 | 3 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
637 | 3 | MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized"); |
638 | 3 | |
639 | 3 | if (!strcmp(NS_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) { |
640 | 0 | if (XRE_IsParentProcess()) { // cached ClientID hasn't been read yet |
641 | 0 | BackgroundHangThread::Startup(); |
642 | 0 | BackgroundHangManager::sInstance = new BackgroundHangManager(); |
643 | 0 | Unused << NS_WARN_IF(BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr()); |
644 | 0 |
|
645 | 0 | nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService(); |
646 | 0 | MOZ_ASSERT(observerService); |
647 | 0 |
|
648 | 0 | observerService->AddObserver(BackgroundHangManager::sInstance, "profile-after-change", false); |
649 | 0 | return; |
650 | 0 | } else if(DisableOnBeta()){ |
651 | 0 | return; |
652 | 0 | } |
653 | 3 | } |
654 | 3 | |
655 | 3 | BackgroundHangThread::Startup(); |
656 | 3 | BackgroundHangManager::sInstance = new BackgroundHangManager(); |
657 | 3 | Unused << NS_WARN_IF(BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr()); |
658 | 3 | #endif |
659 | 3 | } |
660 | | |
661 | | void |
662 | | BackgroundHangMonitor::Shutdown() |
663 | 0 | { |
664 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
665 | 0 | if (BackgroundHangManager::sDisabled) { |
666 | 0 | MOZ_ASSERT(!BackgroundHangManager::sInstance, "Initialized"); |
667 | 0 | return; |
668 | 0 | } |
669 | 0 |
|
670 | 0 | MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized"); |
671 | 0 | BackgroundHangManager::sInstance->mCPUUsageWatcher.Uninit(); |
672 | 0 | /* Scope our lock inside Shutdown() because the sInstance object can |
673 | 0 | be destroyed as soon as we set sInstance to nullptr below, and |
674 | 0 | we don't want to hold the lock when it's being destroyed. */ |
675 | 0 | BackgroundHangManager::sInstance->Shutdown(); |
676 | 0 | BackgroundHangManager::sInstance = nullptr; |
677 | 0 | BackgroundHangManager::sDisabled = true; |
678 | 0 | #endif |
679 | 0 | } |
680 | | |
681 | | BackgroundHangMonitor::BackgroundHangMonitor(const char* aName, |
682 | | uint32_t aTimeoutMs, |
683 | | uint32_t aMaxTimeoutMs, |
684 | | ThreadType aThreadType) |
685 | | : mThread(aThreadType == THREAD_SHARED ? BackgroundHangThread::FindThread() : nullptr) |
686 | 3 | { |
687 | 3 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
688 | | # ifdef MOZ_VALGRIND |
689 | | // If we're running on Valgrind, we'll be making forward progress at a |
690 | | // rate of somewhere between 1/25th and 1/50th of normal. This causes the |
691 | | // BHR to capture a lot of stacks, which slows us down even more. As an |
692 | | // attempt to avoid the worst of this, scale up all presented timeouts by |
693 | | // a factor of thirty, and add six seconds so as to impose a six second |
694 | | // floor on all timeouts. For a non-Valgrind-enabled build, or for an |
695 | | // enabled build which isn't running on Valgrind, the timeouts are |
696 | | // unchanged. |
697 | | if (RUNNING_ON_VALGRIND) { |
698 | | const uint32_t scaleUp = 30; |
699 | | const uint32_t extraMs = 6000; |
700 | | if (aTimeoutMs != BackgroundHangMonitor::kNoTimeout) { |
701 | | aTimeoutMs *= scaleUp; |
702 | | aTimeoutMs += extraMs; |
703 | | } |
704 | | if (aMaxTimeoutMs != BackgroundHangMonitor::kNoTimeout) { |
705 | | aMaxTimeoutMs *= scaleUp; |
706 | | aMaxTimeoutMs += extraMs; |
707 | | } |
708 | | } |
709 | | # endif |
710 | | |
711 | 3 | if (!BackgroundHangManager::sDisabled && !mThread && !recordreplay::IsMiddleman()) { |
712 | 3 | mThread = new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs, |
713 | 3 | aThreadType); |
714 | 3 | } |
715 | 3 | #endif |
716 | 3 | } |
717 | | |
718 | | BackgroundHangMonitor::BackgroundHangMonitor() |
719 | | : mThread(BackgroundHangThread::FindThread()) |
720 | 0 | { |
721 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
722 | 0 | if (BackgroundHangManager::sDisabled) { |
723 | 0 | return; |
724 | 0 | } |
725 | 0 | #endif |
726 | 0 | } |
727 | | |
728 | | BackgroundHangMonitor::~BackgroundHangMonitor() |
729 | 0 | { |
730 | 0 | } |
731 | | |
732 | | void |
733 | | BackgroundHangMonitor::NotifyActivity() |
734 | 0 | { |
735 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
736 | 0 | if (mThread == nullptr) { |
737 | 0 | MOZ_ASSERT(BackgroundHangManager::sDisabled, |
738 | 0 | "This thread is not initialized for hang monitoring"); |
739 | 0 | return; |
740 | 0 | } |
741 | 0 |
|
742 | 0 | if (Telemetry::CanRecordExtended()) { |
743 | 0 | mThread->NotifyActivity(); |
744 | 0 | } |
745 | 0 | #endif |
746 | 0 | } |
747 | | |
748 | | void |
749 | | BackgroundHangMonitor::NotifyWait() |
750 | 0 | { |
751 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
752 | 0 | if (mThread == nullptr) { |
753 | 0 | MOZ_ASSERT(BackgroundHangManager::sDisabled, |
754 | 0 | "This thread is not initialized for hang monitoring"); |
755 | 0 | return; |
756 | 0 | } |
757 | 0 |
|
758 | 0 | if (Telemetry::CanRecordExtended()) { |
759 | 0 | mThread->NotifyWait(); |
760 | 0 | } |
761 | 0 | #endif |
762 | 0 | } |
763 | | |
764 | | bool |
765 | | BackgroundHangMonitor::RegisterAnnotator(BackgroundHangAnnotator& aAnnotator) |
766 | 0 | { |
767 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
768 | 0 | BackgroundHangThread* thisThread = BackgroundHangThread::FindThread(); |
769 | 0 | if (!thisThread) { |
770 | 0 | return false; |
771 | 0 | } |
772 | 0 | return thisThread->mAnnotators.Register(aAnnotator); |
773 | | #else |
774 | | return false; |
775 | | #endif |
776 | | } |
777 | | |
778 | | bool |
779 | | BackgroundHangMonitor::UnregisterAnnotator(BackgroundHangAnnotator& aAnnotator) |
780 | 0 | { |
781 | 0 | #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
782 | 0 | BackgroundHangThread* thisThread = BackgroundHangThread::FindThread(); |
783 | 0 | if (!thisThread) { |
784 | 0 | return false; |
785 | 0 | } |
786 | 0 | return thisThread->mAnnotators.Unregister(aAnnotator); |
787 | | #else |
788 | | return false; |
789 | | #endif |
790 | | } |
791 | | |
792 | | } // namespace mozilla |