Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/toolkit/components/terminator/nsTerminator.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2
 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3
 * This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
/**
8
 * A watchdog designed to terminate shutdown if it lasts too long.
9
 *
10
 * This watchdog is designed as a worst-case problem container for the
11
 * common case in which Firefox just won't shutdown.
12
 *
13
 * We spawn a thread during quit-application. If any of the shutdown
14
 * steps takes more than n milliseconds (63000 by default), kill the
15
 * process as fast as possible, without any cleanup.
16
 */
17
18
#include "nsTerminator.h"
19
20
#include "prthread.h"
21
#include "prmon.h"
22
#include "plstr.h"
23
#include "prio.h"
24
25
#include "nsString.h"
26
#include "nsServiceManagerUtils.h"
27
#include "nsDirectoryServiceUtils.h"
28
#include "nsAppDirectoryServiceDefs.h"
29
30
#include "nsIObserverService.h"
31
#include "nsIPrefService.h"
32
#include "nsExceptionHandler.h"
33
#include "GeckoProfiler.h"
34
#include "nsThreadUtils.h"
35
36
#if defined(XP_WIN)
37
#include <windows.h>
38
#else
39
#include <unistd.h>
40
#endif
41
42
#include "mozilla/ArrayUtils.h"
43
#include "mozilla/Atomics.h"
44
#include "mozilla/Attributes.h"
45
#include "mozilla/DebugOnly.h"
46
#include "mozilla/MemoryChecking.h"
47
#include "mozilla/Preferences.h"
48
#include "mozilla/Services.h"
49
#include "mozilla/UniquePtr.h"
50
#include "mozilla/Unused.h"
51
#include "mozilla/Telemetry.h"
52
53
#include "mozilla/dom/workerinternals/RuntimeService.h"
54
55
// Normally, the number of milliseconds that AsyncShutdown waits until
56
// it decides to crash is specified as a preference. We use the
57
// following value as a fallback if for some reason the preference is
58
// absent.
59
0
#define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
60
61
// Additional number of milliseconds to wait until we decide to exit
62
// forcefully.
63
0
#define ADDITIONAL_WAIT_BEFORE_CRASH_MS 3000
64
65
namespace mozilla {
66
67
namespace {
68
69
/**
70
 * A step during shutdown.
71
 *
72
 * Shutdown is divided in steps, which all map to an observer
73
 * notification. The duration of a step is defined as the number of
74
 * ticks between the time we receive a notification and the next one.
75
 */
76
struct ShutdownStep
77
{
78
  char const* const mTopic;
79
  int mTicks;
80
81
  constexpr explicit ShutdownStep(const char *const topic)
82
    : mTopic(topic)
83
    , mTicks(-1)
84
0
  {}
85
};
86
87
static ShutdownStep sShutdownSteps[] = {
88
  ShutdownStep("quit-application"),
89
  ShutdownStep("profile-change-teardown"),
90
  ShutdownStep("profile-before-change"),
91
  ShutdownStep("xpcom-will-shutdown"),
92
  ShutdownStep("xpcom-shutdown"),
93
};
94
95
Atomic<bool> sShutdownNotified;
96
97
// Utility function: create a thread that is non-joinable,
98
// does not prevent the process from terminating, is never
99
// cooperatively scheduled, and uses a default stack size.
100
PRThread* CreateSystemThread(void (*start)(void* arg),
101
                             void* arg)
102
0
{
103
0
  PRThread* thread = PR_CreateThread(
104
0
    PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
105
0
    start,
106
0
    arg,
107
0
    PR_PRIORITY_LOW,
108
0
    PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
109
0
    PR_UNJOINABLE_THREAD,
110
0
    0 /* Use default stack size */
111
0
  );
112
0
  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(thread); // This pointer will never be deallocated.
113
0
  return thread;
114
0
}
115
116
117
////////////////////////////////////////////
118
//
119
// The watchdog
120
//
121
// This nspr thread is in charge of crashing the process if any stage of shutdown
122
// lasts more than some predefined duration. As a side-effect, it measures the
123
// duration of each stage of shutdown.
124
//
125
126
// The heartbeat of the operation.
127
//
128
// Main thread:
129
//
130
// * Whenever a shutdown step has been completed, the main thread
131
// swaps gHeartbeat to 0 to mark that the shutdown process is still
132
// progressing. The value swapped away indicates the number of ticks
133
// it took for the shutdown step to advance.
134
//
135
// Watchdog thread:
136
//
137
// * Every tick, the watchdog thread increments gHearbeat atomically.
138
//
139
// A note about precision:
140
// Since gHeartbeat is generally reset to 0 between two ticks, this means
141
// that gHeartbeat stays at 0 less than one tick. Consequently, values
142
// extracted from gHeartbeat must be considered rounded up.
143
Atomic<uint32_t> gHeartbeat(0);
144
145
struct Options {
146
  /**
147
   * How many ticks before we should crash the process.
148
   */
149
  uint32_t crashAfterTicks;
150
};
151
152
/**
153
 * Entry point for the watchdog thread
154
 */
155
void
156
RunWatchdog(void* arg)
157
0
{
158
0
  NS_SetCurrentThreadName("Shutdown Hang Terminator");
159
0
160
0
  // Let's copy and deallocate options, that's one less leak to worry
161
0
  // about.
162
0
  UniquePtr<Options> options((Options*)arg);
163
0
  uint32_t crashAfterTicks = options->crashAfterTicks;
164
0
  options = nullptr;
165
0
166
0
  const uint32_t timeToLive = crashAfterTicks;
167
0
  while (true) {
168
0
    //
169
0
    // We do not want to sleep for the entire duration,
170
0
    // as putting the computer to sleep would suddenly
171
0
    // cause us to timeout on wakeup.
172
0
    //
173
0
    // Rather, we prefer sleeping for at most 1 second
174
0
    // at a time. If the computer sleeps then wakes up,
175
0
    // we have lost at most one second, which is much
176
0
    // more reasonable.
177
0
    //
178
#if defined(XP_WIN)
179
    Sleep(1000 /* ms */);
180
#else
181
    usleep(1000000 /* usec */);
182
0
#endif
183
0
184
0
    if (gHeartbeat++ < timeToLive) {
185
0
      continue;
186
0
    }
187
0
188
0
    // The shutdown steps are not completed yet. Let's report the last one.
189
0
    if (!sShutdownNotified) {
190
0
      const char* lastStep = nullptr;
191
0
      for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
192
0
        if (sShutdownSteps[i].mTicks == -1) {
193
0
          break;
194
0
        }
195
0
        lastStep = sShutdownSteps[i].mTopic;
196
0
      }
197
0
198
0
      if (lastStep) {
199
0
        nsCString msg;
200
0
        msg.AppendPrintf("Shutdown hanging at step %s. "
201
0
                         "Something is blocking the main-thread.", lastStep);
202
0
        // This string will be leaked.
203
0
        MOZ_CRASH_UNSAFE_OOL(strdup(msg.BeginReading()));
204
0
      }
205
0
206
0
      MOZ_CRASH("Shutdown hanging before starting.");
207
0
    }
208
0
209
0
    // Maybe some workers are blocking the shutdown.
210
0
    mozilla::dom::workerinternals::RuntimeService* runtimeService =
211
0
      mozilla::dom::workerinternals::RuntimeService::GetService();
212
0
    if (runtimeService) {
213
0
     runtimeService->CrashIfHanging();
214
0
    }
215
0
216
0
    // Shutdown is apparently dead. Crash the process.
217
0
    CrashReporter::SetMinidumpAnalysisAllThreads();
218
0
219
0
    MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
220
0
  }
221
0
}
222
223
////////////////////////////////////////////
224
//
225
// Writer thread
226
//
227
// This nspr thread is in charge of writing to disk statistics produced by the
228
// watchdog thread and collected by the main thread. Note that we use a nspr
229
// thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
230
// threads.
231
//
232
233
// Utility class, used by UniquePtr<> to close nspr files.
234
class PR_CloseDelete
235
{
236
public:
237
  constexpr PR_CloseDelete() = default;
238
239
  PR_CloseDelete(const PR_CloseDelete& aOther) = default;
240
241
  void operator()(PRFileDesc* aPtr) const
242
0
  {
243
0
    PR_Close(aPtr);
244
0
  }
245
};
246
247
//
248
// Communication between the main thread and the writer thread.
249
//
250
// Main thread:
251
//
252
// * Whenever a shutdown step has been completed, the main thread
253
// obtains the number of ticks from the watchdog threads, builds
254
// a string representing all the data gathered so far, places
255
// this string in `gWriteData`, and wakes up the writer thread
256
// using `gWriteReady`. If `gWriteData` already contained a non-null
257
// pointer, this means that the writer thread is lagging behind the
258
// main thread, and the main thread cleans up the memory.
259
//
260
// Writer thread:
261
//
262
// * When awake, the writer thread swaps `gWriteData` to nullptr. If
263
// `gWriteData` contained data to write, the . If so, the writer
264
// thread writes the data to a file named "ShutdownDuration.json.tmp",
265
// then moves that file to "ShutdownDuration.json" and cleans up the
266
// data. If `gWriteData` contains a nullptr, the writer goes to sleep
267
// until it is awkened using `gWriteReady`.
268
//
269
//
270
// The data written by the writer thread will be read by another
271
// module upon the next restart and fed to Telemetry.
272
//
273
Atomic<nsCString*> gWriteData(nullptr);
274
PRMonitor* gWriteReady = nullptr;
275
276
void RunWriter(void* arg)
277
0
{
278
0
  AUTO_PROFILER_REGISTER_THREAD("Shutdown Statistics Writer");
279
0
  NS_SetCurrentThreadName("Shutdown Statistics Writer");
280
0
281
0
  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
282
0
  // Shutdown will generally complete before we have a chance to
283
0
  // deallocate. This is not a leak.
284
0
285
0
  // Setup destinationPath and tmpFilePath
286
0
287
0
  nsCString destinationPath;
288
0
  destinationPath.Adopt(static_cast<char*>(arg));
289
0
  nsAutoCString tmpFilePath;
290
0
  tmpFilePath.Append(destinationPath);
291
0
  tmpFilePath.AppendLiteral(".tmp");
292
0
293
0
  // Cleanup any file leftover from a previous run
294
0
  Unused << PR_Delete(tmpFilePath.get());
295
0
  Unused << PR_Delete(destinationPath.get());
296
0
297
0
  while (true) {
298
0
    //
299
0
    // Check whether we have received data from the main thread.
300
0
    //
301
0
    // We perform the check before waiting on `gWriteReady` as we may
302
0
    // have received data while we were busy writing.
303
0
    //
304
0
    // Also note that gWriteData may have been modified several times
305
0
    // since we last checked. That's ok, we are not losing any important
306
0
    // data (since we keep adding data), and we are not leaking memory
307
0
    // (since the main thread deallocates any data that hasn't been
308
0
    // consumed by the writer thread).
309
0
    //
310
0
    UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
311
0
    if (!data) {
312
0
      // Data is not available yet.
313
0
      // Wait until the main thread provides it.
314
0
      PR_EnterMonitor(gWriteReady);
315
0
      PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
316
0
      PR_ExitMonitor(gWriteReady);
317
0
      continue;
318
0
    }
319
0
320
0
    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
321
0
    // Shutdown may complete before we have a chance to deallocate.
322
0
    // This is not a leak.
323
0
324
0
    //
325
0
    // Write to a temporary file
326
0
    //
327
0
    // In case of any error, we simply give up. Since the data is
328
0
    // hardly critical, we don't want to spend too much effort
329
0
    // salvaging it.
330
0
    //
331
0
    UniquePtr<PRFileDesc, PR_CloseDelete>
332
0
      tmpFileDesc(PR_Open(tmpFilePath.get(),
333
0
                          PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
334
0
                          00600));
335
0
336
0
    // Shutdown may complete before we have a chance to close the file.
337
0
    // This is not a leak.
338
0
    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
339
0
340
0
    if (tmpFileDesc == nullptr) {
341
0
      break;
342
0
    }
343
0
    if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
344
0
      break;
345
0
    }
346
0
    tmpFileDesc.reset();
347
0
348
0
    //
349
0
    // Rename on top of destination file.
350
0
    //
351
0
    // This is not sufficient to guarantee that the destination file
352
0
    // will be written correctly, but, again, we don't care enough
353
0
    // about the data to make more efforts.
354
0
    //
355
0
    if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
356
0
      break;
357
0
    }
358
0
  }
359
0
}
360
361
} // namespace
362
363
NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
364
365
nsTerminator::nsTerminator()
366
  : mInitialized(false)
367
  , mCurrentStep(-1)
368
0
{
369
0
}
370
371
// During startup, register as an observer for all interesting topics.
372
nsresult
373
nsTerminator::SelfInit()
374
0
{
375
0
  nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
376
0
  if (!os) {
377
0
    return NS_ERROR_UNEXPECTED;
378
0
  }
379
0
380
0
  for (auto& shutdownStep : sShutdownSteps) {
381
0
    DebugOnly<nsresult> rv = os->AddObserver(this, shutdownStep.mTopic, false);
382
0
    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "AddObserver failed");
383
0
  }
384
0
385
0
  return NS_OK;
386
0
}
387
388
// Actually launch these threads. This takes place at the first sign of shutdown.
389
void
390
nsTerminator::Start()
391
0
{
392
0
  MOZ_ASSERT(!mInitialized);
393
0
  StartWatchdog();
394
0
#if !defined(NS_FREE_PERMANENT_DATA)
395
0
  // Only allow nsTerminator to write on non-leak-checked builds so we don't
396
0
  // get leak warnings on shutdown for intentional leaks (see bug 1242084).
397
0
  // This will be enabled again by bug 1255484 when 1255478 lands.
398
0
  StartWriter();
399
0
#endif // !defined(NS_FREE_PERMANENT_DATA)
400
0
  mInitialized = true;
401
0
  sShutdownNotified = false;
402
0
}
403
404
// Prepare, allocate and start the watchdog thread.
405
// By design, it will never finish, nor be deallocated.
406
void
407
nsTerminator::StartWatchdog()
408
0
{
409
0
  int32_t crashAfterMS =
410
0
    Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
411
0
                        FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
412
0
  // Ignore negative values
413
0
  if (crashAfterMS <= 0) {
414
0
    crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
415
0
  }
416
0
417
0
  // Add a little padding, to ensure that we do not crash before
418
0
  // AsyncShutdown.
419
0
  if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
420
0
    // Defend against overflow
421
0
    crashAfterMS = INT32_MAX;
422
0
  } else {
423
0
    crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
424
0
  }
425
0
426
# ifdef MOZ_VALGRIND
427
  // If we're running on Valgrind, we'll be making forward progress at a
428
  // rate of somewhere between 1/25th and 1/50th of normal.  This can cause
429
  // timeouts frequently enough to be a problem for the Valgrind runs on
430
  // automation: see bug 1296819.  As an attempt to avoid the worst of this,
431
  // scale up the presented timeout by a factor of three.  For a
432
  // non-Valgrind-enabled build, or for an enabled build which isn't running
433
  // on Valgrind, the timeout is unchanged.
434
  if (RUNNING_ON_VALGRIND) {
435
    const int32_t scaleUp = 3;
436
    if (crashAfterMS >= (INT32_MAX / scaleUp) - 1) {
437
      // Defend against overflow
438
      crashAfterMS = INT32_MAX;
439
    } else {
440
      crashAfterMS *= scaleUp;
441
    }
442
  }
443
# endif
444
445
0
  UniquePtr<Options> options(new Options());
446
0
  const PRIntervalTime ticksDuration = PR_MillisecondsToInterval(1000);
447
0
  options->crashAfterTicks = crashAfterMS / ticksDuration;
448
0
  // Handle systems where ticksDuration is greater than crashAfterMS.
449
0
  if (options->crashAfterTicks == 0) {
450
0
    options->crashAfterTicks = crashAfterMS / 1000;
451
0
  }
452
0
453
0
  DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
454
0
                                                options.release());
455
0
  MOZ_ASSERT(watchdogThread);
456
0
}
457
458
// Prepare, allocate and start the writer thread. By design, it will never
459
// finish, nor be deallocated. In case of error, we degrade
460
// gracefully to not writing Telemetry data.
461
void
462
nsTerminator::StartWriter()
463
0
{
464
0
  if (!Telemetry::CanRecordExtended()) {
465
0
    return;
466
0
  }
467
0
  nsCOMPtr<nsIFile> profLD;
468
0
  nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
469
0
                                       getter_AddRefs(profLD));
470
0
  if (NS_FAILED(rv)) {
471
0
    return;
472
0
  }
473
0
474
0
  rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
475
0
  if (NS_FAILED(rv)) {
476
0
    return;
477
0
  }
478
0
479
0
  nsAutoString path;
480
0
  rv = profLD->GetPath(path);
481
0
  if (NS_FAILED(rv)) {
482
0
    return;
483
0
  }
484
0
485
0
  gWriteReady = PR_NewMonitor();
486
0
  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(gWriteReady); // We will never deallocate this object
487
0
  PRThread* writerThread = CreateSystemThread(RunWriter,
488
0
                                              ToNewUTF8String(path));
489
0
490
0
  if (!writerThread) {
491
0
    return;
492
0
  }
493
0
}
494
495
NS_IMETHODIMP
496
nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
497
0
{
498
0
  if (strcmp(aTopic, "profile-after-change") == 0) {
499
0
    return SelfInit();
500
0
  }
501
0
502
0
  // Other notifications are shutdown-related.
503
0
504
0
  // As we have seen examples in the wild of shutdown notifications
505
0
  // not being sent (or not being sent in the expected order), we do
506
0
  // not assume a specific order.
507
0
  if (!mInitialized) {
508
0
    Start();
509
0
  }
510
0
511
0
  UpdateHeartbeat(aTopic);
512
0
#if !defined(NS_FREE_PERMANENT_DATA)
513
0
  // Only allow nsTerminator to write on non-leak checked builds so we don't get leak warnings on
514
0
  // shutdown for intentional leaks (see bug 1242084). This will be enabled again by bug
515
0
  // 1255484 when 1255478 lands.
516
0
  UpdateTelemetry();
517
0
#endif // !defined(NS_FREE_PERMANENT_DATA)
518
0
  UpdateCrashReport(aTopic);
519
0
520
0
  // Perform a little cleanup
521
0
  nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
522
0
  MOZ_RELEASE_ASSERT(os);
523
0
  (void)os->RemoveObserver(this, aTopic);
524
0
525
0
  return NS_OK;
526
0
}
527
528
void
529
nsTerminator::UpdateHeartbeat(const char* aTopic)
530
0
{
531
0
  // Reset the clock, find out how long the current phase has lasted.
532
0
  uint32_t ticks = gHeartbeat.exchange(0);
533
0
  if (mCurrentStep > 0) {
534
0
    sShutdownSteps[mCurrentStep].mTicks = ticks;
535
0
  }
536
0
537
0
  // Find out where we now are in the current shutdown.
538
0
  // Don't assume that shutdown takes place in the expected order.
539
0
  int nextStep = -1;
540
0
  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
541
0
    if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
542
0
      nextStep = i;
543
0
      break;
544
0
    }
545
0
  }
546
0
  MOZ_ASSERT(nextStep != -1);
547
0
  mCurrentStep = nextStep;
548
0
}
549
550
void
551
nsTerminator::UpdateTelemetry()
552
0
{
553
0
  if (!Telemetry::CanRecordExtended() || !gWriteReady) {
554
0
    return;
555
0
  }
556
0
557
0
  //
558
0
  // We need Telemetry data on the effective duration of each step,
559
0
  // to be able to tune the time-to-crash of each of both the
560
0
  // Terminator and AsyncShutdown. However, at this stage, it is too
561
0
  // late to record such data into Telemetry, so we write it to disk
562
0
  // and read it upon the next startup.
563
0
  //
564
0
565
0
  // Build JSON.
566
0
  UniquePtr<nsCString> telemetryData(new nsCString());
567
0
  telemetryData->AppendLiteral("{");
568
0
  size_t fields = 0;
569
0
  for (auto& shutdownStep : sShutdownSteps) {
570
0
    if (shutdownStep.mTicks < 0) {
571
0
      // Ignore this field.
572
0
      continue;
573
0
    }
574
0
    if (fields++ > 0) {
575
0
      telemetryData->AppendLiteral(", ");
576
0
    }
577
0
    telemetryData->AppendLiteral(R"(")");
578
0
    telemetryData->Append(shutdownStep.mTopic);
579
0
    telemetryData->AppendLiteral(R"(": )");
580
0
    telemetryData->AppendInt(shutdownStep.mTicks);
581
0
  }
582
0
  telemetryData->AppendLiteral("}");
583
0
584
0
  if (fields == 0) {
585
0
    // Nothing to write
586
0
      return;
587
0
  }
588
0
589
0
  //
590
0
  // Send data to the worker thread.
591
0
  //
592
0
  delete gWriteData.exchange(telemetryData.release()); // Clear any data that hasn't been written yet
593
0
594
0
  // In case the worker thread was sleeping, wake it up.
595
0
  PR_EnterMonitor(gWriteReady);
596
0
  PR_Notify(gWriteReady);
597
0
  PR_ExitMonitor(gWriteReady);
598
0
}
599
600
void
601
nsTerminator::UpdateCrashReport(const char* aTopic)
602
0
{
603
0
  // In case of crash, we wish to know where in shutdown we are
604
0
  nsAutoCString report(aTopic);
605
0
606
0
  Unused << CrashReporter::AnnotateCrashReport(
607
0
    CrashReporter::Annotation::ShutdownProgress, report);
608
0
}
609
610
void
611
XPCOMShutdownNotified()
612
0
{
613
0
  MOZ_DIAGNOSTIC_ASSERT(sShutdownNotified == false);
614
0
  sShutdownNotified = true;
615
0
}
616
617
} // namespace mozilla