Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 : * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /**
8 : * A watchdog designed to terminate shutdown if it lasts too long.
9 : *
10 : * This watchdog is designed as a worst-case problem container for the
11 : * common case in which Firefox just won't shutdown.
12 : *
13 : * We spawn a thread during quit-application. If any of the shutdown
14 : * steps takes more than n milliseconds (63000 by default), kill the
15 : * process as fast as possible, without any cleanup.
16 : */
17 :
18 : #include "nsTerminator.h"
19 :
20 : #include "prthread.h"
21 : #include "prmon.h"
22 : #include "plstr.h"
23 : #include "prio.h"
24 :
25 : #include "nsString.h"
26 : #include "nsServiceManagerUtils.h"
27 : #include "nsDirectoryServiceUtils.h"
28 : #include "nsAppDirectoryServiceDefs.h"
29 :
30 : #include "nsIObserverService.h"
31 : #include "nsIPrefService.h"
32 : #if defined(MOZ_CRASHREPORTER)
33 : #include "nsExceptionHandler.h"
34 : #endif
35 : #include "GeckoProfiler.h"
36 : #include "nsThreadUtils.h"
37 :
38 : #if defined(XP_WIN)
39 : #include <windows.h>
40 : #else
41 : #include <unistd.h>
42 : #endif
43 :
44 : #include "mozilla/ArrayUtils.h"
45 : #include "mozilla/Attributes.h"
46 : #include "mozilla/DebugOnly.h"
47 : #include "mozilla/MemoryChecking.h"
48 : #include "mozilla/Preferences.h"
49 : #include "mozilla/Services.h"
50 : #include "mozilla/UniquePtr.h"
51 : #include "mozilla/Unused.h"
52 : #include "mozilla/Telemetry.h"
53 :
54 : // Normally, the number of milliseconds that AsyncShutdown waits until
55 : // it decides to crash is specified as a preference. We use the
56 : // following value as a fallback if for some reason the preference is
57 : // absent.
58 : #define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
59 :
60 : // Additional number of milliseconds to wait until we decide to exit
61 : // forcefully.
62 : #define ADDITIONAL_WAIT_BEFORE_CRASH_MS 3000
63 :
64 : namespace mozilla {
65 :
66 : namespace {
67 :
68 : // Utility function: create a thread that is non-joinable,
69 : // does not prevent the process from terminating, is never
70 : // cooperatively scheduled, and uses a default stack size.
71 0 : PRThread* CreateSystemThread(void (*start)(void* arg),
72 : void* arg)
73 : {
74 : PRThread* thread = PR_CreateThread(
75 : PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
76 : start,
77 : arg,
78 : PR_PRIORITY_LOW,
79 : PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
80 : PR_UNJOINABLE_THREAD,
81 : 0 /* Use default stack size */
82 0 : );
83 : MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(thread); // This pointer will never be deallocated.
84 0 : return thread;
85 : }
86 :
87 :
88 : ////////////////////////////////////////////
89 : //
90 : // The watchdog
91 : //
92 : // This nspr thread is in charge of crashing the process if any stage of shutdown
93 : // lasts more than some predefined duration. As a side-effect, it measures the
94 : // duration of each stage of shutdown.
95 : //
96 :
97 : // The heartbeat of the operation.
98 : //
99 : // Main thread:
100 : //
101 : // * Whenever a shutdown step has been completed, the main thread
102 : // swaps gHeartbeat to 0 to mark that the shutdown process is still
103 : // progressing. The value swapped away indicates the number of ticks
104 : // it took for the shutdown step to advance.
105 : //
106 : // Watchdog thread:
107 : //
108 : // * Every tick, the watchdog thread increments gHearbeat atomically.
109 : //
110 : // A note about precision:
111 : // Since gHeartbeat is generally reset to 0 between two ticks, this means
112 : // that gHeartbeat stays at 0 less than one tick. Consequently, values
113 : // extracted from gHeartbeat must be considered rounded up.
114 : Atomic<uint32_t> gHeartbeat(0);
115 :
116 : struct Options {
117 : /**
118 : * How many ticks before we should crash the process.
119 : */
120 : uint32_t crashAfterTicks;
121 : };
122 :
123 : /**
124 : * Entry point for the watchdog thread
125 : */
126 : void
127 0 : RunWatchdog(void* arg)
128 : {
129 0 : NS_SetCurrentThreadName("Shutdown Hang Terminator");
130 :
131 : // Let's copy and deallocate options, that's one less leak to worry
132 : // about.
133 0 : UniquePtr<Options> options((Options*)arg);
134 0 : uint32_t crashAfterTicks = options->crashAfterTicks;
135 0 : options = nullptr;
136 :
137 0 : const uint32_t timeToLive = crashAfterTicks;
138 : while (true) {
139 : //
140 : // We do not want to sleep for the entire duration,
141 : // as putting the computer to sleep would suddenly
142 : // cause us to timeout on wakeup.
143 : //
144 : // Rather, we prefer sleeping for at most 1 second
145 : // at a time. If the computer sleeps then wakes up,
146 : // we have lost at most one second, which is much
147 : // more reasonable.
148 : //
149 : #if defined(XP_WIN)
150 : Sleep(1000 /* ms */);
151 : #else
152 0 : usleep(1000000 /* usec */);
153 : #endif
154 :
155 0 : if (gHeartbeat++ < timeToLive) {
156 0 : continue;
157 : }
158 :
159 : // Shutdown is apparently dead. Crash the process.
160 0 : MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
161 : }
162 : }
163 :
164 : ////////////////////////////////////////////
165 : //
166 : // Writer thread
167 : //
168 : // This nspr thread is in charge of writing to disk statistics produced by the
169 : // watchdog thread and collected by the main thread. Note that we use a nspr
170 : // thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
171 : // threads.
172 : //
173 :
174 : // Utility class, used by UniquePtr<> to close nspr files.
175 : class PR_CloseDelete
176 : {
177 : public:
178 : constexpr PR_CloseDelete() = default;
179 :
180 : PR_CloseDelete(const PR_CloseDelete& aOther) = default;
181 :
182 0 : void operator()(PRFileDesc* aPtr) const
183 : {
184 0 : PR_Close(aPtr);
185 0 : }
186 : };
187 :
188 : //
189 : // Communication between the main thread and the writer thread.
190 : //
191 : // Main thread:
192 : //
193 : // * Whenever a shutdown step has been completed, the main thread
194 : // obtains the number of ticks from the watchdog threads, builds
195 : // a string representing all the data gathered so far, places
196 : // this string in `gWriteData`, and wakes up the writer thread
197 : // using `gWriteReady`. If `gWriteData` already contained a non-null
198 : // pointer, this means that the writer thread is lagging behind the
199 : // main thread, and the main thread cleans up the memory.
200 : //
201 : // Writer thread:
202 : //
203 : // * When awake, the writer thread swaps `gWriteData` to nullptr. If
204 : // `gWriteData` contained data to write, the . If so, the writer
205 : // thread writes the data to a file named "ShutdownDuration.json.tmp",
206 : // then moves that file to "ShutdownDuration.json" and cleans up the
207 : // data. If `gWriteData` contains a nullptr, the writer goes to sleep
208 : // until it is awkened using `gWriteReady`.
209 : //
210 : //
211 : // The data written by the writer thread will be read by another
212 : // module upon the next restart and fed to Telemetry.
213 : //
214 : Atomic<nsCString*> gWriteData(nullptr);
215 : PRMonitor* gWriteReady = nullptr;
216 :
217 0 : void RunWriter(void* arg)
218 : {
219 0 : AutoProfilerRegisterThread registerThread("Shutdown Statistics Writer");
220 0 : NS_SetCurrentThreadName("Shutdown Statistics Writer");
221 :
222 : MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
223 : // Shutdown will generally complete before we have a chance to
224 : // deallocate. This is not a leak.
225 :
226 : // Setup destinationPath and tmpFilePath
227 :
228 0 : nsCString destinationPath(static_cast<char*>(arg));
229 0 : nsAutoCString tmpFilePath;
230 0 : tmpFilePath.Append(destinationPath);
231 0 : tmpFilePath.AppendLiteral(".tmp");
232 :
233 : // Cleanup any file leftover from a previous run
234 0 : Unused << PR_Delete(tmpFilePath.get());
235 0 : Unused << PR_Delete(destinationPath.get());
236 :
237 : while (true) {
238 : //
239 : // Check whether we have received data from the main thread.
240 : //
241 : // We perform the check before waiting on `gWriteReady` as we may
242 : // have received data while we were busy writing.
243 : //
244 : // Also note that gWriteData may have been modified several times
245 : // since we last checked. That's ok, we are not losing any important
246 : // data (since we keep adding data), and we are not leaking memory
247 : // (since the main thread deallocates any data that hasn't been
248 : // consumed by the writer thread).
249 : //
250 0 : UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
251 0 : if (!data) {
252 : // Data is not available yet.
253 : // Wait until the main thread provides it.
254 0 : PR_EnterMonitor(gWriteReady);
255 0 : PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
256 0 : PR_ExitMonitor(gWriteReady);
257 0 : continue;
258 : }
259 :
260 : MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
261 : // Shutdown may complete before we have a chance to deallocate.
262 : // This is not a leak.
263 :
264 : //
265 : // Write to a temporary file
266 : //
267 : // In case of any error, we simply give up. Since the data is
268 : // hardly critical, we don't want to spend too much effort
269 : // salvaging it.
270 : //
271 : UniquePtr<PRFileDesc, PR_CloseDelete>
272 : tmpFileDesc(PR_Open(tmpFilePath.get(),
273 : PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
274 0 : 00600));
275 :
276 : // Shutdown may complete before we have a chance to close the file.
277 : // This is not a leak.
278 : MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
279 :
280 0 : if (tmpFileDesc == nullptr) {
281 0 : break;
282 : }
283 0 : if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
284 0 : break;
285 : }
286 0 : tmpFileDesc.reset();
287 :
288 : //
289 : // Rename on top of destination file.
290 : //
291 : // This is not sufficient to guarantee that the destination file
292 : // will be written correctly, but, again, we don't care enough
293 : // about the data to make more efforts.
294 : //
295 0 : if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
296 0 : break;
297 : }
298 0 : }
299 0 : }
300 :
301 : /**
302 : * A step during shutdown.
303 : *
304 : * Shutdown is divided in steps, which all map to an observer
305 : * notification. The duration of a step is defined as the number of
306 : * ticks between the time we receive a notification and the next one.
307 : */
308 : struct ShutdownStep
309 : {
310 : char const* const mTopic;
311 : int mTicks;
312 :
313 : constexpr explicit ShutdownStep(const char *const topic)
314 : : mTopic(topic)
315 : , mTicks(-1)
316 : {}
317 :
318 : };
319 :
320 : static ShutdownStep sShutdownSteps[] = {
321 : ShutdownStep("quit-application"),
322 : ShutdownStep("profile-change-teardown"),
323 : ShutdownStep("profile-before-change"),
324 : ShutdownStep("xpcom-will-shutdown"),
325 : ShutdownStep("xpcom-shutdown"),
326 : };
327 :
328 : } // namespace
329 :
330 20 : NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
331 :
332 1 : nsTerminator::nsTerminator()
333 : : mInitialized(false)
334 1 : , mCurrentStep(-1)
335 : {
336 1 : }
337 :
338 : // During startup, register as an observer for all interesting topics.
339 : nsresult
340 1 : nsTerminator::SelfInit()
341 : {
342 2 : nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
343 1 : if (!os) {
344 0 : return NS_ERROR_UNEXPECTED;
345 : }
346 :
347 6 : for (auto& shutdownStep : sShutdownSteps) {
348 10 : DebugOnly<nsresult> rv = os->AddObserver(this, shutdownStep.mTopic, false);
349 5 : NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "AddObserver failed");
350 : }
351 :
352 1 : return NS_OK;
353 : }
354 :
355 : // Actually launch these threads. This takes place at the first sign of shutdown.
356 : void
357 0 : nsTerminator::Start()
358 : {
359 0 : MOZ_ASSERT(!mInitialized);
360 0 : StartWatchdog();
361 : #if !defined(DEBUG)
362 : // Only allow nsTerminator to write on non-debug builds so we don't get leak warnings on
363 : // shutdown for intentional leaks (see bug 1242084). This will be enabled again by bug
364 : // 1255484 when 1255478 lands.
365 : StartWriter();
366 : #endif // !defined(DEBUG)
367 0 : mInitialized = true;
368 0 : }
369 :
370 : // Prepare, allocate and start the watchdog thread.
371 : // By design, it will never finish, nor be deallocated.
372 : void
373 0 : nsTerminator::StartWatchdog()
374 : {
375 : int32_t crashAfterMS =
376 : Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
377 0 : FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
378 : // Ignore negative values
379 0 : if (crashAfterMS <= 0) {
380 0 : crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
381 : }
382 :
383 : // Add a little padding, to ensure that we do not crash before
384 : // AsyncShutdown.
385 0 : if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
386 : // Defend against overflow
387 0 : crashAfterMS = INT32_MAX;
388 : } else {
389 0 : crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
390 : }
391 :
392 0 : UniquePtr<Options> options(new Options());
393 0 : const PRIntervalTime ticksDuration = PR_MillisecondsToInterval(1000);
394 0 : options->crashAfterTicks = crashAfterMS / ticksDuration;
395 :
396 0 : DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
397 0 : options.release());
398 0 : MOZ_ASSERT(watchdogThread);
399 0 : }
400 :
401 : // Prepare, allocate and start the writer thread. By design, it will never
402 : // finish, nor be deallocated. In case of error, we degrade
403 : // gracefully to not writing Telemetry data.
404 : void
405 0 : nsTerminator::StartWriter()
406 : {
407 0 : if (!Telemetry::CanRecordExtended()) {
408 0 : return;
409 : }
410 0 : nsCOMPtr<nsIFile> profLD;
411 0 : nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
412 0 : getter_AddRefs(profLD));
413 0 : if (NS_FAILED(rv)) {
414 0 : return;
415 : }
416 :
417 0 : rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
418 0 : if (NS_FAILED(rv)) {
419 0 : return;
420 : }
421 :
422 0 : nsAutoString path;
423 0 : rv = profLD->GetPath(path);
424 0 : if (NS_FAILED(rv)) {
425 0 : return;
426 : }
427 :
428 0 : gWriteReady = PR_NewMonitor();
429 : MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(gWriteReady); // We will never deallocate this object
430 : PRThread* writerThread = CreateSystemThread(RunWriter,
431 0 : ToNewUTF8String(path));
432 :
433 0 : if (!writerThread) {
434 0 : return;
435 : }
436 : }
437 :
438 : NS_IMETHODIMP
439 1 : nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
440 : {
441 1 : if (strcmp(aTopic, "profile-after-change") == 0) {
442 1 : return SelfInit();
443 : }
444 :
445 : // Other notifications are shutdown-related.
446 :
447 : // As we have seen examples in the wild of shutdown notifications
448 : // not being sent (or not being sent in the expected order), we do
449 : // not assume a specific order.
450 0 : if (!mInitialized) {
451 0 : Start();
452 : }
453 :
454 0 : UpdateHeartbeat(aTopic);
455 : #if !defined(DEBUG)
456 : // Only allow nsTerminator to write on non-debug builds so we don't get leak warnings on
457 : // shutdown for intentional leaks (see bug 1242084). This will be enabled again by bug
458 : // 1255484 when 1255478 lands.
459 : UpdateTelemetry();
460 : #endif // !defined(DEBUG)
461 0 : UpdateCrashReport(aTopic);
462 :
463 : // Perform a little cleanup
464 0 : nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
465 0 : MOZ_RELEASE_ASSERT(os);
466 0 : (void)os->RemoveObserver(this, aTopic);
467 :
468 0 : return NS_OK;
469 : }
470 :
471 : void
472 0 : nsTerminator::UpdateHeartbeat(const char* aTopic)
473 : {
474 : // Reset the clock, find out how long the current phase has lasted.
475 0 : uint32_t ticks = gHeartbeat.exchange(0);
476 0 : if (mCurrentStep > 0) {
477 0 : sShutdownSteps[mCurrentStep].mTicks = ticks;
478 : }
479 :
480 : // Find out where we now are in the current shutdown.
481 : // Don't assume that shutdown takes place in the expected order.
482 0 : int nextStep = -1;
483 0 : for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
484 0 : if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
485 0 : nextStep = i;
486 0 : break;
487 : }
488 : }
489 0 : MOZ_ASSERT(nextStep != -1);
490 0 : mCurrentStep = nextStep;
491 0 : }
492 :
493 : void
494 0 : nsTerminator::UpdateTelemetry()
495 : {
496 0 : if (!Telemetry::CanRecordExtended() || !gWriteReady) {
497 0 : return;
498 : }
499 :
500 : //
501 : // We need Telemetry data on the effective duration of each step,
502 : // to be able to tune the time-to-crash of each of both the
503 : // Terminator and AsyncShutdown. However, at this stage, it is too
504 : // late to record such data into Telemetry, so we write it to disk
505 : // and read it upon the next startup.
506 : //
507 :
508 : // Build JSON.
509 0 : UniquePtr<nsCString> telemetryData(new nsCString());
510 0 : telemetryData->AppendLiteral("{");
511 0 : size_t fields = 0;
512 0 : for (auto& shutdownStep : sShutdownSteps) {
513 0 : if (shutdownStep.mTicks < 0) {
514 : // Ignore this field.
515 0 : continue;
516 : }
517 0 : if (fields++ > 0) {
518 0 : telemetryData->Append(", ");
519 : }
520 0 : telemetryData->AppendLiteral(R"(")");
521 0 : telemetryData->Append(shutdownStep.mTopic);
522 0 : telemetryData->AppendLiteral(R"(": )");
523 0 : telemetryData->AppendInt(shutdownStep.mTicks);
524 : }
525 0 : telemetryData->AppendLiteral("}");
526 :
527 0 : if (fields == 0) {
528 : // Nothing to write
529 0 : return;
530 : }
531 :
532 : //
533 : // Send data to the worker thread.
534 : //
535 0 : delete gWriteData.exchange(telemetryData.release()); // Clear any data that hasn't been written yet
536 :
537 : // In case the worker thread was sleeping, wake it up.
538 0 : PR_EnterMonitor(gWriteReady);
539 0 : PR_Notify(gWriteReady);
540 0 : PR_ExitMonitor(gWriteReady);
541 : }
542 :
543 : void
544 0 : nsTerminator::UpdateCrashReport(const char* aTopic)
545 : {
546 : #if defined(MOZ_CRASHREPORTER)
547 : // In case of crash, we wish to know where in shutdown we are
548 0 : nsAutoCString report(aTopic);
549 :
550 0 : Unused << CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("ShutdownProgress"),
551 0 : report);
552 : #endif // defined(MOZ_CRASHREPORTER)
553 0 : }
554 :
555 :
556 : } // namespace mozilla
|