LCOV - code coverage report
Current view: top level - toolkit/components/terminator - nsTerminator.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 14 141 9.9 %
Date: 2017-07-14 16:53:18 Functions: 6 16 37.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
       2             :  * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
       3             :  * This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : /**
       8             :  * A watchdog designed to terminate shutdown if it lasts too long.
       9             :  *
      10             :  * This watchdog is designed as a worst-case problem container for the
      11             :  * common case in which Firefox just won't shutdown.
      12             :  *
      13             :  * We spawn a thread during quit-application. If any of the shutdown
      14             :  * steps takes more than n milliseconds (63000 by default), kill the
      15             :  * process as fast as possible, without any cleanup.
      16             :  */
      17             : 
      18             : #include "nsTerminator.h"
      19             : 
      20             : #include "prthread.h"
      21             : #include "prmon.h"
      22             : #include "plstr.h"
      23             : #include "prio.h"
      24             : 
      25             : #include "nsString.h"
      26             : #include "nsServiceManagerUtils.h"
      27             : #include "nsDirectoryServiceUtils.h"
      28             : #include "nsAppDirectoryServiceDefs.h"
      29             : 
      30             : #include "nsIObserverService.h"
      31             : #include "nsIPrefService.h"
      32             : #if defined(MOZ_CRASHREPORTER)
      33             : #include "nsExceptionHandler.h"
      34             : #endif
      35             : #include "GeckoProfiler.h"
      36             : #include "nsThreadUtils.h"
      37             : 
      38             : #if defined(XP_WIN)
      39             : #include <windows.h>
      40             : #else
      41             : #include <unistd.h>
      42             : #endif
      43             : 
      44             : #include "mozilla/ArrayUtils.h"
      45             : #include "mozilla/Attributes.h"
      46             : #include "mozilla/DebugOnly.h"
      47             : #include "mozilla/MemoryChecking.h"
      48             : #include "mozilla/Preferences.h"
      49             : #include "mozilla/Services.h"
      50             : #include "mozilla/UniquePtr.h"
      51             : #include "mozilla/Unused.h"
      52             : #include "mozilla/Telemetry.h"
      53             : 
      54             : // Normally, the number of milliseconds that AsyncShutdown waits until
      55             : // it decides to crash is specified as a preference. We use the
      56             : // following value as a fallback if for some reason the preference is
      57             : // absent.
      58             : #define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
      59             : 
      60             : // Additional number of milliseconds to wait until we decide to exit
      61             : // forcefully.
      62             : #define ADDITIONAL_WAIT_BEFORE_CRASH_MS 3000
      63             : 
      64             : namespace mozilla {
      65             : 
      66             : namespace {
      67             : 
      68             : // Utility function: create a thread that is non-joinable,
      69             : // does not prevent the process from terminating, is never
      70             : // cooperatively scheduled, and uses a default stack size.
      71           0 : PRThread* CreateSystemThread(void (*start)(void* arg),
      72             :                              void* arg)
      73             : {
      74             :   PRThread* thread = PR_CreateThread(
      75             :     PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
      76             :     start,
      77             :     arg,
      78             :     PR_PRIORITY_LOW,
      79             :     PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
      80             :     PR_UNJOINABLE_THREAD,
      81             :     0 /* Use default stack size */
      82           0 :   );
      83             :   MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(thread); // This pointer will never be deallocated.
      84           0 :   return thread;
      85             : }
      86             : 
      87             : 
      88             : ////////////////////////////////////////////
      89             : //
      90             : // The watchdog
      91             : //
      92             : // This nspr thread is in charge of crashing the process if any stage of shutdown
      93             : // lasts more than some predefined duration. As a side-effect, it measures the
      94             : // duration of each stage of shutdown.
      95             : //
      96             : 
      97             : // The heartbeat of the operation.
      98             : //
      99             : // Main thread:
     100             : //
     101             : // * Whenever a shutdown step has been completed, the main thread
     102             : // swaps gHeartbeat to 0 to mark that the shutdown process is still
     103             : // progressing. The value swapped away indicates the number of ticks
     104             : // it took for the shutdown step to advance.
     105             : //
     106             : // Watchdog thread:
     107             : //
     108             : // * Every tick, the watchdog thread increments gHearbeat atomically.
     109             : //
     110             : // A note about precision:
     111             : // Since gHeartbeat is generally reset to 0 between two ticks, this means
     112             : // that gHeartbeat stays at 0 less than one tick. Consequently, values
     113             : // extracted from gHeartbeat must be considered rounded up.
     114             : Atomic<uint32_t> gHeartbeat(0);
     115             : 
     116             : struct Options {
     117             :   /**
     118             :    * How many ticks before we should crash the process.
     119             :    */
     120             :   uint32_t crashAfterTicks;
     121             : };
     122             : 
     123             : /**
     124             :  * Entry point for the watchdog thread
     125             :  */
     126             : void
     127           0 : RunWatchdog(void* arg)
     128             : {
     129           0 :   NS_SetCurrentThreadName("Shutdown Hang Terminator");
     130             : 
     131             :   // Let's copy and deallocate options, that's one less leak to worry
     132             :   // about.
     133           0 :   UniquePtr<Options> options((Options*)arg);
     134           0 :   uint32_t crashAfterTicks = options->crashAfterTicks;
     135           0 :   options = nullptr;
     136             : 
     137           0 :   const uint32_t timeToLive = crashAfterTicks;
     138             :   while (true) {
     139             :     //
     140             :     // We do not want to sleep for the entire duration,
     141             :     // as putting the computer to sleep would suddenly
     142             :     // cause us to timeout on wakeup.
     143             :     //
     144             :     // Rather, we prefer sleeping for at most 1 second
     145             :     // at a time. If the computer sleeps then wakes up,
     146             :     // we have lost at most one second, which is much
     147             :     // more reasonable.
     148             :     //
     149             : #if defined(XP_WIN)
     150             :     Sleep(1000 /* ms */);
     151             : #else
     152           0 :     usleep(1000000 /* usec */);
     153             : #endif
     154             : 
     155           0 :     if (gHeartbeat++ < timeToLive) {
     156           0 :       continue;
     157             :     }
     158             : 
     159             :     // Shutdown is apparently dead. Crash the process.
     160           0 :     MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
     161             :   }
     162             : }
     163             : 
     164             : ////////////////////////////////////////////
     165             : //
     166             : // Writer thread
     167             : //
     168             : // This nspr thread is in charge of writing to disk statistics produced by the
     169             : // watchdog thread and collected by the main thread. Note that we use a nspr
     170             : // thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
     171             : // threads.
     172             : //
     173             : 
     174             : // Utility class, used by UniquePtr<> to close nspr files.
     175             : class PR_CloseDelete
     176             : {
     177             : public:
     178             :   constexpr PR_CloseDelete() = default;
     179             : 
     180             :   PR_CloseDelete(const PR_CloseDelete& aOther) = default;
     181             : 
     182           0 :   void operator()(PRFileDesc* aPtr) const
     183             :   {
     184           0 :     PR_Close(aPtr);
     185           0 :   }
     186             : };
     187             : 
     188             : //
     189             : // Communication between the main thread and the writer thread.
     190             : //
     191             : // Main thread:
     192             : //
     193             : // * Whenever a shutdown step has been completed, the main thread
     194             : // obtains the number of ticks from the watchdog threads, builds
     195             : // a string representing all the data gathered so far, places
     196             : // this string in `gWriteData`, and wakes up the writer thread
     197             : // using `gWriteReady`. If `gWriteData` already contained a non-null
     198             : // pointer, this means that the writer thread is lagging behind the
     199             : // main thread, and the main thread cleans up the memory.
     200             : //
     201             : // Writer thread:
     202             : //
     203             : // * When awake, the writer thread swaps `gWriteData` to nullptr. If
     204             : // `gWriteData` contained data to write, the . If so, the writer
     205             : // thread writes the data to a file named "ShutdownDuration.json.tmp",
     206             : // then moves that file to "ShutdownDuration.json" and cleans up the
     207             : // data. If `gWriteData` contains a nullptr, the writer goes to sleep
     208             : // until it is awkened using `gWriteReady`.
     209             : //
     210             : //
     211             : // The data written by the writer thread will be read by another
     212             : // module upon the next restart and fed to Telemetry.
     213             : //
     214             : Atomic<nsCString*> gWriteData(nullptr);
     215             : PRMonitor* gWriteReady = nullptr;
     216             : 
     217           0 : void RunWriter(void* arg)
     218             : {
     219           0 :   AutoProfilerRegisterThread registerThread("Shutdown Statistics Writer");
     220           0 :   NS_SetCurrentThreadName("Shutdown Statistics Writer");
     221             : 
     222             :   MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
     223             :   // Shutdown will generally complete before we have a chance to
     224             :   // deallocate. This is not a leak.
     225             : 
     226             :   // Setup destinationPath and tmpFilePath
     227             : 
     228           0 :   nsCString destinationPath(static_cast<char*>(arg));
     229           0 :   nsAutoCString tmpFilePath;
     230           0 :   tmpFilePath.Append(destinationPath);
     231           0 :   tmpFilePath.AppendLiteral(".tmp");
     232             : 
     233             :   // Cleanup any file leftover from a previous run
     234           0 :   Unused << PR_Delete(tmpFilePath.get());
     235           0 :   Unused << PR_Delete(destinationPath.get());
     236             : 
     237             :   while (true) {
     238             :     //
     239             :     // Check whether we have received data from the main thread.
     240             :     //
     241             :     // We perform the check before waiting on `gWriteReady` as we may
     242             :     // have received data while we were busy writing.
     243             :     //
     244             :     // Also note that gWriteData may have been modified several times
     245             :     // since we last checked. That's ok, we are not losing any important
     246             :     // data (since we keep adding data), and we are not leaking memory
     247             :     // (since the main thread deallocates any data that hasn't been
     248             :     // consumed by the writer thread).
     249             :     //
     250           0 :     UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
     251           0 :     if (!data) {
     252             :       // Data is not available yet.
     253             :       // Wait until the main thread provides it.
     254           0 :       PR_EnterMonitor(gWriteReady);
     255           0 :       PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
     256           0 :       PR_ExitMonitor(gWriteReady);
     257           0 :       continue;
     258             :     }
     259             : 
     260             :     MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
     261             :     // Shutdown may complete before we have a chance to deallocate.
     262             :     // This is not a leak.
     263             : 
     264             :     //
     265             :     // Write to a temporary file
     266             :     //
     267             :     // In case of any error, we simply give up. Since the data is
     268             :     // hardly critical, we don't want to spend too much effort
     269             :     // salvaging it.
     270             :     //
     271             :     UniquePtr<PRFileDesc, PR_CloseDelete>
     272             :       tmpFileDesc(PR_Open(tmpFilePath.get(),
     273             :                           PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
     274           0 :                           00600));
     275             : 
     276             :     // Shutdown may complete before we have a chance to close the file.
     277             :     // This is not a leak.
     278             :     MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
     279             : 
     280           0 :     if (tmpFileDesc == nullptr) {
     281           0 :       break;
     282             :     }
     283           0 :     if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
     284           0 :       break;
     285             :     }
     286           0 :     tmpFileDesc.reset();
     287             : 
     288             :     //
     289             :     // Rename on top of destination file.
     290             :     //
     291             :     // This is not sufficient to guarantee that the destination file
     292             :     // will be written correctly, but, again, we don't care enough
     293             :     // about the data to make more efforts.
     294             :     //
     295           0 :     if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
     296           0 :       break;
     297             :     }
     298           0 :   }
     299           0 : }
     300             : 
     301             : /**
     302             :  * A step during shutdown.
     303             :  *
     304             :  * Shutdown is divided in steps, which all map to an observer
     305             :  * notification. The duration of a step is defined as the number of
     306             :  * ticks between the time we receive a notification and the next one.
     307             :  */
     308             : struct ShutdownStep
     309             : {
     310             :   char const* const mTopic;
     311             :   int mTicks;
     312             : 
     313             :   constexpr explicit ShutdownStep(const char *const topic)
     314             :     : mTopic(topic)
     315             :     , mTicks(-1)
     316             :   {}
     317             : 
     318             : };
     319             : 
     320             : static ShutdownStep sShutdownSteps[] = {
     321             :   ShutdownStep("quit-application"),
     322             :   ShutdownStep("profile-change-teardown"),
     323             :   ShutdownStep("profile-before-change"),
     324             :   ShutdownStep("xpcom-will-shutdown"),
     325             :   ShutdownStep("xpcom-shutdown"),
     326             : };
     327             : 
     328             : } // namespace
     329             : 
     330          20 : NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
     331             : 
     332           1 : nsTerminator::nsTerminator()
     333             :   : mInitialized(false)
     334           1 :   , mCurrentStep(-1)
     335             : {
     336           1 : }
     337             : 
     338             : // During startup, register as an observer for all interesting topics.
     339             : nsresult
     340           1 : nsTerminator::SelfInit()
     341             : {
     342           2 :   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
     343           1 :   if (!os) {
     344           0 :     return NS_ERROR_UNEXPECTED;
     345             :   }
     346             : 
     347           6 :   for (auto& shutdownStep : sShutdownSteps) {
     348          10 :     DebugOnly<nsresult> rv = os->AddObserver(this, shutdownStep.mTopic, false);
     349           5 :     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "AddObserver failed");
     350             :   }
     351             : 
     352           1 :   return NS_OK;
     353             : }
     354             : 
     355             : // Actually launch these threads. This takes place at the first sign of shutdown.
     356             : void
     357           0 : nsTerminator::Start()
     358             : {
     359           0 :   MOZ_ASSERT(!mInitialized);
     360           0 :   StartWatchdog();
     361             : #if !defined(DEBUG)
     362             :   // Only allow nsTerminator to write on non-debug builds so we don't get leak warnings on
     363             :   // shutdown for intentional leaks (see bug 1242084). This will be enabled again by bug
     364             :   // 1255484 when 1255478 lands.
     365             :   StartWriter();
     366             : #endif // !defined(DEBUG)
     367           0 :   mInitialized = true;
     368           0 : }
     369             : 
     370             : // Prepare, allocate and start the watchdog thread.
     371             : // By design, it will never finish, nor be deallocated.
     372             : void
     373           0 : nsTerminator::StartWatchdog()
     374             : {
     375             :   int32_t crashAfterMS =
     376             :     Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
     377           0 :                         FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
     378             :   // Ignore negative values
     379           0 :   if (crashAfterMS <= 0) {
     380           0 :     crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
     381             :   }
     382             : 
     383             :   // Add a little padding, to ensure that we do not crash before
     384             :   // AsyncShutdown.
     385           0 :   if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
     386             :     // Defend against overflow
     387           0 :     crashAfterMS = INT32_MAX;
     388             :   } else {
     389           0 :     crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
     390             :   }
     391             : 
     392           0 :   UniquePtr<Options> options(new Options());
     393           0 :   const PRIntervalTime ticksDuration = PR_MillisecondsToInterval(1000);
     394           0 :   options->crashAfterTicks = crashAfterMS / ticksDuration;
     395             : 
     396           0 :   DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
     397           0 :                                                 options.release());
     398           0 :   MOZ_ASSERT(watchdogThread);
     399           0 : }
     400             : 
     401             : // Prepare, allocate and start the writer thread. By design, it will never
     402             : // finish, nor be deallocated. In case of error, we degrade
     403             : // gracefully to not writing Telemetry data.
     404             : void
     405           0 : nsTerminator::StartWriter()
     406             : {
     407           0 :   if (!Telemetry::CanRecordExtended()) {
     408           0 :     return;
     409             :   }
     410           0 :   nsCOMPtr<nsIFile> profLD;
     411           0 :   nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
     412           0 :                                        getter_AddRefs(profLD));
     413           0 :   if (NS_FAILED(rv)) {
     414           0 :     return;
     415             :   }
     416             : 
     417           0 :   rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
     418           0 :   if (NS_FAILED(rv)) {
     419           0 :     return;
     420             :   }
     421             : 
     422           0 :   nsAutoString path;
     423           0 :   rv = profLD->GetPath(path);
     424           0 :   if (NS_FAILED(rv)) {
     425           0 :     return;
     426             :   }
     427             : 
     428           0 :   gWriteReady = PR_NewMonitor();
     429             :   MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(gWriteReady); // We will never deallocate this object
     430             :   PRThread* writerThread = CreateSystemThread(RunWriter,
     431           0 :                                               ToNewUTF8String(path));
     432             : 
     433           0 :   if (!writerThread) {
     434           0 :     return;
     435             :   }
     436             : }
     437             : 
     438             : NS_IMETHODIMP
     439           1 : nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
     440             : {
     441           1 :   if (strcmp(aTopic, "profile-after-change") == 0) {
     442           1 :     return SelfInit();
     443             :   }
     444             : 
     445             :   // Other notifications are shutdown-related.
     446             : 
     447             :   // As we have seen examples in the wild of shutdown notifications
     448             :   // not being sent (or not being sent in the expected order), we do
     449             :   // not assume a specific order.
     450           0 :   if (!mInitialized) {
     451           0 :     Start();
     452             :   }
     453             : 
     454           0 :   UpdateHeartbeat(aTopic);
     455             : #if !defined(DEBUG)
     456             :   // Only allow nsTerminator to write on non-debug builds so we don't get leak warnings on
     457             :   // shutdown for intentional leaks (see bug 1242084). This will be enabled again by bug
     458             :   // 1255484 when 1255478 lands.
     459             :   UpdateTelemetry();
     460             : #endif // !defined(DEBUG)
     461           0 :   UpdateCrashReport(aTopic);
     462             : 
     463             :   // Perform a little cleanup
     464           0 :   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
     465           0 :   MOZ_RELEASE_ASSERT(os);
     466           0 :   (void)os->RemoveObserver(this, aTopic);
     467             : 
     468           0 :   return NS_OK;
     469             : }
     470             : 
     471             : void
     472           0 : nsTerminator::UpdateHeartbeat(const char* aTopic)
     473             : {
     474             :   // Reset the clock, find out how long the current phase has lasted.
     475           0 :   uint32_t ticks = gHeartbeat.exchange(0);
     476           0 :   if (mCurrentStep > 0) {
     477           0 :     sShutdownSteps[mCurrentStep].mTicks = ticks;
     478             :   }
     479             : 
     480             :   // Find out where we now are in the current shutdown.
     481             :   // Don't assume that shutdown takes place in the expected order.
     482           0 :   int nextStep = -1;
     483           0 :   for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
     484           0 :     if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
     485           0 :       nextStep = i;
     486           0 :       break;
     487             :     }
     488             :   }
     489           0 :   MOZ_ASSERT(nextStep != -1);
     490           0 :   mCurrentStep = nextStep;
     491           0 : }
     492             : 
     493             : void
     494           0 : nsTerminator::UpdateTelemetry()
     495             : {
     496           0 :   if (!Telemetry::CanRecordExtended() || !gWriteReady) {
     497           0 :     return;
     498             :   }
     499             : 
     500             :   //
     501             :   // We need Telemetry data on the effective duration of each step,
     502             :   // to be able to tune the time-to-crash of each of both the
     503             :   // Terminator and AsyncShutdown. However, at this stage, it is too
     504             :   // late to record such data into Telemetry, so we write it to disk
     505             :   // and read it upon the next startup.
     506             :   //
     507             : 
     508             :   // Build JSON.
     509           0 :   UniquePtr<nsCString> telemetryData(new nsCString());
     510           0 :   telemetryData->AppendLiteral("{");
     511           0 :   size_t fields = 0;
     512           0 :   for (auto& shutdownStep : sShutdownSteps) {
     513           0 :     if (shutdownStep.mTicks < 0) {
     514             :       // Ignore this field.
     515           0 :       continue;
     516             :     }
     517           0 :     if (fields++ > 0) {
     518           0 :       telemetryData->Append(", ");
     519             :     }
     520           0 :     telemetryData->AppendLiteral(R"(")");
     521           0 :     telemetryData->Append(shutdownStep.mTopic);
     522           0 :     telemetryData->AppendLiteral(R"(": )");
     523           0 :     telemetryData->AppendInt(shutdownStep.mTicks);
     524             :   }
     525           0 :   telemetryData->AppendLiteral("}");
     526             : 
     527           0 :   if (fields == 0) {
     528             :     // Nothing to write
     529           0 :       return;
     530             :   }
     531             : 
     532             :   //
     533             :   // Send data to the worker thread.
     534             :   //
     535           0 :   delete gWriteData.exchange(telemetryData.release()); // Clear any data that hasn't been written yet
     536             : 
     537             :   // In case the worker thread was sleeping, wake it up.
     538           0 :   PR_EnterMonitor(gWriteReady);
     539           0 :   PR_Notify(gWriteReady);
     540           0 :   PR_ExitMonitor(gWriteReady);
     541             : }
     542             : 
     543             : void
     544           0 : nsTerminator::UpdateCrashReport(const char* aTopic)
     545             : {
     546             : #if defined(MOZ_CRASHREPORTER)
     547             :   // In case of crash, we wish to know where in shutdown we are
     548           0 :   nsAutoCString report(aTopic);
     549             : 
     550           0 :   Unused << CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("ShutdownProgress"),
     551           0 :                                                report);
     552             : #endif // defined(MOZ_CRASHREPORTER)
     553           0 : }
     554             : 
     555             : 
     556             : } // namespace mozilla

Generated by: LCOV version 1.13