LCOV - code coverage report
Current view: top level - xpcom/threads - HangMonitor.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 50 93 53.8 %
Date: 2017-07-14 16:53:18 Functions: 6 8 75.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #include "mozilla/HangMonitor.h"
       8             : 
       9             : #include "mozilla/Atomics.h"
      10             : #include "mozilla/BackgroundHangMonitor.h"
      11             : #include "mozilla/Monitor.h"
      12             : #include "mozilla/Preferences.h"
      13             : #include "mozilla/ProcessedStack.h"
      14             : #include "mozilla/Telemetry.h"
      15             : #include "mozilla/StaticPtr.h"
      16             : #include "mozilla/UniquePtr.h"
      17             : #include "nsReadableUtils.h"
      18             : #include "nsThreadUtils.h"
      19             : #include "mozilla/StackWalk.h"
      20             : #include "nsThreadUtils.h"
      21             : #include "nsXULAppAPI.h"
      22             : #include "GeckoProfiler.h"
      23             : 
      24             : #ifdef MOZ_CRASHREPORTER
      25             : #include "nsExceptionHandler.h"
      26             : #endif
      27             : 
      28             : #ifdef XP_WIN
      29             : #include <windows.h>
      30             : #endif
      31             : 
      32             : #if defined(MOZ_GECKO_PROFILER) && defined(MOZ_PROFILING) && defined(XP_WIN)
      33             :   #define REPORT_CHROME_HANGS
      34             : #endif
      35             : 
      36             : namespace mozilla {
      37             : namespace HangMonitor {
      38             : 
      39             : /**
      40             :  * A flag which may be set from within a debugger to disable the hang
      41             :  * monitor.
      42             :  */
      43             : volatile bool gDebugDisableHangMonitor = false;
      44             : 
      45             : const char kHangMonitorPrefName[] = "hangmonitor.timeout";
      46             : 
      47             : #ifdef REPORT_CHROME_HANGS
      48             : const char kTelemetryPrefName[] = "toolkit.telemetry.enabled";
      49             : #endif
      50             : 
      51             : // Monitor protects gShutdown and gTimeout, but not gTimestamp which rely on
      52             : // being atomically set by the processor; synchronization doesn't really matter
      53             : // in this use case.
      54             : Monitor* gMonitor;
      55             : 
      56             : // The timeout preference, in seconds.
      57             : int32_t gTimeout;
      58             : 
      59             : PRThread* gThread;
      60             : 
      61             : // Set when shutdown begins to signal the thread to exit immediately.
      62             : bool gShutdown;
      63             : 
      64             : // The timestamp of the last event notification, or PR_INTERVAL_NO_WAIT if
      65             : // we're currently not processing events.
      66             : Atomic<PRIntervalTime> gTimestamp(PR_INTERVAL_NO_WAIT);
      67             : 
      68             : #ifdef REPORT_CHROME_HANGS
      69             : // Main thread ID used in reporting chrome hangs under Windows
      70             : static HANDLE winMainThreadHandle = nullptr;
      71             : 
      72             : // Default timeout for reporting chrome hangs to Telemetry (5 seconds)
      73             : static const int32_t DEFAULT_CHROME_HANG_INTERVAL = 5;
      74             : 
      75             : // Maximum number of PCs to gather from the stack
      76             : static const int32_t MAX_CALL_STACK_PCS = 400;
      77             : #endif
      78             : 
      79             : // PrefChangedFunc
      80             : void
      81           4 : PrefChanged(const char*, void*)
      82             : {
      83           4 :   int32_t newval = Preferences::GetInt(kHangMonitorPrefName);
      84             : #ifdef REPORT_CHROME_HANGS
      85             :   // Monitor chrome hangs on the profiling branch if Telemetry enabled
      86             :   if (newval == 0) {
      87             :     bool telemetryEnabled = Preferences::GetBool(kTelemetryPrefName);
      88             :     if (telemetryEnabled) {
      89             :       newval = DEFAULT_CHROME_HANG_INTERVAL;
      90             :     }
      91             :   }
      92             : #endif
      93           8 :   MonitorAutoLock lock(*gMonitor);
      94           4 :   if (newval != gTimeout) {
      95           0 :     gTimeout = newval;
      96           0 :     lock.Notify();
      97             :   }
      98           4 : }
      99             : 
     100             : void
     101           0 : Crash()
     102             : {
     103           0 :   if (gDebugDisableHangMonitor) {
     104           0 :     return;
     105             :   }
     106             : 
     107             : #ifdef XP_WIN
     108             :   if (::IsDebuggerPresent()) {
     109             :     return;
     110             :   }
     111             : #endif
     112             : 
     113             : #ifdef MOZ_CRASHREPORTER
     114             :   // If you change this, you must also deal with the threadsafety of AnnotateCrashReport in
     115             :   // non-chrome processes!
     116           0 :   if (GeckoProcessType_Default == XRE_GetProcessType()) {
     117           0 :     CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("Hang"),
     118           0 :                                        NS_LITERAL_CSTRING("1"));
     119             :   }
     120             : #endif
     121             : 
     122           0 :   MOZ_CRASH("HangMonitor triggered");
     123             : }
     124             : 
     125             : #ifdef REPORT_CHROME_HANGS
     126             : 
     127             : static void
     128             : ChromeStackWalker(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
     129             : {
     130             :   MOZ_ASSERT(aClosure);
     131             :   std::vector<uintptr_t>* stack =
     132             :     static_cast<std::vector<uintptr_t>*>(aClosure);
     133             :   if (stack->size() == MAX_CALL_STACK_PCS) {
     134             :     return;
     135             :   }
     136             :   MOZ_ASSERT(stack->size() < MAX_CALL_STACK_PCS);
     137             :   stack->push_back(reinterpret_cast<uintptr_t>(aPC));
     138             : }
     139             : 
     140             : static void
     141             : GetChromeHangReport(Telemetry::ProcessedStack& aStack,
     142             :                     int32_t& aSystemUptime,
     143             :                     int32_t& aFirefoxUptime)
     144             : {
     145             :   MOZ_ASSERT(winMainThreadHandle);
     146             : 
     147             :   // The thread we're about to suspend might have the alloc lock
     148             :   // so allocate ahead of time
     149             :   std::vector<uintptr_t> rawStack;
     150             :   rawStack.reserve(MAX_CALL_STACK_PCS);
     151             : 
     152             :   DWORD ret = ::SuspendThread(winMainThreadHandle);
     153             :   bool suspended = false;
     154             :   if (ret != (DWORD)-1) {
     155             :     // SuspendThread is asynchronous, so the thread may still be running. Use
     156             :     // GetThreadContext to ensure it's really suspended.
     157             :     // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
     158             :     CONTEXT context;
     159             :     context.ContextFlags = CONTEXT_CONTROL;
     160             :     if (::GetThreadContext(winMainThreadHandle, &context)) {
     161             :       suspended = true;
     162             :     }
     163             :   }
     164             : 
     165             :   if (!suspended) {
     166             :     if (ret != -1) {
     167             :       MOZ_ALWAYS_TRUE(::ResumeThread(winMainThreadHandle) != DWORD(-1));
     168             :     }
     169             :     return;
     170             :   }
     171             : 
     172             :   MozStackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0,
     173             :                reinterpret_cast<void*>(&rawStack),
     174             :                reinterpret_cast<uintptr_t>(winMainThreadHandle), nullptr);
     175             :   ret = ::ResumeThread(winMainThreadHandle);
     176             :   if (ret == -1) {
     177             :     return;
     178             :   }
     179             :   aStack = Telemetry::GetStackAndModules(rawStack);
     180             : 
     181             :   // Record system uptime (in minutes) at the time of the hang
     182             :   aSystemUptime = ((GetTickCount() / 1000) - (gTimeout * 2)) / 60;
     183             : 
     184             :   // Record Firefox uptime (in minutes) at the time of the hang
     185             :   bool error;
     186             :   TimeStamp processCreation = TimeStamp::ProcessCreation(&error);
     187             :   if (!error) {
     188             :     TimeDuration td = TimeStamp::Now() - processCreation;
     189             :     aFirefoxUptime = (static_cast<int32_t>(td.ToSeconds()) - (gTimeout * 2)) / 60;
     190             :   } else {
     191             :     aFirefoxUptime = -1;
     192             :   }
     193             : }
     194             : 
     195             : #endif
     196             : 
     197             : void
     198           3 : ThreadMain(void*)
     199             : {
     200           3 :   AutoProfilerRegisterThread registerThread("Hang Monitor");
     201           3 :   NS_SetCurrentThreadName("Hang Monitor");
     202             : 
     203           3 :   MonitorAutoLock lock(*gMonitor);
     204             : 
     205             :   // In order to avoid issues with the hang monitor incorrectly triggering
     206             :   // during a general system stop such as sleeping, the monitor thread must
     207             :   // run twice to trigger hang protection.
     208           3 :   PRIntervalTime lastTimestamp = 0;
     209           3 :   int waitCount = 0;
     210             : 
     211             : #ifdef REPORT_CHROME_HANGS
     212             :   Telemetry::ProcessedStack stack;
     213             :   int32_t systemUptime = -1;
     214             :   int32_t firefoxUptime = -1;
     215             :   UniquePtr<HangAnnotations> annotations;
     216             : #endif
     217             : 
     218             :   while (true) {
     219           3 :     if (gShutdown) {
     220           0 :       return; // Exit the thread
     221             :     }
     222             : 
     223             :     // avoid rereading the volatile value in this loop
     224           3 :     PRIntervalTime timestamp = gTimestamp;
     225             : 
     226           3 :     PRIntervalTime now = PR_IntervalNow();
     227             : 
     228           3 :     if (timestamp != PR_INTERVAL_NO_WAIT &&
     229             :         now < timestamp) {
     230             :       // 32-bit overflow, reset for another waiting period
     231           0 :       timestamp = 1; // lowest legal PRInterval value
     232             :     }
     233             : 
     234           3 :     if (timestamp != PR_INTERVAL_NO_WAIT &&
     235           0 :         timestamp == lastTimestamp &&
     236           0 :         gTimeout > 0) {
     237           0 :       ++waitCount;
     238             : #ifdef REPORT_CHROME_HANGS
     239             :       // Capture the chrome-hang stack + Firefox & system uptimes after
     240             :       // the minimum hang duration has been reached (not when the hang ends)
     241             :       if (waitCount == 2) {
     242             :         GetChromeHangReport(stack, systemUptime, firefoxUptime);
     243             :         annotations = ChromeHangAnnotatorCallout();
     244             :       }
     245             : #else
     246             :       // This is the crash-on-hang feature.
     247             :       // See bug 867313 for the quirk in the waitCount comparison
     248           0 :       if (waitCount >= 2) {
     249             :         int32_t delay =
     250           0 :           int32_t(PR_IntervalToSeconds(now - timestamp));
     251           0 :         if (delay >= gTimeout) {
     252           0 :           MonitorAutoUnlock unlock(*gMonitor);
     253           0 :           Crash();
     254             :         }
     255           0 :       }
     256             : #endif
     257             :     } else {
     258             : #ifdef REPORT_CHROME_HANGS
     259             :       if (waitCount >= 2) {
     260             :         uint32_t hangDuration = PR_IntervalToSeconds(now - lastTimestamp);
     261             :         Telemetry::RecordChromeHang(hangDuration, stack, systemUptime,
     262             :                                     firefoxUptime, Move(annotations));
     263             :         stack.Clear();
     264             :       }
     265             : #endif
     266           3 :       lastTimestamp = timestamp;
     267           3 :       waitCount = 0;
     268             :     }
     269             : 
     270             :     PRIntervalTime timeout;
     271           3 :     if (gTimeout <= 0) {
     272           3 :       timeout = PR_INTERVAL_NO_TIMEOUT;
     273             :     } else {
     274           0 :       timeout = PR_MillisecondsToInterval(gTimeout * 500);
     275             :     }
     276           3 :     lock.Wait(timeout);
     277           0 :   }
     278             : }
     279             : 
     280             : void
     281           3 : Startup()
     282             : {
     283           5 :   if (GeckoProcessType_Default != XRE_GetProcessType() &&
     284           2 :       GeckoProcessType_Content != XRE_GetProcessType()) {
     285           0 :     return;
     286             :   }
     287             : 
     288           3 :   MOZ_ASSERT(!gMonitor, "Hang monitor already initialized");
     289           3 :   gMonitor = new Monitor("HangMonitor");
     290             : 
     291           3 :   Preferences::RegisterCallback(PrefChanged, kHangMonitorPrefName);
     292           3 :   PrefChanged(nullptr, nullptr);
     293             : 
     294             : #ifdef REPORT_CHROME_HANGS
     295             :   Preferences::RegisterCallback(PrefChanged, kTelemetryPrefName);
     296             :   winMainThreadHandle =
     297             :     OpenThread(THREAD_ALL_ACCESS, FALSE, GetCurrentThreadId());
     298             :   if (!winMainThreadHandle) {
     299             :     return;
     300             :   }
     301             : #endif
     302             : 
     303             :   // Don't actually start measuring hangs until we hit the main event loop.
     304             :   // This potentially misses a small class of really early startup hangs,
     305             :   // but avoids dealing with some xpcshell tests and other situations which
     306             :   // start XPCOM but don't ever start the event loop.
     307           3 :   Suspend();
     308             : 
     309           3 :   gThread = PR_CreateThread(PR_USER_THREAD,
     310             :                             ThreadMain,
     311             :                             nullptr, PR_PRIORITY_LOW, PR_GLOBAL_THREAD,
     312             :                             PR_JOINABLE_THREAD, 0);
     313             : }
     314             : 
     315             : void
     316           0 : Shutdown()
     317             : {
     318           0 :   if (GeckoProcessType_Default != XRE_GetProcessType() &&
     319           0 :       GeckoProcessType_Content != XRE_GetProcessType()) {
     320           0 :     return;
     321             :   }
     322             : 
     323           0 :   MOZ_ASSERT(gMonitor, "Hang monitor not started");
     324             : 
     325             :   {
     326             :     // Scope the lock we're going to delete later
     327           0 :     MonitorAutoLock lock(*gMonitor);
     328           0 :     gShutdown = true;
     329           0 :     lock.Notify();
     330             :   }
     331             : 
     332             :   // thread creation could theoretically fail
     333           0 :   if (gThread) {
     334           0 :     PR_JoinThread(gThread);
     335           0 :     gThread = nullptr;
     336             :   }
     337             : 
     338           0 :   delete gMonitor;
     339           0 :   gMonitor = nullptr;
     340             : }
     341             : 
     342             : static bool
     343        2468 : IsUIMessageWaiting()
     344             : {
     345             : #ifndef XP_WIN
     346        2468 :   return false;
     347             : #else
     348             :   // There should never be mouse, keyboard, or IME messages in a message queue
     349             :   // in the content process, so don't waste time making multiple PeekMessage
     350             :   // calls.
     351             :   if (GeckoProcessType_Content == XRE_GetProcessType()) {
     352             :     return false;
     353             :   }
     354             : #define NS_WM_IMEFIRST WM_IME_SETCONTEXT
     355             : #define NS_WM_IMELAST  WM_IME_KEYUP
     356             :   BOOL haveUIMessageWaiting = FALSE;
     357             :   MSG msg;
     358             :   haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_KEYFIRST,
     359             :                                          WM_IME_KEYLAST, PM_NOREMOVE);
     360             :   haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, NS_WM_IMEFIRST,
     361             :                                          NS_WM_IMELAST, PM_NOREMOVE);
     362             :   haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_MOUSEFIRST,
     363             :                                          WM_MOUSELAST, PM_NOREMOVE);
     364             :   return haveUIMessageWaiting;
     365             : #endif
     366             : }
     367             : 
     368             : void
     369        2468 : NotifyActivity(ActivityType aActivityType)
     370             : {
     371        2468 :   MOZ_ASSERT(NS_IsMainThread(),
     372             :              "HangMonitor::Notify called from off the main thread.");
     373             : 
     374             :   // Determine the activity type more specifically
     375        2468 :   if (aActivityType == kGeneralActivity) {
     376        2468 :     aActivityType = IsUIMessageWaiting() ? kActivityUIAVail :
     377             :                                            kActivityNoUIAVail;
     378             :   }
     379             : 
     380             :   // Calculate the cumulative amount of lag time since the last UI message
     381             :   static uint32_t cumulativeUILagMS = 0;
     382        2468 :   switch (aActivityType) {
     383             :     case kActivityNoUIAVail:
     384        2468 :       cumulativeUILagMS = 0;
     385        2468 :       break;
     386             :     case kActivityUIAVail:
     387             :     case kUIActivity:
     388           0 :       if (gTimestamp != PR_INTERVAL_NO_WAIT) {
     389           0 :         cumulativeUILagMS += PR_IntervalToMilliseconds(PR_IntervalNow() -
     390           0 :                                                        gTimestamp);
     391             :       }
     392           0 :       break;
     393             :     default:
     394           0 :       break;
     395             :   }
     396             : 
     397             :   // This is not a locked activity because PRTimeStamp is a 32-bit quantity
     398             :   // which can be read/written atomically, and we don't want to pay locking
     399             :   // penalties here.
     400        2468 :   gTimestamp = PR_IntervalNow();
     401             : 
     402             :   // If we have UI activity we should reset the timer and report it
     403        2468 :   if (aActivityType == kUIActivity) {
     404             :     mozilla::Telemetry::Accumulate(mozilla::Telemetry::EVENTLOOP_UI_ACTIVITY_EXP_MS,
     405           0 :                                      cumulativeUILagMS);
     406           0 :     cumulativeUILagMS = 0;
     407             :   }
     408             : 
     409        2468 :   if (gThread && !gShutdown) {
     410        2468 :     mozilla::BackgroundHangMonitor().NotifyActivity();
     411             :   }
     412        2468 : }
     413             : 
     414             : void
     415        1403 : Suspend()
     416             : {
     417        1403 :   MOZ_ASSERT(NS_IsMainThread(),
     418             :              "HangMonitor::Suspend called from off the main thread.");
     419             : 
     420             :   // Because gTimestamp changes this resets the wait count.
     421        1403 :   gTimestamp = PR_INTERVAL_NO_WAIT;
     422             : 
     423        1403 :   if (gThread && !gShutdown) {
     424        1400 :     mozilla::BackgroundHangMonitor().NotifyWait();
     425             :   }
     426        1403 : }
     427             : 
     428             : } // namespace HangMonitor
     429             : } // namespace mozilla

Generated by: LCOV version 1.13