Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "mozilla/HangMonitor.h"
8 :
9 : #include "mozilla/Atomics.h"
10 : #include "mozilla/BackgroundHangMonitor.h"
11 : #include "mozilla/Monitor.h"
12 : #include "mozilla/Preferences.h"
13 : #include "mozilla/ProcessedStack.h"
14 : #include "mozilla/Telemetry.h"
15 : #include "mozilla/StaticPtr.h"
16 : #include "mozilla/UniquePtr.h"
17 : #include "nsReadableUtils.h"
18 : #include "nsThreadUtils.h"
19 : #include "mozilla/StackWalk.h"
20 : #include "nsThreadUtils.h"
21 : #include "nsXULAppAPI.h"
22 : #include "GeckoProfiler.h"
23 :
24 : #ifdef MOZ_CRASHREPORTER
25 : #include "nsExceptionHandler.h"
26 : #endif
27 :
28 : #ifdef XP_WIN
29 : #include <windows.h>
30 : #endif
31 :
32 : #if defined(MOZ_GECKO_PROFILER) && defined(MOZ_PROFILING) && defined(XP_WIN)
33 : #define REPORT_CHROME_HANGS
34 : #endif
35 :
36 : namespace mozilla {
37 : namespace HangMonitor {
38 :
39 : /**
40 : * A flag which may be set from within a debugger to disable the hang
41 : * monitor.
42 : */
43 : volatile bool gDebugDisableHangMonitor = false;
44 :
45 : const char kHangMonitorPrefName[] = "hangmonitor.timeout";
46 :
47 : #ifdef REPORT_CHROME_HANGS
48 : const char kTelemetryPrefName[] = "toolkit.telemetry.enabled";
49 : #endif
50 :
51 : // Monitor protects gShutdown and gTimeout, but not gTimestamp which rely on
52 : // being atomically set by the processor; synchronization doesn't really matter
53 : // in this use case.
54 : Monitor* gMonitor;
55 :
56 : // The timeout preference, in seconds.
57 : int32_t gTimeout;
58 :
59 : PRThread* gThread;
60 :
61 : // Set when shutdown begins to signal the thread to exit immediately.
62 : bool gShutdown;
63 :
64 : // The timestamp of the last event notification, or PR_INTERVAL_NO_WAIT if
65 : // we're currently not processing events.
66 : Atomic<PRIntervalTime> gTimestamp(PR_INTERVAL_NO_WAIT);
67 :
68 : #ifdef REPORT_CHROME_HANGS
69 : // Main thread ID used in reporting chrome hangs under Windows
70 : static HANDLE winMainThreadHandle = nullptr;
71 :
72 : // Default timeout for reporting chrome hangs to Telemetry (5 seconds)
73 : static const int32_t DEFAULT_CHROME_HANG_INTERVAL = 5;
74 :
75 : // Maximum number of PCs to gather from the stack
76 : static const int32_t MAX_CALL_STACK_PCS = 400;
77 : #endif
78 :
79 : // PrefChangedFunc
80 : void
81 4 : PrefChanged(const char*, void*)
82 : {
83 4 : int32_t newval = Preferences::GetInt(kHangMonitorPrefName);
84 : #ifdef REPORT_CHROME_HANGS
85 : // Monitor chrome hangs on the profiling branch if Telemetry enabled
86 : if (newval == 0) {
87 : bool telemetryEnabled = Preferences::GetBool(kTelemetryPrefName);
88 : if (telemetryEnabled) {
89 : newval = DEFAULT_CHROME_HANG_INTERVAL;
90 : }
91 : }
92 : #endif
93 8 : MonitorAutoLock lock(*gMonitor);
94 4 : if (newval != gTimeout) {
95 0 : gTimeout = newval;
96 0 : lock.Notify();
97 : }
98 4 : }
99 :
100 : void
101 0 : Crash()
102 : {
103 0 : if (gDebugDisableHangMonitor) {
104 0 : return;
105 : }
106 :
107 : #ifdef XP_WIN
108 : if (::IsDebuggerPresent()) {
109 : return;
110 : }
111 : #endif
112 :
113 : #ifdef MOZ_CRASHREPORTER
114 : // If you change this, you must also deal with the threadsafety of AnnotateCrashReport in
115 : // non-chrome processes!
116 0 : if (GeckoProcessType_Default == XRE_GetProcessType()) {
117 0 : CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("Hang"),
118 0 : NS_LITERAL_CSTRING("1"));
119 : }
120 : #endif
121 :
122 0 : MOZ_CRASH("HangMonitor triggered");
123 : }
124 :
125 : #ifdef REPORT_CHROME_HANGS
126 :
127 : static void
128 : ChromeStackWalker(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
129 : {
130 : MOZ_ASSERT(aClosure);
131 : std::vector<uintptr_t>* stack =
132 : static_cast<std::vector<uintptr_t>*>(aClosure);
133 : if (stack->size() == MAX_CALL_STACK_PCS) {
134 : return;
135 : }
136 : MOZ_ASSERT(stack->size() < MAX_CALL_STACK_PCS);
137 : stack->push_back(reinterpret_cast<uintptr_t>(aPC));
138 : }
139 :
140 : static void
141 : GetChromeHangReport(Telemetry::ProcessedStack& aStack,
142 : int32_t& aSystemUptime,
143 : int32_t& aFirefoxUptime)
144 : {
145 : MOZ_ASSERT(winMainThreadHandle);
146 :
147 : // The thread we're about to suspend might have the alloc lock
148 : // so allocate ahead of time
149 : std::vector<uintptr_t> rawStack;
150 : rawStack.reserve(MAX_CALL_STACK_PCS);
151 :
152 : DWORD ret = ::SuspendThread(winMainThreadHandle);
153 : bool suspended = false;
154 : if (ret != (DWORD)-1) {
155 : // SuspendThread is asynchronous, so the thread may still be running. Use
156 : // GetThreadContext to ensure it's really suspended.
157 : // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
158 : CONTEXT context;
159 : context.ContextFlags = CONTEXT_CONTROL;
160 : if (::GetThreadContext(winMainThreadHandle, &context)) {
161 : suspended = true;
162 : }
163 : }
164 :
165 : if (!suspended) {
166 : if (ret != -1) {
167 : MOZ_ALWAYS_TRUE(::ResumeThread(winMainThreadHandle) != DWORD(-1));
168 : }
169 : return;
170 : }
171 :
172 : MozStackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0,
173 : reinterpret_cast<void*>(&rawStack),
174 : reinterpret_cast<uintptr_t>(winMainThreadHandle), nullptr);
175 : ret = ::ResumeThread(winMainThreadHandle);
176 : if (ret == -1) {
177 : return;
178 : }
179 : aStack = Telemetry::GetStackAndModules(rawStack);
180 :
181 : // Record system uptime (in minutes) at the time of the hang
182 : aSystemUptime = ((GetTickCount() / 1000) - (gTimeout * 2)) / 60;
183 :
184 : // Record Firefox uptime (in minutes) at the time of the hang
185 : bool error;
186 : TimeStamp processCreation = TimeStamp::ProcessCreation(&error);
187 : if (!error) {
188 : TimeDuration td = TimeStamp::Now() - processCreation;
189 : aFirefoxUptime = (static_cast<int32_t>(td.ToSeconds()) - (gTimeout * 2)) / 60;
190 : } else {
191 : aFirefoxUptime = -1;
192 : }
193 : }
194 :
195 : #endif
196 :
197 : void
198 3 : ThreadMain(void*)
199 : {
200 3 : AutoProfilerRegisterThread registerThread("Hang Monitor");
201 3 : NS_SetCurrentThreadName("Hang Monitor");
202 :
203 3 : MonitorAutoLock lock(*gMonitor);
204 :
205 : // In order to avoid issues with the hang monitor incorrectly triggering
206 : // during a general system stop such as sleeping, the monitor thread must
207 : // run twice to trigger hang protection.
208 3 : PRIntervalTime lastTimestamp = 0;
209 3 : int waitCount = 0;
210 :
211 : #ifdef REPORT_CHROME_HANGS
212 : Telemetry::ProcessedStack stack;
213 : int32_t systemUptime = -1;
214 : int32_t firefoxUptime = -1;
215 : UniquePtr<HangAnnotations> annotations;
216 : #endif
217 :
218 : while (true) {
219 3 : if (gShutdown) {
220 0 : return; // Exit the thread
221 : }
222 :
223 : // avoid rereading the volatile value in this loop
224 3 : PRIntervalTime timestamp = gTimestamp;
225 :
226 3 : PRIntervalTime now = PR_IntervalNow();
227 :
228 3 : if (timestamp != PR_INTERVAL_NO_WAIT &&
229 : now < timestamp) {
230 : // 32-bit overflow, reset for another waiting period
231 0 : timestamp = 1; // lowest legal PRInterval value
232 : }
233 :
234 3 : if (timestamp != PR_INTERVAL_NO_WAIT &&
235 0 : timestamp == lastTimestamp &&
236 0 : gTimeout > 0) {
237 0 : ++waitCount;
238 : #ifdef REPORT_CHROME_HANGS
239 : // Capture the chrome-hang stack + Firefox & system uptimes after
240 : // the minimum hang duration has been reached (not when the hang ends)
241 : if (waitCount == 2) {
242 : GetChromeHangReport(stack, systemUptime, firefoxUptime);
243 : annotations = ChromeHangAnnotatorCallout();
244 : }
245 : #else
246 : // This is the crash-on-hang feature.
247 : // See bug 867313 for the quirk in the waitCount comparison
248 0 : if (waitCount >= 2) {
249 : int32_t delay =
250 0 : int32_t(PR_IntervalToSeconds(now - timestamp));
251 0 : if (delay >= gTimeout) {
252 0 : MonitorAutoUnlock unlock(*gMonitor);
253 0 : Crash();
254 : }
255 0 : }
256 : #endif
257 : } else {
258 : #ifdef REPORT_CHROME_HANGS
259 : if (waitCount >= 2) {
260 : uint32_t hangDuration = PR_IntervalToSeconds(now - lastTimestamp);
261 : Telemetry::RecordChromeHang(hangDuration, stack, systemUptime,
262 : firefoxUptime, Move(annotations));
263 : stack.Clear();
264 : }
265 : #endif
266 3 : lastTimestamp = timestamp;
267 3 : waitCount = 0;
268 : }
269 :
270 : PRIntervalTime timeout;
271 3 : if (gTimeout <= 0) {
272 3 : timeout = PR_INTERVAL_NO_TIMEOUT;
273 : } else {
274 0 : timeout = PR_MillisecondsToInterval(gTimeout * 500);
275 : }
276 3 : lock.Wait(timeout);
277 0 : }
278 : }
279 :
280 : void
281 3 : Startup()
282 : {
283 5 : if (GeckoProcessType_Default != XRE_GetProcessType() &&
284 2 : GeckoProcessType_Content != XRE_GetProcessType()) {
285 0 : return;
286 : }
287 :
288 3 : MOZ_ASSERT(!gMonitor, "Hang monitor already initialized");
289 3 : gMonitor = new Monitor("HangMonitor");
290 :
291 3 : Preferences::RegisterCallback(PrefChanged, kHangMonitorPrefName);
292 3 : PrefChanged(nullptr, nullptr);
293 :
294 : #ifdef REPORT_CHROME_HANGS
295 : Preferences::RegisterCallback(PrefChanged, kTelemetryPrefName);
296 : winMainThreadHandle =
297 : OpenThread(THREAD_ALL_ACCESS, FALSE, GetCurrentThreadId());
298 : if (!winMainThreadHandle) {
299 : return;
300 : }
301 : #endif
302 :
303 : // Don't actually start measuring hangs until we hit the main event loop.
304 : // This potentially misses a small class of really early startup hangs,
305 : // but avoids dealing with some xpcshell tests and other situations which
306 : // start XPCOM but don't ever start the event loop.
307 3 : Suspend();
308 :
309 3 : gThread = PR_CreateThread(PR_USER_THREAD,
310 : ThreadMain,
311 : nullptr, PR_PRIORITY_LOW, PR_GLOBAL_THREAD,
312 : PR_JOINABLE_THREAD, 0);
313 : }
314 :
315 : void
316 0 : Shutdown()
317 : {
318 0 : if (GeckoProcessType_Default != XRE_GetProcessType() &&
319 0 : GeckoProcessType_Content != XRE_GetProcessType()) {
320 0 : return;
321 : }
322 :
323 0 : MOZ_ASSERT(gMonitor, "Hang monitor not started");
324 :
325 : {
326 : // Scope the lock we're going to delete later
327 0 : MonitorAutoLock lock(*gMonitor);
328 0 : gShutdown = true;
329 0 : lock.Notify();
330 : }
331 :
332 : // thread creation could theoretically fail
333 0 : if (gThread) {
334 0 : PR_JoinThread(gThread);
335 0 : gThread = nullptr;
336 : }
337 :
338 0 : delete gMonitor;
339 0 : gMonitor = nullptr;
340 : }
341 :
342 : static bool
343 2468 : IsUIMessageWaiting()
344 : {
345 : #ifndef XP_WIN
346 2468 : return false;
347 : #else
348 : // There should never be mouse, keyboard, or IME messages in a message queue
349 : // in the content process, so don't waste time making multiple PeekMessage
350 : // calls.
351 : if (GeckoProcessType_Content == XRE_GetProcessType()) {
352 : return false;
353 : }
354 : #define NS_WM_IMEFIRST WM_IME_SETCONTEXT
355 : #define NS_WM_IMELAST WM_IME_KEYUP
356 : BOOL haveUIMessageWaiting = FALSE;
357 : MSG msg;
358 : haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_KEYFIRST,
359 : WM_IME_KEYLAST, PM_NOREMOVE);
360 : haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, NS_WM_IMEFIRST,
361 : NS_WM_IMELAST, PM_NOREMOVE);
362 : haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_MOUSEFIRST,
363 : WM_MOUSELAST, PM_NOREMOVE);
364 : return haveUIMessageWaiting;
365 : #endif
366 : }
367 :
368 : void
369 2468 : NotifyActivity(ActivityType aActivityType)
370 : {
371 2468 : MOZ_ASSERT(NS_IsMainThread(),
372 : "HangMonitor::Notify called from off the main thread.");
373 :
374 : // Determine the activity type more specifically
375 2468 : if (aActivityType == kGeneralActivity) {
376 2468 : aActivityType = IsUIMessageWaiting() ? kActivityUIAVail :
377 : kActivityNoUIAVail;
378 : }
379 :
380 : // Calculate the cumulative amount of lag time since the last UI message
381 : static uint32_t cumulativeUILagMS = 0;
382 2468 : switch (aActivityType) {
383 : case kActivityNoUIAVail:
384 2468 : cumulativeUILagMS = 0;
385 2468 : break;
386 : case kActivityUIAVail:
387 : case kUIActivity:
388 0 : if (gTimestamp != PR_INTERVAL_NO_WAIT) {
389 0 : cumulativeUILagMS += PR_IntervalToMilliseconds(PR_IntervalNow() -
390 0 : gTimestamp);
391 : }
392 0 : break;
393 : default:
394 0 : break;
395 : }
396 :
397 : // This is not a locked activity because PRTimeStamp is a 32-bit quantity
398 : // which can be read/written atomically, and we don't want to pay locking
399 : // penalties here.
400 2468 : gTimestamp = PR_IntervalNow();
401 :
402 : // If we have UI activity we should reset the timer and report it
403 2468 : if (aActivityType == kUIActivity) {
404 : mozilla::Telemetry::Accumulate(mozilla::Telemetry::EVENTLOOP_UI_ACTIVITY_EXP_MS,
405 0 : cumulativeUILagMS);
406 0 : cumulativeUILagMS = 0;
407 : }
408 :
409 2468 : if (gThread && !gShutdown) {
410 2468 : mozilla::BackgroundHangMonitor().NotifyActivity();
411 : }
412 2468 : }
413 :
414 : void
415 1403 : Suspend()
416 : {
417 1403 : MOZ_ASSERT(NS_IsMainThread(),
418 : "HangMonitor::Suspend called from off the main thread.");
419 :
420 : // Because gTimestamp changes this resets the wait count.
421 1403 : gTimestamp = PR_INTERVAL_NO_WAIT;
422 :
423 1403 : if (gThread && !gShutdown) {
424 1400 : mozilla::BackgroundHangMonitor().NotifyWait();
425 : }
426 1403 : }
427 :
428 : } // namespace HangMonitor
429 : } // namespace mozilla
|