Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "mozilla/ArrayUtils.h"
8 : #include "mozilla/BackgroundHangMonitor.h"
9 : #include "mozilla/LinkedList.h"
10 : #include "mozilla/Monitor.h"
11 : #include "mozilla/Move.h"
12 : #include "mozilla/Preferences.h"
13 : #include "mozilla/StaticPtr.h"
14 : #include "mozilla/Telemetry.h"
15 : #include "mozilla/ThreadHangStats.h"
16 : #include "mozilla/ThreadLocal.h"
17 : #include "mozilla/SystemGroup.h"
18 :
19 : #include "prinrval.h"
20 : #include "prthread.h"
21 : #include "ThreadStackHelper.h"
22 : #include "nsIObserverService.h"
23 : #include "nsIObserver.h"
24 : #include "mozilla/Services.h"
25 : #include "nsThreadUtils.h"
26 : #include "nsXULAppAPI.h"
27 : #include "GeckoProfiler.h"
28 : #include "nsNetCID.h"
29 : #include "nsIHangDetails.h"
30 :
31 : #include <algorithm>
32 :
// Activate BHR for only one in every BHR_BETA_MOD users.
// This is now 100% of the Beta population for the Beta 45/46 e10s A/B trials.
// It can be scaled back again in the future.
//
// NOTE: the macro body deliberately has no trailing semicolon, so that it can
// be used as an operand inside a larger expression.
#define BHR_BETA_MOD 1

// Maximum depth of the call stack in the reported thread hangs. This value represents
// the 99.9th percentile of the thread hangs stack depths reported by Telemetry.
static const size_t kMaxThreadHangStackDepth = 30;
41 :
// Binary predicate handed to std::unique when collapsing a call stack: two
// neighbouring entries count as duplicates only if they are the same string
// AND that string is one of the generic "(chrome script)" / "(content script)"
// labels. Any other repeated frame is kept.
bool StackScriptEntriesCollapser(const char* aStackEntry, const char *aAnotherStackEntry)
{
  if (strcmp(aStackEntry, aAnotherStackEntry) != 0) {
    // Different entries are never collapsed.
    return false;
  }

  return strcmp(aStackEntry, "(chrome script)") == 0 ||
         strcmp(aStackEntry, "(content script)") == 0;
}
49 :
50 : namespace mozilla {
51 :
52 : class ProcessHangRunnable;
53 :
54 : /**
55 : * BackgroundHangManager is the global object that
56 : * manages all instances of BackgroundHangThread.
57 : */
// It creates the monitor thread in its constructor and joins it in its
// destructor. The thread list and the shared clock (mIntervalNow) are accessed
// with mLock held. It is also registered as an observer of
// "profile-after-change" (see Observe()).
58 : class BackgroundHangManager : public nsIObserver
59 : {
60 : private:
61 : // Background hang monitor thread function
// Entry point passed to PR_CreateThread; aData is the BackgroundHangManager.
62 0 : static void MonitorThread(void* aData)
63 : {
64 0 : AutoProfilerRegisterThread registerThread("BgHangMonitor");
65 0 : NS_SetCurrentThreadName("BgHangManager");
66 :
67 : /* We do not hold a reference to BackgroundHangManager here
68 : because the monitor thread only exists as long as the
69 : BackgroundHangManager instance exists. We stop the monitor
70 : thread in the BackgroundHangManager destructor, and we can
71 : only get to the destructor if we don't hold a reference here. */
72 0 : static_cast<BackgroundHangManager*>(aData)->RunMonitorThread();
73 0 : }
74 :
75 : // Hang monitor thread
// Assigned in the constructor body (not the initializer list).
76 : PRThread* mHangMonitorThread;
77 : // Stop hang monitoring
// Set to true by Shutdown() under mLock; ends RunMonitorThread's main loop.
78 : bool mShutdown;
79 :
// Copying is disabled: declared private and not defined in this file.
80 : BackgroundHangManager(const BackgroundHangManager&);
81 : BackgroundHangManager& operator=(const BackgroundHangManager&);
82 : void RunMonitorThread();
83 :
84 : public:
85 : NS_DECL_THREADSAFE_ISUPPORTS
86 : NS_DECL_NSIOBSERVER
// Singleton instance; created in BackgroundHangMonitor::Startup() and cleared
// in BackgroundHangMonitor::Shutdown().
87 : static StaticRefPtr<BackgroundHangManager> sInstance;
// True once monitoring has been switched off (set by Shutdown() or
// DisableOnBeta()).
88 : static bool sDisabled;
89 :
90 : // Lock for access to members of this class
91 : Monitor mLock;
92 : // Current time as seen by hang monitors
93 : PRIntervalTime mIntervalNow;
94 : // List of BackgroundHangThread instances associated with each thread
95 : LinkedList<BackgroundHangThread> mHangThreads;
96 : // A reference to the StreamTransportService. This is gotten on the main
97 : // thread, and carried around, as nsStreamTransportService::Init is
98 : // non-threadsafe.
99 : nsCOMPtr<nsIEventTarget> mSTS;
100 :
// Flags shutdown and notifies the monitor so the monitor thread can observe it.
101 : void Shutdown()
102 : {
103 : MonitorAutoLock autoLock(mLock);
104 : mShutdown = true;
105 : autoLock.Notify();
106 : }
107 :
108 : // Attempt to wakeup the hang monitor thread.
// The caller must already hold mLock (asserted below).
109 : void Wakeup()
110 : {
111 : mLock.AssertCurrentThreadOwns();
112 : mLock.NotifyAll();
113 : }
114 :
115 : BackgroundHangManager();
116 : private:
117 : virtual ~BackgroundHangManager();
118 : };
119 :
120 0 : NS_IMPL_ISUPPORTS(BackgroundHangManager, nsIObserver)
121 :
122 : NS_IMETHODIMP
123 0 : BackgroundHangManager::Observe(nsISupports* aSubject, const char* aTopic, const char16_t* aData) {
124 0 : NS_ENSURE_TRUE(!strcmp(aTopic, "profile-after-change"), NS_ERROR_UNEXPECTED);
125 0 : BackgroundHangMonitor::DisableOnBeta();
126 :
127 0 : nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
128 0 : MOZ_ASSERT(observerService);
129 0 : observerService->RemoveObserver(this, "profile-after-change");
130 :
131 0 : return NS_OK;
132 : }
133 :
134 : /**
135 : * BackgroundHangThread is a per-thread object that is used
136 : * by all instances of BackgroundHangMonitor to monitor hangs.
137 : */
// Each instance links itself into BackgroundHangManager::mHangThreads on
// construction and unlinks itself on destruction. The data members in the
// public section below are declared in the order the constructor's
// initializer list relies on; do not reorder them.
138 : class BackgroundHangThread : public LinkedListElement<BackgroundHangThread>
139 : {
140 : private:
// Thread-local pointer to the shared BackgroundHangThread of the current
// thread; only used when sTlsKeyInitialized is true.
141 : static MOZ_THREAD_LOCAL(BackgroundHangThread*) sTlsKey;
142 : static bool sTlsKeyInitialized;
143 :
// Copying is disabled: declared private and not defined in this file.
144 : BackgroundHangThread(const BackgroundHangThread&);
145 : BackgroundHangThread& operator=(const BackgroundHangThread&);
146 : ~BackgroundHangThread();
147 :
148 : /* Keep a reference to the manager, so we can keep going even
149 : after BackgroundHangManager::Shutdown is called. */
150 : const RefPtr<BackgroundHangManager> mManager;
151 : // Unique thread ID for identification
// Captured from PR_GetCurrentThread() in the constructor.
152 : const PRThread* mThreadID;
153 :
// Records activity at the manager's current time; defined out of line below.
154 : void Update();
155 :
156 : public:
157 0 : NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread)
158 : /**
159 : * Returns the BackgroundHangThread associated with the
160 : * running thread. Note that this will not find private
161 : * BackgroundHangThread threads.
162 : *
163 : * @return BackgroundHangThread*, or nullptr if no thread
164 : * is found.
165 : */
166 : static BackgroundHangThread* FindThread();
167 :
// Initializes the TLS key; FindThread() falls back to scanning the thread
// list when this fails.
168 : static void Startup()
169 : {
170 : /* We can tolerate init() failing. */
171 : sTlsKeyInitialized = sTlsKey.init();
172 : }
173 :
174 : // Hang timeout in ticks
175 : const PRIntervalTime mTimeout;
176 : // PermaHang timeout in ticks
177 : const PRIntervalTime mMaxTimeout;
178 : // Time at last activity
179 : PRIntervalTime mInterval;
180 : // Time when a hang started
181 : PRIntervalTime mHangStart;
182 : // Is the thread in a hang
183 : bool mHanging;
184 : // Is the thread in a waiting state
// Initialized to true by the constructor; cleared by Update().
185 : bool mWaiting;
186 : // Is the thread dedicated to a single BackgroundHangMonitor
187 : BackgroundHangMonitor::ThreadType mThreadType;
188 : // Platform-specific helper to get hang stacks
189 : ThreadStackHelper mStackHelper;
190 : // Stack of current hang
191 : Telemetry::HangStack mHangStack;
192 : // Native stack of current hang
193 : Telemetry::NativeHangStack mNativeHangStack;
194 : // Statistics for telemetry
195 : Telemetry::ThreadHangStats mStats;
196 : // Annotations for the current hang
197 : UniquePtr<HangMonitor::HangAnnotations> mAnnotations;
198 : // Annotators registered for this thread
199 : HangMonitor::Observer::Annotators mAnnotators;
200 : // List of runnables which can hold a reference to us which need to be
201 : // canceled before we can go away.
202 : LinkedList<RefPtr<ProcessHangRunnable>> mProcessHangRunnables;
203 : // The name of the runnable which is hanging the current process
204 : nsCString mRunnableName;
205 :
// aTimeoutMs / aMaxTimeoutMs are in milliseconds; the value
// BackgroundHangMonitor::kNoTimeout maps to PR_INTERVAL_NO_TIMEOUT.
206 : BackgroundHangThread(const char* aName,
207 : uint32_t aTimeoutMs,
208 : uint32_t aMaxTimeoutMs,
209 : BackgroundHangMonitor::ThreadType aThreadType = BackgroundHangMonitor::THREAD_SHARED);
210 :
211 : // Report a hang; aManager->mLock IS locked. The hang will be processed
212 : // off-main-thread, and will then be submitted back.
213 : void ReportHang(PRIntervalTime aHangTime);
214 : // Report a permanent hang; aManager->mLock IS locked
215 : void ReportPermaHang();
216 : // Called by BackgroundHangMonitor::NotifyActivity
// Takes the manager lock itself, so the caller must not already hold it.
217 : void NotifyActivity()
218 : {
219 : MonitorAutoLock autoLock(mManager->mLock);
220 : Update();
221 : }
222 : // Called by BackgroundHangMonitor::NotifyWait
// No-op if already waiting; otherwise records the activity up to now and then
// marks the thread as waiting.
223 : void NotifyWait()
224 : {
225 : MonitorAutoLock autoLock(mManager->mLock);
226 :
227 : if (mWaiting) {
228 : return;
229 : }
230 :
231 : Update();
232 : mWaiting = true;
233 : }
234 :
235 : // Returns true if this thread is (or might be) shared between other
236 : // BackgroundHangMonitors for the monitored thread.
237 0 : bool IsShared() {
238 0 : return mThreadType == BackgroundHangMonitor::THREAD_SHARED;
239 : }
240 : };
241 :
242 : /**
243 : * HangDetails is the concrete implementaion of nsIHangDetails, and contains the
244 : * infromation which we want to expose to observers of the bhr-thread-hang
245 : * observer notification.
246 : */
247 : class HangDetails : public nsIHangDetails
248 : {
249 : public:
250 : NS_DECL_ISUPPORTS
251 : NS_DECL_NSIHANGDETAILS
252 :
253 0 : HangDetails(uint32_t aDuration, const nsACString& aName)
254 0 : : mDuration(aDuration)
255 0 : , mName(aName)
256 0 : {}
257 : private:
258 0 : virtual ~HangDetails() {}
259 :
260 : uint32_t mDuration;
261 : nsCString mName;
262 : };
263 :
// Definitions of the static members declared in the classes above.
264 3 : StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance;
265 : bool BackgroundHangManager::sDisabled = false;
266 :
267 : MOZ_THREAD_LOCAL(BackgroundHangThread*) BackgroundHangThread::sTlsKey;
// No explicit initializer; assigned in BackgroundHangThread::Startup().
268 : bool BackgroundHangThread::sTlsKeyInitialized;
269 :
270 0 : BackgroundHangManager::BackgroundHangManager()
271 : : mShutdown(false)
272 : , mLock("BackgroundHangManager")
273 : , mIntervalNow(0)
274 0 : , mSTS(do_GetService(NS_STREAMTRANSPORTSERVICE_CONTRACTID))
275 : {
276 : // Lock so we don't race against the new monitor thread
277 0 : MonitorAutoLock autoLock(mLock);
278 :
279 0 : mHangMonitorThread = PR_CreateThread(
280 : PR_USER_THREAD, MonitorThread, this,
281 : PR_PRIORITY_LOW, PR_GLOBAL_THREAD, PR_JOINABLE_THREAD, 0);
282 :
283 0 : MOZ_ASSERT(mHangMonitorThread, "Failed to create monitor thread");
284 0 : }
285 :
286 0 : BackgroundHangManager::~BackgroundHangManager()
287 : {
288 0 : MOZ_ASSERT(mShutdown, "Destruction without Shutdown call");
289 0 : MOZ_ASSERT(mHangThreads.isEmpty(), "Destruction with outstanding monitors");
290 0 : MOZ_ASSERT(mHangMonitorThread, "No monitor thread");
291 :
292 : // PR_CreateThread could have failed above due to resource limitation
293 0 : if (mHangMonitorThread) {
294 : // The monitor thread can only live as long as the instance lives
295 0 : PR_JoinThread(mHangMonitorThread);
296 : }
297 0 : }
298 :
// Body of the hang monitor thread. mLock is held except while blocked in
// Wait(). Each wakeup advances mIntervalNow (unless the wakeup was late). When
// a recheck is due or the wait was interrupted, it walks mHangThreads to detect
// started hangs, ended hangs and permahangs, and recomputes waitTime and
// recheckTimeout. Once mShutdown is set, it waits until every
// BackgroundHangThread has removed itself from mHangThreads, then returns.
299 : void
300 0 : BackgroundHangManager::RunMonitorThread()
301 : {
302 : // Keep us locked except when waiting
303 0 : MonitorAutoLock autoLock(mLock);
304 :
305 : /* mIntervalNow is updated at various intervals determined by waitTime.
306 : However, if an update latency is too long (due to CPU scheduling, system
307 : sleep, etc.), we don't update mIntervalNow at all. This is done so that
308 : long latencies in our timing are not detected as hangs. systemTime is
309 : used to track PR_IntervalNow() and determine our latency. */
310 :
311 0 : PRIntervalTime systemTime = PR_IntervalNow();
312 : // Default values for the first iteration of thread loop
313 0 : PRIntervalTime waitTime = PR_INTERVAL_NO_WAIT;
314 0 : PRIntervalTime recheckTimeout = PR_INTERVAL_NO_WAIT;
315 :
316 0 : while (!mShutdown) {
317 0 : nsresult rv = autoLock.Wait(waitTime);
318 :
// Real time elapsed since the previous wakeup.
319 0 : PRIntervalTime newTime = PR_IntervalNow();
320 0 : PRIntervalTime systemInterval = newTime - systemTime;
321 0 : systemTime = newTime;
322 :
323 : /* waitTime is a quarter of the shortest timeout value; If our timing
324 : latency is low enough (less than half the shortest timeout value),
325 : we can update mIntervalNow. */
326 0 : if (MOZ_LIKELY(waitTime != PR_INTERVAL_NO_TIMEOUT &&
327 : systemInterval < 2 * waitTime)) {
328 0 : mIntervalNow += systemInterval;
329 : }
330 :
331 : /* If it's before the next recheck timeout, and our wait did not get
332 : interrupted, we can keep the current waitTime and skip iterating
333 : through hang monitors. */
334 0 : if (MOZ_LIKELY(systemInterval < recheckTimeout &&
335 : systemInterval >= waitTime &&
336 : rv == NS_OK)) {
337 0 : recheckTimeout -= systemInterval;
338 0 : continue;
339 : }
340 :
341 : /* We are in one of the following scenarios,
342 : - Hang or permahang recheck timeout
343 : - Thread added/removed
344 : - Thread wait or hang ended
345 : In all cases, we want to go through our list of hang
346 : monitors and update waitTime and recheckTimeout. */
347 0 : waitTime = PR_INTERVAL_NO_TIMEOUT;
348 0 : recheckTimeout = PR_INTERVAL_NO_TIMEOUT;
349 :
350 : // Locally hold mIntervalNow
351 0 : PRIntervalTime intervalNow = mIntervalNow;
352 :
353 : // iterate through hang monitors
354 0 : for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
355 0 : currentThread; currentThread = currentThread->getNext()) {
356 :
357 0 : if (currentThread->mWaiting) {
358 : // Thread is waiting, not hanging
359 0 : continue;
360 : }
// hangTime: monitor-clock time since this thread's last recorded activity.
361 0 : PRIntervalTime interval = currentThread->mInterval;
362 0 : PRIntervalTime hangTime = intervalNow - interval;
363 0 : if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
364 : // A permahang started
365 : // Skip subsequent iterations and tolerate a race on mWaiting here
366 0 : currentThread->mWaiting = true;
367 0 : currentThread->mHanging = false;
368 0 : currentThread->ReportPermaHang();
369 0 : continue;
370 : }
371 :
372 0 : if (MOZ_LIKELY(!currentThread->mHanging)) {
373 0 : if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
374 : // A hang started
375 : #ifdef NIGHTLY_BUILD
376 0 : if (currentThread->mStats.mNativeStackCnt < Telemetry::kMaximumNativeHangStacks) {
377 : // NOTE: In nightly builds of firefox we want to collect native stacks
378 : // for all hangs, not just permahangs.
379 0 : currentThread->mStats.mNativeStackCnt += 1;
380 0 : currentThread->mStackHelper.GetPseudoAndNativeStack(
381 : currentThread->mHangStack,
382 : currentThread->mNativeHangStack,
383 0 : currentThread->mRunnableName);
384 : } else {
385 0 : currentThread->mStackHelper.GetPseudoStack(currentThread->mHangStack,
386 0 : currentThread->mRunnableName);
387 : }
388 : #else
389 : currentThread->mStackHelper.GetPseudoStack(currentThread->mHangStack,
390 : currentThread->mRunnableName);
391 : #endif
// Remember which activity timestamp this hang belongs to; the hang is
// considered over once mInterval moves away from it (checked below).
392 0 : currentThread->mHangStart = interval;
393 0 : currentThread->mHanging = true;
394 : currentThread->mAnnotations =
395 0 : currentThread->mAnnotators.GatherAnnotations();
396 : }
397 : } else {
398 0 : if (MOZ_LIKELY(interval != currentThread->mHangStart)) {
399 : // A hang ended
400 0 : currentThread->ReportHang(intervalNow - currentThread->mHangStart);
401 0 : currentThread->mHanging = false;
402 : }
403 : }
404 :
405 : /* If we are hanging, the next time we check for hang status is when
406 : the hang turns into a permahang. If we're not hanging, the next
407 : recheck timeout is when we may be entering a hang. */
408 : PRIntervalTime nextRecheck;
409 0 : if (currentThread->mHanging) {
410 0 : nextRecheck = currentThread->mMaxTimeout;
411 : } else {
412 0 : nextRecheck = currentThread->mTimeout;
413 : }
// Keep the smallest remaining time across all monitored threads.
414 0 : recheckTimeout = std::min(recheckTimeout, nextRecheck - hangTime);
415 :
416 0 : if (currentThread->mTimeout != PR_INTERVAL_NO_TIMEOUT) {
417 : /* We wait for a quarter of the shortest timeout
418 : value to give mIntervalNow enough granularity. */
419 0 : waitTime = std::min(waitTime, currentThread->mTimeout / 4);
420 : }
421 : }
422 : }
423 :
424 : /* We are shutting down now.
425 : Wait for all outstanding monitors to unregister. */
426 0 : while (!mHangThreads.isEmpty()) {
427 0 : autoLock.Wait(PR_INTERVAL_NO_TIMEOUT);
428 : }
429 0 : }
430 :
431 :
432 0 : BackgroundHangThread::BackgroundHangThread(const char* aName,
433 : uint32_t aTimeoutMs,
434 : uint32_t aMaxTimeoutMs,
435 0 : BackgroundHangMonitor::ThreadType aThreadType)
436 : : mManager(BackgroundHangManager::sInstance)
437 0 : , mThreadID(PR_GetCurrentThread())
438 : , mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout
439 : ? PR_INTERVAL_NO_TIMEOUT
440 : : PR_MillisecondsToInterval(aTimeoutMs))
441 : , mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout
442 : ? PR_INTERVAL_NO_TIMEOUT
443 : : PR_MillisecondsToInterval(aMaxTimeoutMs))
444 0 : , mInterval(mManager->mIntervalNow)
445 0 : , mHangStart(mInterval)
446 : , mHanging(false)
447 : , mWaiting(true)
448 : , mThreadType(aThreadType)
449 0 : , mStats(aName)
450 : {
451 0 : if (sTlsKeyInitialized && IsShared()) {
452 0 : sTlsKey.set(this);
453 : }
454 : // Lock here because LinkedList is not thread-safe
455 0 : MonitorAutoLock autoLock(mManager->mLock);
456 : // Add to thread list
457 0 : mManager->mHangThreads.insertBack(this);
458 : // Wake up monitor thread to process new thread
459 0 : autoLock.Notify();
460 0 : }
461 :
462 : // This runnable is used to pre-process a hang, performing any expensive
463 : // operations on it, before submitting it into the BackgroundHangThread object
464 : // for Telemetry.
465 : //
466 : // If this object is canceled, it will submit its payload to the
467 : // BackgroundHangThread without performing the processing.
468 0 : class ProcessHangRunnable final
469 : : public CancelableRunnable
470 : , public LinkedListElement<RefPtr<ProcessHangRunnable>>
471 : {
472 : public:
473 0 : ProcessHangRunnable(BackgroundHangManager* aManager,
474 : BackgroundHangThread* aThread,
475 : Telemetry::HangHistogram&& aHistogram,
476 : Telemetry::NativeHangStack&& aNativeStack)
477 0 : : CancelableRunnable("ProcessHangRunnable")
478 : , mManager(aManager)
479 0 : , mNativeStack(mozilla::Move(aNativeStack))
480 : , mThread(aThread)
481 0 : , mHistogram(mozilla::Move(aHistogram))
482 : {
483 0 : MOZ_ASSERT(mThread);
484 0 : }
485 :
486 : NS_IMETHOD
487 0 : Run() override
488 : {
489 : // Start processing this histogram's native hang stack before we try to lock
490 : // anything, as we can do this without any locks held. This is the expensive
491 : // part of the operation.
492 0 : Telemetry::ProcessedStack processed;
493 0 : if (!mNativeStack.empty()) {
494 0 : processed = Telemetry::GetStackAndModules(mNativeStack);
495 : }
496 :
497 : // Lock the manager's lock, so that we can take a look at our mThread
498 : {
499 0 : MonitorAutoLock autoLock(mManager->mLock);
500 0 : if (NS_WARN_IF(!mThread)) {
501 0 : return NS_OK;
502 : }
503 :
504 : // If we have a stack, check if we can add it to combined stacks. This is
505 : // a relatively cheap operation, and must occur with the lock held.
506 0 : if (!mNativeStack.empty() &&
507 0 : mThread->mStats.mCombinedStacks.GetStackCount() < Telemetry::kMaximumNativeHangStacks) {
508 0 : mHistogram.SetNativeStackIndex(mThread->mStats.mCombinedStacks.AddStack(processed));
509 : }
510 :
511 : // Submit, remove ourselves from the list, and clear out mThread so we
512 : // don't run again.
513 0 : MOZ_ALWAYS_TRUE(mThread->mStats.mHangs.append(Move(mHistogram)));
514 0 : remove();
515 0 : mThread = nullptr;
516 : }
517 :
518 0 : return NS_OK;
519 : }
520 :
521 : // Submits hang, and removes from list.
522 : nsresult
523 0 : Cancel() override
524 : {
525 0 : mManager->mLock.AssertCurrentThreadOwns();
526 0 : if (NS_WARN_IF(!mThread)) {
527 0 : return NS_OK;
528 : }
529 :
530 : // Submit, remove ourselves from the list, and clear out mThread so we
531 : // don't run again.
532 0 : MOZ_ALWAYS_TRUE(mThread->mStats.mHangs.append(Move(mHistogram)));
533 0 : if (isInList()) {
534 0 : remove();
535 : }
536 0 : mThread = nullptr;
537 0 : return NS_OK;
538 : }
539 :
540 : private:
541 : // These variables are constant after initialization, and do not need
542 : // synchronization.
543 : RefPtr<BackgroundHangManager> mManager;
544 : const Telemetry::NativeHangStack mNativeStack;
545 : // These variables are guarded by mManager->mLock.
546 : BackgroundHangThread* MOZ_NON_OWNING_REF mThread; // Will Cancel us before it dies
547 : Telemetry::HangHistogram mHistogram;
548 : };
549 :
550 0 : BackgroundHangThread::~BackgroundHangThread()
551 : {
552 : // Lock here because LinkedList is not thread-safe
553 0 : MonitorAutoLock autoLock(mManager->mLock);
554 : // Remove from thread list
555 0 : remove();
556 : // Wake up monitor thread to process removed thread
557 0 : autoLock.Notify();
558 :
559 : // We no longer have a thread
560 0 : if (sTlsKeyInitialized && IsShared()) {
561 0 : sTlsKey.set(nullptr);
562 : }
563 :
564 : // Cancel any remaining process hang runnables, as they hold a weak reference
565 : // into our mStats variable, which we're about to move.
566 0 : while (RefPtr<ProcessHangRunnable> runnable = mProcessHangRunnables.popFirst()) {
567 0 : runnable->Cancel();
568 0 : }
569 :
570 : // Record the ThreadHangStats for this thread before we go away. All stats
571 : // should be in this method now, as we canceled any pending runnables.
572 0 : Telemetry::RecordThreadHangStats(Move(mStats));
573 0 : }
574 :
575 : void
576 0 : BackgroundHangThread::ReportHang(PRIntervalTime aHangTime)
577 : {
578 : // Recovered from a hang; called on the monitor thread
579 : // mManager->mLock IS locked
580 :
581 : // Remove unwanted "js::RunScript" frame from the stack
582 0 : for (size_t i = 0; i < mHangStack.length(); ) {
583 0 : const char** f = mHangStack.begin() + i;
584 0 : if (!mHangStack.IsInBuffer(*f) && !strcmp(*f, "js::RunScript")) {
585 0 : mHangStack.erase(f);
586 : } else {
587 0 : i++;
588 : }
589 : }
590 :
591 : // Collapse duplicated "(chrome script)" and "(content script)" entries in the stack.
592 0 : auto it = std::unique(mHangStack.begin(), mHangStack.end(), StackScriptEntriesCollapser);
593 0 : mHangStack.erase(it, mHangStack.end());
594 :
595 : // Limit the depth of the reported stack if greater than our limit. Only keep its
596 : // last entries, since the most recent frames are at the end of the vector.
597 0 : if (mHangStack.length() > kMaxThreadHangStackDepth) {
598 0 : const int elementsToRemove = mHangStack.length() - kMaxThreadHangStackDepth;
599 : // Replace the oldest frame with a known label so that we can tell this stack
600 : // was limited.
601 0 : mHangStack[0] = "(reduced stack)";
602 0 : mHangStack.erase(mHangStack.begin() + 1, mHangStack.begin() + elementsToRemove);
603 : }
604 :
605 0 : Telemetry::HangHistogram newHistogram(Move(mHangStack), mRunnableName);
606 0 : for (Telemetry::HangHistogram* oldHistogram = mStats.mHangs.begin();
607 0 : oldHistogram != mStats.mHangs.end(); oldHistogram++) {
608 0 : if (newHistogram == *oldHistogram) {
609 : // New histogram matches old one
610 0 : oldHistogram->Add(aHangTime, Move(mAnnotations));
611 0 : return;
612 : }
613 : }
614 0 : newHistogram.Add(aHangTime, Move(mAnnotations));
615 :
616 : // Notify any observers of the "bhr-thread-hang" topic that a thread has hung.
617 0 : nsCString name;
618 0 : name.AssignASCII(mStats.GetName());
619 0 : nsCOMPtr<nsIRunnable> runnable = NS_NewRunnableFunction("NotifyBHRHangObservers", [=] {
620 0 : nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
621 0 : if (os) {
622 : // NOTE: Make sure to construct this on the main thread.
623 0 : nsCOMPtr<nsIHangDetails> hangDetails = new HangDetails(aHangTime, name);
624 0 : os->NotifyObservers(hangDetails, "bhr-thread-hang", nullptr);
625 : }
626 0 : });
627 0 : if (SystemGroup::Initialized()) {
628 : // XXX(HACK): This is really sketchy. We need to keep a reference to the
629 : // runnable in case the dispatch fails. If it fails, the already_AddRefed
630 : // runnable which we passed in has been leaked, and we need to free it
631 : // ourselves. The only time when this should fail is if we're shutting down.
632 : //
633 : // Most components just avoid dispatching runnables during shutdown, but BHR
634 : // is not shut down until way too late, so we cannot do that. Instead, we
635 : // just detect that the dispatch failed and manually unleak the leaked
636 : // nsIRunnable in that situation.
637 : nsresult rv = SystemGroup::Dispatch("NotifyBHRHangObservers",
638 : TaskCategory::Other,
639 0 : do_AddRef(runnable.get()));
640 0 : if (NS_FAILED(rv)) {
641 : // NOTE: We go through `get()` here in order to avoid the
642 : // MOZ_NO_ADDREF_RELEASE_ON_RETURN static analysis.
643 0 : nsrefcnt refcnt = runnable.get()->Release();
644 0 : MOZ_RELEASE_ASSERT(refcnt == 1, "runnable should have had 1 reference leaked");
645 : }
646 : }
647 :
648 : // Process the hang off-main thread. We record a reference to the runnable in
649 : // mProcessHangRunnables so we can abort this preprocessing and just submit
650 : // the message if the processing takes too long and our thread is going away.
651 : RefPtr<ProcessHangRunnable> processHang =
652 0 : new ProcessHangRunnable(mManager, this, Move(newHistogram), Move(mNativeHangStack));
653 0 : mProcessHangRunnables.insertFront(processHang);
654 :
655 : // Try to dispatch the runnable to the StreamTransportService threadpool. If
656 : // we fail, cancel our runnable.
657 0 : if (!mManager->mSTS || NS_FAILED(mManager->mSTS->Dispatch(processHang.forget()))) {
658 0 : RefPtr<ProcessHangRunnable> runnable = mProcessHangRunnables.popFirst();
659 0 : runnable->Cancel();
660 : }
661 : }
662 :
663 : void
664 0 : BackgroundHangThread::ReportPermaHang()
665 : {
666 : // Permanently hanged; called on the monitor thread
667 : // mManager->mLock IS locked
668 :
669 : // NOTE: We used to capture a native stack in this situation if one had not
670 : // already been captured, but with the new ReportHang design that is less
671 : // practical.
672 : //
673 : // We currently don't look at hang reports outside of nightly, and already
674 : // collect native stacks eagerly on nightly, so this should be OK.
675 0 : ReportHang(mMaxTimeout);
676 0 : }
677 :
678 : MOZ_ALWAYS_INLINE void
679 : BackgroundHangThread::Update()
680 : {
681 : PRIntervalTime intervalNow = mManager->mIntervalNow;
682 : if (mWaiting) {
683 : mInterval = intervalNow;
684 : mWaiting = false;
685 : /* We have to wake up the manager thread because when all threads
686 : are waiting, the manager thread waits indefinitely as well. */
687 : mManager->Wakeup();
688 : } else {
689 : PRIntervalTime duration = intervalNow - mInterval;
690 : mStats.mActivity.Add(duration);
691 : if (MOZ_UNLIKELY(duration >= mTimeout)) {
692 : /* Wake up the manager thread to tell it that a hang ended */
693 : mManager->Wakeup();
694 : }
695 : mInterval = intervalNow;
696 : }
697 : }
698 :
699 : BackgroundHangThread*
700 3876 : BackgroundHangThread::FindThread()
701 : {
702 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
703 : if (BackgroundHangManager::sInstance == nullptr) {
704 : MOZ_ASSERT(BackgroundHangManager::sDisabled,
705 : "BackgroundHandleManager is not initialized");
706 : return nullptr;
707 : }
708 :
709 : if (sTlsKeyInitialized) {
710 : // Use TLS if available
711 : return sTlsKey.get();
712 : }
713 : // If TLS is unavailable, we can search through the thread list
714 : RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance);
715 : MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown");
716 :
717 : PRThread* threadID = PR_GetCurrentThread();
718 : // Lock thread list for traversal
719 : MonitorAutoLock autoLock(manager->mLock);
720 : for (BackgroundHangThread* thread = manager->mHangThreads.getFirst();
721 : thread; thread = thread->getNext()) {
722 : if (thread->mThreadID == threadID && thread->IsShared()) {
723 : return thread;
724 : }
725 : }
726 : #endif
727 : // Current thread is not initialized
728 3876 : return nullptr;
729 : }
730 :
731 : bool
732 0 : BackgroundHangMonitor::ShouldDisableOnBeta(const nsCString &clientID) {
733 0 : MOZ_ASSERT(clientID.Length() == 36, "clientID is invalid");
734 0 : const char *suffix = clientID.get() + clientID.Length() - 4;
735 0 : return strtol(suffix, NULL, 16) % BHR_BETA_MOD;
736 : }
737 :
738 : bool
739 0 : BackgroundHangMonitor::IsDisabled() {
740 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
741 : return BackgroundHangManager::sDisabled;
742 : #else
743 0 : return true;
744 : #endif
745 : }
746 :
747 : bool
748 0 : BackgroundHangMonitor::DisableOnBeta() {
749 0 : nsAdoptingCString clientID = Preferences::GetCString("toolkit.telemetry.cachedClientID");
750 0 : bool telemetryEnabled = Preferences::GetBool("toolkit.telemetry.enabled");
751 :
752 0 : if (!telemetryEnabled || !clientID || BackgroundHangMonitor::ShouldDisableOnBeta(clientID)) {
753 0 : if (XRE_IsParentProcess()) {
754 0 : BackgroundHangMonitor::Shutdown();
755 : } else {
756 0 : BackgroundHangManager::sDisabled = true;
757 : }
758 0 : return true;
759 : }
760 :
761 0 : return false;
762 : }
763 :
764 : void
765 3 : BackgroundHangMonitor::Startup()
766 : {
767 3 : MOZ_RELEASE_ASSERT(NS_IsMainThread());
768 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
769 : MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
770 :
771 : if (!strcmp(NS_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) {
772 : if (XRE_IsParentProcess()) { // cached ClientID hasn't been read yet
773 : BackgroundHangThread::Startup();
774 : BackgroundHangManager::sInstance = new BackgroundHangManager();
775 :
776 : nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
777 : MOZ_ASSERT(observerService);
778 :
779 : observerService->AddObserver(BackgroundHangManager::sInstance, "profile-after-change", false);
780 : return;
781 : } else if(DisableOnBeta()){
782 : return;
783 : }
784 : }
785 :
786 : BackgroundHangThread::Startup();
787 : BackgroundHangManager::sInstance = new BackgroundHangManager();
788 : #endif
789 3 : }
790 :
791 : void
792 0 : BackgroundHangMonitor::Shutdown()
793 : {
794 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
795 : if (BackgroundHangManager::sDisabled) {
796 : MOZ_ASSERT(!BackgroundHangManager::sInstance, "Initialized");
797 : return;
798 : }
799 :
800 : MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
801 : /* Scope our lock inside Shutdown() because the sInstance object can
802 : be destroyed as soon as we set sInstance to nullptr below, and
803 : we don't want to hold the lock when it's being destroyed. */
804 : BackgroundHangManager::sInstance->Shutdown();
805 : BackgroundHangManager::sInstance = nullptr;
806 : BackgroundHangManager::sDisabled = true;
807 : #endif
808 0 : }
809 :
810 10 : BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
811 : uint32_t aTimeoutMs,
812 : uint32_t aMaxTimeoutMs,
813 10 : ThreadType aThreadType)
814 10 : : mThread(aThreadType == THREAD_SHARED ? BackgroundHangThread::FindThread() : nullptr)
815 : {
816 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
817 : if (!BackgroundHangManager::sDisabled && !mThread) {
818 : mThread = new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs,
819 : aThreadType);
820 : }
821 : #endif
822 10 : }
823 :
824 3868 : BackgroundHangMonitor::BackgroundHangMonitor()
825 3868 : : mThread(BackgroundHangThread::FindThread())
826 : {
827 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
828 : if (BackgroundHangManager::sDisabled) {
829 : return;
830 : }
831 : #endif
832 3868 : }
833 :
834 3868 : BackgroundHangMonitor::~BackgroundHangMonitor()
835 : {
836 3868 : }
837 :
838 : void
839 8163 : BackgroundHangMonitor::NotifyActivity()
840 : {
841 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
842 : if (mThread == nullptr) {
843 : MOZ_ASSERT(BackgroundHangManager::sDisabled,
844 : "This thread is not initialized for hang monitoring");
845 : return;
846 : }
847 :
848 : if (Telemetry::CanRecordExtended()) {
849 : mThread->NotifyActivity();
850 : }
851 : #endif
852 8163 : }
853 :
854 : void
855 2735 : BackgroundHangMonitor::NotifyWait()
856 : {
857 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
858 : if (mThread == nullptr) {
859 : MOZ_ASSERT(BackgroundHangManager::sDisabled,
860 : "This thread is not initialized for hang monitoring");
861 : return;
862 : }
863 :
864 : if (Telemetry::CanRecordExtended()) {
865 : mThread->NotifyWait();
866 : }
867 : #endif
868 2735 : }
869 :
870 : bool
871 3 : BackgroundHangMonitor::RegisterAnnotator(HangMonitor::Annotator& aAnnotator)
872 : {
873 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
874 : BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
875 : if (!thisThread) {
876 : return false;
877 : }
878 : return thisThread->mAnnotators.Register(aAnnotator);
879 : #else
880 3 : return false;
881 : #endif
882 : }
883 :
884 : bool
885 0 : BackgroundHangMonitor::UnregisterAnnotator(HangMonitor::Annotator& aAnnotator)
886 : {
887 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
888 : BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
889 : if (!thisThread) {
890 : return false;
891 : }
892 : return thisThread->mAnnotators.Unregister(aAnnotator);
893 : #else
894 0 : return false;
895 : #endif
896 : }
897 :
898 : /* Because we are iterating through the BackgroundHangThread linked list,
899 : we need to take a lock. Using MonitorAutoLock as a base class makes
900 : sure all of that is taken care of for us. */
// The iterator starts at the first entry of the manager's thread list.
//
// NOTE(review): the base-class initializer below dereferences sInstance
// unconditionally, while the mThread initializer on the following lines
// null-checks it. If sInstance can be null here (e.g. after Shutdown()), the
// dereference happens before the check — confirm callers guarantee a live
// instance.
901 0 : BackgroundHangMonitor::ThreadHangStatsIterator::ThreadHangStatsIterator()
902 0 : : MonitorAutoLock(BackgroundHangManager::sInstance->mLock)
903 0 : , mThread(BackgroundHangManager::sInstance ?
904 0 : BackgroundHangManager::sInstance->mHangThreads.getFirst() :
905 0 : nullptr)
906 : {
907 : #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
908 : MOZ_ASSERT(BackgroundHangManager::sInstance ||
909 : BackgroundHangManager::sDisabled,
910 : "Inconsistent state");
911 : #endif
912 0 : }
913 :
914 : Telemetry::ThreadHangStats*
915 0 : BackgroundHangMonitor::ThreadHangStatsIterator::GetNext()
916 : {
917 0 : if (!mThread) {
918 0 : return nullptr;
919 : }
920 0 : Telemetry::ThreadHangStats* stats = &mThread->mStats;
921 0 : mThread = mThread->getNext();
922 0 : return stats;
923 : }
924 :
925 : NS_IMETHODIMP
926 0 : HangDetails::GetDuration(uint32_t* aDuration)
927 : {
928 0 : *aDuration = mDuration;
929 0 : return NS_OK;
930 : }
931 :
932 : NS_IMETHODIMP
933 0 : HangDetails::GetThreadName(nsACString& aName)
934 : {
935 0 : aName.Assign(mName);
936 0 : return NS_OK;
937 : }
938 :
939 0 : NS_IMPL_ISUPPORTS(HangDetails, nsIHangDetails)
940 :
941 : } // namespace mozilla
|