1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : // There are three kinds of samples done by the profiler.
8 : //
9 : // - A "periodic" sample is the most complex kind. It is done in response to a
10 : // timer while the profiler is active. It involves writing a stack trace plus
11 : // a variety of other values (memory measurements, responsiveness
12 : // measurements, markers, etc.) into the main ProfileBuffer. The sampling is
13 : // done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
14 : // get the register values.
15 : //
16 : // - A "synchronous" sample is a simpler kind. It is done in response to an API
17 : // call (profiler_get_backtrace()). It involves writing a stack trace and
18 : // little else into a temporary ProfileBuffer, and wrapping that up in a
19 : // ProfilerBacktrace that can be subsequently used in a marker. The sampling
20 : // is done on-thread, and so Registers::SyncPopulate() is used to get the
21 : // register values.
22 : //
23 : // - A "backtrace" sample is the simplest kind. It is done in response to an
24 : // API call (profiler_suspend_and_sample_thread()). It involves getting a
25 : // stack trace and passing it to a callback function; it does not write to a
26 : // ProfileBuffer. The sampling is done from off-thread, and so uses
27 : // SuspendAndSampleAndResumeThread() to get the register values.
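//
// As an illustrative sketch (the exact types and declarations live in
// GeckoProfiler.h; the names here follow the functions mentioned above), a
// synchronous sample is typically captured and later attached to a marker
// like so:
//
//   // Capture the current thread's stack into a temporary ProfileBuffer.
//   UniqueProfilerBacktrace bt = profiler_get_backtrace();
//   // ... later, hand |bt| to a marker payload so the captured stack is
//   // shown alongside the marker in the final profile.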
28 :
29 : #include <algorithm>
30 : #include <ostream>
31 : #include <fstream>
32 : #include <sstream>
33 : #include <errno.h>
34 :
35 : #include "platform.h"
36 : #include "PlatformMacros.h"
37 : #include "mozilla/ArrayUtils.h"
38 : #include "mozilla/Atomics.h"
39 : #include "mozilla/UniquePtr.h"
40 : #include "mozilla/Vector.h"
41 : #include "GeckoProfiler.h"
42 : #include "GeckoProfilerReporter.h"
43 : #include "ProfilerIOInterposeObserver.h"
44 : #include "mozilla/AutoProfilerLabel.h"
45 : #include "mozilla/StackWalk.h"
46 : #include "mozilla/StaticPtr.h"
47 : #include "mozilla/ThreadLocal.h"
48 : #include "mozilla/TimeStamp.h"
50 : #include "ThreadInfo.h"
51 : #include "nsIHttpProtocolHandler.h"
52 : #include "nsIObserverService.h"
53 : #include "nsIXULAppInfo.h"
54 : #include "nsIXULRuntime.h"
55 : #include "nsDirectoryServiceUtils.h"
56 : #include "nsDirectoryServiceDefs.h"
57 : #include "nsMemoryReporterManager.h"
58 : #include "nsXULAppAPI.h"
59 : #include "nsProfilerStartParams.h"
60 : #include "ProfilerParent.h"
61 : #include "mozilla/Services.h"
62 : #include "nsThreadUtils.h"
63 : #include "ProfilerMarkerPayload.h"
64 : #include "shared-libraries.h"
65 : #include "prdtoa.h"
66 : #include "prtime.h"
67 :
68 : #ifdef MOZ_TASK_TRACER
69 : #include "GeckoTaskTracer.h"
70 : #endif
71 :
72 : #if defined(GP_OS_android)
73 : # include "FennecJNINatives.h"
74 : # include "FennecJNIWrappers.h"
75 : #endif
76 :
77 : #if defined(MOZ_PROFILING) && \
78 : (defined(GP_OS_windows) || defined(GP_OS_darwin))
79 : # define HAVE_NATIVE_UNWIND
80 : # define USE_NS_STACKWALK
81 : #endif
82 :
83 : // This should also work on ARM Linux, but not tested there yet.
84 : #if defined(GP_PLAT_arm_android)
85 : # define HAVE_NATIVE_UNWIND
86 : # define USE_EHABI_STACKWALK
87 : # include "EHABIStackWalk.h"
88 : #endif
89 :
90 : #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux)
91 : # define HAVE_NATIVE_UNWIND
92 : # define USE_LUL_STACKWALK
93 : # include "lul/LulMain.h"
94 : # include "lul/platform-linux-lul.h"
95 : #endif
96 :
97 : #ifdef MOZ_VALGRIND
98 : # include <valgrind/memcheck.h>
99 : #else
100 : # define VALGRIND_MAKE_MEM_DEFINED(_addr,_len) ((void)0)
101 : #endif
102 :
103 : #if defined(GP_OS_linux) || defined(GP_OS_android)
104 : # include <ucontext.h>
105 : #endif
106 :
107 : using namespace mozilla;
108 :
109 : LazyLogModule gProfilerLog("prof");
110 :
111 : #if defined(GP_OS_android)
112 : class GeckoJavaSampler : public java::GeckoJavaSampler::Natives<GeckoJavaSampler>
113 : {
114 : private:
115 : GeckoJavaSampler();
116 :
117 : public:
118 : static double GetProfilerTime() {
119 : if (!profiler_is_active()) {
120 : return 0.0;
121 : }
122 : return profiler_time();
123 : }
124 : };
125 : #endif
126 :
127 3 : class PSMutex : public StaticMutex {};
128 :
129 : typedef BaseAutoLock<PSMutex> PSAutoLock;
130 :
131 : // Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
132 : // fields.
133 : typedef const PSAutoLock& PSLockRef;
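//
// The usual pattern (used throughout this file, e.g. in
// profiler_stream_json_for_this_process() below) is:
//
//   PSAutoLock lock(gPSMutex);
//   if (!ActivePS::Exists(lock)) {
//     return false;
//   }
//   // ... then call other functions, passing |lock| as proof of locking ...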
134 :
135 : #define PS_GET(type_, name_) \
136 : static type_ name_(PSLockRef) { return sInstance->m##name_; } \
137 :
138 : #define PS_GET_LOCKLESS(type_, name_) \
139 : static type_ name_() { return sInstance->m##name_; } \
140 :
141 : #define PS_GET_AND_SET(type_, name_) \
142 : PS_GET(type_, name_) \
143 : static void Set##name_(PSLockRef, type_ a##name_) \
144 : { sInstance->m##name_ = a##name_; }
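//
// For example, PS_GET_AND_SET(bool, IsPaused) expands to:
//
//   static bool IsPaused(PSLockRef) { return sInstance->mIsPaused; }
//   static void SetIsPaused(PSLockRef, bool aIsPaused)
//   { sInstance->mIsPaused = aIsPaused; }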
145 :
146 : // All functions in this file can run on multiple threads unless they have an
147 : // NS_IsMainThread() assertion.
148 :
149 : // This class contains the profiler's core global state, i.e. that which is
150 : // valid even when the profiler is not active. Most profile operations can't do
151 : // anything useful when this class is not instantiated, so we release-assert
152 : // its non-nullness in all such operations.
153 : //
154 : // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
155 : // PSAutoLock reference as an argument as proof that the gPSMutex is currently
156 : // locked. This makes it clear when gPSMutex is locked and helps avoid
157 : // accidental unlocked accesses to global state. There are ways to circumvent
158 : // this mechanism, but please don't do so without *very* good reason and a
159 : // detailed explanation.
160 : //
161 : // The exceptions to this rule:
162 : //
163 : // - mProcessStartTime, because it's immutable;
164 : //
165 : // - each thread's RacyThreadInfo object is accessible without locking via
166 : // TLSInfo::RacyThreadInfo().
167 : class CorePS
168 : {
169 : private:
170 3 : CorePS()
171 3 : : mProcessStartTime(TimeStamp::ProcessCreation())
172 : #ifdef USE_LUL_STACKWALK
173 3 : , mLul(nullptr)
174 : #endif
175 3 : {}
176 :
177 0 : ~CorePS()
178 0 : {
179 0 : while (mLiveThreads.size() > 0) {
180 0 : delete mLiveThreads.back();
181 0 : mLiveThreads.pop_back();
182 : }
183 :
184 0 : while (mDeadThreads.size() > 0) {
185 0 : delete mDeadThreads.back();
186 0 : mDeadThreads.pop_back();
187 : }
188 0 : }
189 :
190 : public:
191 : typedef std::vector<ThreadInfo*> ThreadVector;
192 :
193 6 : static void Create(PSLockRef aLock) { sInstance = new CorePS(); }
194 :
195 0 : static void Destroy(PSLockRef aLock)
196 : {
197 0 : delete sInstance;
198 0 : sInstance = nullptr;
199 0 : }
200 :
201 : // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
202 : // being locked. This is because CorePS is instantiated so early on the main
203 : // thread that we don't have to worry about it being racy.
204 7275 : static bool Exists() { return !!sInstance; }
205 :
206 0 : static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
207 : size_t& aProfSize, size_t& aLulSize)
208 : {
209 0 : aProfSize += aMallocSizeOf(sInstance);
210 :
211 0 : for (uint32_t i = 0; i < sInstance->mLiveThreads.size(); i++) {
212 0 : aProfSize +=
213 0 : sInstance->mLiveThreads.at(i)->SizeOfIncludingThis(aMallocSizeOf);
214 : }
215 :
216 0 : for (uint32_t i = 0; i < sInstance->mDeadThreads.size(); i++) {
217 0 : aProfSize +=
218 0 : sInstance->mDeadThreads.at(i)->SizeOfIncludingThis(aMallocSizeOf);
219 : }
220 :
221 : // Measurement of the following things may be added later if DMD finds it
222 : // is worthwhile:
223 : // - CorePS::mLiveThreads itself (its elements' children are measured
224 : // above)
225 : // - CorePS::mDeadThreads itself (ditto)
226 : // - CorePS::mInterposeObserver
227 :
228 : #if defined(USE_LUL_STACKWALK)
229 0 : if (sInstance->mLul) {
230 0 : aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
231 : }
232 : #endif
233 0 : }
234 :
235 : // No PSLockRef is needed for this field because it's immutable.
236 0 : PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
237 :
238 152 : PS_GET(ThreadVector&, LiveThreads)
239 0 : PS_GET(ThreadVector&, DeadThreads)
240 :
241 : #ifdef USE_LUL_STACKWALK
242 0 : static lul::LUL* Lul(PSLockRef) { return sInstance->mLul.get(); }
243 0 : static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul)
244 : {
245 0 : sInstance->mLul = Move(aLul);
246 0 : }
247 : #endif
248 :
249 : private:
250 : // The singleton instance
251 : static CorePS* sInstance;
252 :
253 : // The time that the process started.
254 : const TimeStamp mProcessStartTime;
255 :
256 : // Info on all the registered threads, both live and dead. ThreadIds in
257 : // mLiveThreads are unique. ThreadIds in mDeadThreads may not be, because
258 : // ThreadIds can be reused. IsBeingProfiled() is true for all ThreadInfos in
259 : // mDeadThreads because we don't hold on to ThreadInfos for non-profiled dead
260 : // threads.
261 : ThreadVector mLiveThreads;
262 : ThreadVector mDeadThreads;
263 :
264 : #ifdef USE_LUL_STACKWALK
265 : // LUL's state. Null prior to the first activation, non-null thereafter.
266 : UniquePtr<lul::LUL> mLul;
267 : #endif
268 : };
269 :
270 : CorePS* CorePS::sInstance = nullptr;
271 :
272 : class SamplerThread;
273 :
274 : static SamplerThread*
275 : NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval);
276 :
277 : // This class contains the profiler's global state that is valid only when the
278 : // profiler is active. When not instantiated, the profiler is inactive.
279 : //
280 : // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
281 : // CorePS.
282 : //
283 : class ActivePS
284 : {
285 : private:
286 0 : static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount)
287 : {
288 : // Filter out any features unavailable in this platform/configuration.
289 0 : aFeatures &= profiler_get_available_features();
290 :
291 : #if defined(GP_OS_android)
292 : if (!jni::IsFennec()) {
293 : aFeatures &= ~ProfilerFeature::Java;
294 : }
295 : #endif
296 :
297 : // Always enable ProfilerFeature::Threads if we have a filter, because
298 : // users sometimes ask to filter by a list of threads but forget to
299 : // explicitly specify ProfilerFeature::Threads.
300 0 : if (aFilterCount > 0) {
301 0 : aFeatures |= ProfilerFeature::Threads;
302 : }
303 :
304 0 : return aFeatures;
305 : }
306 :
307 0 : ActivePS(PSLockRef aLock, int aEntries, double aInterval,
308 : uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount)
309 0 : : mGeneration(sNextGeneration++)
310 : , mEntries(aEntries)
311 : , mInterval(aInterval)
312 0 : , mFeatures(AdjustFeatures(aFeatures, aFilterCount))
313 : , mBuffer(MakeUnique<ProfileBuffer>(aEntries))
314 : // The new sampler thread doesn't start sampling immediately because the
315 : // main loop within Run() is blocked until this function's caller unlocks
316 : // gPSMutex.
317 0 : , mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval))
318 0 : , mInterposeObserver(ProfilerFeature::HasMainThreadIO(aFeatures)
319 0 : ? new ProfilerIOInterposeObserver()
320 : : nullptr)
322 : , mIsPaused(false)
323 : #if defined(GP_OS_linux)
324 0 : , mWasPaused(false)
325 : #endif
326 : {
327 : // Deep copy aFilters.
328 0 : MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
329 0 : for (uint32_t i = 0; i < aFilterCount; ++i) {
330 0 : mFilters[i] = aFilters[i];
331 : }
332 :
333 0 : if (mInterposeObserver) {
334 : // We need to register the observer on the main thread, because we want
335 : // to observe IO that happens on the main thread.
336 0 : if (NS_IsMainThread()) {
337 0 : IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
338 : } else {
339 0 : RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
340 0 : NS_DispatchToMainThread(
341 0 : NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
342 0 : IOInterposer::Register(IOInterposeObserver::OpAll, observer);
343 0 : }));
344 : }
345 : }
346 0 : }
347 :
348 0 : ~ActivePS()
349 0 : {
350 0 : if (mInterposeObserver) {
351 : // We need to unregister the observer on the main thread, because that's
352 : // where we've registered it.
353 0 : if (NS_IsMainThread()) {
354 0 : IOInterposer::Unregister(IOInterposeObserver::OpAll, mInterposeObserver);
355 : } else {
356 0 : RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
357 0 : NS_DispatchToMainThread(
358 0 : NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
359 0 : IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
360 0 : }));
361 : }
362 : }
363 0 : }
364 :
365 0 : bool ThreadSelected(const char* aThreadName)
366 : {
367 0 : MOZ_RELEASE_ASSERT(sInstance);
368 :
369 0 : if (mFilters.empty()) {
370 0 : return true;
371 : }
372 :
373 0 : std::string name = aThreadName;
374 0 : std::transform(name.begin(), name.end(), name.begin(), ::tolower);
375 :
376 0 : for (uint32_t i = 0; i < mFilters.length(); ++i) {
377 0 : std::string filter = mFilters[i];
378 0 : std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);
379 :
380 : // Crude, non-UTF-8-compatible, case-insensitive substring search.
381 0 : if (name.find(filter) != std::string::npos) {
382 0 : return true;
383 : }
384 : }
385 :
386 0 : return false;
387 : }
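  // For example, with filters {"GeckoMain", "Compositor"}, a thread named
  // "GeckoMain" is selected because, after lower-casing both sides,
  // "geckomain" contains the substring "geckomain".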
388 :
389 : public:
390 0 : static void Create(PSLockRef aLock, int aEntries, double aInterval,
391 : uint32_t aFeatures,
392 : const char** aFilters, uint32_t aFilterCount)
393 : {
394 0 : sInstance = new ActivePS(aLock, aEntries, aInterval, aFeatures,
395 0 : aFilters, aFilterCount);
396 0 : }
397 :
398 0 : static MOZ_MUST_USE SamplerThread* Destroy(PSLockRef aLock)
399 : {
400 0 : auto samplerThread = sInstance->mSamplerThread;
401 0 : delete sInstance;
402 0 : sInstance = nullptr;
403 :
404 0 : return samplerThread;
405 : }
406 :
407 167 : static bool Exists(PSLockRef) { return !!sInstance; }
408 :
409 0 : static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf)
410 : {
411 0 : size_t n = aMallocSizeOf(sInstance);
412 :
413 0 : n += sInstance->mBuffer->SizeOfIncludingThis(aMallocSizeOf);
414 :
415 0 : return n;
416 : }
417 :
418 0 : static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo)
419 : {
420 0 : MOZ_RELEASE_ASSERT(sInstance);
421 :
422 0 : return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
423 0 : sInstance->ThreadSelected(aInfo->Name()));
424 : }
425 :
426 0 : PS_GET(uint32_t, Generation)
427 :
428 0 : PS_GET(int, Entries)
429 :
430 0 : PS_GET(double, Interval)
431 :
432 0 : PS_GET(uint32_t, Features)
433 :
434 : #define PS_GET_FEATURE(n_, str_, Name_) \
435 : static bool Feature##Name_(PSLockRef) \
436 : { \
437 : return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
438 : }
439 :
440 0 : PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
441 :
442 : #undef PS_GET_FEATURE
443 :
444 0 : PS_GET(const Vector<std::string>&, Filters)
445 :
446 0 : static ProfileBuffer& Buffer(PSLockRef) { return *sInstance->mBuffer.get(); }
447 :
448 0 : PS_GET_AND_SET(bool, IsPaused)
449 :
450 : #if defined(GP_OS_linux)
451 0 : PS_GET_AND_SET(bool, WasPaused)
452 : #endif
453 :
454 : private:
455 : // The singleton instance.
456 : static ActivePS* sInstance;
457 :
458 : // We need to track activity generations. If we didn't, we could have the
459 : // following scenario.
460 : //
461 : // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
462 : // gPSMutex, deletes the SamplerThread (which does a join).
463 : //
464 : // - profiler_start() runs on a different thread, locks gPSMutex,
465 : // re-instantiates ActivePS, unlocks gPSMutex -- all before the join
466 : // completes.
467 : //
468 : // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
469 : // and continues as if the start/stop pair didn't occur. Also
470 : // profiler_stop() is stuck, unable to finish.
471 : //
472 : // By checking ActivePS *and* the generation, we can avoid this scenario.
473 : // sNextGeneration is used to track the next generation number; it is static
474 : // because it must persist across different ActivePS instantiations.
475 : const uint32_t mGeneration;
476 : static uint32_t sNextGeneration;
477 :
478 : // The number of entries in mBuffer.
479 : const int mEntries;
480 :
481 : // The interval between samples, measured in milliseconds.
482 : const double mInterval;
483 :
484 : // The profile features that are enabled.
485 : const uint32_t mFeatures;
486 :
487 : // Substrings of names of threads we want to profile.
488 : Vector<std::string> mFilters;
489 :
490 : // The buffer into which all samples are recorded. Always non-null. Always
491 : // used in conjunction with CorePS::m{Live,Dead}Threads.
492 : const UniquePtr<ProfileBuffer> mBuffer;
493 :
494 : // The current sampler thread. This class is not responsible for destroying
495 : // the SamplerThread object; the Destroy() method returns it so the caller
496 : // can destroy it.
497 : SamplerThread* const mSamplerThread;
498 :
499 : // The interposer that records main thread I/O.
500 : const RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;
501 :
502 : // Is the profiler paused?
503 : bool mIsPaused;
504 :
505 : #if defined(GP_OS_linux)
506 : // Used to record whether the profiler was paused just before forking. False
507 : // at all times except just before/after forking.
508 : bool mWasPaused;
509 : #endif
510 : };
511 :
512 : ActivePS* ActivePS::sInstance = nullptr;
513 : uint32_t ActivePS::sNextGeneration = 0;
514 :
515 : #undef PS_GET
516 : #undef PS_GET_LOCKLESS
517 : #undef PS_GET_AND_SET
518 :
519 : // The mutex that guards accesses to CorePS and ActivePS.
520 3 : static PSMutex gPSMutex;
521 :
522 : // The preferred way to check profiler activeness and features is via
523 : // ActivePS(). However, that requires locking gPSMutex. There are some hot
524 : // operations where absolute precision isn't required, so we duplicate the
525 : // activeness/feature state in a lock-free manner in this class.
526 : class RacyFeatures
527 : {
528 : public:
529 0 : static void SetActive(uint32_t aFeatures)
530 : {
531 0 : sActiveAndFeatures = Active | aFeatures;
532 0 : }
533 :
534 0 : static void SetInactive() { sActiveAndFeatures = 0; }
535 :
536 541 : static bool IsActive() { return uint32_t(sActiveAndFeatures) & Active; }
537 :
538 352 : static bool IsActiveWithFeature(uint32_t aFeature)
539 : {
540 352 : uint32_t af = sActiveAndFeatures; // copy it first
541 352 : return (af & Active) && (af & aFeature);
542 : }
543 :
544 808 : static bool IsActiveWithoutPrivacy()
545 : {
546 808 : uint32_t af = sActiveAndFeatures; // copy it first
547 808 : return (af & Active) && !(af & ProfilerFeature::Privacy);
548 : }
549 :
550 : private:
551 : static const uint32_t Active = 1u << 31;
552 :
553 : // Ensure Active doesn't overlap with any of the feature bits.
554 : #define NO_OVERLAP(n_, str_, Name_) \
555 : static_assert(ProfilerFeature::Name_ != Active, "bad Active value");
556 :
557 : PROFILER_FOR_EACH_FEATURE(NO_OVERLAP);
558 :
559 : #undef NO_OVERLAP
560 :
561 : // We combine the active bit with the feature bits so they can be read or
562 : // written in a single atomic operation.
563 : static Atomic<uint32_t> sActiveAndFeatures;
564 : };
565 :
566 : Atomic<uint32_t> RacyFeatures::sActiveAndFeatures(0);
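// Because the state is a single atomic word, hot paths can test activeness
// without touching gPSMutex. An illustrative sketch of the intended usage
// (the function name here is hypothetical):
//
//   void MaybeRecordSomething()
//   {
//     if (!RacyFeatures::IsActiveWithoutPrivacy()) {
//       return; // cheap early exit: one atomic load, no locking
//     }
//     // ... do the more expensive, possibly lock-taking, work ...
//   }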
567 :
568 : // Each live thread has a ThreadInfo, and we store a reference to it in TLS.
569 : // This class encapsulates that TLS.
570 : class TLSInfo
571 : {
572 : public:
573 75 : static bool Init(PSLockRef)
574 : {
575 75 : bool ok1 = sThreadInfo.init();
576 75 : bool ok2 = AutoProfilerLabel::sPseudoStack.init();
577 75 : return ok1 && ok2;
578 : }
579 :
580 : // Get the entire ThreadInfo. Accesses are guarded by gPSMutex.
581 32 : static ThreadInfo* Info(PSLockRef) { return sThreadInfo.get(); }
582 :
583 : // Get only the RacyThreadInfo. Accesses are not guarded by gPSMutex.
584 5308 : static RacyThreadInfo* RacyInfo()
585 : {
586 5308 : ThreadInfo* info = sThreadInfo.get();
587 5309 : return info ? info->RacyInfo().get() : nullptr;
588 : }
589 :
590 : // Get only the PseudoStack. Accesses are not guarded by gPSMutex. RacyInfo()
591 : // can also be used to get the PseudoStack, but that is marginally slower
592 : // because it requires an extra pointer indirection.
593 0 : static PseudoStack* Stack() { return AutoProfilerLabel::sPseudoStack.get(); }
594 :
595 76 : static void SetInfo(PSLockRef, ThreadInfo* aInfo)
596 : {
597 76 : sThreadInfo.set(aInfo);
598 226 : AutoProfilerLabel::sPseudoStack.set(
599 226 : aInfo ? aInfo->RacyInfo().get() : nullptr); // an upcast
600 76 : }
601 :
602 : private:
603 : // This is a non-owning reference to the ThreadInfo; CorePS::mLiveThreads is
604 : // the owning reference. On thread destruction, this reference is cleared and
605 : // the ThreadInfo is destroyed or transferred to CorePS::mDeadThreads.
606 : static MOZ_THREAD_LOCAL(ThreadInfo*) sThreadInfo;
607 : };
608 :
609 : MOZ_THREAD_LOCAL(ThreadInfo*) TLSInfo::sThreadInfo;
610 :
611 : // Although you can access a thread's PseudoStack via TLSInfo::sThreadInfo, we
612 : // also have a second TLS pointer directly to the PseudoStack. Here's why.
613 : //
614 : // - We need to be able to push to and pop from the PseudoStack in
615 : // AutoProfilerLabel.
616 : //
617 : // - The class functions are hot and must be defined in GeckoProfiler.h so they
618 : // can be inlined.
619 : //
620 : // - We don't want to expose TLSInfo (and ThreadInfo) in GeckoProfiler.h.
621 : //
622 : // This second pointer isn't ideal, but does provide a way to satisfy those
623 : // constraints. TLSInfo is responsible for updating it.
624 : MOZ_THREAD_LOCAL(PseudoStack*) AutoProfilerLabel::sPseudoStack;
625 :
626 : // The name of the main thread.
627 : static const char* const kMainThreadName = "GeckoMain";
628 :
629 : ////////////////////////////////////////////////////////////////////////
630 : // BEGIN sampling/unwinding code
631 :
632 : // The registers used for stack unwinding and a few other sampling purposes.
633 : // The ctor does nothing; users are responsible for filling in the fields.
634 : class Registers
635 : {
636 : public:
637 0 : Registers() {}
638 :
639 : #if defined(HAVE_NATIVE_UNWIND)
640 : // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
641 : void SyncPopulate();
642 : #endif
643 :
644 : void Clear() { memset(this, 0, sizeof(*this)); }
645 :
646 : // These fields are filled in by
647 : // SamplerThread::SuspendAndSampleAndResumeThread() for periodic and
648 : // backtrace samples, and by SyncPopulate() for synchronous samples.
649 : Address mPC; // Instruction pointer.
650 : Address mSP; // Stack pointer.
651 : Address mFP; // Frame pointer.
652 : Address mLR; // ARM link register.
653 : #if defined(GP_OS_linux) || defined(GP_OS_android)
654 : // This contains all the registers, which means it duplicates the four fields
655 : // above. This is ok.
656 : ucontext_t* mContext; // The context from the signal handler.
657 : #endif
658 : };
659 :
660 : static void
661 0 : AddPseudoEntry(PSLockRef aLock, NotNull<RacyThreadInfo*> aRacyInfo,
662 : const js::ProfileEntry& entry, ProfileBuffer& aBuffer)
663 : {
664 : // WARNING: this function runs within the profiler's "critical section".
665 :
666 0 : MOZ_ASSERT(entry.kind() == js::ProfileEntry::Kind::CPP_NORMAL ||
667 : entry.kind() == js::ProfileEntry::Kind::JS_NORMAL);
668 :
669 0 : aBuffer.AddEntry(ProfileBufferEntry::Label(entry.label()));
670 :
671 0 : const char* dynamicString = entry.dynamicString();
672 0 : int lineno = -1;
673 :
674 : // XXX: it's unclear why the computation of lineno should depend on
675 : // |dynamicString|. Perhaps it shouldn't?
676 :
677 0 : if (dynamicString) {
678 : // Adjust the dynamic string as necessary.
679 0 : if (ActivePS::FeaturePrivacy(aLock)) {
680 0 : dynamicString = "(private)";
681 0 : } else if (strlen(dynamicString) >= ProfileBuffer::kMaxFrameKeyLength) {
682 0 : dynamicString = "(too long)";
683 : }
684 :
685 : // Store the string using one or more DynamicStringFragment entries.
686 0 : aBuffer.AddDynamicStringEntry(dynamicString);
687 0 : if (entry.isJs()) {
688 0 : JSScript* script = entry.script();
689 0 : if (script) {
690 0 : if (!entry.pc()) {
691 : // The JIT only allows the top-most entry to have a nullptr pc.
692 0 : MOZ_ASSERT(&entry == &aRacyInfo->entries[aRacyInfo->stackSize() - 1]);
693 : } else {
694 0 : lineno = JS_PCToLineNumber(script, entry.pc());
695 : }
696 : }
697 : } else {
698 0 : lineno = entry.line();
699 : }
700 : } else {
701 : // XXX: Bug 1010578. Don't assume a CPP entry and try to get the line for
702 : // js entries as well.
703 0 : if (entry.isCpp()) {
704 0 : lineno = entry.line();
705 : }
706 : }
707 :
708 0 : if (lineno != -1) {
709 0 : aBuffer.AddEntry(ProfileBufferEntry::LineNumber(lineno));
710 : }
711 :
712 0 : aBuffer.AddEntry(ProfileBufferEntry::Category(int(entry.category())));
713 0 : }
714 :
715 : // Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
716 : // looping on corrupted stacks.
717 : //
718 : // The PseudoStack frame size is found in PseudoStack::MaxEntries.
719 : static const size_t MAX_NATIVE_FRAMES = 1024;
720 : static const size_t MAX_JS_FRAMES = 1024;
721 :
722 : struct NativeStack
723 : {
724 : void* mPCs[MAX_NATIVE_FRAMES];
725 : void* mSPs[MAX_NATIVE_FRAMES];
726 : size_t mCount; // Number of entries filled.
727 :
728 0 : NativeStack()
729 0 : : mCount(0)
730 0 : {}
731 : };
732 :
733 : Atomic<bool> WALKING_JS_STACK(false);
734 :
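// RAII guard that uses a compare-and-swap on WALKING_JS_STACK so that at most
// one thread walks the JS stack at a time. If the flag is already set,
// |walkAllowed| stays false and the caller skips the JS stack walk; the
// destructor clears the flag only if this guard was the one that set it.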
735 : struct AutoWalkJSStack
736 : {
737 : bool walkAllowed;
738 :
739 0 : AutoWalkJSStack() : walkAllowed(false) {
740 0 : walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
741 0 : }
742 :
743 0 : ~AutoWalkJSStack() {
744 0 : if (walkAllowed) {
745 0 : WALKING_JS_STACK = false;
746 : }
747 0 : }
748 : };
749 :
750 : static void
751 0 : MergeStacksIntoProfile(PSLockRef aLock, bool aIsSynchronous,
752 : const ThreadInfo& aThreadInfo, const Registers& aRegs,
753 : const NativeStack& aNativeStack, ProfileBuffer& aBuffer)
754 : {
755 : // WARNING: this function runs within the profiler's "critical section".
756 :
757 0 : NotNull<RacyThreadInfo*> racyInfo = aThreadInfo.RacyInfo();
758 0 : js::ProfileEntry* pseudoEntries = racyInfo->entries;
759 0 : uint32_t pseudoCount = racyInfo->stackSize();
760 0 : JSContext* context = aThreadInfo.mContext;
761 :
762 : // Make a copy of the JS stack into a JSFrame array. This is necessary since,
763 : // like the native stack, the JS stack is iterated youngest-to-oldest and we
764 : // need to iterate oldest-to-youngest when adding entries to aBuffer.
765 :
766 : // Synchronous sampling reports an invalid buffer generation to
767 : // ProfilingFrameIterator to avoid incorrectly resetting the generation of
768 : // sampled JIT entries inside the JS engine. See note below concerning 'J'
769 : // entries.
770 : uint32_t startBufferGen;
771 0 : startBufferGen = aIsSynchronous
772 0 : ? UINT32_MAX
773 : : aBuffer.mGeneration;
774 0 : uint32_t jsCount = 0;
775 : JS::ProfilingFrameIterator::Frame jsFrames[MAX_JS_FRAMES];
776 :
777 : // Only walk the jit stack if the profiling frame iterator is turned on.
778 0 : if (context && JS::IsProfilingEnabledForContext(context)) {
779 0 : AutoWalkJSStack autoWalkJSStack;
780 0 : const uint32_t maxFrames = ArrayLength(jsFrames);
781 :
782 0 : if (autoWalkJSStack.walkAllowed) {
783 0 : JS::ProfilingFrameIterator::RegisterState registerState;
784 0 : registerState.pc = aRegs.mPC;
785 0 : registerState.sp = aRegs.mSP;
786 0 : registerState.lr = aRegs.mLR;
787 0 : registerState.fp = aRegs.mFP;
788 :
789 : JS::ProfilingFrameIterator jsIter(context, registerState,
790 0 : startBufferGen);
791 0 : for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
792 : // See note below regarding 'J' entries.
793 0 : if (aIsSynchronous || jsIter.isWasm()) {
794 : uint32_t extracted =
795 0 : jsIter.extractStack(jsFrames, jsCount, maxFrames);
796 0 : jsCount += extracted;
797 0 : if (jsCount == maxFrames) {
798 0 : break;
799 : }
800 : } else {
801 : Maybe<JS::ProfilingFrameIterator::Frame> frame =
802 0 : jsIter.getPhysicalFrameWithoutLabel();
803 0 : if (frame.isSome()) {
804 0 : jsFrames[jsCount++] = frame.value();
805 : }
806 : }
807 : }
808 : }
809 : }
810 :
811 : // While the pseudo-stack array is ordered oldest-to-youngest, the JS and
812 : // native arrays are ordered youngest-to-oldest. We must add frames to aBuffer
813 : // oldest-to-youngest. Thus, iterate over the pseudo-stack forwards and the JS
814 : // and native arrays backwards. Note: this means the terminating condition
815 : // for jsIndex and nativeIndex is their dropping below 0.
816 0 : uint32_t pseudoIndex = 0;
817 0 : int32_t jsIndex = jsCount - 1;
818 0 : int32_t nativeIndex = aNativeStack.mCount - 1;
819 :
820 0 : uint8_t* lastPseudoCppStackAddr = nullptr;
821 :
822 : // Iterate as long as there is at least one frame remaining.
823 0 : while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) {
824 : // There are 1 to 3 frames available. Find and add the oldest.
825 0 : uint8_t* pseudoStackAddr = nullptr;
826 0 : uint8_t* jsStackAddr = nullptr;
827 0 : uint8_t* nativeStackAddr = nullptr;
828 :
829 0 : if (pseudoIndex != pseudoCount) {
830 0 : js::ProfileEntry& pseudoEntry = pseudoEntries[pseudoIndex];
831 :
832 0 : if (pseudoEntry.isCpp()) {
833 0 : lastPseudoCppStackAddr = (uint8_t*) pseudoEntry.stackAddress();
834 : }
835 :
836 : // Skip any JS_OSR frames. Such frames are used when the JS interpreter
837 : // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
838 : // To avoid both the pseudoframe and jit frame being recorded (and
839 : // showing up twice), the interpreter marks the interpreter pseudostack
840 : // frame as JS_OSR to ensure that it doesn't get counted.
841 0 : if (pseudoEntry.kind() == js::ProfileEntry::Kind::JS_OSR) {
842 0 : pseudoIndex++;
843 0 : continue;
844 : }
845 :
846 0 : MOZ_ASSERT(lastPseudoCppStackAddr);
847 0 : pseudoStackAddr = lastPseudoCppStackAddr;
848 : }
849 :
850 0 : if (jsIndex >= 0) {
851 0 : jsStackAddr = (uint8_t*) jsFrames[jsIndex].stackAddress;
852 : }
853 :
854 0 : if (nativeIndex >= 0) {
855 0 : nativeStackAddr = (uint8_t*) aNativeStack.mSPs[nativeIndex];
856 : }
857 :
858 : // If there's a native stack entry which has the same SP as a pseudo stack
859 : // entry, pretend we didn't see the native stack entry. Ditto for a native
860 : // stack entry which has the same SP as a JS stack entry. In effect this
861 : // means pseudo or JS entries trump conflicting native entries.
862 0 : if (nativeStackAddr && (pseudoStackAddr == nativeStackAddr ||
863 : jsStackAddr == nativeStackAddr)) {
864 0 : nativeStackAddr = nullptr;
865 0 : nativeIndex--;
866 0 : MOZ_ASSERT(pseudoStackAddr || jsStackAddr);
867 : }
868 :
869 : // Sanity checks.
870 0 : MOZ_ASSERT_IF(pseudoStackAddr, pseudoStackAddr != jsStackAddr &&
871 : pseudoStackAddr != nativeStackAddr);
872 0 : MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != pseudoStackAddr &&
873 : jsStackAddr != nativeStackAddr);
874 0 : MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != pseudoStackAddr &&
875 : nativeStackAddr != jsStackAddr);
876 :
877 : // Check to see if pseudoStack frame is top-most.
878 0 : if (pseudoStackAddr > jsStackAddr && pseudoStackAddr > nativeStackAddr) {
879 0 : MOZ_ASSERT(pseudoIndex < pseudoCount);
880 0 : js::ProfileEntry& pseudoEntry = pseudoEntries[pseudoIndex];
881 :
882 : // Pseudo-frames with the CPP_MARKER_FOR_JS kind are just annotations and
883 : // should not be recorded in the profile.
884 0 : if (pseudoEntry.kind() != js::ProfileEntry::Kind::CPP_MARKER_FOR_JS) {
885 0 : AddPseudoEntry(aLock, racyInfo, pseudoEntry, aBuffer);
886 : }
887 0 : pseudoIndex++;
888 0 : continue;
889 : }
890 :
891 : // Check to see if JS jit stack frame is top-most
892 0 : if (jsStackAddr > nativeStackAddr) {
893 0 : MOZ_ASSERT(jsIndex >= 0);
894 0 : const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex];
895 :
896 : // Stringifying non-wasm JIT frames is delayed until streaming time. To
897 : // re-lookup the entry in the JitcodeGlobalTable, we need to store the
898 : // JIT code address (OptInfoAddr) in the circular buffer.
899 : //
900 : // Note that we cannot do this when we are synchronously sampling the
901 : // current thread; that is, when called from profiler_get_backtrace. The
902 : // captured backtrace is usually externally stored for an indeterminate
903 : // amount of time, such as in nsRefreshDriver. Problematically, the
904 : // stored backtrace may be alive across a GC during which the profiler
905 : // itself is disabled. In that case, the JS engine is free to discard its
906 : // JIT code. This means that if we inserted such OptInfoAddr entries into
907 : // the buffer, nsRefreshDriver would now be holding on to a backtrace
908 : // with stale JIT code return addresses.
909 0 : if (aIsSynchronous ||
910 0 : jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
911 0 : aBuffer.AddEntry(ProfileBufferEntry::Label(""));
912 0 : aBuffer.AddDynamicStringEntry(jsFrame.label);
913 : } else {
914 0 : MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
915 : jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
916 : aBuffer.AddEntry(
917 0 : ProfileBufferEntry::JitReturnAddr(jsFrames[jsIndex].returnAddress));
918 : }
919 :
920 0 : jsIndex--;
921 0 : continue;
922 : }
923 :
924 : // If we reach here, there must be a native stack entry and it must be the
925 : // greatest entry.
926 0 : if (nativeStackAddr) {
927 0 : MOZ_ASSERT(nativeIndex >= 0);
928 0 : void* addr = (void*)aNativeStack.mPCs[nativeIndex];
929 0 : aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr(addr));
930 : }
931 0 : if (nativeIndex >= 0) {
932 0 : nativeIndex--;
933 : }
934 : }
935 :
936 : // Update the JS context with the current profile sample buffer generation.
937 : //
938 : // Do not do this for synchronous samples, which use their own
939 : // ProfileBuffers instead of the global one in ActivePS.
940 0 : if (!aIsSynchronous && context) {
941 0 : MOZ_ASSERT(aBuffer.mGeneration >= startBufferGen);
942 0 : uint32_t lapCount = aBuffer.mGeneration - startBufferGen;
943 0 : JS::UpdateJSContextProfilerSampleBufferGen(context, aBuffer.mGeneration,
944 0 : lapCount);
945 : }
946 0 : }
947 :
948 : #if defined(GP_OS_windows)
949 : static uintptr_t GetThreadHandle(PlatformData* aData);
950 : #endif
951 :
952 : #ifdef USE_NS_STACKWALK
953 : static void
954 : StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
955 : {
956 : NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
957 : MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
958 : nativeStack->mSPs[nativeStack->mCount] = aSP;
959 : nativeStack->mPCs[nativeStack->mCount] = aPC;
960 : nativeStack->mCount++;
961 : }
962 :
963 : static void
964 : DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
965 : const Registers& aRegs, NativeStack& aNativeStack)
966 : {
967 : // WARNING: this function runs within the profiler's "critical section".
968 :
969 : // Start with the current function. We use 0 as the frame number here because
970 : // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
971 : // This is a bit weird but it doesn't matter because StackWalkCallback()
972 : // doesn't use the frame number argument.
973 : StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
974 :
975 : uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
976 :
977 : #if defined(GP_OS_darwin) || (defined(GP_PLAT_x86_windows))
978 : void* stackEnd = aThreadInfo.StackTop();
979 : if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
980 : FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
981 : &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
982 : stackEnd);
983 : }
984 : #else
985 : // Win64 always omits frame pointers, so for it we use the slower
986 : // MozStackWalk().
987 : uintptr_t thread = GetThreadHandle(aThreadInfo.GetPlatformData());
988 : MOZ_ASSERT(thread);
989 : MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &aNativeStack,
990 : thread, /* platformData */ nullptr);
991 : #endif
992 : }
993 : #endif
994 :
995 : #ifdef USE_EHABI_STACKWALK
996 : static void
997 : DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
998 : const Registers& aRegs, NativeStack& aNativeStack)
999 : {
1000 : // WARNING: this function runs within the profiler's "critical section".
1001 :
1002 : const mcontext_t* mcontext = &aRegs.mContext->uc_mcontext;
1003 : mcontext_t savedContext;
1004 : NotNull<RacyThreadInfo*> racyInfo = aThreadInfo.RacyInfo();
1005 :
1006 : // The pseudostack contains an "EnterJIT" frame whenever we enter
1007 : // JIT code with profiling enabled; the stack pointer value points to
1008 : // the saved registers. We use this to resume unwinding
1009 : // after encountering JIT code.
1010 : for (uint32_t i = racyInfo->stackSize(); i > 0; --i) {
1011 : // The pseudostack grows towards higher indices, so we iterate
1012 : // backwards (from callee to caller).
1013 : js::ProfileEntry& entry = racyInfo->entries[i - 1];
1014 : if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) {
1015 : // Found JIT entry frame. Unwind up to that point (i.e., force
1016 : // the stack walk to stop before the block of saved registers;
1017 : // note that it yields nondecreasing stack pointers), then restore
1018 : // the saved state.
1019 : uint32_t* vSP = reinterpret_cast<uint32_t*>(entry.stackAddress());
1020 :
1021 : aNativeStack.mCount +=
1022 : EHABIStackWalk(*mcontext, /* stackBase = */ vSP,
1023 : aNativeStack.mSPs + aNativeStack.mCount,
1024 : aNativeStack.mPCs + aNativeStack.mCount,
1025 : MAX_NATIVE_FRAMES - aNativeStack.mCount);
1026 :
1027 : memset(&savedContext, 0, sizeof(savedContext));
1028 :
1029 : // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
1030 : savedContext.arm_r4 = *vSP++;
1031 : savedContext.arm_r5 = *vSP++;
1032 : savedContext.arm_r6 = *vSP++;
1033 : savedContext.arm_r7 = *vSP++;
1034 : savedContext.arm_r8 = *vSP++;
1035 : savedContext.arm_r9 = *vSP++;
1036 : savedContext.arm_r10 = *vSP++;
1037 : savedContext.arm_fp = *vSP++;
1038 : savedContext.arm_lr = *vSP++;
1039 : savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP);
1040 : savedContext.arm_pc = savedContext.arm_lr;
1041 : mcontext = &savedContext;
1042 : }
1043 : }
1044 :
1045 : // Now unwind whatever's left (starting from either the last EnterJIT frame
1046 : // or, if no EnterJIT was found, the original registers).
1047 : aNativeStack.mCount +=
1048 : EHABIStackWalk(*mcontext, aThreadInfo.StackTop(),
1049 : aNativeStack.mSPs + aNativeStack.mCount,
1050 : aNativeStack.mPCs + aNativeStack.mCount,
1051 : MAX_NATIVE_FRAMES - aNativeStack.mCount);
1052 : }
1053 : #endif
1054 :
1055 : #ifdef USE_LUL_STACKWALK
1056 :
1057 : // See the comment at the callsite for why this function is necessary.
1058 : #if defined(MOZ_HAVE_ASAN_BLACKLIST)
1059 : MOZ_ASAN_BLACKLIST static void
1060 : ASAN_memcpy(void* aDst, const void* aSrc, size_t aLen)
1061 : {
1062 : // The obvious thing to do here is call memcpy(). However, although
1063 : // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
1064 : // false positive still manifests! So we must implement memcpy() ourselves
1065 : // within this function.
1066 : char* dst = static_cast<char*>(aDst);
1067 : const char* src = static_cast<const char*>(aSrc);
1068 :
1069 : for (size_t i = 0; i < aLen; i++) {
1070 : dst[i] = src[i];
1071 : }
1072 : }
1073 : #endif
1074 :
1075 : static void
1076 0 : DoNativeBacktrace(PSLockRef aLock, const ThreadInfo& aThreadInfo,
1077 : const Registers& aRegs, NativeStack& aNativeStack)
1078 : {
1079 : // WARNING: this function runs within the profiler's "critical section".
1080 :
1081 0 : const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
1082 :
1083 0 : lul::UnwindRegs startRegs;
1084 0 : memset(&startRegs, 0, sizeof(startRegs));
1085 :
1086 : #if defined(GP_PLAT_amd64_linux)
1087 0 : startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
1088 0 : startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
1089 0 : startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
1090 : #elif defined(GP_PLAT_arm_android)
1091 : startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
1092 : startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
1093 : startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
1094 : startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
1095 : startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
1096 : startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
1097 : #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1098 : startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
1099 : startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
1100 : startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
1101 : #else
1102 : # error "Unknown plat"
1103 : #endif
1104 :
1105 : // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
1106 : // stack's registered top point. Do some basic sanity checks too. This
1107 : // assumes that the TaggedUWord holding the stack pointer value is valid, but
1108 : // it should be, since it was constructed that way in the code just above.
1109 :
1110 : // We could construct |stackImg| so that LUL reads directly from the stack in
1111 : // question, rather than from a copy of it. That would reduce overhead and
1112 : // space use a bit. However, it gives a problem with dynamic analysis tools
1113 : // (ASan, TSan, Valgrind) which is that such tools will report invalid or
1114 : // racing memory accesses, and such accesses will be reported deep inside LUL.
1115 : // By taking a copy here, we can either sanitise the copy (for Valgrind) or
1116 : // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
1117 : // to try and suppress errors inside LUL.
1118 : //
1119 : // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
1120 : // observed in some minutes of testing, whilst keeping the size of this
1121 : // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in
1122 : // practice are small, 4KB or less, and so the copy costs are insignificant
1123 : // compared to other profiler overhead.
1124 : //
1125 : // |stackImg| is allocated on this (the sampling thread's) stack. That
1126 : // implies that the frame for this function is at least N_STACK_BYTES large.
1127 : // In general it would be considered unacceptable to have such a large frame
1128 : // on a stack, but it only exists for the unwinder thread, and so is not
1129 : // expected to be a problem. Allocating it on the heap is troublesome because
1130 : // this function runs whilst the sampled thread is suspended, so any heap
1131 : // allocation risks deadlock. Allocating it as a global variable is not
1132 : // thread safe, which would be a problem if we ever allow multiple sampler
1133 : // threads. Hence allocating it on the stack seems to be the least-worst
1134 : // option.
1135 :
1136 : lul::StackImage stackImg;
1137 :
1138 : {
1139 : #if defined(GP_PLAT_amd64_linux)
1140 0 : uintptr_t rEDZONE_SIZE = 128;
1141 0 : uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
1142 : #elif defined(GP_PLAT_arm_android)
1143 : uintptr_t rEDZONE_SIZE = 0;
1144 : uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
1145 : #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1146 : uintptr_t rEDZONE_SIZE = 0;
1147 : uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
1148 : #else
1149 : # error "Unknown plat"
1150 : #endif
1151 0 : uintptr_t end = reinterpret_cast<uintptr_t>(aThreadInfo.StackTop());
1152 0 : uintptr_t ws = sizeof(void*);
1153 0 : start &= ~(ws-1);
1154 0 : end &= ~(ws-1);
1155 0 : uintptr_t nToCopy = 0;
1156 0 : if (start < end) {
1157 0 : nToCopy = end - start;
1158 0 : if (nToCopy > lul::N_STACK_BYTES)
1159 0 : nToCopy = lul::N_STACK_BYTES;
1160 : }
1161 0 : MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1162 0 : stackImg.mLen = nToCopy;
1163 0 : stackImg.mStartAvma = start;
1164 0 : if (nToCopy > 0) {
1165 : // If this is a vanilla memcpy(), ASAN makes the following complaint:
1166 : //
1167 : // ERROR: AddressSanitizer: stack-buffer-underflow ...
1168 : // ...
1169 : // HINT: this may be a false positive if your program uses some custom
1170 : // stack unwind mechanism or swapcontext
1171 : //
1172 : // This code is very much a custom stack unwind mechanism! So we use an
1173 : // alternative memcpy() implementation that is ignored by ASAN.
1174 : #if defined(MOZ_HAVE_ASAN_BLACKLIST)
1175 : ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
1176 : #else
1177 0 : memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
1178 : #endif
1179 : (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
1180 : }
1181 : }
1182 :
1183 0 : size_t framePointerFramesAcquired = 0;
1184 0 : lul::LUL* lul = CorePS::Lul(aLock);
1185 0 : lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
1186 : reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
1187 : &aNativeStack.mCount, &framePointerFramesAcquired,
1188 0 : MAX_NATIVE_FRAMES, &startRegs, &stackImg);
1189 :
1190 : // Update stats in the LUL stats object. Unfortunately this requires
1191 : // three global memory operations.
1192 0 : lul->mStats.mContext += 1;
1193 0 : lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
1194 0 : lul->mStats.mFP += framePointerFramesAcquired;
1195 0 : }
1196 :
1197 : #endif
1198 :
1199 : // Writes some components shared by periodic and synchronous profiles to
1200 : // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
1201 : // and DoPeriodicSample().)
1202 : //
1203 : // The grammar for entry sequences is in a comment above
1204 : // ProfileBuffer::StreamSamplesToJSON.
1205 : static inline void
1206 0 : DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
1207 : ThreadInfo& aThreadInfo, const TimeStamp& aNow,
1208 : const Registers& aRegs, ProfileBuffer::LastSample* aLS,
1209 : ProfileBuffer& aBuffer)
1210 : {
1211 : // WARNING: this function runs within the profiler's "critical section".
1212 :
1213 0 : MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
1214 :
1215 0 : aBuffer.AddThreadIdEntry(aThreadInfo.ThreadId(), aLS);
1216 :
1217 0 : TimeDuration delta = aNow - CorePS::ProcessStartTime();
1218 0 : aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
1219 :
1220 0 : NativeStack nativeStack;
1221 : #if defined(HAVE_NATIVE_UNWIND)
1222 0 : if (ActivePS::FeatureStackWalk(aLock)) {
1223 0 : DoNativeBacktrace(aLock, aThreadInfo, aRegs, nativeStack);
1224 :
1225 0 : MergeStacksIntoProfile(aLock, aIsSynchronous, aThreadInfo, aRegs,
1226 0 : nativeStack, aBuffer);
1227 : } else
1228 : #endif
1229 : {
1230 0 : MergeStacksIntoProfile(aLock, aIsSynchronous, aThreadInfo, aRegs,
1231 0 : nativeStack, aBuffer);
1232 :
1233 0 : if (ActivePS::FeatureLeaf(aLock)) {
1234 0 : aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
1235 : }
1236 : }
1237 0 : }
1238 :
1239 : // Writes the components of a synchronous sample to the given ProfileBuffer.
1240 : static void
1241 0 : DoSyncSample(PSLockRef aLock, ThreadInfo& aThreadInfo, const TimeStamp& aNow,
1242 : const Registers& aRegs, ProfileBuffer& aBuffer)
1243 : {
1244 : // WARNING: this function runs within the profiler's "critical section".
1245 :
1246 : DoSharedSample(aLock, /* isSynchronous = */ true, aThreadInfo, aNow, aRegs,
1247 0 : /* lastSample = */ nullptr, aBuffer);
1248 0 : }
1249 :
1250 : // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
1251 : static void
1252 0 : DoPeriodicSample(PSLockRef aLock, ThreadInfo& aThreadInfo,
1253 : const TimeStamp& aNow, const Registers& aRegs,
1254 : int64_t aRSSMemory, int64_t aUSSMemory)
1255 : {
1256 : // WARNING: this function runs within the profiler's "critical section".
1257 :
1258 0 : ProfileBuffer& buffer = ActivePS::Buffer(aLock);
1259 :
1260 : DoSharedSample(aLock, /* isSynchronous = */ false, aThreadInfo, aNow, aRegs,
1261 0 : &aThreadInfo.LastSample(), buffer);
1262 :
1263 : ProfilerMarkerLinkedList* pendingMarkersList =
1264 0 : aThreadInfo.RacyInfo()->GetPendingMarkers();
1265 0 : while (pendingMarkersList && pendingMarkersList->peek()) {
1266 0 : ProfilerMarker* marker = pendingMarkersList->popHead();
1267 0 : buffer.AddStoredMarker(marker);
1268 0 : buffer.AddEntry(ProfileBufferEntry::Marker(marker));
1269 : }
1270 :
1271 0 : ThreadResponsiveness* resp = aThreadInfo.GetThreadResponsiveness();
1272 0 : if (resp && resp->HasData()) {
1273 0 : TimeDuration delta = resp->GetUnresponsiveDuration(aNow);
1274 0 : buffer.AddEntry(ProfileBufferEntry::Responsiveness(delta.ToMilliseconds()));
1275 : }
1276 :
1277 0 : if (aRSSMemory != 0) {
1278 0 : double rssMemory = static_cast<double>(aRSSMemory);
1279 0 : buffer.AddEntry(ProfileBufferEntry::ResidentMemory(rssMemory));
1280 : }
1281 :
1282 0 : if (aUSSMemory != 0) {
1283 0 : double ussMemory = static_cast<double>(aUSSMemory);
1284 0 : buffer.AddEntry(ProfileBufferEntry::UnsharedMemory(ussMemory));
1285 : }
1286 0 : }
1287 :
1288 : // END sampling/unwinding code
1289 : ////////////////////////////////////////////////////////////////////////
1290 :
1291 : ////////////////////////////////////////////////////////////////////////
1292 : // BEGIN saving/streaming code
1293 :
1294 : const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;
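// (That value is 2^53 - 1, the largest integer exactly representable in a JS
// double; SafeJSInteger() below maps anything larger to -1.)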
1295 :
1296 : static int64_t
1297 0 : SafeJSInteger(uint64_t aValue) {
1298 0 : return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
1299 : }
1300 :
1301 : static void
1302 0 : AddSharedLibraryInfoToStream(JSONWriter& aWriter, const SharedLibrary& aLib)
1303 : {
1304 0 : aWriter.StartObjectElement();
1305 0 : aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
1306 0 : aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
1307 0 : aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
1308 0 : aWriter.StringProperty("name", NS_ConvertUTF16toUTF8(aLib.GetModuleName()).get());
1309 0 : aWriter.StringProperty("path", NS_ConvertUTF16toUTF8(aLib.GetModulePath()).get());
1310 0 : aWriter.StringProperty("debugName", NS_ConvertUTF16toUTF8(aLib.GetDebugName()).get());
1311 0 : aWriter.StringProperty("debugPath", NS_ConvertUTF16toUTF8(aLib.GetDebugPath()).get());
1312 0 : aWriter.StringProperty("breakpadId", aLib.GetBreakpadId().c_str());
1313 0 : aWriter.StringProperty("arch", aLib.GetArch().c_str());
1314 0 : aWriter.EndObject();
1315 0 : }
1316 :
1317 : void
1318 0 : AppendSharedLibraries(JSONWriter& aWriter)
1319 : {
1320 0 : SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
1321 0 : info.SortByAddress();
1322 0 : for (size_t i = 0; i < info.GetSize(); i++) {
1323 0 : AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
1324 : }
1325 0 : }
1326 :
1327 : #ifdef MOZ_TASK_TRACER
1328 : static void
1329 : StreamNameAndThreadId(JSONWriter& aWriter, const char* aName, int aThreadId)
1330 : {
1331 : aWriter.StartObjectElement();
1332 : {
1333 : if (XRE_GetProcessType() == GeckoProcessType_Plugin) {
1334 : // TODO Add the proper plugin name
1335 : aWriter.StringProperty("name", "Plugin");
1336 : } else {
1337 : aWriter.StringProperty("name", aName);
1338 : }
1339 : aWriter.IntProperty("tid", aThreadId);
1340 : }
1341 : aWriter.EndObject();
1342 : }
1343 : #endif
1344 :
1345 : static void
1346 0 : StreamTaskTracer(PSLockRef aLock, SpliceableJSONWriter& aWriter)
1347 : {
1348 : #ifdef MOZ_TASK_TRACER
1349 : MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
1350 :
1351 : aWriter.StartArrayProperty("data");
1352 : {
1353 : UniquePtr<nsTArray<nsCString>> data =
1354 : tasktracer::GetLoggedData(CorePS::ProcessStartTime());
1355 : for (uint32_t i = 0; i < data->Length(); ++i) {
1356 : aWriter.StringElement((data->ElementAt(i)).get());
1357 : }
1358 : }
1359 : aWriter.EndArray();
1360 :
1361 : aWriter.StartArrayProperty("threads");
1362 : {
1363 : const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(aLock);
1364 : for (size_t i = 0; i < liveThreads.size(); i++) {
1365 : ThreadInfo* info = liveThreads.at(i);
1366 : StreamNameAndThreadId(aWriter, info->Name(), info->ThreadId());
1367 : }
1368 :
1369 : const CorePS::ThreadVector& deadThreads = CorePS::DeadThreads(aLock);
1370 : for (size_t i = 0; i < deadThreads.size(); i++) {
1371 : ThreadInfo* info = deadThreads.at(i);
1372 : StreamNameAndThreadId(aWriter, info->Name(), info->ThreadId());
1373 : }
1374 : }
1375 : aWriter.EndArray();
1376 :
1377 : aWriter.DoubleProperty(
1378 : "start", static_cast<double>(tasktracer::GetStartTime()));
1379 : #endif
1380 0 : }
1381 :
1382 : static void
1383 0 : StreamMetaJSCustomObject(PSLockRef aLock, SpliceableJSONWriter& aWriter)
1384 : {
1385 0 : MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
1386 :
1387 0 : aWriter.IntProperty("version", 6);
1388 :
1389 : // The "startTime" field holds the number of milliseconds since midnight
1390 : // January 1, 1970 GMT. This grotty code computes (Now - (Now -
1391 : // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
1392 0 : TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
1393 0 : aWriter.DoubleProperty(
1394 0 : "startTime", static_cast<double>(PR_Now()/1000.0 - delta.ToMilliseconds()));
1395 :
1396 0 : if (!NS_IsMainThread()) {
1397 : // Leave the rest of the properties out if we're not on the main thread.
1398 : // At the moment, the only case in which this function is called on a
1399 : // background thread is if we're in a content process and are going to
1400 : // send this profile to the parent process. In that case, the parent
1401 : // process profile's "meta" object already has the rest of the properties,
1402 : // and the parent process profile is dumped on that process's main thread.
1403 0 : return;
1404 : }
1405 :
1406 0 : aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
1407 0 : aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
1408 :
1409 : #ifdef DEBUG
1410 0 : aWriter.IntProperty("debug", 1);
1411 : #else
1412 : aWriter.IntProperty("debug", 0);
1413 : #endif
1414 :
1415 0 : aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0);
1416 :
1417 0 : bool asyncStacks = Preferences::GetBool("javascript.options.asyncstack");
1418 0 : aWriter.IntProperty("asyncstack", asyncStacks);
1419 :
1420 0 : aWriter.IntProperty("processType", XRE_GetProcessType());
1421 :
1422 : nsresult res;
1423 : nsCOMPtr<nsIHttpProtocolHandler> http =
1424 0 : do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);
1425 :
1426 0 : if (!NS_FAILED(res)) {
1427 0 : nsAutoCString string;
1428 :
1429 0 : res = http->GetPlatform(string);
1430 0 : if (!NS_FAILED(res)) {
1431 0 : aWriter.StringProperty("platform", string.Data());
1432 : }
1433 :
1434 0 : res = http->GetOscpu(string);
1435 0 : if (!NS_FAILED(res)) {
1436 0 : aWriter.StringProperty("oscpu", string.Data());
1437 : }
1438 :
1439 0 : res = http->GetMisc(string);
1440 0 : if (!NS_FAILED(res)) {
1441 0 : aWriter.StringProperty("misc", string.Data());
1442 : }
1443 : }
1444 :
1445 0 : nsCOMPtr<nsIXULRuntime> runtime = do_GetService("@mozilla.org/xre/runtime;1");
1446 0 : if (runtime) {
1447 0 : nsAutoCString string;
1448 :
1449 0 : res = runtime->GetXPCOMABI(string);
1450 0 : if (!NS_FAILED(res))
1451 0 : aWriter.StringProperty("abi", string.Data());
1452 :
1453 0 : res = runtime->GetWidgetToolkit(string);
1454 0 : if (!NS_FAILED(res))
1455 0 : aWriter.StringProperty("toolkit", string.Data());
1456 : }
1457 :
1458 : nsCOMPtr<nsIXULAppInfo> appInfo =
1459 0 : do_GetService("@mozilla.org/xre/app-info;1");
1460 :
1461 0 : if (appInfo) {
1462 0 : nsAutoCString string;
1463 0 : res = appInfo->GetName(string);
1464 0 : if (!NS_FAILED(res))
1465 0 : aWriter.StringProperty("product", string.Data());
1466 : }
1467 : }
1468 :
1469 : #if defined(GP_OS_android)
1470 : static void
1471 : BuildJavaThreadJSObject(SpliceableJSONWriter& aWriter)
1472 : {
1473 : aWriter.StringProperty("name", "Java Main Thread");
1474 :
1475 : aWriter.StartArrayProperty("samples");
1476 : {
1477 : for (int sampleId = 0; true; sampleId++) {
1478 : bool firstRun = true;
1479 : for (int frameId = 0; true; frameId++) {
1480 : jni::String::LocalRef frameName =
1481 : java::GeckoJavaSampler::GetFrameName(0, sampleId, frameId);
1482 :
1483 : // When we run out of frames, we stop looping.
1484 : if (!frameName) {
1485 : // If we found at least one frame, we have objects to close.
1486 : if (!firstRun) {
1487 : aWriter.EndArray();
1488 : aWriter.EndObject();
1489 : }
1490 : break;
1491 : }
1492 : // The first time around, open the sample object and frames array.
1493 : if (firstRun) {
1494 : firstRun = false;
1495 :
1496 : double sampleTime =
1497 : java::GeckoJavaSampler::GetSampleTime(0, sampleId);
1498 :
1499 : aWriter.StartObjectElement();
1500 : aWriter.DoubleProperty("time", sampleTime);
1501 :
1502 : aWriter.StartArrayProperty("frames");
1503 : }
1504 :
1505 : // Add a frame to the sample.
1506 : aWriter.StartObjectElement();
1507 : {
1508 : aWriter.StringProperty("location",
1509 : frameName->ToCString().BeginReading());
1510 : }
1511 : aWriter.EndObject();
1512 : }
1513 :
1514 : // If we found no frames for this sample, we are done.
1515 : if (firstRun) {
1516 : break;
1517 : }
1518 : }
1519 : }
1520 : aWriter.EndArray();
1521 : }
1522 : #endif
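 :
 : // For reference, a sketch of the object BuildJavaThreadJSObject() emits
 : // (the sample time and frame name are illustrative):
 : //
 : //   {
 : //     "name": "Java Main Thread",
 : //     "samples": [
 : //       { "time": 123.4,
 : //         "frames": [ { "location": "android.os.Looper.loop()" }, ... ] },
 : //       ...
 : //     ]
 : //   }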
1523 :
1524 : static void
1525 0 : locked_profiler_stream_json_for_this_process(PSLockRef aLock,
1526 : SpliceableJSONWriter& aWriter,
1527 : double aSinceTime)
1528 : {
1529 0 : LOG("locked_profiler_stream_json_for_this_process");
1530 :
1531 0 : MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
1532 :
1533 : // Put shared library info
1534 0 : aWriter.StartArrayProperty("libs");
1535 0 : AppendSharedLibraries(aWriter);
1536 0 : aWriter.EndArray();
1537 :
1538 : // Put metadata
1539 0 : aWriter.StartObjectProperty("meta");
1540 : {
1541 0 : StreamMetaJSCustomObject(aLock, aWriter);
1542 : }
1543 0 : aWriter.EndObject();
1544 :
1545 : // TaskTracer data doesn't belong in the circular buffer.
1546 0 : if (ActivePS::FeatureTaskTracer(aLock)) {
1547 0 : aWriter.StartObjectProperty("tasktracer");
1548 0 : StreamTaskTracer(aLock, aWriter);
1549 0 : aWriter.EndObject();
1550 : }
1551 :
1552 : // List the samples for each thread profile
1553 0 : aWriter.StartArrayProperty("threads");
1554 : {
1555 0 : const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(aLock);
1556 0 : for (size_t i = 0; i < liveThreads.size(); i++) {
1557 0 : ThreadInfo* info = liveThreads.at(i);
1558 0 : if (!info->IsBeingProfiled()) {
1559 0 : continue;
1560 : }
1561 0 : info->StreamJSON(ActivePS::Buffer(aLock), aWriter,
1562 0 : CorePS::ProcessStartTime(), aSinceTime);
1563 : }
1564 :
1565 0 : const CorePS::ThreadVector& deadThreads = CorePS::DeadThreads(aLock);
1566 0 : for (size_t i = 0; i < deadThreads.size(); i++) {
1567 0 : ThreadInfo* info = deadThreads.at(i);
1568 0 : MOZ_ASSERT(info->IsBeingProfiled());
1569 0 : info->StreamJSON(ActivePS::Buffer(aLock), aWriter,
1570 0 : CorePS::ProcessStartTime(), aSinceTime);
1571 : }
1572 :
1573 : #if defined(GP_OS_android)
1574 : if (ActivePS::FeatureJava(aLock)) {
1575 : java::GeckoJavaSampler::Pause();
1576 :
1577 : aWriter.Start();
1578 : {
1579 : BuildJavaThreadJSObject(aWriter);
1580 : }
1581 : aWriter.End();
1582 :
1583 : java::GeckoJavaSampler::Unpause();
1584 : }
1585 : #endif
1586 : }
1587 0 : aWriter.EndArray();
1588 0 : }
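 :
 : // For reference, the overall shape of the JSON written by the function
 : // above ("tasktracer" appears only when that feature is active):
 : //
 : //   "libs": [ ... ],        // shared library info
 : //   "meta": { ... },        // see StreamMetaJSCustomObject()
 : //   "tasktracer": { ... },  // optional
 : //   "threads": [ ... ]      // one entry per profiled (live or dead) thread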
1589 :
1590 : bool
1591 0 : profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter, double aSinceTime)
1592 : {
1593 0 : LOG("profiler_stream_json_for_this_process");
1594 :
1595 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
1596 :
1597 0 : PSAutoLock lock(gPSMutex);
1598 :
1599 0 : if (!ActivePS::Exists(lock)) {
1600 0 : return false;
1601 : }
1602 :
1603 0 : locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime);
1604 0 : return true;
1605 : }
1606 :
1607 : // END saving/streaming code
1608 : ////////////////////////////////////////////////////////////////////////
1609 :
1610 : static void
1611 0 : PrintUsageThenExit(int aExitCode)
1612 : {
1613 0 : MOZ_RELEASE_ASSERT(NS_IsMainThread());
1614 :
1615 : printf(
1616 : "\n"
1617 : "Profiler environment variable usage:\n"
1618 : "\n"
1619 : " MOZ_PROFILER_HELP\n"
1620 : " If set to any value, prints this message.\n"
1621 : "\n"
1622 : " MOZ_LOG\n"
1623 : " Enables logging. The levels of logging available are\n"
1624 : " 'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n"
1625 : "\n"
1626 : " MOZ_PROFILER_STARTUP\n"
1627 : " If set to any value, starts the profiler immediately on start-up.\n"
1628 : " Useful if you want profile code that runs very early.\n"
1629 : "\n"
1630 : " MOZ_PROFILER_STARTUP_ENTRIES=<1..>\n"
1631 : " If MOZ_PROFILER_STARTUP is set, specifies the number of entries in\n"
1632 : " the profiler's circular buffer when the profiler is first started.\n"
1633 : " If unset, the platform default is used.\n"
1634 : "\n"
1635 : " MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
1636 : " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
1637 : " measured in milliseconds, when the profiler is first started.\n"
1638 : " If unset, the platform default is used.\n"
1639 : "\n"
1640 : " MOZ_PROFILER_SHUTDOWN\n"
1641 : " If set, the profiler saves a profile to the named file on shutdown.\n"
1642 : "\n"
1643 : " MOZ_PROFILER_LUL_TEST\n"
1644 : " If set to any value, runs LUL unit tests at startup.\n"
1645 : "\n"
1646 : " This platform %s native unwinding.\n"
1647 : "\n",
1648 : #if defined(HAVE_NATIVE_UNWIND)
1649 : "supports"
1650 : #else
1651 : "does not support"
1652 : #endif
1653 0 : );
1654 :
1655 0 : exit(aExitCode);
1656 : }
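 :
 : // For example, a typical startup-profiling run using the variables
 : // documented above might look like this (shell syntax; the binary name and
 : // values are illustrative):
 : //
 : //   MOZ_PROFILER_STARTUP=1 \
 : //   MOZ_PROFILER_STARTUP_ENTRIES=1000000 \
 : //   MOZ_PROFILER_STARTUP_INTERVAL=5 \
 : //   MOZ_PROFILER_SHUTDOWN=/tmp/profile.json ./firefox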
1657 :
1658 : ////////////////////////////////////////////////////////////////////////
1659 : // BEGIN Sampler
1660 :
1661 : #if defined(GP_OS_linux) || defined(GP_OS_android)
1662 : struct SigHandlerCoordinator;
1663 : #endif
1664 :
1665 : // Sampler performs setup and teardown of the state required to sample with the
1666 : // profiler. Sampler may exist when ActivePS is not present.
1667 : //
1668 : // SuspendAndSampleAndResumeThread must only be called from a single thread,
1669 : // and must not sample the thread it is being called from. A separate Sampler
1670 : // instance must be used for each thread which wants to capture samples.
1671 :
1672 : // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
1673 : //
1674 : // With the exception of SamplerThread, all Sampler objects must be Disable-d
1675 : // before releasing the lock which was used to create them. This avoids races
1676 : // on linux with the SIGPROF signal handler.
1677 :
1678 : class Sampler
1679 : {
1680 : public:
1681 : // Sets up the profiler such that it can begin sampling.
1682 : explicit Sampler(PSLockRef aLock);
1683 :
1684 : // Disable the sampler, restoring it to its previous state. This must be
1685 : // called once, and only once, before the Sampler is destroyed.
1686 : void Disable(PSLockRef aLock);
1687 :
1688 : // This method suspends and resumes the samplee thread. It calls the passed-in
1689 : // function-like object aProcessRegs (passing it a populated |const
1690 : // Registers&| arg) while the samplee thread is suspended.
1691 : //
1692 : // Func must be a function-like object of type `void(const Registers&)`.
1693 : template<typename Func>
1694 : void SuspendAndSampleAndResumeThread(PSLockRef aLock,
1695 : const ThreadInfo& aThreadInfo,
1696 : const Func& aProcessRegs);
1697 :
1698 : private:
1699 : #if defined(GP_OS_linux) || defined(GP_OS_android)
1700 : // Used to restore the SIGPROF handler when ours is removed.
1701 : struct sigaction mOldSigprofHandler;
1702 :
1703 : // This process' ID. Needed as an argument for tgkill in
1704 : // SuspendAndSampleAndResumeThread.
1705 : int mMyPid;
1706 :
1707 : // The sampler thread's ID. Used to assert that it is not sampling itself,
1708 : // which would lead to deadlock.
1709 : int mSamplerTid;
1710 :
1711 : public:
1712 : // This is the one-and-only variable used to communicate between the sampler
1713 : // thread and the samplee thread's signal handler. It's static because the
1714 : // samplee thread's signal handler is static.
1715 : static struct SigHandlerCoordinator* sSigHandlerCoordinator;
1716 : #endif
1717 : };
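 :
 : // A minimal usage sketch (hypothetical caller; see
 : // profiler_suspend_and_sample_thread() below for a real one). Note the
 : // Disable() call before the lock is released, per the warning above:
 : //
 : //   PSAutoLock lock(gPSMutex);
 : //   Sampler sampler(lock);
 : //   sampler.SuspendAndSampleAndResumeThread(lock, aThreadInfo,
 : //     [&](const Registers& aRegs) {
 : //       // aThreadInfo's thread is suspended here; inspect aRegs, but do
 : //       // not allocate or take locks.
 : //     });
 : //   sampler.Disable(lock);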
1718 :
1719 : // END Sampler
1720 : ////////////////////////////////////////////////////////////////////////
1721 :
1722 : ////////////////////////////////////////////////////////////////////////
1723 : // BEGIN SamplerThread
1724 :
1725 : // The sampler thread controls sampling and runs whenever the profiler is
1726 : // active. It periodically runs through all registered threads, finds those
1727 : // that should be sampled, then pauses and samples them.
1728 :
1729 : class SamplerThread : public Sampler
1730 : {
1731 : public:
1732 : // Creates a sampler thread, but doesn't start it.
1733 : SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
1734 : double aIntervalMilliseconds);
1735 : ~SamplerThread();
1736 :
1737 : // This runs on (is!) the sampler thread.
1738 : void Run();
1739 :
1740 : // This runs on the main thread.
1741 : void Stop(PSLockRef aLock);
1742 :
1743 : private:
1744 : // This suspends the calling thread for the given number of microseconds.
1745 : // Best effort timing.
1746 : void SleepMicro(uint32_t aMicroseconds);
1747 :
1748 : // The activity generation, for detecting when the sampler thread must stop.
1749 : const uint32_t mActivityGeneration;
1750 :
1751 : // The interval between samples, measured in microseconds.
1752 : const int mIntervalMicroseconds;
1753 :
1754 : // The OS-specific handle for the sampler thread.
1755 : #if defined(GP_OS_windows)
1756 : HANDLE mThread;
1757 : #elif defined(GP_OS_darwin) || defined(GP_OS_linux) || defined(GP_OS_android)
1758 : pthread_t mThread;
1759 : #endif
1760 :
1761 : SamplerThread(const SamplerThread&) = delete;
1762 : void operator=(const SamplerThread&) = delete;
1763 : };
1764 :
1765 : // This function is required because we need to create a SamplerThread within
1766 : // ActivePS's constructor, but SamplerThread is defined after ActivePS. It
1767 : // could probably be removed by moving some code around.
1768 : static SamplerThread*
1769 0 : NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval)
1770 : {
1771 0 : return new SamplerThread(aLock, aGeneration, aInterval);
1772 : }
1773 :
1774 : // This function is the sampler thread. This implementation is used for all
1775 : // targets.
1776 : void
1777 0 : SamplerThread::Run()
1778 : {
1779 0 : PR_SetCurrentThreadName("SamplerThread");
1780 :
1781 : // This will be positive if we are running behind schedule (sampling less
1782 : // frequently than desired) and negative if we are ahead of schedule.
1783 0 : TimeDuration lastSleepOvershoot = 0;
1784 0 : TimeStamp sampleStart = TimeStamp::Now();
1785 :
1786 : while (true) {
1787 : // This scope is for |lock|. It ends before we sleep below.
1788 : {
1789 0 : PSAutoLock lock(gPSMutex);
1790 :
1791 0 : if (!ActivePS::Exists(lock)) {
1792 0 : return;
1793 : }
1794 :
1795 : // At this point profiler_stop() might have been called, and
1796 : // profiler_start() might have been called on another thread. If this
1797 : // happens the generation won't match.
1798 0 : if (ActivePS::Generation(lock) != mActivityGeneration) {
1799 0 : return;
1800 : }
1801 :
1802 0 : ActivePS::Buffer(lock).DeleteExpiredStoredMarkers();
1803 :
1804 0 : if (!ActivePS::IsPaused(lock)) {
1805 0 : const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(lock);
1806 0 : for (uint32_t i = 0; i < liveThreads.size(); i++) {
1807 0 : ThreadInfo* info = liveThreads.at(i);
1808 :
1809 0 : if (!info->IsBeingProfiled()) {
1810 : // We are not interested in profiling this thread.
1811 0 : continue;
1812 : }
1813 :
1814 : // If the thread is asleep and has been sampled before in the same
1815 : // sleep episode, find and copy the previous sample, as that's
1816 : // cheaper than taking a new sample.
1817 0 : if (info->RacyInfo()->CanDuplicateLastSampleDueToSleep()) {
1818 : bool dup_ok =
1819 0 : ActivePS::Buffer(lock).DuplicateLastSample(
1820 0 : info->ThreadId(), CorePS::ProcessStartTime(),
1821 0 : info->LastSample());
1822 0 : if (dup_ok) {
1823 0 : continue;
1824 : }
1825 : }
1826 :
1827 : // We only track responsiveness for the main thread.
1828 0 : if (info->IsMainThread()) {
1829 0 : info->GetThreadResponsiveness()->Update();
1830 : }
1831 :
1832 : // We only get the memory measurements once for all live threads.
1833 0 : int64_t rssMemory = 0;
1834 0 : int64_t ussMemory = 0;
1835 0 : if (i == 0 && ActivePS::FeatureMemory(lock)) {
1836 0 : rssMemory = nsMemoryReporterManager::ResidentFast();
1837 : #if defined(GP_OS_linux) || defined(GP_OS_android)
1838 0 : ussMemory = nsMemoryReporterManager::ResidentUnique();
1839 : #endif
1840 : }
1841 :
1842 0 : TimeStamp now = TimeStamp::Now();
1843 0 : SuspendAndSampleAndResumeThread(lock, *info,
1844 0 : [&](const Registers& aRegs) {
1845 0 : DoPeriodicSample(lock, *info, now, aRegs, rssMemory, ussMemory);
1846 0 : });
1847 : }
1848 :
1849 : #if defined(USE_LUL_STACKWALK)
1850 : // The LUL unwind object accumulates frame statistics. Periodically we
1851 : // should poke it to give it a chance to print those statistics. This
1852 : // involves doing I/O (fprintf, __android_log_print, etc.) and so
1853 : // can't safely be done from the critical section inside
1854 : // SuspendAndSampleAndResumeThread, which is why it is done here.
1855 0 : CorePS::Lul(lock)->MaybeShowStats();
1856 : #endif
1857 : }
1858 : }
1859 : // gPSMutex is not held after this point.
1860 :
1861 : // Calculate how long a sleep to request. After the sleep, measure how
1862 : // long we actually slept and take the difference into account when
1863 : // calculating the sleep interval for the next iteration. This is an
1864 : // attempt to stay on schedule despite the inaccuracy of the actual
1865 : // sleep intervals.
1866 : TimeStamp targetSleepEndTime =
1867 0 : sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
1868 0 : TimeStamp beforeSleep = TimeStamp::Now();
1869 0 : TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
1870 0 : double sleepTime = std::max(0.0, (targetSleepDuration -
1871 0 : lastSleepOvershoot).ToMicroseconds());
1872 0 : SleepMicro(static_cast<uint32_t>(sleepTime));
1873 0 : sampleStart = TimeStamp::Now();
1874 : lastSleepOvershoot =
1875 0 : sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
1876 0 : }
1877 : }
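 :
 : // A worked example of the overshoot compensation above, assuming a 1000us
 : // interval and negligible sampling time: if SleepMicro(1000) actually
 : // sleeps for 1300us, lastSleepOvershoot becomes 300us, so the next
 : // iteration requests max(0, 1000 - 300) = 700us and the sampler stays
 : // close to schedule.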
1878 :
1879 : // We #include these files directly because it means those files can use
1880 : // declarations from this file trivially. These provide target-specific
1881 : // implementations of all SamplerThread methods except Run().
1882 : #if defined(GP_OS_windows)
1883 : # include "platform-win32.cpp"
1884 : #elif defined(GP_OS_darwin)
1885 : # include "platform-macos.cpp"
1886 : #elif defined(GP_OS_linux) || defined(GP_OS_android)
1887 : # include "platform-linux-android.cpp"
1888 : #else
1889 : # error "bad platform"
1890 : #endif
1891 :
1892 : UniquePlatformData
1893 75 : AllocPlatformData(int aThreadId)
1894 : {
1895 75 : return UniquePlatformData(new PlatformData(aThreadId));
1896 : }
1897 :
1898 : void
1899 1 : PlatformDataDestructor::operator()(PlatformData* aData)
1900 : {
1901 1 : delete aData;
1902 1 : }
1903 :
1904 : // END SamplerThread
1905 : ////////////////////////////////////////////////////////////////////////
1906 :
1907 : ////////////////////////////////////////////////////////////////////////
1908 : // BEGIN externally visible functions
1909 :
1910 0 : MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)
1911 :
1912 : NS_IMETHODIMP
1913 0 : GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport,
1914 : nsISupports* aData, bool aAnonymize)
1915 : {
1916 0 : MOZ_RELEASE_ASSERT(NS_IsMainThread());
1917 :
1918 0 : size_t profSize = 0;
1919 0 : size_t lulSize = 0;
1920 :
1921 : {
1922 0 : PSAutoLock lock(gPSMutex);
1923 :
1924 0 : if (CorePS::Exists()) {
1925 0 : CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
1926 : }
1927 :
1928 0 : if (ActivePS::Exists(lock)) {
1929 0 : profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
1930 : }
1931 : }
1932 :
1933 0 : MOZ_COLLECT_REPORT(
1934 : "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize,
1935 : "Memory used by the Gecko Profiler's global state (excluding memory used "
1936 0 : "by LUL).");
1937 :
1938 : #if defined(USE_LUL_STACKWALK)
1939 0 : MOZ_COLLECT_REPORT(
1940 : "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize,
1941 0 : "Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
1942 : #endif
1943 :
1944 0 : return NS_OK;
1945 : }
1946 :
1947 24 : NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)
1948 :
1949 : // Find the ThreadInfo for the current thread. This should only be called in
1950 : // places where TLSInfo can't be used. On success, *aIndexOut is set to the
1951 : // index if it is non-null.
1952 : static ThreadInfo*
1953 76 : FindLiveThreadInfo(PSLockRef aLock, int* aIndexOut = nullptr)
1954 : {
1955 76 : ThreadInfo* ret = nullptr;
1956 76 : Thread::tid_t id = Thread::GetCurrentId();
1957 76 : const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(aLock);
1958 1115 : for (uint32_t i = 0; i < liveThreads.size(); i++) {
1959 1040 : ThreadInfo* info = liveThreads.at(i);
1960 1040 : if (info->ThreadId() == id) {
1961 1 : if (aIndexOut) {
1962 1 : *aIndexOut = i;
1963 : }
1964 1 : ret = info;
1965 1 : break;
1966 : }
1967 : }
1968 :
1969 76 : return ret;
1970 : }
1971 :
1972 : static void
1973 75 : locked_register_thread(PSLockRef aLock, const char* aName, void* aStackTop)
1974 : {
1975 75 : MOZ_RELEASE_ASSERT(CorePS::Exists());
1976 :
1977 75 : MOZ_RELEASE_ASSERT(!FindLiveThreadInfo(aLock));
1978 :
1979 75 : if (!TLSInfo::Init(aLock)) {
1980 0 : return;
1981 : }
1982 :
1983 75 : ThreadInfo* info = new ThreadInfo(aName, Thread::GetCurrentId(),
1984 150 : NS_IsMainThread(), aStackTop);
1985 75 : TLSInfo::SetInfo(aLock, info);
1986 :
1987 75 : if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) {
1988 0 : info->StartProfiling();
1989 0 : if (ActivePS::FeatureJS(aLock)) {
1990 : // This StartJSSampling() call is on-thread, so we can poll manually to
1991 : // start JS sampling immediately.
1992 0 : info->StartJSSampling();
1993 0 : info->PollJSSampling();
1994 : }
1995 : }
1996 :
1997 75 : CorePS::LiveThreads(aLock).push_back(info);
1998 : }
1999 :
2000 : static void
2001 0 : NotifyObservers(const char* aTopic, nsISupports* aSubject = nullptr)
2002 : {
2003 0 : if (!NS_IsMainThread()) {
2004 : // Dispatch a task to the main thread that notifies observers.
2005 : // If NotifyObservers is called both on and off the main thread within a
2006 : // short time, the order of the notifications can be different from the
2007 : // order of the calls to NotifyObservers.
2008 : // Getting the order 100% right isn't that important at the moment, because
2009 : // these notifications are only observed in the parent process, where the
2010 : // profiler_* functions are currently only called on the main thread.
2011 0 : nsCOMPtr<nsISupports> subject = aSubject;
2012 0 : NS_DispatchToMainThread(NS_NewRunnableFunction(
2013 0 : "NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
2014 0 : return;
2015 : }
2016 :
2017 0 : if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
2018 0 : os->NotifyObservers(aSubject, aTopic, nullptr);
2019 : }
2020 : }
2021 :
2022 : static void
2023 0 : NotifyProfilerStarted(const int aEntries, double aInterval, uint32_t aFeatures,
2024 : const char** aFilters, uint32_t aFilterCount)
2025 : {
2026 0 : nsTArray<nsCString> filtersArray;
2027 0 : for (size_t i = 0; i < aFilterCount; ++i) {
2028 0 : filtersArray.AppendElement(aFilters[i]);
2029 : }
2030 :
2031 : nsCOMPtr<nsIProfilerStartParams> params =
2032 0 : new nsProfilerStartParams(aEntries, aInterval, aFeatures, filtersArray);
2033 :
2034 0 : ProfilerParent::ProfilerStarted(params);
2035 0 : NotifyObservers("profiler-started", params);
2036 0 : }
2037 :
2038 : static void
2039 : locked_profiler_start(PSLockRef aLock, const int aEntries, double aInterval,
2040 : uint32_t aFeatures,
2041 : const char** aFilters, uint32_t aFilterCount);
2042 :
2043 : // This basically duplicates AutoProfilerLabel's constructor.
2044 : PseudoStack*
2045 0 : MozGlueLabelEnter(const char* aLabel, const char* aDynamicString, void* aSp,
2046 : uint32_t aLine)
2047 : {
2048 0 : PseudoStack* pseudoStack = AutoProfilerLabel::sPseudoStack.get();
2049 0 : if (pseudoStack) {
2050 : pseudoStack->pushCppFrame(aLabel, aDynamicString, aSp, aLine,
2051 : js::ProfileEntry::Kind::CPP_NORMAL,
2052 0 : js::ProfileEntry::Category::OTHER);
2053 : }
2054 0 : return pseudoStack;
2055 : }
2056 :
2057 : // This basically duplicates AutoProfilerLabel's destructor.
2058 : void
2059 0 : MozGlueLabelExit(PseudoStack* aPseudoStack)
2060 : {
2061 0 : if (aPseudoStack) {
2062 0 : aPseudoStack->pop();
2063 : }
2064 0 : }
2065 :
2066 : void
2067 3 : profiler_init(void* aStackTop)
2068 : {
2069 3 : LOG("profiler_init");
2070 :
2071 3 : MOZ_RELEASE_ASSERT(!CorePS::Exists());
2072 :
2073 3 : SharedLibraryInfo::Initialize();
2074 :
2075 : uint32_t features =
2076 : #if defined(GP_OS_android)
2077 : ProfilerFeature::Java |
2078 : #endif
2079 : ProfilerFeature::JS |
2080 : ProfilerFeature::Leaf |
2081 : #if defined(HAVE_NATIVE_UNWIND)
2082 : ProfilerFeature::StackWalk |
2083 : #endif
2084 : ProfilerFeature::Threads |
2085 3 : 0;
2086 :
2087 3 : const char* filters[] = { "GeckoMain", "Compositor", "DOM Worker" };
2088 :
2089 3 : if (getenv("MOZ_PROFILER_HELP")) {
2090 0 : PrintUsageThenExit(0); // terminates execution
2091 : }
2092 :
2093 3 : int entries = PROFILER_DEFAULT_ENTRIES;
2094 3 : double interval = PROFILER_DEFAULT_INTERVAL;
2095 :
2096 : {
2097 3 : PSAutoLock lock(gPSMutex);
2098 :
2099 : // We've passed the possible failure point. Instantiate CorePS, which
2100 : // indicates that the profiler has initialized successfully.
2101 3 : CorePS::Create(lock);
2102 :
2103 3 : locked_register_thread(lock, kMainThreadName, aStackTop);
2104 :
2105 : // Platform-specific initialization.
2106 3 : PlatformInit(lock);
2107 :
2108 : #ifdef MOZ_TASK_TRACER
2109 : tasktracer::InitTaskTracer();
2110 : #endif
2111 :
2112 : #if defined(GP_OS_android)
2113 : if (jni::IsFennec()) {
2114 : GeckoJavaSampler::Init();
2115 : }
2116 : #endif
2117 :
2118 : // Setup support for pushing/popping labels in mozglue.
2119 3 : RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);
2120 :
2121 : // (Linux-only) We could create CorePS::mLul and read unwind info into it
2122 : // at this point. That would match the lifetime implied by destruction of
2123 : // it in profiler_shutdown() just below. However, that gives a big delay on
2124 : // startup, even if no profiling is actually to be done. So, instead, it is
2125 : // created on demand at the first call to PlatformStart().
2126 :
2127 3 : if (!getenv("MOZ_PROFILER_STARTUP")) {
2128 3 : return;
2129 : }
2130 :
2131 0 : LOG("- MOZ_PROFILER_STARTUP is set");
2132 :
2133 0 : const char* startupEntries = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
2134 0 : if (startupEntries) {
2135 0 : errno = 0;
2136 0 : entries = strtol(startupEntries, nullptr, 10);
2137 0 : if (errno == 0 && entries > 0) {
2138 0 : LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %d", entries);
2139 : } else {
2140 0 : PrintUsageThenExit(1);
2141 : }
2142 : }
2143 :
2144 0 : const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
2145 0 : if (startupInterval) {
2146 0 : errno = 0;
2147 0 : interval = PR_strtod(startupInterval, nullptr);
2148 0 : if (errno == 0 && interval > 0.0 && interval <= 1000.0) {
2149 0 : LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
2150 : } else {
2151 0 : PrintUsageThenExit(1);
2152 : }
2153 : }
2154 :
2155 : locked_profiler_start(lock, entries, interval, features,
2156 0 : filters, MOZ_ARRAY_LENGTH(filters));
2157 : }
2158 :
2159 : // We do this with gPSMutex unlocked. The comment in profiler_stop() explains
2160 : // why.
2161 : NotifyProfilerStarted(entries, interval, features, filters,
2162 0 : MOZ_ARRAY_LENGTH(filters));
2163 : }
2164 :
2165 : static void
2166 : locked_profiler_save_profile_to_file(PSLockRef aLock, const char* aFilename);
2167 :
2168 : static SamplerThread*
2169 : locked_profiler_stop(PSLockRef aLock);
2170 :
2171 : void
2172 0 : profiler_shutdown()
2173 : {
2174 0 : LOG("profiler_shutdown");
2175 :
2176 0 : MOZ_RELEASE_ASSERT(NS_IsMainThread());
2177 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2178 :
2179 : // If the profiler is active we must get a handle to the SamplerThread before
2180 : // ActivePS is destroyed, in order to delete it.
2181 0 : SamplerThread* samplerThread = nullptr;
2182 : {
2183 0 : PSAutoLock lock(gPSMutex);
2184 :
2185 : // Save the profile on shutdown if requested.
2186 0 : if (ActivePS::Exists(lock)) {
2187 0 : const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
2188 0 : if (filename) {
2189 0 : locked_profiler_save_profile_to_file(lock, filename);
2190 : }
2191 :
2192 0 : samplerThread = locked_profiler_stop(lock);
2193 : }
2194 :
2195 0 : CorePS::Destroy(lock);
2196 :
2197 : // We just destroyed CorePS and the ThreadInfos it contains, so we can
2198 : // clear this thread's TLSInfo.
2199 0 : TLSInfo::SetInfo(lock, nullptr);
2200 :
2201 : #ifdef MOZ_TASK_TRACER
2202 : tasktracer::ShutdownTaskTracer();
2203 : #endif
2204 : }
2205 :
2206 : // We do these operations with gPSMutex unlocked. The comments in
2207 : // profiler_stop() explain why.
2208 0 : if (samplerThread) {
2209 0 : ProfilerParent::ProfilerStopped();
2210 0 : NotifyObservers("profiler-stopped");
2211 0 : delete samplerThread;
2212 : }
2213 0 : }
2214 :
2215 : UniquePtr<char[]>
2216 0 : profiler_get_profile(double aSinceTime)
2217 : {
2218 0 : LOG("profiler_get_profile");
2219 :
2220 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2221 :
2222 0 : SpliceableChunkedJSONWriter b;
2223 0 : b.Start(SpliceableJSONWriter::SingleLineStyle);
2224 : {
2225 0 : if (!profiler_stream_json_for_this_process(b, aSinceTime)) {
2226 0 : return nullptr;
2227 : }
2228 :
2229 : // Don't include profiles from other processes because this is a
2230 : // synchronous function.
2231 0 : b.StartArrayProperty("processes");
2232 0 : b.EndArray();
2233 : }
2234 0 : b.End();
2235 :
2236 0 : return b.WriteFunc()->CopyData();
2237 : }
2238 :
2239 : void
2240 0 : profiler_get_start_params(int* aEntries, double* aInterval, uint32_t* aFeatures,
2241 : Vector<const char*>* aFilters)
2242 : {
2243 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2244 :
2245 0 : if (NS_WARN_IF(!aEntries) || NS_WARN_IF(!aInterval) ||
2246 0 : NS_WARN_IF(!aFeatures) || NS_WARN_IF(!aFilters)) {
2247 0 : return;
2248 : }
2249 :
2250 0 : PSAutoLock lock(gPSMutex);
2251 :
2252 0 : if (!ActivePS::Exists(lock)) {
2253 0 : *aEntries = 0;
2254 0 : *aInterval = 0;
2255 0 : *aFeatures = 0;
2256 0 : aFilters->clear();
2257 0 : return;
2258 : }
2259 :
2260 0 : *aEntries = ActivePS::Entries(lock);
2261 0 : *aInterval = ActivePS::Interval(lock);
2262 0 : *aFeatures = ActivePS::Features(lock);
2263 :
2264 0 : const Vector<std::string>& filters = ActivePS::Filters(lock);
2265 0 : MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
2266 0 : for (uint32_t i = 0; i < filters.length(); ++i) {
2267 0 : (*aFilters)[i] = filters[i].c_str();
2268 : }
2269 : }
2270 :
2271 : static void
2272 0 : locked_profiler_save_profile_to_file(PSLockRef aLock, const char* aFilename)
2273 : {
2274 0 : LOG("locked_profiler_save_profile_to_file(%s)", aFilename);
2275 :
2276 0 : MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
2277 :
2278 0 : std::ofstream stream;
2279 0 : stream.open(aFilename);
2280 0 : if (stream.is_open()) {
2281 0 : SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
2282 0 : w.Start(SpliceableJSONWriter::SingleLineStyle);
2283 : {
2284 0 : locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0);
2285 :
2286 : // Don't include profiles from other processes because this is a
2287 : // synchronous function.
2288 0 : w.StartArrayProperty("processes");
2289 0 : w.EndArray();
2290 : }
2291 0 : w.End();
2292 :
2293 0 : stream.close();
2294 : }
2295 0 : }
2296 :
2297 : void
2298 0 : profiler_save_profile_to_file(const char* aFilename)
2299 : {
2300 0 : LOG("profiler_save_profile_to_file(%s)", aFilename);
2301 :
2302 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2303 :
2304 0 : PSAutoLock lock(gPSMutex);
2305 :
2306 0 : if (!ActivePS::Exists(lock)) {
2307 0 : return;
2308 : }
2309 :
2310 0 : locked_profiler_save_profile_to_file(lock, aFilename);
2311 : }
2312 :
2313 : uint32_t
2314 0 : profiler_get_available_features()
2315 : {
2316 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2317 :
2318 0 : uint32_t features = 0;
2319 :
2320 : #define ADD_FEATURE(n_, str_, Name_) ProfilerFeature::Set##Name_(features);
2321 :
2322 : // Add all the possible features.
2323 0 : PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
2324 :
2325 : #undef ADD_FEATURE
2326 :
2327 : // Now remove features not supported on this platform/configuration.
2328 : #if !defined(GP_OS_android)
2329 0 : ProfilerFeature::ClearJava(features);
2330 : #endif
2331 : #if !defined(HAVE_NATIVE_UNWIND)
2332 : ProfilerFeature::ClearStackWalk(features);
2333 : #endif
2334 : #if !defined(MOZ_TASK_TRACER)
2335 0 : ProfilerFeature::ClearTaskTracer(features);
2336 : #endif
2337 :
2338 0 : return features;
2339 : }
2340 :
2341 : void
2342 0 : profiler_get_buffer_info_helper(uint32_t* aCurrentPosition,
2343 : uint32_t* aEntries,
2344 : uint32_t* aGeneration)
2345 : {
2346 : // This function is called by profiler_get_buffer_info(), which has already
2347 : // zeroed the outparams.
2348 :
2349 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2350 :
2351 0 : PSAutoLock lock(gPSMutex);
2352 :
2353 0 : if (!ActivePS::Exists(lock)) {
2354 0 : return;
2355 : }
2356 :
2357 0 : *aCurrentPosition = ActivePS::Buffer(lock).mWritePos;
2358 0 : *aEntries = ActivePS::Entries(lock);
2359 0 : *aGeneration = ActivePS::Buffer(lock).mGeneration;
2360 : }
2361 :
2362 : static void
2363 0 : locked_profiler_start(PSLockRef aLock, int aEntries, double aInterval,
2364 : uint32_t aFeatures,
2365 : const char** aFilters, uint32_t aFilterCount)
2366 : {
2367 0 : if (LOG_TEST) {
2368 0 : LOG("locked_profiler_start");
2369 0 : LOG("- entries = %d", aEntries);
2370 0 : LOG("- interval = %.2f", aInterval);
2371 :
2372 : #define LOG_FEATURE(n_, str_, Name_) \
2373 : if (ProfilerFeature::Has##Name_(aFeatures)) { \
2374 : LOG("- feature = %s", str_); \
2375 : }
2376 :
2377 0 : PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)
2378 :
2379 : #undef LOG_FEATURE
2380 :
2381 0 : for (uint32_t i = 0; i < aFilterCount; i++) {
2382 0 : LOG("- threads = %s", aFilters[i]);
2383 : }
2384 : }
2385 :
2386 0 : MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
2387 :
2388 : #if defined(GP_PLAT_amd64_windows)
2389 : InitializeWin64ProfilerHooks();
2390 : #endif
2391 :
2392 : // Fall back to the default values if the passed-in values are unreasonable.
2393 0 : int entries = aEntries > 0 ? aEntries : PROFILER_DEFAULT_ENTRIES;
2394 0 : double interval = aInterval > 0 ? aInterval : PROFILER_DEFAULT_INTERVAL;
2395 :
2396 0 : ActivePS::Create(aLock, entries, interval, aFeatures, aFilters, aFilterCount);
2397 :
2398 : // Set up profiling for each registered thread, if appropriate.
2399 0 : Thread::tid_t tid = Thread::GetCurrentId();
2400 0 : const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(aLock);
2401 0 : for (uint32_t i = 0; i < liveThreads.size(); i++) {
2402 0 : ThreadInfo* info = liveThreads.at(i);
2403 :
2404 0 : if (ActivePS::ShouldProfileThread(aLock, info)) {
2405 0 : info->StartProfiling();
2406 0 : if (ActivePS::FeatureJS(aLock)) {
2407 0 : info->StartJSSampling();
2408 0 : if (info->ThreadId() == tid) {
2409 : // We can manually poll the current thread so it starts sampling
2410 : // immediately.
2411 0 : info->PollJSSampling();
2412 : }
2413 : }
2414 : }
2415 : }
2416 :
2417 : // Dead ThreadInfos are deleted in profiler_stop(), and dead ThreadInfos
2418 : // aren't saved when the profiler is inactive. Therefore the dead threads
2419 : // vector should be empty here.
2420 0 : MOZ_RELEASE_ASSERT(CorePS::DeadThreads(aLock).empty());
2421 :
2422 : #ifdef MOZ_TASK_TRACER
2423 : if (ActivePS::FeatureTaskTracer(aLock)) {
2424 : tasktracer::StartLogging();
2425 : }
2426 : #endif
2427 :
2428 : #if defined(GP_OS_android)
2429 : if (ActivePS::FeatureJava(aLock)) {
2430 : int javaInterval = interval;
2431 : // Java sampling doesn't accurately keep up with 1ms sampling.
2432 : if (javaInterval < 10) {
2433 : javaInterval = 10;
2434 : }
2435 : java::GeckoJavaSampler::Start(javaInterval, 1000);
2436 : }
2437 : #endif
2438 :
2439 : // At the very end, set up RacyFeatures.
2440 0 : RacyFeatures::SetActive(ActivePS::Features(aLock));
2441 0 : }
2442 :
2443 : void
2444 0 : profiler_start(int aEntries, double aInterval, uint32_t aFeatures,
2445 : const char** aFilters, uint32_t aFilterCount)
2446 : {
2447 0 : LOG("profiler_start");
2448 :
2450 0 : SamplerThread* samplerThread = nullptr;
2451 : {
2452 0 : PSAutoLock lock(gPSMutex);
2453 :
2454 : // Initialize if necessary.
2455 0 : if (!CorePS::Exists()) {
2456 0 : profiler_init(nullptr);
2457 : }
2458 :
2459 : // Reset the current state if the profiler is running.
2460 0 : if (ActivePS::Exists(lock)) {
2461 0 : samplerThread = locked_profiler_stop(lock);
2462 : }
2463 :
2464 : locked_profiler_start(lock, aEntries, aInterval, aFeatures,
2465 0 : aFilters, aFilterCount);
2466 : }
2467 :
2468 : // We do these operations with gPSMutex unlocked. The comments in
2469 : // profiler_stop() explain why.
2470 0 : if (samplerThread) {
2471 0 : ProfilerParent::ProfilerStopped();
2472 0 : NotifyObservers("profiler-stopped");
2473 0 : delete samplerThread;
2474 : }
2475 : NotifyProfilerStarted(aEntries, aInterval, aFeatures,
2476 0 : aFilters, aFilterCount);
2477 0 : }
2478 :
2479 : static MOZ_MUST_USE SamplerThread*
2480 0 : locked_profiler_stop(PSLockRef aLock)
2481 : {
2482 0 : LOG("locked_profiler_stop");
2483 :
2484 0 : MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
2485 :
2486 : // At the very start, clear RacyFeatures.
2487 0 : RacyFeatures::SetInactive();
2488 :
2489 : #ifdef MOZ_TASK_TRACER
2490 : if (ActivePS::FeatureTaskTracer(aLock)) {
2491 : tasktracer::StopLogging();
2492 : }
2493 : #endif
2494 :
2495 : // Stop sampling live threads.
2496 0 : Thread::tid_t tid = Thread::GetCurrentId();
2497 0 : CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(aLock);
2498 0 : for (uint32_t i = 0; i < liveThreads.size(); i++) {
2499 0 : ThreadInfo* info = liveThreads.at(i);
2500 0 : if (info->IsBeingProfiled()) {
2501 0 : if (ActivePS::FeatureJS(aLock)) {
2502 0 : info->StopJSSampling();
2503 0 : if (info->ThreadId() == tid) {
2504 : // We can manually poll the current thread so it stops profiling
2505 : // immediately.
2506 0 : info->PollJSSampling();
2507 : }
2508 : }
2509 0 : info->StopProfiling();
2510 : }
2511 : }
2512 :
2513 : // This is where we destroy the ThreadInfos for all dead threads.
2514 0 : CorePS::ThreadVector& deadThreads = CorePS::DeadThreads(aLock);
2515 0 : while (deadThreads.size() > 0) {
2516 0 : delete deadThreads.back();
2517 0 : deadThreads.pop_back();
2518 : }
2519 :
2520 : // The Stop() call doesn't actually stop Run(); that happens in this
2521 : // function's caller when the sampler thread is destroyed. Stop() just gives
2522 : // the SamplerThread a chance to do some cleanup with gPSMutex locked.
2523 0 : SamplerThread* samplerThread = ActivePS::Destroy(aLock);
2524 0 : samplerThread->Stop(aLock);
2525 :
2526 0 : return samplerThread;
2527 : }
2528 :
2529 : void
2530 0 : profiler_stop()
2531 : {
2532 0 : LOG("profiler_stop");
2533 :
2534 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2535 :
2536 : SamplerThread* samplerThread;
2537 : {
2538 0 : PSAutoLock lock(gPSMutex);
2539 :
2540 0 : if (!ActivePS::Exists(lock)) {
2541 0 : return;
2542 : }
2543 :
2544 0 : samplerThread = locked_profiler_stop(lock);
2545 : }
2546 :
2547 : // We notify observers with gPSMutex unlocked. Otherwise we might get a
2548 : // deadlock, if code run by these functions calls a profiler function that
2549 : // locks gPSMutex, for example when it wants to insert a marker.
2550 : // (This has been seen in practice in bug 1346356, when we were still firing
2551 : // these notifications synchronously.)
2552 0 : ProfilerParent::ProfilerStopped();
2553 0 : NotifyObservers("profiler-stopped");
2554 :
2555 : // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
2556 : // would be waiting here with gPSMutex locked for SamplerThread::Run() to
2557 : // return so the join operation within the destructor can complete, but Run()
2558 : // needs to lock gPSMutex to return.
2559 : //
2560 : // Because this call occurs with gPSMutex unlocked, it -- including the final
2561 : // iteration of Run()'s loop -- must be able to detect deactivation and return
2562 : // in a way that's safe with respect to other gPSMutex-locking operations
2563 : // that may have occurred in the meantime.
2564 0 : delete samplerThread;
2565 : }
2566 :
2567 : bool
2568 0 : profiler_is_paused()
2569 : {
2570 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2571 :
2572 0 : PSAutoLock lock(gPSMutex);
2573 :
2574 0 : if (!ActivePS::Exists(lock)) {
2575 0 : return false;
2576 : }
2577 :
2578 0 : return ActivePS::IsPaused(lock);
2579 : }
2580 :
2581 : void
2582 0 : profiler_pause()
2583 : {
2584 0 : LOG("profiler_pause");
2585 :
2586 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2587 :
2588 : {
2589 0 : PSAutoLock lock(gPSMutex);
2590 :
2591 0 : if (!ActivePS::Exists(lock)) {
2592 0 : return;
2593 : }
2594 :
2595 0 : ActivePS::SetIsPaused(lock, true);
2596 : }
2597 :
2598 : // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
2599 0 : ProfilerParent::ProfilerPaused();
2600 0 : NotifyObservers("profiler-paused");
2601 : }
2602 :
2603 : void
2604 0 : profiler_resume()
2605 : {
2606 0 : LOG("profiler_resume");
2607 :
2608 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2609 :
2610 : {
2611 0 : PSAutoLock lock(gPSMutex);
2612 :
2613 0 : if (!ActivePS::Exists(lock)) {
2614 0 : return;
2615 : }
2616 :
2617 0 : ActivePS::SetIsPaused(lock, false);
2618 : }
2619 :
2620 : // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
2621 0 : ProfilerParent::ProfilerResumed();
2622 0 : NotifyObservers("profiler-resumed");
2623 : }
2624 :
2625 : bool
2626 352 : profiler_feature_active(uint32_t aFeature)
2627 : {
2628 : // This function runs both on and off the main thread.
2629 :
2630 352 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2631 :
2632 : // This function is hot enough that we use RacyFeatures, not ActivePS.
2633 352 : return RacyFeatures::IsActiveWithFeature(aFeature);
2634 : }
2635 :
2636 : bool
2637 541 : profiler_is_active()
2638 : {
2639 : // This function runs both on and off the main thread.
2640 :
2641 541 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2642 :
2643 : // This function is hot enough that we use RacyFeatures, not ActivePS.
2644 541 : return RacyFeatures::IsActive();
2645 : }
2646 :
2647 : void
2648 72 : profiler_register_thread(const char* aName, void* aGuessStackTop)
2649 : {
2650 72 : DEBUG_LOG("profiler_register_thread(%s)", aName);
2651 :
2652 72 : MOZ_RELEASE_ASSERT(!NS_IsMainThread());
2653 72 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2654 :
2655 144 : PSAutoLock lock(gPSMutex);
2656 :
2657 72 : void* stackTop = GetStackTop(aGuessStackTop);
2658 72 : locked_register_thread(lock, aName, stackTop);
2659 72 : }
2660 :
2661 : void
2662 1 : profiler_unregister_thread()
2663 : {
2664 1 : MOZ_RELEASE_ASSERT(!NS_IsMainThread());
2665 1 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2666 :
2667 2 : PSAutoLock lock(gPSMutex);
2668 :
2669 : // We don't call ThreadInfo::StopJSSampling() here; there's no point doing
2670 : // that for a JS thread that is in the process of disappearing.
2671 :
2672 : int i;
2673 1 : ThreadInfo* info = FindLiveThreadInfo(lock, &i);
2674 1 : MOZ_RELEASE_ASSERT(info == TLSInfo::Info(lock));
2675 1 : if (info) {
2676 1 : DEBUG_LOG("profiler_unregister_thread: %s", info->Name());
2677 1 : if (ActivePS::Exists(lock) && info->IsBeingProfiled()) {
2678 0 : CorePS::DeadThreads(lock).push_back(info);
2679 : } else {
2680 1 : delete info;
2681 : }
2682 1 : CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(lock);
2683 1 : liveThreads.erase(liveThreads.begin() + i);
2684 :
2685 : // Whether or not we just destroyed the ThreadInfo or transferred it to the
2686 : // dead thread vector, we no longer need to access it via TLS.
2687 1 : TLSInfo::SetInfo(lock, nullptr);
2688 :
2689 : } else {
2690 : // There are two ways FindLiveThreadInfo() might have failed.
2691 : //
2692 : // - TLSInfo::Init() failed in locked_register_thread().
2693 : //
2694 : // - We've already called profiler_unregister_thread() for this thread.
2695 : // (Whether or not it should, this does happen in practice.)
2696 : //
2697 : // Either way, TLSInfo should be empty.
2698 0 : MOZ_RELEASE_ASSERT(!TLSInfo::Info(lock));
2699 : }
2700 1 : }
2701 :
2702 : void
2703 2656 : profiler_thread_sleep()
2704 : {
2705 : // This function runs both on and off the main thread.
2706 :
2707 2656 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2708 :
2709 2656 : RacyThreadInfo* racyInfo = TLSInfo::RacyInfo();
2710 2657 : if (!racyInfo) {
2711 0 : return;
2712 : }
2713 :
2714 2657 : racyInfo->SetSleeping();
2715 : }
2716 :
2717 : void
2718 2652 : profiler_thread_wake()
2719 : {
2720 : // This function runs both on and off the main thread.
2721 :
2722 2652 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2723 :
2724 2652 : RacyThreadInfo* racyInfo = TLSInfo::RacyInfo();
2725 2652 : if (!racyInfo) {
2726 0 : return;
2727 : }
2728 :
2729 2652 : racyInfo->SetAwake();
2730 : }
2731 :
2732 : bool
2733 0 : profiler_thread_is_sleeping()
2734 : {
2735 0 : MOZ_RELEASE_ASSERT(NS_IsMainThread());
2736 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2737 :
2738 0 : RacyThreadInfo* racyInfo = TLSInfo::RacyInfo();
2739 0 : if (!racyInfo) {
2740 0 : return false;
2741 : }
2742 0 : return racyInfo->IsSleeping();
2743 : }
2744 :
2745 : void
2746 27 : profiler_js_interrupt_callback()
2747 : {
2748 : // This function runs on JS threads being sampled.
2749 :
2750 27 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2751 :
2752 54 : PSAutoLock lock(gPSMutex);
2753 :
2754 27 : ThreadInfo* info = TLSInfo::Info(lock);
2755 27 : if (!info) {
2756 0 : return;
2757 : }
2758 :
2759 27 : info->PollJSSampling();
2760 : }
2761 :
2762 : double
2763 0 : profiler_time()
2764 : {
2765 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2766 :
2767 0 : TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
2768 0 : return delta.ToMilliseconds();
2769 : }
2770 :
2771 : UniqueProfilerBacktrace
2772 83 : profiler_get_backtrace()
2773 : {
2774 83 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2775 :
2776 166 : PSAutoLock lock(gPSMutex);
2777 :
2778 83 : if (!ActivePS::Exists(lock) || ActivePS::FeaturePrivacy(lock)) {
2779 83 : return nullptr;
2780 : }
2781 :
2782 0 : ThreadInfo* info = TLSInfo::Info(lock);
2783 0 : if (!info) {
2784 0 : MOZ_ASSERT(info);
2785 0 : return nullptr;
2786 : }
2787 :
2788 0 : Thread::tid_t tid = Thread::GetCurrentId();
2789 :
2790 0 : TimeStamp now = TimeStamp::Now();
2791 :
2792 0 : Registers regs;
2793 : #if defined(HAVE_NATIVE_UNWIND)
2794 0 : regs.SyncPopulate();
2795 : #else
2796 : regs.Clear();
2797 : #endif
2798 :
2799 : // 1000 should be plenty for a single backtrace.
2800 0 : auto buffer = MakeUnique<ProfileBuffer>(1000);
2801 :
2802 0 : DoSyncSample(lock, *info, now, regs, *buffer.get());
2803 :
2804 : return UniqueProfilerBacktrace(
2805 0 : new ProfilerBacktrace("SyncProfile", tid, Move(buffer)));
2806 : }
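 :
 : // A minimal usage sketch: capture a synchronous backtrace and attach it to
 : // a tracing marker as its "cause" (the category and marker name here are
 : // illustrative):
 : //
 : //   UniqueProfilerBacktrace cause = profiler_get_backtrace();
 : //   if (cause) {
 : //     profiler_tracing("Paint", "DisplayList", Move(cause), TRACING_EVENT);
 : //   }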
2807 :
2808 : void
2809 0 : ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace)
2810 : {
2811 0 : delete aBacktrace;
2812 0 : }
2813 :
2814 : // Fill the output buffer with the following pattern:
2815 : // "Label 1" "\0" "Label 2" "\0" ... "Label N" "\0" "\0"
2816 : // TODO: use the unwinder instead of pseudo stack.
2817 : void
2818 0 : profiler_get_backtrace_noalloc(char *output, size_t outputSize)
2819 : {
2820 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2821 :
2822 0 : MOZ_ASSERT(outputSize >= 2);
2823 0 : char *bound = output + outputSize - 2;
2824 0 : output[0] = output[1] = '\0';
2825 :
2826 0 : PSAutoLock lock(gPSMutex);
2827 :
2828 0 : if (!ActivePS::Exists(lock)) {
2829 0 : return;
2830 : }
2831 :
2832 0 : PseudoStack* pseudoStack = TLSInfo::Stack();
2833 0 : if (!pseudoStack) {
2834 0 : return;
2835 : }
2836 :
2837 0 : bool includeDynamicString = !ActivePS::FeaturePrivacy(lock);
2838 :
2839 0 : js::ProfileEntry* pseudoEntries = pseudoStack->entries;
2840 0 : uint32_t pseudoCount = pseudoStack->stackSize();
2841 :
2842 0 : for (uint32_t i = 0; i < pseudoCount; i++) {
2843 0 : const char* label = pseudoEntries[i].label();
2844 : const char* dynamicString =
2845 0 : includeDynamicString ? pseudoEntries[i].dynamicString() : nullptr;
2846 0 : size_t labelLength = strlen(label);
2847 0 : if (dynamicString) {
2848 : // Put the label, maybe a space, and the dynamic string into output.
2849 0 : size_t spaceLength = label[0] == '\0' ? 0 : 1;
2850 0 : size_t dynamicStringLength = strlen(dynamicString);
2851 0 : if (output + labelLength + spaceLength + dynamicStringLength >= bound) {
2852 0 : break;
2853 : }
2854 0 : strcpy(output, label);
2855 0 : output += labelLength;
2856 0 : if (spaceLength != 0) {
2857 0 : *output++ = ' ';
2858 : }
2859 0 : strcpy(output, dynamicString);
2860 0 : output += dynamicStringLength;
2861 : } else {
2862 : // Only put the label into output.
2863 0 : if (output + labelLength >= bound) {
2864 0 : break;
2865 : }
2866 0 : strcpy(output, label);
2867 0 : output += labelLength;
2868 : }
2869 0 : *output++ = '\0';
2870 0 : *output = '\0';
2871 : }
2872 : }
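 :
 : // A sketch of how a caller can walk the pattern described above: each
 : // entry is a NUL-terminated string, and an empty string marks the end.
 : //
 : //   char buf[512];
 : //   profiler_get_backtrace_noalloc(buf, sizeof(buf));
 : //   for (const char* p = buf; *p != '\0'; p += strlen(p) + 1) {
 : //     printf("%s\n", p);  // one label, plus optional dynamic string
 : //   }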
2873 :
2874 : static void
2875 0 : racy_profiler_add_marker(const char* aMarkerName,
2876 : UniquePtr<ProfilerMarkerPayload> aPayload)
2877 : {
2878 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2879 :
2880 : // We don't assert that RacyFeatures::IsActiveWithoutPrivacy() is true here,
2881 : // because it's possible that the result has changed since we tested it in
2882 : // the caller.
2883 : //
2884 : // Because of this imprecision it's possible to miss a marker or record one
2885 : // we shouldn't. Either way is not a big deal.
2886 :
2887 0 : RacyThreadInfo* racyInfo = TLSInfo::RacyInfo();
2888 0 : if (!racyInfo) {
2889 0 : return;
2890 : }
2891 :
2892 0 : TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull())
2893 0 : ? aPayload->GetStartTime()
2894 0 : : TimeStamp::Now();
2895 0 : TimeDuration delta = origin - CorePS::ProcessStartTime();
2896 0 : racyInfo->AddPendingMarker(aMarkerName, Move(aPayload),
2897 0 : delta.ToMilliseconds());
2898 : }
2899 :
2900 : void
2901 66 : profiler_add_marker(const char* aMarkerName,
2902 : UniquePtr<ProfilerMarkerPayload> aPayload)
2903 : {
2904 66 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2905 :
2906 : // This function is hot enough that we use RacyFeatures, not ActivePS.
2907 66 : if (!RacyFeatures::IsActiveWithoutPrivacy()) {
2908 66 : return;
2909 : }
2910 :
2911 0 : racy_profiler_add_marker(aMarkerName, Move(aPayload));
2912 : }
2913 :
2914 : void
2915 66 : profiler_add_marker(const char* aMarkerName)
2916 : {
2917 66 : profiler_add_marker(aMarkerName, nullptr);
2918 66 : }
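 :
 : // Minimal usage sketches (the marker name and category are illustrative):
 : //
 : //   profiler_add_marker("MyEvent");  // no payload
 : //   profiler_add_marker("MyTracing",
 : //                       MakeUnique<TracingMarkerPayload>("dom",
 : //                                                        TRACING_EVENT));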
2919 :
2920 : void
2921 701 : profiler_tracing(const char* aCategory, const char* aMarkerName,
2922 : TracingKind aKind)
2923 : {
2924 701 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2925 :
2926 : // This function is hot enough that we use RacyFeatures, not ActivePS.
2927 701 : if (!RacyFeatures::IsActiveWithoutPrivacy()) {
2928 701 : return;
2929 : }
2930 :
2931 0 : auto payload = MakeUnique<TracingMarkerPayload>(aCategory, aKind);
2932 0 : racy_profiler_add_marker(aMarkerName, Move(payload));
2933 : }
2934 :
2935 : void
2936 41 : profiler_tracing(const char* aCategory, const char* aMarkerName,
2937 : UniqueProfilerBacktrace aCause, TracingKind aKind)
2938 : {
2939 41 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2940 :
2941 : // This function is hot enough that we use RacyFeatures, not ActivePS.
2942 41 : if (!RacyFeatures::IsActiveWithoutPrivacy()) {
2943 41 : return;
2944 : }
2945 :
2946 : auto payload =
2947 0 : MakeUnique<TracingMarkerPayload>(aCategory, aKind, Move(aCause));
2948 0 : racy_profiler_add_marker(aMarkerName, Move(payload));
2949 : }
2950 :
2951 : PseudoStack*
2952 0 : profiler_get_pseudo_stack()
2953 : {
2954 0 : return TLSInfo::Stack();
2955 : }
2956 :
2957 : void
2958 4 : profiler_set_js_context(JSContext* aCx)
2959 : {
2960 4 : MOZ_ASSERT(aCx);
2961 :
2962 8 : PSAutoLock lock(gPSMutex);
2963 :
2964 4 : ThreadInfo* info = TLSInfo::Info(lock);
2965 4 : if (!info) {
2966 0 : return;
2967 : }
2968 :
2969 4 : info->SetJSContext(aCx);
2970 : }
2971 :
2972 : void
2973 0 : profiler_clear_js_context()
2974 : {
2975 0 : MOZ_RELEASE_ASSERT(CorePS::Exists());
2976 :
2977 0 : PSAutoLock lock(gPSMutex);
2978 :
2979 0 : ThreadInfo* info = TLSInfo::Info(lock);
2980 0 : if (!info || !info->mContext) {
2981 0 : return;
2982 : }
2983 :
2984 : // On JS shut down, flush the current buffer as stringifying JIT samples
2985 : // requires a live JSContext.
2986 :
2987 0 : if (ActivePS::Exists(lock)) {
2988 : // Flush this thread's ThreadInfo, if it is being profiled.
2989 0 : if (info->IsBeingProfiled()) {
2990 0 : info->FlushSamplesAndMarkers(CorePS::ProcessStartTime(),
2991 0 : ActivePS::Buffer(lock));
2992 : }
2993 : }
2994 :
2995 : // We don't call info->StopJSSampling() here; there's no point doing that for
2996 : // a JS thread that is in the process of disappearing.
2997 :
2998 0 : info->mContext = nullptr;
2999 : }
3000 :
3001 : int
3002 0 : profiler_current_thread_id()
3003 : {
3004 0 : return Thread::GetCurrentId();
3005 : }
3006 :
3007 : // NOTE: The callback function passed in will be called while the target thread
3008 : // is paused. Doing stuff in this function like allocating which may try to
3009 : // claim locks is a surefire way to deadlock.
3010 : void
3011 0 : profiler_suspend_and_sample_thread(
3012 : int aThreadId,
3013 : const std::function<ProfilerStackCallback>& aCallback,
3014 : bool aSampleNative /* = true */)
3015 : {
3016 : // Allocate the space for the native stack
3017 0 : NativeStack nativeStack;
3018 :
3019 : // Lock the profiler mutex
3020 0 : PSAutoLock lock(gPSMutex);
3021 :
3022 0 : const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(lock);
3023 0 : for (uint32_t i = 0; i < liveThreads.size(); i++) {
3024 0 : ThreadInfo* info = liveThreads.at(i);
3025 :
3026 0 : if (info->ThreadId() == aThreadId) {
3027 : // Suspend, sample, and then resume the target thread.
3028 0 : Sampler sampler(lock);
3029 0 : sampler.SuspendAndSampleAndResumeThread(lock, *info,
3030 0 : [&](const Registers& aRegs) {
3031 : // The target thread is now suspended. Collect a native backtrace, and
3032 : // call the callback.
3033 : #if defined(HAVE_NATIVE_UNWIND)
3034 0 : if (aSampleNative) {
3035 0 : DoNativeBacktrace(lock, *info, aRegs, nativeStack);
3036 : }
3037 : #endif
3038 0 : aCallback(nativeStack.mPCs, nativeStack.mCount, info->IsMainThread());
3039 0 : });
3040 :
3041 : // NOTE: Make sure to disable the sampler before it is destroyed, in case
3042 : // the profiler is running at the same time.
3043 0 : sampler.Disable(lock);
3044 0 : break;
3045 : }
3046 : }
3047 0 : }
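 :
 : // A minimal caller sketch. The callback runs while the target thread is
 : // suspended, so it must not allocate or take locks; the parameter types
 : // are assumed from the aCallback invocation above:
 : //
 : //   profiler_suspend_and_sample_thread(aThreadId,
 : //     [&](void** aPCs, size_t aCount, bool aIsMainThread) {
 : //       for (size_t i = 0; i < aCount; i++) {
 : //         // Record aPCs[i] into preallocated storage.
 : //       }
 : //     });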
3048 :
3049 : // END externally visible functions
3050 : ////////////////////////////////////////////////////////////////////////
|