/src/hermes/lib/VM/Profiler/SamplingProfilerSampler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #include "SamplingProfilerSampler.h" |
9 | | |
10 | | #if HERMESVM_SAMPLING_PROFILER_AVAILABLE |
11 | | |
12 | | #include "hermes/VM/Callable.h" |
13 | | #include "hermes/VM/HostModel.h" |
14 | | #include "hermes/VM/Runtime.h" |
15 | | #include "hermes/VM/RuntimeModule-inline.h" |
16 | | #include "hermes/VM/StackFrame-inline.h" |
17 | | |
18 | | #include "llvh/Support/Compiler.h" |
19 | | |
20 | | #include "ChromeTraceSerializer.h" |
21 | | |
22 | | #include <fcntl.h> |
23 | | #include <cassert> |
24 | | #include <chrono> |
25 | | #include <cmath> |
26 | | #include <csignal> |
27 | | #include <random> |
28 | | #include <thread> |
29 | | |
30 | | #if defined(_WINDOWS) |
31 | | #include <windows.h> |
32 | | // Must be included after windows.h |
33 | | #include <mmsystem.h> |
34 | | #include "llvh/ADT/ScopeExit.h" |
35 | | #endif |
36 | | |
37 | | namespace hermes { |
38 | | namespace vm { |
39 | | namespace sampling_profiler { |
40 | | |
// Defaulted destructor, defined out of line here rather than inside the class body.
41 | 0 | Sampler::~Sampler() = default; |
42 | | |
43 | 0 | void Sampler::registerRuntime(SamplingProfiler *profiler) { |
44 | 0 | std::lock_guard<std::mutex> lockGuard(profilerLock_); |
45 | 0 | profilers_.insert(profiler); |
46 | 0 | platformRegisterRuntime(profiler); |
47 | 0 | } |
48 | | |
49 | 0 | void Sampler::unregisterRuntime(SamplingProfiler *profiler) { |
50 | 0 | std::lock_guard<std::mutex> lockGuard(profilerLock_); |
51 | 0 | bool succeed = profilers_.erase(profiler); |
52 | | // TODO: should we allow recursive style |
53 | | // register/register -> unregister/unregister call? |
54 | 0 | assert(succeed && "How can runtime not registered yet?"); |
55 | 0 | (void)succeed; |
56 | 0 | platformUnregisterRuntime(profiler); |
57 | 0 | } |
58 | | |
59 | 0 | bool Sampler::sampleStacks() { |
60 | 0 | for (SamplingProfiler *localProfiler : profilers_) { |
61 | 0 | std::lock_guard<std::mutex> lk(localProfiler->runtimeDataLock_); |
62 | 0 | if (!sampleStack(localProfiler)) { |
63 | 0 | return false; |
64 | 0 | } |
65 | 0 | platformPostSampleStack(localProfiler); |
66 | 0 | } |
67 | 0 | return true; |
68 | 0 | } |
69 | | |
70 | 0 | bool Sampler::sampleStack(SamplingProfiler *localProfiler) { |
71 | 0 | if (localProfiler->suspendCount_ > 0) { |
72 | | // Sampling profiler is suspended. Copy pre-captured stack instead without |
73 | | // interrupting the VM thread. |
74 | 0 | if (localProfiler->preSuspendStackDepth_ > 0) { |
75 | 0 | sampleStorage_ = localProfiler->preSuspendStackStorage_; |
76 | 0 | sampledStackDepth_ = localProfiler->preSuspendStackDepth_; |
77 | 0 | } else { |
78 | | // This suspension didn't record a stack trace. For example, a GC (like |
79 | | // mallocGC) did not record JS stack. |
80 | | // TODO: fix this for all cases. |
81 | 0 | sampledStackDepth_ = 0; |
82 | 0 | } |
83 | 0 | } else { |
84 | | // Ensure there are no allocations in the signal handler by keeping ample |
85 | | // reserved space. |
86 | 0 | localProfiler->domains_.reserve( |
87 | 0 | localProfiler->domains_.size() + SamplingProfiler::kMaxStackDepth); |
88 | 0 | size_t domainCapacityBefore = localProfiler->domains_.capacity(); |
89 | 0 | (void)domainCapacityBefore; |
90 | | |
91 | | // Ditto for native functions. |
92 | 0 | localProfiler->nativeFunctions_.reserve( |
93 | 0 | localProfiler->nativeFunctions_.size() + |
94 | 0 | SamplingProfiler::kMaxStackDepth); |
95 | 0 | size_t nativeFunctionsCapacityBefore = |
96 | 0 | localProfiler->nativeFunctions_.capacity(); |
97 | 0 | (void)nativeFunctionsCapacityBefore; |
98 | |
|
99 | 0 | if (!platformSuspendVMAndWalkStack(localProfiler)) { |
100 | 0 | return false; |
101 | 0 | } |
102 | | |
103 | 0 | assert( |
104 | 0 | localProfiler->domains_.capacity() == domainCapacityBefore && |
105 | 0 | "Must not dynamically allocate in signal handler"); |
106 | | |
107 | 0 | assert( |
108 | 0 | localProfiler->nativeFunctions_.capacity() == |
109 | 0 | nativeFunctionsCapacityBefore && |
110 | 0 | "Must not dynamically allocate in signal handler"); |
111 | 0 | } |
112 | | |
113 | 0 | assert( |
114 | 0 | sampledStackDepth_ <= sampleStorage_.stack.size() && |
115 | 0 | "How can we sample more frames than storage?"); |
116 | 0 | localProfiler->sampledStacks_.emplace_back( |
117 | 0 | sampleStorage_.tid, |
118 | 0 | sampleStorage_.timeStamp, |
119 | 0 | sampleStorage_.stack.begin(), |
120 | 0 | sampleStorage_.stack.begin() + sampledStackDepth_); |
121 | 0 | return true; |
122 | 0 | } |
123 | | |
124 | 0 | void Sampler::walkRuntimeStack(SamplingProfiler *profiler) { |
125 | 0 | assert( |
126 | 0 | profiler->suspendCount_ == 0 && |
127 | 0 | "Shouldn't interrupt the VM thread when the sampling profiler is " |
128 | 0 | "suspended."); |
129 | | |
130 | | // Sampling stack will touch GC objects(like closure) so only do so if heap |
131 | | // is valid. |
132 | 0 | auto &curThreadRuntime = profiler->runtime_; |
133 | 0 | assert( |
134 | 0 | !curThreadRuntime.getHeap().inGC() && |
135 | 0 | "sampling profiler should be suspended before GC"); |
136 | 0 | (void)curThreadRuntime; |
137 | 0 | sampledStackDepth_ = |
138 | 0 | profiler->walkRuntimeStack(sampleStorage_, SamplingProfiler::InLoom::No); |
139 | 0 | } |
140 | | |
141 | 0 | void Sampler::timerLoop(double meanHzFreq) { |
142 | 0 | oscompat::set_thread_name("hermes-sampling-profiler"); |
143 | |
|
144 | 0 | std::random_device rd{}; |
145 | 0 | std::mt19937 gen{rd()}; |
146 | | // The amount of time that is spent sleeping comes from a normal distribution, |
147 | | // to avoid the case where the timer thread samples a stack at a predictable |
148 | | // period. |
149 | 0 | double interval = 1.0 / meanHzFreq; |
150 | 0 | std::normal_distribution<> distribution{interval, interval / 2}; |
151 | 0 | std::unique_lock<std::mutex> uniqueLock(profilerLock_); |
152 | |
|
153 | | #if defined(_WINDOWS) |
154 | | // By default, timer resolution is approximately 64Hz on Windows, so if the |
155 | | // meanHzFreq parameter is greater than 64, sampling will occur at a lower |
156 | | // frequency than desired. Setting the period to 1 is the minimum useful |
157 | | // value, resulting in timer resolution of roughly 1 millsecond. |
158 | | timeBeginPeriod(1); |
159 | | auto restorePeriod = llvh::make_scope_exit([] { timeEndPeriod(1); }); |
160 | | #endif |
161 | 0 | while (enabled_) { |
162 | 0 | if (!sampleStacks()) { |
163 | 0 | return; |
164 | 0 | } |
165 | | |
166 | 0 | double dur = std::fabs(distribution(gen)); |
167 | 0 | enabledCondVar_.wait_for( |
168 | 0 | uniqueLock, std::chrono::duration<double>(dur), [this]() { |
169 | 0 | return !enabled_; |
170 | 0 | }); |
171 | 0 | } |
172 | 0 | } |
173 | | |
174 | 0 | bool Sampler::enabled() { |
175 | 0 | std::lock_guard<std::mutex> lockGuard(profilerLock_); |
176 | 0 | return enabled_; |
177 | 0 | } |
178 | | |
179 | 0 | bool Sampler::enable(double meanHzFreq) { |
180 | 0 | std::lock_guard<std::mutex> lockGuard(profilerLock_); |
181 | 0 | if (enabled_) { |
182 | 0 | return true; |
183 | 0 | } |
184 | 0 | if (!platformEnable()) { |
185 | 0 | return false; |
186 | 0 | } |
187 | 0 | enabled_ = true; |
188 | | // Start timer thread. |
189 | 0 | timerThread_ = std::thread(&Sampler::timerLoop, this, meanHzFreq); |
190 | 0 | return true; |
191 | 0 | } |
192 | | |
193 | 0 | bool Sampler::disable() { |
194 | 0 | { |
195 | 0 | std::lock_guard<std::mutex> lockGuard(profilerLock_); |
196 | 0 | if (!enabled_) { |
197 | | // Already disabled. |
198 | 0 | return true; |
199 | 0 | } |
200 | 0 | if (!platformDisable()) { |
201 | 0 | return false; |
202 | 0 | } |
203 | | // Telling timer thread to exit. |
204 | 0 | enabled_ = false; |
205 | 0 | } |
206 | | // Notify the timer thread that it has been disabled. |
207 | 0 | enabledCondVar_.notify_all(); |
208 | | // Wait for timer thread to exit. This avoids the timer thread reading from |
209 | | // memory that is freed after a main thread exits. This is outside the lock |
210 | | // on profilerLock_ since the timer thread needs to acquire that lock. |
211 | 0 | timerThread_.join(); |
212 | 0 | return true; |
213 | 0 | } |
214 | | |
215 | | } // namespace sampling_profiler |
216 | | } // namespace vm |
217 | | } // namespace hermes |
218 | | |
219 | | #endif // HERMESVM_SAMPLING_PROFILER_AVAILABLE |