Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : /* This variant of nsIPerfMeasurement uses the perf_event interface
7 : * added in Linux 2.6.31. We key compilation of this file off the
8 : * existence of <linux/perf_event.h>.
9 : */
10 :
11 : #include <errno.h>
12 : #include <linux/perf_event.h>
13 : #include <string.h>
14 : #include <sys/ioctl.h>
15 : #include <sys/syscall.h>
16 : #include <unistd.h>
17 :
18 : #include "perf/jsperf.h"
19 :
20 : using namespace js;
21 :
22 : // As of July 2010, this system call has not been added to the
23 : // C library, so we have to provide our own wrapper function.
24 : // If this code runs on a kernel that does not implement the
25 : // system call (2.6.30 or older) nothing unpredictable will
26 : // happen - it will just always fail and return -1.
27 : static int
28 0 : sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
29 : int group_fd, unsigned long flags)
30 : {
31 0 : return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
32 : }
33 :
34 : namespace {
35 :
36 : using JS::PerfMeasurement;
37 : typedef PerfMeasurement::EventMask EventMask;
38 :
39 : // Additional state required by this implementation.
struct Impl
{
    // Each active counter corresponds to an open perf_event file
    // descriptor.  A value of -1 means the event is not being measured
    // (either never requested, or the kernel refused to open it).
    int f_cpu_cycles;
    int f_instructions;
    int f_cache_references;
    int f_cache_misses;
    int f_branch_instructions;
    int f_branch_misses;
    int f_bus_cycles;
    int f_page_faults;
    int f_major_page_faults;
    int f_context_switches;
    int f_cpu_migrations;

    // Counter group leader, for Start and Stop.  This is the fd of the
    // first counter successfully opened by init(); enabling/disabling it
    // controls the whole group.  -1 until init() opens a counter.
    int group_leader;

    // Whether counters are running (set by start(), cleared by stop()).
    bool running;

    Impl();
    ~Impl();

    // Open a counter fd for each event in |toMeasure|; returns the subset
    // actually enabled.
    EventMask init(EventMask toMeasure);
    // Begin counting (no-op if already running or nothing was opened).
    void start();
    // Stop counting and accumulate the values into |counters|.
    void stop(PerfMeasurement* counters);
};
68 :
69 : // Mapping from our event bitmask to codes passed into the kernel, and
70 : // to fields in the PerfMeasurement and PerfMeasurement::impl structures.
static const struct
{
    EventMask bit;                     // bit in the public EventMask API
    uint32_t type;                     // PERF_TYPE_HARDWARE or PERF_TYPE_SOFTWARE
    uint32_t config;                   // PERF_COUNT_{HW,SW}_* event code
    uint64_t PerfMeasurement::* counter; // where stop() accumulates the value
    int Impl::* fd;                    // which Impl field holds the open fd
} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
// HW/SW expand one table row each for a hardware resp. software event;
// the field name doubles as both the PerfMeasurement counter member and
// (with an f_ prefix) the Impl fd member.
#define HW(mask, constant, fieldname) \
    { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
#define SW(mask, constant, fieldname) \
    { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }

    HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles),
    HW(INSTRUCTIONS, INSTRUCTIONS, instructions),
    HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references),
    HW(CACHE_MISSES, CACHE_MISSES, cache_misses),
    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
    HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses),
    HW(BUS_CYCLES, BUS_CYCLES, bus_cycles),
    SW(PAGE_FAULTS, PAGE_FAULTS, page_faults),
    SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults),
    SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches),
    SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations),

#undef HW
#undef SW
};
101 :
// All counter fds start at -1 ("not measuring"); counters are not
// running until start() is called after a successful init().
Impl::Impl()
  : f_cpu_cycles(-1),
    f_instructions(-1),
    f_cache_references(-1),
    f_cache_misses(-1),
    f_branch_instructions(-1),
    f_branch_misses(-1),
    f_bus_cycles(-1),
    f_page_faults(-1),
    f_major_page_faults(-1),
    f_context_switches(-1),
    f_cpu_migrations(-1),
    group_leader(-1),
    running(false)
{
}
118 :
119 0 : Impl::~Impl()
120 : {
121 : // Close all active counter descriptors. Take care to do the group
122 : // leader last (this may not be necessary, but it's unclear what
123 : // happens if you close the group leader out from under a group).
124 0 : for (const auto& slot : kSlots) {
125 0 : int fd = this->*(slot.fd);
126 0 : if (fd != -1 && fd != group_leader)
127 0 : close(fd);
128 : }
129 :
130 0 : if (group_leader != -1)
131 0 : close(group_leader);
132 0 : }
133 :
// Open one perf counter fd per event requested in |toMeasure|.  Returns
// the subset of |toMeasure| that was successfully enabled; events the
// kernel refuses to open are silently dropped from the result.  The
// first counter successfully opened becomes the group leader for all
// subsequent ones.
EventMask
Impl::init(EventMask toMeasure)
{
    MOZ_ASSERT(group_leader == -1);
    if (!toMeasure)
        return EventMask(0);

    EventMask measured = EventMask(0);
    struct perf_event_attr attr;
    for (const auto& slot : kSlots) {
        if (!(toMeasure & slot.bit))
            continue;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);

        // Set the type and config fields to indicate the counter we
        // want to enable. We want read format 0, and we're not using
        // sampling, so leave those fields unset.
        attr.type = slot.type;
        attr.config = slot.config;

        // If this will be the group leader it should start off
        // disabled. Otherwise it should start off enabled (but blocked
        // on the group leader).
        if (group_leader == -1)
            attr.disabled = 1;

        // The rest of the bit fields are really poorly documented.
        // For instance, I have *no idea* whether we should be setting
        // the inherit, inherit_stat, or task flags. I'm pretty sure
        // we do want to set mmap and comm, and not any of the ones I
        // haven't mentioned.
        attr.mmap = 1;
        attr.comm = 1;

        int fd = sys_perf_event_open(&attr,
                                     0 /* trace self */,
                                     -1 /* on any cpu */,
                                     group_leader,
                                     0 /* no flags presently defined */);
        if (fd == -1)
            continue; // kernel rejected this event; leave its bit unset

        measured = EventMask(measured | slot.bit);
        this->*(slot.fd) = fd;
        if (group_leader == -1)
            group_leader = fd;
    }
    return measured;
}
185 :
186 : void
187 0 : Impl::start()
188 : {
189 0 : if (running || group_leader == -1)
190 0 : return;
191 :
192 0 : running = true;
193 0 : ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
194 : }
195 :
196 : void
197 0 : Impl::stop(PerfMeasurement* counters)
198 : {
199 : // This scratch buffer is to ensure that we have read all the
200 : // available data, even if that's more than we expect.
201 : unsigned char buf[1024];
202 :
203 0 : if (!running || group_leader == -1)
204 0 : return;
205 :
206 0 : ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
207 0 : running = false;
208 :
209 : // read out and reset all the counter values
210 0 : for (const auto& slot : kSlots) {
211 0 : int fd = this->*(slot.fd);
212 0 : if (fd == -1)
213 0 : continue;
214 :
215 0 : if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
216 : uint64_t cur;
217 0 : memcpy(&cur, buf, sizeof(uint64_t));
218 0 : counters->*(slot.counter) += cur;
219 : }
220 :
221 : // Reset the counter regardless of whether the read did what
222 : // we expected.
223 0 : ioctl(fd, PERF_EVENT_IOC_RESET, 0);
224 : }
225 : }
226 :
227 : } // namespace
228 :
229 :
230 : namespace JS {
231 :
// Counters whose events were successfully enabled start at 0; all other
// counters hold uint64_t(-1) as a "not measured" sentinel.
#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)

PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
  : impl(js_new<Impl>()),
    // If the Impl allocation failed, measure nothing; otherwise ask the
    // kernel which of the requested events it can actually count.
    eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
                        : EventMask(0)),
    cpu_cycles(initCtr(CPU_CYCLES)),
    instructions(initCtr(INSTRUCTIONS)),
    cache_references(initCtr(CACHE_REFERENCES)),
    cache_misses(initCtr(CACHE_MISSES)),
    branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
    branch_misses(initCtr(BRANCH_MISSES)),
    bus_cycles(initCtr(BUS_CYCLES)),
    page_faults(initCtr(PAGE_FAULTS)),
    major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
    context_switches(initCtr(CONTEXT_SWITCHES)),
    cpu_migrations(initCtr(CPU_MIGRATIONS))
{
}

#undef initCtr
253 :
254 0 : PerfMeasurement::~PerfMeasurement()
255 : {
256 0 : js_delete(static_cast<Impl*>(impl));
257 0 : }
258 :
259 : void
260 0 : PerfMeasurement::start()
261 : {
262 0 : if (impl)
263 0 : static_cast<Impl*>(impl)->start();
264 0 : }
265 :
266 : void
267 0 : PerfMeasurement::stop()
268 : {
269 0 : if (impl)
270 0 : static_cast<Impl*>(impl)->stop(this);
271 0 : }
272 :
273 : void
274 0 : PerfMeasurement::reset()
275 : {
276 0 : for (const auto& slot : kSlots) {
277 0 : if (eventsMeasured & slot.bit)
278 0 : this->*(slot.counter) = 0;
279 : else
280 0 : this->*(slot.counter) = -1;
281 : }
282 0 : }
283 :
284 : bool
285 0 : PerfMeasurement::canMeasureSomething()
286 : {
287 : // Find out if the kernel implements the performance measurement
288 : // API. If it doesn't, syscall(__NR_perf_event_open, ...) is
289 : // guaranteed to return -1 and set errno to ENOSYS.
290 : //
291 : // We set up input parameters that should provoke an EINVAL error
292 : // from a kernel that does implement perf_event_open, but we can't
293 : // be sure it will (newer kernels might add more event types), so
294 : // we have to take care to close any valid fd it might return.
295 :
296 : struct perf_event_attr attr;
297 0 : memset(&attr, 0, sizeof(attr));
298 0 : attr.size = sizeof(attr);
299 0 : attr.type = PERF_TYPE_MAX;
300 :
301 0 : int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
302 0 : if (fd >= 0) {
303 0 : close(fd);
304 0 : return true;
305 : }
306 0 : return errno != ENOSYS;
307 : }
308 :
309 : } // namespace JS
|