1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2018, Matthew Macy
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/param.h>
30 #include <sys/cpuset.h>
31 #include <sys/event.h>
32 #include <sys/queue.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/sysctl.h>
36 #include <sys/time.h>
37 #include <sys/ttycom.h>
38 #include <sys/user.h>
39 #include <sys/wait.h>
40
41 #include <assert.h>
42 #include <curses.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fcntl.h>
46 #include <getopt.h>
47 #include <kvm.h>
48 #include <libgen.h>
49 #include <limits.h>
50 #include <locale.h>
51 #include <math.h>
52 #include <pmc.h>
53 #include <pmclog.h>
54 #include <regex.h>
55 #include <signal.h>
56 #include <stdarg.h>
57 #include <stdint.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <sysexits.h>
62 #include <unistd.h>
63
64 #include <libpmcstat.h>
65 #include "cmd_pmc.h"
66
/*
 * Look up the kern.clockrate sysctl and hand back the frequency of the
 * kernel's statistics clock.  The program is terminated via err(3) if
 * the sysctl cannot be read.
 */
static int
getstathz(void)
{
	struct clockinfo ci;
	size_t len = sizeof(ci);
	int name[2] = { CTL_KERN, KERN_CLOCKRATE };

	if (sysctl(name, 2, &ci, &len, NULL, 0) == -1)
		err(1, "sysctl kern.clockrate");
	return (ci.stathz);
}
84
/* Number of counters shown in stat mode, and how many of them are fixed. */
#define	STAT_MODE_NPMCS		6
#define	FIXED_MODE_NPMCS	2

/* Slot index of each counter in the stat-mode tables below. */
#define	CYCLES			0
#define	INST			1
#define	BR			2
#define	BR_MISS			3
#define	CACHE			4
#define	CACHE_MISS		5

/* First slot that is not one of the two leading (cycles/instructions) slots. */
#define	IAP_START		BR

/*
 * Monotonic timestamp taken when stat setup finishes; it is the start
 * of the interval reported as "real" time.
 */
static struct timespec before_ts;

/* Default display label for each stat-mode slot. */
static const char *pmc_stat_mode_names[STAT_MODE_NPMCS] = {
	"cycles",
	"instructions",
	"branches",
	"branch-misses",
	"cache-references",
	"cache-misses",
};

/* Common aliases for the desired stat counter */
static const char *pmc_stat_mode_aliases[STAT_MODE_NPMCS] = {
	"unhalted-cycles",
	"instructions",
	"branches",
	"branch-mispredicts",
	"LLC-REFERENCE",
	"LLC-MISSES",
};
113
/*
 * Descriptor pair passed to pmcstat_create_process() and
 * pmcstat_start_process() when launching the command being measured.
 */
static int pmcstat_sockpair[NSOCKPAIRFD];
115
116 static void __dead2
usage(void)117 usage(void)
118 {
119 errx(EX_USAGE,
120 "\t get basic stats from command line program\n"
121 "\t -j <eventlist>, --events <eventlist> comma-delimited list of event specifiers\n"
122 );
123 }
124
/*
 * Print the elapsed wall-clock ("real"), user and system times to `out`,
 * one per line, each followed by its share of the elapsed wall-clock time.
 *
 *   out    - stream the three lines are written to
 *   before - timestamp taken at the start of the interval
 *   after  - timestamp taken at the end of the interval; NOTE: it is
 *            overwritten in place with the difference (after - before)
 *   ru     - resource usage supplying the user and system CPU times
 *
 * Seconds are widened to 64 bits before being scaled to microseconds so
 * the arithmetic cannot overflow where time_t is 32 bits wide, and the
 * hundredths fields are cast to long to match their "%ld" conversions.
 */
static void
showtime(FILE *out, struct timespec *before, struct timespec *after,
    struct rusage *ru)
{
	char decimal_point;
	uint64_t real, user, sys;

	/* Honour the user's locale for the seconds/hundredths separator. */
	(void)setlocale(LC_NUMERIC, "");
	decimal_point = localeconv()->decimal_point[0];

	/* Turn *after into the elapsed interval, borrowing a second if needed. */
	after->tv_sec -= before->tv_sec;
	after->tv_nsec -= before->tv_nsec;
	if (after->tv_nsec < 0) {
		after->tv_sec--;
		after->tv_nsec += 1000000000;
	}

	/* All three totals are expressed in microseconds. */
	real = ((int64_t)after->tv_sec * 1000000000 + after->tv_nsec) / 1000;
	user = (int64_t)ru->ru_utime.tv_sec * 1000000 + ru->ru_utime.tv_usec;
	sys = (int64_t)ru->ru_stime.tv_sec * 1000000 + ru->ru_stime.tv_usec;

	/*
	 * One is added to both numerator and denominator of each
	 * percentage so a zero-length interval cannot divide by zero.
	 */
	fprintf(out, "%13jd%c%02ld real\t\t\t#\t%2.02f%% cpu\n",
	    (intmax_t)after->tv_sec, decimal_point,
	    (long)(after->tv_nsec / 10000000),
	    100 * (double)(sys + user + 1) / (double)(real + 1));
	fprintf(out, "%13jd%c%02ld user\t\t\t#\t%2.2f%% cpu\n",
	    (intmax_t)ru->ru_utime.tv_sec, decimal_point,
	    (long)(ru->ru_utime.tv_usec / 10000),
	    100 * (double)(user + 1) / (double)(real + 1));
	fprintf(out, "%13jd%c%02ld sys\t\t\t#\t%2.02f%% cpu\n",
	    (intmax_t)ru->ru_stime.tv_sec, decimal_point,
	    (long)(ru->ru_stime.tv_usec / 10000),
	    100 * (double)(sys + 1) / (double)(real + 1));
}
155
/*
 * Per-slot event specifier and display label actually in effect for this
 * run.  pmc_stat_setup_stat() fills both from the default tables and then
 * lets user-supplied events replace the trailing slots.
 */
static const char *stat_mode_cntrs[STAT_MODE_NPMCS];
static const char *stat_mode_names[STAT_MODE_NPMCS];
158
/*
 * Prepare pmc_args for a stat run.
 *
 * Fills stat_mode_cntrs/stat_mode_names with the default counters, lets a
 * comma-separated event list in `arg` (may be NULL) replace the trailing
 * slots, sets the pmc_args flags and a /dev/null log, and queues one
 * counting event descriptor per slot on pmc_args.pa_events.  Finally
 * records the start timestamp in before_ts.
 *
 *   system_mode - non-zero for system-scope counters (requires euid 0);
 *                 zero for process-scope counters
 *   arg         - optional user event list; at most
 *                 STAT_MODE_NPMCS - IAP_START entries are used and any
 *                 further entries are ignored
 *
 * Exits via err(3)/errx(3) on any failure.
 */
static void
pmc_stat_setup_stat(int system_mode, const char *arg)
{
	const char *new_cntrs[STAT_MODE_NPMCS];
	struct pmcstat_ev *ev;
	char *counters, *counter;
	size_t namelen;
	int i, start, newcnt;
	cpuset_t cpumask, rootmask;

	if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
	    sizeof(rootmask), &rootmask) == -1)
		err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs");
	CPU_COPY(&rootmask, &cpumask);

	if (system_mode && geteuid() != 0)
		errx(EX_USAGE, "ERROR: system mode counters can only be used as root");

	/* Start from the built-in counter set. */
	for (i = 0; i < STAT_MODE_NPMCS; i++) {
		stat_mode_cntrs[i] = pmc_stat_mode_aliases[i];
		stat_mode_names[i] = pmc_stat_mode_names[i];
	}

	if (arg) {
		/*
		 * The copy is deliberately never freed: the tokens that
		 * strsep() carves out of it are stored in stat_mode_cntrs
		 * and stat_mode_names, which are file-scope tables.
		 */
		counters = strdup(arg);
		if (counters == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");
		newcnt = 0;
		while ((counter = strsep(&counters, ",")) != NULL &&
		    newcnt < STAT_MODE_NPMCS - IAP_START) {
			new_cntrs[newcnt++] = counter;
			if (pmc_pmu_sample_rate_get(counter) == DEFAULT_SAMPLE_COUNT)
				errx(EX_USAGE, "ERROR: %s not recognized on host", counter);
		}
		/* User events take over the last `newcnt` slots. */
		start = IAP_START + STAT_MODE_NPMCS - FIXED_MODE_NPMCS - newcnt;
		for (i = 0; i < newcnt; i++) {
			stat_mode_cntrs[start + i] = new_cntrs[i];
			stat_mode_names[start + i] = new_cntrs[i];
		}
	}

	if (system_mode)
		pmc_args.pa_flags |= FLAG_HAS_SYSTEM_PMCS;
	else
		pmc_args.pa_flags |= FLAG_HAS_PROCESS_PMCS;
	pmc_args.pa_flags |= FLAG_HAS_COUNTING_PMCS;
	pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET;
	pmc_args.pa_flags |= FLAG_HAS_PIPE;
	pmc_args.pa_required |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET | FLAG_HAS_OUTPUT_LOGFILE;
	pmc_args.pa_outputpath = strdup("/dev/null");
	if (pmc_args.pa_outputpath == NULL)
		errx(EX_SOFTWARE, "ERROR: Out of memory.");
	pmc_args.pa_logfd = pmcstat_open_log(pmc_args.pa_outputpath,
	    PMCSTAT_OPEN_FOR_WRITE);

	for (i = 0; i < STAT_MODE_NPMCS; i++) {
		/* Zeroed so that fields not assigned below are well defined. */
		if ((ev = calloc(1, sizeof(*ev))) == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");
		if (system_mode)
			ev->ev_mode = PMC_MODE_SC;
		else
			ev->ev_mode = PMC_MODE_TC;
		ev->ev_spec = strdup(stat_mode_cntrs[i]);
		if (ev->ev_spec == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");

		/*
		 * The event name is the specifier up to the first comma,
		 * space or tab.  Measure the specifier directly; no
		 * temporary copy is needed just to take its length.
		 */
		namelen = strcspn(stat_mode_cntrs[i], ", \t");
		ev->ev_name = malloc(namelen + 1);
		if (ev->ev_name == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");
		memcpy(ev->ev_name, stat_mode_cntrs[i], namelen);
		ev->ev_name[namelen] = '\0';

		ev->ev_count = -1;
		ev->ev_flags = 0;
		ev->ev_flags |= PMC_F_DESCENDANTS;
		ev->ev_cumulative = 1;

		ev->ev_saved = 0LL;
		ev->ev_pmcid = PMC_ID_INVALID;
		STAILQ_INSERT_TAIL(&pmc_args.pa_events, ev, ev_next);
		if (system_mode) {
			/*
			 * Bind this descriptor to the first CPU in the mask
			 * and clone it for the remaining CPUs; the mask is
			 * restored afterwards for the next event.
			 */
			ev->ev_cpu = CPU_FFS(&cpumask) - 1;
			CPU_CLR(ev->ev_cpu, &cpumask);
			pmcstat_clone_event_descriptor(ev, &cpumask, &pmc_args);
			CPU_SET(ev->ev_cpu, &cpumask);
		} else
			ev->ev_cpu = PMC_CPU_ANY;
	}

	if (clock_gettime(CLOCK_MONOTONIC, &before_ts))
		err(1, "clock_gettime");
}
243
244 static void
pmc_stat_print_stat(struct rusage * ru)245 pmc_stat_print_stat(struct rusage *ru)
246 {
247 struct pmcstat_ev *ev;
248 struct timespec after;
249 uint64_t cvals[STAT_MODE_NPMCS];
250 uint64_t ticks, value;
251 int hz, i;
252
253 if (ru) {
254 hz = getstathz();
255 ticks = hz * (ru->ru_utime.tv_sec + ru->ru_stime.tv_sec) +
256 hz * (ru->ru_utime.tv_usec + ru->ru_stime.tv_usec) / 1000000;
257 if (clock_gettime(CLOCK_MONOTONIC, &after))
258 err(1, "clock_gettime");
259 /*
260 * If our round-off on the tick calculation still puts us at 0,
261 * then always assume at least one tick.
262 */
263 if (ticks == 0)
264 ticks = 1;
265 fprintf(pmc_args.pa_printfile, "%16ld %s\t\t#\t%02.03f M/sec\n",
266 ru->ru_minflt, "page faults", ((double)ru->ru_minflt / (double)ticks) / hz);
267 fprintf(pmc_args.pa_printfile, "%16ld %s\t\t#\t%02.03f M/sec\n",
268 ru->ru_nvcsw, "voluntary csw", ((double)ru->ru_nvcsw / (double)ticks) / hz);
269 fprintf(pmc_args.pa_printfile, "%16ld %s\t#\t%02.03f M/sec\n",
270 ru->ru_nivcsw, "involuntary csw", ((double)ru->ru_nivcsw / (double)ticks) / hz);
271 }
272
273 bzero(&cvals, sizeof(cvals));
274 STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) {
275 if (pmc_read(ev->ev_pmcid, &value) < 0)
276 err(EX_OSERR, "ERROR: Cannot read pmc \"%s\"",
277 ev->ev_name);
278 for (i = 0; i < STAT_MODE_NPMCS; i++)
279 if (strcmp(ev->ev_name, stat_mode_cntrs[i]) == 0)
280 cvals[i] += value;
281 }
282
283 fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[CYCLES], stat_mode_names[CYCLES]);
284 fprintf(pmc_args.pa_printfile, "%16jd %s\t\t#\t%01.03f inst/cycle\n", (uintmax_t)cvals[INST], stat_mode_names[INST],
285 (double)cvals[INST] / cvals[CYCLES]);
286 fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[BR], stat_mode_names[BR]);
287 if (stat_mode_names[BR_MISS] == pmc_stat_mode_names[BR_MISS])
288 fprintf(pmc_args.pa_printfile, "%16jd %s\t\t#\t%.03f%%\n",
289 (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS],
290 100 * ((double)cvals[BR_MISS] / cvals[BR]));
291 else
292 fprintf(pmc_args.pa_printfile, "%16jd %s\n",
293 (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS]);
294 fprintf(pmc_args.pa_printfile, "%16jd %s%s", (uintmax_t)cvals[CACHE], stat_mode_names[CACHE],
295 stat_mode_names[CACHE] != pmc_stat_mode_names[CACHE] ? "\n" : "");
296 if (stat_mode_names[CACHE] == pmc_stat_mode_names[CACHE])
297 fprintf(pmc_args.pa_printfile, "\t#\t%.03f refs/inst\n",
298 ((double)cvals[CACHE] / cvals[INST]));
299 fprintf(pmc_args.pa_printfile, "%16jd %s%s", (uintmax_t)cvals[CACHE_MISS], stat_mode_names[CACHE_MISS],
300 stat_mode_names[CACHE_MISS] != pmc_stat_mode_names[CACHE_MISS] ? "\n" : "");
301 if (stat_mode_names[CACHE_MISS] == pmc_stat_mode_names[CACHE_MISS])
302 fprintf(pmc_args.pa_printfile, "\t\t#\t%.03f%%\n",
303 100 * ((double)cvals[CACHE_MISS] / cvals[CACHE]));
304
305 if (ru)
306 showtime(pmc_args.pa_printfile, &before_ts, &after, ru);
307 }
308
/* Long-option table for getopt_long(): --events is a synonym for -j. */
static struct option longopts[] = {
	{ .name = "events", .has_arg = required_argument, .flag = NULL, .val = 'j' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
313
/*
 * Shared implementation of the stat commands.
 *
 * Parses the options (-d enables a once-per-timer-tick debug dump of the
 * counters, -j/--events supplies a user event list), sets up and
 * allocates the PMCs, launches the command given in the remaining
 * arguments, and then services the kqueue until the run is finished.
 * The final summary is printed before returning.
 *
 *   argc, argv  - subcommand arguments; the command to measure follows
 *                 the options and is mandatory
 *   system_mode - non-zero for system-scope counters, zero for
 *                 process-scope counters
 *
 * Returns 0; every failure path exits via err(3)/errx(3)/usage().
 */
static int
pmc_stat_internal(int argc, char **argv, int system_mode)
{
	char *event, *r;
	struct sigaction sa;
	struct kevent kev;
	struct rusage ru;
	struct winsize ws;
	struct pmcstat_ev *ev;
	int c, option, runstate;
	int waitstatus, ru_valid, do_debug;

	do_debug = ru_valid = 0;
	r = event = NULL;
	while ((option = getopt_long(argc, argv, "dj:", longopts, NULL)) != -1) {
		switch (option) {
		case 'j':
			/* A repeated -j replaces the earlier list. */
			free(r);
			r = event = strdup(optarg);
			if (event == NULL)
				errx(EX_SOFTWARE, "ERROR: Out of memory.");
			break;
		case 'd':
			do_debug = 1;
			break;
		case '?':
		default:
			usage();
		}
	}
	pmc_args.pa_argc = (argc -= optind);
	pmc_args.pa_argv = (argv += optind);
	if (argc == 0)
		usage();
	pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE;
	pmc_stat_setup_stat(system_mode, event);
	/* pmc_stat_setup_stat() works on its own copy of the event list. */
	free(r);
	bzero(&ru, sizeof(ru));
	EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT");

	EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO");
	/* Periodic timer; it drives the -d debug dump in the loop below. */
	EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, 1000, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR,
		    "ERROR: Cannot register kevent for timer");

	STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) {
		if (pmc_allocate(ev->ev_spec, ev->ev_mode,
		    ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid, ev->ev_count) < 0)
			err(EX_OSERR,
			    "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"",
			    PMC_IS_SYSTEM_MODE(ev->ev_mode) ?
			    "system" : "process", ev->ev_spec);

		if (PMC_IS_SAMPLING_MODE(ev->ev_mode) &&
		    pmc_set(ev->ev_pmcid, ev->ev_count) < 0)
			err(EX_OSERR,
			    "ERROR: Cannot set sampling count for PMC \"%s\"",
			    ev->ev_name);
	}

	/*
	 * An exec() failure of a forked child is signalled by the
	 * child sending the parent a SIGCHLD.  We don't register an
	 * actual signal handler for SIGCHLD, but instead use our
	 * kqueue to pick up the signal.
	 */
	EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD");

	pmcstat_create_process(pmcstat_sockpair, &pmc_args, pmc_kq);

	if (SLIST_EMPTY(&pmc_args.pa_targets))
		errx(EX_DATAERR,
		    "ERROR: No matching target processes.");
	if (pmc_args.pa_flags & FLAG_HAS_PROCESS_PMCS)
		pmcstat_attach_pmcs(&pmc_args);

	/* start the pmcs */
	pmc_util_start_pmcs(&pmc_args);

	/* start the (commandline) process if needed */
	pmcstat_start_process(pmcstat_sockpair);

	/* Handle SIGINT using the kqueue loop */
	sa.sa_handler = SIG_IGN;
	sa.sa_flags = 0;
	(void)sigemptyset(&sa.sa_mask);

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot install signal handler");

	/*
	 * loop till either the target process (if any) exits, or we
	 * are killed by a SIGINT or we reached the time duration.
	 */
	runstate = PMCSTAT_RUNNING;
	do {
		if ((c = kevent(pmc_kq, NULL, 0, &kev, 1, NULL)) <= 0) {
			if (errno != EINTR)
				err(EX_OSERR, "ERROR: kevent failed");
			else
				continue;
		}
		if (kev.flags & EV_ERROR)
			errc(EX_OSERR, kev.data, "ERROR: kevent failed");

		switch (kev.filter) {
		case EVFILT_PROC:	/* target has exited */
			/*
			 * Reap the target, then record the resource usage
			 * of all waited-for children for the summary.
			 */
			if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) {
				getrusage(RUSAGE_CHILDREN, &ru);
				ru_valid = 1;
			}
			break;

		case EVFILT_READ:	/* log file data is present */
			break;
		case EVFILT_TIMER:
			if (do_debug)
				pmc_stat_print_stat(NULL);
			break;
		case EVFILT_SIGNAL:
			if (kev.ident == SIGCHLD) {
				/*
				 * The child process sends us a
				 * SIGCHLD if its exec() failed.  We
				 * wait for it to exit and then exit
				 * ourselves.
				 */
				(void)wait(&c);
				runstate = PMCSTAT_FINISHED;
			} else if (kev.ident == SIGIO) {
				/*
				 * We get a SIGIO if a PMC loses all
				 * of its targets, or if logfile
				 * writes encounter an error.
				 */
				if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) {
					getrusage(RUSAGE_CHILDREN, &ru);
					ru_valid = 1;
				}
				runstate = pmcstat_close_log(&pmc_args);
			} else if (kev.ident == SIGINT) {
				/* Kill the child process if we started it */
				if (pmc_args.pa_flags & FLAG_HAS_COMMANDLINE)
					pmc_util_kill_process(&pmc_args);
				runstate = pmcstat_close_log(&pmc_args);
			} else if (kev.ident == SIGWINCH) {
				if (ioctl(fileno(pmc_args.pa_printfile),
				    TIOCGWINSZ, &ws) < 0)
					err(EX_OSERR,
					    "ERROR: Cannot determine window size");
				pmc_displayheight = ws.ws_row - 1;
				pmc_displaywidth = ws.ws_col - 1;
			} else
				assert(0);

			break;
		}
	} while (runstate != PMCSTAT_FINISHED);
	if (!ru_valid)
		warnx("couldn't get rusage");
	pmc_stat_print_stat(&ru);
	pmc_util_cleanup(&pmc_args);
	return (0);
}
482
/*
 * Command entry point for a stat run with process-scope counters:
 * forwards the arguments to pmc_stat_internal() with system mode off
 * and returns its result.
 */
int
cmd_pmc_stat(int argc, char **argv)
{
	const int system_mode = 0;

	return (pmc_stat_internal(argc, argv, system_mode));
}
488
/*
 * Command entry point for a stat run with system-scope counters:
 * forwards the arguments to pmc_stat_internal() with system mode on
 * and returns its result.
 */
int
cmd_pmc_stat_system(int argc, char **argv)
{
	const int system_mode = 1;

	return (pmc_stat_internal(argc, argv, system_mode));
}
494