/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2018, Matthew Macy
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/event.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/ttycom.h>
#include <sys/user.h>
#include <sys/wait.h>

#include <assert.h>
#include <curses.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <kvm.h>
#include <libgen.h>
#include <limits.h>
#include <locale.h>
#include <math.h>
#include <pmc.h>
#include <pmclog.h>
#include <regex.h>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <libpmcstat.h>
#include "cmd_pmc.h"

/*
 * Read the kern.clockrate sysctl and hand back its stathz member.
 * Exits through err(3) with status 1 when the sysctl cannot be read.
 */
static int
getstathz(void)
{
	struct clockinfo ci;
	size_t len = sizeof(ci);
	int name[2] = { CTL_KERN, KERN_CLOCKRATE };

	if (sysctl(name, 2, &ci, &len, NULL, 0) == -1)
		err(1, "sysctl kern.clockrate");
	return (ci.stathz);
}

/* Number of counter slots reported in stat mode. */
#define	STAT_MODE_NPMCS		6
#define	FIXED_MODE_NPMCS	2

/* Slot indices; they line up with the entries of the two tables below. */
#define	CYCLES		0
#define	INST		1
#define	BR		2
#define	IAP_START	BR
#define	BR_MISS		3
#define	CACHE		4
#define	CACHE_MISS	5

/* CLOCK_MONOTONIC timestamp recorded once the stat events are set up. */
static struct timespec before_ts;

/* Labels printed next to each default counter. */
static const char *pmc_stat_mode_names[] = {
	"cycles",
	"instructions",
	"branches",
	"branch-misses",
	"cache-references",
	"cache-misses",
};

/* Common aliases for the desired stat counter */
static const char *pmc_stat_mode_aliases[] = {
	"unhalted-cycles",
	"instructions",
	"branches",
	"branch-mispredicts",
	"LLC-REFERENCE",
	"LLC-MISSES",
};

static int pmcstat_sockpair[NSOCKPAIRFD];

static void __dead2 120 usage(void) 121 { 122 errx(EX_USAGE, 123 "\t get basic stats from command line program\n" 124 "\t -j <eventlist>, --events <eventlist> comma-delimited list of event specifiers\n" 125 ); 126 } 127 128 static void 129 showtime(FILE *out, struct timespec *before, struct timespec *after, 130 struct rusage *ru) 131 { 132 char decimal_point; 133 uint64_t real, user, sys; 134 135 (void)setlocale(LC_NUMERIC, ""); 136 decimal_point = localeconv()->decimal_point[0]; 137 138 after->tv_sec -= before->tv_sec; 139 after->tv_nsec -= before->tv_nsec; 140 if (after->tv_nsec < 0) { 141 after->tv_sec--; 142 after->tv_nsec += 1000000000; 143 } 144 145 real = (after->tv_sec * 1000000000 + after->tv_nsec) / 1000; 146 user = ru->ru_utime.tv_sec * 1000000 + ru->ru_utime.tv_usec; 147 sys = ru->ru_stime.tv_sec * 1000000 + ru->ru_stime.tv_usec; 148 fprintf(out, "%13jd%c%02ld real\t\t\t#\t%2.02f%% cpu\n", 149 (intmax_t)after->tv_sec, decimal_point, 150 after->tv_nsec / 10000000, 100 * (double)(sys + user + 1) / (double)(real + 1)); 151 fprintf(out, "%13jd%c%02ld user\t\t\t#\t%2.2f%% cpu\n", 152 (intmax_t)ru->ru_utime.tv_sec, decimal_point, 153 ru->ru_utime.tv_usec / 10000, 100 * (double)(user + 1) / (double)(real + 1)); 154 fprintf(out, "%13jd%c%02ld sys\t\t\t#\t%2.02f%% cpu\n", 155 (intmax_t)ru->ru_stime.tv_sec, decimal_point, 156 ru->ru_stime.tv_usec / 10000, 100 * (double)(sys + 1) / (double)(real + 1)); 157 } 158 159 static const char *stat_mode_cntrs[STAT_MODE_NPMCS]; 160 static const char *stat_mode_names[STAT_MODE_NPMCS]; 161 162 static void 163 pmc_stat_setup_stat(int system_mode, const char *arg) 164 { 165 const char *new_cntrs[STAT_MODE_NPMCS]; 166 struct pmcstat_ev *ev; 167 char *counters, *counter; 168 int i, c, start, newcnt; 169 cpuset_t cpumask, rootmask; 170 171 if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, 172 sizeof(rootmask), &rootmask) == -1) 173 err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs"); 174 CPU_COPY(&rootmask, 
&cpumask); 175 176 if (system_mode && geteuid() != 0) 177 errx(EX_USAGE, "ERROR: system mode counters can only be used as root"); 178 counters = NULL; 179 for (i = 0; i < STAT_MODE_NPMCS; i++) { 180 stat_mode_cntrs[i] = pmc_stat_mode_aliases[i]; 181 stat_mode_names[i] = pmc_stat_mode_names[i]; 182 } 183 if (arg) { 184 counters = strdup(arg); 185 newcnt = 0; 186 while ((counter = strsep(&counters, ",")) != NULL && 187 newcnt < STAT_MODE_NPMCS - IAP_START) { 188 new_cntrs[newcnt++] = counter; 189 if (pmc_pmu_sample_rate_get(counter) == DEFAULT_SAMPLE_COUNT) 190 errx(EX_USAGE, "ERROR: %s not recognized on host", counter); 191 } 192 start = IAP_START + STAT_MODE_NPMCS - FIXED_MODE_NPMCS - newcnt; 193 for (i = 0; i < newcnt; i++) { 194 stat_mode_cntrs[start + i] = new_cntrs[i]; 195 stat_mode_names[start + i] = new_cntrs[i]; 196 } 197 } 198 if (system_mode) 199 pmc_args.pa_flags |= FLAG_HAS_SYSTEM_PMCS; 200 else 201 pmc_args.pa_flags |= FLAG_HAS_PROCESS_PMCS; 202 pmc_args.pa_flags |= FLAG_HAS_COUNTING_PMCS; 203 pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET; 204 pmc_args.pa_flags |= FLAG_HAS_PIPE; 205 pmc_args.pa_required |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET | FLAG_HAS_OUTPUT_LOGFILE; 206 pmc_args.pa_outputpath = strdup("/dev/null"); 207 pmc_args.pa_logfd = pmcstat_open_log(pmc_args.pa_outputpath, 208 PMCSTAT_OPEN_FOR_WRITE); 209 for (i = 0; i < STAT_MODE_NPMCS; i++) { 210 if ((ev = malloc(sizeof(*ev))) == NULL) 211 errx(EX_SOFTWARE, "ERROR: Out of memory."); 212 if (system_mode) 213 ev->ev_mode = PMC_MODE_SC; 214 else 215 ev->ev_mode = PMC_MODE_TC; 216 ev->ev_spec = strdup(stat_mode_cntrs[i]); 217 if (ev->ev_spec == NULL) 218 errx(EX_SOFTWARE, "ERROR: Out of memory."); 219 c = strcspn(strdup(stat_mode_cntrs[i]), ", \t"); 220 ev->ev_name = malloc(c + 1); 221 if (ev->ev_name == NULL) 222 errx(EX_SOFTWARE, "ERROR: Out of memory."); 223 (void)strncpy(ev->ev_name, stat_mode_cntrs[i], c); 224 *(ev->ev_name + c) = '\0'; 225 226 ev->ev_count = -1; 227 
ev->ev_flags = 0; 228 ev->ev_flags |= PMC_F_DESCENDANTS; 229 ev->ev_cumulative = 1; 230 231 ev->ev_saved = 0LL; 232 ev->ev_pmcid = PMC_ID_INVALID; 233 STAILQ_INSERT_TAIL(&pmc_args.pa_events, ev, ev_next); 234 if (system_mode) { 235 ev->ev_cpu = CPU_FFS(&cpumask) - 1; 236 CPU_CLR(ev->ev_cpu, &cpumask); 237 pmcstat_clone_event_descriptor(ev, &cpumask, &pmc_args); 238 CPU_SET(ev->ev_cpu, &cpumask); 239 } else 240 ev->ev_cpu = PMC_CPU_ANY; 241 242 } 243 if (clock_gettime(CLOCK_MONOTONIC, &before_ts)) 244 err(1, "clock_gettime"); 245 } 246 247 static void 248 pmc_stat_print_stat(struct rusage *ru) 249 { 250 struct pmcstat_ev *ev; 251 struct timespec after; 252 uint64_t cvals[STAT_MODE_NPMCS]; 253 uint64_t ticks, value; 254 int hz, i; 255 256 if (ru) { 257 hz = getstathz(); 258 ticks = hz * (ru->ru_utime.tv_sec + ru->ru_stime.tv_sec) + 259 hz * (ru->ru_utime.tv_usec + ru->ru_stime.tv_usec) / 1000000; 260 if (clock_gettime(CLOCK_MONOTONIC, &after)) 261 err(1, "clock_gettime"); 262 /* 263 * If our round-off on the tick calculation still puts us at 0, 264 * then always assume at least one tick. 
265 */ 266 if (ticks == 0) 267 ticks = 1; 268 fprintf(pmc_args.pa_printfile, "%16ld %s\t\t#\t%02.03f M/sec\n", 269 ru->ru_minflt, "page faults", ((double)ru->ru_minflt / (double)ticks) / hz); 270 fprintf(pmc_args.pa_printfile, "%16ld %s\t\t#\t%02.03f M/sec\n", 271 ru->ru_nvcsw, "voluntary csw", ((double)ru->ru_nvcsw / (double)ticks) / hz); 272 fprintf(pmc_args.pa_printfile, "%16ld %s\t#\t%02.03f M/sec\n", 273 ru->ru_nivcsw, "involuntary csw", ((double)ru->ru_nivcsw / (double)ticks) / hz); 274 } 275 276 bzero(&cvals, sizeof(cvals)); 277 STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) { 278 if (pmc_read(ev->ev_pmcid, &value) < 0) 279 err(EX_OSERR, "ERROR: Cannot read pmc \"%s\"", 280 ev->ev_name); 281 for (i = 0; i < STAT_MODE_NPMCS; i++) 282 if (strcmp(ev->ev_name, stat_mode_cntrs[i]) == 0) 283 cvals[i] += value; 284 } 285 286 fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[CYCLES], stat_mode_names[CYCLES]); 287 fprintf(pmc_args.pa_printfile, "%16jd %s\t\t#\t%01.03f inst/cycle\n", (uintmax_t)cvals[INST], stat_mode_names[INST], 288 (double)cvals[INST] / cvals[CYCLES]); 289 fprintf(pmc_args.pa_printfile, "%16jd %s\n", (uintmax_t)cvals[BR], stat_mode_names[BR]); 290 if (stat_mode_names[BR_MISS] == pmc_stat_mode_names[BR_MISS]) 291 fprintf(pmc_args.pa_printfile, "%16jd %s\t\t#\t%.03f%%\n", 292 (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS], 293 100 * ((double)cvals[BR_MISS] / cvals[BR])); 294 else 295 fprintf(pmc_args.pa_printfile, "%16jd %s\n", 296 (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS]); 297 fprintf(pmc_args.pa_printfile, "%16jd %s%s", (uintmax_t)cvals[CACHE], stat_mode_names[CACHE], 298 stat_mode_names[CACHE] != pmc_stat_mode_names[CACHE] ? 
"\n" : ""); 299 if (stat_mode_names[CACHE] == pmc_stat_mode_names[CACHE]) 300 fprintf(pmc_args.pa_printfile, "\t#\t%.03f refs/inst\n", 301 ((double)cvals[CACHE] / cvals[INST])); 302 fprintf(pmc_args.pa_printfile, "%16jd %s%s", (uintmax_t)cvals[CACHE_MISS], stat_mode_names[CACHE_MISS], 303 stat_mode_names[CACHE_MISS] != pmc_stat_mode_names[CACHE_MISS] ? "\n" : ""); 304 if (stat_mode_names[CACHE_MISS] == pmc_stat_mode_names[CACHE_MISS]) 305 fprintf(pmc_args.pa_printfile, "\t\t#\t%.03f%%\n", 306 100 * ((double)cvals[CACHE_MISS] / cvals[CACHE])); 307 308 if (ru) 309 showtime(pmc_args.pa_printfile, &before_ts, &after, ru); 310 } 311 312 static struct option longopts[] = { 313 {"events", required_argument, NULL, 'j'}, 314 {NULL, 0, NULL, 0} 315 }; 316 317 static int 318 pmc_stat_internal(int argc, char **argv, int system_mode) 319 { 320 char *event, *r; 321 struct sigaction sa; 322 struct kevent kev; 323 struct rusage ru; 324 struct winsize ws; 325 struct pmcstat_ev *ev; 326 int c, option, runstate; 327 int waitstatus, ru_valid, do_debug; 328 329 do_debug = ru_valid = 0; 330 r = event = NULL; 331 while ((option = getopt_long(argc, argv, "dj:", longopts, NULL)) != -1) { 332 switch (option) { 333 case 'j': 334 r = event = strdup(optarg); 335 break; 336 case 'd': 337 do_debug = 1; 338 break; 339 case '?': 340 default: 341 usage(); 342 } 343 } 344 pmc_args.pa_argc = (argc -= optind); 345 pmc_args.pa_argv = (argv += optind); 346 if (argc == 0) 347 usage(); 348 pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE; 349 pmc_stat_setup_stat(system_mode, event); 350 free(r); 351 bzero(&ru, sizeof(ru)); 352 EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); 353 if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) 354 err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT"); 355 356 EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); 357 if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) 358 err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO"); 359 EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 
0, 1000, NULL); 360 if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) 361 err(EX_OSERR, 362 "ERROR: Cannot register kevent for timer"); 363 364 STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) { 365 if (pmc_allocate(ev->ev_spec, ev->ev_mode, 366 ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid, ev->ev_count) < 0) 367 err(EX_OSERR, 368 "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"", 369 PMC_IS_SYSTEM_MODE(ev->ev_mode) ? 370 "system" : "process", ev->ev_spec); 371 372 if (PMC_IS_SAMPLING_MODE(ev->ev_mode) && 373 pmc_set(ev->ev_pmcid, ev->ev_count) < 0) 374 err(EX_OSERR, 375 "ERROR: Cannot set sampling count for PMC \"%s\"", 376 ev->ev_name); 377 } 378 379 /* 380 * An exec() failure of a forked child is signalled by the 381 * child sending the parent a SIGCHLD. We don't register an 382 * actual signal handler for SIGCHLD, but instead use our 383 * kqueue to pick up the signal. 384 */ 385 EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); 386 if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0) 387 err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD"); 388 389 pmcstat_create_process(pmcstat_sockpair, &pmc_args, pmc_kq); 390 391 if (SLIST_EMPTY(&pmc_args.pa_targets)) 392 errx(EX_DATAERR, 393 "ERROR: No matching target processes."); 394 if (pmc_args.pa_flags & FLAG_HAS_PROCESS_PMCS) 395 pmcstat_attach_pmcs(&pmc_args); 396 397 /* start the pmcs */ 398 pmc_util_start_pmcs(&pmc_args); 399 400 /* start the (commandline) process if needed */ 401 pmcstat_start_process(pmcstat_sockpair); 402 403 /* Handle SIGINT using the kqueue loop */ 404 sa.sa_handler = SIG_IGN; 405 sa.sa_flags = 0; 406 (void)sigemptyset(&sa.sa_mask); 407 408 if (sigaction(SIGINT, &sa, NULL) < 0) 409 err(EX_OSERR, "ERROR: Cannot install signal handler"); 410 411 /* 412 * loop till either the target process (if any) exits, or we 413 * are killed by a SIGINT or we reached the time duration. 
414 */ 415 runstate = PMCSTAT_RUNNING; 416 do { 417 if ((c = kevent(pmc_kq, NULL, 0, &kev, 1, NULL)) <= 0) { 418 if (errno != EINTR) 419 err(EX_OSERR, "ERROR: kevent failed"); 420 else 421 continue; 422 } 423 if (kev.flags & EV_ERROR) 424 errc(EX_OSERR, kev.data, "ERROR: kevent failed"); 425 426 switch (kev.filter) { 427 case EVFILT_PROC: /* target has exited */ 428 if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) { 429 getrusage(RUSAGE_CHILDREN, &ru); 430 ru_valid = 1; 431 } 432 break; 433 434 case EVFILT_READ: /* log file data is present */ 435 break; 436 case EVFILT_TIMER: 437 if (do_debug) 438 pmc_stat_print_stat(NULL); 439 break; 440 case EVFILT_SIGNAL: 441 if (kev.ident == SIGCHLD) { 442 /* 443 * The child process sends us a 444 * SIGCHLD if its exec() failed. We 445 * wait for it to exit and then exit 446 * ourselves. 447 */ 448 (void)wait(&c); 449 runstate = PMCSTAT_FINISHED; 450 } else if (kev.ident == SIGIO) { 451 /* 452 * We get a SIGIO if a PMC loses all 453 * of its targets, or if logfile 454 * writes encounter an error. 
455 */ 456 if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) { 457 getrusage(RUSAGE_CHILDREN, &ru); 458 ru_valid = 1; 459 } 460 runstate = pmcstat_close_log(&pmc_args); 461 } else if (kev.ident == SIGINT) { 462 /* Kill the child process if we started it */ 463 if (pmc_args.pa_flags & FLAG_HAS_COMMANDLINE) 464 pmc_util_kill_process(&pmc_args); 465 runstate = pmcstat_close_log(&pmc_args); 466 } else if (kev.ident == SIGWINCH) { 467 if (ioctl(fileno(pmc_args.pa_printfile), 468 TIOCGWINSZ, &ws) < 0) 469 err(EX_OSERR, 470 "ERROR: Cannot determine window size"); 471 pmc_displayheight = ws.ws_row - 1; 472 pmc_displaywidth = ws.ws_col - 1; 473 } else 474 assert(0); 475 476 break; 477 } 478 } while (runstate != PMCSTAT_FINISHED); 479 if (!ru_valid) 480 warnx("couldn't get rusage"); 481 pmc_stat_print_stat(&ru); 482 pmc_util_cleanup(&pmc_args); 483 return (0); 484 } 485 486 int 487 cmd_pmc_stat(int argc, char **argv) 488 { 489 return (pmc_stat_internal(argc, argv, 0)); 490 } 491 492 int 493 cmd_pmc_stat_system(int argc, char **argv) 494 { 495 return (pmc_stat_internal(argc, argv, 1)); 496 } 497