/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cpucmds.h" static struct options { int debug; int verbose; int dotitle; int dohelp; int dotick; int cpuver; char *pgmname; uint_t mseconds; uint_t nsamples; uint_t nsets; cpc_setgrp_t *master; int followfork; int followexec; pid_t pid; FILE *log; } __options; static const struct options *opts = (const struct options *)&__options; static cpc_t *cpc; /* * How many signals caught from terminal * We bail out as soon as possible when interrupt is set */ static int interrupt = 0; /*ARGSUSED*/ static void cputrack_errfn(const char *fn, int subcode, const char *fmt, va_list ap) { (void) fprintf(stderr, "%s: ", opts->pgmname); if (opts->debug) (void) fprintf(stderr, "%s: ", fn); (void) vfprintf(stderr, fmt, ap); } static void cputrack_pctx_errfn(const char *fn, const char *fmt, va_list ap) { cputrack_errfn(fn, -1, fmt, ap); } static int cputrack(int argc, char *argv[], int optind); static void intr(int); #if defined(__i386) static void p4_ht_error(void); #endif #if !defined(TEXT_DOMAIN) #define TEXT_DOMAIN "SYS_TEST" #endif int main(int argc, char *argv[]) { struct options *opts = &__options; int c, errcnt = 0; int nsamples; cpc_setgrp_t *sgrp; char *errstr; int ret; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); if ((opts->pgmname = strrchr(argv[0], '/')) == NULL) opts->pgmname = argv[0]; else opts->pgmname++; if ((cpc = cpc_open(CPC_VER_CURRENT)) == NULL) { errstr = strerror(errno); (void) fprintf(stderr, gettext("%s: cannot access performance " "counter library - %s\n"), opts->pgmname, errstr); return (1); } (void) cpc_seterrhndlr(cpc, cputrack_errfn); strtoset_errfn = cputrack_errfn; /* * Establish (non-zero) defaults */ opts->mseconds = 1000; opts->dotitle = 1; opts->log = stdout; if ((opts->master = cpc_setgrp_new(cpc, 0)) == NULL) { (void) fprintf(stderr, gettext("%s: no memory available\n"), opts->pgmname); exit(1); } while ((c = getopt(argc, argv, "T:N:Defhntvo:r:c:p:")) != EOF) switch (c) { case 'T': /* sample time, seconds */ opts->mseconds = (uint_t)(atof(optarg) * 1000.0); break; case 'N': /* number of samples */ nsamples = atoi(optarg); if (nsamples < 0) errcnt++; else opts->nsamples = (uint_t)nsamples; break; case 'D': /* enable debugging */ opts->debug++; break; case 'f': /* follow fork */ opts->followfork++; break; case 'e': /* follow exec */ opts->followexec++; break; case 'n': /* no titles */ opts->dotitle = 0; break; case 't': /* print %tick */ opts->dotick = 1; break; case 'v': opts->verbose = 1; /* more chatty */ break; case 'o': if (optarg == NULL) { errcnt++; break; } if ((opts->log = fopen(optarg, "w")) == NULL) { (void) fprintf(stderr, gettext( "%s: cannot open '%s' for writing\n"), opts->pgmname, optarg); return (1); } break; case 'c': /* specify statistics */ if ((sgrp = cpc_setgrp_newset(opts->master, optarg, &errcnt)) != NULL) opts->master = sgrp; break; case 'p': /* grab given pid */ if ((opts->pid = atoi(optarg)) <= 0) errcnt++; break; case 'h': opts->dohelp = 1; break; case '?': default: errcnt++; break; } if (opts->nsamples == 0) opts->nsamples = UINT_MAX; if (errcnt != 0 || opts->dohelp || (argc == optind && opts->pid == 0) || (argc > optind && opts->pid != 0) || (opts->nsets = cpc_setgrp_numsets(opts->master)) == 0) { (void) fprintf(opts->dohelp ? stdout : stderr, gettext( "Usage:\n\t%s [-T secs] [-N count] [-Defhnv] [-o file]\n" "\t\t-c events [command [args] | -p pid]\n\n" "\t-T secs\t seconds between samples, default 1\n" "\t-N count number of samples, default unlimited\n" "\t-D\t enable debug mode\n" "\t-e\t follow exec(2), and execve(2)\n" "\t-f\t follow fork(2), fork1(2), and vfork(2)\n" "\t-h\t print extended usage information\n" "\t-n\t suppress titles\n" "\t-t\t include virtualized %s register\n" "\t-v\t verbose mode\n" "\t-o file\t write cpu statistics to this file\n" "\t-c events specify processor events to be monitored\n" "\t-p pid\t pid of existing process to capture\n\n" "\tUse cpustat(1M) to monitor system-wide statistics.\n"), opts->pgmname, CPC_TICKREG_NAME); if (opts->dohelp) { (void) putchar('\n'); (void) capabilities(cpc, stdout); exit(0); } exit(2); } /* * Catch signals from terminal, so they can be handled asynchronously * when we're ready instead of when we're not (;-) */ if (sigset(SIGHUP, SIG_IGN) == SIG_DFL) (void) sigset(SIGHUP, intr); if (sigset(SIGINT, SIG_IGN) == SIG_DFL) (void) sigset(SIGINT, intr); if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL) (void) sigset(SIGQUIT, intr); (void) sigset(SIGPIPE, intr); (void) sigset(SIGTERM, intr); cpc_setgrp_reset(opts->master); (void) setvbuf(opts->log, NULL, _IOLBF, 0); ret = cputrack(argc, argv, optind); (void) cpc_close(cpc); return (ret); } static void print_title(cpc_setgrp_t *sgrp) { (void) fprintf(opts->log, "%7s ", "time"); if (opts->followfork) (void) fprintf(opts->log, "%6s ", "pid"); (void) fprintf(opts->log, "%3s %10s ", "lwp", "event"); if (opts->dotick) (void) fprintf(opts->log, "%9s ", CPC_TICKREG_NAME); (void) fprintf(opts->log, "%s\n", cpc_setgrp_gethdr(sgrp)); (void) fflush(opts->log); } static void print_exec(float now, pid_t pid, char *name) { if (name == NULL) name = "(unknown)"; (void) fprintf(opts->log, "%7.3f ", now); if (opts->followfork) (void) fprintf(opts->log, "%6d ", (int)pid); (void) fprintf(opts->log, "%3d %10s ", 1, "exec"); if (opts->dotick) (void) fprintf(opts->log, "%9s ", ""); (void) fprintf(opts->log, "%9s %9s # '%s'\n", "", "", name); (void) fflush(opts->log); } static void print_fork(float now, pid_t newpid, id_t lwpid, pid_t oldpid) { (void) fprintf(opts->log, "%7.3f ", now); if (opts->followfork) (void) fprintf(opts->log, "%6d ", (int)oldpid); (void) fprintf(opts->log, "%3d %10s ", (int)lwpid, "fork"); if (opts->dotick) (void) fprintf(opts->log, "%9s ", ""); (void) fprintf(opts->log, "%9s %9s # %d\n", "", "", (int)newpid); (void) fflush(opts->log); } static void print_sample(pid_t pid, id_t lwpid, char *pevent, cpc_buf_t *buf, int nreq, const char *evname) { uint64_t val; int i; (void) fprintf(opts->log, "%7.3f ", mstimestamp(cpc_buf_hrtime(cpc, buf))); if (opts->followfork) (void) fprintf(opts->log, "%6d ", (int)pid); (void) fprintf(opts->log, "%3d %10s ", (int)lwpid, pevent); if (opts->dotick) (void) fprintf(opts->log, "%9" PRId64 " ", cpc_buf_tick(cpc, buf)); for (i = 0; i < nreq; i++) { (void) cpc_buf_get(cpc, buf, i, &val); (void) fprintf(opts->log, "%9" PRId64 " ", val); } if (opts->nsets > 1) (void) fprintf(opts->log, " # %s\n", evname); else (void) fputc('\n', opts->log); } struct pstate { cpc_setgrp_t *accum; cpc_setgrp_t **sgrps; int maxlwpid; }; static int pinit_lwp(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg) { struct pstate *state = arg; cpc_setgrp_t *sgrp; cpc_set_t *set; cpc_buf_t **data1, **data2, **scratch; char *errstr; int nreq; if (interrupt) return (0); if (state->maxlwpid < lwpid) { state->sgrps = realloc(state->sgrps, lwpid * sizeof (state->sgrps)); if (state->sgrps == NULL) { (void) fprintf(stderr, gettext( "%6d: init_lwp: out of memory\n"), (int)pid); return (-1); } while (state->maxlwpid < lwpid) { state->sgrps[state->maxlwpid] = NULL; state->maxlwpid++; } } if ((sgrp = state->sgrps[lwpid-1]) == NULL) { if ((sgrp = cpc_setgrp_clone(opts->master)) == NULL) { (void) fprintf(stderr, gettext( "%6d: init_lwp: out of memory\n"), (int)pid); return (-1); } state->sgrps[lwpid-1] = sgrp; set = cpc_setgrp_getset(sgrp); } else { cpc_setgrp_reset(sgrp); set = cpc_setgrp_getset(sgrp); } nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch); if (cpc_bind_pctx(cpc, pctx, lwpid, set, 0) != 0 || cpc_set_sample(cpc, set, *data2) != 0) { errstr = strerror(errno); if (errno == EAGAIN) (void) cpc_unbind(cpc, set); #if defined(__i386) if (errno == EACCES) p4_ht_error(); else #endif (void) fprintf(stderr, gettext( "%6d: init_lwp: can't bind perf counters " "to lwp%d - %s\n"), (int)pid, (int)lwpid, errstr); return (-1); } if (opts->verbose) print_sample(pid, lwpid, "init_lwp", *data2, nreq, cpc_setgrp_getname(sgrp)); return (0); } /*ARGSUSED*/ static int pfini_lwp(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg) { struct pstate *state = arg; cpc_setgrp_t *sgrp = state->sgrps[lwpid-1]; cpc_set_t *set; char *errstr; cpc_buf_t **data1, **data2, **scratch; int nreq; if (interrupt) return (0); set = cpc_setgrp_getset(sgrp); nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch); if (cpc_set_sample(cpc, set, *scratch) == 0) { if (opts->nsets == 1) { /* * When we only have one set of counts, the sample * gives us the accumulated count. */ *data1 = *scratch; } else { /* * When we have more than one set of counts, the * sample gives us the count for the latest sample * period. *data1 contains the accumulated count but * does not include the count for the latest sample * period for this set of counters. */ cpc_buf_add(cpc, *data1, *data1, *scratch); } if (opts->verbose) print_sample(pid, lwpid, "fini_lwp", *data1, nreq, cpc_setgrp_getname(sgrp)); cpc_setgrp_accum(state->accum, sgrp); if (cpc_unbind(cpc, set) == 0) return (0); } switch (errno) { case EAGAIN: (void) fprintf(stderr, gettext("%6d: fini_lwp: " "lwp%d: perf counter contents invalidated\n"), (int)pid, (int)lwpid); break; default: errstr = strerror(errno); (void) fprintf(stderr, gettext("%6d: fini_lwp: " "lwp%d: can't access perf counters - %s\n"), (int)pid, (int)lwpid, errstr); break; } return (-1); } /*ARGSUSED*/ static int plwp_create(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg) { cpc_setgrp_t *sgrp = opts->master; cpc_buf_t **data1, **data2, **scratch; int nreq; if (interrupt) return (0); nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch); print_sample(pid, lwpid, "lwp_create", *data1, nreq, cpc_setgrp_getname(sgrp)); return (0); } /*ARGSUSED*/ static int plwp_exit(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg) { struct pstate *state = arg; cpc_setgrp_t *sgrp = state->sgrps[lwpid-1]; cpc_set_t *start; int nreq; cpc_buf_t **data1, **data2, **scratch; if (interrupt) return (0); start = cpc_setgrp_getset(sgrp); do { nreq = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch); if (cpc_buf_hrtime(cpc, *data1) == 0) continue; print_sample(pid, lwpid, "lwp_exit", *data1, nreq, cpc_setgrp_getname(sgrp)); } while (cpc_setgrp_nextset(sgrp) != start); return (0); } /*ARGSUSED*/ static int pexec(pctx_t *pctx, pid_t pid, id_t lwpid, char *name, void *arg) { struct pstate *state = arg; float now = 0.0; cpc_set_t *start; int nreq; cpc_buf_t **data1, **data2, **scratch; hrtime_t hrt; if (interrupt) return (0); /* * Print the accumulated results from the previous program image */ cpc_setgrp_reset(state->accum); start = cpc_setgrp_getset(state->accum); do { nreq = cpc_setgrp_getbufs(state->accum, &data1, &data2, &scratch); hrt = cpc_buf_hrtime(cpc, *data1); if (hrt == 0) continue; print_sample(pid, lwpid, "exec", *data1, nreq, cpc_setgrp_getname(state->accum)); if (now < mstimestamp(hrt)) now = mstimestamp(hrt); } while (cpc_setgrp_nextset(state->accum) != start); print_exec(now, pid, name); if (state->accum != NULL) { cpc_setgrp_free(state->accum); state->accum = NULL; } if (opts->followexec) { state->accum = cpc_setgrp_clone(opts->master); return (0); } return (-1); } /*ARGSUSED*/ static void pexit(pctx_t *pctx, pid_t pid, id_t lwpid, int status, void *arg) { struct pstate *state = arg; cpc_set_t *start; int nreq; cpc_buf_t **data1, **data2, **scratch; if (interrupt) return; cpc_setgrp_reset(state->accum); start = cpc_setgrp_getset(state->accum); do { nreq = cpc_setgrp_getbufs(state->accum, &data1, &data2, &scratch); if (cpc_buf_hrtime(cpc, *data1) == 0) continue; print_sample(pid, lwpid, "exit", *data1, nreq, cpc_setgrp_getname(state->accum)); } while (cpc_setgrp_nextset(state->accum) != start); cpc_setgrp_free(state->accum); state->accum = NULL; for (lwpid = 1; lwpid < state->maxlwpid; lwpid++) if (state->sgrps[lwpid-1] != NULL) { cpc_setgrp_free(state->sgrps[lwpid-1]); state->sgrps[lwpid-1] = NULL; } free(state->sgrps); state->sgrps = NULL; } static int ptick(pctx_t *pctx, pid_t pid, id_t lwpid, void *arg) { struct pstate *state = arg; cpc_setgrp_t *sgrp = state->sgrps[lwpid-1]; cpc_set_t *this = cpc_setgrp_getset(sgrp); const char *name = cpc_setgrp_getname(sgrp); cpc_buf_t **data1, **data2, **scratch, *tmp; char *errstr; int nreqs; if (interrupt) return (0); nreqs = cpc_setgrp_getbufs(sgrp, &data1, &data2, &scratch); if (opts->nsets == 1) { /* * If we're dealing with one set, buffer usage is: * * data1 = most recent data snapshot * data2 = previous data snapshot * scratch = used for diffing data1 and data2 * * Save the snapshot from the previous sample in data2 * before putting the current sample in data1. */ tmp = *data1; *data1 = *data2; *data2 = tmp; if (cpc_set_sample(cpc, this, *data1) != 0) goto broken; cpc_buf_sub(cpc, *scratch, *data1, *data2); } else { cpc_set_t *next = cpc_setgrp_nextset(sgrp); /* * If there is more than set in use, we will need to * unbind and re-bind on each go-around because each * time a counter is bound, it is preset to 0 (as it was * specified when the requests were added to the set). * * Buffer usage in this case is: * * data1 = total counts for this set since program began * data2 = unused * scratch = most recent data snapshot */ if (cpc_set_sample(cpc, this, *scratch) != 0) goto broken; cpc_buf_add(cpc, *data1, *data1, *scratch); /* * No need to unbind the previous set, as binding another set * automatically unbinds the most recently bound set. */ if (cpc_bind_pctx(cpc, pctx, lwpid, next, 0) != 0) goto broken; } print_sample(pid, lwpid, "tick", *scratch, nreqs, name); return (0); broken: switch (errno) { case EAGAIN: (void) fprintf(stderr, gettext( "%6d: tick: lwp%d: perf counter contents invalidated\n"), (int)pid, (int)lwpid); break; default: errstr = strerror(errno); (void) fprintf(stderr, gettext( "%6d: tick: lwp%d: can't access perf counter - %s\n"), (int)pid, (int)lwpid, errstr); break; } (void) cpc_unbind(cpc, this); return (-1); } /* * The system has just created a new address space that has a new pid. * We're running in a child of the controlling process, with a new * pctx handle already opened on the child of the original controlled process. */ static void pfork(pctx_t *pctx, pid_t oldpid, pid_t pid, id_t lwpid, void *arg) { struct pstate *state = arg; print_fork(mstimestamp(0), pid, lwpid, oldpid); if (!opts->followfork) return; if (pctx_set_events(pctx, PCTX_SYSC_EXEC_EVENT, pexec, PCTX_SYSC_FORK_EVENT, pfork, PCTX_SYSC_EXIT_EVENT, pexit, PCTX_SYSC_LWP_CREATE_EVENT, plwp_create, PCTX_INIT_LWP_EVENT, pinit_lwp, PCTX_FINI_LWP_EVENT, pfini_lwp, PCTX_SYSC_LWP_EXIT_EVENT, plwp_exit, PCTX_NULL_EVENT) == 0) { state->accum = cpc_setgrp_clone(opts->master); (void) pctx_run(pctx, opts->mseconds, opts->nsamples, ptick); if (state->accum) { free(state->accum); state->accum = NULL; } } } /* * Translate the incoming options into actions, and get the * tool and the process to control running. */ static int cputrack(int argc, char *argv[], int optind) { struct pstate __state, *state = &__state; pctx_t *pctx; int err; bzero(state, sizeof (*state)); if (opts->pid == 0) { if (argc <= optind) { (void) fprintf(stderr, "%s: %s\n", opts->pgmname, gettext("no program to start")); return (1); } pctx = pctx_create(argv[optind], &argv[optind], state, 1, cputrack_pctx_errfn); if (pctx == NULL) { (void) fprintf(stderr, "%s: %s '%s'\n", opts->pgmname, gettext("failed to start program"), argv[optind]); return (1); } } else { pctx = pctx_capture(opts->pid, state, 1, cputrack_pctx_errfn); if (pctx == NULL) { (void) fprintf(stderr, "%s: %s %d\n", opts->pgmname, gettext("failed to capture pid"), (int)opts->pid); return (1); } } err = pctx_set_events(pctx, PCTX_SYSC_EXEC_EVENT, pexec, PCTX_SYSC_FORK_EVENT, pfork, PCTX_SYSC_EXIT_EVENT, pexit, PCTX_SYSC_LWP_CREATE_EVENT, plwp_create, PCTX_INIT_LWP_EVENT, pinit_lwp, PCTX_FINI_LWP_EVENT, pfini_lwp, PCTX_SYSC_LWP_EXIT_EVENT, plwp_exit, PCTX_NULL_EVENT); if (err != 0) { (void) fprintf(stderr, "%s: %s\n", opts->pgmname, gettext("can't bind process context ops to process")); } else { if (opts->dotitle) print_title(opts->master); state->accum = cpc_setgrp_clone(opts->master); zerotime(); err = pctx_run(pctx, opts->mseconds, opts->nsamples, ptick); if (state->accum) { cpc_setgrp_free(state->accum); state->accum = NULL; } } return (err != 0 ? 1 : 0); } #if defined(__i386) #define OFFLINE_CMD "/usr/sbin/psradm -f " #define BUFSIZE 5 /* enough for "n " where n is a cpuid */ /* * cpc_bind_pctx() failed with EACCES, which means the user must first offline * all but one logical processor on each physical processor. Print to stderr the * psradm command string to do this. */ static void p4_ht_error(void) { kstat_ctl_t *kc; kstat_t *ksp; kstat_named_t *k; int i; int max; int stat; int *designees; int *must_offline; char buf[BUFSIZE]; char *cmd; int noffline = 0; int ndone = 0; (void) fprintf(stderr, "%s\n", gettext("Pentium 4 processors with HyperThreading present.\nOffline" " all but one logical processor on each physical processor in" " order to use\ncputrack.\n")); if ((kc = kstat_open()) == NULL) return; max = sysconf(_SC_CPUID_MAX); if ((designees = malloc(max * sizeof (*designees))) == NULL) { (void) fprintf(stderr, gettext("%s: no memory available\n"), opts->pgmname); exit(0); } if ((must_offline = malloc(max * sizeof (*designees))) == NULL) { (void) fprintf(stderr, gettext("%s: no memory available\n"), opts->pgmname); exit(0); } for (i = 0; i < max; i++) { designees[i] = -1; must_offline[i] = 0; } for (i = 0; i < max; i++) { stat = p_online(i, P_STATUS); if (stat != P_ONLINE && stat != P_NOINTR) continue; if ((ksp = kstat_lookup(kc, "cpu_info", i, NULL)) == NULL) { free(designees); free(must_offline); return; } if (kstat_read(kc, ksp, NULL) == -1) { free(designees); free(must_offline); return; } if ((k = (kstat_named_t *)kstat_data_lookup(ksp, "chip_id")) == NULL) { free(designees); free(must_offline); return; } if (designees[k->value.i32] == -1) /* * This chip doesn't yet have a CPU designated to remain * online; let this one be it. */ designees[k->value.i32] = i; else { /* * This chip already has a designated CPU; this CPU must * go offline. */ must_offline[i] = 1; noffline++; } } /* * Now construct a string containing the command line used to offline * the appropriate processors. */ if ((cmd = malloc(strlen(OFFLINE_CMD) + (noffline * BUFSIZE) + 1)) == NULL) { (void) fprintf(stderr, gettext("%s: no memory available\n"), opts->pgmname); exit(0); } (void) strcpy(cmd, OFFLINE_CMD); for (i = 0; i < max; i++) { if (must_offline[i] == 0) continue; ndone++; (void) snprintf(buf, BUFSIZE, "%d", i); if (ndone < noffline) (void) strcat(buf, " "); (void) strcat(cmd, buf); } (void) fprintf(stderr, "%s:\n%s\n", gettext("The following command " "will configure the system appropriately"), cmd); exit(1); } #endif /* defined(__i386) */ /*ARGSUSED*/ static void intr(int sig) { interrupt++; if (cpc != NULL) cpc_terminate(cpc); }