xref: /freebsd/usr.sbin/pmcstat/pmcstat.c (revision f37852c17391fdf0e8309bcf684384dd0d854e43)
1 /*-
2  * Copyright (c) 2003-2008, Joseph Koshy
3  * Copyright (c) 2007 The FreeBSD Foundation
4  * All rights reserved.
5  *
6  * Portions of this software were developed by A. Joseph Koshy under
7  * sponsorship from the FreeBSD Foundation and Google, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/cpuset.h>
36 #include <sys/event.h>
37 #include <sys/queue.h>
38 #include <sys/socket.h>
39 #include <sys/stat.h>
40 #include <sys/sysctl.h>
41 #include <sys/time.h>
42 #include <sys/ttycom.h>
43 #include <sys/user.h>
44 #include <sys/wait.h>
45 
46 #include <assert.h>
47 #include <curses.h>
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <kvm.h>
52 #include <libgen.h>
53 #include <limits.h>
54 #include <math.h>
55 #include <pmc.h>
56 #include <pmclog.h>
57 #include <regex.h>
58 #include <signal.h>
59 #include <stdarg.h>
60 #include <stdint.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <sysexits.h>
65 #include <unistd.h>
66 
67 #include "pmcstat.h"
68 
69 /*
70  * A given invocation of pmcstat(8) can manage multiple PMCs of both
71  * the system-wide and per-process variety.  Each of these could be in
72  * 'counting mode' or in 'sampling mode'.
73  *
74  * For 'counting mode' PMCs, pmcstat(8) will periodically issue a
75  * pmc_read() at the configured time interval and print out the value
76  * of the requested PMCs.
77  *
78  * For 'sampling mode' PMCs it can log to a file for offline analysis,
79  * or can analyse sampling data "on the fly", either by converting
80  * samples to printed textual form or by creating gprof(1) compatible
81  * profiles, one per program executed.  When creating gprof(1)
82  * profiles it can optionally merge entries from multiple processes
83  * for a given executable into a single profile file.
84  *
85  * pmcstat(8) can also execute a command line and attach PMCs to the
86  * resulting child process.  The protocol used is as follows:
87  *
88  * - parent creates a socketpair for two way communication and
89  *   fork()s.
90  * - subsequently:
91  *
92  *   /Parent/				/Child/
93  *
94  *   - Wait for child's token.
95  *					- Sends token.
96  *					- Awaits signal to start.
97  *  - Attaches PMCs to the child's pid
98  *    and starts them. Sets up
99  *    monitoring for the child.
100  *  - Signals child to start.
101  *					- Receives signal, attempts exec().
102  *
103  * After this point normal processing can happen.
104  */
105 
106 /* Globals */
107 
108 int		pmcstat_displayheight = DEFAULT_DISPLAY_HEIGHT;
109 int		pmcstat_displaywidth  = DEFAULT_DISPLAY_WIDTH;
110 static int	pmcstat_sockpair[NSOCKPAIRFD];
111 static int	pmcstat_kq;
112 static kvm_t	*pmcstat_kvm;
113 static struct kinfo_proc *pmcstat_plist;
114 struct pmcstat_args args;
115 
/*
 * Replicate the event descriptor 'ev' for every CPU that is a member of
 * 'cpumask', appending each replica to the tail of args.pa_events.
 *
 * A replica carries private copies of the event name and specification
 * strings and has ev_cpu set to the CPU it stands for.  Fields that are
 * not copied explicitly below start out zeroed.  Running out of memory
 * terminates the program with EX_SOFTWARE.
 */
static void
pmcstat_clone_event_descriptor(struct pmcstat_ev *ev, const cpuset_t *cpumask)
{
	struct pmcstat_ev *replica;
	int n;

	for (n = 0; n < CPU_SETSIZE; n++) {
		if (CPU_ISSET(n, cpumask)) {
			/* calloc() hands back an all-zero descriptor. */
			replica = calloc(1, sizeof(*replica));
			if (replica == NULL)
				errx(EX_SOFTWARE, "ERROR: Out of memory");

			/* The strings are owned by the replica. */
			replica->ev_name = strdup(ev->ev_name);
			if (replica->ev_name == NULL)
				errx(EX_SOFTWARE, "ERROR: Out of memory");
			replica->ev_spec = strdup(ev->ev_spec);
			if (replica->ev_spec == NULL)
				errx(EX_SOFTWARE, "ERROR: Out of memory");

			/* Everything else is taken over from the template. */
			replica->ev_cpu = n;
			replica->ev_count = ev->ev_count;
			replica->ev_cumulative = ev->ev_cumulative;
			replica->ev_flags = ev->ev_flags;
			replica->ev_mode = ev->ev_mode;
			replica->ev_pmcid = ev->ev_pmcid;
			replica->ev_saved = ev->ev_saved;

			STAILQ_INSERT_TAIL(&args.pa_events, replica, ev_next);
		}
	}
}
147 
/*
 * Parse a CPU list specification into a CPU mask.
 *
 * 'cpuspec' is a list of CPU numbers, each in any notation strtol(3)
 * accepts with base 0, separated by commas, spaces or tabs.  On return
 * 'cpumask' holds exactly the CPUs that were named.
 *
 * A malformed number, a negative number, or a number too large to be
 * represented in a cpuset_t terminates the program with EX_USAGE.
 */
static void
pmcstat_get_cpumask(const char *cpuspec, cpuset_t *cpumask)
{
	long cpu;	/* kept as long so strtol() results are not truncated */
	const char *s;
	char *end;

	CPU_ZERO(cpumask);
	s = cpuspec;

	do {
		cpu = strtol(s, &end, 0);
		/*
		 * The upper bound matters: CPU_SET() indexes the mask by
		 * CPU number, so a value at or beyond CPU_SETSIZE would
		 * address storage outside of it.
		 */
		if (end == s || cpu < 0 || cpu >= CPU_SETSIZE)
			errx(EX_USAGE,
			    "ERROR: Illegal CPU specification \"%s\".",
			    cpuspec);
		CPU_SET(cpu, cpumask);
		/* Step over the separators that follow the number. */
		s = end + strspn(end, ", \t");
	} while (*s);
	assert(!CPU_EMPTY(cpumask));
}
169 
170 void
171 pmcstat_attach_pmcs(void)
172 {
173 	struct pmcstat_ev *ev;
174 	struct pmcstat_target *pt;
175 	int count;
176 
177 	/* Attach all process PMCs to target processes. */
178 	count = 0;
179 	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
180 		if (PMC_IS_SYSTEM_MODE(ev->ev_mode))
181 			continue;
182 		SLIST_FOREACH(pt, &args.pa_targets, pt_next)
183 			if (pmc_attach(ev->ev_pmcid, pt->pt_pid) == 0)
184 				count++;
185 			else if (errno != ESRCH)
186 				err(EX_OSERR,
187 "ERROR: cannot attach pmc \"%s\" to process %d",
188 				    ev->ev_name, (int)pt->pt_pid);
189 	}
190 
191 	if (count == 0)
192 		errx(EX_DATAERR, "ERROR: No processes were attached to.");
193 }
194 
195 
196 void
197 pmcstat_cleanup(void)
198 {
199 	struct pmcstat_ev *ev;
200 
201 	/* release allocated PMCs. */
202 	STAILQ_FOREACH(ev, &args.pa_events, ev_next)
203 		if (ev->ev_pmcid != PMC_ID_INVALID) {
204 			if (pmc_stop(ev->ev_pmcid) < 0)
205 				err(EX_OSERR,
206 				    "ERROR: cannot stop pmc 0x%x \"%s\"",
207 				    ev->ev_pmcid, ev->ev_name);
208 			if (pmc_release(ev->ev_pmcid) < 0)
209 				err(EX_OSERR,
210 				    "ERROR: cannot release pmc 0x%x \"%s\"",
211 				    ev->ev_pmcid, ev->ev_name);
212 		}
213 
214 	/* de-configure the log file if present. */
215 	if (args.pa_flags & (FLAG_HAS_PIPE | FLAG_HAS_OUTPUT_LOGFILE))
216 		(void) pmc_configure_logfile(-1);
217 
218 	if (args.pa_logparser) {
219 		pmclog_close(args.pa_logparser);
220 		args.pa_logparser = NULL;
221 	}
222 
223 	pmcstat_shutdown_logging();
224 }
225 
/*
 * Fork the child that will run the command line given to pmcstat(8).
 *
 * The child announces itself by writing a one-byte token on the socket
 * pair and then blocks until a token arrives from the parent (sent by
 * pmcstat_start_process()); only then does it exec() args.pa_argv.
 *
 * The parent registers a one-shot EVFILT_PROC/NOTE_EXIT kevent for the
 * child on pmcstat_kq, inserts the child at the head of
 * args.pa_targets, and returns once the child's token has been read.
 *
 * Every failure terminates the calling process via err(3)/errx(3).
 * That includes end-of-file on the socket pair: a zero-length read
 * means the peer went away without sending its token.
 */
void
pmcstat_create_process(void)
{
	char token;
	pid_t pid;
	ssize_t nread;
	struct kevent kev;
	struct pmcstat_target *pt;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pmcstat_sockpair) < 0)
		err(EX_OSERR, "ERROR: cannot create socket pair");

	switch (pid = fork()) {
	case -1:
		err(EX_OSERR, "ERROR: cannot fork");
		/*NOTREACHED*/

	case 0:		/* child */
		(void) close(pmcstat_sockpair[PARENTSOCKET]);

		/* Write a token to tell our parent we've started executing. */
		if (write(pmcstat_sockpair[CHILDSOCKET], "+", 1) != 1)
			err(EX_OSERR, "ERROR (child): cannot write token");

		/*
		 * Wait for our parent to signal us to start.  If the parent
		 * closed its end without sending the token (for instance
		 * because it exited on an error) the command must not be
		 * run, since nothing would be monitoring it.
		 */
		nread = read(pmcstat_sockpair[CHILDSOCKET], &token, 1);
		if (nread < 0)
			err(EX_OSERR, "ERROR (child): cannot read token");
		if (nread == 0)
			errx(EX_OSERR,
			    "ERROR (child): parent went away before the start signal");
		(void) close(pmcstat_sockpair[CHILDSOCKET]);

		/* exec() the program requested */
		execvp(*args.pa_argv, args.pa_argv);
		/* and if that fails, notify the parent */
		(void) kill(getppid(), SIGCHLD);
		err(EX_OSERR, "ERROR: execvp \"%s\" failed", *args.pa_argv);
		/*NOTREACHED*/

	default:	/* parent */
		(void) close(pmcstat_sockpair[CHILDSOCKET]);
		break;
	}

	/* Ask to be notified via a kevent when the target process exits. */
	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD|EV_ONESHOT, NOTE_EXIT, 0,
	    NULL);
	if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: cannot monitor child process %d", pid);

	if ((pt = malloc(sizeof(*pt))) == NULL)
		errx(EX_SOFTWARE, "ERROR: Out of memory.");

	pt->pt_pid = pid;
	SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next);

	/*
	 * Wait for the child to signal that it is ready to go.  End-of-file
	 * here means the child died before it could send its token.
	 */
	nread = read(pmcstat_sockpair[PARENTSOCKET], &token, 1);
	if (nread < 0)
		err(EX_OSERR, "ERROR (parent): cannot read token");
	if (nread == 0)
		errx(EX_OSERR,
		    "ERROR (parent): child went away before sending its token");
}
284 
285 void
286 pmcstat_find_targets(const char *spec)
287 {
288 	int n, nproc, pid, rv;
289 	struct pmcstat_target *pt;
290 	char errbuf[_POSIX2_LINE_MAX], *end;
291 	static struct kinfo_proc *kp;
292 	regex_t reg;
293 	regmatch_t regmatch;
294 
295 	/* First check if we've been given a process id. */
296       	pid = strtol(spec, &end, 0);
297 	if (end != spec && pid >= 0) {
298 		if ((pt = malloc(sizeof(*pt))) == NULL)
299 			goto outofmemory;
300 		pt->pt_pid = pid;
301 		SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next);
302 		return;
303 	}
304 
305 	/* Otherwise treat arg as a regular expression naming processes. */
306 	if (pmcstat_kvm == NULL) {
307 		if ((pmcstat_kvm = kvm_openfiles(NULL, "/dev/null", NULL, 0,
308 		    errbuf)) == NULL)
309 			err(EX_OSERR, "ERROR: Cannot open kernel \"%s\"",
310 			    errbuf);
311 		if ((pmcstat_plist = kvm_getprocs(pmcstat_kvm, KERN_PROC_PROC,
312 		    0, &nproc)) == NULL)
313 			err(EX_OSERR, "ERROR: Cannot get process list: %s",
314 			    kvm_geterr(pmcstat_kvm));
315 	} else
316 		nproc = 0;
317 
318 	if ((rv = regcomp(&reg, spec, REG_EXTENDED|REG_NOSUB)) != 0) {
319 		regerror(rv, &reg, errbuf, sizeof(errbuf));
320 		err(EX_DATAERR, "ERROR: Failed to compile regex \"%s\": %s",
321 		    spec, errbuf);
322 	}
323 
324 	for (n = 0, kp = pmcstat_plist; n < nproc; n++, kp++) {
325 		if ((rv = regexec(&reg, kp->ki_comm, 1, &regmatch, 0)) == 0) {
326 			if ((pt = malloc(sizeof(*pt))) == NULL)
327 				goto outofmemory;
328 			pt->pt_pid = kp->ki_pid;
329 			SLIST_INSERT_HEAD(&args.pa_targets, pt, pt_next);
330 		} else if (rv != REG_NOMATCH) {
331 			regerror(rv, &reg, errbuf, sizeof(errbuf));
332 			errx(EX_SOFTWARE, "ERROR: Regex evalation failed: %s",
333 			    errbuf);
334 		}
335 	}
336 
337 	regfree(&reg);
338 
339 	return;
340 
341  outofmemory:
342 	errx(EX_SOFTWARE, "Out of memory.");
343 	/*NOTREACHED*/
344 }
345 
346 void
347 pmcstat_kill_process(void)
348 {
349 	struct pmcstat_target *pt;
350 
351 	assert(args.pa_flags & FLAG_HAS_COMMANDLINE);
352 
353 	/*
354 	 * If a command line was specified, it would be the very first
355 	 * in the list, before any other processes specified by -t.
356 	 */
357 	pt = SLIST_FIRST(&args.pa_targets);
358 	assert(pt != NULL);
359 
360 	if (kill(pt->pt_pid, SIGINT) != 0)
361 		err(EX_OSERR, "ERROR: cannot signal child process");
362 }
363 
364 void
365 pmcstat_start_pmcs(void)
366 {
367 	struct pmcstat_ev *ev;
368 
369 	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
370 
371 	    assert(ev->ev_pmcid != PMC_ID_INVALID);
372 
373 	    if (pmc_start(ev->ev_pmcid) < 0) {
374 	        warn("ERROR: Cannot start pmc 0x%x \"%s\"",
375 		    ev->ev_pmcid, ev->ev_name);
376 		pmcstat_cleanup();
377 		exit(EX_OSERR);
378 	    }
379 	}
380 
381 }
382 
383 void
384 pmcstat_print_headers(void)
385 {
386 	struct pmcstat_ev *ev;
387 	int c, w;
388 
389 	(void) fprintf(args.pa_printfile, PRINT_HEADER_PREFIX);
390 
391 	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
392 		if (PMC_IS_SAMPLING_MODE(ev->ev_mode))
393 			continue;
394 
395 		c = PMC_IS_SYSTEM_MODE(ev->ev_mode) ? 's' : 'p';
396 
397 		if (ev->ev_fieldskip != 0)
398 			(void) fprintf(args.pa_printfile, "%*s",
399 			    ev->ev_fieldskip, "");
400 		w = ev->ev_fieldwidth - ev->ev_fieldskip - 2;
401 
402 		if (c == 's')
403 			(void) fprintf(args.pa_printfile, "s/%02d/%-*s ",
404 			    ev->ev_cpu, w-3, ev->ev_name);
405 		else
406 			(void) fprintf(args.pa_printfile, "p/%*s ", w,
407 			    ev->ev_name);
408 	}
409 
410 	(void) fflush(args.pa_printfile);
411 }
412 
413 void
414 pmcstat_print_counters(void)
415 {
416 	int extra_width;
417 	struct pmcstat_ev *ev;
418 	pmc_value_t value;
419 
420 	extra_width = sizeof(PRINT_HEADER_PREFIX) - 1;
421 
422 	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
423 
424 		/* skip sampling mode counters */
425 		if (PMC_IS_SAMPLING_MODE(ev->ev_mode))
426 			continue;
427 
428 		if (pmc_read(ev->ev_pmcid, &value) < 0)
429 			err(EX_OSERR, "ERROR: Cannot read pmc \"%s\"",
430 			    ev->ev_name);
431 
432 		(void) fprintf(args.pa_printfile, "%*ju ",
433 		    ev->ev_fieldwidth + extra_width,
434 		    (uintmax_t) ev->ev_cumulative ? value :
435 		    (value - ev->ev_saved));
436 
437 		if (ev->ev_cumulative == 0)
438 			ev->ev_saved = value;
439 		extra_width = 0;
440 	}
441 
442 	(void) fflush(args.pa_printfile);
443 }
444 
445 /*
446  * Print output
447  */
448 
449 void
450 pmcstat_print_pmcs(void)
451 {
452 	static int linecount = 0;
453 
454 	/* check if we need to print a header line */
455 	if (++linecount > pmcstat_displayheight) {
456 		(void) fprintf(args.pa_printfile, "\n");
457 		linecount = 1;
458 	}
459 	if (linecount == 1)
460 		pmcstat_print_headers();
461 	(void) fprintf(args.pa_printfile, "\n");
462 
463 	pmcstat_print_counters();
464 
465 	return;
466 }
467 
468 /*
469  * Do process profiling
470  *
471  * If a pid was specified, attach each allocated PMC to the target
472  * process.  Otherwise, fork a child and attach the PMCs to the child,
473  * and have the child exec() the target program.
474  */
475 
476 void
477 pmcstat_start_process(void)
478 {
479 	/* Signal the child to proceed. */
480 	if (write(pmcstat_sockpair[PARENTSOCKET], "!", 1) != 1)
481 		err(EX_OSERR, "ERROR (parent): write of token failed");
482 
483 	(void) close(pmcstat_sockpair[PARENTSOCKET]);
484 }
485 
/*
 * Print the usage summary through errx(3) and exit with EX_USAGE.
 * This function does not return.
 */
void
pmcstat_show_usage(void)
{
	/* Contains no '%', so it prints the same through "%s". */
	static const char usage[] =
	    "[options] [commandline]\n"
	    "\t Measure process and/or system performance using hardware\n"
	    "\t performance monitoring counters.\n"
	    "\t Options include:\n"
	    "\t -C\t\t (toggle) show cumulative counts\n"
	    "\t -D path\t create profiles in directory \"path\"\n"
	    "\t -E\t\t (toggle) show counts at process exit\n"
	    "\t -F file\t write a system-wide callgraph (Kcachegrind format)"
		" to \"file\"\n"
	    "\t -G file\t write a system-wide callgraph to \"file\"\n"
	    "\t -M file\t print executable/gmon file map to \"file\"\n"
	    "\t -N\t\t (toggle) capture callchains\n"
	    "\t -O file\t send log output to \"file\"\n"
	    "\t -P spec\t allocate a process-private sampling PMC\n"
	    "\t -R file\t read events from \"file\"\n"
	    "\t -S spec\t allocate a system-wide sampling PMC\n"
	    "\t -T\t\t start in top mode\n"
	    "\t -W\t\t (toggle) show counts per context switch\n"
	    "\t -a file\t print sampled PCs and callgraph to \"file\"\n"
	    "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n"
	    "\t -d\t\t (toggle) track descendants\n"
	    "\t -e\t\t use wide history counter for gprof(1) output\n"
	    "\t -f spec\t pass \"spec\" to as plugin option\n"
	    "\t -g\t\t produce gprof(1) compatible profiles\n"
	    "\t -k dir\t\t set the path to the kernel\n"
	    "\t -l secs\t set duration time\n"
	    "\t -m file\t print sampled PCs to \"file\"\n"
	    "\t -n rate\t set sampling rate\n"
	    "\t -o file\t send print output to \"file\"\n"
	    "\t -p spec\t allocate a process-private counting PMC\n"
	    "\t -q\t\t suppress verbosity\n"
	    "\t -r fsroot\t specify FS root directory\n"
	    "\t -s spec\t allocate a system-wide counting PMC\n"
	    "\t -t process-spec attach to running processes matching "
		"\"process-spec\"\n"
	    "\t -v\t\t increase verbosity\n"
	    "\t -w secs\t set printing time interval\n"
	    "\t -z depth\t limit callchain display depth";

	errx(EX_USAGE, "%s", usage);
}
530 
531 /*
532  * At exit handler for top mode
533  */
534 
535 void
536 pmcstat_topexit(void)
537 {
538 	if (!args.pa_toptty)
539 		return;
540 
541 	/*
542 	 * Shutdown ncurses.
543 	 */
544 	clrtoeol();
545 	refresh();
546 	endwin();
547 }
548 
549 /*
550  * Main
551  */
552 
553 int
554 main(int argc, char **argv)
555 {
556 	cpuset_t cpumask, rootmask;
557 	double interval;
558 	double duration;
559 	int option, npmc;
560 	int c, check_driver_stats, current_sampling_count;
561 	int do_callchain, do_descendants, do_logproccsw, do_logprocexit;
562 	int do_print, do_read;
563 	size_t len;
564 	int graphdepth;
565 	int pipefd[2], rfd;
566 	int use_cumulative_counts;
567 	short cf, cb;
568 	char *end, *tmp;
569 	const char *errmsg, *graphfilename;
570 	enum pmcstat_state runstate;
571 	struct pmc_driverstats ds_start, ds_end;
572 	struct pmcstat_ev *ev;
573 	struct sigaction sa;
574 	struct kevent kev;
575 	struct winsize ws;
576 	struct stat sb;
577 	char buffer[PATH_MAX];
578 
579 	check_driver_stats      = 0;
580 	current_sampling_count  = DEFAULT_SAMPLE_COUNT;
581 	do_callchain		= 1;
582 	do_descendants          = 0;
583 	do_logproccsw           = 0;
584 	do_logprocexit          = 0;
585 	use_cumulative_counts   = 0;
586 	graphfilename		= "-";
587 	args.pa_required	= 0;
588 	args.pa_flags		= 0;
589 	args.pa_verbosity	= 1;
590 	args.pa_logfd		= -1;
591 	args.pa_fsroot		= "";
592 	args.pa_samplesdir	= ".";
593 	args.pa_printfile	= stderr;
594 	args.pa_graphdepth	= DEFAULT_CALLGRAPH_DEPTH;
595 	args.pa_graphfile	= NULL;
596 	args.pa_interval	= DEFAULT_WAIT_INTERVAL;
597 	args.pa_mapfilename	= NULL;
598 	args.pa_inputpath	= NULL;
599 	args.pa_outputpath	= NULL;
600 	args.pa_pplugin		= PMCSTAT_PL_NONE;
601 	args.pa_plugin		= PMCSTAT_PL_NONE;
602 	args.pa_ctdumpinstr	= 1;
603 	args.pa_topmode		= PMCSTAT_TOP_DELTA;
604 	args.pa_toptty		= 0;
605 	args.pa_topcolor	= 0;
606 	args.pa_mergepmc	= 0;
607 	args.pa_duration	= 0.0;
608 	STAILQ_INIT(&args.pa_events);
609 	SLIST_INIT(&args.pa_targets);
610 	bzero(&ds_start, sizeof(ds_start));
611 	bzero(&ds_end, sizeof(ds_end));
612 	ev = NULL;
613 	CPU_ZERO(&cpumask);
614 
615 	/* Default to using the running system kernel. */
616 	len = 0;
617 	if (sysctlbyname("kern.bootfile", NULL, &len, NULL, 0) == -1)
618 		err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
619 	args.pa_kernel = malloc(len);
620 	if (args.pa_kernel == NULL)
621 		errx(EX_SOFTWARE, "ERROR: Out of memory.");
622 	if (sysctlbyname("kern.bootfile", args.pa_kernel, &len, NULL, 0) == -1)
623 		err(EX_OSERR, "ERROR: Cannot determine path of running kernel");
624 
625 	/*
626 	 * The initial CPU mask specifies the root mask of this process
627 	 * which is usually all CPUs in the system.
628 	 */
629 	if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
630 	    sizeof(rootmask), &rootmask) == -1)
631 		err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs");
632 	CPU_COPY(&rootmask, &cpumask);
633 
634 	while ((option = getopt(argc, argv,
635 	    "CD:EF:G:M:NO:P:R:S:TWa:c:def:gk:l:m:n:o:p:qr:s:t:vw:z:")) != -1)
636 		switch (option) {
637 		case 'a':	/* Annotate + callgraph */
638 			args.pa_flags |= FLAG_DO_ANNOTATE;
639 			args.pa_plugin = PMCSTAT_PL_ANNOTATE_CG;
640 			graphfilename  = optarg;
641 			break;
642 
643 		case 'C':	/* cumulative values */
644 			use_cumulative_counts = !use_cumulative_counts;
645 			args.pa_required |= FLAG_HAS_COUNTING_PMCS;
646 			break;
647 
648 		case 'c':	/* CPU */
649 			if (optarg[0] == '*' && optarg[1] == '\0')
650 				CPU_COPY(&rootmask, &cpumask);
651 			else
652 				pmcstat_get_cpumask(optarg, &cpumask);
653 
654 			args.pa_flags	 |= FLAGS_HAS_CPUMASK;
655 			args.pa_required |= FLAG_HAS_SYSTEM_PMCS;
656 			break;
657 
658 		case 'D':
659 			if (stat(optarg, &sb) < 0)
660 				err(EX_OSERR, "ERROR: Cannot stat \"%s\"",
661 				    optarg);
662 			if (!S_ISDIR(sb.st_mode))
663 				errx(EX_USAGE,
664 				    "ERROR: \"%s\" is not a directory.",
665 				    optarg);
666 			args.pa_samplesdir = optarg;
667 			args.pa_flags     |= FLAG_HAS_SAMPLESDIR;
668 			args.pa_required  |= FLAG_DO_GPROF;
669 			break;
670 
671 		case 'd':	/* toggle descendents */
672 			do_descendants = !do_descendants;
673 			args.pa_required |= FLAG_HAS_PROCESS_PMCS;
674 			break;
675 
676 		case 'e':	/* wide gprof metrics */
677 			args.pa_flags |= FLAG_DO_WIDE_GPROF_HC;
678 			break;
679 
680 		case 'F':	/* produce a system-wide calltree */
681 			args.pa_flags |= FLAG_DO_CALLGRAPHS;
682 			args.pa_plugin = PMCSTAT_PL_CALLTREE;
683 			graphfilename = optarg;
684 			break;
685 
686 		case 'f':	/* plugins options */
687 			if (args.pa_plugin == PMCSTAT_PL_NONE)
688 				err(EX_USAGE, "ERROR: Need -g/-G/-m/-T.");
689 			pmcstat_pluginconfigure_log(optarg);
690 			break;
691 
692 		case 'G':	/* produce a system-wide callgraph */
693 			args.pa_flags |= FLAG_DO_CALLGRAPHS;
694 			args.pa_plugin = PMCSTAT_PL_CALLGRAPH;
695 			graphfilename = optarg;
696 			break;
697 
698 		case 'g':	/* produce gprof compatible profiles */
699 			args.pa_flags |= FLAG_DO_GPROF;
700 			args.pa_pplugin = PMCSTAT_PL_CALLGRAPH;
701 			args.pa_plugin	= PMCSTAT_PL_GPROF;
702 			break;
703 
704 		case 'k':	/* pathname to the kernel */
705 			free(args.pa_kernel);
706 			args.pa_kernel = strdup(optarg);
707 			if (args.pa_kernel == NULL)
708 				errx(EX_SOFTWARE, "ERROR: Out of memory");
709 			args.pa_required |= FLAG_DO_ANALYSIS;
710 			args.pa_flags    |= FLAG_HAS_KERNELPATH;
711 			break;
712 
713 		case 'l':	/* time duration in seconds */
714 			duration = strtod(optarg, &end);
715 			if (*end != '\0' || duration <= 0)
716 				errx(EX_USAGE, "ERROR: Illegal duration time "
717 				    "value \"%s\".", optarg);
718 			args.pa_flags |= FLAG_HAS_DURATION;
719 			args.pa_duration = duration;
720 			break;
721 
722 		case 'm':
723 			args.pa_flags |= FLAG_DO_ANNOTATE;
724 			args.pa_plugin = PMCSTAT_PL_ANNOTATE;
725 			graphfilename  = optarg;
726 			break;
727 
728 		case 'E':	/* log process exit */
729 			do_logprocexit = !do_logprocexit;
730 			args.pa_required |= (FLAG_HAS_PROCESS_PMCS |
731 			    FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE);
732 			break;
733 
734 		case 'M':	/* mapfile */
735 			args.pa_mapfilename = optarg;
736 			break;
737 
738 		case 'N':
739 			do_callchain = !do_callchain;
740 			args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
741 			break;
742 
743 		case 'p':	/* process virtual counting PMC */
744 		case 's':	/* system-wide counting PMC */
745 		case 'P':	/* process virtual sampling PMC */
746 		case 'S':	/* system-wide sampling PMC */
747 			if ((ev = malloc(sizeof(*ev))) == NULL)
748 				errx(EX_SOFTWARE, "ERROR: Out of memory.");
749 
750 			switch (option) {
751 			case 'p': ev->ev_mode = PMC_MODE_TC; break;
752 			case 's': ev->ev_mode = PMC_MODE_SC; break;
753 			case 'P': ev->ev_mode = PMC_MODE_TS; break;
754 			case 'S': ev->ev_mode = PMC_MODE_SS; break;
755 			}
756 
757 			if (option == 'P' || option == 'p') {
758 				args.pa_flags |= FLAG_HAS_PROCESS_PMCS;
759 				args.pa_required |= (FLAG_HAS_COMMANDLINE |
760 				    FLAG_HAS_TARGET);
761 			}
762 
763 			if (option == 'P' || option == 'S') {
764 				args.pa_flags |= FLAG_HAS_SAMPLING_PMCS;
765 				args.pa_required |= (FLAG_HAS_PIPE |
766 				    FLAG_HAS_OUTPUT_LOGFILE);
767 			}
768 
769 			if (option == 'p' || option == 's')
770 				args.pa_flags |= FLAG_HAS_COUNTING_PMCS;
771 
772 			if (option == 's' || option == 'S')
773 				args.pa_flags |= FLAG_HAS_SYSTEM_PMCS;
774 
775 			ev->ev_spec  = strdup(optarg);
776 			if (ev->ev_spec == NULL)
777 				errx(EX_SOFTWARE, "ERROR: Out of memory.");
778 
779 			if (option == 'S' || option == 'P')
780 				ev->ev_count = current_sampling_count;
781 			else
782 				ev->ev_count = -1;
783 
784 			if (option == 'S' || option == 's')
785 				ev->ev_cpu = CPU_FFS(&cpumask) - 1;
786 			else
787 				ev->ev_cpu = PMC_CPU_ANY;
788 
789 			ev->ev_flags = 0;
790 			if (do_callchain)
791 				ev->ev_flags |= PMC_F_CALLCHAIN;
792 			if (do_descendants)
793 				ev->ev_flags |= PMC_F_DESCENDANTS;
794 			if (do_logprocexit)
795 				ev->ev_flags |= PMC_F_LOG_PROCEXIT;
796 			if (do_logproccsw)
797 				ev->ev_flags |= PMC_F_LOG_PROCCSW;
798 
799 			ev->ev_cumulative  = use_cumulative_counts;
800 
801 			ev->ev_saved = 0LL;
802 			ev->ev_pmcid = PMC_ID_INVALID;
803 
804 			/* extract event name */
805 			c = strcspn(optarg, ", \t");
806 			ev->ev_name = malloc(c + 1);
807 			if (ev->ev_name == NULL)
808 				errx(EX_SOFTWARE, "ERROR: Out of memory.");
809 			(void) strncpy(ev->ev_name, optarg, c);
810 			*(ev->ev_name + c) = '\0';
811 
812 			STAILQ_INSERT_TAIL(&args.pa_events, ev, ev_next);
813 
814 			if (option == 's' || option == 'S') {
815 				CPU_CLR(ev->ev_cpu, &cpumask);
816 				pmcstat_clone_event_descriptor(ev, &cpumask);
817 				CPU_SET(ev->ev_cpu, &cpumask);
818 			}
819 
820 			break;
821 
822 		case 'n':	/* sampling count */
823 			current_sampling_count = strtol(optarg, &end, 0);
824 			if (*end != '\0' || current_sampling_count <= 0)
825 				errx(EX_USAGE,
826 				    "ERROR: Illegal count value \"%s\".",
827 				    optarg);
828 			args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
829 			break;
830 
831 		case 'o':	/* outputfile */
832 			if (args.pa_printfile != NULL &&
833 			    args.pa_printfile != stdout &&
834 			    args.pa_printfile != stderr)
835 				(void) fclose(args.pa_printfile);
836 			if ((args.pa_printfile = fopen(optarg, "w")) == NULL)
837 				errx(EX_OSERR,
838 				    "ERROR: cannot open \"%s\" for writing.",
839 				    optarg);
840 			args.pa_flags |= FLAG_DO_PRINT;
841 			break;
842 
843 		case 'O':	/* sampling output */
844 			if (args.pa_outputpath)
845 				errx(EX_USAGE,
846 "ERROR: option -O may only be specified once.");
847 			args.pa_outputpath = optarg;
848 			args.pa_flags |= FLAG_HAS_OUTPUT_LOGFILE;
849 			break;
850 
851 		case 'q':	/* quiet mode */
852 			args.pa_verbosity = 0;
853 			break;
854 
855 		case 'r':	/* root FS path */
856 			args.pa_fsroot = optarg;
857 			break;
858 
859 		case 'R':	/* read an existing log file */
860 			if (args.pa_inputpath != NULL)
861 				errx(EX_USAGE,
862 "ERROR: option -R may only be specified once.");
863 			args.pa_inputpath = optarg;
864 			if (args.pa_printfile == stderr)
865 				args.pa_printfile = stdout;
866 			args.pa_flags |= FLAG_READ_LOGFILE;
867 			break;
868 
869 		case 't':	/* target pid or process name */
870 			pmcstat_find_targets(optarg);
871 
872 			args.pa_flags |= FLAG_HAS_TARGET;
873 			args.pa_required |= FLAG_HAS_PROCESS_PMCS;
874 			break;
875 
876 		case 'T':	/* top mode */
877 			args.pa_flags |= FLAG_DO_TOP;
878 			args.pa_plugin = PMCSTAT_PL_CALLGRAPH;
879 			args.pa_ctdumpinstr = 0;
880 			args.pa_mergepmc = 1;
881 			if (args.pa_printfile == stderr)
882 				args.pa_printfile = stdout;
883 			break;
884 
885 		case 'v':	/* verbose */
886 			args.pa_verbosity++;
887 			break;
888 
889 		case 'w':	/* wait interval */
890 			interval = strtod(optarg, &end);
891 			if (*end != '\0' || interval <= 0)
892 				errx(EX_USAGE,
893 "ERROR: Illegal wait interval value \"%s\".",
894 				    optarg);
895 			args.pa_flags |= FLAG_HAS_WAIT_INTERVAL;
896 			args.pa_interval = interval;
897 			break;
898 
899 		case 'W':	/* toggle LOG_CSW */
900 			do_logproccsw = !do_logproccsw;
901 			args.pa_required |= (FLAG_HAS_PROCESS_PMCS |
902 			    FLAG_HAS_COUNTING_PMCS | FLAG_HAS_OUTPUT_LOGFILE);
903 			break;
904 
905 		case 'z':
906 			graphdepth = strtod(optarg, &end);
907 			if (*end != '\0' || graphdepth <= 0)
908 				errx(EX_USAGE,
909 				    "ERROR: Illegal callchain depth \"%s\".",
910 				    optarg);
911 			args.pa_graphdepth = graphdepth;
912 			args.pa_required |= FLAG_DO_CALLGRAPHS;
913 			break;
914 
915 		case '?':
916 		default:
917 			pmcstat_show_usage();
918 			break;
919 
920 		}
921 
922 	args.pa_argc = (argc -= optind);
923 	args.pa_argv = (argv += optind);
924 
925 	/* If we read from logfile and no specified CPU mask use
926 	 * the maximum CPU count.
927 	 */
928 	if ((args.pa_flags & FLAG_READ_LOGFILE) &&
929 	    (args.pa_flags & FLAGS_HAS_CPUMASK) == 0)
930 		CPU_FILL(&cpumask);
931 
932 	args.pa_cpumask = cpumask; /* For selecting CPUs using -R. */
933 
934 	if (argc)	/* command line present */
935 		args.pa_flags |= FLAG_HAS_COMMANDLINE;
936 
937 	if (args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS |
938 	    FLAG_DO_ANNOTATE | FLAG_DO_TOP))
939 		args.pa_flags |= FLAG_DO_ANALYSIS;
940 
941 	/*
942 	 * Check invocation syntax.
943 	 */
944 
945 	/* disallow -O and -R together */
946 	if (args.pa_outputpath && args.pa_inputpath)
947 		errx(EX_USAGE,
948 		    "ERROR: options -O and -R are mutually exclusive.");
949 
950 	/* disallow -T and -l together */
951 	if ((args.pa_flags & FLAG_HAS_DURATION) &&
952 	    (args.pa_flags & FLAG_DO_TOP))
953 		errx(EX_USAGE, "ERROR: options -T and -l are mutually "
954 		    "exclusive.");
955 
956 	/* -a and -m require -R */
957 	if (args.pa_flags & FLAG_DO_ANNOTATE && args.pa_inputpath == NULL)
958 		errx(EX_USAGE, "ERROR: option %s requires an input file",
959 		    args.pa_plugin == PMCSTAT_PL_ANNOTATE ? "-m" : "-a");
960 
961 	/* -m option is not allowed combined with -g or -G. */
962 	if (args.pa_flags & FLAG_DO_ANNOTATE &&
963 	    args.pa_flags & (FLAG_DO_GPROF | FLAG_DO_CALLGRAPHS))
964 		errx(EX_USAGE,
965 		    "ERROR: option -m and -g | -G are mutually exclusive");
966 
967 	if (args.pa_flags & FLAG_READ_LOGFILE) {
968 		errmsg = NULL;
969 		if (args.pa_flags & FLAG_HAS_COMMANDLINE)
970 			errmsg = "a command line specification";
971 		else if (args.pa_flags & FLAG_HAS_TARGET)
972 			errmsg = "option -t";
973 		else if (!STAILQ_EMPTY(&args.pa_events))
974 			errmsg = "a PMC event specification";
975 		if (errmsg)
976 			errx(EX_USAGE,
977 			    "ERROR: option -R may not be used with %s.",
978 			    errmsg);
979 	} else if (STAILQ_EMPTY(&args.pa_events))
980 		/* All other uses require a PMC spec. */
981 		pmcstat_show_usage();
982 
983 	/* check for -t pid without a process PMC spec */
984 	if ((args.pa_required & FLAG_HAS_TARGET) &&
985 	    (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0)
986 		errx(EX_USAGE,
987 "ERROR: option -t requires a process mode PMC to be specified."
988 		    );
989 
990 	/* check for process-mode options without a command or -t pid */
991 	if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) &&
992 	    (args.pa_flags & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) == 0)
993 		errx(EX_USAGE,
994 "ERROR: options -d, -E, -p, -P, and -W require a command line or target process."
995 		    );
996 
997 	/* check for -p | -P without a target process of some sort */
998 	if ((args.pa_required & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) &&
999 	    (args.pa_flags & (FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET)) == 0)
1000 		errx(EX_USAGE,
1001 "ERROR: options -P and -p require a target process or a command line."
1002 		    );
1003 
1004 	/* check for process-mode options without a process-mode PMC */
1005 	if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) &&
1006 	    (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0)
1007 		errx(EX_USAGE,
1008 "ERROR: options -d, -E, and -W require a process mode PMC to be specified."
1009 		    );
1010 
1011 	/* check for -c cpu with no system mode PMCs or logfile. */
1012 	if ((args.pa_required & FLAG_HAS_SYSTEM_PMCS) &&
1013 	    (args.pa_flags & FLAG_HAS_SYSTEM_PMCS) == 0 &&
1014 	    (args.pa_flags & FLAG_READ_LOGFILE) == 0)
1015 		errx(EX_USAGE,
1016 "ERROR: option -c requires at least one system mode PMC to be specified."
1017 		    );
1018 
1019 	/* check for counting mode options without a counting PMC */
1020 	if ((args.pa_required & FLAG_HAS_COUNTING_PMCS) &&
1021 	    (args.pa_flags & FLAG_HAS_COUNTING_PMCS) == 0)
1022 		errx(EX_USAGE,
1023 "ERROR: options -C, -W and -o require at least one counting mode PMC to be specified."
1024 		    );
1025 
1026 	/* check for sampling mode options without a sampling PMC spec */
1027 	if ((args.pa_required & FLAG_HAS_SAMPLING_PMCS) &&
1028 	    (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) == 0)
1029 		errx(EX_USAGE,
1030 "ERROR: options -N, -n and -O require at least one sampling mode PMC to be specified."
1031 		    );
1032 
1033 	/* check if -g/-G/-m/-T are being used correctly */
1034 	if ((args.pa_flags & FLAG_DO_ANALYSIS) &&
1035 	    !(args.pa_flags & (FLAG_HAS_SAMPLING_PMCS|FLAG_READ_LOGFILE)))
1036 		errx(EX_USAGE,
1037 "ERROR: options -g/-G/-m/-T require sampling PMCs or -R to be specified."
1038 		    );
1039 
1040 	/* check if -e was specified without -g */
1041 	if ((args.pa_flags & FLAG_DO_WIDE_GPROF_HC) &&
1042 	    !(args.pa_flags & FLAG_DO_GPROF))
1043 		errx(EX_USAGE,
1044 "ERROR: option -e requires gprof mode to be specified."
1045 		    );
1046 
1047 	/* check if -O was spuriously specified */
1048 	if ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) &&
1049 	    (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0)
1050 		errx(EX_USAGE,
1051 "ERROR: option -O is used only with options -E, -P, -S and -W."
1052 		    );
1053 
1054 	/* -k kernel path requires -g/-G/-m/-T or -R */
1055 	if ((args.pa_flags & FLAG_HAS_KERNELPATH) &&
1056 	    (args.pa_flags & FLAG_DO_ANALYSIS) == 0 &&
1057 	    (args.pa_flags & FLAG_READ_LOGFILE) == 0)
1058 	    errx(EX_USAGE, "ERROR: option -k is only used with -g/-R/-m/-T.");
1059 
1060 	/* -D only applies to gprof output mode (-g) */
1061 	if ((args.pa_flags & FLAG_HAS_SAMPLESDIR) &&
1062 	    (args.pa_flags & FLAG_DO_GPROF) == 0)
1063 	    errx(EX_USAGE, "ERROR: option -D is only used with -g.");
1064 
1065 	/* -M mapfile requires -g or -R */
1066 	if (args.pa_mapfilename != NULL &&
1067 	    (args.pa_flags & FLAG_DO_GPROF) == 0 &&
1068 	    (args.pa_flags & FLAG_READ_LOGFILE) == 0)
1069 	    errx(EX_USAGE, "ERROR: option -M is only used with -g/-R.");
1070 
1071 	/*
1072 	 * Disallow textual output of sampling PMCs if counting PMCs
1073 	 * have also been asked for, mostly because the combined output
1074 	 * is difficult to make sense of.
1075 	 */
1076 	if ((args.pa_flags & FLAG_HAS_COUNTING_PMCS) &&
1077 	    (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) &&
1078 	    ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) == 0))
1079 		errx(EX_USAGE,
1080 "ERROR: option -O is required if counting and sampling PMCs are specified together."
1081 		    );
1082 
1083 	/*
1084 	 * Check if 'kerneldir' refers to a file rather than a
1085 	 * directory.  If so, use `dirname path` to determine the
1086 	 * kernel directory.
1087 	 */
1088 	(void) snprintf(buffer, sizeof(buffer), "%s%s", args.pa_fsroot,
1089 	    args.pa_kernel);
1090 	if (stat(buffer, &sb) < 0)
1091 		err(EX_OSERR, "ERROR: Cannot locate kernel \"%s\"",
1092 		    buffer);
1093 	if (!S_ISREG(sb.st_mode) && !S_ISDIR(sb.st_mode))
1094 		errx(EX_USAGE, "ERROR: \"%s\": Unsupported file type.",
1095 		    buffer);
1096 	if (!S_ISDIR(sb.st_mode)) {
1097 		tmp = args.pa_kernel;
1098 		args.pa_kernel = strdup(dirname(args.pa_kernel));
1099 		if (args.pa_kernel == NULL)
1100 			errx(EX_SOFTWARE, "ERROR: Out of memory");
1101 		free(tmp);
1102 		(void) snprintf(buffer, sizeof(buffer), "%s%s",
1103 		    args.pa_fsroot, args.pa_kernel);
1104 		if (stat(buffer, &sb) < 0)
1105 			err(EX_OSERR, "ERROR: Cannot stat \"%s\"",
1106 			    buffer);
1107 		if (!S_ISDIR(sb.st_mode))
1108 			errx(EX_USAGE,
1109 			    "ERROR: \"%s\" is not a directory.",
1110 			    buffer);
1111 	}
1112 
1113 	/*
1114 	 * If a callgraph is to be created, select the output file.
1115 	 */
1116 	if (args.pa_flags & FLAG_DO_CALLGRAPHS) {
1117 		if (strcmp(graphfilename, "-") == 0)
1118 		    args.pa_graphfile = args.pa_printfile;
1119 		else {
1120 			args.pa_graphfile = fopen(graphfilename, "w");
1121 			if (args.pa_graphfile == NULL)
1122 				err(EX_OSERR,
1123 				    "ERROR: cannot open \"%s\" for writing",
1124 				    graphfilename);
1125 		}
1126 	}
1127 	if (args.pa_flags & FLAG_DO_ANNOTATE) {
1128 		args.pa_graphfile = fopen(graphfilename, "w");
1129 		if (args.pa_graphfile == NULL)
1130 			err(EX_OSERR, "ERROR: cannot open \"%s\" for writing",
1131 			    graphfilename);
1132 	}
1133 
1134 	/* if we've been asked to process a log file, skip init */
1135 	if ((args.pa_flags & FLAG_READ_LOGFILE) == 0) {
1136 		if (pmc_init() < 0)
1137 			err(EX_UNAVAILABLE,
1138 			    "ERROR: Initialization of the pmc(3) library failed"
1139 			    );
1140 
1141 		if ((npmc = pmc_npmc(0)) < 0) /* assume all CPUs are identical */
1142 			err(EX_OSERR,
1143 "ERROR: Cannot determine the number of PMCs on CPU %d",
1144 			    0);
1145 	}
1146 
1147 	/* Allocate a kqueue */
1148 	if ((pmcstat_kq = kqueue()) < 0)
1149 		err(EX_OSERR, "ERROR: Cannot allocate kqueue");
1150 
1151 	/* Setup the logfile as the source. */
1152 	if (args.pa_flags & FLAG_READ_LOGFILE) {
1153 		/*
1154 		 * Print the log in textual form if we haven't been
1155 		 * asked to generate profiling information.
1156 		 */
1157 		if ((args.pa_flags & FLAG_DO_ANALYSIS) == 0)
1158 			args.pa_flags |= FLAG_DO_PRINT;
1159 
1160 		pmcstat_initialize_logging();
1161 		rfd = pmcstat_open_log(args.pa_inputpath,
1162 		    PMCSTAT_OPEN_FOR_READ);
1163 		if ((args.pa_logparser = pmclog_open(rfd)) == NULL)
1164 			err(EX_OSERR, "ERROR: Cannot create parser");
1165 		if (fcntl(rfd, F_SETFL, O_NONBLOCK) < 0)
1166 			err(EX_OSERR, "ERROR: fcntl(2) failed");
1167 		EV_SET(&kev, rfd, EVFILT_READ, EV_ADD,
1168 		    0, 0, NULL);
1169 		if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1170 			err(EX_OSERR, "ERROR: Cannot register kevent");
1171 	}
1172 	/*
1173 	 * Configure the specified log file or setup a default log
1174 	 * consumer via a pipe.
1175 	 */
1176 	if (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) {
1177 		if (args.pa_outputpath)
1178 			args.pa_logfd = pmcstat_open_log(args.pa_outputpath,
1179 			    PMCSTAT_OPEN_FOR_WRITE);
1180 		else {
1181 			/*
1182 			 * process the log on the fly by reading it in
1183 			 * through a pipe.
1184 			 */
1185 			if (pipe(pipefd) < 0)
1186 				err(EX_OSERR, "ERROR: pipe(2) failed");
1187 
1188 			if (fcntl(pipefd[READPIPEFD], F_SETFL, O_NONBLOCK) < 0)
1189 				err(EX_OSERR, "ERROR: fcntl(2) failed");
1190 
1191 			EV_SET(&kev, pipefd[READPIPEFD], EVFILT_READ, EV_ADD,
1192 			    0, 0, NULL);
1193 
1194 			if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1195 				err(EX_OSERR, "ERROR: Cannot register kevent");
1196 
1197 			args.pa_logfd = pipefd[WRITEPIPEFD];
1198 
1199 			args.pa_flags |= FLAG_HAS_PIPE;
1200 			if ((args.pa_flags & FLAG_DO_TOP) == 0)
1201 				args.pa_flags |= FLAG_DO_PRINT;
1202 			args.pa_logparser = pmclog_open(pipefd[READPIPEFD]);
1203 		}
1204 
1205 		if (pmc_configure_logfile(args.pa_logfd) < 0)
1206 			err(EX_OSERR, "ERROR: Cannot configure log file");
1207 	}
1208 
1209 	/* remember to check for driver errors if we are sampling or logging */
1210 	check_driver_stats = (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) ||
1211 	    (args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE);
1212 
1213 	/*
1214 	 * Allocate a PMC for each requested event and set the sampling
1215 	 * count of every sampling-mode PMC.
1216 	 */
1217 
1218 	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
1219 		if (pmc_allocate(ev->ev_spec, ev->ev_mode,
1220 		    ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0)
1221 			err(EX_OSERR,
1222 "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"",
1223 			    PMC_IS_SYSTEM_MODE(ev->ev_mode) ?
1224 			    "system" : "process", ev->ev_spec);
1225 
1226 		if (PMC_IS_SAMPLING_MODE(ev->ev_mode) &&
1227 		    pmc_set(ev->ev_pmcid, ev->ev_count) < 0)
1228 			err(EX_OSERR,
1229 			    "ERROR: Cannot set sampling count for PMC \"%s\"",
1230 			    ev->ev_name);
1231 	}
1232 
1233 	/* compute printout widths */
1234 	STAILQ_FOREACH(ev, &args.pa_events, ev_next) {
1235 		int counter_width;
1236 		int display_width;
1237 		int header_width;
1238 
1239 		(void) pmc_width(ev->ev_pmcid, &counter_width);
1240 		header_width = strlen(ev->ev_name) + 2; /* prefix '%c/' */
1241 		display_width = (int) floor(counter_width / 3.32193) + 1;
1242 
1243 		if (PMC_IS_SYSTEM_MODE(ev->ev_mode))
1244 			header_width += 3; /* 2 digit CPU number + '/' */
1245 
1246 		if (header_width > display_width) {
1247 			ev->ev_fieldskip = 0;
1248 			ev->ev_fieldwidth = header_width;
1249 		} else {
1250 			ev->ev_fieldskip = display_width -
1251 			    header_width;
1252 			ev->ev_fieldwidth = display_width;
1253 		}
1254 	}
1255 
1256 	/*
1257 	 * If our output is being sent to a terminal, register a handler
1258 	 * for window size changes.
1259 	 */
1260 
1261 	if (isatty(fileno(args.pa_printfile))) {
1262 
1263 		if (ioctl(fileno(args.pa_printfile), TIOCGWINSZ, &ws) < 0)
1264 			err(EX_OSERR, "ERROR: Cannot determine window size");
1265 
1266 		pmcstat_displayheight = ws.ws_row - 1;
1267 		pmcstat_displaywidth  = ws.ws_col - 1;
1268 
1269 		EV_SET(&kev, SIGWINCH, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
1270 
1271 		if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1272 			err(EX_OSERR,
1273 			    "ERROR: Cannot register kevent for SIGWINCH");
1274 
1275 		args.pa_toptty = 1;
1276 	}
1277 
1278 	/*
1279 	 * Listen to key input in top mode.
1280 	 */
1281 	if (args.pa_flags & FLAG_DO_TOP) {
1282 		EV_SET(&kev, fileno(stdin), EVFILT_READ, EV_ADD, 0, 0, NULL);
1283 		if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1284 			err(EX_OSERR, "ERROR: Cannot register kevent");
1285 	}
1286 
1287 	EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
1288 	if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1289 		err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT");
1290 
1291 	EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
1292 	if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1293 		err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO");
1294 
1295 	/*
1296 	 * An exec() failure of a forked child is signalled by the
1297 	 * child sending the parent a SIGCHLD.  We don't register an
1298 	 * actual signal handler for SIGCHLD, but instead use our
1299 	 * kqueue to pick up the signal.
1300 	 */
1301 	EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
1302 	if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1303 		err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD");
1304 
1305 	/*
1306 	 * Setup a timer if we have counting mode PMCs needing to be printed or
1307 	 * top mode plugin is active.
1308 	 */
1309 	if (((args.pa_flags & FLAG_HAS_COUNTING_PMCS) &&
1310 	     (args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) ||
1311 	    (args.pa_flags & FLAG_DO_TOP)) {
1312 		EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0,
1313 		    args.pa_interval * 1000, NULL);
1314 
1315 		if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1316 			err(EX_OSERR,
1317 			    "ERROR: Cannot register kevent for timer");
1318 	}
1319 
1320 	/*
1321 	 * Setup a duration timer if we have sampling mode PMCs and
1322 	 * a duration time is set
1323 	 */
1324 	if ((args.pa_flags & FLAG_HAS_SAMPLING_PMCS) &&
1325 	    (args.pa_flags & FLAG_HAS_DURATION)) {
1326 		EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0,
1327 		    args.pa_duration * 1000, NULL);
1328 
1329 		if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
1330 			err(EX_OSERR, "ERROR: Cannot register kevent for "
1331 			    "time duration");
1332 	}
1333 
1334 	/* attach PMCs to the target process, starting it if specified */
1335 	if (args.pa_flags & FLAG_HAS_COMMANDLINE)
1336 		pmcstat_create_process();
1337 
1338 	if (check_driver_stats && pmc_get_driver_stats(&ds_start) < 0)
1339 		err(EX_OSERR, "ERROR: Cannot retrieve driver statistics");
1340 
1341 	/* Attach process pmcs to the target process. */
1342 	if (args.pa_flags & (FLAG_HAS_TARGET | FLAG_HAS_COMMANDLINE)) {
1343 		if (SLIST_EMPTY(&args.pa_targets))
1344 			errx(EX_DATAERR,
1345 			    "ERROR: No matching target processes.");
1346 		if (args.pa_flags & FLAG_HAS_PROCESS_PMCS)
1347 			pmcstat_attach_pmcs();
1348 
1349 		if (pmcstat_kvm) {
1350 			kvm_close(pmcstat_kvm);
1351 			pmcstat_kvm = NULL;
1352 		}
1353 	}
1354 
1355 	/* start the pmcs */
1356 	pmcstat_start_pmcs();
1357 
1358 	/* start the (commandline) process if needed */
1359 	if (args.pa_flags & FLAG_HAS_COMMANDLINE)
1360 		pmcstat_start_process();
1361 
1362 	/* initialize logging */
1363 	pmcstat_initialize_logging();
1364 
1365 	/* Handle SIGINT using the kqueue loop */
1366 	sa.sa_handler = SIG_IGN;
1367 	sa.sa_flags   = 0;
1368 	(void) sigemptyset(&sa.sa_mask);
1369 
1370 	if (sigaction(SIGINT, &sa, NULL) < 0)
1371 		err(EX_OSERR, "ERROR: Cannot install signal handler");
1372 
1373 	/*
1374 	 * Setup the top mode display.
1375 	 */
1376 	if (args.pa_flags & FLAG_DO_TOP) {
1377 		args.pa_flags &= ~FLAG_DO_PRINT;
1378 
1379 		if (args.pa_toptty) {
1380 			/*
1381 			 * Init ncurses.
1382 			 */
1383 			initscr();
1384 			if(has_colors() == TRUE) {
1385 				args.pa_topcolor = 1;
1386 				start_color();
1387 				use_default_colors();
1388 				pair_content(0, &cf, &cb);
1389 				init_pair(1, COLOR_RED, cb);
1390 				init_pair(2, COLOR_YELLOW, cb);
1391 				init_pair(3, COLOR_GREEN, cb);
1392 			}
1393 			cbreak();
1394 			noecho();
1395 			nonl();
1396 			nodelay(stdscr, 1);
1397 			intrflush(stdscr, FALSE);
1398 			keypad(stdscr, TRUE);
1399 			clear();
1400 			/* Get terminal width / height with ncurses. */
1401 			getmaxyx(stdscr,
1402 			    pmcstat_displayheight, pmcstat_displaywidth);
1403 			pmcstat_displayheight--; pmcstat_displaywidth--;
1404 			atexit(pmcstat_topexit);
1405 		}
1406 	}
1407 
1408 	/*
1409 	 * loop till either the target process (if any) exits, or we
1410 	 * are killed by a SIGINT or we reached the time duration.
1411 	 */
1412 	runstate = PMCSTAT_RUNNING;
1413 	do_print = do_read = 0;
1414 	do {
1415 		if ((c = kevent(pmcstat_kq, NULL, 0, &kev, 1, NULL)) <= 0) {
1416 			if (errno != EINTR)
1417 				err(EX_OSERR, "ERROR: kevent failed");
1418 			else
1419 				continue;
1420 		}
1421 
1422 		if (kev.flags & EV_ERROR)
1423 			errc(EX_OSERR, kev.data, "ERROR: kevent failed");
1424 
1425 		switch (kev.filter) {
1426 		case EVFILT_PROC:  /* target has exited */
1427 			runstate = pmcstat_close_log();
1428 			do_print = 1;
1429 			break;
1430 
1431 		case EVFILT_READ:  /* log file data is present */
1432 			if (kev.ident == (unsigned)fileno(stdin) &&
1433 			    (args.pa_flags & FLAG_DO_TOP)) {
1434 				if (pmcstat_keypress_log())
1435 					runstate = pmcstat_close_log();
1436 			} else {
1437 				do_read = 0;
1438 				runstate = pmcstat_process_log();
1439 			}
1440 			break;
1441 
1442 		case EVFILT_SIGNAL:
1443 			if (kev.ident == SIGCHLD) {
1444 				/*
1445 				 * The child process sends us a
1446 				 * SIGCHLD if its exec() failed.  We
1447 				 * wait for it to exit and then exit
1448 				 * ourselves.
1449 				 */
1450 				(void) wait(&c);
1451 				runstate = PMCSTAT_FINISHED;
1452 			} else if (kev.ident == SIGIO) {
1453 				/*
1454 				 * We get a SIGIO if a PMC loses all
1455 				 * of its targets, or if logfile
1456 				 * writes encounter an error.
1457 				 */
1458 				runstate = pmcstat_close_log();
1459 				do_print = 1; /* print PMCs at exit */
1460 			} else if (kev.ident == SIGINT) {
1461 				/* Kill the child process if we started it */
1462 				if (args.pa_flags & FLAG_HAS_COMMANDLINE)
1463 					pmcstat_kill_process();
1464 				runstate = pmcstat_close_log();
1465 			} else if (kev.ident == SIGWINCH) {
1466 				if (ioctl(fileno(args.pa_printfile),
1467 					TIOCGWINSZ, &ws) < 0)
1468 				    err(EX_OSERR,
1469 				        "ERROR: Cannot determine window size");
1470 				pmcstat_displayheight = ws.ws_row - 1;
1471 				pmcstat_displaywidth  = ws.ws_col - 1;
1472 			} else
1473 				assert(0);
1474 
1475 			break;
1476 
1477 		case EVFILT_TIMER:
1478 			/* time duration reached, exit */
1479 			if (args.pa_flags & FLAG_HAS_DURATION) {
1480 				runstate = PMCSTAT_FINISHED;
1481 				break;
1482 			}
1483 			/* print out counting PMCs */
1484 			if ((args.pa_flags & FLAG_DO_TOP) &&
1485 			     pmc_flush_logfile() == 0)
1486 				do_read = 1;
1487 			do_print = 1;
1488 			break;
1489 
1490 		}
1491 
1492 		if (do_print && !do_read) {
1493 			if ((args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0) {
1494 				pmcstat_print_pmcs();
1495 				if (runstate == PMCSTAT_FINISHED &&
1496 				    /* final newline */
1497 				    (args.pa_flags & FLAG_DO_PRINT) == 0)
1498 					(void) fprintf(args.pa_printfile, "\n");
1499 			}
1500 			if (args.pa_flags & FLAG_DO_TOP)
1501 				pmcstat_display_log();
1502 			do_print = 0;
1503 		}
1504 
1505 	} while (runstate != PMCSTAT_FINISHED);
1506 
1507 	if ((args.pa_flags & FLAG_DO_TOP) && args.pa_toptty) {
1508 		pmcstat_topexit();
1509 		args.pa_toptty = 0;
1510 	}
1511 
1512 	/* flush any pending log entries */
1513 	if (args.pa_flags & (FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE))
1514 		pmc_close_logfile();
1515 
1516 	pmcstat_cleanup();
1517 
1518 	/* check if the driver lost any samples or events */
1519 	if (check_driver_stats) {
1520 		if (pmc_get_driver_stats(&ds_end) < 0)
1521 			err(EX_OSERR,
1522 			    "ERROR: Cannot retrieve driver statistics");
1523 		if (ds_start.pm_intr_bufferfull != ds_end.pm_intr_bufferfull &&
1524 		    args.pa_verbosity > 0)
1525 			warnx(
1526 "WARNING: sampling was paused at least %u time%s.\n"
1527 "Please consider tuning the \"kern.hwpmc.nsamples\" tunable.",
1528 			    ds_end.pm_intr_bufferfull -
1529 			    ds_start.pm_intr_bufferfull,
1530 			    ((ds_end.pm_intr_bufferfull -
1531 			    ds_start.pm_intr_bufferfull) != 1) ? "s" : ""
1532 			    );
1533 		if (ds_start.pm_buffer_requests_failed !=
1534 		    ds_end.pm_buffer_requests_failed &&
1535 		    args.pa_verbosity > 0)
1536 			warnx(
1537 "WARNING: at least %u event%s were discarded while running.\n"
1538 "Please consider tuning the \"kern.hwpmc.nbuffers\" tunable.",
1539 	 		    ds_end.pm_buffer_requests_failed -
1540 			    ds_start.pm_buffer_requests_failed,
1541 			    ((ds_end.pm_buffer_requests_failed -
1542 			    ds_start.pm_buffer_requests_failed) != 1) ? "s" : ""
1543 			    );
1544 	}
1545 
1546 	exit(EX_OK);
1547 }
1548