xref: /illumos-gate/usr/src/cmd/trapstat/sun4/trapstat.c (revision 2a6e99a0f1f7d22c0396e8b2ce9b9babbd1056cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <fcntl.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <signal.h>
33 #include <strings.h>
34 #include <limits.h>
35 #include <sys/mman.h>
36 #include <sys/pset.h>
37 #include <sys/varargs.h>
38 #include <sys/trapstat.h>
39 #include <sys/wait.h>
40 #include <stddef.h>
41 #include <termio.h>
42 #include "_trapstat.h"
43 
#define	TSTAT_DEVICE	"/dev/trapstat"
#define	TSTAT_COMMAND	"trapstat"

/*
 * Value to display for counter `member' of snapshot `data', given the
 * previous snapshot `old'.  With g_absolute set this is the raw counter;
 * otherwise it is the counter delta scaled by g_interval over the time
 * elapsed between the two snapshots, rounded to the nearest integer.
 *
 * The whole expansion is parenthesized so that the conditional operator
 * cannot be split apart by operators surrounding the macro invocation.
 */
#define	TSTAT_DELTA(data, old, member) (g_absolute ? (data)->member : \
	(uint64_t)(0.5 + (g_interval / (double)((data)->tdata_snapts - \
	(old)->tdata_snapts)) * (double)((data)->member - (old)->member)))

/*
 * Print one "count, percentage" column pair.  Note that the expansion
 * carries its own terminating semicolon.
 */
#define	TSTAT_PRINT_MISSDATA(diff, time) \
	(void) printf(" %9lld %4.1f", (diff), (time));

#define	TSTAT_PAGESIZE_MODIFIERS	" kmgtp"
#define	TSTAT_PAGESIZE_STRLEN		10
#define	TSTAT_MAX_RATE			5000
#define	TSTAT_COLUMN_OFFS	26
#define	TSTAT_COLUMNS_PER_CPU	9
58 
59 static tstat_data_t *g_data[2];
60 static tstat_data_t *g_ndata, *g_odata;
61 static processorid_t g_max_cpus;
62 static int8_t *g_selected;
63 static timer_t g_tid;
64 static int g_interval = NANOSEC;
65 static int g_peffect = 1;
66 static int g_absolute = 0;
67 static sigset_t g_oset;
68 
69 static psetid_t g_pset = PS_NONE;
70 static processorid_t *g_pset_cpus;
71 static uint_t g_pset_ncpus;
72 
73 static int g_cpus_per_line = (80 - TSTAT_COLUMN_OFFS) / TSTAT_COLUMNS_PER_CPU;
74 static int g_winch;
75 
76 static int g_pgsizes;
77 static size_t *g_pgsize;
78 static char **g_pgnames;
79 static size_t g_datasize;
80 
81 static int g_gen;
82 static int g_fd;
83 static uint8_t g_active[TSTAT_NENT];
84 
85 static hrtime_t g_start;
86 
87 static int g_exec_errno;
88 static int g_child_exited;
89 static int g_child_status;
90 
91 static void (*g_process)(void *, uint64_t, double);
92 static void *g_arg;
93 
/*
 * Accumulator for one column pair of TLB miss output.
 */
typedef struct tstat_sum {
	uint64_t	tsum_diff;	/* summed miss counts */
	double		tsum_time;	/* summed percentage of time */
} tstat_sum_t;
98 
99 /*
100  * Define a dummy g_traps reader to establish a symbol capabilities lead.
101  * This routine should never be called, as the sun4u and sun4v variants
102  * will be used as appropriate.
103  */
104 /* ARGSUSED0 */
105 tstat_ent_t *
106 get_trap_ent(int ndx)
107 {
108 	return (NULL);
109 }
110 
/*
 * Write the usage message to stderr and exit with EXIT_FAILURE.
 */
static void
usage(void)
{
	/* The text contains no conversions, so it is emitted verbatim. */
	static const char text[] =
	    "\nusage:  trapstat [ -t | -T | -e entrylist ]\n"
	    "   [ -C psrset | -c cpulist ]\n"
	    "   [ -P ] [ -a ] [ -r rate ] [[ interval [ count ] ] | "
	    "command [ args ] ]\n\n"
	    "Trap selection options:\n\n"
	    " -t             TLB statistics\n"
	    " -T             TLB statistics, with pagesize information\n"
	    " -e entrylist   Enable statistics only for entries specified "
	    "by entrylist\n\n"
	    "CPU selection options:\n\n"
	    " -c cpulist     Enable statistics only for specified CPU list\n"
	    " -C psrset      Enable statistics only for specified processor "
	    "set\n\n"
	    "Other options:\n\n"
	    " -a             Display trap values as accumulating values "
	    "instead of rates\n"
	    " -l             List trap table entries and exit\n"
	    " -P             Display output in parsable format\n"
	    " -r hz          Set sampling rate to be hz samples "
	    "per second\n\n";

	(void) fputs(text, stderr);
	exit(EXIT_FAILURE);
}
138 
139 static void
140 fatal(char *fmt, ...)
141 {
142 	va_list ap;
143 	int error = errno;
144 
145 	va_start(ap, fmt);
146 
147 	(void) fprintf(stderr, TSTAT_COMMAND ": ");
148 	(void) vfprintf(stderr, fmt, ap);
149 
150 	if (fmt[strlen(fmt) - 1] != '\n')
151 		(void) fprintf(stderr, ": %s\n", strerror(error));
152 
153 	exit(EXIT_FAILURE);
154 }
155 
156 static void
157 set_width(void)
158 {
159 	struct winsize win;
160 
161 	if (!isatty(fileno(stdout)))
162 		return;
163 
164 	if (ioctl(fileno(stdout), TIOCGWINSZ, &win) == -1)
165 		return;
166 
167 	if (win.ws_col == 0) {
168 		/*
169 		 * If TIOCGWINSZ returned 0 for the columns, just return --
170 		 * thereby using the default value of g_cpus_per_line.  (This
171 		 * happens, e.g., when running over a tip line.)
172 		 */
173 		return;
174 	}
175 
176 	g_cpus_per_line = (win.ws_col - TSTAT_COLUMN_OFFS) /
177 	    TSTAT_COLUMNS_PER_CPU;
178 
179 	if (g_cpus_per_line < 1)
180 		g_cpus_per_line = 1;
181 }
182 
183 static void
184 intr(int signo)
185 {
186 	int error = errno;
187 
188 	switch (signo) {
189 	case SIGWINCH:
190 		g_winch = 1;
191 		set_width();
192 		break;
193 
194 	case SIGCHLD:
195 		g_child_exited = 1;
196 
197 		while (wait(&g_child_status) == -1 && errno == EINTR)
198 			continue;
199 		break;
200 
201 	default:
202 		break;
203 	}
204 
205 	errno = error;
206 }
207 
208 static void
209 setup(void)
210 {
211 	struct sigaction act;
212 	struct sigevent ev;
213 	sigset_t set;
214 	int i;
215 
216 	for (i = 0; i < TSTAT_NENT; i++) {
217 		tstat_ent_t	*gtp;
218 
219 		if ((gtp = get_trap_ent(i)) == NULL)
220 			continue;
221 
222 		if (gtp->tent_type == TSTAT_ENT_RESERVED)
223 			gtp->tent_name = "reserved";
224 
225 		if (gtp->tent_type == TSTAT_ENT_UNUSED)
226 			gtp->tent_name = "unused";
227 	}
228 
229 	g_max_cpus = (processorid_t)sysconf(_SC_CPUID_MAX) + 1;
230 
231 	if ((g_selected = malloc(sizeof (int8_t) * g_max_cpus)) == NULL)
232 		fatal("could not allocate g_selected");
233 
234 	bzero(g_selected, sizeof (int8_t) * g_max_cpus);
235 
236 	g_pset_cpus = malloc(sizeof (processorid_t) * g_max_cpus);
237 	if (g_pset_cpus == NULL)
238 		fatal("could not allocate g_pset_cpus");
239 
240 	bzero(g_pset_cpus, sizeof (processorid_t) * g_max_cpus);
241 
242 	if ((g_pgsizes = getpagesizes(NULL, 0)) == -1)
243 		fatal("getpagesizes()");
244 
245 	if ((g_pgsize = malloc(sizeof (size_t) * g_pgsizes)) == NULL)
246 		fatal("could not allocate g_pgsize array");
247 
248 	if (getpagesizes(g_pgsize, g_pgsizes) == -1)
249 		fatal("getpagesizes(%d)", g_pgsizes);
250 
251 	if ((g_pgnames = malloc(sizeof (char *) * g_pgsizes)) == NULL)
252 		fatal("could not allocate g_pgnames");
253 
254 	for (i = 0; i < g_pgsizes; i++) {
255 		size_t j, mul;
256 		size_t sz = g_pgsize[i];
257 
258 		if ((g_pgnames[i] = malloc(TSTAT_PAGESIZE_STRLEN)) == NULL)
259 			fatal("could not allocate g_pgnames[%d]", i);
260 
261 		for (j = 0, mul = 10; (1 << mul) <= sz; j++, mul += 10)
262 			continue;
263 
264 		(void) snprintf(g_pgnames[i], TSTAT_PAGESIZE_STRLEN,
265 		    "%d%c", sz >> (mul - 10), " kmgtpe"[j]);
266 	}
267 
268 	g_datasize =
269 	    sizeof (tstat_data_t) + (g_pgsizes - 1) * sizeof (tstat_pgszdata_t);
270 
271 	if ((g_data[0] = malloc(g_datasize * g_max_cpus)) == NULL)
272 		fatal("could not allocate data buffer 0");
273 
274 	if ((g_data[1] = malloc(g_datasize * g_max_cpus)) == NULL)
275 		fatal("could not allocate data buffer 1");
276 
277 	(void) sigemptyset(&act.sa_mask);
278 	act.sa_flags = 0;
279 	act.sa_handler = intr;
280 	(void) sigaction(SIGUSR1, &act, NULL);
281 	(void) sigaction(SIGCHLD, &act, NULL);
282 
283 	(void) sigaddset(&act.sa_mask, SIGCHLD);
284 	(void) sigaddset(&act.sa_mask, SIGUSR1);
285 	(void) sigaction(SIGWINCH, &act, NULL);
286 	set_width();
287 
288 	(void) sigemptyset(&set);
289 	(void) sigaddset(&set, SIGCHLD);
290 	(void) sigaddset(&set, SIGUSR1);
291 	(void) sigaddset(&set, SIGWINCH);
292 	(void) sigprocmask(SIG_BLOCK, &set, &g_oset);
293 
294 	ev.sigev_notify = SIGEV_SIGNAL;
295 	ev.sigev_signo = SIGUSR1;
296 
297 	if (timer_create(CLOCK_HIGHRES, &ev, &g_tid) == -1)
298 		fatal("cannot create CLOCK_HIGHRES timer");
299 }
300 
301 static void
302 set_interval(hrtime_t nsec)
303 {
304 	struct itimerspec ts;
305 
306 	/*
307 	 * If the interval is less than one second, we'll report the
308 	 * numbers in terms of rate-per-interval.  If the interval is
309 	 * greater than one second, we'll report numbers in terms of
310 	 * rate-per-second.
311 	 */
312 	g_interval = nsec < NANOSEC ? nsec : NANOSEC;
313 
314 	ts.it_value.tv_sec = nsec / NANOSEC;
315 	ts.it_value.tv_nsec = nsec % NANOSEC;
316 	ts.it_interval.tv_sec = nsec / NANOSEC;
317 	ts.it_interval.tv_nsec = nsec % NANOSEC;
318 
319 	if (timer_settime(g_tid, TIMER_RELTIME, &ts, NULL) == -1)
320 		fatal("cannot set time on CLOCK_HIGHRES timer");
321 }
322 
323 static void
324 print_entries(FILE *stream, int parsable)
325 {
326 	int entno;
327 
328 	if (!parsable) {
329 		(void) fprintf(stream, "  %3s %3s | %-20s | %s\n", "hex",
330 		    "dec", "entry name", "description");
331 
332 		(void) fprintf(stream, "----------+----------------------"
333 		    "+-----------------------\n");
334 	}
335 
336 	for (entno = 0; entno < TSTAT_NENT; entno++) {
337 		tstat_ent_t	*gtp;
338 
339 		if ((gtp = get_trap_ent(entno)) == NULL)
340 			continue;
341 
342 		if (gtp->tent_type != TSTAT_ENT_USED)
343 			continue;
344 
345 		(void) fprintf(stream, "0x%03x %3d %s%-20s %s%s\n",
346 		    entno, entno,
347 		    parsable ? "" : "| ", gtp->tent_name,
348 		    parsable ? "" : "| ", gtp->tent_descr);
349 	}
350 }
351 
352 static void
353 select_entry(char *entry)
354 {
355 	ulong_t entno;
356 	char *end;
357 
358 	/*
359 	 * The entry may be specified as a number (e.g., "0x68", "104") or
360 	 * as a name ("dtlb-miss").
361 	 */
362 	entno = strtoul(entry, &end, 0);
363 
364 	if (*end == '\0') {
365 		if (entno >= TSTAT_NENT)
366 			goto bad_entry;
367 	} else {
368 		for (entno = 0; entno < TSTAT_NENT; entno++) {
369 			tstat_ent_t	*gtp;
370 
371 			if ((gtp = get_trap_ent(entno)) == NULL)
372 				continue;
373 
374 			if (gtp->tent_type != TSTAT_ENT_USED)
375 				continue;
376 
377 			if (strcmp(entry, gtp->tent_name) == 0)
378 				break;
379 		}
380 
381 		if (entno == TSTAT_NENT)
382 			goto bad_entry;
383 	}
384 
385 	if (ioctl(g_fd, TSTATIOC_ENTRY, entno) == -1)
386 		fatal("TSTATIOC_ENTRY failed for entry 0x%x", entno);
387 
388 	g_active[entno] = 1;
389 	return;
390 
391 bad_entry:
392 	(void) fprintf(stderr, TSTAT_COMMAND ": invalid entry '%s'", entry);
393 	(void) fprintf(stderr, "; valid entries:\n\n");
394 	print_entries(stderr, 0);
395 	exit(EXIT_FAILURE);
396 }
397 
398 static void
399 select_cpu(processorid_t cpu)
400 {
401 	if (g_pset != PS_NONE)
402 		fatal("cannot specify both a processor set and a processor\n");
403 
404 	if (cpu < 0 || cpu >= g_max_cpus)
405 		fatal("cpu %d out of range\n", cpu);
406 
407 	if (p_online(cpu, P_STATUS) == -1) {
408 		if (errno != EINVAL)
409 			fatal("could not get status for cpu %d", cpu);
410 		fatal("cpu %d not present\n", cpu);
411 	}
412 
413 	g_selected[cpu] = 1;
414 }
415 
416 static void
417 select_cpus(processorid_t low, processorid_t high)
418 {
419 	if (g_pset != PS_NONE)
420 		fatal("cannot specify both a processor set and processors\n");
421 
422 	if (low < 0 || low >= g_max_cpus)
423 		fatal("invalid cpu '%d'\n", low);
424 
425 	if (high < 0 || high >= g_max_cpus)
426 		fatal("invalid cpu '%d'\n", high);
427 
428 	if (low >= high)
429 		fatal("invalid range '%d' to '%d'\n", low, high);
430 
431 	do {
432 		if (p_online(low, P_STATUS) != -1)
433 			g_selected[low] = 1;
434 	} while (++low <= high);
435 }
436 
437 static void
438 select_pset(psetid_t pset)
439 {
440 	processorid_t i;
441 
442 	if (pset < 0)
443 		fatal("processor set %d is out of range\n", pset);
444 
445 	/*
446 	 * Only one processor set can be specified.
447 	 */
448 	if (g_pset != PS_NONE)
449 		fatal("at most one processor set may be specified\n");
450 
451 	/*
452 	 * One cannot select processors _and_ a processor set.
453 	 */
454 	for (i = 0; i < g_max_cpus; i++)
455 		if (g_selected[i])
456 			break;
457 
458 	if (i != g_max_cpus)
459 		fatal("cannot specify both a processor and a processor set\n");
460 
461 	g_pset = pset;
462 	g_pset_ncpus = g_max_cpus;
463 
464 	if (pset_info(g_pset, NULL, &g_pset_ncpus, g_pset_cpus) == -1)
465 		fatal("invalid processor set: %d\n", g_pset);
466 
467 	if (g_pset_ncpus == 0)
468 		fatal("processor set %d empty\n", g_pset);
469 
470 	if (ioctl(g_fd, TSTATIOC_NOCPU) == -1)
471 		fatal("TSTATIOC_NOCPU failed");
472 
473 	for (i = 0; i < g_pset_ncpus; i++)
474 		g_selected[g_pset_cpus[i]] = 1;
475 }
476 
477 static void
478 check_pset(void)
479 {
480 	uint_t ncpus = g_max_cpus;
481 	processorid_t i;
482 
483 	if (g_pset == PS_NONE)
484 		return;
485 
486 	if (pset_info(g_pset, NULL, &ncpus, g_pset_cpus) == -1) {
487 		if (errno == EINVAL)
488 			fatal("processor set %d destroyed\n", g_pset);
489 
490 		fatal("couldn't get info for processor set %d", g_pset);
491 	}
492 
493 	if (ncpus == 0)
494 		fatal("processor set %d empty\n", g_pset);
495 
496 	if (ncpus == g_pset_ncpus) {
497 		for (i = 0; i < g_pset_ncpus; i++) {
498 			if (!g_selected[g_pset_cpus[i]])
499 				break;
500 		}
501 
502 		/*
503 		 * If the number of CPUs hasn't changed, and every CPU
504 		 * in the processor set is also selected, we know that the
505 		 * processor set itself hasn't changed.
506 		 */
507 		if (i == g_pset_ncpus)
508 			return;
509 	}
510 
511 	/*
512 	 * If we're here, we have a new processor set.  First, we need
513 	 * to zero out the selection array.
514 	 */
515 	bzero(g_selected, sizeof (int8_t) * g_max_cpus);
516 
517 	g_pset_ncpus = ncpus;
518 
519 	if (ioctl(g_fd, TSTATIOC_STOP) == -1)
520 		fatal("TSTATIOC_STOP failed");
521 
522 	if (ioctl(g_fd, TSTATIOC_NOCPU) == -1)
523 		fatal("TSATIOC_NOCPU failed");
524 
525 	for (i = 0; i < g_pset_ncpus; i++) {
526 		g_selected[g_pset_cpus[i]] = 1;
527 		if (ioctl(g_fd, TSTATIOC_CPU, g_pset_cpus[i]) == -1)
528 			fatal("TSTATIOC_CPU failed for cpu %d", i);
529 	}
530 
531 	/*
532 	 * Now that we have selected the CPUs, we're going to reenable
533 	 * trapstat, and reread the data for the current generation.
534 	 */
535 	if (ioctl(g_fd, TSTATIOC_GO) == -1)
536 		fatal("TSTATIOC_GO failed");
537 
538 	if (ioctl(g_fd, TSTATIOC_READ, g_data[g_gen]) == -1)
539 		fatal("TSTATIOC_READ failed");
540 }
541 
542 static void
543 missdata(tstat_missdata_t *miss, tstat_missdata_t *omiss)
544 {
545 	hrtime_t ts = g_ndata->tdata_snapts - g_odata->tdata_snapts;
546 	hrtime_t tick = g_ndata->tdata_snaptick - g_odata->tdata_snaptick;
547 	uint64_t raw = miss->tmiss_count - omiss->tmiss_count;
548 	uint64_t diff = g_absolute ? miss->tmiss_count :
549 	    (uint64_t)(0.5 + g_interval /
550 	    (double)ts * (double)(miss->tmiss_count - omiss->tmiss_count));
551 	hrtime_t peffect = raw * g_ndata->tdata_peffect * g_peffect, time;
552 	double p;
553 
554 	/*
555 	 * Now we need to account for the trapstat probe effect.  Take
556 	 * the amount of time spent in the handler, and add the
557 	 * amount of time known to be due to the trapstat probe effect.
558 	 */
559 	time = miss->tmiss_time - omiss->tmiss_time + peffect;
560 
561 	if (time >= tick) {
562 		/*
563 		 * This really shouldn't happen unless our calculation of
564 		 * the probe effect was vastly incorrect.  In any case,
565 		 * print 99.9 for the time instead of printing negative
566 		 * values...
567 		 */
568 		time = tick / 1000 * 999;
569 	}
570 
571 	p = (double)time / (double)tick * (double)100.0;
572 
573 	(*g_process)(g_arg, diff, p);
574 }
575 
576 static void
577 tlbdata(tstat_tlbdata_t *tlb, tstat_tlbdata_t *otlb)
578 {
579 	missdata(&tlb->ttlb_tlb, &otlb->ttlb_tlb);
580 	missdata(&tlb->ttlb_tsb, &otlb->ttlb_tsb);
581 }
582 
/*
 * missdata() callback: print one count/percentage column pair and, when
 * ttl is non-NULL, add the percentage to the running total it points to.
 */
static void
print_missdata(double *ttl, uint64_t diff, double p)
{
	TSTAT_PRINT_MISSDATA(diff, p);

	if (ttl == NULL)
		return;

	*ttl = *ttl + p;
}
591 
592 static void
593 print_modepgsz(char *prefix, tstat_modedata_t *data, tstat_modedata_t *odata)
594 {
595 	int ps;
596 	size_t incr = sizeof (tstat_pgszdata_t);
597 
598 	for (ps = 0; ps < g_pgsizes; ps++) {
599 		double ttl = 0.0;
600 
601 		g_process = (void(*)(void *, uint64_t, double))print_missdata;
602 		g_arg = &ttl;
603 
604 		(void) printf("%s %4s|", prefix, g_pgnames[ps]);
605 		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
606 		(void) printf(" |");
607 		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
608 
609 		(void) printf(" |%4.1f\n", ttl);
610 
611 		data = (tstat_modedata_t *)((uintptr_t)data + incr);
612 		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
613 	}
614 }
615 
616 static void
617 parsable_modepgsz(char *prefix, tstat_modedata_t *data, tstat_modedata_t *odata)
618 {
619 	int ps;
620 	size_t incr = sizeof (tstat_pgszdata_t);
621 
622 	g_process = (void(*)(void *, uint64_t, double))print_missdata;
623 	g_arg = NULL;
624 
625 	for (ps = 0; ps < g_pgsizes; ps++) {
626 		(void) printf("%s %7d", prefix, g_pgsize[ps]);
627 		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
628 		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
629 		(void) printf("\n");
630 
631 		data = (tstat_modedata_t *)((uintptr_t)data + incr);
632 		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
633 	}
634 }
635 
636 static void
637 sum_missdata(void *sump, uint64_t diff, double p)
638 {
639 	tstat_sum_t *sum = *((tstat_sum_t **)sump);
640 
641 	sum->tsum_diff += diff;
642 	sum->tsum_time += p;
643 
644 	(*(tstat_sum_t **)sump)++;
645 }
646 
647 static void
648 sum_modedata(tstat_modedata_t *data, tstat_modedata_t *odata, tstat_sum_t *sum)
649 {
650 	int ps, incr = sizeof (tstat_pgszdata_t);
651 	tstat_sum_t *sump;
652 
653 	for (ps = 0; ps < g_pgsizes; ps++) {
654 		sump = sum;
655 
656 		g_process = sum_missdata;
657 		g_arg = &sump;
658 
659 		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
660 		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
661 
662 		data = (tstat_modedata_t *)((uintptr_t)data + incr);
663 		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
664 	}
665 }
666 
667 static void
668 print_sum(tstat_sum_t *sum, int divisor)
669 {
670 	int i;
671 	double ttl = 0.0;
672 
673 	for (i = 0; i < 4; i++) {
674 		if (i == 2)
675 			(void) printf(" |");
676 
677 		sum[i].tsum_time /= divisor;
678 
679 		TSTAT_PRINT_MISSDATA(sum[i].tsum_diff, sum[i].tsum_time);
680 		ttl += sum[i].tsum_time;
681 	}
682 
683 	(void) printf(" |%4.1f\n", ttl);
684 }
685 
686 static void
687 print_tlbpgsz(tstat_data_t *data, tstat_data_t *odata)
688 {
689 	int i, cpu, ncpus = 0;
690 	char pre[12];
691 	tstat_sum_t sum[4];
692 
693 	(void) printf("cpu m size| %9s %4s %9s %4s | %9s %4s %9s %4s |%4s\n"
694 	    "----------+-------------------------------+-----------------------"
695 	    "--------+----\n", "itlb-miss", "%tim", "itsb-miss", "%tim",
696 	    "dtlb-miss", "%tim", "dtsb-miss", "%tim", "%tim");
697 
698 	bzero(sum, sizeof (sum));
699 
700 	for (i = 0; i < g_max_cpus; i++) {
701 		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
702 		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
703 
704 		if ((cpu = data->tdata_cpuid) == -1)
705 			break;
706 
707 		if (i != 0)
708 			(void) printf("----------+-----------------------------"
709 			    "--+-------------------------------+----\n");
710 
711 		g_ndata = data;
712 		g_odata = odata;
713 
714 		(void) sprintf(pre, "%3d u", cpu);
715 		print_modepgsz(pre, &pgsz->tpgsz_user, &opgsz->tpgsz_user);
716 		sum_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, sum);
717 
718 		(void) printf("- - - - - + - - - - - - - - - - - - - -"
719 		    " - + - - - - - - - - - - - - - - - + - -\n");
720 
721 		(void) sprintf(pre, "%3d k", cpu);
722 		print_modepgsz(pre, &pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel);
723 		sum_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, sum);
724 
725 		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
726 		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
727 		ncpus++;
728 	}
729 
730 	(void) printf("==========+===============================+========="
731 	    "======================+====\n");
732 	(void) printf("      ttl |");
733 	print_sum(sum, ncpus);
734 	(void) printf("\n");
735 }
736 
737 static void
738 parsable_tlbpgsz(tstat_data_t *data, tstat_data_t *odata)
739 {
740 	int i, cpu;
741 	char pre[30];
742 
743 	for (i = 0; i < g_max_cpus; i++) {
744 		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
745 		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
746 
747 		if ((cpu = data->tdata_cpuid) == -1)
748 			break;
749 
750 		g_ndata = data;
751 		g_odata = odata;
752 
753 		(void) sprintf(pre, "%lld %3d u",
754 		    data->tdata_snapts - g_start, cpu);
755 		parsable_modepgsz(pre, &pgsz->tpgsz_user, &opgsz->tpgsz_user);
756 
757 		pre[strlen(pre) - 1] = 'k';
758 		parsable_modepgsz(pre, &pgsz->tpgsz_kernel,
759 		    &opgsz->tpgsz_kernel);
760 
761 		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
762 		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
763 	}
764 }
765 
766 static void
767 print_modedata(tstat_modedata_t *data, tstat_modedata_t *odata, int parsable)
768 {
769 	int ps, i;
770 	size_t incr = sizeof (tstat_pgszdata_t);
771 	tstat_sum_t sum[4], *sump = sum;
772 	double ttl = 0.0;
773 
774 	bzero(sum, sizeof (sum));
775 	g_process = sum_missdata;
776 	g_arg = &sump;
777 
778 	for (ps = 0; ps < g_pgsizes; ps++) {
779 		tlbdata(&data->tmode_itlb, &odata->tmode_itlb);
780 		tlbdata(&data->tmode_dtlb, &odata->tmode_dtlb);
781 
782 		data = (tstat_modedata_t *)((uintptr_t)data + incr);
783 		odata = (tstat_modedata_t *)((uintptr_t)odata + incr);
784 		sump = sum;
785 	}
786 
787 	for (i = 0; i < 4; i++) {
788 		if (i == 2 && !parsable)
789 			(void) printf(" |");
790 
791 		TSTAT_PRINT_MISSDATA(sum[i].tsum_diff, sum[i].tsum_time);
792 		ttl += sum[i].tsum_time;
793 	}
794 
795 	if (parsable) {
796 		(void) printf("\n");
797 		return;
798 	}
799 
800 	(void) printf(" |%4.1f\n", ttl);
801 }
802 
803 static void
804 print_tlb(tstat_data_t *data, tstat_data_t *odata)
805 {
806 	int i, cpu, ncpus = 0;
807 	tstat_sum_t sum[4];
808 
809 	(void) printf("cpu m| %9s %4s %9s %4s | %9s %4s %9s %4s |%4s\n"
810 	    "-----+-------------------------------+-----------------------"
811 	    "--------+----\n", "itlb-miss", "%tim", "itsb-miss", "%tim",
812 	    "dtlb-miss", "%tim", "dtsb-miss", "%tim", "%tim");
813 
814 	bzero(sum, sizeof (sum));
815 
816 	for (i = 0; i < g_max_cpus; i++) {
817 		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
818 		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
819 
820 		if ((cpu = data->tdata_cpuid) == -1)
821 			break;
822 
823 		if (i != 0)
824 			(void) printf("-----+-------------------------------+-"
825 			    "------------------------------+----\n");
826 
827 		g_ndata = data;
828 		g_odata = odata;
829 
830 		(void) printf("%3d u|", cpu);
831 		print_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, 0);
832 		sum_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, sum);
833 
834 		(void) printf("%3d k|", cpu);
835 		print_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, 0);
836 		sum_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, sum);
837 
838 		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
839 		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
840 		ncpus++;
841 	}
842 
843 	(void) printf("=====+===============================+========="
844 	    "======================+====\n");
845 
846 	(void) printf(" ttl |");
847 	print_sum(sum, ncpus);
848 	(void) printf("\n");
849 }
850 
851 static void
852 parsable_tlb(tstat_data_t *data, tstat_data_t *odata)
853 {
854 	int i, cpu;
855 
856 	for (i = 0; i < g_max_cpus; i++) {
857 		tstat_pgszdata_t *pgsz = data->tdata_pgsz;
858 		tstat_pgszdata_t *opgsz = odata->tdata_pgsz;
859 
860 		if ((cpu = data->tdata_cpuid) == -1)
861 			break;
862 
863 		g_ndata = data;
864 		g_odata = odata;
865 
866 		(void) printf("%lld %3d u ", data->tdata_snapts - g_start, cpu);
867 		print_modedata(&pgsz->tpgsz_user, &opgsz->tpgsz_user, 1);
868 		(void) printf("%lld %3d k ", data->tdata_snapts - g_start, cpu);
869 		print_modedata(&pgsz->tpgsz_kernel, &opgsz->tpgsz_kernel, 1);
870 
871 		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
872 		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
873 	}
874 }
875 
876 static void
877 print_stats(tstat_data_t *data, tstat_data_t *odata)
878 {
879 	int i, j, k, done;
880 	processorid_t id;
881 	tstat_data_t *base = data;
882 
883 	/*
884 	 * First, blast through all of the data updating our array
885 	 * of active traps.  We keep an array of active traps to prevent
886 	 * printing lines for traps that are never seen -- while still printing
887 	 * lines for traps that have been seen only once on some CPU.
888 	 */
889 	for (i = 0; i < g_max_cpus; i++) {
890 		if (data[i].tdata_cpuid == -1)
891 			break;
892 
893 		for (j = 0; j < TSTAT_NENT; j++) {
894 			if (!data[i].tdata_traps[j] || g_active[j])
895 				continue;
896 
897 			g_active[j] = 1;
898 		}
899 	}
900 
901 	data = base;
902 
903 	for (done = 0; !done; data += g_cpus_per_line) {
904 		for (i = 0; i < g_cpus_per_line; i++) {
905 			if (&data[i] - base >= g_max_cpus)
906 				break;
907 
908 			if ((id = data[i].tdata_cpuid) == -1)
909 				break;
910 
911 			if (i == 0)
912 				(void) printf("vct name                |");
913 
914 			(void) printf("   %scpu%d", id >= 100 ? "" :
915 			    id >= 10 ? " " : "  ", id);
916 		}
917 
918 		if (i == 0)
919 			break;
920 
921 		if (i != g_cpus_per_line)
922 			done = 1;
923 
924 		(void) printf("\n------------------------+");
925 
926 		for (j = 0; j < i; j++)
927 			(void) printf("---------");
928 		(void) printf("\n");
929 
930 		for (j = 0; j < TSTAT_NENT; j++) {
931 			tstat_ent_t	*gtp;
932 
933 			if ((!g_active[j]) || ((gtp = get_trap_ent(j)) == NULL))
934 				continue;
935 
936 			(void) printf("%3x %-20s|", j, gtp->tent_name);
937 			for (k = 0; k < i; k++) {
938 				(void) printf(" %8lld", TSTAT_DELTA(&data[k],
939 				    &odata[data - base + k], tdata_traps[j]));
940 			}
941 			(void) printf("\n");
942 		}
943 		(void) printf("\n");
944 	}
945 }
946 
947 static void
948 parsable_stats(tstat_data_t *data, tstat_data_t *odata)
949 {
950 	tstat_data_t *base;
951 	int i;
952 
953 	for (base = data; data - base < g_max_cpus; data++, odata++) {
954 		if (data->tdata_cpuid == -1)
955 			break;
956 
957 		for (i = 0; i < TSTAT_NENT; i++) {
958 			tstat_ent_t	*gtp;
959 
960 			if ((!data->tdata_traps[i] && !g_active[i]) ||
961 			    ((gtp = get_trap_ent(i)) == NULL))
962 				continue;
963 
964 			(void) printf("%lld %d %x %s ",
965 			    data->tdata_snapts - g_start, data->tdata_cpuid, i,
966 			    gtp->tent_name);
967 
968 			(void) printf("%lld\n", TSTAT_DELTA(data, odata,
969 			    tdata_traps[i]));
970 		}
971 	}
972 }
973 
974 static void
975 check_data(tstat_data_t *data, tstat_data_t *odata)
976 {
977 	tstat_data_t *ndata;
978 	int i;
979 
980 	if (data->tdata_cpuid == -1) {
981 		/*
982 		 * The last CPU we were watching must have been DR'd out
983 		 * of the system.  Print a vaguely useful message and exit.
984 		 */
985 		fatal("all initially selected CPUs have been unconfigured\n");
986 	}
987 
988 	/*
989 	 * If a CPU is DR'd out of the system, we'll stop receiving data
990 	 * for it.  CPUs are never added, however (that is, if a CPU is
991 	 * DR'd into the system, we won't automatically start receiving
992 	 * data for it).  We check for this by making sure that all of
993 	 * the CPUs present in the old data are present in the new data.
994 	 * If we find one missing in the new data, we correct the old data
995 	 * by removing the old CPU.  This assures that delta are printed
996 	 * correctly.
997 	 */
998 	for (i = 0; i < g_max_cpus; i++) {
999 		if (odata->tdata_cpuid == -1)
1000 			return;
1001 
1002 		if (data->tdata_cpuid != odata->tdata_cpuid)
1003 			break;
1004 
1005 		data = (tstat_data_t *)((uintptr_t)data + g_datasize);
1006 		odata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
1007 	}
1008 
1009 	if (i == g_max_cpus)
1010 		return;
1011 
1012 	/*
1013 	 * If we're here, we know that the odata is a CPU which has been
1014 	 * DR'd out.  We'll now smoosh it out of the old data.
1015 	 */
1016 	for (odata->tdata_cpuid = -1; i < g_max_cpus - 1; i++) {
1017 		ndata = (tstat_data_t *)((uintptr_t)odata + g_datasize);
1018 		bcopy(ndata, odata, g_datasize);
1019 		ndata->tdata_cpuid = -1;
1020 	}
1021 
1022 	/*
1023 	 * There may be other CPUs DR'd out; tail-call recurse.
1024 	 */
1025 	check_data(data, odata);
1026 }
1027 
1028 int
1029 main(int argc, char **argv)
1030 {
1031 	processorid_t id;
1032 	char c, *end;
1033 	ulong_t indefinite;
1034 	long count = 0, rate = 0;
1035 	int list = 0, parsable = 0;
1036 	void (*print)(tstat_data_t *, tstat_data_t *);
1037 	sigset_t set;
1038 
1039 	struct {
1040 		char opt;
1041 		void (*print)(tstat_data_t *, tstat_data_t *);
1042 		void (*parsable)(tstat_data_t *, tstat_data_t *);
1043 		int repeat;
1044 	} tab[] = {
1045 		{ '\0',	print_stats,	parsable_stats,		0 },
1046 		{ 'e',	print_stats,	parsable_stats,		1 },
1047 		{ 't',	print_tlb,	parsable_tlb,		0 },
1048 		{ 'T',	print_tlbpgsz,	parsable_tlbpgsz,	0 },
1049 		{ -1,	NULL,		NULL,			0 }
1050 	}, *tabent = NULL, *iter;
1051 
1052 	uintptr_t offs = (uintptr_t)&tab->print - (uintptr_t)tab;
1053 
1054 	/*
1055 	 * If argv[0] is non-NULL, set argv[0] to keep any getopt(3C) output
1056 	 * consistent with other error output.
1057 	 */
1058 	if (argv[0] != NULL)
1059 		argv[0] = TSTAT_COMMAND;
1060 
1061 	if ((g_fd = open(TSTAT_DEVICE, O_RDWR)) == -1)
1062 		fatal("couldn't open " TSTAT_DEVICE);
1063 
1064 	setup();
1065 
1066 	while ((c = getopt(argc, argv, "alnNtTc:C:r:e:P")) != EOF) {
1067 		/*
1068 		 * First, check to see if this option changes our printing
1069 		 * function.
1070 		 */
1071 		for (iter = tab; iter->opt >= 0; iter++) {
1072 			if (c != iter->opt)
1073 				continue;
1074 
1075 			if (tabent != NULL) {
1076 				if (tabent == iter) {
1077 					if (tabent->repeat) {
1078 						/*
1079 						 * This option is allowed to
1080 						 * have repeats; break out.
1081 						 */
1082 						break;
1083 					}
1084 
1085 					fatal("expected -%c at most once\n", c);
1086 				}
1087 
1088 				fatal("only one of -%c, -%c expected\n",
1089 				    tabent->opt, c);
1090 			}
1091 
1092 			tabent = iter;
1093 			break;
1094 		}
1095 
1096 		switch (c) {
1097 		case 'a':
1098 			g_absolute = 1;
1099 			break;
1100 
1101 		case 'e': {
1102 			char *s = strtok(optarg, ",");
1103 
1104 			while (s != NULL) {
1105 				select_entry(s);
1106 				s = strtok(NULL, ",");
1107 			}
1108 
1109 			break;
1110 		}
1111 
1112 		case 'l':
1113 			list = 1;
1114 			break;
1115 
1116 		case 'n':
1117 			/*
1118 			 * This undocumented option prevents trapstat from
1119 			 * actually switching the %tba to point to the
1120 			 * interposing trap table.  It's very useful when
1121 			 * debugging trapstat bugs:  one can specify "-n"
1122 			 * and then examine the would-be interposing trap
1123 			 * table without running the risk of RED stating.
1124 			 */
1125 			if (ioctl(g_fd, TSTATIOC_NOGO) == -1)
1126 				fatal("TSTATIOC_NOGO");
1127 			break;
1128 
1129 		case 'N':
1130 			/*
1131 			 * This undocumented option forces trapstat to ignore
1132 			 * its determined probe effect.  This may be useful
1133 			 * if it is believed that the probe effect has been
1134 			 * grossly overestimated.
1135 			 */
1136 			g_peffect = 0;
1137 			break;
1138 
1139 		case 't':
1140 		case 'T':
1141 			/*
1142 			 * When running with TLB statistics, we want to
1143 			 * minimize probe effect by running with all other
1144 			 * entries explicitly disabled.
1145 			 */
1146 			if (ioctl(g_fd, TSTATIOC_NOENTRY) == -1)
1147 				fatal("TSTATIOC_NOENTRY");
1148 
1149 			if (ioctl(g_fd, TSTATIOC_TLBDATA) == -1)
1150 				fatal("TSTATIOC_TLBDATA");
1151 			break;
1152 
1153 		case 'c': {
1154 			/*
1155 			 * We allow CPUs to be specified as an optionally
1156 			 * comma separated list of either CPU IDs or ranges
1157 			 * of CPU IDs.
1158 			 */
1159 			char *s = strtok(optarg, ",");
1160 
1161 			while (s != NULL) {
1162 				id = strtoul(s, &end, 0);
1163 
1164 				if (id == ULONG_MAX && errno == ERANGE) {
1165 					*end = '\0';
1166 					fatal("invalid cpu '%s'\n", s);
1167 				}
1168 
1169 				if (*(s = end) != '\0') {
1170 					processorid_t p;
1171 
1172 					if (*s != '-')
1173 						fatal("invalid cpu '%s'\n", s);
1174 					p = strtoul(++s, &end, 0);
1175 
1176 					if (*end != '\0' ||
1177 					    (p == ULONG_MAX && errno == ERANGE))
1178 						fatal("invalid cpu '%s'\n", s);
1179 
1180 					select_cpus(id, p);
1181 				} else {
1182 					select_cpu(id);
1183 				}
1184 
1185 				s = strtok(NULL, ",");
1186 			}
1187 
1188 			break;
1189 		}
1190 
1191 		case 'C': {
1192 			psetid_t pset = strtoul(optarg, &end, 0);
1193 
1194 			if (*end != '\0' ||
1195 			    (pset == ULONG_MAX && errno == ERANGE))
1196 				fatal("invalid processor set '%s'\n", optarg);
1197 
1198 			select_pset(pset);
1199 			break;
1200 		}
1201 
1202 		case 'r': {
1203 			rate = strtol(optarg, &end, 0);
1204 
1205 			if (*end != '\0' ||
1206 			    (rate == LONG_MAX && errno == ERANGE))
1207 				fatal("invalid rate '%s'\n", optarg);
1208 
1209 			if (rate <= 0)
1210 				fatal("rate must be greater than zero\n");
1211 
1212 			if (rate > TSTAT_MAX_RATE)
1213 				fatal("rate may not exceed %d\n",
1214 				    TSTAT_MAX_RATE);
1215 
1216 			set_interval(NANOSEC / rate);
1217 			break;
1218 		}
1219 
1220 		case 'P':
1221 			offs = (uintptr_t)&tab->parsable - (uintptr_t)tab;
1222 			parsable = 1;
1223 			break;
1224 
1225 		default:
1226 			usage();
1227 		}
1228 	}
1229 
1230 	if (list) {
1231 		print_entries(stdout, parsable);
1232 		exit(EXIT_SUCCESS);
1233 	}
1234 
1235 	if (optind != argc) {
1236 
1237 		int interval = strtol(argv[optind], &end, 0);
1238 
1239 		if (*end != '\0') {
1240 			/*
1241 			 * That wasn't a valid number.  It must be that we're
1242 			 * to execute this command.
1243 			 */
1244 			switch (vfork()) {
1245 			case 0:
1246 				(void) close(g_fd);
1247 				(void) sigprocmask(SIG_SETMASK, &g_oset, NULL);
1248 				(void) execvp(argv[optind], &argv[optind]);
1249 
1250 				/*
1251 				 * No luck.  Set errno.
1252 				 */
1253 				g_exec_errno = errno;
1254 				_exit(EXIT_FAILURE);
1255 				/*NOTREACHED*/
1256 			case -1:
1257 				fatal("cannot fork");
1258 				/*NOTREACHED*/
1259 			default:
1260 				break;
1261 			}
1262 		} else {
1263 			if (interval <= 0)
1264 				fatal("interval must be greater than zero.\n");
1265 
1266 			if (interval == LONG_MAX && errno == ERANGE)
1267 				fatal("invalid interval '%s'\n", argv[optind]);
1268 
1269 			set_interval(NANOSEC * (hrtime_t)interval);
1270 
1271 			if (++optind != argc) {
1272 				char *s = argv[optind];
1273 
1274 				count = strtol(s, &end, 0);
1275 
1276 				if (*end != '\0' || count <= 0 ||
1277 				    (count == LONG_MAX && errno == ERANGE))
1278 					fatal("invalid count '%s'\n", s);
1279 			}
1280 		}
1281 	} else {
1282 		if (!rate)
1283 			set_interval(NANOSEC);
1284 	}
1285 
1286 	if (tabent == NULL)
1287 		tabent = tab;
1288 
1289 	print = *(void(**)(tstat_data_t *, tstat_data_t *))
1290 	    ((uintptr_t)tabent + offs);
1291 
1292 	for (id = 0; id < g_max_cpus; id++) {
1293 		if (!g_selected[id])
1294 			continue;
1295 
1296 		if (ioctl(g_fd, TSTATIOC_CPU, id) == -1)
1297 			fatal("TSTATIOC_CPU failed for cpu %d", id);
1298 	}
1299 
1300 	g_start = gethrtime();
1301 
1302 	if (ioctl(g_fd, TSTATIOC_GO) == -1)
1303 		fatal("TSTATIOC_GO failed");
1304 
1305 	if (ioctl(g_fd, TSTATIOC_READ, g_data[g_gen ^ 1]) == -1)
1306 		fatal("initial TSTATIOC_READ failed");
1307 
1308 	(void) sigemptyset(&set);
1309 
1310 	for (indefinite = (count == 0); indefinite || count; count--) {
1311 
1312 		(void) sigsuspend(&set);
1313 
1314 		if (g_winch) {
1315 			g_winch = 0;
1316 			continue;
1317 		}
1318 
1319 		if (g_child_exited && g_exec_errno != 0) {
1320 			errno = g_exec_errno;
1321 			fatal("could not execute %s", argv[optind]);
1322 		}
1323 
1324 		if (ioctl(g_fd, TSTATIOC_READ, g_data[g_gen]) == -1)
1325 			fatal("TSTATIOC_READ failed");
1326 
1327 		/*
1328 		 * Before we blithely print the data, we need to
1329 		 * make sure that we haven't lost a CPU.
1330 		 */
1331 		check_data(g_data[g_gen], g_data[g_gen ^ 1]);
1332 		(*print)(g_data[g_gen], g_data[g_gen ^ 1]);
1333 		(void) fflush(stdout);
1334 
1335 		if (g_child_exited) {
1336 			if (WIFEXITED(g_child_status)) {
1337 				if (WEXITSTATUS(g_child_status) == 0)
1338 					break;
1339 
1340 				(void) fprintf(stderr, TSTAT_COMMAND ": "
1341 				    "warning: %s exited with code %d\n",
1342 				    argv[optind], WEXITSTATUS(g_child_status));
1343 			} else {
1344 				(void) fprintf(stderr, TSTAT_COMMAND ": "
1345 				    "warning: %s died on signal %d\n",
1346 				    argv[optind], WTERMSIG(g_child_status));
1347 			}
1348 			break;
1349 		}
1350 
1351 		check_pset();
1352 
1353 		g_gen ^= 1;
1354 	}
1355 
1356 	return (0);
1357 }
1358