xref: /titanic_44/usr/src/cmd/stat/iostat/iostat.c (revision 03fc868668dd42b1b163d1fb8af3968f7283a7eb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * rewritten from UCB 4.13 83/09/25
27  * rewritten from SunOS 4.1 SID 1.18 89/10/06
28  */
29 /*
30  * Copyright (c) 2012 by Delphix. All rights reserved.
31  */
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdarg.h>
36 #include <ctype.h>
37 #include <unistd.h>
38 #include <memory.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <signal.h>
42 #include <sys/types.h>
43 #include <time.h>
44 #include <sys/time.h>
45 #include <sys/sysinfo.h>
46 #include <inttypes.h>
47 #include <strings.h>
48 #include <sys/systeminfo.h>
49 #include <kstat.h>
50 #include <locale.h>
51 
52 #include "dsr.h"
53 #include "statcommon.h"
54 
55 #define	DISK_OLD		0x0001
56 #define	DISK_NEW		0x0002
57 #define	DISK_EXTENDED		0x0004
58 #define	DISK_ERRORS		0x0008
59 #define	DISK_EXTENDED_ERRORS	0x0010
60 #define	DISK_IOPATH_LI		0x0020	/* LunInitiator */
61 #define	DISK_IOPATH_LTI		0x0040	/* LunTargetInitiator */
62 
63 #define	DISK_NORMAL		(DISK_OLD | DISK_NEW)
64 #define	DISK_IO_MASK		(DISK_OLD | DISK_NEW | DISK_EXTENDED)
65 #define	DISK_ERROR_MASK		(DISK_ERRORS | DISK_EXTENDED_ERRORS)
66 #define	PRINT_VERTICAL		(DISK_ERROR_MASK | DISK_EXTENDED)
67 
68 #define	REPRINT 19
69 
70 #define	NUMBER_OF_ERR_COUNTERS	3
71 
72 /*
73  * It's really a pseudo-gigabyte. We use 1000000000 bytes so that the disk
74  * labels don't look bad. 1GB is really 1073741824 bytes.
75  */
76 #define	DISK_GIGABYTE   1000000000.0
77 
/*
 * Function descriptor to be called when extended headers are used.
 * Descriptors are chained through "next" into a singly linked list;
 * main() walks that list and invokes each nfunc in turn to build one
 * report.
 */
struct formatter {
	void (*nfunc)(void);		/* output routine to invoke */
	struct formatter *next;		/* following entry, NULL at the tail */
};

typedef struct formatter format_t;
86 
/*
 * Selects which devices show_disk() emits on a given walk, so that
 * tty/cpu data can be placed to the right of the disk data:
 *
 *	SHOW_FIRST_ONLY		only the first device visited
 *	SHOW_SECOND_ONWARDS	every device except the first one
 *	SHOW_ALL		every device
 */
enum show_disk_mode {
	SHOW_FIRST_ONLY = 0,
	SHOW_SECOND_ONWARDS = 1,
	SHOW_ALL = 2
};

enum show_disk_mode show_disk_mode = SHOW_ALL;
98 
99 char *cmdname = "iostat";
100 int caught_cont = 0;
101 
102 static char one_blank[] = " ";
103 static char two_blanks[] = "  ";
104 
105 /*
106  * count for number of lines to be emitted before a header is
107  * shown again. Only used for the basic format.
108  */
109 static	uint_t	tohdr = 1;
110 
111 /*
112  * If we're in raw format, have we printed a header? We only do it
113  * once for raw but we emit it every REPRINT lines in non-raw format.
114  * This applies only for the basic header. The extended header is
115  * done only once in both formats.
116  */
117 static	uint_t	hdr_out;
118 
119 /*
120  * Flags representing arguments from command line
121  */
122 static	uint_t	do_tty;			/* show tty info (-t) */
123 static	uint_t	do_disk;		/* show disk info per selected */
124 					/* format (-d, -D, -e, -E, -x -X -Y) */
125 static	uint_t	do_cpu;			/* show cpu info (-c) */
126 static	uint_t	do_interval;		/* do intervals (-I) */
127 static	int	do_partitions;		/* per-partition stats (-p) */
128 static	int	do_partitions_only;	/* per-partition stats only (-P) */
129 					/* no per-device stats for disks */
130 static	uint_t	do_conversions;		/* display disks as cXtYdZ (-n) */
131 static	uint_t	do_megabytes;		/* display data in MB/sec (-M) */
132 static  uint_t	do_controller;		/* display controller info (-C) */
133 static  uint_t	do_raw;			/* emit raw format (-r) */
134 static	uint_t	timestamp_fmt = NODATE;	/* timestamp  each display (-T) */
135 static	uint_t	do_devid;		/* -E should show devid */
136 
137 /*
138  * Default number of disk drives to be displayed in basic format
139  */
140 #define	DEFAULT_LIMIT	4
141 
142 struct iodev_filter df;
143 
144 static  uint_t	suppress_state;		/* skip state change messages */
145 static	uint_t	suppress_zero;		/* skip zero valued lines */
146 static  uint_t	show_mountpts;		/* show mount points */
147 static	int 	interval;		/* interval (seconds) to output */
148 static	int 	iter;			/* iterations from command line */
149 
150 #define	SMALL_SCRATCH_BUFLEN	MAXNAMELEN
151 
152 static int	iodevs_nl;		/* name field width */
153 #define	IODEVS_NL_MIN		6	/* not too thin for "device" */
154 #define	IODEVS_NL_MAX		24	/* but keep full width under 80 */
155 
156 static	char	disk_header[132];
157 static	uint_t 	dh_len;			/* disk header length for centering */
158 static  int 	lineout;		/* data waiting to be printed? */
159 
160 static struct snapshot *newss;
161 static struct snapshot *oldss;
162 static	double	getime;			/* elapsed time */
163 static	double	percent;		/* 100 / etime */
164 
165 /*
166  * List of functions to be called which will construct the desired output
167  */
168 static format_t	*formatter_list;
169 static format_t *formatter_end;
170 
171 static u_longlong_t	ull_delta(u_longlong_t, u_longlong_t);
172 static uint_t 	u32_delta(uint_t, uint_t);
173 static void setup(void (*nfunc)(void));
174 static void print_tty_hdr1(void);
175 static void print_tty_hdr2(void);
176 static void print_cpu_hdr1(void);
177 static void print_cpu_hdr2(void);
178 static void print_tty_data(void);
179 static void print_cpu_data(void);
180 static void print_err_hdr(void);
181 static void print_disk_header(void);
182 static void hdrout(void);
183 static void disk_errors(void);
184 static void do_newline(void);
185 static void push_out(const char *, ...);
186 static void printhdr(int);
187 static void printxhdr(void);
188 static void usage(void);
189 static void do_args(int, char **);
190 static void do_format(void);
191 static void show_all_disks(void);
192 static void show_first_disk(void);
193 static void show_other_disks(void);
194 static void show_disk_errors(void *, void *, void *);
195 static void write_core_header(void);
196 static int  fzero(double value);
197 static int  safe_strtoi(char const *val, char *errmsg);
198 
199 int
200 main(int argc, char **argv)
201 {
202 	enum snapshot_types types = SNAP_SYSTEM;
203 	kstat_ctl_t *kc;
204 	long hz;
205 	int forever;
206 	hrtime_t start_n;
207 	hrtime_t period_n;
208 
209 	(void) setlocale(LC_ALL, "");
210 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
211 #define	TEXT_DOMAIN "SYS_TEST"		/* Use this only if it weren't */
212 #endif
213 	(void) textdomain(TEXT_DOMAIN);
214 
215 	do_args(argc, argv);
216 
217 	/*
218 	 * iostat historically showed CPU changes, even though
219 	 * it doesn't provide much useful information
220 	 */
221 	types |= SNAP_CPUS;
222 
223 	if (do_disk)
224 		types |= SNAP_IODEVS;
225 
226 	if (do_disk && !do_partitions_only)
227 		df.if_allowed_types |= IODEV_DISK;
228 	if (do_disk & DISK_IOPATH_LI) {
229 		df.if_allowed_types |= IODEV_IOPATH_LTI;
230 		types |= SNAP_IOPATHS_LI;
231 	}
232 	if (do_disk & DISK_IOPATH_LTI) {
233 		df.if_allowed_types |= IODEV_IOPATH_LTI;
234 		types |= SNAP_IOPATHS_LTI;
235 	}
236 	if (do_disk & DISK_ERROR_MASK)
237 		types |= SNAP_IODEV_ERRORS;
238 	if (do_partitions || do_partitions_only)
239 		df.if_allowed_types |= IODEV_PARTITION;
240 	if (do_conversions)
241 		types |= SNAP_IODEV_PRETTY;
242 	if (do_devid)
243 		types |= SNAP_IODEV_DEVID;
244 	if (do_controller) {
245 		if (!(do_disk & PRINT_VERTICAL) ||
246 		    (do_disk & DISK_EXTENDED_ERRORS))
247 			fail(0, "-C can only be used with -e or -x.");
248 		types |= SNAP_CONTROLLERS;
249 		df.if_allowed_types |= IODEV_CONTROLLER;
250 	}
251 
252 	hz = sysconf(_SC_CLK_TCK);
253 
254 	/*
255 	 * Undocumented behavior - sending a SIGCONT will result
256 	 * in a new header being emitted. Used only if we're not
257 	 * doing extended headers. This is a historical
258 	 * artifact.
259 	 */
260 	if (!(do_disk & PRINT_VERTICAL))
261 		(void) signal(SIGCONT, printhdr);
262 
263 	if (interval)
264 		period_n = (hrtime_t)interval * NANOSEC;
265 
266 	kc = open_kstat();
267 	if (interval)
268 		start_n = gethrtime();
269 	newss = acquire_snapshot(kc, types, &df);
270 
271 	/* compute width of "device" field */
272 	iodevs_nl = newss->s_iodevs_is_name_maxlen;
273 	iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
274 	    IODEVS_NL_MIN : iodevs_nl;
275 	iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
276 	    IODEVS_NL_MAX : iodevs_nl;
277 
278 	do_format();
279 
280 	forever = (iter == 0);
281 	do {
282 		if (do_conversions && show_mountpts)
283 			do_mnttab();
284 
285 		if (do_tty || do_cpu) {
286 			kstat_t *oldks;
287 			oldks = oldss ? &oldss->s_sys.ss_agg_sys : NULL;
288 			getime = cpu_ticks_delta(oldks,
289 			    &newss->s_sys.ss_agg_sys);
290 			percent = (getime > 0.0) ? 100.0 / getime : 0.0;
291 			getime = (getime / nr_active_cpus(newss)) / hz;
292 			if (getime == 0.0)
293 				getime = (double)interval;
294 			if (getime == 0.0 || do_interval)
295 				getime = 1.0;
296 		}
297 
298 		if (formatter_list) {
299 			format_t *tmp;
300 			tmp = formatter_list;
301 
302 			if (timestamp_fmt != NODATE)
303 				print_timestamp(timestamp_fmt);
304 
305 			while (tmp) {
306 				(tmp->nfunc)();
307 				tmp = tmp->next;
308 			}
309 			(void) fflush(stdout);
310 		}
311 
312 		/* only remaining/doing a single iteration, we are done */
313 		if (iter == 1)
314 			continue;
315 
316 		if (interval > 0)
317 			/* Have a kip */
318 			sleep_until(&start_n, period_n, forever, &caught_cont);
319 
320 		free_snapshot(oldss);
321 		oldss = newss;
322 		newss = acquire_snapshot(kc, types, &df);
323 		iodevs_nl = (newss->s_iodevs_is_name_maxlen > iodevs_nl) ?
324 		    newss->s_iodevs_is_name_maxlen : iodevs_nl;
325 		iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
326 		    IODEVS_NL_MIN : iodevs_nl;
327 		iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
328 		    IODEVS_NL_MAX : iodevs_nl;
329 
330 		if (!suppress_state)
331 			snapshot_report_changes(oldss, newss);
332 
333 		/* if config changed, show stats from boot */
334 		if (snapshot_has_changed(oldss, newss)) {
335 			free_snapshot(oldss);
336 			oldss = NULL;
337 		}
338 
339 	} while (--iter);
340 
341 	free_snapshot(oldss);
342 	free_snapshot(newss);
343 	(void) kstat_close(kc);
344 	free(df.if_names);
345 	return (0);
346 }
347 
348 /*
349  * Some magic numbers used in header formatting.
350  *
351  * DISK_LEN = length of either "kps tps serv" or "wps rps util"
352  *	      using 0 as the first position
353  *
354  * DISK_ERROR_LEN = length of "s/w h/w trn tot" with one space on
355  *		either side. Does not use zero as first pos.
356  *
357  * DEVICE_LEN = length of "device" + 1 character.
358  */
359 
360 #define	DISK_LEN	11
361 #define	DISK_ERROR_LEN	16
362 #define	DEVICE_LEN	7
363 
364 /*ARGSUSED*/
365 static void
366 show_disk_name(void *v1, void *v2, void *data)
367 {
368 	struct iodev_snapshot *dev = (struct iodev_snapshot *)v2;
369 	size_t slen;
370 	char *name;
371 	char fbuf[SMALL_SCRATCH_BUFLEN];
372 
373 	if (dev == NULL)
374 		return;
375 
376 	name = do_conversions ? dev->is_pretty : dev->is_name;
377 	name = name ? name : dev->is_name;
378 
379 	if (!do_raw) {
380 		uint_t width;
381 
382 		slen = strlen(name);
383 		/*
384 		 * The length is less
385 		 * than the section
386 		 * which will be displayed
387 		 * on the next line.
388 		 * Center the entry.
389 		 */
390 
391 		width = (DISK_LEN + 1)/2 + (slen / 2);
392 		(void) snprintf(fbuf, sizeof (fbuf),
393 		    "%*s", width, name);
394 		name = fbuf;
395 		push_out("%-13.13s ", name);
396 	} else {
397 		push_out(name);
398 	}
399 }
400 
401 /*ARGSUSED*/
402 static void
403 show_disk_header(void *v1, void *v2, void *data)
404 {
405 	push_out(disk_header);
406 }
407 
408 /*
409  * Write out a two line header. What is written out depends on the flags
410  * selected but in the worst case consists of a tty header, a disk header
411  * providing information for 4 disks and a cpu header.
412  *
413  * The tty header consists of the word "tty" on the first line above the
414  * words "tin tout" on the next line. If present the tty portion consumes
415  * the first 10 characters of each line since "tin tout" is surrounded
416  * by single spaces.
417  *
418  * Each of the disk sections is a 14 character "block" in which the name of
419  * the disk is centered in the first 12 characters of the first line.
420  *
421  * The cpu section is an 11 character block with "cpu" centered over the
422  * section.
423  *
424  * The worst case should look as follows:
425  *
426  * 0---------1--------2---------3---------4---------5---------6---------7-------
427  *    tty        sd0           sd1           sd2           sd3           cpu
428  *  tin tout kps tps serv  kps tps serv  kps tps serv  kps tps serv  us sy dt id
429  *  NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NN NN NN NN
430  *
431  * When -D is specified, the disk header looks as follows (worst case):
432  *
433  * 0---------1--------2---------3---------4---------5---------6---------7-------
434  *     tty        sd0           sd1             sd2          sd3          cpu
435  *   tin tout rps wps util  rps wps util  rps wps util  rps wps util us sy dt id
436  *   NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN NN NN NN NN
437  */
438 static void
439 printhdr(int sig)
440 {
441 	/*
442 	 * If we're here because a signal fired, reenable the
443 	 * signal.
444 	 */
445 	if (sig)
446 		(void) signal(SIGCONT, printhdr);
447 	if (sig == SIGCONT)
448 		caught_cont = 1;
449 	/*
450 	 * Horizontal mode headers
451 	 *
452 	 * First line
453 	 */
454 	if (do_tty)
455 		print_tty_hdr1();
456 
457 	if (do_disk & DISK_NORMAL) {
458 		(void) snapshot_walk(SNAP_IODEVS, NULL, newss,
459 		    show_disk_name, NULL);
460 	}
461 
462 	if (do_cpu)
463 		print_cpu_hdr1();
464 	do_newline();
465 
466 	/*
467 	 * Second line
468 	 */
469 	if (do_tty)
470 		print_tty_hdr2();
471 
472 	if (do_disk & DISK_NORMAL) {
473 		(void) snapshot_walk(SNAP_IODEVS, NULL, newss,
474 		    show_disk_header, NULL);
475 	}
476 
477 	if (do_cpu)
478 		print_cpu_hdr2();
479 	do_newline();
480 
481 	tohdr = REPRINT;
482 }
483 
484 /*
485  * Write out the extended header centered over the core information.
486  */
487 static void
488 write_core_header(void)
489 {
490 	char *edev = "extended device statistics";
491 	uint_t lead_space_ct;
492 	uint_t follow_space_ct;
493 	size_t edevlen;
494 
495 	if (do_raw == 0) {
496 		/*
497 		 * The things we do to look nice...
498 		 *
499 		 * Center the core output header. Make sure we have the
500 		 * right number of trailing spaces for follow-on headers
501 		 * (i.e., cpu and/or tty and/or errors).
502 		 */
503 		edevlen = strlen(edev);
504 		lead_space_ct = dh_len - edevlen;
505 		lead_space_ct /= 2;
506 		if (lead_space_ct > 0) {
507 			follow_space_ct = dh_len - (lead_space_ct + edevlen);
508 			if (do_disk & DISK_ERRORS)
509 				follow_space_ct -= DISK_ERROR_LEN;
510 			if ((do_disk & DISK_EXTENDED) && do_conversions)
511 				follow_space_ct -= DEVICE_LEN;
512 
513 			push_out("%1$*2$.*2$s%3$s%4$*5$.*5$s", one_blank,
514 			    lead_space_ct, edev, one_blank, follow_space_ct);
515 		} else
516 			push_out("%56s", edev);
517 	} else
518 		push_out(edev);
519 }
520 
521 /*
522  * In extended mode headers, we don't want to reprint the header on
523  * signals as they are printed every time anyways.
524  */
525 static void
526 printxhdr(void)
527 {
528 
529 	/*
530 	 * Vertical mode headers
531 	 */
532 	if (do_disk & DISK_EXTENDED)
533 		setup(write_core_header);
534 	if (do_disk & DISK_ERRORS)
535 		setup(print_err_hdr);
536 
537 	if (do_conversions) {
538 		setup(do_newline);
539 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
540 			setup(print_disk_header);
541 		setup(do_newline);
542 	} else {
543 		if (do_tty)
544 			setup(print_tty_hdr1);
545 		if (do_cpu)
546 			setup(print_cpu_hdr1);
547 		setup(do_newline);
548 
549 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
550 			setup(print_disk_header);
551 		if (do_tty)
552 			setup(print_tty_hdr2);
553 		if (do_cpu)
554 			setup(print_cpu_hdr2);
555 		setup(do_newline);
556 	}
557 }
558 
559 /*
560  * Write out a line for this disk - note that show_disk writes out
561  * full lines or blocks for each selected disk.
562  */
563 static void
564 show_disk(void *v1, void *v2, void *data)
565 {
566 	uint32_t err_counters[NUMBER_OF_ERR_COUNTERS];
567 	boolean_t display_err_counters = do_disk & DISK_ERRORS;
568 	struct iodev_snapshot *old = (struct iodev_snapshot *)v1;
569 	struct iodev_snapshot *new = (struct iodev_snapshot *)v2;
570 	int *count = (int *)data;
571 	double rps, wps, tps, mtps, krps, kwps, kps, avw, avr, w_pct, r_pct;
572 	double wserv, rserv, serv;
573 	double iosize;	/* kb/sec or MB/sec */
574 	double etime, hr_etime;
575 	char *disk_name;
576 	u_longlong_t ldeltas;
577 	uint_t udeltas;
578 	uint64_t t_delta;
579 	uint64_t w_delta;
580 	uint64_t r_delta;
581 	int doit = 1;
582 	uint_t toterrs;
583 	char *fstr;
584 
585 	if (new == NULL)
586 		return;
587 
588 	switch (show_disk_mode) {
589 	case SHOW_FIRST_ONLY:
590 		if (count != NULL && *count)
591 			return;
592 		break;
593 
594 	case SHOW_SECOND_ONWARDS:
595 		if (count != NULL && !*count) {
596 			(*count)++;
597 			return;
598 		}
599 		break;
600 
601 	default:
602 		break;
603 	}
604 
605 	disk_name = do_conversions ? new->is_pretty : new->is_name;
606 	disk_name = disk_name ? disk_name : new->is_name;
607 
608 	/*
609 	 * Only do if we want IO stats - Avoids errors traveling this
610 	 * section if that's all we want to see.
611 	 */
612 	if (do_disk & DISK_IO_MASK) {
613 		if (old) {
614 			t_delta = hrtime_delta(old->is_snaptime,
615 			    new->is_snaptime);
616 		} else {
617 			t_delta = hrtime_delta(new->is_crtime,
618 			    new->is_snaptime);
619 		}
620 
621 		if (new->is_nr_children) {
622 			if (new->is_type == IODEV_CONTROLLER) {
623 				t_delta /= new->is_nr_children;
624 			} else if ((new->is_type == IODEV_IOPATH_LT) ||
625 			    (new->is_type == IODEV_IOPATH_LI)) {
626 				/* synthetic path */
627 				if (!old) {
628 					t_delta = new->is_crtime;
629 				}
630 				t_delta /= new->is_nr_children;
631 			}
632 		}
633 
634 		hr_etime = (double)t_delta;
635 		if (hr_etime == 0.0)
636 			hr_etime = (double)NANOSEC;
637 		etime = hr_etime / (double)NANOSEC;
638 
639 		/* reads per second */
640 		udeltas = u32_delta(old ? old->is_stats.reads : 0,
641 		    new->is_stats.reads);
642 		rps = (double)udeltas;
643 		rps /= etime;
644 
645 		/* writes per second */
646 		udeltas = u32_delta(old ? old->is_stats.writes : 0,
647 		    new->is_stats.writes);
648 		wps = (double)udeltas;
649 		wps /= etime;
650 
651 		tps = rps + wps;
652 			/* transactions per second */
653 
654 		/*
655 		 * report throughput as either kb/sec or MB/sec
656 		 */
657 
658 		if (!do_megabytes)
659 			iosize = 1024.0;
660 		else
661 			iosize = 1048576.0;
662 
663 		ldeltas = ull_delta(old ? old->is_stats.nread : 0,
664 		    new->is_stats.nread);
665 		if (ldeltas) {
666 			krps = (double)ldeltas;
667 			krps /= etime;
668 			krps /= iosize;
669 		} else
670 			krps = 0.0;
671 
672 		ldeltas = ull_delta(old ? old->is_stats.nwritten : 0,
673 		    new->is_stats.nwritten);
674 		if (ldeltas) {
675 			kwps = (double)ldeltas;
676 			kwps /= etime;
677 			kwps /= iosize;
678 		} else
679 			kwps = 0.0;
680 
681 		/*
682 		 * Blocks transferred per second
683 		 */
684 		kps = krps + kwps;
685 
686 		/*
687 		 * Average number of wait transactions waiting
688 		 */
689 		w_delta = hrtime_delta((u_longlong_t)
690 		    (old ? old->is_stats.wlentime : 0),
691 		    new->is_stats.wlentime);
692 		if (w_delta) {
693 			avw = (double)w_delta;
694 			avw /= hr_etime;
695 		} else
696 			avw = 0.0;
697 
698 		/*
699 		 * Average number of run transactions waiting
700 		 */
701 		r_delta = hrtime_delta(old ? old->is_stats.rlentime : 0,
702 		    new->is_stats.rlentime);
703 		if (r_delta) {
704 			avr = (double)r_delta;
705 			avr /= hr_etime;
706 		} else
707 			avr = 0.0;
708 
709 		/*
710 		 * Average wait service time in milliseconds
711 		 */
712 		if (tps > 0.0 && (avw != 0.0 || avr != 0.0)) {
713 			mtps = 1000.0 / tps;
714 			if (avw != 0.0)
715 				wserv = avw * mtps;
716 			else
717 				wserv = 0.0;
718 
719 			if (avr != 0.0)
720 				rserv = avr * mtps;
721 			else
722 				rserv = 0.0;
723 			serv = rserv + wserv;
724 		} else {
725 			rserv = 0.0;
726 			wserv = 0.0;
727 			serv = 0.0;
728 		}
729 
730 		/* % of time there is a transaction waiting for service */
731 		t_delta = hrtime_delta(old ? old->is_stats.wtime : 0,
732 		    new->is_stats.wtime);
733 		if (t_delta) {
734 			w_pct = (double)t_delta;
735 			w_pct /= hr_etime;
736 			w_pct *= 100.0;
737 
738 			/*
739 			 * Average the wait queue utilization over the
740 			 * the controller's devices, if this is a controller.
741 			 */
742 			if (new->is_type == IODEV_CONTROLLER)
743 				w_pct /= new->is_nr_children;
744 		} else
745 			w_pct = 0.0;
746 
747 		/* % of time there is a transaction running */
748 		t_delta = hrtime_delta(old ? old->is_stats.rtime : 0,
749 		    new->is_stats.rtime);
750 		if (t_delta) {
751 			r_pct = (double)t_delta;
752 			r_pct /= hr_etime;
753 			r_pct *= 100.0;
754 
755 			/*
756 			 * Average the percent busy over the controller's
757 			 * devices, if this is a controller.
758 			 */
759 			if (new->is_type == IODEV_CONTROLLER)
760 				w_pct /= new->is_nr_children;
761 		} else {
762 			r_pct = 0.0;
763 		}
764 
765 		/* % of time there is a transaction running */
766 		if (do_interval) {
767 			rps	*= etime;
768 			wps	*= etime;
769 			tps	*= etime;
770 			krps	*= etime;
771 			kwps	*= etime;
772 			kps	*= etime;
773 		}
774 	}
775 
776 	if (do_disk & (DISK_EXTENDED | DISK_ERRORS)) {
777 		if ((!do_conversions) && ((suppress_zero == 0) ||
778 		    ((do_disk & DISK_EXTENDED) == 0))) {
779 			if (do_raw == 0) {
780 				push_out("%-*.*s",
781 				    iodevs_nl, iodevs_nl, disk_name);
782 			} else {
783 				push_out(disk_name);
784 			}
785 		}
786 	}
787 
788 	/*
789 	 * The error counters are read first (if asked for and if they are
790 	 * available).
791 	 */
792 	bzero(err_counters, sizeof (err_counters));
793 	toterrs = 0;
794 	if (display_err_counters && (new->is_errors.ks_data != NULL)) {
795 		kstat_named_t	*knp;
796 		int		i;
797 
798 		knp = KSTAT_NAMED_PTR(&new->is_errors);
799 		for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++) {
800 			switch (knp[i].data_type) {
801 				case KSTAT_DATA_ULONG:
802 				case KSTAT_DATA_ULONGLONG:
803 					err_counters[i] = knp[i].value.ui32;
804 					toterrs += knp[i].value.ui32;
805 					break;
806 				default:
807 					break;
808 			}
809 		}
810 	}
811 
812 	switch (do_disk & DISK_IO_MASK) {
813 	case DISK_OLD:
814 		if (do_raw == 0)
815 			fstr = "%3.0f %3.0f %4.0f  ";
816 		else
817 			fstr = "%.0f,%.0f,%.0f";
818 		push_out(fstr, kps, tps, serv);
819 		break;
820 	case DISK_NEW:
821 		if (do_raw == 0)
822 			fstr = "%3.0f %3.0f %4.1f  ";
823 		else
824 			fstr = "%.0f,%.0f,%.1f";
825 		push_out(fstr, rps, wps, r_pct);
826 		break;
827 	case DISK_EXTENDED:
828 		if (suppress_zero) {
829 			if (fzero(rps) && fzero(wps) && fzero(krps) &&
830 			    fzero(kwps) && fzero(avw) && fzero(avr) &&
831 			    fzero(serv) && fzero(w_pct) && fzero(r_pct) &&
832 			    (toterrs == 0)) {
833 				doit = 0;
834 				display_err_counters = B_FALSE;
835 			} else if (do_conversions == 0) {
836 				if (do_raw == 0) {
837 					push_out("%-*.*s",
838 					    iodevs_nl, iodevs_nl, disk_name);
839 				} else {
840 					push_out(disk_name);
841 				}
842 			}
843 		}
844 		if (doit) {
845 			if (!do_conversions) {
846 				if (do_raw == 0) {
847 					fstr = " %6.1f %6.1f %6.1f %6.1f "
848 					    "%4.1f %4.1f %6.1f %3.0f "
849 					    "%3.0f ";
850 				} else {
851 					fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
852 					    "%.1f,%.0f,%.0f";
853 				}
854 				push_out(fstr, rps, wps, krps, kwps, avw, avr,
855 				    serv, w_pct, r_pct);
856 			} else {
857 				if (do_raw == 0) {
858 					fstr = " %6.1f %6.1f %6.1f %6.1f "
859 					    "%4.1f %4.1f %6.1f %6.1f "
860 					    "%3.0f %3.0f ";
861 				} else {
862 					fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
863 					    "%.1f,%.1f,%.0f,%.0f";
864 				}
865 				push_out(fstr, rps, wps, krps, kwps, avw, avr,
866 				    wserv, rserv, w_pct, r_pct);
867 			}
868 		}
869 		break;
870 	}
871 
872 	if (display_err_counters) {
873 		char	*efstr;
874 		int	i;
875 
876 		if (do_raw == 0) {
877 			if (do_disk == DISK_ERRORS)
878 				push_out(two_blanks);
879 			efstr = "%3u ";
880 		} else {
881 			efstr = "%u";
882 		}
883 
884 		for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++)
885 			push_out(efstr, err_counters[i]);
886 
887 		push_out(efstr, toterrs);
888 	}
889 
890 	if (suppress_zero == 0 || doit == 1) {
891 		if ((do_disk & (DISK_EXTENDED | DISK_ERRORS)) &&
892 		    do_conversions) {
893 			push_out("%s", disk_name);
894 			if (show_mountpts && new->is_dname) {
895 				mnt_t *mount_pt;
896 				char *lu;
897 				char *dnlu;
898 				char lub[SMALL_SCRATCH_BUFLEN];
899 
900 				lu = strrchr(new->is_dname, '/');
901 				if (lu) {
902 					/* only the part after a possible '/' */
903 					dnlu = strrchr(disk_name, '/');
904 					if (dnlu != NULL &&
905 					    strcmp(dnlu, lu) == 0)
906 						lu = new->is_dname;
907 					else {
908 						*lu = 0;
909 						(void) strcpy(lub,
910 						    new->is_dname);
911 						*lu = '/';
912 						(void) strcat(lub, "/");
913 						(void) strcat(lub,
914 						    disk_name);
915 						lu = lub;
916 					}
917 				} else
918 					lu = disk_name;
919 				mount_pt = lookup_mntent_byname(lu);
920 				if (mount_pt) {
921 					if (do_raw == 0)
922 						push_out(" (%s)",
923 						    mount_pt->mount_point);
924 					else
925 						push_out("(%s)",
926 						    mount_pt->mount_point);
927 				}
928 			}
929 		}
930 	}
931 
932 	if ((do_disk & PRINT_VERTICAL) && show_disk_mode != SHOW_FIRST_ONLY)
933 		do_newline();
934 
935 	if (count != NULL)
936 		(*count)++;
937 }
938 
/*
 * Print the option summary on stderr and terminate with exit status 1.
 * This function does not return.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: iostat [-cCdDeEiImMnpPrstxXYz] "
	    " [-l n] [-T d|u] [disk ...] [interval [count]]\n"
	    "\t\t-c: 	report percentage of time system has spent\n"
	    "\t\t\tin user/system/dtrace/idle mode\n"
	    "\t\t-C: 	report disk statistics by controller\n"
	    "\t\t-d: 	display disk Kb/sec, transfers/sec, avg. \n"
	    "\t\t\tservice time in milliseconds  \n"
	    "\t\t-D: 	display disk reads/sec, writes/sec, \n"
	    "\t\t\tpercentage disk utilization \n"
	    "\t\t-e: 	report device error summary statistics\n"
	    "\t\t-E: 	report extended device error statistics\n"
	    "\t\t-i:	show device IDs for -E output\n"
	    "\t\t-I: 	report the counts in each interval,\n"
	    "\t\t\tinstead of rates, where applicable\n"
	    "\t\t-l n:	Limit the number of disks to n\n"
	    "\t\t-m: 	Display mount points (most useful with -p)\n"
	    "\t\t-M: 	Display data throughput in MB/sec "
	    "instead of Kb/sec\n"
	    /* controller, target, disk: the order is cXtYdZ */
	    "\t\t-n: 	convert device names to cXtYdZ format\n"
	    "\t\t-p: 	report per-partition disk statistics\n"
	    "\t\t-P: 	report per-partition disk statistics only,\n"
	    "\t\t\tno per-device disk statistics\n"
	    "\t\t-r: 	Display data in comma separated format\n"
	    "\t\t-s: 	Suppress state change messages\n"
	    "\t\t-T d|u	Display a timestamp in date (d) or unix "
	    "time_t (u)\n"
	    "\t\t-t: 	display chars read/written to terminals\n"
	    "\t\t-x: 	display extended disk statistics\n"
	    "\t\t-X: 	display I/O path statistics\n"
	    "\t\t-Y: 	display I/O path (I/T/L) statistics\n"
	    "\t\t-z: 	Suppress entries with all zero values\n");
	exit(1);
}
976 
977 /*ARGSUSED*/
978 static void
979 show_disk_errors(void *v1, void *v2, void *d)
980 {
981 	struct iodev_snapshot *disk = (struct iodev_snapshot *)v2;
982 	kstat_named_t *knp;
983 	size_t  col;
984 	int	i, len;
985 	char	*dev_name;
986 
987 	if (disk->is_errors.ks_ndata == 0)
988 		return;
989 	if (disk->is_type == IODEV_CONTROLLER)
990 		return;
991 
992 	dev_name = do_conversions ? disk->is_pretty : disk->is_name;
993 	dev_name = dev_name ? dev_name : disk->is_name;
994 
995 	len = strlen(dev_name);
996 	if (len > 20)
997 		push_out("%s ", dev_name);
998 	else if (len > 16)
999 		push_out("%-20.20s ", dev_name);
1000 	else {
1001 		if (do_conversions)
1002 			push_out("%-16.16s ", dev_name);
1003 		else
1004 			push_out("%-9.9s ", dev_name);
1005 	}
1006 	col = 0;
1007 
1008 	knp = KSTAT_NAMED_PTR(&disk->is_errors);
1009 	for (i = 0; i < disk->is_errors.ks_ndata; i++) {
1010 		/* skip kstats that the driver did not kstat_named_init */
1011 		if (knp[i].name[0] == 0)
1012 			continue;
1013 
1014 		col += strlen(knp[i].name);
1015 
1016 		switch (knp[i].data_type) {
1017 			case KSTAT_DATA_CHAR:
1018 				if ((strcmp(knp[i].name, "Serial No") == 0) &&
1019 				    do_devid) {
1020 					if (disk->is_devid) {
1021 						push_out("Device Id: %s ",
1022 						    disk->is_devid);
1023 						col += strlen(disk->is_devid);
1024 					} else
1025 						push_out("Device Id: ");
1026 				} else {
1027 					push_out("%s: %-.16s ", knp[i].name,
1028 					    &knp[i].value.c[0]);
1029 					col += strlen(&knp[i].value.c[0]);
1030 				}
1031 				break;
1032 			case KSTAT_DATA_ULONG:
1033 				push_out("%s: %u ", knp[i].name,
1034 				    knp[i].value.ui32);
1035 				col += 4;
1036 				break;
1037 			case KSTAT_DATA_ULONGLONG:
1038 				if (strcmp(knp[i].name, "Size") == 0) {
1039 					push_out("%s: %2.2fGB <%llu bytes>\n",
1040 					    knp[i].name,
1041 					    (float)knp[i].value.ui64 /
1042 					    DISK_GIGABYTE,
1043 					    knp[i].value.ui64);
1044 					col = 0;
1045 					break;
1046 				}
1047 				push_out("%s: %u ", knp[i].name,
1048 				    knp[i].value.ui32);
1049 				col += 4;
1050 				break;
1051 			}
1052 		if ((col >= 62) || (i == 2)) {
1053 			do_newline();
1054 			col = 0;
1055 		}
1056 	}
1057 	if (col > 0) {
1058 		do_newline();
1059 	}
1060 	do_newline();
1061 }
1062 
1063 void
1064 do_args(int argc, char **argv)
1065 {
1066 	int 		c;
1067 	int 		errflg = 0;
1068 	extern char 	*optarg;
1069 	extern int 	optind;
1070 
1071 	while ((c = getopt(argc, argv, "tdDxXYCciIpPnmMeEszrT:l:")) != EOF)
1072 		switch (c) {
1073 		case 't':
1074 			do_tty++;
1075 			break;
1076 		case 'd':
1077 			do_disk |= DISK_OLD;
1078 			break;
1079 		case 'D':
1080 			do_disk |= DISK_NEW;
1081 			break;
1082 		case 'x':
1083 			do_disk |= DISK_EXTENDED;
1084 			break;
1085 		case 'X':
1086 			if (do_disk & DISK_IOPATH_LTI)
1087 				errflg++;	/* -Y already used */
1088 			else
1089 				do_disk |= DISK_IOPATH_LI;
1090 			break;
1091 		case 'Y':
1092 			if (do_disk & DISK_IOPATH_LI)
1093 				errflg++;	/* -X already used */
1094 			else
1095 				do_disk |= DISK_IOPATH_LTI;
1096 			break;
1097 		case 'C':
1098 			do_controller++;
1099 			break;
1100 		case 'c':
1101 			do_cpu++;
1102 			break;
1103 		case 'I':
1104 			do_interval++;
1105 			break;
1106 		case 'p':
1107 			do_partitions++;
1108 			break;
1109 		case 'P':
1110 			do_partitions_only++;
1111 			break;
1112 		case 'n':
1113 			do_conversions++;
1114 			break;
1115 		case 'M':
1116 			do_megabytes++;
1117 			break;
1118 		case 'e':
1119 			do_disk |= DISK_ERRORS;
1120 			break;
1121 		case 'E':
1122 			do_disk |= DISK_EXTENDED_ERRORS;
1123 			break;
1124 		case 'i':
1125 			do_devid = 1;
1126 			break;
1127 		case 's':
1128 			suppress_state = 1;
1129 			break;
1130 		case 'z':
1131 			suppress_zero = 1;
1132 			break;
1133 		case 'm':
1134 			show_mountpts = 1;
1135 			break;
1136 		case 'T':
1137 			if (optarg) {
1138 				if (*optarg == 'u')
1139 					timestamp_fmt = UDATE;
1140 				else if (*optarg == 'd')
1141 					timestamp_fmt = DDATE;
1142 				else
1143 					errflg++;
1144 			} else {
1145 				errflg++;
1146 			}
1147 			break;
1148 		case 'r':
1149 			do_raw = 1;
1150 			break;
1151 		case 'l':
1152 			df.if_max_iodevs = safe_strtoi(optarg, "invalid limit");
1153 			if (df.if_max_iodevs < 1)
1154 				usage();
1155 			break;
1156 		case '?':
1157 			errflg++;
1158 	}
1159 
1160 	if ((do_disk & DISK_OLD) && (do_disk & DISK_NEW)) {
1161 		(void) fprintf(stderr, "-d and -D are incompatible.\n");
1162 		usage();
1163 	}
1164 
1165 	if (errflg) {
1166 		usage();
1167 	}
1168 
1169 	/* if no output classes explicity specified, use defaults */
1170 	if (do_tty == 0 && do_disk == 0 && do_cpu == 0)
1171 		do_tty = do_cpu = 1, do_disk = DISK_OLD;
1172 
1173 	/*
1174 	 * multi-path options (-X, -Y) without a specific vertical
1175 	 * output format (-x, -e, -E) imply extended -x format
1176 	 */
1177 	if ((do_disk & (DISK_IOPATH_LI | DISK_IOPATH_LTI)) &&
1178 	    !(do_disk & PRINT_VERTICAL))
1179 		do_disk |= DISK_EXTENDED;
1180 
1181 	/*
1182 	 * If conflicting options take the preferred
1183 	 * -D and -x result in -x
1184 	 * -d or -D and -e or -E gives only whatever -d or -D was specified
1185 	 */
1186 	if ((do_disk & DISK_EXTENDED) && (do_disk & DISK_NORMAL))
1187 		do_disk &= ~DISK_NORMAL;
1188 	if ((do_disk & DISK_NORMAL) && (do_disk & DISK_ERROR_MASK))
1189 		do_disk &= ~DISK_ERROR_MASK;
1190 
1191 	/* nfs, tape, always shown */
1192 	df.if_allowed_types = IODEV_NFS | IODEV_TAPE;
1193 
1194 	/*
1195 	 * If limit == 0 then no command line limit was set, else if any of
1196 	 * the flags that cause unlimited disks were not set,
1197 	 * use the default of 4
1198 	 */
1199 	if (df.if_max_iodevs == 0) {
1200 		df.if_max_iodevs = DEFAULT_LIMIT;
1201 		df.if_skip_floppy = 1;
1202 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS |
1203 		    DISK_EXTENDED_ERRORS)) {
1204 			df.if_max_iodevs = UNLIMITED_IODEVS;
1205 			df.if_skip_floppy = 0;
1206 		}
1207 	}
1208 	if (do_disk) {
1209 		size_t count = 0;
1210 		size_t i = optind;
1211 
1212 		while (i < argc && !isdigit(argv[i][0])) {
1213 			count++;
1214 			i++;
1215 		}
1216 
1217 		/*
1218 		 * "Note:  disks  explicitly  requested
1219 		 * are not subject to this disk limit"
1220 		 */
1221 		if ((count > df.if_max_iodevs) ||
1222 		    (count && (df.if_max_iodevs == UNLIMITED_IODEVS)))
1223 			df.if_max_iodevs = count;
1224 
1225 		df.if_names = safe_alloc(count * sizeof (char *));
1226 		(void) memset(df.if_names, 0, count * sizeof (char *));
1227 
1228 		df.if_nr_names = 0;
1229 		while (optind < argc && !isdigit(argv[optind][0]))
1230 			df.if_names[df.if_nr_names++] = argv[optind++];
1231 	}
1232 	if (optind < argc) {
1233 		interval = safe_strtoi(argv[optind], "invalid interval");
1234 		if (interval < 1)
1235 			fail(0, "invalid interval");
1236 		optind++;
1237 
1238 		if (optind < argc) {
1239 			iter = safe_strtoi(argv[optind], "invalid count");
1240 			if (iter < 1)
1241 				fail(0, "invalid count");
1242 			optind++;
1243 		}
1244 	}
1245 	if (interval == 0)
1246 		iter = 1;
1247 	if (optind < argc)
1248 		usage();
1249 }
1250 
1251 /*
1252  * Driver for doing the extended header formatting. Will produce
1253  * the function stack needed to output an extended header based
1254  * on the options selected.
1255  */
1256 
1257 void
1258 do_format(void)
1259 {
1260 	char	header[SMALL_SCRATCH_BUFLEN];
1261 	char 	ch;
1262 	char 	iosz;
1263 	const char    *fstr;
1264 
1265 	disk_header[0] = 0;
1266 	ch = (do_interval ? 'i' : 's');
1267 	iosz = (do_megabytes ? 'M' : 'k');
1268 	if (do_disk & DISK_ERRORS) {
1269 		if (do_raw == 0) {
1270 			(void) sprintf(header, "s/w h/w trn tot ");
1271 		} else
1272 			(void) sprintf(header, "s/w,h/w,trn,tot");
1273 	} else
1274 		*header = NULL;
1275 	switch (do_disk & DISK_IO_MASK) {
1276 		case DISK_OLD:
1277 			if (do_raw == 0)
1278 				fstr = "%cp%c tp%c serv  ";
1279 			else
1280 				fstr = "%cp%c,tp%c,serv";
1281 			(void) snprintf(disk_header, sizeof (disk_header),
1282 			    fstr, iosz, ch, ch);
1283 			break;
1284 		case DISK_NEW:
1285 			if (do_raw == 0)
1286 				fstr = "rp%c wp%c util  ";
1287 			else
1288 				fstr = "%rp%c,wp%c,util";
1289 			(void) snprintf(disk_header, sizeof (disk_header),
1290 			    fstr, ch, ch);
1291 			break;
1292 		case DISK_EXTENDED:
1293 			/* This is -x option */
1294 			if (!do_conversions) {
1295 				/* without -n option */
1296 				if (do_raw == 0) {
1297 					/* without -r option */
1298 					(void) snprintf(disk_header,
1299 					    sizeof (disk_header),
1300 					    "%-*.*s    r/%c    w/%c   "
1301 					    "%cr/%c   %cw/%c wait actv  "
1302 					    "svc_t  %%%%w  %%%%b %s",
1303 					    iodevs_nl, iodevs_nl, "device",
1304 					    ch, ch, iosz, ch, iosz, ch, header);
1305 				} else {
1306 					/* with -r option */
1307 					(void) snprintf(disk_header,
1308 					    sizeof (disk_header),
1309 					    "device,r/%c,w/%c,%cr/%c,%cw/%c,"
1310 					    "wait,actv,svc_t,%%%%w,"
1311 					    "%%%%b,%s",
1312 					    ch, ch, iosz, ch, iosz, ch, header);
1313 				}
1314 			} else {
1315 				/* with -n option */
1316 				if (do_raw == 0) {
1317 					fstr = "    r/%c    w/%c   %cr/%c   "
1318 					    "%cw/%c wait actv wsvc_t asvc_t  "
1319 					    "%%%%w  %%%%b %sdevice";
1320 				} else {
1321 					fstr = "r/%c,w/%c,%cr/%c,%cw/%c,"
1322 					    "wait,actv,wsvc_t,asvc_t,"
1323 					    "%%%%w,%%%%b,%sdevice";
1324 				}
1325 				(void) snprintf(disk_header,
1326 				    sizeof (disk_header),
1327 				    fstr, ch, ch, iosz, ch, iosz,
1328 				    ch, header);
1329 			}
1330 			break;
1331 		default:
1332 			break;
1333 	}
1334 
1335 	/* do DISK_ERRORS header (already added above for DISK_EXTENDED) */
1336 	if ((do_disk & DISK_ERRORS) &&
1337 	    ((do_disk & DISK_IO_MASK) != DISK_EXTENDED)) {
1338 		if (!do_conversions) {
1339 			if (do_raw == 0)
1340 				(void) snprintf(disk_header,
1341 				    sizeof (disk_header), "%-*.*s  %s",
1342 				    iodevs_nl, iodevs_nl, "device", header);
1343 			else
1344 				(void) snprintf(disk_header,
1345 				    sizeof (disk_header), "device,%s", header);
1346 		} else {
1347 			if (do_raw == 0) {
1348 				(void) snprintf(disk_header,
1349 				    sizeof (disk_header),
1350 				    "  %sdevice", header);
1351 			} else {
1352 				(void) snprintf(disk_header,
1353 				    sizeof (disk_header),
1354 				    "%s,device", header);
1355 			}
1356 		}
1357 	} else {
1358 		/*
1359 		 * Need to subtract two characters for the % escape in
1360 		 * the string.
1361 		 */
1362 		dh_len = strlen(disk_header) - 2;
1363 	}
1364 
1365 	/*
1366 	 * -n *and* (-E *or* -e *or* -x)
1367 	 */
1368 	if (do_conversions && (do_disk & PRINT_VERTICAL)) {
1369 		if (do_tty)
1370 			setup(print_tty_hdr1);
1371 		if (do_cpu)
1372 			setup(print_cpu_hdr1);
1373 		if (do_tty || do_cpu)
1374 			setup(do_newline);
1375 		if (do_tty)
1376 			setup(print_tty_hdr2);
1377 		if (do_cpu)
1378 			setup(print_cpu_hdr2);
1379 		if (do_tty || do_cpu)
1380 			setup(do_newline);
1381 		if (do_tty)
1382 			setup(print_tty_data);
1383 		if (do_cpu)
1384 			setup(print_cpu_data);
1385 		if (do_tty || do_cpu)
1386 			setup(do_newline);
1387 		printxhdr();
1388 
1389 		setup(show_all_disks);
1390 	} else {
1391 		/*
1392 		 * These unholy gymnastics are necessary to place CPU/tty
1393 		 * data to the right of the disks/errors for the first
1394 		 * line in vertical mode.
1395 		 */
1396 		if (do_disk & PRINT_VERTICAL) {
1397 			printxhdr();
1398 
1399 			setup(show_first_disk);
1400 			if (do_tty)
1401 				setup(print_tty_data);
1402 			if (do_cpu)
1403 				setup(print_cpu_data);
1404 			setup(do_newline);
1405 
1406 			setup(show_other_disks);
1407 		} else {
1408 			setup(hdrout);
1409 			if (do_tty)
1410 				setup(print_tty_data);
1411 			setup(show_all_disks);
1412 			if (do_cpu)
1413 				setup(print_cpu_data);
1414 		}
1415 
1416 		setup(do_newline);
1417 	}
1418 	if (do_disk & DISK_EXTENDED_ERRORS)
1419 		setup(disk_errors);
1420 }
1421 
1422 /*
1423  * Add a new function to the list of functions
1424  * for this invocation. Once on the stack the
1425  * function is never removed nor does its place
1426  * change.
1427  */
1428 void
1429 setup(void (*nfunc)(void))
1430 {
1431 	format_t *tmp;
1432 
1433 	tmp = safe_alloc(sizeof (format_t));
1434 	tmp->nfunc = nfunc;
1435 	tmp->next = 0;
1436 	if (formatter_end)
1437 		formatter_end->next = tmp;
1438 	else
1439 		formatter_list = tmp;
1440 	formatter_end = tmp;
1441 
1442 }
1443 
1444 /*
1445  * The functions after this comment are devoted to printing
1446  * various parts of the header. They are selected based on the
1447  * options provided when the program was invoked. The functions
1448  * are either directly invoked in printhdr() or are indirectly
1449  * invoked by being placed on the list of functions used when
1450  * extended headers are used.
1451  */
1452 void
1453 print_tty_hdr1(void)
1454 {
1455 	char *fstr;
1456 	char *dstr;
1457 
1458 	if (do_raw == 0) {
1459 		fstr = "%10.10s";
1460 		dstr = "tty    ";
1461 	} else {
1462 		fstr = "%s";
1463 		dstr = "tty";
1464 	}
1465 	push_out(fstr, dstr);
1466 }
1467 
1468 void
1469 print_tty_hdr2(void)
1470 {
1471 	if (do_raw == 0)
1472 		push_out("%-10.10s", " tin tout");
1473 	else
1474 		push_out("tin,tout");
1475 }
1476 
1477 void
1478 print_cpu_hdr1(void)
1479 {
1480 	char *dstr;
1481 
1482 	if (do_raw == 0)
1483 		dstr = "     cpu";
1484 	else
1485 		dstr = "cpu";
1486 	push_out(dstr);
1487 }
1488 
1489 void
1490 print_cpu_hdr2(void)
1491 {
1492 	char *dstr;
1493 
1494 	if (do_raw == 0)
1495 		dstr = " us sy dt id";
1496 	else
1497 		dstr = "us,sy,dt,id";
1498 	push_out(dstr);
1499 }
1500 
1501 /*
1502  * Assumption is that tty data is always first - no need for raw mode leading
1503  * comma.
1504  */
1505 void
1506 print_tty_data(void)
1507 {
1508 	char *fstr;
1509 	uint64_t deltas;
1510 	double raw;
1511 	double outch;
1512 	kstat_t *oldks = NULL;
1513 
1514 	if (oldss)
1515 		oldks = &oldss->s_sys.ss_agg_sys;
1516 
1517 	if (do_raw == 0)
1518 		fstr = " %3.0f %4.0f ";
1519 	else
1520 		fstr = "%.0f,%.0f";
1521 	deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "rawch");
1522 	raw = deltas;
1523 	raw /= getime;
1524 	deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "outch");
1525 	outch = deltas;
1526 	outch /= getime;
1527 	push_out(fstr, raw, outch);
1528 }
1529 
1530 /*
1531  * Write out CPU data
1532  */
1533 void
1534 print_cpu_data(void)
1535 {
1536 	char *fstr;
1537 	uint64_t idle;
1538 	uint64_t user;
1539 	uint64_t kern;
1540 	uint64_t dtrace;
1541 	uint64_t nsec_elapsed;
1542 	kstat_t *oldks = NULL;
1543 
1544 	if (oldss)
1545 		oldks = &oldss->s_sys.ss_agg_sys;
1546 
1547 	if (do_raw == 0)
1548 		fstr = " %2.0f %2.0f %2.0f %2.0f";
1549 	else
1550 		fstr = "%.0f,%.0f,%.0f,%.0f";
1551 
1552 	idle = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_idle");
1553 	user = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_user");
1554 	kern = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_kernel");
1555 	dtrace = kstat_delta(oldks, &newss->s_sys.ss_agg_sys,
1556 	    "cpu_nsec_dtrace");
1557 	nsec_elapsed = newss->s_sys.ss_agg_sys.ks_snaptime -
1558 	    (oldks == NULL ? 0 : oldks->ks_snaptime);
1559 	push_out(fstr, user * percent, kern * percent,
1560 	    dtrace * 100.0 / nsec_elapsed / newss->s_nr_active_cpus,
1561 	    idle * percent);
1562 }
1563 
1564 /*
1565  * Emit the appropriate header.
1566  */
1567 void
1568 hdrout(void)
1569 {
1570 	if (do_raw == 0) {
1571 		if (--tohdr == 0)
1572 			printhdr(0);
1573 	} else if (hdr_out == 0) {
1574 		printhdr(0);
1575 		hdr_out = 1;
1576 	}
1577 }
1578 
1579 /*
1580  * Write out disk errors when -E is specified.
1581  */
1582 void
1583 disk_errors(void)
1584 {
1585 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk_errors, NULL);
1586 }
1587 
1588 void
1589 show_first_disk(void)
1590 {
1591 	int count = 0;
1592 
1593 	show_disk_mode = SHOW_FIRST_ONLY;
1594 
1595 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1596 }
1597 
1598 void
1599 show_other_disks(void)
1600 {
1601 	int count = 0;
1602 
1603 	show_disk_mode = SHOW_SECOND_ONWARDS;
1604 
1605 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1606 }
1607 
1608 void
1609 show_all_disks(void)
1610 {
1611 	int count = 0;
1612 
1613 	show_disk_mode = SHOW_ALL;
1614 
1615 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1616 }
1617 
1618 /*
1619  * Write a newline out and clear the lineout flag.
1620  */
1621 static void
1622 do_newline(void)
1623 {
1624 	if (lineout) {
1625 		(void) putchar('\n');
1626 		lineout = 0;
1627 	}
1628 }
1629 
1630 /*
1631  * Generalized printf function that determines what extra
1632  * to print out if we're in raw mode. At this time we
1633  * don't care about errors.
1634  */
1635 static void
1636 push_out(const char *message, ...)
1637 {
1638 	va_list args;
1639 
1640 	va_start(args, message);
1641 	if (do_raw && lineout == 1)
1642 		(void) putchar(',');
1643 	(void) vprintf(message, args);
1644 	va_end(args);
1645 	lineout = 1;
1646 }
1647 
1648 /*
1649  * Emit the header string when -e is specified.
1650  */
1651 static void
1652 print_err_hdr(void)
1653 {
1654 	char obuf[SMALL_SCRATCH_BUFLEN];
1655 
1656 	if (do_raw) {
1657 		push_out("errors");
1658 		return;
1659 	}
1660 
1661 	if (do_conversions == 0) {
1662 		if (!(do_disk & DISK_EXTENDED)) {
1663 			(void) snprintf(obuf, sizeof (obuf),
1664 			    "%11s", one_blank);
1665 			push_out(obuf);
1666 		}
1667 	} else if (do_disk == DISK_ERRORS)
1668 		push_out(two_blanks);
1669 	else
1670 		push_out(one_blank);
1671 	push_out("---- errors --- ");
1672 }
1673 
1674 /*
1675  * Emit the header string when -e is specified.
1676  */
1677 static void
1678 print_disk_header(void)
1679 {
1680 	push_out(disk_header);
1681 }
1682 
/*
 * Return how far a counter advanced from old to new.
 *
 * When new is below old the counter is taken to have wrapped once past
 * UINT64_MAX: the result is the distance from old up to UINT64_MAX,
 * plus one for the step to zero, plus new.  The wrap point is spelled
 * out as UINT64_MAX rather than derived from the width of u_longlong_t.
 */

u_longlong_t
ull_delta(u_longlong_t old, u_longlong_t new)
{
	if (new < old) {
		/* wrapped */
		return ((UINT64_MAX - old) + new + 1);
	}
	return (new - old);
}
1702 
/*
 * Return how far a 32 bit counter advanced from old to new.  When new
 * is below old the counter is taken to have wrapped once past
 * UINT32_MAX and the result accounts for that single wrap.
 */
uint_t
u32_delta(uint_t old, uint_t new)
{
	if (new < old) {
		/* wrapped */
		return ((UINT32_MAX - old) + new + 1);
	}
	return (new - old);
}
1716 
/*
 * Report whether value counts as zero for standard iostat output, that
 * is, whether it lies in the half-open range [0.0, EPSILON).  Negative
 * values are never treated as zero.  The threshold is tuned for that
 * output only; do not reuse it elsewhere.
 */
#define	EPSILON	(0.1)
static int
fzero(double value)
{
	if (value < 0.0)
		return (0);
	return (value < EPSILON);
}
1727 
/*
 * Convert the decimal string val to an int.
 *
 * fail() is called with errmsg followed by the offending text when val
 * holds no digits, has characters left over after the number, is out
 * of range for long, or does not fit in an int.  Otherwise the parsed
 * value is returned.
 */
static int
safe_strtoi(char const *val, char *errmsg)
{
	char *end;
	long tmp;

	errno = 0;
	tmp = strtol(val, &end, 10);
	/*
	 * end == val catches input with no digits at all (for instance
	 * the empty string, which strtol() would otherwise report as 0).
	 * The round trip through int catches values that strtol()
	 * accepted as a long but that the cast below would truncate.
	 */
	if (end == val || *end != '\0' || errno != 0 ||
	    (long)(int)tmp != tmp)
		fail(0, "%s %s", errmsg, val);
	return ((int)tmp);
}
1740