xref: /titanic_44/usr/src/cmd/stat/iostat/iostat.c (revision 15b64165382ed8da51f28837e4a69bea9c9a2b45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * rewritten from UCB 4.13 83/09/25
27  * rewritten from SunOS 4.1 SID 1.18 89/10/06
28  */
29 /*
30  * Copyright (c) 2012 by Delphix. All rights reserved.
31  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
32  */
33 
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdarg.h>
37 #include <ctype.h>
38 #include <unistd.h>
39 #include <memory.h>
40 #include <errno.h>
41 #include <string.h>
42 #include <signal.h>
43 #include <sys/types.h>
44 #include <time.h>
45 #include <sys/time.h>
46 #include <sys/sysinfo.h>
47 #include <inttypes.h>
48 #include <strings.h>
49 #include <sys/systeminfo.h>
50 #include <kstat.h>
51 #include <locale.h>
52 
53 #include "dsr.h"
54 #include "statcommon.h"
55 
/*
 * Bits kept in do_disk.  Each one is switched on by a command-line
 * option in do_args(); the option letter is noted next to the bit.
 */
#define	DISK_OLD		0x0001	/* -d */
#define	DISK_NEW		0x0002	/* -D */
#define	DISK_EXTENDED		0x0004	/* -x */
#define	DISK_ERRORS		0x0008	/* -e */
#define	DISK_EXTENDED_ERRORS	0x0010	/* -E */
#define	DISK_IOPATH_LI		0x0020	/* LunInitiator */
#define	DISK_IOPATH_LTI		0x0040	/* LunTargetInitiator */

/* Either of the two basic (horizontal) disk formats. */
#define	DISK_NORMAL		(DISK_OLD | DISK_NEW)
/* Any format that needs the I/O rates computed in show_disk(). */
#define	DISK_IO_MASK		(DISK_OLD | DISK_NEW | DISK_EXTENDED)
/* Either of the error report formats. */
#define	DISK_ERROR_MASK		(DISK_ERRORS | DISK_EXTENDED_ERRORS)
/* Formats that emit one line (or block) per device. */
#define	PRINT_VERTICAL		(DISK_ERROR_MASK | DISK_EXTENDED)

/* Value tohdr is reset to each time printhdr() emits the basic header. */
#define	REPRINT 19

/* Number of leading error kstats summarized per device by show_disk(). */
#define	NUMBER_OF_ERR_COUNTERS	3

/*
 * It's really a pseudo-gigabyte. We use 1000000000 bytes so that the disk
 * labels don't look bad. 1GB is really 1073741824 bytes.
 */
#define	DISK_GIGABYTE   1000000000.0
78 
/*
 * Function descriptor to be called when extended
 * headers are used.
 *
 * Descriptors form a singly linked list (formatter_list); main() walks
 * the list once per iteration and calls each nfunc in order.
 */
typedef struct formatter {
	void (*nfunc)(void);		/* output routine to invoke */
	struct formatter *next;		/* next descriptor, or NULL */
} format_t;
87 
/*
 * Used to get formatting right when printing tty/cpu
 * data to the right of disk data
 *
 * show_disk() consults the current mode together with its per-walk
 * counter to decide whether a given device is printed or skipped.
 */
enum show_disk_mode {
	SHOW_FIRST_ONLY,	/* print only the first device of the walk */
	SHOW_SECOND_ONWARDS,	/* skip the first device, print the rest */
	SHOW_ALL		/* print every device */
};

enum show_disk_mode show_disk_mode = SHOW_ALL;
99 
/* Program name; NOTE(review): presumably read by shared stat code - confirm */
char *cmdname = "iostat";
/*
 * Set to 1 by printhdr() when it runs as the SIGCONT handler; main()
 * hands its address to sleep_until().
 */
int caught_cont = 0;

/* Padding strings handed to push_out() when aligning output. */
static char one_blank[] = " ";
static char two_blanks[] = "  ";

/*
 * count for number of lines to be emitted before a header is
 * shown again. Only used for the basic format.
 */
static	uint_t	tohdr = 1;

/*
 * If we're in raw format, have we printed a header? We only do it
 * once for raw but we emit it every REPRINT lines in non-raw format.
 * This applies only for the basic header. The extended header is
 * done only once in both formats.
 */
static	uint_t	hdr_out;

/*
 * Flags representing arguments from command line
 */
static	uint_t	do_tty;			/* show tty info (-t) */
static	uint_t	do_disk;		/* show disk info per selected */
					/* format (-d, -D, -e, -E, -x -X -Y) */
static	uint_t	do_cpu;			/* show cpu info (-c) */
static	uint_t	do_interval;		/* do intervals (-I) */
static	int	do_partitions;		/* per-partition stats (-p) */
static	int	do_partitions_only;	/* per-partition stats only (-P) */
					/* no per-device stats for disks */
static	uint_t	do_conversions;		/* display disks as cXtYdZ (-n) */
static	uint_t	do_megabytes;		/* display data in MB/sec (-M) */
static  uint_t	do_controller;		/* display controller info (-C) */
static  uint_t	do_raw;			/* emit raw format (-r) */
static	uint_t	timestamp_fmt = NODATE;	/* timestamp  each display (-T) */
static	uint_t	do_devid;		/* -E should show devid */

/*
 * Default number of disk drives to be displayed in basic format
 */
#define	DEFAULT_LIMIT	4

/*
 * Device filter handed to acquire_snapshot(); main() fills in the
 * allowed device types and do_args() the -l limit.
 */
struct iodev_filter df;

static  uint_t	suppress_state;		/* skip state change messages */
static	uint_t	suppress_zero;		/* skip zero valued lines */
static  uint_t	show_mountpts;		/* show mount points */
static	int 	interval;		/* interval (seconds) to output */
static	int 	iter;			/* iterations from command line */

/* Size of the on-stack scratch buffers used while formatting names. */
#define	SMALL_SCRATCH_BUFLEN	MAXNAMELEN

static int	iodevs_nl;		/* name field width */
#define	IODEVS_NL_MIN		6	/* not too thin for "device" */
#define	IODEVS_NL_MAX		24	/* but keep full width under 80 */

static	char	disk_header[132];
static	uint_t 	dh_len;			/* disk header length for centering */
static  int 	lineout;		/* data waiting to be printed? */

/* Most recent snapshot and the one before it (oldss may be NULL). */
static struct snapshot *newss;
static struct snapshot *oldss;
static	double	getime;			/* elapsed time */
static	double	percent;		/* 100 / etime */

/*
 * List of functions to be called which will construct the desired output
 */
static format_t	*formatter_list;
static format_t *formatter_end;
171 
/*
 * Forward declarations for the file-local helpers defined further down.
 */
static u_longlong_t	ull_delta(u_longlong_t, u_longlong_t);
static uint_t 	u32_delta(uint_t, uint_t);
static void setup(void (*nfunc)(void));
static void print_tty_hdr1(void);
static void print_tty_hdr2(void);
static void print_cpu_hdr1(void);
static void print_cpu_hdr2(void);
static void print_tty_data(void);
static void print_cpu_data(void);
static void print_err_hdr(void);
static void print_disk_header(void);
static void hdrout(void);
static void disk_errors(void);
static void do_newline(void);
static void push_out(const char *, ...);
static void printhdr(int);
static void printxhdr(void);
static void usage(void);
static void do_args(int, char **);
static void do_format(void);
static void show_all_disks(void);
static void show_first_disk(void);
static void show_other_disks(void);
static void show_disk_errors(void *, void *, void *);
static void write_core_header(void);
static int  fzero(double value);
static int  safe_strtoi(char const *val, char *errmsg);
199 
200 int
201 main(int argc, char **argv)
202 {
203 	enum snapshot_types types = SNAP_SYSTEM;
204 	kstat_ctl_t *kc;
205 	long hz;
206 	int forever;
207 	hrtime_t start_n;
208 	hrtime_t period_n;
209 
210 	(void) setlocale(LC_ALL, "");
211 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
212 #define	TEXT_DOMAIN "SYS_TEST"		/* Use this only if it weren't */
213 #endif
214 	(void) textdomain(TEXT_DOMAIN);
215 
216 	do_args(argc, argv);
217 
218 	/*
219 	 * iostat historically showed CPU changes, even though
220 	 * it doesn't provide much useful information
221 	 */
222 	types |= SNAP_CPUS;
223 
224 	if (do_disk)
225 		types |= SNAP_IODEVS;
226 
227 	if (do_disk && !do_partitions_only)
228 		df.if_allowed_types |= IODEV_DISK;
229 	if (do_disk & DISK_IOPATH_LI) {
230 		df.if_allowed_types |= IODEV_IOPATH_LTI;
231 		types |= SNAP_IOPATHS_LI;
232 	}
233 	if (do_disk & DISK_IOPATH_LTI) {
234 		df.if_allowed_types |= IODEV_IOPATH_LTI;
235 		types |= SNAP_IOPATHS_LTI;
236 	}
237 	if (do_disk & DISK_ERROR_MASK)
238 		types |= SNAP_IODEV_ERRORS;
239 	if (do_partitions || do_partitions_only)
240 		df.if_allowed_types |= IODEV_PARTITION;
241 	if (do_conversions)
242 		types |= SNAP_IODEV_PRETTY;
243 	if (do_devid)
244 		types |= SNAP_IODEV_DEVID;
245 	if (do_controller) {
246 		if (!(do_disk & PRINT_VERTICAL) ||
247 		    (do_disk & DISK_EXTENDED_ERRORS))
248 			fail(0, "-C can only be used with -e or -x.");
249 		types |= SNAP_CONTROLLERS;
250 		df.if_allowed_types |= IODEV_CONTROLLER;
251 	}
252 
253 	hz = sysconf(_SC_CLK_TCK);
254 
255 	/*
256 	 * Undocumented behavior - sending a SIGCONT will result
257 	 * in a new header being emitted. Used only if we're not
258 	 * doing extended headers. This is a historical
259 	 * artifact.
260 	 */
261 	if (!(do_disk & PRINT_VERTICAL))
262 		(void) signal(SIGCONT, printhdr);
263 
264 	if (interval)
265 		period_n = (hrtime_t)interval * NANOSEC;
266 
267 	kc = open_kstat();
268 	if (interval)
269 		start_n = gethrtime();
270 	newss = acquire_snapshot(kc, types, &df);
271 
272 	/* compute width of "device" field */
273 	iodevs_nl = newss->s_iodevs_is_name_maxlen;
274 	iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
275 	    IODEVS_NL_MIN : iodevs_nl;
276 	iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
277 	    IODEVS_NL_MAX : iodevs_nl;
278 
279 	do_format();
280 
281 	forever = (iter == 0);
282 	do {
283 		if (do_conversions && show_mountpts)
284 			do_mnttab();
285 
286 		if (do_tty || do_cpu) {
287 			kstat_t *oldks;
288 			oldks = oldss ? &oldss->s_sys.ss_agg_sys : NULL;
289 			getime = cpu_ticks_delta(oldks,
290 			    &newss->s_sys.ss_agg_sys);
291 			percent = (getime > 0.0) ? 100.0 / getime : 0.0;
292 			getime = (getime / nr_active_cpus(newss)) / hz;
293 			if (getime == 0.0)
294 				getime = (double)interval;
295 			if (getime == 0.0 || do_interval)
296 				getime = 1.0;
297 		}
298 
299 		if (formatter_list) {
300 			format_t *tmp;
301 			tmp = formatter_list;
302 
303 			if (timestamp_fmt != NODATE)
304 				print_timestamp(timestamp_fmt);
305 
306 			while (tmp) {
307 				(tmp->nfunc)();
308 				tmp = tmp->next;
309 			}
310 			(void) fflush(stdout);
311 		}
312 
313 		/* only remaining/doing a single iteration, we are done */
314 		if (iter == 1)
315 			continue;
316 
317 		if (interval > 0)
318 			/* Have a kip */
319 			sleep_until(&start_n, period_n, forever, &caught_cont);
320 
321 		free_snapshot(oldss);
322 		oldss = newss;
323 		newss = acquire_snapshot(kc, types, &df);
324 		iodevs_nl = (newss->s_iodevs_is_name_maxlen > iodevs_nl) ?
325 		    newss->s_iodevs_is_name_maxlen : iodevs_nl;
326 		iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
327 		    IODEVS_NL_MIN : iodevs_nl;
328 		iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
329 		    IODEVS_NL_MAX : iodevs_nl;
330 
331 		if (!suppress_state)
332 			snapshot_report_changes(oldss, newss);
333 
334 		/* if config changed, show stats from boot */
335 		if (snapshot_has_changed(oldss, newss)) {
336 			free_snapshot(oldss);
337 			oldss = NULL;
338 		}
339 
340 	} while (--iter);
341 
342 	free_snapshot(oldss);
343 	free_snapshot(newss);
344 	(void) kstat_close(kc);
345 	free(df.if_names);
346 	return (0);
347 }
348 
/*
 * Some magic numbers used in header formatting.
 *
 * DISK_LEN = length of either "kps tps serv" or "wps rps util"
 *	      using 0 as the first position
 *
 * DISK_ERROR_LEN = length of "s/w h/w trn tot" with one space on
 *		either side. Does not use zero as first pos.
 *
 * DEVICE_LEN = length of "device" + 1 character.
 *
 * DISK_LEN is used by show_disk_name() to center a device name;
 * DISK_ERROR_LEN and DEVICE_LEN are subtracted from the trailing
 * padding in write_core_header().
 */

#define	DISK_LEN	11
#define	DISK_ERROR_LEN	16
#define	DEVICE_LEN	7
364 
365 /*ARGSUSED*/
366 static void
367 show_disk_name(void *v1, void *v2, void *data)
368 {
369 	struct iodev_snapshot *dev = (struct iodev_snapshot *)v2;
370 	size_t slen;
371 	char *name;
372 	char fbuf[SMALL_SCRATCH_BUFLEN];
373 
374 	if (dev == NULL)
375 		return;
376 
377 	name = do_conversions ? dev->is_pretty : dev->is_name;
378 	name = name ? name : dev->is_name;
379 
380 	if (!do_raw) {
381 		uint_t width;
382 
383 		slen = strlen(name);
384 		/*
385 		 * The length is less
386 		 * than the section
387 		 * which will be displayed
388 		 * on the next line.
389 		 * Center the entry.
390 		 */
391 
392 		width = (DISK_LEN + 1)/2 + (slen / 2);
393 		(void) snprintf(fbuf, sizeof (fbuf),
394 		    "%*s", width, name);
395 		name = fbuf;
396 		push_out("%-13.13s ", name);
397 	} else {
398 		push_out(name);
399 	}
400 }
401 
/*
 * snapshot_walk() callback: emit the prepared per-disk column header
 * once for the device being visited.  None of the arguments are used.
 */
/*ARGSUSED*/
static void
show_disk_header(void *v1, void *v2, void *data)
{
	/* disk_header is passed as push_out()'s format argument */
	push_out(disk_header);
}
408 
409 /*
410  * Write out a two line header. What is written out depends on the flags
411  * selected but in the worst case consists of a tty header, a disk header
412  * providing information for 4 disks and a cpu header.
413  *
414  * The tty header consists of the word "tty" on the first line above the
415  * words "tin tout" on the next line. If present the tty portion consumes
416  * the first 10 characters of each line since "tin tout" is surrounded
417  * by single spaces.
418  *
419  * Each of the disk sections is a 14 character "block" in which the name of
420  * the disk is centered in the first 12 characters of the first line.
421  *
422  * The cpu section is an 11 character block with "cpu" centered over the
423  * section.
424  *
425  * The worst case should look as follows:
426  *
427  * 0---------1--------2---------3---------4---------5---------6---------7-------
428  *    tty        sd0           sd1           sd2           sd3           cpu
429  *  tin tout kps tps serv  kps tps serv  kps tps serv  kps tps serv  us sy dt id
430  *  NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NN NN NN NN
431  *
432  * When -D is specified, the disk header looks as follows (worst case):
433  *
434  * 0---------1--------2---------3---------4---------5---------6---------7-------
435  *     tty        sd0           sd1             sd2          sd3          cpu
436  *   tin tout rps wps util  rps wps util  rps wps util  rps wps util us sy dt id
437  *   NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN NN NN NN NN
438  */
439 static void
440 printhdr(int sig)
441 {
442 	/*
443 	 * If we're here because a signal fired, reenable the
444 	 * signal.
445 	 */
446 	if (sig)
447 		(void) signal(SIGCONT, printhdr);
448 	if (sig == SIGCONT)
449 		caught_cont = 1;
450 	/*
451 	 * Horizontal mode headers
452 	 *
453 	 * First line
454 	 */
455 	if (do_tty)
456 		print_tty_hdr1();
457 
458 	if (do_disk & DISK_NORMAL) {
459 		(void) snapshot_walk(SNAP_IODEVS, NULL, newss,
460 		    show_disk_name, NULL);
461 	}
462 
463 	if (do_cpu)
464 		print_cpu_hdr1();
465 	do_newline();
466 
467 	/*
468 	 * Second line
469 	 */
470 	if (do_tty)
471 		print_tty_hdr2();
472 
473 	if (do_disk & DISK_NORMAL) {
474 		(void) snapshot_walk(SNAP_IODEVS, NULL, newss,
475 		    show_disk_header, NULL);
476 	}
477 
478 	if (do_cpu)
479 		print_cpu_hdr2();
480 	do_newline();
481 
482 	tohdr = REPRINT;
483 }
484 
485 /*
486  * Write out the extended header centered over the core information.
487  */
488 static void
489 write_core_header(void)
490 {
491 	char *edev = "extended device statistics";
492 	uint_t lead_space_ct;
493 	uint_t follow_space_ct;
494 	size_t edevlen;
495 
496 	if (do_raw == 0) {
497 		/*
498 		 * The things we do to look nice...
499 		 *
500 		 * Center the core output header. Make sure we have the
501 		 * right number of trailing spaces for follow-on headers
502 		 * (i.e., cpu and/or tty and/or errors).
503 		 */
504 		edevlen = strlen(edev);
505 		lead_space_ct = dh_len - edevlen;
506 		lead_space_ct /= 2;
507 		if (lead_space_ct > 0) {
508 			follow_space_ct = dh_len - (lead_space_ct + edevlen);
509 			if (do_disk & DISK_ERRORS)
510 				follow_space_ct -= DISK_ERROR_LEN;
511 			if ((do_disk & DISK_EXTENDED) && do_conversions)
512 				follow_space_ct -= DEVICE_LEN;
513 
514 			push_out("%1$*2$.*2$s%3$s%4$*5$.*5$s", one_blank,
515 			    lead_space_ct, edev, one_blank, follow_space_ct);
516 		} else
517 			push_out("%56s", edev);
518 	} else
519 		push_out(edev);
520 }
521 
522 /*
523  * In extended mode headers, we don't want to reprint the header on
524  * signals as they are printed every time anyways.
525  */
526 static void
527 printxhdr(void)
528 {
529 
530 	/*
531 	 * Vertical mode headers
532 	 */
533 	if (do_disk & DISK_EXTENDED)
534 		setup(write_core_header);
535 	if (do_disk & DISK_ERRORS)
536 		setup(print_err_hdr);
537 
538 	if (do_conversions) {
539 		setup(do_newline);
540 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
541 			setup(print_disk_header);
542 		setup(do_newline);
543 	} else {
544 		if (do_tty)
545 			setup(print_tty_hdr1);
546 		if (do_cpu)
547 			setup(print_cpu_hdr1);
548 		setup(do_newline);
549 
550 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
551 			setup(print_disk_header);
552 		if (do_tty)
553 			setup(print_tty_hdr2);
554 		if (do_cpu)
555 			setup(print_cpu_hdr2);
556 		setup(do_newline);
557 	}
558 }
559 
560 /*
561  * Write out a line for this disk - note that show_disk writes out
562  * full lines or blocks for each selected disk.
563  */
564 static void
565 show_disk(void *v1, void *v2, void *data)
566 {
567 	uint32_t err_counters[NUMBER_OF_ERR_COUNTERS];
568 	boolean_t display_err_counters = do_disk & DISK_ERRORS;
569 	struct iodev_snapshot *old = (struct iodev_snapshot *)v1;
570 	struct iodev_snapshot *new = (struct iodev_snapshot *)v2;
571 	int *count = (int *)data;
572 	double rps, wps, tps, mtps, krps, kwps, kps, avw, avr, w_pct, r_pct;
573 	double wserv, rserv, serv;
574 	double iosize;	/* kb/sec or MB/sec */
575 	double etime, hr_etime;
576 	char *disk_name;
577 	u_longlong_t ldeltas;
578 	uint_t udeltas;
579 	uint64_t t_delta;
580 	uint64_t w_delta;
581 	uint64_t r_delta;
582 	int doit = 1;
583 	uint_t toterrs;
584 	char *fstr;
585 
586 	if (new == NULL)
587 		return;
588 
589 	switch (show_disk_mode) {
590 	case SHOW_FIRST_ONLY:
591 		if (count != NULL && *count)
592 			return;
593 		break;
594 
595 	case SHOW_SECOND_ONWARDS:
596 		if (count != NULL && !*count) {
597 			(*count)++;
598 			return;
599 		}
600 		break;
601 
602 	default:
603 		break;
604 	}
605 
606 	disk_name = do_conversions ? new->is_pretty : new->is_name;
607 	disk_name = disk_name ? disk_name : new->is_name;
608 
609 	/*
610 	 * Only do if we want IO stats - Avoids errors traveling this
611 	 * section if that's all we want to see.
612 	 */
613 	if (do_disk & DISK_IO_MASK) {
614 		if (old) {
615 			t_delta = hrtime_delta(old->is_snaptime,
616 			    new->is_snaptime);
617 		} else {
618 			t_delta = hrtime_delta(new->is_crtime,
619 			    new->is_snaptime);
620 		}
621 
622 		if (new->is_nr_children) {
623 			if (new->is_type == IODEV_CONTROLLER) {
624 				t_delta /= new->is_nr_children;
625 			} else if ((new->is_type == IODEV_IOPATH_LT) ||
626 			    (new->is_type == IODEV_IOPATH_LI)) {
627 				/* synthetic path */
628 				if (!old) {
629 					t_delta = new->is_crtime;
630 				}
631 				t_delta /= new->is_nr_children;
632 			}
633 		}
634 
635 		hr_etime = (double)t_delta;
636 		if (hr_etime == 0.0)
637 			hr_etime = (double)NANOSEC;
638 		etime = hr_etime / (double)NANOSEC;
639 
640 		/* reads per second */
641 		udeltas = u32_delta(old ? old->is_stats.reads : 0,
642 		    new->is_stats.reads);
643 		rps = (double)udeltas;
644 		rps /= etime;
645 
646 		/* writes per second */
647 		udeltas = u32_delta(old ? old->is_stats.writes : 0,
648 		    new->is_stats.writes);
649 		wps = (double)udeltas;
650 		wps /= etime;
651 
652 		tps = rps + wps;
653 			/* transactions per second */
654 
655 		/*
656 		 * report throughput as either kb/sec or MB/sec
657 		 */
658 
659 		if (!do_megabytes)
660 			iosize = 1024.0;
661 		else
662 			iosize = 1048576.0;
663 
664 		ldeltas = ull_delta(old ? old->is_stats.nread : 0,
665 		    new->is_stats.nread);
666 		if (ldeltas) {
667 			krps = (double)ldeltas;
668 			krps /= etime;
669 			krps /= iosize;
670 		} else
671 			krps = 0.0;
672 
673 		ldeltas = ull_delta(old ? old->is_stats.nwritten : 0,
674 		    new->is_stats.nwritten);
675 		if (ldeltas) {
676 			kwps = (double)ldeltas;
677 			kwps /= etime;
678 			kwps /= iosize;
679 		} else
680 			kwps = 0.0;
681 
682 		/*
683 		 * Blocks transferred per second
684 		 */
685 		kps = krps + kwps;
686 
687 		/*
688 		 * Average number of wait transactions waiting
689 		 */
690 		w_delta = hrtime_delta((u_longlong_t)
691 		    (old ? old->is_stats.wlentime : 0),
692 		    new->is_stats.wlentime);
693 		if (w_delta) {
694 			avw = (double)w_delta;
695 			avw /= hr_etime;
696 		} else
697 			avw = 0.0;
698 
699 		/*
700 		 * Average number of run transactions waiting
701 		 */
702 		r_delta = hrtime_delta(old ? old->is_stats.rlentime : 0,
703 		    new->is_stats.rlentime);
704 		if (r_delta) {
705 			avr = (double)r_delta;
706 			avr /= hr_etime;
707 		} else
708 			avr = 0.0;
709 
710 		/*
711 		 * Average wait service time in milliseconds
712 		 */
713 		if (tps > 0.0 && (avw != 0.0 || avr != 0.0)) {
714 			mtps = 1000.0 / tps;
715 			if (avw != 0.0)
716 				wserv = avw * mtps;
717 			else
718 				wserv = 0.0;
719 
720 			if (avr != 0.0)
721 				rserv = avr * mtps;
722 			else
723 				rserv = 0.0;
724 			serv = rserv + wserv;
725 		} else {
726 			rserv = 0.0;
727 			wserv = 0.0;
728 			serv = 0.0;
729 		}
730 
731 		/* % of time there is a transaction waiting for service */
732 		t_delta = hrtime_delta(old ? old->is_stats.wtime : 0,
733 		    new->is_stats.wtime);
734 		if (t_delta) {
735 			w_pct = (double)t_delta;
736 			w_pct /= hr_etime;
737 			w_pct *= 100.0;
738 
739 			/*
740 			 * Average the wait queue utilization over the
741 			 * the controller's devices, if this is a controller.
742 			 */
743 			if (new->is_type == IODEV_CONTROLLER)
744 				w_pct /= new->is_nr_children;
745 		} else
746 			w_pct = 0.0;
747 
748 		/* % of time there is a transaction running */
749 		t_delta = hrtime_delta(old ? old->is_stats.rtime : 0,
750 		    new->is_stats.rtime);
751 		if (t_delta) {
752 			r_pct = (double)t_delta;
753 			r_pct /= hr_etime;
754 			r_pct *= 100.0;
755 
756 			/*
757 			 * Average the percent busy over the controller's
758 			 * devices, if this is a controller.
759 			 */
760 			if (new->is_type == IODEV_CONTROLLER)
761 				w_pct /= new->is_nr_children;
762 		} else {
763 			r_pct = 0.0;
764 		}
765 
766 		/* % of time there is a transaction running */
767 		if (do_interval) {
768 			rps	*= etime;
769 			wps	*= etime;
770 			tps	*= etime;
771 			krps	*= etime;
772 			kwps	*= etime;
773 			kps	*= etime;
774 		}
775 	}
776 
777 	if (do_disk & (DISK_EXTENDED | DISK_ERRORS)) {
778 		if ((!do_conversions) && ((suppress_zero == 0) ||
779 		    ((do_disk & DISK_EXTENDED) == 0))) {
780 			if (do_raw == 0) {
781 				push_out("%-*.*s",
782 				    iodevs_nl, iodevs_nl, disk_name);
783 			} else {
784 				push_out(disk_name);
785 			}
786 		}
787 	}
788 
789 	/*
790 	 * The error counters are read first (if asked for and if they are
791 	 * available).
792 	 */
793 	bzero(err_counters, sizeof (err_counters));
794 	toterrs = 0;
795 	if (display_err_counters && (new->is_errors.ks_data != NULL)) {
796 		kstat_named_t	*knp;
797 		int		i;
798 
799 		knp = KSTAT_NAMED_PTR(&new->is_errors);
800 		for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++) {
801 			switch (knp[i].data_type) {
802 				case KSTAT_DATA_ULONG:
803 				case KSTAT_DATA_ULONGLONG:
804 					err_counters[i] = knp[i].value.ui32;
805 					toterrs += knp[i].value.ui32;
806 					break;
807 				default:
808 					break;
809 			}
810 		}
811 	}
812 
813 	switch (do_disk & DISK_IO_MASK) {
814 	case DISK_OLD:
815 		if (do_raw == 0)
816 			fstr = "%3.0f %3.0f %4.0f  ";
817 		else
818 			fstr = "%.0f,%.0f,%.0f";
819 		push_out(fstr, kps, tps, serv);
820 		break;
821 	case DISK_NEW:
822 		if (do_raw == 0)
823 			fstr = "%3.0f %3.0f %4.1f  ";
824 		else
825 			fstr = "%.0f,%.0f,%.1f";
826 		push_out(fstr, rps, wps, r_pct);
827 		break;
828 	case DISK_EXTENDED:
829 		if (suppress_zero) {
830 			if (fzero(rps) && fzero(wps) && fzero(krps) &&
831 			    fzero(kwps) && fzero(avw) && fzero(avr) &&
832 			    fzero(serv) && fzero(w_pct) && fzero(r_pct) &&
833 			    (toterrs == 0)) {
834 				doit = 0;
835 				display_err_counters = B_FALSE;
836 			} else if (do_conversions == 0) {
837 				if (do_raw == 0) {
838 					push_out("%-*.*s",
839 					    iodevs_nl, iodevs_nl, disk_name);
840 				} else {
841 					push_out(disk_name);
842 				}
843 			}
844 		}
845 		if (doit) {
846 			if (!do_conversions) {
847 				if (do_raw == 0) {
848 					fstr = " %6.1f %6.1f %6.1f %6.1f "
849 					    "%4.1f %4.1f %6.1f %3.0f "
850 					    "%3.0f ";
851 				} else {
852 					fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
853 					    "%.1f,%.0f,%.0f";
854 				}
855 				push_out(fstr, rps, wps, krps, kwps, avw, avr,
856 				    serv, w_pct, r_pct);
857 			} else {
858 				if (do_raw == 0) {
859 					fstr = " %6.1f %6.1f %6.1f %6.1f "
860 					    "%4.1f %4.1f %6.1f %6.1f "
861 					    "%3.0f %3.0f ";
862 				} else {
863 					fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
864 					    "%.1f,%.1f,%.0f,%.0f";
865 				}
866 				push_out(fstr, rps, wps, krps, kwps, avw, avr,
867 				    wserv, rserv, w_pct, r_pct);
868 			}
869 		}
870 		break;
871 	}
872 
873 	if (display_err_counters) {
874 		char	*efstr;
875 		int	i;
876 
877 		if (do_raw == 0) {
878 			if (do_disk == DISK_ERRORS)
879 				push_out(two_blanks);
880 			efstr = "%3u ";
881 		} else {
882 			efstr = "%u";
883 		}
884 
885 		for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++)
886 			push_out(efstr, err_counters[i]);
887 
888 		push_out(efstr, toterrs);
889 	}
890 
891 	if (suppress_zero == 0 || doit == 1) {
892 		if ((do_disk & (DISK_EXTENDED | DISK_ERRORS)) &&
893 		    do_conversions) {
894 			push_out("%s", disk_name);
895 			if (show_mountpts && new->is_dname) {
896 				mnt_t *mount_pt;
897 				char *lu;
898 				char *dnlu;
899 				char lub[SMALL_SCRATCH_BUFLEN];
900 
901 				lu = strrchr(new->is_dname, '/');
902 				if (lu) {
903 					/* only the part after a possible '/' */
904 					dnlu = strrchr(disk_name, '/');
905 					if (dnlu != NULL &&
906 					    strcmp(dnlu, lu) == 0)
907 						lu = new->is_dname;
908 					else {
909 						*lu = 0;
910 						(void) strcpy(lub,
911 						    new->is_dname);
912 						*lu = '/';
913 						(void) strcat(lub, "/");
914 						(void) strcat(lub,
915 						    disk_name);
916 						lu = lub;
917 					}
918 				} else
919 					lu = disk_name;
920 				mount_pt = lookup_mntent_byname(lu);
921 				if (mount_pt) {
922 					if (do_raw == 0)
923 						push_out(" (%s)",
924 						    mount_pt->mount_point);
925 					else
926 						push_out("(%s)",
927 						    mount_pt->mount_point);
928 				}
929 			}
930 		}
931 	}
932 
933 	if ((do_disk & PRINT_VERTICAL) && show_disk_mode != SHOW_FIRST_ONLY)
934 		do_newline();
935 
936 	if (count != NULL)
937 		(*count)++;
938 }
939 
/*
 * Print the option summary on stderr and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	/* -n text now says cXtYdZ, matching the do_conversions comment */
	(void) fprintf(stderr,
	    "Usage: iostat [-cCdDeEiImMnpPrstxXYz] "
	    " [-l n] [-T d|u] [disk ...] [interval [count]]\n"
	    "\t\t-c: 	report percentage of time system has spent\n"
	    "\t\t\tin user/system/dtrace/idle mode\n"
	    "\t\t-C: 	report disk statistics by controller\n"
	    "\t\t-d: 	display disk Kb/sec, transfers/sec, avg. \n"
	    "\t\t\tservice time in milliseconds  \n"
	    "\t\t-D: 	display disk reads/sec, writes/sec, \n"
	    "\t\t\tpercentage disk utilization \n"
	    "\t\t-e: 	report device error summary statistics\n"
	    "\t\t-E: 	report extended device error statistics\n"
	    "\t\t-i:	show device IDs for -E output\n"
	    "\t\t-I: 	report the counts in each interval,\n"
	    "\t\t\tinstead of rates, where applicable\n"
	    "\t\t-l n:	Limit the number of disks to n\n"
	    "\t\t-m: 	Display mount points (most useful with -p)\n"
	    "\t\t-M: 	Display data throughput in MB/sec "
	    "instead of Kb/sec\n"
	    "\t\t-n: 	convert device names to cXtYdZ format\n"
	    "\t\t-p: 	report per-partition disk statistics\n"
	    "\t\t-P: 	report per-partition disk statistics only,\n"
	    "\t\t\tno per-device disk statistics\n"
	    "\t\t-r: 	Display data in comma separated format\n"
	    "\t\t-s: 	Suppress state change messages\n"
	    "\t\t-T d|u	Display a timestamp in date (d) or unix "
	    "time_t (u)\n"
	    "\t\t-t: 	display chars read/written to terminals\n"
	    "\t\t-x: 	display extended disk statistics\n"
	    "\t\t-X: 	display I/O path statistics\n"
	    "\t\t-Y: 	display I/O path (I/T/L) statistics\n"
	    "\t\t-z: 	Suppress entries with all zero values\n");
	exit(1);
}
977 
978 /*ARGSUSED*/
979 static void
980 show_disk_errors(void *v1, void *v2, void *d)
981 {
982 	struct iodev_snapshot *disk = (struct iodev_snapshot *)v2;
983 	kstat_named_t *knp;
984 	size_t  col;
985 	int	i, len;
986 	char	*dev_name;
987 
988 	if (disk->is_errors.ks_ndata == 0)
989 		return;
990 	if (disk->is_type == IODEV_CONTROLLER)
991 		return;
992 
993 	dev_name = do_conversions ? disk->is_pretty : disk->is_name;
994 	dev_name = dev_name ? dev_name : disk->is_name;
995 
996 	len = strlen(dev_name);
997 	if (len > 20)
998 		push_out("%s ", dev_name);
999 	else if (len > 16)
1000 		push_out("%-20.20s ", dev_name);
1001 	else {
1002 		if (do_conversions)
1003 			push_out("%-16.16s ", dev_name);
1004 		else
1005 			push_out("%-9.9s ", dev_name);
1006 	}
1007 	col = 0;
1008 
1009 	knp = KSTAT_NAMED_PTR(&disk->is_errors);
1010 	for (i = 0; i < disk->is_errors.ks_ndata; i++) {
1011 		/* skip kstats that the driver did not kstat_named_init */
1012 		if (knp[i].name[0] == 0)
1013 			continue;
1014 
1015 		col += strlen(knp[i].name);
1016 
1017 		switch (knp[i].data_type) {
1018 			case KSTAT_DATA_CHAR:
1019 			case KSTAT_DATA_STRING:
1020 				if ((strcmp(knp[i].name, "Serial No") == 0) &&
1021 				    do_devid) {
1022 					if (disk->is_devid) {
1023 						push_out("Device Id: %s ",
1024 						    disk->is_devid);
1025 						col += strlen(disk->is_devid);
1026 					} else {
1027 						push_out("Device Id: ");
1028 					}
1029 
1030 					break;
1031 				}
1032 				if (knp[i].data_type == KSTAT_DATA_CHAR) {
1033 					push_out("%s: %-.16s ", knp[i].name,
1034 					    &knp[i].value.c[0]);
1035 					col += strnlen(&knp[i].value.c[0], 16);
1036 				} else {
1037 					push_out("%s: %s ", knp[i].name,
1038 					    KSTAT_NAMED_STR_PTR(&knp[i]));
1039 					col +=
1040 					    KSTAT_NAMED_STR_BUFLEN(&knp[i]) - 1;
1041 				}
1042 				break;
1043 			case KSTAT_DATA_ULONG:
1044 				push_out("%s: %u ", knp[i].name,
1045 				    knp[i].value.ui32);
1046 				col += 4;
1047 				break;
1048 			case KSTAT_DATA_ULONGLONG:
1049 				if (strcmp(knp[i].name, "Size") == 0) {
1050 					do_newline();
1051 					push_out("%s: %2.2fGB <%llu bytes>",
1052 					    knp[i].name,
1053 					    (float)knp[i].value.ui64 /
1054 					    DISK_GIGABYTE,
1055 					    knp[i].value.ui64);
1056 					do_newline();
1057 					col = 0;
1058 					break;
1059 				}
1060 				push_out("%s: %u ", knp[i].name,
1061 				    knp[i].value.ui32);
1062 				col += 4;
1063 				break;
1064 			}
1065 		if ((col >= 62) || (i == 2)) {
1066 			do_newline();
1067 			col = 0;
1068 		}
1069 	}
1070 	if (col > 0) {
1071 		do_newline();
1072 	}
1073 	do_newline();
1074 }
1075 
1076 void
1077 do_args(int argc, char **argv)
1078 {
1079 	int 		c;
1080 	int 		errflg = 0;
1081 	extern char 	*optarg;
1082 	extern int 	optind;
1083 
1084 	while ((c = getopt(argc, argv, "tdDxXYCciIpPnmMeEszrT:l:")) != EOF)
1085 		switch (c) {
1086 		case 't':
1087 			do_tty++;
1088 			break;
1089 		case 'd':
1090 			do_disk |= DISK_OLD;
1091 			break;
1092 		case 'D':
1093 			do_disk |= DISK_NEW;
1094 			break;
1095 		case 'x':
1096 			do_disk |= DISK_EXTENDED;
1097 			break;
1098 		case 'X':
1099 			if (do_disk & DISK_IOPATH_LTI)
1100 				errflg++;	/* -Y already used */
1101 			else
1102 				do_disk |= DISK_IOPATH_LI;
1103 			break;
1104 		case 'Y':
1105 			if (do_disk & DISK_IOPATH_LI)
1106 				errflg++;	/* -X already used */
1107 			else
1108 				do_disk |= DISK_IOPATH_LTI;
1109 			break;
1110 		case 'C':
1111 			do_controller++;
1112 			break;
1113 		case 'c':
1114 			do_cpu++;
1115 			break;
1116 		case 'I':
1117 			do_interval++;
1118 			break;
1119 		case 'p':
1120 			do_partitions++;
1121 			break;
1122 		case 'P':
1123 			do_partitions_only++;
1124 			break;
1125 		case 'n':
1126 			do_conversions++;
1127 			break;
1128 		case 'M':
1129 			do_megabytes++;
1130 			break;
1131 		case 'e':
1132 			do_disk |= DISK_ERRORS;
1133 			break;
1134 		case 'E':
1135 			do_disk |= DISK_EXTENDED_ERRORS;
1136 			break;
1137 		case 'i':
1138 			do_devid = 1;
1139 			break;
1140 		case 's':
1141 			suppress_state = 1;
1142 			break;
1143 		case 'z':
1144 			suppress_zero = 1;
1145 			break;
1146 		case 'm':
1147 			show_mountpts = 1;
1148 			break;
1149 		case 'T':
1150 			if (optarg) {
1151 				if (*optarg == 'u')
1152 					timestamp_fmt = UDATE;
1153 				else if (*optarg == 'd')
1154 					timestamp_fmt = DDATE;
1155 				else
1156 					errflg++;
1157 			} else {
1158 				errflg++;
1159 			}
1160 			break;
1161 		case 'r':
1162 			do_raw = 1;
1163 			break;
1164 		case 'l':
1165 			df.if_max_iodevs = safe_strtoi(optarg, "invalid limit");
1166 			if (df.if_max_iodevs < 1)
1167 				usage();
1168 			break;
1169 		case '?':
1170 			errflg++;
1171 	}
1172 
1173 	if ((do_disk & DISK_OLD) && (do_disk & DISK_NEW)) {
1174 		(void) fprintf(stderr, "-d and -D are incompatible.\n");
1175 		usage();
1176 	}
1177 
1178 	if (errflg) {
1179 		usage();
1180 	}
1181 
1182 	/* if no output classes explicity specified, use defaults */
1183 	if (do_tty == 0 && do_disk == 0 && do_cpu == 0)
1184 		do_tty = do_cpu = 1, do_disk = DISK_OLD;
1185 
1186 	/*
1187 	 * multi-path options (-X, -Y) without a specific vertical
1188 	 * output format (-x, -e, -E) imply extended -x format
1189 	 */
1190 	if ((do_disk & (DISK_IOPATH_LI | DISK_IOPATH_LTI)) &&
1191 	    !(do_disk & PRINT_VERTICAL))
1192 		do_disk |= DISK_EXTENDED;
1193 
1194 	/*
1195 	 * If conflicting options take the preferred
1196 	 * -D and -x result in -x
1197 	 * -d or -D and -e or -E gives only whatever -d or -D was specified
1198 	 */
1199 	if ((do_disk & DISK_EXTENDED) && (do_disk & DISK_NORMAL))
1200 		do_disk &= ~DISK_NORMAL;
1201 	if ((do_disk & DISK_NORMAL) && (do_disk & DISK_ERROR_MASK))
1202 		do_disk &= ~DISK_ERROR_MASK;
1203 
1204 	/* nfs, tape, always shown */
1205 	df.if_allowed_types = IODEV_NFS | IODEV_TAPE;
1206 
1207 	/*
1208 	 * If limit == 0 then no command line limit was set, else if any of
1209 	 * the flags that cause unlimited disks were not set,
1210 	 * use the default of 4
1211 	 */
1212 	if (df.if_max_iodevs == 0) {
1213 		df.if_max_iodevs = DEFAULT_LIMIT;
1214 		df.if_skip_floppy = 1;
1215 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS |
1216 		    DISK_EXTENDED_ERRORS)) {
1217 			df.if_max_iodevs = UNLIMITED_IODEVS;
1218 			df.if_skip_floppy = 0;
1219 		}
1220 	}
1221 	if (do_disk) {
1222 		size_t count = 0;
1223 		size_t i = optind;
1224 
1225 		while (i < argc && !isdigit(argv[i][0])) {
1226 			count++;
1227 			i++;
1228 		}
1229 
1230 		/*
1231 		 * "Note:  disks  explicitly  requested
1232 		 * are not subject to this disk limit"
1233 		 */
1234 		if ((count > df.if_max_iodevs) ||
1235 		    (count && (df.if_max_iodevs == UNLIMITED_IODEVS)))
1236 			df.if_max_iodevs = count;
1237 
1238 		df.if_names = safe_alloc(count * sizeof (char *));
1239 		(void) memset(df.if_names, 0, count * sizeof (char *));
1240 
1241 		df.if_nr_names = 0;
1242 		while (optind < argc && !isdigit(argv[optind][0]))
1243 			df.if_names[df.if_nr_names++] = argv[optind++];
1244 	}
1245 	if (optind < argc) {
1246 		interval = safe_strtoi(argv[optind], "invalid interval");
1247 		if (interval < 1)
1248 			fail(0, "invalid interval");
1249 		optind++;
1250 
1251 		if (optind < argc) {
1252 			iter = safe_strtoi(argv[optind], "invalid count");
1253 			if (iter < 1)
1254 				fail(0, "invalid count");
1255 			optind++;
1256 		}
1257 	}
1258 	if (interval == 0)
1259 		iter = 1;
1260 	if (optind < argc)
1261 		usage();
1262 }
1263 
1264 /*
1265  * Driver for doing the extended header formatting. Will produce
1266  * the function stack needed to output an extended header based
1267  * on the options selected.
1268  */
1269 
1270 void
1271 do_format(void)
1272 {
1273 	char	header[SMALL_SCRATCH_BUFLEN];
1274 	char 	ch;
1275 	char 	iosz;
1276 	const char    *fstr;
1277 
1278 	disk_header[0] = 0;
1279 	ch = (do_interval ? 'i' : 's');
1280 	iosz = (do_megabytes ? 'M' : 'k');
1281 	if (do_disk & DISK_ERRORS) {
1282 		if (do_raw == 0) {
1283 			(void) sprintf(header, "s/w h/w trn tot ");
1284 		} else
1285 			(void) sprintf(header, "s/w,h/w,trn,tot");
1286 	} else
1287 		*header = NULL;
1288 	switch (do_disk & DISK_IO_MASK) {
1289 		case DISK_OLD:
1290 			if (do_raw == 0)
1291 				fstr = "%cp%c tp%c serv  ";
1292 			else
1293 				fstr = "%cp%c,tp%c,serv";
1294 			(void) snprintf(disk_header, sizeof (disk_header),
1295 			    fstr, iosz, ch, ch);
1296 			break;
1297 		case DISK_NEW:
1298 			if (do_raw == 0)
1299 				fstr = "rp%c wp%c util  ";
1300 			else
1301 				fstr = "%rp%c,wp%c,util";
1302 			(void) snprintf(disk_header, sizeof (disk_header),
1303 			    fstr, ch, ch);
1304 			break;
1305 		case DISK_EXTENDED:
1306 			/* This is -x option */
1307 			if (!do_conversions) {
1308 				/* without -n option */
1309 				if (do_raw == 0) {
1310 					/* without -r option */
1311 					(void) snprintf(disk_header,
1312 					    sizeof (disk_header),
1313 					    "%-*.*s    r/%c    w/%c   "
1314 					    "%cr/%c   %cw/%c wait actv  "
1315 					    "svc_t  %%%%w  %%%%b %s",
1316 					    iodevs_nl, iodevs_nl, "device",
1317 					    ch, ch, iosz, ch, iosz, ch, header);
1318 				} else {
1319 					/* with -r option */
1320 					(void) snprintf(disk_header,
1321 					    sizeof (disk_header),
1322 					    "device,r/%c,w/%c,%cr/%c,%cw/%c,"
1323 					    "wait,actv,svc_t,%%%%w,"
1324 					    "%%%%b,%s",
1325 					    ch, ch, iosz, ch, iosz, ch, header);
1326 				}
1327 			} else {
1328 				/* with -n option */
1329 				if (do_raw == 0) {
1330 					fstr = "    r/%c    w/%c   %cr/%c   "
1331 					    "%cw/%c wait actv wsvc_t asvc_t  "
1332 					    "%%%%w  %%%%b %sdevice";
1333 				} else {
1334 					fstr = "r/%c,w/%c,%cr/%c,%cw/%c,"
1335 					    "wait,actv,wsvc_t,asvc_t,"
1336 					    "%%%%w,%%%%b,%sdevice";
1337 				}
1338 				(void) snprintf(disk_header,
1339 				    sizeof (disk_header),
1340 				    fstr, ch, ch, iosz, ch, iosz,
1341 				    ch, header);
1342 			}
1343 			break;
1344 		default:
1345 			break;
1346 	}
1347 
1348 	/* do DISK_ERRORS header (already added above for DISK_EXTENDED) */
1349 	if ((do_disk & DISK_ERRORS) &&
1350 	    ((do_disk & DISK_IO_MASK) != DISK_EXTENDED)) {
1351 		if (!do_conversions) {
1352 			if (do_raw == 0)
1353 				(void) snprintf(disk_header,
1354 				    sizeof (disk_header), "%-*.*s  %s",
1355 				    iodevs_nl, iodevs_nl, "device", header);
1356 			else
1357 				(void) snprintf(disk_header,
1358 				    sizeof (disk_header), "device,%s", header);
1359 		} else {
1360 			if (do_raw == 0) {
1361 				(void) snprintf(disk_header,
1362 				    sizeof (disk_header),
1363 				    "  %sdevice", header);
1364 			} else {
1365 				(void) snprintf(disk_header,
1366 				    sizeof (disk_header),
1367 				    "%s,device", header);
1368 			}
1369 		}
1370 	} else {
1371 		/*
1372 		 * Need to subtract two characters for the % escape in
1373 		 * the string.
1374 		 */
1375 		dh_len = strlen(disk_header) - 2;
1376 	}
1377 
1378 	/*
1379 	 * -n *and* (-E *or* -e *or* -x)
1380 	 */
1381 	if (do_conversions && (do_disk & PRINT_VERTICAL)) {
1382 		if (do_tty)
1383 			setup(print_tty_hdr1);
1384 		if (do_cpu)
1385 			setup(print_cpu_hdr1);
1386 		if (do_tty || do_cpu)
1387 			setup(do_newline);
1388 		if (do_tty)
1389 			setup(print_tty_hdr2);
1390 		if (do_cpu)
1391 			setup(print_cpu_hdr2);
1392 		if (do_tty || do_cpu)
1393 			setup(do_newline);
1394 		if (do_tty)
1395 			setup(print_tty_data);
1396 		if (do_cpu)
1397 			setup(print_cpu_data);
1398 		if (do_tty || do_cpu)
1399 			setup(do_newline);
1400 		printxhdr();
1401 
1402 		setup(show_all_disks);
1403 	} else {
1404 		/*
1405 		 * These unholy gymnastics are necessary to place CPU/tty
1406 		 * data to the right of the disks/errors for the first
1407 		 * line in vertical mode.
1408 		 */
1409 		if (do_disk & PRINT_VERTICAL) {
1410 			printxhdr();
1411 
1412 			setup(show_first_disk);
1413 			if (do_tty)
1414 				setup(print_tty_data);
1415 			if (do_cpu)
1416 				setup(print_cpu_data);
1417 			setup(do_newline);
1418 
1419 			setup(show_other_disks);
1420 		} else {
1421 			setup(hdrout);
1422 			if (do_tty)
1423 				setup(print_tty_data);
1424 			setup(show_all_disks);
1425 			if (do_cpu)
1426 				setup(print_cpu_data);
1427 		}
1428 
1429 		setup(do_newline);
1430 	}
1431 	if (do_disk & DISK_EXTENDED_ERRORS)
1432 		setup(disk_errors);
1433 }
1434 
1435 /*
1436  * Add a new function to the list of functions
1437  * for this invocation. Once on the stack the
1438  * function is never removed nor does its place
1439  * change.
1440  */
1441 void
1442 setup(void (*nfunc)(void))
1443 {
1444 	format_t *tmp;
1445 
1446 	tmp = safe_alloc(sizeof (format_t));
1447 	tmp->nfunc = nfunc;
1448 	tmp->next = 0;
1449 	if (formatter_end)
1450 		formatter_end->next = tmp;
1451 	else
1452 		formatter_list = tmp;
1453 	formatter_end = tmp;
1454 
1455 }
1456 
1457 /*
1458  * The functions after this comment are devoted to printing
1459  * various parts of the header. They are selected based on the
1460  * options provided when the program was invoked. The functions
1461  * are either directly invoked in printhdr() or are indirectly
1462  * invoked by being placed on the list of functions used when
1463  * extended headers are used.
1464  */
1465 void
1466 print_tty_hdr1(void)
1467 {
1468 	char *fstr;
1469 	char *dstr;
1470 
1471 	if (do_raw == 0) {
1472 		fstr = "%10.10s";
1473 		dstr = "tty    ";
1474 	} else {
1475 		fstr = "%s";
1476 		dstr = "tty";
1477 	}
1478 	push_out(fstr, dstr);
1479 }
1480 
1481 void
1482 print_tty_hdr2(void)
1483 {
1484 	if (do_raw == 0)
1485 		push_out("%-10.10s", " tin tout");
1486 	else
1487 		push_out("tin,tout");
1488 }
1489 
1490 void
1491 print_cpu_hdr1(void)
1492 {
1493 	char *dstr;
1494 
1495 	if (do_raw == 0)
1496 		dstr = "     cpu";
1497 	else
1498 		dstr = "cpu";
1499 	push_out(dstr);
1500 }
1501 
1502 void
1503 print_cpu_hdr2(void)
1504 {
1505 	char *dstr;
1506 
1507 	if (do_raw == 0)
1508 		dstr = " us sy dt id";
1509 	else
1510 		dstr = "us,sy,dt,id";
1511 	push_out(dstr);
1512 }
1513 
1514 /*
1515  * Assumption is that tty data is always first - no need for raw mode leading
1516  * comma.
1517  */
1518 void
1519 print_tty_data(void)
1520 {
1521 	char *fstr;
1522 	uint64_t deltas;
1523 	double raw;
1524 	double outch;
1525 	kstat_t *oldks = NULL;
1526 
1527 	if (oldss)
1528 		oldks = &oldss->s_sys.ss_agg_sys;
1529 
1530 	if (do_raw == 0)
1531 		fstr = " %3.0f %4.0f ";
1532 	else
1533 		fstr = "%.0f,%.0f";
1534 	deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "rawch");
1535 	raw = deltas;
1536 	raw /= getime;
1537 	deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "outch");
1538 	outch = deltas;
1539 	outch /= getime;
1540 	push_out(fstr, raw, outch);
1541 }
1542 
1543 /*
1544  * Write out CPU data
1545  */
1546 void
1547 print_cpu_data(void)
1548 {
1549 	char *fstr;
1550 	uint64_t idle;
1551 	uint64_t user;
1552 	uint64_t kern;
1553 	uint64_t dtrace;
1554 	uint64_t nsec_elapsed;
1555 	kstat_t *oldks = NULL;
1556 
1557 	if (oldss)
1558 		oldks = &oldss->s_sys.ss_agg_sys;
1559 
1560 	if (do_raw == 0)
1561 		fstr = " %2.0f %2.0f %2.0f %2.0f";
1562 	else
1563 		fstr = "%.0f,%.0f,%.0f,%.0f";
1564 
1565 	idle = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_idle");
1566 	user = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_user");
1567 	kern = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_kernel");
1568 	dtrace = kstat_delta(oldks, &newss->s_sys.ss_agg_sys,
1569 	    "cpu_nsec_dtrace");
1570 	nsec_elapsed = newss->s_sys.ss_agg_sys.ks_snaptime -
1571 	    (oldks == NULL ? 0 : oldks->ks_snaptime);
1572 	push_out(fstr, user * percent, kern * percent,
1573 	    dtrace * 100.0 / nsec_elapsed / newss->s_nr_active_cpus,
1574 	    idle * percent);
1575 }
1576 
1577 /*
1578  * Emit the appropriate header.
1579  */
1580 void
1581 hdrout(void)
1582 {
1583 	if (do_raw == 0) {
1584 		if (--tohdr == 0)
1585 			printhdr(0);
1586 	} else if (hdr_out == 0) {
1587 		printhdr(0);
1588 		hdr_out = 1;
1589 	}
1590 }
1591 
1592 /*
1593  * Write out disk errors when -E is specified.
1594  */
1595 void
1596 disk_errors(void)
1597 {
1598 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk_errors, NULL);
1599 }
1600 
1601 void
1602 show_first_disk(void)
1603 {
1604 	int count = 0;
1605 
1606 	show_disk_mode = SHOW_FIRST_ONLY;
1607 
1608 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1609 }
1610 
1611 void
1612 show_other_disks(void)
1613 {
1614 	int count = 0;
1615 
1616 	show_disk_mode = SHOW_SECOND_ONWARDS;
1617 
1618 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1619 }
1620 
1621 void
1622 show_all_disks(void)
1623 {
1624 	int count = 0;
1625 
1626 	show_disk_mode = SHOW_ALL;
1627 
1628 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1629 }
1630 
1631 /*
1632  * Write a newline out and clear the lineout flag.
1633  */
1634 static void
1635 do_newline(void)
1636 {
1637 	if (lineout) {
1638 		(void) putchar('\n');
1639 		lineout = 0;
1640 	}
1641 }
1642 
1643 /*
1644  * Generalized printf function that determines what extra
1645  * to print out if we're in raw mode. At this time we
1646  * don't care about errors.
1647  */
1648 static void
1649 push_out(const char *message, ...)
1650 {
1651 	va_list args;
1652 
1653 	va_start(args, message);
1654 	if (do_raw && lineout == 1)
1655 		(void) putchar(',');
1656 	(void) vprintf(message, args);
1657 	va_end(args);
1658 	lineout = 1;
1659 }
1660 
1661 /*
1662  * Emit the header string when -e is specified.
1663  */
1664 static void
1665 print_err_hdr(void)
1666 {
1667 	char obuf[SMALL_SCRATCH_BUFLEN];
1668 
1669 	if (do_raw) {
1670 		push_out("errors");
1671 		return;
1672 	}
1673 
1674 	if (do_conversions == 0) {
1675 		if (!(do_disk & DISK_EXTENDED)) {
1676 			(void) snprintf(obuf, sizeof (obuf),
1677 			    "%11s", one_blank);
1678 			push_out(obuf);
1679 		}
1680 	} else if (do_disk == DISK_ERRORS)
1681 		push_out(two_blanks);
1682 	else
1683 		push_out(one_blank);
1684 	push_out("---- errors --- ");
1685 }
1686 
1687 /*
1688  * Emit the header string when -e is specified.
1689  */
1690 static void
1691 print_disk_header(void)
1692 {
1693 	push_out(disk_header);
1694 }
1695 
1696 /*
1697  * No, UINTMAX_MAX isn't the right thing here since
1698  * it is #defined to be either INT32_MAX or INT64_MAX
1699  * depending on the whether _LP64 is defined.
1700  *
1701  * We want to handle the odd future case of having
1702  * ulonglong_t be more than 64 bits but we have
1703  * no nice #define MAX value we can drop in place
1704  * without having to change this code in the future.
1705  */
1706 
1707 u_longlong_t
1708 ull_delta(u_longlong_t old, u_longlong_t new)
1709 {
1710 	if (new >= old)
1711 		return (new - old);
1712 	else
1713 		return ((UINT64_MAX - old) + new + 1);
1714 }
1715 
1716 /*
1717  * Take the difference of an unsigned 32
1718  * bit int attempting to cater for
1719  * overflow.
1720  */
1721 uint_t
1722 u32_delta(uint_t old, uint_t new)
1723 {
1724 	if (new >= old)
1725 		return (new - old);
1726 	else
1727 		return ((UINT32_MAX - old) + new + 1);
1728 }
1729 
/*
 * Report whether a value is "zero" for display purposes, that is,
 * non-negative and below EPSILON. This is exactly what is needed
 * for standard iostat output, but make sure to use it only for that.
 *
 * Returns 1 when value is in [0.0, EPSILON), 0 otherwise.
 */
#define	EPSILON	(0.1)
static int
fzero(double value)
{
	if (value < 0.0)
		return (0);
	return (value < EPSILON);
}
1740 
/*
 * Convert a decimal string to an int.
 *
 *	val	string to convert
 *	errmsg	text placed in front of val in the diagnostic
 *
 * The whole of val must be a base 10 number whose value fits in an
 * int. An empty string, trailing junk, a strtol() range error or a
 * value that does not survive the narrowing to int is reported
 * with fail(0, "<errmsg> <val>"). fail() is presumed not to return
 * -- TODO confirm; if it does return, the narrowed value is returned.
 */
static int
safe_strtoi(char const *val, char *errmsg)
{
	char *end;
	long tmp;

	errno = 0;
	tmp = strtol(val, &end, 10);

	/*
	 * end == val means no digits were consumed at all. The round
	 * trip through int catches values that fit in a long but would
	 * be silently truncated by the return below.
	 */
	if (end == val || *end != '\0' || errno != 0 ||
	    (long)(int)tmp != tmp)
		fail(0, "%s %s", errmsg, val);
	return ((int)tmp);
}
1753