xref: /illumos-gate/usr/src/cmd/stat/iostat/iostat.c (revision 66597161e2ba69a84fa138bce7ac02a1e6b9746c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * rewritten from UCB 4.13 83/09/25
27  * rewritten from SunOS 4.1 SID 1.18 89/10/06
28  */
29 /*
30  * Copyright (c) 2012 by Delphix. All rights reserved.
31  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
32  * Copyright 2016 James S. Blachly, MD. All rights reserved.
33  */
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdarg.h>
38 #include <ctype.h>
39 #include <unistd.h>
40 #include <memory.h>
41 #include <errno.h>
42 #include <string.h>
43 #include <signal.h>
44 #include <sys/types.h>
45 #include <time.h>
46 #include <sys/time.h>
47 #include <sys/sysinfo.h>
48 #include <inttypes.h>
49 #include <strings.h>
50 #include <sys/systeminfo.h>
51 #include <kstat.h>
52 #include <locale.h>
53 
54 #include "dsr.h"
55 #include "statcommon.h"
56 
/*
 * Bits kept in do_disk.  Each one is switched on by a command line option
 * in do_args().
 */
#define	DISK_OLD		0x0001	/* -d */
#define	DISK_NEW		0x0002	/* -D */
#define	DISK_EXTENDED		0x0004	/* -x */
#define	DISK_ERRORS		0x0008	/* -e */
#define	DISK_EXTENDED_ERRORS	0x0010	/* -E */
#define	DISK_IOPATH_LI		0x0020	/* LunInitiator (-X) */
#define	DISK_IOPATH_LTI		0x0040	/* LunTargetInitiator (-Y) */

/* Convenience groupings of the bits above. */
#define	DISK_NORMAL		(DISK_OLD | DISK_NEW)
#define	DISK_IO_MASK		(DISK_OLD | DISK_NEW | DISK_EXTENDED)
#define	DISK_ERROR_MASK		(DISK_ERRORS | DISK_EXTENDED_ERRORS)
#define	PRINT_VERTICAL		(DISK_ERROR_MASK | DISK_EXTENDED)

/* Value printhdr() reloads into tohdr after emitting the basic header. */
#define	REPRINT 19

/* Number of leading error kstats show_disk() reads and totals for -e. */
#define	NUMBER_OF_ERR_COUNTERS	3

/*
 * It's really a pseudo-gigabyte. We use 1000000000 bytes so that the disk
 * labels don't look bad. 1GB is really 1073741824 bytes.
 */
#define	DISK_GIGABYTE   1000000000.0
79 
/*
 * Function descriptor to be called when extended
 * headers are used.
 *
 * Descriptors are chained into a singly linked list; main() walks the list
 * from formatter_list once per report and calls each nfunc in turn.
 */
typedef struct formatter {
	void (*nfunc)(void);		/* output routine, takes no arguments */
	struct formatter *next;		/* next descriptor, NULL ends the list */
} format_t;
88 
/*
 * Used to get formatting right when printing tty/cpu
 * data to the right of disk data
 *
 * show_disk() consults the mode together with its optional device counter.
 */
enum show_disk_mode {
	SHOW_FIRST_ONLY,	/* print only while the counter is still zero */
	SHOW_SECOND_ONWARDS,	/* skip the device seen with a zero counter */
	SHOW_ALL		/* no filtering */
};

/* Mode currently in effect; starts out as SHOW_ALL. */
enum show_disk_mode show_disk_mode = SHOW_ALL;
100 
/* program name (presumably consumed by the shared stat code - TODO confirm) */
char *cmdname = "iostat";
/* set to 1 by printhdr() on SIGCONT; main() hands its address to sleep_until() */
int caught_cont = 0;

/* padding strings used by write_core_header() and show_disk() */
static char one_blank[] = " ";
static char two_blanks[] = "  ";

/*
 * count for number of lines to be emitted before a header is
 * shown again. Only used for the basic format.
 */
static	uint_t	tohdr = 1;

/*
 * If we're in raw format, have we printed a header? We only do it
 * once for raw but we emit it every REPRINT lines in non-raw format.
 * This applies only for the basic header. The extended header is
 * done only once in both formats.
 */
static	uint_t	hdr_out;

/*
 * Flags representing arguments from command line
 */
static	uint_t	do_tty;			/* show tty info (-t) */
static	uint_t	do_disk;		/* show disk info per selected */
					/* format (-d, -D, -e, -E, -x -X -Y) */
static	uint_t	do_cpu;			/* show cpu info (-c) */
static	uint_t	do_interval;		/* do intervals (-I) */
static	int	do_partitions;		/* per-partition stats (-p) */
static	int	do_partitions_only;	/* per-partition stats only (-P) */
					/* no per-device stats for disks */
static	uint_t	do_conversions;		/* display disks as cXtYdZ (-n) */
static	uint_t	do_megabytes;		/* display data in MB/sec (-M) */
static  uint_t	do_controller;		/* display controller info (-C) */
static  uint_t	do_raw;			/* emit raw format (-r) */
static	uint_t	timestamp_fmt = NODATE;	/* timestamp  each display (-T) */
static	uint_t	do_devid;		/* -E should show devid (-i) */

/*
 * Default number of disk drives to be displayed in basic format
 */
#define	DEFAULT_LIMIT	4

/*
 * Device filter handed to acquire_snapshot(); main() fills in the allowed
 * device types and do_args() the -l limit.
 */
struct iodev_filter df;

static  uint_t	suppress_state;		/* skip state change messages (-s) */
static	uint_t	suppress_zero;		/* skip zero valued lines (-z) */
static  uint_t	show_mountpts;		/* show mount points (-m) */
static	int	interval;		/* interval (seconds) to output */
static	int	iter;			/* iterations from command line */

/* size of the on-stack scratch buffers in show_disk_name() and show_disk() */
#define	SMALL_SCRATCH_BUFLEN	MAXNAMELEN

static int	iodevs_nl;		/* name field width */
#define	IODEVS_NL_MIN		6	/* not too thin for "device" */
#define	IODEVS_NL_MAX		24	/* but keep full width under 80 */

/* second header line; passed to push_out() as its format argument */
static	char	disk_header[132];
static	uint_t	dh_len;			/* disk header length for centering */
static  int	lineout;		/* data waiting to be printed? */

/*
 * Most recent snapshot and the one before it.  oldss is NULL when there is
 * nothing to diff against; main() also resets it after a configuration
 * change so that statistics are shown from boot.
 */
static struct snapshot *newss;
static struct snapshot *oldss;
static	double	getime;			/* elapsed time */
static	double	percent;		/* 100 / etime */

/*
 * List of functions to be called which will construct the desired output
 */
static format_t	*formatter_list;
/* presumably the tail of formatter_list, maintained by setup() - TODO confirm */
static format_t *formatter_end;
172 
173 static u_longlong_t	ull_delta(u_longlong_t, u_longlong_t);
174 static uint_t	u32_delta(uint_t, uint_t);
175 static void setup(void (*nfunc)(void));
176 static void print_tty_hdr1(void);
177 static void print_tty_hdr2(void);
178 static void print_cpu_hdr1(void);
179 static void print_cpu_hdr2(void);
180 static void print_tty_data(void);
181 static void print_cpu_data(void);
182 static void print_err_hdr(void);
183 static void print_disk_header(void);
184 static void hdrout(void);
185 static void disk_errors(void);
186 static void do_newline(void);
187 static void push_out(const char *, ...);
188 static void printhdr(int);
189 static void printxhdr(void);
190 static void usage(void);
191 static void do_args(int, char **);
192 static void do_format(void);
193 static void show_all_disks(void);
194 static void show_first_disk(void);
195 static void show_other_disks(void);
196 static void show_disk_errors(void *, void *, void *);
197 static void write_core_header(void);
198 static int  fzero(double value);
199 static int  safe_strtoi(char const *val, char *errmsg);
200 
201 int
202 main(int argc, char **argv)
203 {
204 	enum snapshot_types types = SNAP_SYSTEM;
205 	kstat_ctl_t *kc;
206 	long hz;
207 	int forever;
208 	hrtime_t start_n;
209 	hrtime_t period_n = 0;
210 
211 	(void) setlocale(LC_ALL, "");
212 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
213 #define	TEXT_DOMAIN "SYS_TEST"		/* Use this only if it weren't */
214 #endif
215 	(void) textdomain(TEXT_DOMAIN);
216 
217 	do_args(argc, argv);
218 
219 	/*
220 	 * iostat historically showed CPU changes, even though
221 	 * it doesn't provide much useful information
222 	 */
223 	types |= SNAP_CPUS;
224 
225 	if (do_disk)
226 		types |= SNAP_IODEVS;
227 
228 	if (do_disk && !do_partitions_only)
229 		df.if_allowed_types |= IODEV_DISK;
230 	if (do_disk & DISK_IOPATH_LI) {
231 		df.if_allowed_types |= IODEV_IOPATH_LTI;
232 		types |= SNAP_IOPATHS_LI;
233 	}
234 	if (do_disk & DISK_IOPATH_LTI) {
235 		df.if_allowed_types |= IODEV_IOPATH_LTI;
236 		types |= SNAP_IOPATHS_LTI;
237 	}
238 	if (do_disk & DISK_ERROR_MASK)
239 		types |= SNAP_IODEV_ERRORS;
240 	if (do_partitions || do_partitions_only)
241 		df.if_allowed_types |= IODEV_PARTITION;
242 	if (do_conversions)
243 		types |= SNAP_IODEV_PRETTY;
244 	if (do_devid)
245 		types |= SNAP_IODEV_DEVID;
246 	if (do_controller) {
247 		if (!(do_disk & PRINT_VERTICAL) ||
248 		    (do_disk & DISK_EXTENDED_ERRORS))
249 			fail(0, "-C can only be used with -e or -x.");
250 		types |= SNAP_CONTROLLERS;
251 		df.if_allowed_types |= IODEV_CONTROLLER;
252 	}
253 
254 	hz = sysconf(_SC_CLK_TCK);
255 
256 	/*
257 	 * Undocumented behavior - sending a SIGCONT will result
258 	 * in a new header being emitted. Used only if we're not
259 	 * doing extended headers. This is a historical
260 	 * artifact.
261 	 */
262 	if (!(do_disk & PRINT_VERTICAL))
263 		(void) signal(SIGCONT, printhdr);
264 
265 	if (interval)
266 		period_n = (hrtime_t)interval * NANOSEC;
267 
268 	kc = open_kstat();
269 	if (interval)
270 		start_n = gethrtime();
271 	newss = acquire_snapshot(kc, types, &df);
272 
273 	/* compute width of "device" field */
274 	iodevs_nl = newss->s_iodevs_is_name_maxlen;
275 	iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
276 	    IODEVS_NL_MIN : iodevs_nl;
277 	iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
278 	    IODEVS_NL_MAX : iodevs_nl;
279 
280 	do_format();
281 
282 	forever = (iter == 0);
283 	do {
284 		if (do_conversions && show_mountpts)
285 			do_mnttab();
286 
287 		if (do_tty || do_cpu) {
288 			kstat_t *oldks;
289 			oldks = oldss ? &oldss->s_sys.ss_agg_sys : NULL;
290 			getime = cpu_ticks_delta(oldks,
291 			    &newss->s_sys.ss_agg_sys);
292 			percent = (getime > 0.0) ? 100.0 / getime : 0.0;
293 			getime = (getime / nr_active_cpus(newss)) / hz;
294 			if (getime == 0.0)
295 				getime = (double)interval;
296 			if (getime == 0.0 || do_interval)
297 				getime = 1.0;
298 		}
299 
300 		if (formatter_list) {
301 			format_t *tmp;
302 			tmp = formatter_list;
303 
304 			if (timestamp_fmt != NODATE)
305 				print_timestamp(timestamp_fmt);
306 
307 			while (tmp) {
308 				(tmp->nfunc)();
309 				tmp = tmp->next;
310 			}
311 			(void) fflush(stdout);
312 		}
313 
314 		/* only remaining/doing a single iteration, we are done */
315 		if (iter == 1)
316 			continue;
317 
318 		if (interval > 0)
319 			/* Have a kip */
320 			sleep_until(&start_n, period_n, forever, &caught_cont);
321 
322 		free_snapshot(oldss);
323 		oldss = newss;
324 		newss = acquire_snapshot(kc, types, &df);
325 		iodevs_nl = (newss->s_iodevs_is_name_maxlen > iodevs_nl) ?
326 		    newss->s_iodevs_is_name_maxlen : iodevs_nl;
327 		iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
328 		    IODEVS_NL_MIN : iodevs_nl;
329 		iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
330 		    IODEVS_NL_MAX : iodevs_nl;
331 
332 		if (!suppress_state)
333 			snapshot_report_changes(oldss, newss);
334 
335 		/* if config changed, show stats from boot */
336 		if (snapshot_has_changed(oldss, newss)) {
337 			free_snapshot(oldss);
338 			oldss = NULL;
339 		}
340 
341 	} while (--iter);
342 
343 	free_snapshot(oldss);
344 	free_snapshot(newss);
345 	(void) kstat_close(kc);
346 	free(df.if_names);
347 	return (0);
348 }
349 
350 /*
351  * Some magic numbers used in header formatting.
352  *
353  * DISK_LEN = length of either "kps tps serv" or "wps rps util"
354  *	      using 0 as the first position
355  *
356  * DISK_ERROR_LEN = length of "s/w h/w trn tot" with one space on
357  *		either side. Does not use zero as first pos.
358  *
359  * DEVICE_LEN = length of "device" + 1 character.
360  */
361 
362 #define	DISK_LEN	11
363 #define	DISK_ERROR_LEN	16
364 #define	DEVICE_LEN	7
365 
366 /*ARGSUSED*/
367 static void
368 show_disk_name(void *v1, void *v2, void *data)
369 {
370 	struct iodev_snapshot *dev = (struct iodev_snapshot *)v2;
371 	size_t slen;
372 	char *name;
373 	char fbuf[SMALL_SCRATCH_BUFLEN];
374 
375 	if (dev == NULL)
376 		return;
377 
378 	name = do_conversions ? dev->is_pretty : dev->is_name;
379 	name = name ? name : dev->is_name;
380 
381 	if (!do_raw) {
382 		uint_t width;
383 
384 		slen = strlen(name);
385 		/*
386 		 * The length is less
387 		 * than the section
388 		 * which will be displayed
389 		 * on the next line.
390 		 * Center the entry.
391 		 */
392 
393 		width = (DISK_LEN + 1)/2 + (slen / 2);
394 		(void) snprintf(fbuf, sizeof (fbuf),
395 		    "%*s", width, name);
396 		name = fbuf;
397 		push_out("%-13.13s ", name);
398 	} else {
399 		push_out(name);
400 	}
401 }
402 
403 /*ARGSUSED*/
404 static void
405 show_disk_header(void *v1, void *v2, void *data)
406 {
407 	push_out(disk_header);
408 }
409 
410 /*
411  * Write out a two line header. What is written out depends on the flags
412  * selected but in the worst case consists of a tty header, a disk header
413  * providing information for 4 disks and a cpu header.
414  *
415  * The tty header consists of the word "tty" on the first line above the
416  * words "tin tout" on the next line. If present the tty portion consumes
417  * the first 10 characters of each line since "tin tout" is surrounded
418  * by single spaces.
419  *
420  * Each of the disk sections is a 14 character "block" in which the name of
421  * the disk is centered in the first 12 characters of the first line.
422  *
423  * The cpu section is an 11 character block with "cpu" centered over the
424  * section.
425  *
426  * The worst case should look as follows:
427  *
428  * 0---------1--------2---------3---------4---------5---------6---------7-------
429  *    tty        sd0           sd1           sd2           sd3           cpu
430  *  tin tout kps tps serv  kps tps serv  kps tps serv  kps tps serv  us sy dt id
431  *  NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NN NN NN NN
432  *
433  * When -D is specified, the disk header looks as follows (worst case):
434  *
435  * 0---------1--------2---------3---------4---------5---------6---------7-------
436  *     tty        sd0           sd1             sd2          sd3          cpu
437  *   tin tout rps wps util  rps wps util  rps wps util  rps wps util us sy dt id
438  *   NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN NN NN NN NN
439  */
440 static void
441 printhdr(int sig)
442 {
443 	/*
444 	 * If we're here because a signal fired, reenable the
445 	 * signal.
446 	 */
447 	if (sig)
448 		(void) signal(SIGCONT, printhdr);
449 	if (sig == SIGCONT)
450 		caught_cont = 1;
451 	/*
452 	 * Horizontal mode headers
453 	 *
454 	 * First line
455 	 */
456 	if (do_tty)
457 		print_tty_hdr1();
458 
459 	if (do_disk & DISK_NORMAL) {
460 		(void) snapshot_walk(SNAP_IODEVS, NULL, newss,
461 		    show_disk_name, NULL);
462 	}
463 
464 	if (do_cpu)
465 		print_cpu_hdr1();
466 	do_newline();
467 
468 	/*
469 	 * Second line
470 	 */
471 	if (do_tty)
472 		print_tty_hdr2();
473 
474 	if (do_disk & DISK_NORMAL) {
475 		(void) snapshot_walk(SNAP_IODEVS, NULL, newss,
476 		    show_disk_header, NULL);
477 	}
478 
479 	if (do_cpu)
480 		print_cpu_hdr2();
481 	do_newline();
482 
483 	tohdr = REPRINT;
484 }
485 
486 /*
487  * Write out the extended header centered over the core information.
488  */
489 static void
490 write_core_header(void)
491 {
492 	char *edev = "extended device statistics";
493 	uint_t lead_space_ct;
494 	uint_t follow_space_ct;
495 	size_t edevlen;
496 
497 	if (do_raw == 0) {
498 		/*
499 		 * The things we do to look nice...
500 		 *
501 		 * Center the core output header. Make sure we have the
502 		 * right number of trailing spaces for follow-on headers
503 		 * (i.e., cpu and/or tty and/or errors).
504 		 */
505 		edevlen = strlen(edev);
506 		lead_space_ct = dh_len - edevlen;
507 		lead_space_ct /= 2;
508 		if (lead_space_ct > 0) {
509 			follow_space_ct = dh_len - (lead_space_ct + edevlen);
510 			if (do_disk & DISK_ERRORS)
511 				follow_space_ct -= DISK_ERROR_LEN;
512 			if ((do_disk & DISK_EXTENDED) && do_conversions)
513 				follow_space_ct -= DEVICE_LEN;
514 
515 			push_out("%1$*2$.*2$s%3$s%4$*5$.*5$s", one_blank,
516 			    lead_space_ct, edev, one_blank, follow_space_ct);
517 		} else
518 			push_out("%56s", edev);
519 	} else
520 		push_out(edev);
521 }
522 
523 /*
524  * In extended mode headers, we don't want to reprint the header on
525  * signals as they are printed every time anyways.
526  */
527 static void
528 printxhdr(void)
529 {
530 
531 	/*
532 	 * Vertical mode headers
533 	 */
534 	if (do_disk & DISK_EXTENDED)
535 		setup(write_core_header);
536 	if (do_disk & DISK_ERRORS)
537 		setup(print_err_hdr);
538 
539 	if (do_conversions) {
540 		setup(do_newline);
541 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
542 			setup(print_disk_header);
543 		setup(do_newline);
544 	} else {
545 		if (do_tty)
546 			setup(print_tty_hdr1);
547 		if (do_cpu)
548 			setup(print_cpu_hdr1);
549 		setup(do_newline);
550 
551 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
552 			setup(print_disk_header);
553 		if (do_tty)
554 			setup(print_tty_hdr2);
555 		if (do_cpu)
556 			setup(print_cpu_hdr2);
557 		setup(do_newline);
558 	}
559 }
560 
561 /*
562  * Write out a line for this disk - note that show_disk writes out
563  * full lines or blocks for each selected disk.
564  */
565 static void
566 show_disk(void *v1, void *v2, void *data)
567 {
568 	uint32_t err_counters[NUMBER_OF_ERR_COUNTERS];
569 	boolean_t display_err_counters = do_disk & DISK_ERRORS;
570 	struct iodev_snapshot *old = (struct iodev_snapshot *)v1;
571 	struct iodev_snapshot *new = (struct iodev_snapshot *)v2;
572 	int *count = (int *)data;
573 	double rps = 0, wps = 0, tps = 0, mtps, krps = 0, kwps = 0;
574 	double kps = 0, avw = 0, avr = 0, w_pct = 0, r_pct = 0;
575 	double wserv = 0, rserv = 0, serv = 0;
576 	double iosize;	/* kb/sec or MB/sec */
577 	double etime, hr_etime;
578 	char *disk_name;
579 	u_longlong_t ldeltas;
580 	uint_t udeltas;
581 	uint64_t t_delta;
582 	uint64_t w_delta;
583 	uint64_t r_delta;
584 	int doit = 1;
585 	uint_t toterrs;
586 	char *fstr;
587 
588 	if (new == NULL)
589 		return;
590 
591 	switch (show_disk_mode) {
592 	case SHOW_FIRST_ONLY:
593 		if (count != NULL && *count)
594 			return;
595 		break;
596 
597 	case SHOW_SECOND_ONWARDS:
598 		if (count != NULL && !*count) {
599 			(*count)++;
600 			return;
601 		}
602 		break;
603 
604 	default:
605 		break;
606 	}
607 
608 	disk_name = do_conversions ? new->is_pretty : new->is_name;
609 	disk_name = disk_name ? disk_name : new->is_name;
610 
611 	/*
612 	 * Only do if we want IO stats - Avoids errors traveling this
613 	 * section if that's all we want to see.
614 	 */
615 	if (do_disk & DISK_IO_MASK) {
616 		if (old) {
617 			t_delta = hrtime_delta(old->is_snaptime,
618 			    new->is_snaptime);
619 		} else {
620 			t_delta = hrtime_delta(new->is_crtime,
621 			    new->is_snaptime);
622 		}
623 
624 		if (new->is_nr_children) {
625 			if (new->is_type == IODEV_CONTROLLER) {
626 				t_delta /= new->is_nr_children;
627 			} else if ((new->is_type == IODEV_IOPATH_LT) ||
628 			    (new->is_type == IODEV_IOPATH_LI)) {
629 				/* synthetic path */
630 				if (!old) {
631 					t_delta = new->is_crtime;
632 				}
633 				t_delta /= new->is_nr_children;
634 			}
635 		}
636 
637 		hr_etime = (double)t_delta;
638 		if (hr_etime == 0.0)
639 			hr_etime = (double)NANOSEC;
640 		etime = hr_etime / (double)NANOSEC;
641 
642 		/* reads per second */
643 		udeltas = u32_delta(old ? old->is_stats.reads : 0,
644 		    new->is_stats.reads);
645 		rps = (double)udeltas;
646 		rps /= etime;
647 
648 		/* writes per second */
649 		udeltas = u32_delta(old ? old->is_stats.writes : 0,
650 		    new->is_stats.writes);
651 		wps = (double)udeltas;
652 		wps /= etime;
653 
654 		tps = rps + wps;
655 			/* transactions per second */
656 
657 		/*
658 		 * report throughput as either kb/sec or MB/sec
659 		 */
660 
661 		if (!do_megabytes)
662 			iosize = 1024.0;
663 		else
664 			iosize = 1048576.0;
665 
666 		ldeltas = ull_delta(old ? old->is_stats.nread : 0,
667 		    new->is_stats.nread);
668 		if (ldeltas) {
669 			krps = (double)ldeltas;
670 			krps /= etime;
671 			krps /= iosize;
672 		} else
673 			krps = 0.0;
674 
675 		ldeltas = ull_delta(old ? old->is_stats.nwritten : 0,
676 		    new->is_stats.nwritten);
677 		if (ldeltas) {
678 			kwps = (double)ldeltas;
679 			kwps /= etime;
680 			kwps /= iosize;
681 		} else
682 			kwps = 0.0;
683 
684 		/*
685 		 * Blocks transferred per second
686 		 */
687 		kps = krps + kwps;
688 
689 		/*
690 		 * Average number of wait transactions waiting
691 		 */
692 		w_delta = hrtime_delta((u_longlong_t)
693 		    (old ? old->is_stats.wlentime : 0),
694 		    new->is_stats.wlentime);
695 		if (w_delta) {
696 			avw = (double)w_delta;
697 			avw /= hr_etime;
698 		} else
699 			avw = 0.0;
700 
701 		/*
702 		 * Average number of run transactions waiting
703 		 */
704 		r_delta = hrtime_delta(old ? old->is_stats.rlentime : 0,
705 		    new->is_stats.rlentime);
706 		if (r_delta) {
707 			avr = (double)r_delta;
708 			avr /= hr_etime;
709 		} else
710 			avr = 0.0;
711 
712 		/*
713 		 * Average wait service time in milliseconds
714 		 */
715 		if (tps > 0.0 && (avw != 0.0 || avr != 0.0)) {
716 			mtps = 1000.0 / tps;
717 			if (avw != 0.0)
718 				wserv = avw * mtps;
719 			else
720 				wserv = 0.0;
721 
722 			if (avr != 0.0)
723 				rserv = avr * mtps;
724 			else
725 				rserv = 0.0;
726 			serv = rserv + wserv;
727 		} else {
728 			rserv = 0.0;
729 			wserv = 0.0;
730 			serv = 0.0;
731 		}
732 
733 		/* % of time there is a transaction waiting for service */
734 		t_delta = hrtime_delta(old ? old->is_stats.wtime : 0,
735 		    new->is_stats.wtime);
736 		if (t_delta) {
737 			w_pct = (double)t_delta;
738 			w_pct /= hr_etime;
739 			w_pct *= 100.0;
740 
741 			/*
742 			 * Average the wait queue utilization over the
743 			 * the controller's devices, if this is a controller.
744 			 */
745 			if (new->is_type == IODEV_CONTROLLER)
746 				w_pct /= new->is_nr_children;
747 		} else
748 			w_pct = 0.0;
749 
750 		/* % of time there is a transaction running */
751 		t_delta = hrtime_delta(old ? old->is_stats.rtime : 0,
752 		    new->is_stats.rtime);
753 		if (t_delta) {
754 			r_pct = (double)t_delta;
755 			r_pct /= hr_etime;
756 			r_pct *= 100.0;
757 
758 			/*
759 			 * Average the percent busy over the controller's
760 			 * devices, if this is a controller.
761 			 */
762 			if (new->is_type == IODEV_CONTROLLER)
763 				w_pct /= new->is_nr_children;
764 		} else {
765 			r_pct = 0.0;
766 		}
767 
768 		/* % of time there is a transaction running */
769 		if (do_interval) {
770 			rps	*= etime;
771 			wps	*= etime;
772 			tps	*= etime;
773 			krps	*= etime;
774 			kwps	*= etime;
775 			kps	*= etime;
776 		}
777 	}
778 
779 	if (do_disk & (DISK_EXTENDED | DISK_ERRORS)) {
780 		if ((!do_conversions) && ((suppress_zero == 0) ||
781 		    ((do_disk & DISK_EXTENDED) == 0))) {
782 			if (do_raw == 0) {
783 				push_out("%-*.*s",
784 				    iodevs_nl, iodevs_nl, disk_name);
785 			} else {
786 				push_out(disk_name);
787 			}
788 		}
789 	}
790 
791 	/*
792 	 * The error counters are read first (if asked for and if they are
793 	 * available).
794 	 */
795 	bzero(err_counters, sizeof (err_counters));
796 	toterrs = 0;
797 	if (display_err_counters && (new->is_errors.ks_data != NULL)) {
798 		kstat_named_t	*knp;
799 		int		i;
800 
801 		knp = KSTAT_NAMED_PTR(&new->is_errors);
802 		for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++) {
803 			switch (knp[i].data_type) {
804 				case KSTAT_DATA_ULONG:
805 				case KSTAT_DATA_ULONGLONG:
806 					err_counters[i] = knp[i].value.ui32;
807 					toterrs += knp[i].value.ui32;
808 					break;
809 				default:
810 					break;
811 			}
812 		}
813 	}
814 
815 	switch (do_disk & DISK_IO_MASK) {
816 	case DISK_OLD:
817 		if (do_raw == 0)
818 			fstr = "%3.0f %3.0f %4.0f  ";
819 		else
820 			fstr = "%.0f,%.0f,%.0f";
821 		push_out(fstr, kps, tps, serv);
822 		break;
823 	case DISK_NEW:
824 		if (do_raw == 0)
825 			fstr = "%3.0f %3.0f %4.1f  ";
826 		else
827 			fstr = "%.0f,%.0f,%.1f";
828 		push_out(fstr, rps, wps, r_pct);
829 		break;
830 	case DISK_EXTENDED:
831 		if (suppress_zero) {
832 			if (fzero(rps) && fzero(wps) && fzero(krps) &&
833 			    fzero(kwps) && fzero(avw) && fzero(avr) &&
834 			    fzero(serv) && fzero(w_pct) && fzero(r_pct) &&
835 			    (toterrs == 0)) {
836 				doit = 0;
837 				display_err_counters = B_FALSE;
838 			} else if (do_conversions == 0) {
839 				if (do_raw == 0) {
840 					push_out("%-*.*s",
841 					    iodevs_nl, iodevs_nl, disk_name);
842 				} else {
843 					push_out(disk_name);
844 				}
845 			}
846 		}
847 		if (doit) {
848 			if (!do_conversions) {
849 				if (do_raw == 0) {
850 					fstr = " %6.1f %6.1f %6.1f %6.1f "
851 					    "%4.1f %4.1f %6.1f %3.0f "
852 					    "%3.0f ";
853 				} else {
854 					fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
855 					    "%.1f,%.0f,%.0f";
856 				}
857 				push_out(fstr, rps, wps, krps, kwps, avw, avr,
858 				    serv, w_pct, r_pct);
859 			} else {
860 				if (do_raw == 0) {
861 					fstr = " %6.1f %6.1f %6.1f %6.1f "
862 					    "%4.1f %4.1f %6.1f %6.1f "
863 					    "%3.0f %3.0f ";
864 				} else {
865 					fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
866 					    "%.1f,%.1f,%.0f,%.0f";
867 				}
868 				push_out(fstr, rps, wps, krps, kwps, avw, avr,
869 				    wserv, rserv, w_pct, r_pct);
870 			}
871 		}
872 		break;
873 	}
874 
875 	if (display_err_counters) {
876 		char	*efstr;
877 		int	i;
878 
879 		if (do_raw == 0) {
880 			if (do_disk == DISK_ERRORS)
881 				push_out(two_blanks);
882 			efstr = "%3u ";
883 		} else {
884 			efstr = "%u";
885 		}
886 
887 		for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++)
888 			push_out(efstr, err_counters[i]);
889 
890 		push_out(efstr, toterrs);
891 	}
892 
893 	if (suppress_zero == 0 || doit == 1) {
894 		if ((do_disk & (DISK_EXTENDED | DISK_ERRORS)) &&
895 		    do_conversions) {
896 			push_out("%s", disk_name);
897 			if (show_mountpts && new->is_dname) {
898 				mnt_t *mount_pt;
899 				char *lu;
900 				char *dnlu;
901 				char lub[SMALL_SCRATCH_BUFLEN];
902 
903 				lu = strrchr(new->is_dname, '/');
904 				if (lu) {
905 					/* only the part after a possible '/' */
906 					dnlu = strrchr(disk_name, '/');
907 					if (dnlu != NULL &&
908 					    strcmp(dnlu, lu) == 0)
909 						lu = new->is_dname;
910 					else {
911 						*lu = 0;
912 						(void) strcpy(lub,
913 						    new->is_dname);
914 						*lu = '/';
915 						(void) strcat(lub, "/");
916 						(void) strcat(lub,
917 						    disk_name);
918 						lu = lub;
919 					}
920 				} else
921 					lu = disk_name;
922 				mount_pt = lookup_mntent_byname(lu);
923 				if (mount_pt) {
924 					if (do_raw == 0)
925 						push_out(" (%s)",
926 						    mount_pt->mount_point);
927 					else
928 						push_out("(%s)",
929 						    mount_pt->mount_point);
930 				}
931 			}
932 		}
933 	}
934 
935 	if ((do_disk & PRINT_VERTICAL) && show_disk_mode != SHOW_FIRST_ONLY)
936 		do_newline();
937 
938 	if (count != NULL)
939 		(*count)++;
940 }
941 
/*
 * Print the option summary on stderr and terminate with exit status 1.
 * Does not return.
 */
static void
usage(void)
{
	/* The text holds no conversion specifications, so it is written raw. */
	static const char usage_text[] =
	    "Usage: iostat [-cCdDeEiImMnpPrstxXYz] "
	    " [-l n] [-T d|u] [disk ...] [interval [count]]\n"
	    "\t\t-c:	report percentage of time system has spent\n"
	    "\t\t\tin user/system/dtrace/idle mode\n"
	    "\t\t-C:	report disk statistics by controller\n"
	    "\t\t-d:	display disk Kb/sec, transfers/sec, avg. \n"
	    "\t\t\tservice time in milliseconds  \n"
	    "\t\t-D:	display disk reads/sec, writes/sec, \n"
	    "\t\t\tpercentage disk utilization \n"
	    "\t\t-e:	report device error summary statistics\n"
	    "\t\t-E:	report extended device error statistics\n"
	    "\t\t-i:	show device IDs for -E output\n"
	    "\t\t-I:	report the counts in each interval,\n"
	    "\t\t\tinstead of rates, where applicable\n"
	    "\t\t-l n:	Limit the number of disks to n\n"
	    "\t\t-m:	Display mount points (most useful with -p)\n"
	    "\t\t-M:	Display data throughput in MB/sec "
	    "instead of Kb/sec\n"
	    "\t\t-n:	convert device names to cXdYtZ format\n"
	    "\t\t-p:	report per-partition disk statistics\n"
	    "\t\t-P:	report per-partition disk statistics only,\n"
	    "\t\t\tno per-device disk statistics\n"
	    "\t\t-r:	Display data in comma separated format\n"
	    "\t\t-s:	Suppress state change messages\n"
	    "\t\t-T d|u	Display a timestamp in date (d) or unix "
	    "time_t (u)\n"
	    "\t\t-t:	display chars read/written to terminals\n"
	    "\t\t-x:	display extended disk statistics\n"
	    "\t\t-X:	display I/O path statistics\n"
	    "\t\t-Y:	display I/O path (I/T/L) statistics\n"
	    "\t\t-z:	Suppress entries with all zero values\n";

	(void) fputs(usage_text, stderr);
	exit(1);
}
979 
980 /*ARGSUSED*/
981 static void
982 show_disk_errors(void *v1, void *v2, void *d)
983 {
984 	struct iodev_snapshot *disk = (struct iodev_snapshot *)v2;
985 	kstat_named_t *knp;
986 	size_t  col;
987 	int	i, len;
988 	char	*dev_name;
989 
990 	if (disk->is_errors.ks_ndata == 0)
991 		return;
992 	if (disk->is_type == IODEV_CONTROLLER)
993 		return;
994 
995 	dev_name = do_conversions ? disk->is_pretty : disk->is_name;
996 	dev_name = dev_name ? dev_name : disk->is_name;
997 
998 	len = strlen(dev_name);
999 	if (len > 20)
1000 		push_out("%s ", dev_name);
1001 	else if (len > 16)
1002 		push_out("%-20.20s ", dev_name);
1003 	else {
1004 		if (do_conversions)
1005 			push_out("%-16.16s ", dev_name);
1006 		else
1007 			push_out("%-9.9s ", dev_name);
1008 	}
1009 	col = 0;
1010 
1011 	knp = KSTAT_NAMED_PTR(&disk->is_errors);
1012 	for (i = 0; i < disk->is_errors.ks_ndata; i++) {
1013 		/* skip kstats that the driver did not kstat_named_init */
1014 		if (knp[i].name[0] == 0)
1015 			continue;
1016 
1017 		col += strlen(knp[i].name);
1018 
1019 		switch (knp[i].data_type) {
1020 			case KSTAT_DATA_CHAR:
1021 			case KSTAT_DATA_STRING:
1022 				if ((strcmp(knp[i].name, "Serial No") == 0) &&
1023 				    do_devid) {
1024 					if (disk->is_devid) {
1025 						push_out("Device Id: %s ",
1026 						    disk->is_devid);
1027 						col += strlen(disk->is_devid);
1028 					} else {
1029 						push_out("Device Id: ");
1030 					}
1031 
1032 					break;
1033 				}
1034 				if (knp[i].data_type == KSTAT_DATA_CHAR) {
1035 					push_out("%s: %-.16s ", knp[i].name,
1036 					    &knp[i].value.c[0]);
1037 					col += strnlen(&knp[i].value.c[0], 16);
1038 				} else {
1039 					push_out("%s: %s ", knp[i].name,
1040 					    KSTAT_NAMED_STR_PTR(&knp[i]));
1041 					col +=
1042 					    KSTAT_NAMED_STR_BUFLEN(&knp[i]) - 1;
1043 				}
1044 				break;
1045 			case KSTAT_DATA_ULONG:
1046 				push_out("%s: %u ", knp[i].name,
1047 				    knp[i].value.ui32);
1048 				col += 4;
1049 				break;
1050 			case KSTAT_DATA_ULONGLONG:
1051 				if (strcmp(knp[i].name, "Size") == 0) {
1052 					do_newline();
1053 					push_out("%s: %2.2fGB <%llu bytes>",
1054 					    knp[i].name,
1055 					    (float)knp[i].value.ui64 /
1056 					    DISK_GIGABYTE,
1057 					    knp[i].value.ui64);
1058 					do_newline();
1059 					col = 0;
1060 					break;
1061 				}
1062 				push_out("%s: %u ", knp[i].name,
1063 				    knp[i].value.ui32);
1064 				col += 4;
1065 				break;
1066 			}
1067 		if ((col >= 62) || (i == 2)) {
1068 			do_newline();
1069 			col = 0;
1070 		}
1071 	}
1072 	if (col > 0) {
1073 		do_newline();
1074 	}
1075 	do_newline();
1076 }
1077 
1078 void
1079 do_args(int argc, char **argv)
1080 {
1081 	int		c;
1082 	int		errflg = 0;
1083 	extern char	*optarg;
1084 	extern int	optind;
1085 
1086 	while ((c = getopt(argc, argv, "tdDxXYCciIpPnmMeEszrT:l:")) != EOF)
1087 		switch (c) {
1088 		case 't':
1089 			do_tty++;
1090 			break;
1091 		case 'd':
1092 			do_disk |= DISK_OLD;
1093 			break;
1094 		case 'D':
1095 			do_disk |= DISK_NEW;
1096 			break;
1097 		case 'x':
1098 			do_disk |= DISK_EXTENDED;
1099 			break;
1100 		case 'X':
1101 			if (do_disk & DISK_IOPATH_LTI)
1102 				errflg++;	/* -Y already used */
1103 			else
1104 				do_disk |= DISK_IOPATH_LI;
1105 			break;
1106 		case 'Y':
1107 			if (do_disk & DISK_IOPATH_LI)
1108 				errflg++;	/* -X already used */
1109 			else
1110 				do_disk |= DISK_IOPATH_LTI;
1111 			break;
1112 		case 'C':
1113 			do_controller++;
1114 			break;
1115 		case 'c':
1116 			do_cpu++;
1117 			break;
1118 		case 'I':
1119 			do_interval++;
1120 			break;
1121 		case 'p':
1122 			do_partitions++;
1123 			break;
1124 		case 'P':
1125 			do_partitions_only++;
1126 			break;
1127 		case 'n':
1128 			do_conversions++;
1129 			break;
1130 		case 'M':
1131 			do_megabytes++;
1132 			break;
1133 		case 'e':
1134 			do_disk |= DISK_ERRORS;
1135 			break;
1136 		case 'E':
1137 			do_disk |= DISK_EXTENDED_ERRORS;
1138 			break;
1139 		case 'i':
1140 			do_devid = 1;
1141 			break;
1142 		case 's':
1143 			suppress_state = 1;
1144 			break;
1145 		case 'z':
1146 			suppress_zero = 1;
1147 			break;
1148 		case 'm':
1149 			show_mountpts = 1;
1150 			break;
1151 		case 'T':
1152 			if (optarg) {
1153 				if (*optarg == 'u')
1154 					timestamp_fmt = UDATE;
1155 				else if (*optarg == 'd')
1156 					timestamp_fmt = DDATE;
1157 				else
1158 					errflg++;
1159 			} else {
1160 				errflg++;
1161 			}
1162 			break;
1163 		case 'r':
1164 			do_raw = 1;
1165 			break;
1166 		case 'l':
1167 			df.if_max_iodevs = safe_strtoi(optarg, "invalid limit");
1168 			if (df.if_max_iodevs < 1)
1169 				usage();
1170 			break;
1171 		case '?':
1172 			errflg++;
1173 	}
1174 
1175 	if ((do_disk & DISK_OLD) && (do_disk & DISK_NEW)) {
1176 		(void) fprintf(stderr, "-d and -D are incompatible.\n");
1177 		usage();
1178 	}
1179 
1180 	if (errflg) {
1181 		usage();
1182 	}
1183 
1184 	/* if no output classes explicity specified, use defaults */
1185 	if (do_tty == 0 && do_disk == 0 && do_cpu == 0)
1186 		do_tty = do_cpu = 1, do_disk = DISK_OLD;
1187 
1188 	/*
1189 	 * multi-path options (-X, -Y) without a specific vertical
1190 	 * output format (-x, -e, -E) imply extended -x format
1191 	 */
1192 	if ((do_disk & (DISK_IOPATH_LI | DISK_IOPATH_LTI)) &&
1193 	    !(do_disk & PRINT_VERTICAL))
1194 		do_disk |= DISK_EXTENDED;
1195 
1196 	/*
1197 	 * If conflicting options take the preferred
1198 	 * -D and -x result in -x
1199 	 * -d or -D and -e or -E gives only whatever -d or -D was specified
1200 	 */
1201 	if ((do_disk & DISK_EXTENDED) && (do_disk & DISK_NORMAL))
1202 		do_disk &= ~DISK_NORMAL;
1203 	if ((do_disk & DISK_NORMAL) && (do_disk & DISK_ERROR_MASK))
1204 		do_disk &= ~DISK_ERROR_MASK;
1205 
1206 	/* nfs, tape, always shown */
1207 	df.if_allowed_types = IODEV_NFS | IODEV_TAPE;
1208 
1209 	/*
1210 	 * If limit == 0 then no command line limit was set, else if any of
1211 	 * the flags that cause unlimited disks were not set,
1212 	 * use the default of 4
1213 	 */
1214 	if (df.if_max_iodevs == 0) {
1215 		df.if_max_iodevs = DEFAULT_LIMIT;
1216 		df.if_skip_floppy = 1;
1217 		if (do_disk & (DISK_EXTENDED | DISK_ERRORS |
1218 		    DISK_EXTENDED_ERRORS)) {
1219 			df.if_max_iodevs = UNLIMITED_IODEVS;
1220 			df.if_skip_floppy = 0;
1221 		}
1222 	}
1223 	if (do_disk) {
1224 		size_t count = 0;
1225 		size_t i = optind;
1226 
1227 		while (i < argc && !isdigit(argv[i][0])) {
1228 			count++;
1229 			i++;
1230 		}
1231 
1232 		/*
1233 		 * "Note:  disks  explicitly  requested
1234 		 * are not subject to this disk limit"
1235 		 */
1236 		if ((count > df.if_max_iodevs) ||
1237 		    (count && (df.if_max_iodevs == UNLIMITED_IODEVS)))
1238 			df.if_max_iodevs = count;
1239 
1240 		df.if_names = safe_alloc(count * sizeof (char *));
1241 		(void) memset(df.if_names, 0, count * sizeof (char *));
1242 
1243 		df.if_nr_names = 0;
1244 		while (optind < argc && !isdigit(argv[optind][0]))
1245 			df.if_names[df.if_nr_names++] = argv[optind++];
1246 	}
1247 	if (optind < argc) {
1248 		interval = safe_strtoi(argv[optind], "invalid interval");
1249 		if (interval < 1)
1250 			fail(0, "invalid interval");
1251 		optind++;
1252 
1253 		if (optind < argc) {
1254 			iter = safe_strtoi(argv[optind], "invalid count");
1255 			if (iter < 1)
1256 				fail(0, "invalid count");
1257 			optind++;
1258 		}
1259 	}
1260 	if (interval == 0)
1261 		iter = 1;
1262 	if (optind < argc)
1263 		usage();
1264 }
1265 
1266 /*
1267  * Driver for doing the extended header formatting. Will produce
1268  * the function stack needed to output an extended header based
1269  * on the options selected.
1270  */
1271 
1272 void
1273 do_format(void)
1274 {
1275 	char	header[SMALL_SCRATCH_BUFLEN] = {0};
1276 	char	ch;
1277 	char	iosz;
1278 	const char    *fstr;
1279 
1280 	disk_header[0] = 0;
1281 	ch = (do_interval ? 'i' : 's');
1282 	iosz = (do_megabytes ? 'M' : 'k');
1283 	if (do_disk & DISK_ERRORS) {
1284 		if (do_raw == 0) {
1285 			(void) sprintf(header, "s/w h/w trn tot ");
1286 		} else
1287 			(void) sprintf(header, "s/w,h/w,trn,tot");
1288 	}
1289 	switch (do_disk & DISK_IO_MASK) {
1290 		case DISK_OLD:
1291 			if (do_raw == 0)
1292 				fstr = "%cp%c tp%c serv  ";
1293 			else
1294 				fstr = "%cp%c,tp%c,serv";
1295 			(void) snprintf(disk_header, sizeof (disk_header),
1296 			    fstr, iosz, ch, ch);
1297 			break;
1298 		case DISK_NEW:
1299 			if (do_raw == 0)
1300 				fstr = "rp%c wp%c util  ";
1301 			else
1302 				fstr = "%rp%c,wp%c,util";
1303 			(void) snprintf(disk_header, sizeof (disk_header),
1304 			    fstr, ch, ch);
1305 			break;
1306 		case DISK_EXTENDED:
1307 			/* This is -x option */
1308 			if (!do_conversions) {
1309 				/* without -n option */
1310 				if (do_raw == 0) {
1311 					/* without -r option */
1312 					(void) snprintf(disk_header,
1313 					    sizeof (disk_header),
1314 					    "%-*.*s    r/%c    w/%c   "
1315 					    "%cr/%c   %cw/%c wait actv  "
1316 					    "svc_t  %%%%w  %%%%b %s",
1317 					    iodevs_nl, iodevs_nl, "device",
1318 					    ch, ch, iosz, ch, iosz, ch, header);
1319 				} else {
1320 					/* with -r option */
1321 					(void) snprintf(disk_header,
1322 					    sizeof (disk_header),
1323 					    "device,r/%c,w/%c,%cr/%c,%cw/%c,"
1324 					    "wait,actv,svc_t,%%%%w,"
1325 					    "%%%%b%s%s",
1326 					    ch, ch, iosz, ch, iosz, ch,
1327 					    *header == '\0' ? "" : ",",
1328 					    header);
1329 					/*
1330 					 * if no -e flag, header == '\0...'
1331 					 * Ternary operator above is to prevent
1332 					 * trailing comma in full disk_header
1333 					 */
1334 				}
1335 			} else {
1336 				/* with -n option */
1337 				if (do_raw == 0) {
1338 					fstr = "    r/%c    w/%c   %cr/%c   "
1339 					    "%cw/%c wait actv wsvc_t asvc_t  "
1340 					    "%%%%w  %%%%b %sdevice";
1341 				} else {
1342 					fstr = "r/%c,w/%c,%cr/%c,%cw/%c,"
1343 					    "wait,actv,wsvc_t,asvc_t,"
1344 					    "%%%%w,%%%%b,%sdevice";
1345 					/*
1346 					 * if -rnxe, "tot" (from -e) and
1347 					 * "device" are run together
1348 					 * due to lack of trailing comma
1349 					 * in 'header'. However, adding
1350 					 * trailing comma to header at
1351 					 * its definition leads to prob-
1352 					 * lems elsewhere so it's added
1353 					 * here in this edge case -rnxe
1354 					 */
1355 					if (*header != '\0')
1356 						(void) strcat(header, ",");
1357 				}
1358 				(void) snprintf(disk_header,
1359 				    sizeof (disk_header),
1360 				    fstr, ch, ch, iosz, ch, iosz,
1361 				    ch, header);
1362 			}
1363 			break;
1364 		default:
1365 			break;
1366 	}
1367 
1368 	/* do DISK_ERRORS header (already added above for DISK_EXTENDED) */
1369 	if ((do_disk & DISK_ERRORS) &&
1370 	    ((do_disk & DISK_IO_MASK) != DISK_EXTENDED)) {
1371 		if (!do_conversions) {
1372 			if (do_raw == 0)
1373 				(void) snprintf(disk_header,
1374 				    sizeof (disk_header), "%-*.*s  %s",
1375 				    iodevs_nl, iodevs_nl, "device", header);
1376 			else
1377 				(void) snprintf(disk_header,
1378 				    sizeof (disk_header), "device,%s", header);
1379 		} else {
1380 			if (do_raw == 0) {
1381 				(void) snprintf(disk_header,
1382 				    sizeof (disk_header),
1383 				    "  %sdevice", header);
1384 			} else {
1385 				(void) snprintf(disk_header,
1386 				    sizeof (disk_header),
1387 				    "%s,device", header);
1388 			}
1389 		}
1390 	} else {
1391 		/*
1392 		 * Need to subtract two characters for the % escape in
1393 		 * the string.
1394 		 */
1395 		dh_len = strlen(disk_header) - 2;
1396 	}
1397 
1398 	/*
1399 	 * -n *and* (-E *or* -e *or* -x)
1400 	 */
1401 	if (do_conversions && (do_disk & PRINT_VERTICAL)) {
1402 		if (do_tty)
1403 			setup(print_tty_hdr1);
1404 		if (do_cpu)
1405 			setup(print_cpu_hdr1);
1406 		if (do_tty || do_cpu)
1407 			setup(do_newline);
1408 		if (do_tty)
1409 			setup(print_tty_hdr2);
1410 		if (do_cpu)
1411 			setup(print_cpu_hdr2);
1412 		if (do_tty || do_cpu)
1413 			setup(do_newline);
1414 		if (do_tty)
1415 			setup(print_tty_data);
1416 		if (do_cpu)
1417 			setup(print_cpu_data);
1418 		if (do_tty || do_cpu)
1419 			setup(do_newline);
1420 		printxhdr();
1421 
1422 		setup(show_all_disks);
1423 	} else {
1424 		/*
1425 		 * These unholy gymnastics are necessary to place CPU/tty
1426 		 * data to the right of the disks/errors for the first
1427 		 * line in vertical mode.
1428 		 */
1429 		if (do_disk & PRINT_VERTICAL) {
1430 			printxhdr();
1431 
1432 			setup(show_first_disk);
1433 			if (do_tty)
1434 				setup(print_tty_data);
1435 			if (do_cpu)
1436 				setup(print_cpu_data);
1437 			setup(do_newline);
1438 
1439 			setup(show_other_disks);
1440 		} else {
1441 			setup(hdrout);
1442 			if (do_tty)
1443 				setup(print_tty_data);
1444 			setup(show_all_disks);
1445 			if (do_cpu)
1446 				setup(print_cpu_data);
1447 		}
1448 
1449 		setup(do_newline);
1450 	}
1451 	if (do_disk & DISK_EXTENDED_ERRORS)
1452 		setup(disk_errors);
1453 }
1454 
1455 /*
1456  * Add a new function to the list of functions
1457  * for this invocation. Once on the stack the
1458  * function is never removed nor does its place
1459  * change.
1460  */
1461 void
1462 setup(void (*nfunc)(void))
1463 {
1464 	format_t *tmp;
1465 
1466 	tmp = safe_alloc(sizeof (format_t));
1467 	tmp->nfunc = nfunc;
1468 	tmp->next = 0;
1469 	if (formatter_end)
1470 		formatter_end->next = tmp;
1471 	else
1472 		formatter_list = tmp;
1473 	formatter_end = tmp;
1474 
1475 }
1476 
1477 /*
1478  * The functions after this comment are devoted to printing
1479  * various parts of the header. They are selected based on the
1480  * options provided when the program was invoked. The functions
1481  * are either directly invoked in printhdr() or are indirectly
1482  * invoked by being placed on the list of functions used when
1483  * extended headers are used.
1484  */
1485 void
1486 print_tty_hdr1(void)
1487 {
1488 	char *fstr;
1489 	char *dstr;
1490 
1491 	if (do_raw == 0) {
1492 		fstr = "%10.10s";
1493 		dstr = "tty    ";
1494 	} else {
1495 		fstr = "%s";
1496 		dstr = "tty";
1497 	}
1498 	push_out(fstr, dstr);
1499 }
1500 
1501 void
1502 print_tty_hdr2(void)
1503 {
1504 	if (do_raw == 0)
1505 		push_out("%-10.10s", " tin tout");
1506 	else
1507 		push_out("tin,tout");
1508 }
1509 
1510 void
1511 print_cpu_hdr1(void)
1512 {
1513 	char *dstr;
1514 
1515 	if (do_raw == 0)
1516 		dstr = "     cpu";
1517 	else
1518 		dstr = "cpu";
1519 	push_out(dstr);
1520 }
1521 
1522 void
1523 print_cpu_hdr2(void)
1524 {
1525 	char *dstr;
1526 
1527 	if (do_raw == 0)
1528 		dstr = " us sy dt id";
1529 	else
1530 		dstr = "us,sy,dt,id";
1531 	push_out(dstr);
1532 }
1533 
1534 /*
1535  * Assumption is that tty data is always first - no need for raw mode leading
1536  * comma.
1537  */
1538 void
1539 print_tty_data(void)
1540 {
1541 	char *fstr;
1542 	uint64_t deltas;
1543 	double raw;
1544 	double outch;
1545 	kstat_t *oldks = NULL;
1546 
1547 	if (oldss)
1548 		oldks = &oldss->s_sys.ss_agg_sys;
1549 
1550 	if (do_raw == 0)
1551 		fstr = " %3.0f %4.0f ";
1552 	else
1553 		fstr = "%.0f,%.0f";
1554 	deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "rawch");
1555 	raw = deltas;
1556 	raw /= getime;
1557 	deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "outch");
1558 	outch = deltas;
1559 	outch /= getime;
1560 	push_out(fstr, raw, outch);
1561 }
1562 
1563 /*
1564  * Write out CPU data
1565  */
1566 void
1567 print_cpu_data(void)
1568 {
1569 	char *fstr;
1570 	uint64_t idle;
1571 	uint64_t user;
1572 	uint64_t kern;
1573 	uint64_t dtrace;
1574 	uint64_t nsec_elapsed;
1575 	kstat_t *oldks = NULL;
1576 
1577 	if (oldss)
1578 		oldks = &oldss->s_sys.ss_agg_sys;
1579 
1580 	if (do_raw == 0)
1581 		fstr = " %2.0f %2.0f %2.0f %2.0f";
1582 	else
1583 		fstr = "%.0f,%.0f,%.0f,%.0f";
1584 
1585 	idle = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_idle");
1586 	user = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_user");
1587 	kern = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_kernel");
1588 	dtrace = kstat_delta(oldks, &newss->s_sys.ss_agg_sys,
1589 	    "cpu_nsec_dtrace");
1590 	nsec_elapsed = newss->s_sys.ss_agg_sys.ks_snaptime -
1591 	    (oldks == NULL ? 0 : oldks->ks_snaptime);
1592 	push_out(fstr, user * percent, kern * percent,
1593 	    dtrace * 100.0 / nsec_elapsed / newss->s_nr_active_cpus,
1594 	    idle * percent);
1595 }
1596 
1597 /*
1598  * Emit the appropriate header.
1599  */
1600 void
1601 hdrout(void)
1602 {
1603 	if (do_raw == 0) {
1604 		if (--tohdr == 0)
1605 			printhdr(0);
1606 	} else if (hdr_out == 0) {
1607 		printhdr(0);
1608 		hdr_out = 1;
1609 	}
1610 }
1611 
1612 /*
1613  * Write out disk errors when -E is specified.
1614  */
1615 void
1616 disk_errors(void)
1617 {
1618 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk_errors, NULL);
1619 }
1620 
1621 void
1622 show_first_disk(void)
1623 {
1624 	int count = 0;
1625 
1626 	show_disk_mode = SHOW_FIRST_ONLY;
1627 
1628 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1629 }
1630 
1631 void
1632 show_other_disks(void)
1633 {
1634 	int count = 0;
1635 
1636 	show_disk_mode = SHOW_SECOND_ONWARDS;
1637 
1638 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1639 }
1640 
1641 void
1642 show_all_disks(void)
1643 {
1644 	int count = 0;
1645 
1646 	show_disk_mode = SHOW_ALL;
1647 
1648 	(void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1649 }
1650 
1651 /*
1652  * Write a newline out and clear the lineout flag.
1653  */
1654 static void
1655 do_newline(void)
1656 {
1657 	if (lineout) {
1658 		(void) putchar('\n');
1659 		lineout = 0;
1660 	}
1661 }
1662 
1663 /*
1664  * Generalized printf function that determines what extra
1665  * to print out if we're in raw mode. At this time we
1666  * don't care about errors.
1667  */
1668 static void
1669 push_out(const char *message, ...)
1670 {
1671 	va_list args;
1672 
1673 	va_start(args, message);
1674 	if (do_raw && lineout == 1)
1675 		(void) putchar(',');
1676 	(void) vprintf(message, args);
1677 	va_end(args);
1678 	lineout = 1;
1679 }
1680 
1681 /*
1682  * Emit the header string when -e is specified.
1683  */
1684 static void
1685 print_err_hdr(void)
1686 {
1687 	char obuf[SMALL_SCRATCH_BUFLEN];
1688 
1689 	if (do_raw) {
1690 		push_out("errors");
1691 		return;
1692 	}
1693 
1694 	if (do_conversions == 0) {
1695 		if (!(do_disk & DISK_EXTENDED)) {
1696 			(void) snprintf(obuf, sizeof (obuf),
1697 			    "%11s", one_blank);
1698 			push_out(obuf);
1699 		}
1700 	} else if (do_disk == DISK_ERRORS)
1701 		push_out(two_blanks);
1702 	else
1703 		push_out(one_blank);
1704 	push_out("---- errors --- ");
1705 }
1706 
1707 /*
1708  * Emit the header string when -e is specified.
1709  */
1710 static void
1711 print_disk_header(void)
1712 {
1713 	push_out(disk_header);
1714 }
1715 
1716 /*
1717  * No, UINTMAX_MAX isn't the right thing here since
1718  * it is #defined to be either INT32_MAX or INT64_MAX
1719  * depending on the whether _LP64 is defined.
1720  *
1721  * We want to handle the odd future case of having
1722  * ulonglong_t be more than 64 bits but we have
1723  * no nice #define MAX value we can drop in place
1724  * without having to change this code in the future.
1725  */
1726 
1727 u_longlong_t
1728 ull_delta(u_longlong_t old, u_longlong_t new)
1729 {
1730 	if (new >= old)
1731 		return (new - old);
1732 	else
1733 		return ((UINT64_MAX - old) + new + 1);
1734 }
1735 
1736 /*
1737  * Take the difference of an unsigned 32
1738  * bit int attempting to cater for
1739  * overflow.
1740  */
1741 uint_t
1742 u32_delta(uint_t old, uint_t new)
1743 {
1744 	if (new >= old)
1745 		return (new - old);
1746 	else
1747 		return ((UINT32_MAX - old) + new + 1);
1748 }
1749 
/*
 * Report whether value is "zero" for the purposes of standard iostat
 * output, i.e. lies in [0.0, EPSILON). This is exactly what that
 * output needs, so make sure to use it only for that.
 */
#define	EPSILON	(0.1)
static int
fzero(double value)
{
	/* negative values never count as zero */
	if (value < 0.0)
		return (0);

	return (value < EPSILON);
}
1760 
/*
 * Convert the decimal string val to an int.
 *
 * fail() is called with errmsg followed by val when the string is
 * empty, has trailing non-numeric characters, is out of range for
 * strtol(), or does not fit in an int. The last check stops a large
 * value from being silently truncated into a small, valid-looking one.
 */
static int
safe_strtoi(char const *val, char *errmsg)
{
	char *end;
	long tmp;

	errno = 0;
	tmp = strtol(val, &end, 10);
	if (end == val || *end != '\0' || errno != 0 ||
	    (long)(int)tmp != tmp)
		fail(0, "%s %s", errmsg, val);
	return ((int)tmp);
}
1773