xref: /freebsd/lib/libdevstat/devstat.c (revision 0640d357f29fb1c0daaaffadd0416c5981413afd)
1 /*
2  * Copyright (c) 1997, 1998 Kenneth D. Merry.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *	$Id: devstat.c,v 1.3 1998/09/20 00:11:09 ken Exp $
29  */
30 
31 #include <sys/types.h>
32 #include <sys/sysctl.h>
33 #include <sys/errno.h>
34 #include <sys/dkstat.h>
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include "devstat.h"
43 
/*
 * Buffer into which the functions in this file write a human-readable
 * description of the failure before returning an error.
 */
char devstat_errbuf[DEVSTAT_ERRBUF_SIZE];
45 
/*
 * Table to match descriptive strings with device types.  These are in
 * order from most common to least common to speed search time.
 *
 * Each entry holds, in order: the keyword string, the DEVSTAT_TYPE_*
 * value that keyword stands for, and the DEVSTAT_MATCH_* category the
 * keyword belongs to (device type, interface, or passthrough).  The
 * final entry, with a NULL string, terminates the table.
 */
struct devstat_match_table match_table[] = {
	{"da",		DEVSTAT_TYPE_DIRECT,	DEVSTAT_MATCH_TYPE},
	{"cd",		DEVSTAT_TYPE_CDROM,	DEVSTAT_MATCH_TYPE},
	{"scsi",	DEVSTAT_TYPE_IF_SCSI,	DEVSTAT_MATCH_IF},
	{"ide",		DEVSTAT_TYPE_IF_IDE,	DEVSTAT_MATCH_IF},
	{"other",	DEVSTAT_TYPE_IF_OTHER,	DEVSTAT_MATCH_IF},
	{"worm",	DEVSTAT_TYPE_WORM,	DEVSTAT_MATCH_TYPE},
	{"sa",		DEVSTAT_TYPE_SEQUENTIAL,DEVSTAT_MATCH_TYPE},
	{"pass",	DEVSTAT_TYPE_PASS,	DEVSTAT_MATCH_PASS},
	{"optical",	DEVSTAT_TYPE_OPTICAL,	DEVSTAT_MATCH_TYPE},
	{"array",	DEVSTAT_TYPE_STORARRAY,	DEVSTAT_MATCH_TYPE},
	{"changer",	DEVSTAT_TYPE_CHANGER,	DEVSTAT_MATCH_TYPE},
	{"scanner",	DEVSTAT_TYPE_SCANNER,	DEVSTAT_MATCH_TYPE},
	{"printer",	DEVSTAT_TYPE_PRINTER,	DEVSTAT_MATCH_TYPE},
	{"floppy",	DEVSTAT_TYPE_FLOPPY,	DEVSTAT_MATCH_TYPE},
	{"proc",	DEVSTAT_TYPE_PROCESSOR,	DEVSTAT_MATCH_TYPE},
	{"comm",	DEVSTAT_TYPE_COMM,	DEVSTAT_MATCH_TYPE},
	{"enclosure",	DEVSTAT_TYPE_ENCLOSURE,	DEVSTAT_MATCH_TYPE},
	{NULL,		0,			0}
};
70 
/*
 * Local function declarations.
 *
 * compare_select() is the comparison callback handed to qsort() by
 * selectdevs() when it sorts the selection array in 'top' mode.
 */
static int compare_select(const void *arg1, const void *arg2);
75 
76 int
77 getnumdevs(void)
78 {
79 	size_t numdevsize;
80 	int numdevs;
81 	char *func_name = "getnumdevs";
82 
83 	numdevsize = sizeof(int);
84 
85 	/*
86 	 * Find out how many devices we have in the system.
87 	 */
88 	if (sysctlbyname("kern.devstat.numdevs", &numdevs,
89 			 &numdevsize, NULL, 0) == -1) {
90 		sprintf(devstat_errbuf, "%s: error getting number of devices\n"
91 			"%s: %s", func_name, func_name, strerror(errno));
92 		return(-1);
93 	} else
94 		return(numdevs);
95 }
96 
97 /*
98  * This is an easy way to get the generation number, but the generation is
99  * supplied in a more atmoic manner by the kern.devstat.all sysctl.
100  * Because this generation sysctl is separate from the statistics sysctl,
101  * the device list and the generation could change between the time that
102  * this function is called and the device list is retreived.
103  */
104 long
105 getgeneration(void)
106 {
107 	size_t gensize;
108 	long generation;
109 	char *func_name = "getgeneration";
110 
111 	gensize = sizeof(long);
112 
113 	/*
114 	 * Get the current generation number.
115 	 */
116 	if (sysctlbyname("kern.devstat.generation", &generation,
117 			 &gensize, NULL, 0) == -1) {
118 		sprintf(devstat_errbuf,"%s: error getting devstat generation\n"
119 			"%s: %s", func_name, func_name, strerror(errno));
120 		return(-1);
121 	} else
122 		return(generation);
123 }
124 
125 /*
126  * Get the current devstat version.  The return value of this function
127  * should be compared with DEVSTAT_VERSION, which is defined in
128  * sys/devicestat.h.  This will enable userland programs to determine
129  * whether they are out of sync with the kernel.
130  */
131 int
132 getversion(void)
133 {
134 	size_t versize;
135 	int version;
136 	char *func_name = "getversion";
137 
138 	versize = sizeof(int);
139 
140 	/*
141 	 * Get the current devstat version.
142 	 */
143 	if (sysctlbyname("kern.devstat.version", &version, &versize,
144 			 NULL, 0) == -1) {
145 		sprintf(devstat_errbuf, "%s: error getting devstat version\n"
146 			"%s: %s", func_name, func_name, strerror(errno));
147 		return(-1);
148 	} else
149 		return(version);
150 }
151 
152 /*
153  * Check the devstat version we know about against the devstat version the
154  * kernel knows about.  If they don't match, print an error into the
155  * devstat error buffer, and return -1.  If they match, return 0.
156  */
157 int
158 checkversion(void)
159 {
160 	int retval = 0;
161 	int errlen = 0;
162 	char *func_name = "checkversion";
163 	int version;
164 
165 	version = getversion();
166 
167 	if (version != DEVSTAT_VERSION) {
168 		int buflen = 0;
169 		char tmpstr[256];
170 
171 		/*
172 		 * This is really pretty silly, but basically the idea is
173 		 * that if getversion() returns an error (i.e. -1), then it
174 		 * has printed an error message in the buffer.  Therefore,
175 		 * we need to add a \n to the end of that message before we
176 		 * print our own message in the buffer.
177 		 */
178 		if (version == -1) {
179 			buflen = strlen(devstat_errbuf);
180 			errlen = snprintf(tmpstr, sizeof(tmpstr), "\n");
181 			strncat(devstat_errbuf, tmpstr,
182 				DEVSTAT_ERRBUF_SIZE - buflen - 1);
183 			buflen += errlen;
184 		}
185 
186 		errlen = snprintf(tmpstr, sizeof(tmpstr),
187 				  "%s: userland devstat version %d is not "
188 				  "the same as the kernel\n%s: devstat "
189 				  "version %d\n", func_name, DEVSTAT_VERSION,
190 				  func_name, version);
191 
192 		if (version == -1) {
193 			strncat(devstat_errbuf, tmpstr,
194 				DEVSTAT_ERRBUF_SIZE - buflen - 1);
195 			buflen += errlen;
196 		} else {
197 			strncpy(devstat_errbuf, tmpstr, DEVSTAT_ERRBUF_SIZE);
198 			devstat_errbuf[DEVSTAT_ERRBUF_SIZE - 1] = '\0';
199 		}
200 
201                 if (version < DEVSTAT_VERSION)
202 			snprintf(tmpstr, sizeof(tmpstr),
203 				 "%s: you really should know better"
204 				 " than to upgrade your\n%s: "
205 				 "userland binaries without "
206 				 "upgrading your kernel",
207 				 func_name, func_name);
208                 else
209 			snprintf(tmpstr, sizeof(tmpstr),
210 				 "%s: you really should know better"
211 				 " than to upgrade your kernel "
212 				 "without\n%s: upgrading your "
213 				 "userland binaries",
214 				 func_name, func_name);
215 
216 		strncat(devstat_errbuf, tmpstr,
217 			DEVSTAT_ERRBUF_SIZE - buflen - 1);
218 
219 		retval = -1;
220 	}
221 
222 	return(retval);
223 }
224 
225 /*
226  * Get the current list of devices and statistics, and the current
227  * generation number.
228  *
229  * Return values:
230  * -1  -- error
231  *  0  -- device list is unchanged
232  *  1  -- device list has changed
233  */
234 int
235 getdevs(struct statinfo *stats)
236 {
237 	int error;
238 	size_t dssize;
239 	int oldnumdevs;
240 	long oldgeneration;
241 	int retval = 0;
242 	struct devinfo *dinfo;
243 	char *func_name = "getdevs";
244 
245 	dinfo = stats->dinfo;
246 
247 	if (dinfo == NULL) {
248 		sprintf(devstat_errbuf, "%s: stats->dinfo was NULL", func_name);
249 		return(-1);
250 	}
251 
252 	oldnumdevs = dinfo->numdevs;
253 	oldgeneration = dinfo->generation;
254 
255 	/*
256 	 * If this is our first time through, mem_ptr will be null.
257 	 */
258 	if (dinfo->mem_ptr == NULL) {
259 		/*
260 		 * Get the number of devices.  If it's negative, it's an
261 		 * error.  Don't bother setting the error string, since
262 		 * getnumdevs() has already done that for us.
263 		 */
264 		if ((dinfo->numdevs = getnumdevs()) < 0)
265 			return(-1);
266 
267 		/*
268 		 * The kern.devstat.all sysctl returns the current generation
269 		 * number, as well as all the devices.  So we need four
270 		 * bytes more.
271 		 */
272 		dssize =(dinfo->numdevs * sizeof(struct devstat)) +sizeof(long);
273 		dinfo->mem_ptr = (u_int8_t *)malloc(dssize);
274 	} else
275 		dssize =(dinfo->numdevs * sizeof(struct devstat)) +sizeof(long);
276 
277 	/* Get the current time when we get the stats */
278 	gettimeofday(&stats->busy_time, NULL);
279 
280 	/*
281 	 * Request all of the devices.  We only really allow for one
282 	 * ENOMEM failure.  It would, of course, be possible to just go in
283 	 * a loop and keep reallocing the device structure until we don't
284 	 * get ENOMEM back.  I'm not sure it's worth it, though.  If
285 	 * devices are being added to the system that quickly, maybe the
286 	 * user can just wait until all devices are added.
287 	 */
288 	if ((error = sysctlbyname("kern.devstat.all", dinfo->mem_ptr,
289 	     &dssize, NULL, 0)) == -1) {
290 		/*
291 		 * If we get ENOMEM back, that means that there are
292 		 * more devices now, so we need to allocate more
293 		 * space for the device array.
294 		 */
295 		if (errno == ENOMEM) {
296 			/*
297 			 * No need to set the error string here, getnumdevs()
298 			 * will do that if it fails.
299 			 */
300 			if ((dinfo->numdevs = getnumdevs()) < 0)
301 				return(-1);
302 
303 			dssize = (dinfo->numdevs * sizeof(struct devstat)) +
304 				sizeof(long);
305 			dinfo->mem_ptr = (u_int8_t *)realloc(dinfo->mem_ptr,
306 							     dssize);
307 			if ((error = sysctlbyname("kern.devstat.all",
308 			    dinfo->mem_ptr, &dssize, NULL, 0)) == -1) {
309 				sprintf(devstat_errbuf,
310 					"%s: error getting device stats\n"
311 					"%s: %s", func_name, func_name,
312 					strerror(errno));
313 				return(-1);
314 			}
315 		} else {
316 			sprintf(devstat_errbuf,
317 				"%s: error getting device stats\n"
318 				"%s: %s", func_name, func_name,
319 				strerror(errno));
320 			return(-1);
321 		}
322 	}
323 
324 	/*
325 	 * The sysctl spits out the generation as the first four bytes,
326 	 * then all of the device statistics structures.
327 	 */
328 	dinfo->generation = *(long *)dinfo->mem_ptr;
329 
330 	/*
331 	 * If the generation has changed, and if the current number of
332 	 * devices is not the same as the number of devices recorded in the
333 	 * devinfo structure, it is likely that the device list has shrunk.
334 	 * The reason that it is likely that the device list has shrunk in
335 	 * this case is that if the device list has grown, the sysctl above
336 	 * will return an ENOMEM error, and we will reset the number of
337 	 * devices and reallocate the device array.  If the second sysctl
338 	 * fails, we will return an error and therefore never get to this
339 	 * point.  If the device list has shrunk, the sysctl will not
340 	 * return an error since we have more space allocated than is
341 	 * necessary.  So, in the shrinkage case, we catch it here and
342 	 * reallocate the array so that we don't use any more space than is
343 	 * necessary.
344 	 */
345 	if (oldgeneration != dinfo->generation) {
346 		if (getnumdevs() != dinfo->numdevs) {
347 			if ((dinfo->numdevs = getnumdevs()) < 0)
348 				return(-1);
349 			dssize = (dinfo->numdevs * sizeof(struct devstat)) +
350 				sizeof(long);
351 			dinfo->mem_ptr = (u_int8_t *)realloc(dinfo->mem_ptr,
352 							     dssize);
353 		}
354 		retval = 1;
355 	}
356 
357 	dinfo->devices = (struct devstat *)(dinfo->mem_ptr + sizeof(long));
358 
359 	return(retval);
360 }
361 
362 /*
363  * selectdevs():
364  *
365  * Devices are selected/deselected based upon the following criteria:
366  * - devices specified by the user on the command line
367  * - devices matching any device type expressions given on the command line
368  * - devices with the highest I/O, if 'top' mode is enabled
369  * - the first n unselected devices in the device list, if maxshowdevs
370  *   devices haven't already been selected and if the user has not
371  *   specified any devices on the command line and if we're in "add" mode.
372  *
373  * Input parameters:
374  * - device selection list (dev_select)
375  * - current number of devices selected (num_selected)
376  * - total number of devices in the selection list (num_selections)
377  * - devstat generation as of the last time selectdevs() was called
378  *   (select_generation)
379  * - current devstat generation (current_generation)
380  * - current list of devices and statistics (devices)
381  * - number of devices in the current device list (numdevs)
382  * - compiled version of the command line device type arguments (matches)
383  *   - This is optional.  If the number of devices is 0, this will be ignored.
384  *   - The matching code pays attention to the current selection mode.  So
385  *     if you pass in a matching expression, it will be evaluated based
386  *     upon the selection mode that is passed in.  See below for details.
387  * - number of device type matching expressions (num_matches)
388  *   - Set to 0 to disable the matching code.
389  * - list of devices specified on the command line by the user (dev_selections)
390  * - number of devices selected on the command line by the user
391  *   (num_dev_selections)
392  * - Our selection mode.  There are four different selection modes:
393  *      - add mode.  (DS_SELECT_ADD) Any devices matching devices explicitly
394  *        selected by the user or devices matching a pattern given by the
395  *        user will be selected in addition to devices that are already
396  *        selected.  Additional devices will be selected, up to maxshowdevs
397  *        number of devices.
398  *      - only mode. (DS_SELECT_ONLY)  Only devices matching devices
399  *        explicitly given by the user or devices matching a pattern
400  *        given by the user will be selected.  No other devices will be
401  *        selected.
402  *      - addonly mode.  (DS_SELECT_ADDONLY)  This is similar to add and
403  *        only.  Basically, this will not de-select any devices that are
404  *        current selected, as only mode would, but it will also not
405  *        gratuitously select up to maxshowdevs devices as add mode would.
406  *      - remove mode.  (DS_SELECT_REMOVE)  Any devices matching devices
407  *        explicitly selected by the user or devices matching a pattern
408  *        given by the user will be de-selected.
409  * - maximum number of devices we can select (maxshowdevs)
410  * - flag indicating whether or not we're in 'top' mode (perf_select)
411  *
412  * Output data:
413  * - the device selection list may be modified and passed back out
414  * - the number of devices selected and the total number of items in the
415  *   device selection list may be changed
416  * - the selection generation may be changed to match the current generation
417  *
418  * Return values:
419  * -1  -- error
420  *  0  -- selected devices are unchanged
421  *  1  -- selected devices changed
422  */
423 int
424 selectdevs(struct device_selection **dev_select, int *num_selected,
425 	   int *num_selections, long *select_generation,
426 	   long current_generation, struct devstat *devices, int numdevs,
427 	   struct devstat_match *matches, int num_matches,
428 	   char **dev_selections, int num_dev_selections,
429 	   devstat_select_mode select_mode, int maxshowdevs, int perf_select)
430 {
431 	register int i, j, k;
432 	int init_selections = 0, init_selected_var = 0;
433 	struct device_selection *old_dev_select = NULL;
434 	int old_num_selections = 0, old_num_selected;
435 	int selection_number = 0;
436 	int changed = 0, found = 0;
437 
438 	if ((dev_select == NULL) || (devices == NULL) || (numdevs <= 0))
439 		return(-1);
440 
441 	/*
442 	 * We always want to make sure that we have as many dev_select
443 	 * entries as there are devices.
444 	 */
445 	/*
446 	 * In this case, we haven't selected devices before.
447 	 */
448 	if (*dev_select == NULL) {
449 		*dev_select = (struct device_selection *)malloc(numdevs *
450 			sizeof(struct device_selection));
451 		*select_generation = current_generation;
452 		init_selections = 1;
453 		changed = 1;
454 	/*
455 	 * In this case, we have selected devices before, but the device
456 	 * list has changed since we last selected devices, so we need to
457 	 * either enlarge or reduce the size of the device selection list.
458 	 */
459 	} else if (*num_selections != numdevs) {
460 		*dev_select = (struct device_selection *)realloc(*dev_select,
461 			numdevs * sizeof(struct device_selection));
462 		*select_generation = current_generation;
463 		init_selections = 1;
464 	/*
465 	 * In this case, we've selected devices before, and the selection
466 	 * list is the same size as it was the last time, but the device
467 	 * list has changed.
468 	 */
469 	} else if (*select_generation < current_generation) {
470 		*select_generation = current_generation;
471 		init_selections = 1;
472 	}
473 
474 	/*
475 	 * If we're in "only" mode, we want to clear out the selected
476 	 * variable since we're going to select exactly what the user wants
477 	 * this time through.
478 	 */
479 	if (select_mode == DS_SELECT_ONLY)
480 		init_selected_var = 1;
481 
482 	/*
483 	 * In all cases, we want to back up the number of selected devices.
484 	 * It is a quick and accurate way to determine whether the selected
485 	 * devices have changed.
486 	 */
487 	old_num_selected = *num_selected;
488 
489 	/*
490 	 * We want to make a backup of the current selection list if
491 	 * the list of devices has changed, or if we're in performance
492 	 * selection mode.  In both cases, we don't want to make a backup
493 	 * if we already know for sure that the list will be different.
494 	 * This is certainly the case if this is our first time through the
495 	 * selection code.
496 	 */
497 	if (((init_selected_var != 0) || (init_selections != 0)
498 	 || (perf_select != 0)) && (changed == 0)){
499 		old_dev_select = (struct device_selection *)malloc(
500 		    *num_selections * sizeof(struct device_selection));
501 		old_num_selections = *num_selections;
502 		bcopy(*dev_select, old_dev_select,
503 		    sizeof(struct device_selection) * *num_selections);
504 	}
505 
506 	if (init_selections != 0) {
507 		bzero(*dev_select, sizeof(struct device_selection) * numdevs);
508 
509 		for (i = 0; i < numdevs; i++) {
510 			(*dev_select)[i].device_number =
511 				devices[i].device_number;
512 			strncpy((*dev_select)[i].device_name,
513 				devices[i].device_name,
514 				DEVSTAT_NAME_LEN);
515 			(*dev_select)[i].device_name[DEVSTAT_NAME_LEN - 1]='\0';
516 			(*dev_select)[i].unit_number = devices[i].unit_number;
517 			(*dev_select)[i].position = i;
518 		}
519 		*num_selections = numdevs;
520 	} else if (init_selected_var != 0) {
521 		for (i = 0; i < numdevs; i++)
522 			(*dev_select)[i].selected = 0;
523 	}
524 
525 	/* we haven't gotten around to selecting anything yet.. */
526 	if ((select_mode == DS_SELECT_ONLY) || (init_selections != 0)
527 	 || (init_selected_var != 0))
528 		*num_selected = 0;
529 
530 	/*
531 	 * Look through any devices the user specified on the command line
532 	 * and see if they match known devices.  If so, select them.
533 	 */
534 	for (i = 0; (i < *num_selections) && (num_dev_selections > 0); i++) {
535 		char tmpstr[80];
536 
537 		snprintf(tmpstr, sizeof(tmpstr), "%s%d",
538 			(*dev_select)[i].device_name,
539 			(*dev_select)[i].unit_number);
540 		for (j = 0; j < num_dev_selections; j++) {
541 			if (strcmp(tmpstr, dev_selections[j]) == 0) {
542 				/*
543 				 * Here we do different things based on the
544 				 * mode we're in.  If we're in add or
545 				 * addonly mode, we only select this device
546 				 * if it hasn't already been selected.
547 				 * Otherwise, we would be unnecessarily
548 				 * changing the selection order and
549 				 * incrementing the selection count.  If
550 				 * we're in only mode, we unconditionally
551 				 * select this device, since in only mode
552 				 * any previous selections are erased and
553 				 * manually specified devices are the first
554 				 * ones to be selected.  If we're in remove
555 				 * mode, we de-select the specified device and
556 				 * decrement the selection count.
557 				 */
558 				switch(select_mode) {
559 				case DS_SELECT_ADD:
560 				case DS_SELECT_ADDONLY:
561 					if ((*dev_select)[i].selected)
562 						break;
563 					/* FALLTHROUGH */
564 				case DS_SELECT_ONLY:
565 					(*dev_select)[i].selected =
566 						++selection_number;
567 					(*num_selected)++;
568 					break;
569 				case DS_SELECT_REMOVE:
570 					(*dev_select)[i].selected = 0;
571 					(*num_selected)--;
572 					/*
573 					 * This isn't passed back out, we
574 					 * just use it to keep track of
575 					 * how many devices we've removed.
576 					 */
577 					num_dev_selections--;
578 					break;
579 				}
580 				break;
581 			}
582 		}
583 	}
584 
585 	/*
586 	 * Go through the user's device type expressions and select devices
587 	 * accordingly.  We only do this if the number of devices already
588 	 * selected is less than the maximum number we can show.
589 	 */
590 	for (i = 0; (i < num_matches) && (*num_selected < maxshowdevs); i++) {
591 		/* We should probably indicate some error here */
592 		if ((matches[i].match_fields == DEVSTAT_MATCH_NONE)
593 		 || (matches[i].num_match_categories <= 0))
594 			continue;
595 
596 		for (j = 0; j < numdevs; j++) {
597 			int num_match_categories;
598 
599 			num_match_categories = matches[i].num_match_categories;
600 
601 			/*
602 			 * Determine whether or not the current device
603 			 * matches the given matching expression.  This if
604 			 * statement consists of three components:
605 			 *   - the device type check
606 			 *   - the device interface check
607 			 *   - the passthrough check
608 			 * If a the matching test is successful, it
609 			 * decrements the number of matching categories,
610 			 * and if we've reached the last element that
611 			 * needed to be matched, the if statement succeeds.
612 			 *
613 			 */
614 			if ((((matches[i].match_fields & DEVSTAT_MATCH_TYPE)!=0)
615 			  && ((devices[j].device_type & DEVSTAT_TYPE_MASK) ==
616 			        (matches[i].device_type & DEVSTAT_TYPE_MASK))
617 			  &&(((matches[i].match_fields & DEVSTAT_MATCH_PASS)!=0)
618 			   || (((matches[i].match_fields &
619 				DEVSTAT_MATCH_PASS) == 0)
620 			    && ((devices[j].device_type &
621 			        DEVSTAT_TYPE_PASS) == 0)))
622 			  && (--num_match_categories == 0))
623 			 || (((matches[i].match_fields & DEVSTAT_MATCH_IF) != 0)
624 			  && ((devices[j].device_type & DEVSTAT_TYPE_IF_MASK) ==
625 			        (matches[i].device_type & DEVSTAT_TYPE_IF_MASK))
626 			  &&(((matches[i].match_fields & DEVSTAT_MATCH_PASS)!=0)
627 			   || (((matches[i].match_fields &
628 				DEVSTAT_MATCH_PASS) == 0)
629 			    && ((devices[j].device_type &
630 				DEVSTAT_TYPE_PASS) == 0)))
631 			  && (--num_match_categories == 0))
632 			 || (((matches[i].match_fields & DEVSTAT_MATCH_PASS)!=0)
633 			  && ((devices[j].device_type & DEVSTAT_TYPE_PASS) != 0)
634 			  && (--num_match_categories == 0))) {
635 
636 				/*
637 				 * This is probably a non-optimal solution
638 				 * to the problem that the devices in the
639 				 * device list will not be in the same
640 				 * order as the devices in the selection
641 				 * array.
642 				 */
643 				for (k = 0; k < numdevs; k++) {
644 					if ((*dev_select)[k].position == j) {
645 						found = 1;
646 						break;
647 					}
648 				}
649 
650 				/*
651 				 * There shouldn't be a case where a device
652 				 * in the device list is not in the
653 				 * selection list...but it could happen.
654 				 */
655 				if (found != 1) {
656 					fprintf(stderr, "selectdevs: couldn't"
657 						" find %s%d in selection "
658 						"list\n",
659 						devices[j].device_name,
660 						devices[j].unit_number);
661 					break;
662 				}
663 
664 				/*
665 				 * We do different things based upon the
666 				 * mode we're in.  If we're in add or only
667 				 * mode, we go ahead and select this device
668 				 * if it hasn't already been selected.  If
669 				 * it has already been selected, we leave
670 				 * it alone so we don't mess up the
671 				 * selection ordering.  Manually specified
672 				 * devices have already been selected, and
673 				 * they have higher priority than pattern
674 				 * matched devices.  If we're in remove
675 				 * mode, we de-select the given device and
676 				 * decrement the selected count.
677 				 */
678 				switch(select_mode) {
679 				case DS_SELECT_ADD:
680 				case DS_SELECT_ADDONLY:
681 				case DS_SELECT_ONLY:
682 					if ((*dev_select)[k].selected != 0)
683 						break;
684 					(*dev_select)[k].selected =
685 						++selection_number;
686 					(*num_selected)++;
687 					break;
688 				case DS_SELECT_REMOVE:
689 					(*dev_select)[k].selected = 0;
690 					(*num_selected)--;
691 					break;
692 				}
693 			}
694 		}
695 	}
696 
697 	/*
698 	 * Here we implement "top" mode.  Devices are sorted in the
699 	 * selection array based on two criteria:  whether or not they are
700 	 * selected (not selection number, just the fact that they are
701 	 * selected!) and the number of bytes in the "bytes" field of the
702 	 * selection structure.  The bytes field generally must be kept up
703 	 * by the user.  In the future, it may be maintained by library
704 	 * functions, but for now the user has to do the work.
705 	 *
706 	 * At first glance, it may seem wrong that we don't go through and
707 	 * select every device in the case where the user hasn't specified
708 	 * any devices or patterns.  In fact, though, it won't make any
709 	 * difference in the device sorting.  In that particular case (i.e.
710 	 * when we're in "add" or "only" mode, and the user hasn't
711 	 * specified anything) the first time through no devices will be
712 	 * selected, so the only criterion used to sort them will be their
713 	 * performance.  The second time through, and every time thereafter,
714 	 * all devices will be selected, so again selection won't matter.
715 	 */
716 	if (perf_select != 0) {
717 
718 		/* Sort the device array by throughput  */
719 		qsort(*dev_select, *num_selections,
720 		      sizeof(struct device_selection),
721 		      compare_select);
722 
723 		if (*num_selected == 0) {
724 			/*
725 			 * Here we select every device in the array, if it
726 			 * isn't already selected.  Because the 'selected'
727 			 * variable in the selection array entries contains
728 			 * the selection order, the devstats routine can show
729 			 * the devices that were selected first.
730 			 */
731 			for (i = 0; i < *num_selections; i++) {
732 				if ((*dev_select)[i].selected == 0) {
733 					(*dev_select)[i].selected =
734 						++selection_number;
735 					(*num_selected)++;
736 				}
737 			}
738 		} else {
739 			selection_number = 0;
740 			for (i = 0; i < *num_selections; i++) {
741 				if ((*dev_select)[i].selected != 0) {
742 					(*dev_select)[i].selected =
743 						++selection_number;
744 				}
745 			}
746 		}
747 	}
748 
749 	/*
750 	 * If we're in the "add" selection mode and if we haven't already
751 	 * selected maxshowdevs number of devices, go through the array and
752 	 * select any unselected devices.  If we're in "only" mode, we
753 	 * obviously don't want to select anything other than what the user
754 	 * specifies.  If we're in "remove" mode, it probably isn't a good
755 	 * idea to go through and select any more devices, since we might
756 	 * end up selecting something that the user wants removed.  Through
757 	 * more complicated logic, we could actually figure this out, but
758 	 * that would probably require combining this loop with the various
759 	 * selections loops above.
760 	 */
761 	if ((select_mode == DS_SELECT_ADD) && (*num_selected < maxshowdevs)) {
762 		for (i = 0; i < *num_selections; i++)
763 			if ((*dev_select)[i].selected == 0) {
764 				(*dev_select)[i].selected = ++selection_number;
765 				(*num_selected)++;
766 			}
767 	}
768 
769 	/*
770 	 * Look at the number of devices that have been selected.  If it
771 	 * has changed, set the changed variable.  Otherwise, if we've
772 	 * made a backup of the selection list, compare it to the current
773 	 * selection list to see if the selected devices have changed.
774 	 */
775 	if ((changed == 0) && (old_num_selected != *num_selected))
776 		changed = 1;
777 	else if ((changed == 0) && (old_dev_select != NULL)) {
778 		/*
779 		 * Now we go through the selection list and we look at
780 		 * it three different ways.
781 		 */
782 		for (i = 0; (i < *num_selections) && (changed == 0) &&
783 		     (i < old_num_selections); i++) {
784 			/*
785 			 * If the device at index i in both the new and old
786 			 * selection arrays has the same device number and
787 			 * selection status, it hasn't changed.  We
788 			 * continue on to the next index.
789 			 */
790 			if (((*dev_select)[i].device_number ==
791 			     old_dev_select[i].device_number)
792 			 && ((*dev_select)[i].selected ==
793 			     old_dev_select[i].selected))
794 				continue;
795 
796 			/*
797 			 * Now, if we're still going through the if
798 			 * statement, the above test wasn't true.  So we
799 			 * check here to see if the device at index i in
800 			 * the current array is the same as the device at
801 			 * index i in the old array.  If it is, that means
802 			 * that its selection number has changed.  Set
803 			 * changed to 1 and exit the loop.
804 			 */
805 			else if ((*dev_select)[i].device_number ==
806 			          old_dev_select[i].device_number) {
807 				changed = 1;
808 				break;
809 			}
810 			/*
811 			 * If we get here, then the device at index i in
812 			 * the current array isn't the same device as the
813 			 * device at index i in the old array.
814 			 */
815 			else {
816 				int found = 0;
817 
818 				/*
819 				 * Search through the old selection array
820 				 * looking for a device with the same
821 				 * device number as the device at index i
822 				 * in the current array.  If the selection
823 				 * status is the same, then we mark it as
824 				 * found.  If the selection status isn't
825 				 * the same, we break out of the loop.
826 				 * Since found isn't set, changed will be
827 				 * set to 1 below.
828 				 */
829 				for (j = 0; j < old_num_selections; j++) {
830 					if (((*dev_select)[i].device_number ==
831 					      old_dev_select[j].device_number)
832 					 && ((*dev_select)[i].selected ==
833 					      old_dev_select[j].selected)){
834 						found = 1;
835 						break;
836 					}
837 					else if ((*dev_select)[i].device_number
838 					    == old_dev_select[j].device_number)
839 						break;
840 				}
841 				if (found == 0)
842 					changed = 1;
843 			}
844 		}
845 	}
846 	if (old_dev_select != NULL)
847 		free(old_dev_select);
848 
849 	return(changed);
850 }
851 
852 /*
853  * Comparison routine for qsort() above.  Note that the comparison here is
854  * backwards -- generally, it should return a value to indicate whether
855  * arg1 is <, =, or > arg2.  Instead, it returns the opposite.  The reason
856  * it returns the opposite is so that the selection array will be sorted in
857  * order of decreasing performance.  We sort on two parameters.  The first
858  * sort key is whether or not one or the other of the devices in question
859  * has been selected.  If one of them has, and the other one has not, the
860  * selected device is automatically more important than the unselected
861  * device.  If neither device is selected, we judge the devices based upon
862  * performance.
863  */
864 static int
865 compare_select(const void *arg1, const void *arg2)
866 {
867 	if ((((struct device_selection *)arg1)->selected)
868 	 && (((struct device_selection *)arg2)->selected == 0))
869 		return(-1);
870 	else if ((((struct device_selection *)arg1)->selected == 0)
871 	      && (((struct device_selection *)arg2)->selected))
872 		return(1);
873 	else if (((struct device_selection *)arg2)->bytes <
874 	         ((struct device_selection *)arg1)->bytes)
875 		return(-1);
876 	else if (((struct device_selection *)arg2)->bytes >
877 		 ((struct device_selection *)arg1)->bytes)
878 		return(1);
879 	else
880 		return(0);
881 }
882 
883 /*
884  * Take a string with the general format "arg1,arg2,arg3", and build a
885  * device matching expression from it.
886  */
887 int
888 buildmatch(char *match_str, struct devstat_match **matches, int *num_matches)
889 {
890 	char *tstr[5];
891 	char **tempstr;
892 	int num_args;
893 	register int i, j;
894 	char *func_name = "buildmatch";
895 
896 	/* We can't do much without a string to parse */
897 	if (match_str == NULL) {
898 		sprintf(devstat_errbuf, "%s: no match expression", func_name);
899 		return(-1);
900 	}
901 
902 	/*
903 	 * Break the (comma delimited) input string out into separate strings.
904 	 */
905 	for (tempstr = tstr, num_args  = 0;
906 	     (*tempstr = strsep(&match_str, ",")) != NULL && (num_args < 5);
907 	     num_args++)
908 		if (**tempstr != '\0')
909 			if (++tempstr >= &tstr[5])
910 				break;
911 
912 	/* The user gave us too many type arguments */
913 	if (num_args > 3) {
914 		sprintf(devstat_errbuf, "%s: too many type arguments",
915 			func_name);
916 		return(-1);
917 	}
918 
919 	/*
920 	 * Since you can't realloc a pointer that hasn't been malloced
921 	 * first, we malloc first and then realloc.
922 	 */
923 	if (*num_matches == 0)
924 		*matches = (struct devstat_match *)malloc(
925 			   sizeof(struct devstat_match));
926 	else
927 		*matches = (struct devstat_match *)realloc(*matches,
928 			  sizeof(struct devstat_match) * (*num_matches + 1));
929 
930 	/* Make sure the current entry is clear */
931 	bzero(&matches[0][*num_matches], sizeof(struct devstat_match));
932 
933 	/*
934 	 * Step through the arguments the user gave us and build a device
935 	 * matching expression from them.
936 	 */
937 	for (i = 0; i < num_args; i++) {
938 		char *tempstr2, *tempstr3;
939 
940 		/*
941 		 * Get rid of leading white space.
942 		 */
943 		tempstr2 = tstr[i];
944 		while (isspace(*tempstr2) && (*tempstr2 != '\0'))
945 			tempstr2++;
946 
947 		/*
948 		 * Get rid of trailing white space.
949 		 */
950 		tempstr3 = &tempstr2[strlen(tempstr2) - 1];
951 
952 		while ((*tempstr3 != '\0') && (tempstr3 > tempstr2)
953 		    && (isspace(*tempstr3))) {
954 			*tempstr3 = '\0';
955 			tempstr3--;
956 		}
957 
958 		/*
959 		 * Go through the match table comparing the user's
960 		 * arguments to known device types, interfaces, etc.
961 		 */
962 		for (j = 0; match_table[j].match_str != NULL; j++) {
963 			/*
964 			 * We do case-insensitive matching, in case someone
965 			 * wants to enter "SCSI" instead of "scsi" or
966 			 * something like that.  Only compare as many
967 			 * characters as are in the string in the match
968 			 * table.  This should help if someone tries to use
969 			 * a super-long match expression.
970 			 */
971 			if (strncasecmp(tempstr2, match_table[j].match_str,
972 			    strlen(match_table[j].match_str)) == 0) {
973 				/*
974 				 * Make sure the user hasn't specified two
975 				 * items of the same type, like "da" and
976 				 * "cd".  One device cannot be both.
977 				 */
978 				if (((*matches)[*num_matches].match_fields &
979 				    match_table[j].match_field) != 0) {
980 					sprintf(devstat_errbuf,
981 						"%s: cannot have more than "
982 						"one match item in a single "
983 						"category", func_name);
984 					return(-1);
985 				}
986 				/*
987 				 * If we've gotten this far, we have a
988 				 * winner.  Set the appropriate fields in
989 				 * the match entry.
990 				 */
991 				(*matches)[*num_matches].match_fields |=
992 					match_table[j].match_field;
993 				(*matches)[*num_matches].device_type |=
994 					match_table[j].type;
995 				(*matches)[*num_matches].num_match_categories++;
996 				break;
997 			}
998 		}
999 		/*
1000 		 * We should have found a match in the above for loop.  If
1001 		 * not, that means the user entered an invalid device type
1002 		 * or interface.
1003 		 */
1004 		if ((*matches)[*num_matches].num_match_categories != (i + 1)) {
1005 			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1006 				"%s: unknown match item \"%s\"", func_name,
1007 				tstr[i]);
1008 			return(-1);
1009 		}
1010 	}
1011 
1012 	(*num_matches)++;
1013 
1014 	return(0);
1015 }
1016 
1017 /*
1018  * Compute a number of device statistics.  Only one field is mandatory, and
1019  * that is "current".  Everything else is optional.  The caller passes in
1020  * pointers to variables to hold the various statistics he desires.  If he
1021  * doesn't want a particular staistic, he should pass in a NULL pointer.
1022  * Return values:
1023  * 0   -- success
1024  * -1  -- failure
1025  */
1026 int
1027 compute_stats(struct devstat *current, struct devstat *previous,
1028 	      long double etime, u_int64_t *total_bytes,
1029 	      u_int64_t *total_transfers, u_int64_t *total_blocks,
1030 	      long double *kb_per_transfer, long double *transfers_per_second,
1031 	      long double *mb_per_second, long double *blocks_per_second,
1032 	      long double *ms_per_transaction)
1033 {
1034 	u_int64_t totalbytes, totaltransfers, totalblocks;
1035 	char *func_name = "compute_stats";
1036 
1037 	/*
1038 	 * current is the only mandatory field.
1039 	 */
1040 	if (current == NULL) {
1041 		sprintf(devstat_errbuf, "%s: current stats structure was NULL",
1042 			func_name);
1043 		return(-1);
1044 	}
1045 
1046 	totalbytes = (current->bytes_written + current->bytes_read) -
1047 		     ((previous) ? (previous->bytes_written +
1048 				    previous->bytes_read) : 0);
1049 
1050 	if (total_bytes)
1051 		*total_bytes = totalbytes;
1052 
1053 	totaltransfers = (current->num_reads +
1054 			  current->num_writes +
1055 			  current->num_other) -
1056 			 ((previous) ?
1057 			  (previous->num_reads +
1058 			   previous->num_writes +
1059 			   previous->num_other) : 0);
1060 	if (total_transfers)
1061 		*total_transfers = totaltransfers;
1062 
1063 	if (transfers_per_second) {
1064 		if (etime > 0.0) {
1065 			*transfers_per_second = totaltransfers;
1066 			*transfers_per_second /= etime;
1067 		} else
1068 			*transfers_per_second = 0.0;
1069 	}
1070 
1071 	if (kb_per_transfer) {
1072 		*kb_per_transfer = totalbytes;
1073 		*kb_per_transfer /= 1024;
1074 		if (totaltransfers > 0)
1075 			*kb_per_transfer /= totaltransfers;
1076 		else
1077 			*kb_per_transfer = 0.0;
1078 	}
1079 
1080 	if (mb_per_second) {
1081 		*mb_per_second = totalbytes;
1082 		*mb_per_second /= 1024 * 1024;
1083 		if (etime > 0.0)
1084 			*mb_per_second /= etime;
1085 		else
1086 			*mb_per_second = 0.0;
1087 	}
1088 
1089 	totalblocks = totalbytes;
1090 	if (current->block_size > 0)
1091 		totalblocks /= current->block_size;
1092 	else
1093 		totalblocks /= 512;
1094 
1095 	if (total_blocks)
1096 		*total_blocks = totalblocks;
1097 
1098 	if (blocks_per_second) {
1099 		*blocks_per_second = totalblocks;
1100 		if (etime > 0.0)
1101 			*blocks_per_second /= etime;
1102 		else
1103 			*blocks_per_second = 0.0;
1104 	}
1105 
1106 	if (ms_per_transaction) {
1107 		if (totaltransfers > 0) {
1108 			*ms_per_transaction = etime;
1109 			*ms_per_transaction /= totaltransfers;
1110 			*ms_per_transaction *= 1000;
1111 		} else
1112 			*ms_per_transaction = 0.0;
1113 	}
1114 
1115 	return(0);
1116 }
1117 
/*
 * Return the time elapsed between prev_time and cur_time, in seconds.
 *
 * The difference is accumulated in signed microseconds, so an interval in
 * which cur_time precedes prev_time yields a negative result rather than
 * wrapping around to an enormous positive value.
 */
long double
compute_etime(struct timeval cur_time, struct timeval prev_time)
{
	int64_t elapsed_usec;
	long double etime;

	/* Whole seconds first, scaled to microseconds ... */
	elapsed_usec = (int64_t)cur_time.tv_sec - (int64_t)prev_time.tv_sec;
	elapsed_usec *= 1000000;
	/* ... then the (possibly negative) microsecond remainder. */
	elapsed_usec += (int64_t)cur_time.tv_usec - (int64_t)prev_time.tv_usec;

	etime = elapsed_usec;
	etime /= 1000000;

	return(etime);
}
1135