xref: /illumos-gate/usr/src/uts/common/sys/kstat.h (revision c0e96d867a045da50bf7e014cab94cb44528a0f5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 #ifndef	_SYS_KSTAT_H
29 #define	_SYS_KSTAT_H
30 
31 /*
32  * Definition of general kernel statistics structures and /dev/kstat ioctls
33  */
34 
35 #include <sys/types.h>
36 #include <sys/time.h>
37 
38 #ifdef	__cplusplus
39 extern "C" {
40 #endif
41 
42 typedef int	kid_t;		/* unique kstat id */
43 
44 /*
45  * Kernel statistics driver (/dev/kstat) ioctls
46  */
47 
/*
 * ioctl command codes for /dev/kstat.  Each code is the 'K' group byte in
 * bits 8-15 combined with a small command number in the low byte.
 *
 * Every replacement list is fully parenthesized so that the macros behave
 * as single values inside larger expressions (e.g. "cmd == KSTAT_IOC_READ"
 * or "KSTAT_IOC_READ & 0xff"); without the parentheses, '|' would bind
 * more loosely than the surrounding '==' or '&'.
 */
#define	KSTAT_IOC_BASE		('K' << 8)

#define	KSTAT_IOC_CHAIN_ID	(KSTAT_IOC_BASE | 0x01)
#define	KSTAT_IOC_READ		(KSTAT_IOC_BASE | 0x02)
#define	KSTAT_IOC_WRITE		(KSTAT_IOC_BASE | 0x03)
53 
54 /*
55  * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
56  *
57  *	kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
58  *	kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
59  *	kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
60  */
61 
62 #define	KSTAT_STRLEN	31	/* 30 chars + NULL; must be 16 * n - 1 */
63 
64 /*
65  * The generic kstat header
66  */
67 
68 typedef struct kstat {
69 	/*
70 	 * Fields relevant to both kernel and user
71 	 */
72 	hrtime_t	ks_crtime;	/* creation time (from gethrtime()) */
73 	struct kstat	*ks_next;	/* kstat chain linkage */
74 	kid_t		ks_kid;		/* unique kstat ID */
75 	char		ks_module[KSTAT_STRLEN]; /* provider module name */
76 	uchar_t		ks_resv;	/* reserved, currently just padding */
77 	int		ks_instance;	/* provider module's instance */
78 	char		ks_name[KSTAT_STRLEN]; /* kstat name */
79 	uchar_t		ks_type;	/* kstat data type */
80 	char		ks_class[KSTAT_STRLEN]; /* kstat class */
81 	uchar_t		ks_flags;	/* kstat flags */
82 	void		*ks_data;	/* kstat type-specific data */
83 	uint_t		ks_ndata;	/* # of type-specific data records */
84 	size_t		ks_data_size;	/* total size of kstat data section */
85 	hrtime_t	ks_snaptime;	/* time of last data snapshot */
86 	/*
87 	 * Fields relevant to kernel only
88 	 */
89 	int		(*ks_update)(struct kstat *, int); /* dynamic update */
90 	void		*ks_private;	/* arbitrary provider-private data */
91 	int		(*ks_snapshot)(struct kstat *, void *, int);
92 	void		*ks_lock;	/* protects this kstat's data */
93 } kstat_t;
94 
95 #ifdef _SYSCALL32
96 
97 typedef int32_t kid32_t;
98 
99 typedef struct kstat32 {
100 	/*
101 	 * Fields relevant to both kernel and user
102 	 */
103 	hrtime_t	ks_crtime;
104 	caddr32_t	ks_next;		/* struct kstat pointer */
105 	kid32_t		ks_kid;
106 	char		ks_module[KSTAT_STRLEN];
107 	uint8_t		ks_resv;
108 	int32_t		ks_instance;
109 	char		ks_name[KSTAT_STRLEN];
110 	uint8_t		ks_type;
111 	char		ks_class[KSTAT_STRLEN];
112 	uint8_t		ks_flags;
113 	caddr32_t	ks_data;		/* type-specific data */
114 	uint32_t	ks_ndata;
115 	size32_t	ks_data_size;
116 	hrtime_t	ks_snaptime;
117 	/*
118 	 * Fields relevant to kernel only (only needed here for padding)
119 	 */
120 	int32_t		_ks_update;
121 	caddr32_t	_ks_private;
122 	int32_t		_ks_snapshot;
123 	caddr32_t	_ks_lock;
124 } kstat32_t;
125 
126 #endif	/* _SYSCALL32 */
127 
128 /*
129  * kstat structure and locking strategy
130  *
131  * Each kstat consists of a header section (a kstat_t) and a data section.
132  * The system maintains a set of kstats, protected by kstat_chain_lock.
133  * kstat_chain_lock protects all additions to/deletions from this set,
134  * as well as all changes to kstat headers.  kstat data sections are
135  * *optionally* protected by the per-kstat ks_lock.  If ks_lock is non-NULL,
136  * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
137  * operations on that kstat.  It is up to the kstat provider to decide whether
138  * guaranteeing consistent data to kstat clients is sufficiently important
139  * to justify the locking cost.  Note, however, that most statistic updates
140  * already occur under one of the provider's mutexes, so if the provider sets
141  * ks_lock to point to that mutex, then kstat data locking is free.
142  *
143  * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
144  * data-size races with kstat clients.
145  *
146  * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
147  * in the kstat header so that users don't have to be exposed to all of the
148  * kernel's lock-related data structures.
149  */
150 
151 #if	defined(_KERNEL)
152 
/*
 * KSTAT_ENTER(k) / KSTAT_EXIT(k): acquire / release the optional per-kstat
 * data lock of kstat_t *k.  ks_lock is stored as (void *) in the header but
 * is used here as a (kmutex_t *); when it is NULL both macros do nothing.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and can be used as the unbraced body of an if/else.  The
 * invocation must be terminated with a semicolon.  The local is given a
 * kstat-specific name so it cannot capture an identifier used in (k).
 */
#define	KSTAT_ENTER(k)	\
	do { \
		kmutex_t *kstat_lp_ = (k)->ks_lock; \
		if (kstat_lp_) \
			mutex_enter(kstat_lp_); \
	} while (0)

#define	KSTAT_EXIT(k)	\
	do { \
		kmutex_t *kstat_lp_ = (k)->ks_lock; \
		if (kstat_lp_) \
			mutex_exit(kstat_lp_); \
	} while (0)

/*
 * Invoke the kstat's ks_update routine; rw is KSTAT_READ or KSTAT_WRITE.
 * Evaluates to the routine's int return value.  Note that (k) is evaluated
 * twice.
 */
#define	KSTAT_UPDATE(k, rw)		(*(k)->ks_update)((k), (rw))

/*
 * Invoke the kstat's ks_snapshot routine on buffer buf; rw is KSTAT_READ
 * or KSTAT_WRITE.  Evaluates to the routine's int return value.  Note that
 * (k) is evaluated twice.
 */
#define	KSTAT_SNAPSHOT(k, buf, rw)	(*(k)->ks_snapshot)((k), (buf), (rw))
162 
163 #endif	/* defined(_KERNEL) */
164 
165 /*
166  * kstat time
167  *
168  * All times associated with kstats (e.g. creation time, snapshot time,
169  * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
170  * as returned by gethrtime().  The accuracy of these timestamps is machine
171  * dependent, but the precision (units) is the same across all platforms.
172  */
173 
174 /*
175  * kstat identity (KID)
176  *
177  * Each kstat is assigned a unique KID (kstat ID) when it is added to the
178  * global kstat chain.  The KID is used as a cookie by /dev/kstat to
179  * request information about the corresponding kstat.  There is also
180  * an identity associated with the entire kstat chain, kstat_chain_id,
181  * which is bumped each time a kstat is added or deleted.  /dev/kstat uses
182  * the chain ID to detect changes in the kstat chain (e.g., a new disk
183  * coming online) between ioctl()s.
184  */
185 
186 /*
187  * kstat module, kstat instance
188  *
189  * ks_module and ks_instance contain the name and instance of the module
190  * that created the kstat.  In cases where there can only be one instance,
191  * ks_instance is 0.  The kernel proper (/kernel/unix) uses "unix" as its
192  * module name.
193  */
194 
195 /*
196  * kstat name
197  *
198  * ks_name gives a meaningful name to a kstat.  The full kstat namespace
199  * is module.instance.name, so the name only need be unique within a
200  * module.  kstat_create() will fail if you try to create a kstat with
201  * an already-used (ks_module, ks_instance, ks_name) triplet.  Spaces are
202  * allowed in kstat names, but strongly discouraged, since they hinder
203  * awk-style processing at user level.
204  */
205 
206 /*
207  * kstat type
208  *
209  * The kstat mechanism provides several flavors of kstat data, defined
210  * below.  The "raw" kstat type is just treated as an array of bytes; you
211  * can use this to export any kind of data you want.
212  *
213  * Some kstat types allow multiple data structures per kstat, e.g.
214  * KSTAT_TYPE_NAMED; others do not.  This is part of the spec for each
215  * kstat data type.
216  *
217  * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES.  To
218  * get this information, read out the standard system kstat "kstat_types".
219  */
220 
221 #define	KSTAT_TYPE_RAW		0	/* can be anything */
222 					/* ks_ndata >= 1 */
223 #define	KSTAT_TYPE_NAMED	1	/* name/value pair */
224 					/* ks_ndata >= 1 */
225 #define	KSTAT_TYPE_INTR		2	/* interrupt statistics */
226 					/* ks_ndata == 1 */
227 #define	KSTAT_TYPE_IO		3	/* I/O statistics */
228 					/* ks_ndata == 1 */
229 #define	KSTAT_TYPE_TIMER	4	/* event timer */
230 					/* ks_ndata >= 1 */
231 
232 #define	KSTAT_NUM_TYPES		5
233 
234 /*
235  * kstat class
236  *
237  * Each kstat can be characterized as belonging to some broad class
238  * of statistics, e.g. disk, tape, net, vm, streams, etc.  This field
239  * can be used as a filter to extract related kstats.  The following
240  * values are currently in use: disk, tape, net, controller, vm, kvm,
241  * hat, streams, kstat, and misc.  (The kstat class encompasses things
242  * like kstat_types.)
243  */
244 
245 /*
246  * kstat flags
247  *
248  * Any of the following flags may be passed to kstat_create().  They are
249  * all zero by default.
250  *
251  *	KSTAT_FLAG_VIRTUAL:
252  *
253  *		Tells kstat_create() not to allocate memory for the
254  *		kstat data section; instead, you will set the ks_data
255  *		field to point to the data you wish to export.  This
256  *		provides a convenient way to export existing data
257  *		structures.
258  *
259  *	KSTAT_FLAG_VAR_SIZE:
260  *
261  *		The size of the kstat you are creating will vary over time.
262  *		For example, you may want to use the kstat mechanism to
263  *		export a linked list.  NOTE: The kstat framework does not
264  *		manage the data section, so all variable-size kstats must be
265  *		virtual kstats.  Moreover, variable-size kstats MUST employ
266  *		kstat data locking to prevent data-size races with kstat
267  *		clients.  See the section on "kstat snapshot" for details.
268  *
269  *	KSTAT_FLAG_WRITABLE:
270  *
271  *		Makes the kstat's data section writable by root.
272  *		The ks_snapshot routine (see below) does not need to check for
273  *		this; permission checking is handled in the kstat driver.
274  *
275  *	KSTAT_FLAG_PERSISTENT:
276  *
277  *		Indicates that this kstat is to be persistent over time.
278  *		For persistent kstats, kstat_delete() simply marks the
279  *		kstat as dormant; a subsequent kstat_create() reactivates
280  *		the kstat.  This feature is provided so that statistics
281  *		are not lost across driver close/open (e.g., raw disk I/O
282  *		on a disk with no mounted partitions.)
283  *		NOTE: Persistent kstats cannot be virtual, since ks_data
284  *		points to garbage as soon as the driver goes away.
285  *
286  * The following flags are maintained by the kstat framework:
287  *
288  *	KSTAT_FLAG_DORMANT:
289  *
290  *		For persistent kstats, indicates that the kstat is in the
291  *		dormant state (e.g., the corresponding device is closed).
292  *
293  *	KSTAT_FLAG_INVALID:
294  *
295  *		This flag is set when a kstat is in a transitional state,
296  *		e.g. between kstat_create() and kstat_install().
297  *		kstat clients must not attempt to access the kstat's data
298  *		if this flag is set.
299  *
300  *	KSTAT_FLAG_LONGSTRINGS:
301  *
302  *		Indicates that this kstat contains long strings (which
303  *		are stored outside of the kstat data section). When copied
304  *		out to user space the string data will be held in the data
305  *		section provided by the user.
306  */
307 
308 #define	KSTAT_FLAG_VIRTUAL		0x01
309 #define	KSTAT_FLAG_VAR_SIZE		0x02
310 #define	KSTAT_FLAG_WRITABLE		0x04
311 #define	KSTAT_FLAG_PERSISTENT		0x08
312 #define	KSTAT_FLAG_DORMANT		0x10
313 #define	KSTAT_FLAG_INVALID		0x20
314 #define	KSTAT_FLAG_LONGSTRINGS		0x40
315 
316 /*
317  * Dynamic update support
318  *
319  * The kstat mechanism allows for an optional ks_update function to update
320  * kstat data.  This is useful for drivers where the underlying device
321  * keeps cheap hardware stats, but extraction is expensive.  Instead of
322  * constantly keeping the kstat data section up to date, you can supply a
323  * ks_update function which updates the kstat's data section on demand.
324  * To take advantage of this feature, simply set the ks_update field before
325  * calling kstat_install().
326  *
327  * The ks_update function, if supplied, must have the following structure:
328  *
329  *	int
330  *	foo_kstat_update(kstat_t *ksp, int rw)
331  *	{
332  *		if (rw == KSTAT_WRITE) {
333  *			... update the native stats from ksp->ks_data;
334  *				return EACCES if you don't support this
335  *		} else {
336  *			... update ksp->ks_data from the native stats
337  *		}
338  *	}
339  *
340  * The ks_update return codes are: 0 for success, EACCES if you don't allow
341  * KSTAT_WRITE, and EIO for any other type of error.
342  *
343  * In general, the ks_update function may need to refer to provider-private
344  * data; for example, it may need a pointer to the provider's raw statistics.
345  * The ks_private field is available for this purpose.  Its use is entirely
346  * at the provider's discretion.
347  *
348  * All variable-size kstats MUST supply a ks_update routine, which computes
349  * and sets ks_data_size (and ks_ndata if that is meaningful), since these
350  * are needed to perform kstat snapshots (see below).
351  *
352  * No kstat locking should be done inside the ks_update routine.  The caller
353  * will already be holding the kstat's ks_lock (to ensure consistent data).
354  */
355 
356 #define	KSTAT_READ	0
357 #define	KSTAT_WRITE	1
358 
359 /*
360  * Kstat snapshot
361  *
362  * In order to get a consistent view of a kstat's data, clients must obey
363  * the kstat's locking strategy.  However, these clients may need to perform
364  * operations on the data which could cause a fault (e.g. copyout()), or
365  * operations which are simply expensive.  Doing so could cause deadlock
366  * (e.g. if you're holding a disk's kstat lock which is ultimately required
367  * to resolve a copyout() fault), performance degradation (since the providers'
368  * activity is serialized at the kstat lock), device timing problems, etc.
369  *
370  * To avoid these problems, kstat data is provided via snapshots.  Taking
371  * a snapshot is a simple process: allocate a wired-down kernel buffer,
372  * acquire the kstat's data lock, copy the data into the buffer ("take the
373  * snapshot"), and release the lock.  This ensures that the kstat's data lock
374  * will be held as briefly as possible, and that no faults will occur while
375  * the lock is held.
376  *
377  * Normally, the snapshot is taken by default_kstat_snapshot(), which
378  * timestamps the data (sets ks_snaptime), copies it, and does a little
379  * massaging to deal with incomplete transactions on i/o kstats.  However,
380  * this routine only works for kstats with contiguous data (the typical case).
381  * If you create a kstat whose data is, say, a linked list, you must provide
382  * your own ks_snapshot routine.  The routine you supply must have the
383  * following prototype (replace "foo" with something appropriate):
384  *
385  *	int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
386  *
387  * The minimal snapshot routine -- one which copies contiguous data that
388  * doesn't need any massaging -- would be this:
389  *
390  *	ksp->ks_snaptime = gethrtime();
391  *	if (rw == KSTAT_WRITE)
392  *		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
393  *	else
394  *		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
395  *	return (0);
396  *
397  * A more illuminating example is taking a snapshot of a linked list:
398  *
399  *	ksp->ks_snaptime = gethrtime();
400  *	if (rw == KSTAT_WRITE)
401  *		return (EACCES);		... See below ...
402  *	for (foo = first_foo; foo; foo = foo->next) {
403  *		bcopy((char *) foo, (char *) buf, sizeof (struct foo));
404  *		buf = ((struct foo *) buf) + 1;
405  *	}
406  *	return (0);
407  *
408  * In the example above, we have decided that we don't want to allow
409  * KSTAT_WRITE access, so we return EACCES if this is attempted.
410  *
411  * The key points are:
412  *
413  *	(1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
414  *	(2) Data gets copied from the kstat to the buffer on KSTAT_READ,
415  *		and from the buffer to the kstat on KSTAT_WRITE.
416  *	(3) ks_snapshot return values are: 0 for success, EACCES if you
417  *		don't allow KSTAT_WRITE, and EIO for any other type of error.
418  *
419  * Named kstats (see section on "Named statistics" below) containing long
420  * strings (KSTAT_DATA_STRING) need special handling.  The kstat driver
421  * assumes that all strings are copied into the buffer after the array of
422  * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
423  * into the copy within the buffer. The default snapshot routine does this,
424  * but overriding routines should contain at least the following:
425  *
426  * if (rw == KSTAT_READ) {
427  * 	kstat_named_t *knp = buf;
428  * 	char *end = (char *)(knp + ksp->ks_ndata);
429  * 	uint_t i;
430  *
431  * 	... Do the regular copy ...
432  * 	bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata);
433  * 	for (i = 0; i < ksp->ks_ndata; i++, knp++) {
434  *		if (knp->data_type == KSTAT_DATA_STRING &&
435  *		    KSTAT_NAMED_STR_PTR(knp) != NULL) {
436  *			bcopy(KSTAT_NAMED_STR_PTR(knp), end,
437  *			    KSTAT_NAMED_STR_BUFLEN(knp));
438  *			KSTAT_NAMED_STR_PTR(knp) = end;
439  *			end += KSTAT_NAMED_STR_BUFLEN(knp);
440  *		}
441  *	}
442  * }
443  */
444 
445 /*
446  * Named statistics.
447  *
448  * List of arbitrary name=value statistics.
449  */
450 
451 typedef struct kstat_named {
452 	char	name[KSTAT_STRLEN];	/* name of counter */
453 	uchar_t	data_type;		/* data type */
454 	union {
455 		char		c[16];	/* enough for 128-bit ints */
456 		int32_t		i32;
457 		uint32_t	ui32;
458 		struct {
459 			union {
460 				char 		*ptr;	/* NULL-term string */
461 #if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
462 				caddr32_t	ptr32;
463 #endif
464 				char 		__pad[8]; /* 64-bit padding */
465 			} addr;
466 			uint32_t	len;	/* # bytes for strlen + '\0' */
467 		} str;
468 /*
469  * The int64_t and uint64_t types are not valid for a maximally conformant
470  * 32-bit compilation environment (cc -Xc) using compilers prior to the
471  * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
472  * In these cases, the visibility of i64 and ui64 is only permitted for
473  * 64-bit compilation environments or 32-bit non-maximally conformant
474  * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
475  * C99 ANSI C compilation environment, the long long type is supported.
476  * The _INT64_TYPE is defined by the implementation (see sys/int_types.h).
477  */
478 #if defined(_INT64_TYPE)
479 		int64_t		i64;
480 		uint64_t	ui64;
481 #endif
482 		long		l;
483 		ulong_t		ul;
484 
485 		/* These structure members are obsolete */
486 
487 		longlong_t	ll;
488 		u_longlong_t	ull;
489 		float		f;
490 		double		d;
491 	} value;			/* value of counter */
492 } kstat_named_t;
493 
494 #define	KSTAT_DATA_CHAR		0
495 #define	KSTAT_DATA_INT32	1
496 #define	KSTAT_DATA_UINT32	2
497 #define	KSTAT_DATA_INT64	3
498 #define	KSTAT_DATA_UINT64	4
499 
500 #if !defined(_LP64)
501 #define	KSTAT_DATA_LONG		KSTAT_DATA_INT32
502 #define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT32
503 #else
504 #if !defined(_KERNEL)
505 #define	KSTAT_DATA_LONG		KSTAT_DATA_INT64
506 #define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT64
507 #else
508 #define	KSTAT_DATA_LONG		7	/* only visible to the kernel */
509 #define	KSTAT_DATA_ULONG	8	/* only visible to the kernel */
510 #endif	/* !_KERNEL */
511 #endif	/* !_LP64 */
512 
513 /*
514  * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
515  * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
516  * (kstat_named_t)).  ks_data_size in these cases is equal to the sum of the
517  * amount of space required to store the strings (ie, the sum of
518  * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
519  * space required to store the kstat_named_t's.
520  *
521  * The default update routine will update ks_data_size automatically for
522  * variable-length kstats containing long strings (using the default update
523  * routine only makes sense if the string is the only thing that is changing
524  * in size, and ks_ndata is constant).  Fixed-length kstats containing long
525  * strings must explicitly change ks_data_size (after creation but before
526  * initialization) to reflect the correct amount of space required for the
527  * long strings and the kstat_named_t's.
528  */
529 #define	KSTAT_DATA_STRING	9
530 
531 /* These types are obsolete */
532 
533 #define	KSTAT_DATA_LONGLONG	KSTAT_DATA_INT64
534 #define	KSTAT_DATA_ULONGLONG	KSTAT_DATA_UINT64
535 #define	KSTAT_DATA_FLOAT	5
536 #define	KSTAT_DATA_DOUBLE	6
537 
538 #define	KSTAT_NAMED_PTR(kptr)	((kstat_named_t *)(kptr)->ks_data)
539 
540 /*
541  * Retrieve the pointer of the string contained in the given named kstat.
542  */
543 #define	KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)
544 
545 /*
546  * Retrieve the length of the buffer required to store the string in the given
547  * named kstat.
548  */
549 #define	KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
550 
551 /*
552  * Interrupt statistics.
553  *
554  * An interrupt is a hard interrupt (sourced from the hardware device
555  * itself), a soft interrupt (induced by the system via the use of
556  * some system interrupt source), a watchdog interrupt (induced by
557  * a periodic timer call), spurious (an interrupt entry point was
558  * entered but there was no interrupt condition to service),
559  * or multiple service (an interrupt condition was detected and
560  * serviced just prior to returning from any of the other types).
561  *
562  * Measurement of the spurious class of interrupts is useful for
563  * autovectored devices in order to pinpoint any interrupt latency
564  * problems in a particular system configuration.
565  *
566  * Devices that have more than one interrupt of the same
567  * type should use multiple structures.
568  */
569 
570 #define	KSTAT_INTR_HARD			0
571 #define	KSTAT_INTR_SOFT			1
572 #define	KSTAT_INTR_WATCHDOG		2
573 #define	KSTAT_INTR_SPURIOUS		3
574 #define	KSTAT_INTR_MULTSVC		4
575 
576 #define	KSTAT_NUM_INTRS			5
577 
578 typedef struct kstat_intr {
579 	uint_t	intrs[KSTAT_NUM_INTRS];	/* interrupt counters */
580 } kstat_intr_t;
581 
582 #define	KSTAT_INTR_PTR(kptr)	((kstat_intr_t *)(kptr)->ks_data)
583 
584 /*
585  * I/O statistics.
586  */
587 
588 typedef struct kstat_io {
589 
590 	/*
591 	 * Basic counters.
592 	 *
593 	 * The counters should be updated at the end of service
594 	 * (e.g., just prior to calling biodone()).
595 	 */
596 
597 	u_longlong_t	nread;		/* number of bytes read */
598 	u_longlong_t	nwritten;	/* number of bytes written */
599 	uint_t		reads;		/* number of read operations */
600 	uint_t		writes;		/* number of write operations */
601 
602 	/*
603 	 * Accumulated time and queue length statistics.
604 	 *
605 	 * Accumulated time statistics are kept as a running sum
606 	 * of "active" time.  Queue length statistics are kept as a
607 	 * running sum of the product of queue length and elapsed time
608 	 * at that length -- i.e., a Riemann sum for queue length
609 	 * integrated against time.  (You can also think of the active time
610 	 * as a Riemann sum, for the boolean function (queue_length > 0)
611 	 * integrated against time, or you can think of it as the
612 	 * Lebesgue measure of the set on which queue_length > 0.)
613 	 *
614 	 *		^
615 	 *		|			_________
616 	 *		8			| i4	|
617 	 *		|			|	|
618 	 *	Queue	6			|	|
619 	 *	Length	|	_________	|	|
620 	 *		4	| i2	|_______|	|
621 	 *		|	|	    i3		|
622 	 *		2_______|			|
623 	 *		|    i1				|
624 	 *		|_______________________________|
625 	 *		Time->	t1	t2	t3	t4
626 	 *
627 	 * At each change of state (entry or exit from the queue),
628 	 * we add the elapsed time (since the previous state change)
629 	 * to the active time if the queue length was non-zero during
630 	 * that interval; and we add the product of the elapsed time
631 	 * times the queue length to the running length*time sum.
632 	 *
633 	 * This method is generalizable to measuring residency
634 	 * in any defined system: instead of queue lengths, think
635 	 * of "outstanding RPC calls to server X".
636 	 *
637 	 * A large number of I/O subsystems have at least two basic
638 	 * "lists" of transactions they manage: one for transactions
639 	 * that have been accepted for processing but for which processing
640 	 * has yet to begin, and one for transactions which are actively
641 	 * being processed (but not done). For this reason, two cumulative
642 	 * time statistics are defined here: wait (pre-service) time,
643 	 * and run (service) time.
644 	 *
645 	 * All times are 64-bit nanoseconds (hrtime_t), as returned by
646 	 * gethrtime().
647 	 *
648 	 * The units of cumulative busy time are accumulated nanoseconds.
649 	 * The units of cumulative length*time products are elapsed time
650 	 * times queue length.
651 	 *
652 	 * Updates to the fields below are performed implicitly by calls to
653 	 * these six functions:
654 	 *
655 	 *	kstat_waitq_enter()
656 	 *	kstat_waitq_exit()
657 	 *	kstat_runq_enter()
658 	 *	kstat_runq_exit()
659 	 *
660 	 *	kstat_waitq_to_runq()		(see below)
661 	 *	kstat_runq_back_to_waitq()	(see below)
662 	 *
663 	 * Since kstat_waitq_exit() is typically followed immediately
664 	 * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
665 	 * function which performs both operations.  This is a performance
666 	 * win since only one timestamp is required.
667 	 *
668 	 * In some instances, it may be necessary to move a request from
669 	 * the run queue back to the wait queue, e.g. for write throttling.
670 	 * For these situations, call kstat_runq_back_to_waitq().
671 	 *
672 	 * These fields should never be updated by any other means.
673 	 */
674 
675 	hrtime_t wtime;		/* cumulative wait (pre-service) time */
676 	hrtime_t wlentime;	/* cumulative wait length*time product */
677 	hrtime_t wlastupdate;	/* last time wait queue changed */
678 	hrtime_t rtime;		/* cumulative run (service) time */
679 	hrtime_t rlentime;	/* cumulative run length*time product */
680 	hrtime_t rlastupdate;	/* last time run queue changed */
681 
682 	uint_t	wcnt;		/* count of elements in wait state */
683 	uint_t	rcnt;		/* count of elements in run state */
684 
685 } kstat_io_t;
686 
687 #define	KSTAT_IO_PTR(kptr)	((kstat_io_t *)(kptr)->ks_data)
688 
689 /*
690  * Event timer statistics - cumulative elapsed time and number of events.
691  *
692  * Updates to these fields are performed implicitly by calls to
693  * kstat_timer_start() and kstat_timer_stop().
694  */
695 
696 typedef struct kstat_timer {
697 	char		name[KSTAT_STRLEN];	/* event name */
698 	uchar_t		resv;			/* reserved */
699 	u_longlong_t	num_events;		/* number of events */
700 	hrtime_t	elapsed_time;		/* cumulative elapsed time */
701 	hrtime_t	min_time;		/* shortest event duration */
702 	hrtime_t	max_time;		/* longest event duration */
703 	hrtime_t	start_time;		/* previous event start time */
704 	hrtime_t	stop_time;		/* previous event stop time */
705 } kstat_timer_t;
706 
707 #define	KSTAT_TIMER_PTR(kptr)	((kstat_timer_t *)(kptr)->ks_data)
708 
709 #if	defined(_KERNEL) || defined(_FAKE_KERNEL)
710 
711 #include <sys/t_lock.h>
712 
713 extern kid_t	kstat_chain_id;		/* bumped at each state change */
714 extern void	kstat_init(void);	/* initialize kstat framework */
715 
716 /*
717  * Adding and deleting kstats.
718  *
719  * The typical sequence to add a kstat is:
720  *
721  *	ksp = kstat_create(module, instance, name, class, type, ndata, flags);
722  *	if (ksp) {
723  *		... provider initialization, if necessary
724  *		kstat_install(ksp);
725  *	}
726  *
727  * There are three logically distinct steps here:
728  *
729  * Step 1: System Initialization (kstat_create)
730  *
731  * kstat_create() performs system initialization.  kstat_create()
732  * allocates memory for the entire kstat (header plus data), initializes
733  * all header fields, initializes the data section to all zeroes, assigns
734  * a unique KID, and puts the kstat onto the system's kstat chain.
735  * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
736  * because the provider (caller) has not yet had a chance to initialize
737  * the data section.
738  *
739  * By default, kstats are exported to all zones on the system.  A kstat may be
740  * created via kstat_create_zone() to specify a zone to which the statistics
741  * should be exported.  kstat_zone_add() may be used to specify additional
742  * zones to which the statistics are to be exported.
743  *
744  * Step 2: Provider Initialization
745  *
746  * The provider performs any necessary initialization of the data section,
747  * e.g. setting the name fields in a KSTAT_TYPE_NAMED.  Virtual kstats set
748  * the ks_data field at this time.  The provider may also set the ks_update,
749  * ks_snapshot, ks_private, and ks_lock fields if necessary.
750  *
751  * Step 3: Installation (kstat_install)
752  *
753  * Once the kstat is completely initialized, kstat_install() clears the
754  * INVALID flag, thus making the kstat accessible to the outside world.
755  * kstat_install() also clears the DORMANT flag for persistent kstats.
756  *
757  * Removing a kstat from the system
758  *
759  * kstat_delete(ksp) removes ksp from the kstat chain and frees all
760  * associated system resources.  NOTE: When you call kstat_delete(),
761  * you must NOT be holding that kstat's ks_lock.  Otherwise, you may
762  * deadlock with a kstat reader.
763  *
764  * Persistent kstats
765  *
766  * From the provider's point of view, persistence is transparent.  The only
767  * difference between ephemeral (normal) kstats and persistent kstats
768  * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create().  Magically,
769  * this has the effect of making your data visible even when you're
770  * not home.  Persistence is important to tools like iostat, which want
771  * to get a meaningful picture of disk activity.  Without persistence,
772  * raw disk i/o statistics could never accumulate: they would come and
773  * go with each open/close of the raw device.
774  *
775  * The magic of persistence works by slightly altering the behavior of
776  * kstat_create() and kstat_delete().  The first call to kstat_create()
777  * creates a new kstat, as usual.  However, kstat_delete() does not
778  * actually delete the kstat: it performs one final update of the data
779  * (i.e., calls the ks_update routine), marks the kstat as dormant, and
780  * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
781  * to their default values (since they might otherwise point to garbage,
782  * e.g. if the provider is going away).  kstat clients can still access
783  * the dormant kstat just like a live kstat; they just continue to see
784  * the final data values as long as the kstat remains dormant.
785  * All subsequent kstat_create() calls simply find the already-existing,
786  * dormant kstat and return a pointer to it, without altering any fields.
787  * The provider then performs its usual initialization sequence, and
788  * calls kstat_install().  kstat_install() uses the old data values to
789  * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
790  * thus making it seem like you were never gone.
791  */
792 
793 extern kstat_t *kstat_create(const char *, int, const char *, const char *,
794     uchar_t, uint_t, uchar_t);
795 extern kstat_t *kstat_create_zone(const char *, int, const char *,
796     const char *, uchar_t, uint_t, uchar_t, zoneid_t);
797 extern void kstat_install(kstat_t *);
798 extern void kstat_delete(kstat_t *);
799 extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
800 extern void kstat_set_string(char *, const char *);
801 extern void kstat_delete_byname(const char *, int, const char *);
802 extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
803 extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
804 extern void kstat_timer_init(kstat_timer_t *, const char *);
805 extern void kstat_waitq_enter(kstat_io_t *);
806 extern void kstat_waitq_exit(kstat_io_t *);
807 extern void kstat_runq_enter(kstat_io_t *);
808 extern void kstat_runq_exit(kstat_io_t *);
809 extern void kstat_waitq_to_runq(kstat_io_t *);
810 extern void kstat_runq_back_to_waitq(kstat_io_t *);
811 extern void kstat_timer_start(kstat_timer_t *);
812 extern void kstat_timer_stop(kstat_timer_t *);
813 
814 extern void kstat_zone_add(kstat_t *, zoneid_t);
815 extern void kstat_zone_remove(kstat_t *, zoneid_t);
816 extern int kstat_zone_find(kstat_t *, zoneid_t);
817 
818 extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
819 extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
820 extern void kstat_rele(kstat_t *);
821 
822 #endif	/* defined(_KERNEL) || defined(_FAKE_KERNEL) */
823 
824 #ifdef	__cplusplus
825 }
826 #endif
827 
828 #endif	/* _SYS_KSTAT_H */
829