xref: /illumos-gate/usr/src/uts/common/os/kstat_fr.c (revision 354507029a42e4bcb1ea64fc4685f2bfd4792db8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 /*
28  * Kernel statistics framework
29  */
30 
31 #include <sys/types.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/t_lock.h>
36 #include <sys/param.h>
37 #include <sys/errno.h>
38 #include <sys/vmem.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/kstat.h>
42 #include <sys/sysinfo.h>
43 #include <sys/cpuvar.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
47 #include <sys/vfs.h>
48 #include <sys/dnlc.h>
49 #include <sys/var.h>
50 #include <sys/vmmeter.h>
51 #include <sys/debug.h>
52 #include <sys/kobj.h>
53 #include <sys/avl.h>
54 #include <sys/pool_pset.h>
55 #include <sys/cpupart.h>
56 #include <sys/zone.h>
57 #include <sys/loadavg.h>
58 #include <vm/page.h>
59 #include <vm/anon.h>
60 #include <vm/seg_kmem.h>
61 
/*
 * Global lock to protect the AVL trees and kstat_chain_id.  It is also
 * held whenever an ekstat's e_owner field or zone list is examined or
 * modified (see kstat_hold(), kstat_rele() and the kstat_zone_*() routines).
 */
static kmutex_t kstat_chain_lock;
66 
67 /*
68  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
69  *
70  * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
71  *
72  * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
73  *	/dev/kstat uses the KID as a cookie for kstat lookups.
74  *
75  * We reserve the first two IDs because some kstats are created before
76  * the well-known ones (kstat_headers = 0, kstat_types = 1).
77  *
78  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
79  * into a particular kstat, which is logically equivalent to a kstat being
80  * installed/deleted.
81  */
82 
kid_t kstat_chain_id = 2;	/* starts past the two reserved IDs (0 and 1) */
84 
85 /*
86  * As far as zones are concerned, there are 3 types of kstat:
87  *
88  * 1) Those which have a well-known name, and which should return per-zone data
89  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
90  * is an example of this type of kstat.
91  *
92  * 2) Those which should only be exported to a particular list of zones.
93  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
94  * able to see NFS mounts associated with zone B, while we want the
95  * global zone to be able to see all mounts on the system.
96  *
97  * 3) Those that can be exported to all zones.  Most system-related
98  * kstats fall within this category.
99  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
106  *
107  * Writing to kstats is currently disallowed from within a non-global
108  * zone, although this restriction could be removed in the future.
109  */
typedef struct kstat_zone {
	zoneid_t zoneid;		/* zone exported to, or ALL_ZONES */
	struct kstat_zone *next;	/* next list entry; NULL at the end */
} kstat_zone_t;
114 
/*
 * Extended kstat structure -- for internal use only.
 *
 * e_ks must remain the first member: this file converts between
 * kstat_t * and ekstat_t * with plain pointer casts.
 */
typedef struct ekstat {
	kstat_t		e_ks;		/* the kstat itself */
	size_t		e_size;		/* total allocation size */
	kthread_t	*e_owner;	/* thread holding this kstat */
	kcondvar_t	e_cv;		/* wait for owner == NULL */
	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
	kstat_zone_t	e_zone;		/* head of list of zones to export to */
} ekstat_t;
127 
/*
 * Static buffer from which kstat_alloc() carves kstats for as long as
 * kstat_arena is NULL.  kstat_init() later passes the same buffer to
 * vmem_create() when it sets up kstat_arena.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial;	/* next unused byte */
static size_t kstat_initial_avail = sizeof (kstat_initial); /* bytes left */
static vmem_t *kstat_arena;	/* NULL until kstat_init() creates it */

/* kstat_alloc() rounds every allocation up to a multiple of this. */
#define	KSTAT_ALIGN	(sizeof (uint64_t))

static avl_tree_t kstat_avl_bykid;	/* ordered by kstat_compare_bykid() */
static avl_tree_t kstat_avl_byname;	/* ordered by kstat_compare_byname() */
137 
/*
 * Various pointers we need to create kstats at boot time in kstat_init().
 * Each *_ptr / *_ndata pair supplies the data area and element count of
 * one virtual named kstat.
 */
extern	kstat_named_t	*segmapcnt_ptr;
extern	uint_t		segmapcnt_ndata;
extern	int		segmap_kstat_update(kstat_t *, int);
extern	kstat_named_t	*biostats_ptr;
extern	uint_t		biostats_ndata;
extern	kstat_named_t	*pollstats_ptr;
extern	uint_t		pollstats_ndata;

/*
 * vac, nproc and boot_time are reported by system_misc_kstat_update();
 * sysinfo and vminfo are exported directly as raw kstats by kstat_init().
 */
extern	int	vac;
extern	uint_t	nproc;
extern	time_t	boot_time;
extern	sysinfo_t	sysinfo;
extern	vminfo_t	vminfo;
154 
/*
 * Data area of the unix:0:system_misc kstat created in kstat_init().
 * Only the names and data types are set here; the values are filled in
 * by system_misc_kstat_update() each time the kstat is read.
 */
struct {
	kstat_named_t ncpus;
	kstat_named_t lbolt;
	kstat_named_t deficit;
	kstat_named_t clk_intr;
	kstat_named_t vac;
	kstat_named_t nproc;
	kstat_named_t avenrun_1min;
	kstat_named_t avenrun_5min;
	kstat_named_t avenrun_15min;
	kstat_named_t boot_time;
} system_misc_kstat = {
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "lbolt",		KSTAT_DATA_UINT32 },
	{ "deficit",		KSTAT_DATA_UINT32 },
	{ "clk_intr",		KSTAT_DATA_UINT32 },
	{ "vac",		KSTAT_DATA_UINT32 },
	{ "nproc",		KSTAT_DATA_UINT32 },
	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
	{ "boot_time",		KSTAT_DATA_UINT32 },
};
178 
/*
 * Data area of the unix:0:system_pages kstat created in kstat_init().
 * Only the names and data types are set here; the values are filled in
 * by system_pages_kstat_update() each time the kstat is read.
 */
struct {
	kstat_named_t physmem;
	kstat_named_t nalloc;
	kstat_named_t nfree;
	kstat_named_t nalloc_calls;
	kstat_named_t nfree_calls;
	kstat_named_t kernelbase;
	kstat_named_t econtig;
	kstat_named_t freemem;
	kstat_named_t availrmem;
	kstat_named_t lotsfree;
	kstat_named_t desfree;
	kstat_named_t minfree;
	kstat_named_t fastscan;
	kstat_named_t slowscan;
	kstat_named_t nscan;
	kstat_named_t desscan;
	kstat_named_t pp_kernel;
	kstat_named_t pagesfree;
	kstat_named_t pageslocked;
	kstat_named_t pagestotal;
} system_pages_kstat = {
	{ "physmem",		KSTAT_DATA_ULONG },
	{ "nalloc",		KSTAT_DATA_ULONG },
	{ "nfree",		KSTAT_DATA_ULONG },
	{ "nalloc_calls",	KSTAT_DATA_ULONG },
	{ "nfree_calls",	KSTAT_DATA_ULONG },
	{ "kernelbase",		KSTAT_DATA_ULONG },
	{ "econtig", 		KSTAT_DATA_ULONG },
	{ "freemem", 		KSTAT_DATA_ULONG },
	{ "availrmem", 		KSTAT_DATA_ULONG },
	{ "lotsfree", 		KSTAT_DATA_ULONG },
	{ "desfree", 		KSTAT_DATA_ULONG },
	{ "minfree", 		KSTAT_DATA_ULONG },
	{ "fastscan", 		KSTAT_DATA_ULONG },
	{ "slowscan", 		KSTAT_DATA_ULONG },
	{ "nscan", 		KSTAT_DATA_ULONG },
	{ "desscan", 		KSTAT_DATA_ULONG },
	{ "pp_kernel", 		KSTAT_DATA_ULONG },
	{ "pagesfree", 		KSTAT_DATA_ULONG },
	{ "pageslocked", 	KSTAT_DATA_ULONG },
	{ "pagestotal",		KSTAT_DATA_ULONG },
};
222 
/*
 * ks_update / ks_snapshot callbacks that kstat_init() installs on the
 * kstat_headers, system_misc and system_pages kstats.
 */
static int header_kstat_update(kstat_t *, int);
static int header_kstat_snapshot(kstat_t *, void *, int);
static int system_misc_kstat_update(kstat_t *, int);
static int system_pages_kstat_update(kstat_t *, int);
227 
/*
 * Per-type properties, indexed by ks_type.  kstat_create_zone() uses
 * size to compute the data section size (size * ks_ndata) and
 * min_ndata/max_ndata to validate ks_ndata; kstat_init() publishes the
 * names through the unix:0:kstat_types kstat.
 * NOTE(review): the row order presumably follows the KSTAT_TYPE_*
 * constants in <sys/kstat.h> -- confirm before adding a type.
 */
static struct {
	char	name[KSTAT_STRLEN];
	size_t	size;
	uint_t	min_ndata;
	uint_t	max_ndata;
} kstat_data_type[KSTAT_NUM_TYPES] = {
	{ "raw",		1,			0,	INT_MAX	},
	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
};
240 
241 int
242 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
243 {
244 	ekstat_t *e = (ekstat_t *)k;
245 	kstat_zone_t *kz;
246 
247 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
248 	for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
249 		if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
250 			return (1);
251 		if (zoneid == kz->zoneid)
252 			return (1);
253 	}
254 	return (0);
255 }
256 
257 void
258 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
259 {
260 	ekstat_t *e = (ekstat_t *)k;
261 	kstat_zone_t *kz, *t = NULL;
262 
263 	mutex_enter(&kstat_chain_lock);
264 	if (zoneid == e->e_zone.zoneid) {
265 		kz = e->e_zone.next;
266 		ASSERT(kz != NULL);
267 		e->e_zone.zoneid = kz->zoneid;
268 		e->e_zone.next = kz->next;
269 		goto out;
270 	}
271 	for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
272 		if (kz->next->zoneid == zoneid) {
273 			t = kz->next;
274 			kz->next = t->next;
275 			break;
276 		}
277 	}
278 	ASSERT(t != NULL);	/* we removed something */
279 	kz = t;
280 out:
281 	kstat_chain_id++;
282 	mutex_exit(&kstat_chain_lock);
283 	kmem_free(kz, sizeof (*kz));
284 }
285 
286 void
287 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
288 {
289 	ekstat_t *e = (ekstat_t *)k;
290 	kstat_zone_t *kz;
291 
292 	kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
293 	if (kz == NULL)
294 		return;
295 	mutex_enter(&kstat_chain_lock);
296 	kz->zoneid = zoneid;
297 	kz->next = e->e_zone.next;
298 	e->e_zone.next = kz;
299 	kstat_chain_id++;
300 	mutex_exit(&kstat_chain_lock);
301 }
302 
303 /*
304  * Compare the list of zones for the given kstats, returning 0 if they match
305  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
306  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
307  * two lists always has one element, and this is an O(n) operation rather than
308  * O(n^2).
309  */
310 static int
311 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
312 {
313 	kstat_zone_t *kz1, *kz2;
314 
315 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
316 	for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
317 		for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
318 			if (kz1->zoneid == ALL_ZONES ||
319 			    kz2->zoneid == ALL_ZONES)
320 				return (0);
321 			if (kz1->zoneid == kz2->zoneid)
322 				return (0);
323 		}
324 	}
325 	return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
326 }
327 
328 /*
329  * Support for keeping kstats sorted in AVL trees for fast lookups.
330  */
331 static int
332 kstat_compare_bykid(const void *a1, const void *a2)
333 {
334 	const kstat_t *k1 = a1;
335 	const kstat_t *k2 = a2;
336 
337 	if (k1->ks_kid < k2->ks_kid)
338 		return (-1);
339 	if (k1->ks_kid > k2->ks_kid)
340 		return (1);
341 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
342 }
343 
344 static int
345 kstat_compare_byname(const void *a1, const void *a2)
346 {
347 	const kstat_t *k1 = a1;
348 	const kstat_t *k2 = a2;
349 	int s;
350 
351 	s = strcmp(k1->ks_module, k2->ks_module);
352 	if (s > 0)
353 		return (1);
354 	if (s < 0)
355 		return (-1);
356 
357 	if (k1->ks_instance < k2->ks_instance)
358 		return (-1);
359 	if (k1->ks_instance > k2->ks_instance)
360 		return (1);
361 
362 	s = strcmp(k1->ks_name, k2->ks_name);
363 	if (s > 0)
364 		return (1);
365 	if (s < 0)
366 		return (-1);
367 
368 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
369 }
370 
371 static kstat_t *
372 kstat_hold(avl_tree_t *t, ekstat_t *template)
373 {
374 	kstat_t *ksp;
375 	ekstat_t *e;
376 
377 	mutex_enter(&kstat_chain_lock);
378 	for (;;) {
379 		ksp = avl_find(t, template, NULL);
380 		if (ksp == NULL)
381 			break;
382 		e = (ekstat_t *)ksp;
383 		if (e->e_owner == NULL) {
384 			e->e_owner = curthread;
385 			break;
386 		}
387 		cv_wait(&e->e_cv, &kstat_chain_lock);
388 	}
389 	mutex_exit(&kstat_chain_lock);
390 	return (ksp);
391 }
392 
393 void
394 kstat_rele(kstat_t *ksp)
395 {
396 	ekstat_t *e = (ekstat_t *)ksp;
397 
398 	mutex_enter(&kstat_chain_lock);
399 	ASSERT(e->e_owner == curthread);
400 	e->e_owner = NULL;
401 	cv_broadcast(&e->e_cv);
402 	mutex_exit(&kstat_chain_lock);
403 }
404 
405 kstat_t *
406 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
407 {
408 	ekstat_t e;
409 
410 	e.e_ks.ks_kid = kid;
411 	e.e_zone.zoneid = zoneid;
412 	e.e_zone.next = NULL;
413 
414 	return (kstat_hold(&kstat_avl_bykid, &e));
415 }
416 
417 kstat_t *
418 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
419     zoneid_t ks_zoneid)
420 {
421 	ekstat_t e;
422 
423 	kstat_set_string(e.e_ks.ks_module, ks_module);
424 	e.e_ks.ks_instance = ks_instance;
425 	kstat_set_string(e.e_ks.ks_name, ks_name);
426 	e.e_zone.zoneid = ks_zoneid;
427 	e.e_zone.next = NULL;
428 	return (kstat_hold(&kstat_avl_byname, &e));
429 }
430 
431 static ekstat_t *
432 kstat_alloc(size_t size)
433 {
434 	ekstat_t *e = NULL;
435 
436 	size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
437 
438 	if (kstat_arena == NULL) {
439 		if (size <= kstat_initial_avail) {
440 			e = kstat_initial_ptr;
441 			kstat_initial_ptr = (char *)kstat_initial_ptr + size;
442 			kstat_initial_avail -= size;
443 		}
444 	} else {
445 		e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
446 	}
447 
448 	if (e != NULL) {
449 		bzero(e, size);
450 		e->e_size = size;
451 		cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
452 	}
453 
454 	return (e);
455 }
456 
457 static void
458 kstat_free(ekstat_t *e)
459 {
460 	cv_destroy(&e->e_cv);
461 	vmem_free(kstat_arena, e, e->e_size);
462 }
463 
/*
 * Create various system kstats.
 *
 * Sets up the kstat vmem arena on top of the static kstat_initial buffer,
 * accounts for the kstats already carved out of that buffer, and then
 * creates and installs the framework's own kstats (kstat_headers,
 * kstat_types) followed by a set of system-wide ones.  Failure to create
 * kstat_headers is fatal; any other kstat that cannot be created is
 * silently left out.
 */
void
kstat_init(void)
{
	kstat_t *ksp;
	ekstat_t *e;
	avl_tree_t *t = &kstat_avl_bykid;

	/*
	 * Set up the kstat vmem arena.
	 */
	kstat_arena = vmem_create("kstat",
	    kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);

	/*
	 * Make initial kstats appear as though they were allocated.
	 * Each request is constrained to the exact range [e, e + e_size)
	 * that the kstat already occupies inside kstat_initial.
	 */
	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
		(void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
		    0, 0, e, (char *)e + e->e_size,
		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);

	/*
	 * The mother of all kstats.  The first kstat in the system, which
	 * always has KID 0, has the headers for all kstats (including itself)
	 * as its data.  Thus, the kstat driver does not need any special
	 * interface to extract the kstat chain.
	 */
	kstat_chain_id = 0;
	ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
	    0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
	if (ksp) {
		/* the header callbacks walk the AVL tree under this lock */
		ksp->ks_lock = &kstat_chain_lock;
		ksp->ks_update = header_kstat_update;
		ksp->ks_snapshot = header_kstat_snapshot;
		kstat_install(ksp);
	} else {
		panic("cannot create kstat 'kstat_headers'");
	}

	/*
	 * One named entry per kstat type; the value of each entry is its
	 * index into kstat_data_type[].
	 */
	ksp = kstat_create("unix", 0, "kstat_types", "kstat",
	    KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
	if (ksp) {
		int i;
		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);

		for (i = 0; i < KSTAT_NUM_TYPES; i++) {
			kstat_named_init(&kn[i], kstat_data_type[i].name,
			    KSTAT_DATA_ULONG);
			kn[i].value.ul = i;
		}
		kstat_install(ksp);
	}

	/*
	 * The remaining kstats are all virtual: ks_data is pointed at
	 * storage that lives outside the kstat allocation.
	 */
	ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
	    sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &sysinfo;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
	    sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &vminfo;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
	    segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) segmapcnt_ptr;
		ksp->ks_update = segmap_kstat_update;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
	    biostats_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) biostats_ptr;
		kstat_install(ksp);
	}

#ifdef VAC
	ksp = kstat_create("unix", 0, "flushmeter", "hat", KSTAT_TYPE_RAW,
	    sizeof (struct flushmeter), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &flush_cnt;
		kstat_install(ksp);
	}
#endif	/* VAC */

	ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
	    sizeof (struct var), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &v;
		kstat_install(ksp);
	}

	/* ks_ndata is the number of kstat_named_t members in the struct */
	ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
	    sizeof (system_misc_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &system_misc_kstat;
		ksp->ks_update = system_misc_kstat_update;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
	    sizeof (system_pages_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &system_pages_kstat;
		ksp->ks_update = system_pages_kstat_update;
		kstat_install(ksp);
	}

	/* pollstats is the only kstat created here that is writable */
	ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
	    pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

	if (ksp) {
		ksp->ks_data = pollstats_ptr;
		kstat_install(ksp);
	}
}
592 
593 /*
594  * Caller of this should ensure that the string pointed by src
595  * doesn't change while kstat's lock is held. Not doing so defeats
596  * kstat's snapshot strategy as explained in <sys/kstat.h>
597  */
598 void
599 kstat_named_setstr(kstat_named_t *knp, const char *src)
600 {
601 	if (knp->data_type != KSTAT_DATA_STRING)
602 		panic("kstat_named_setstr('%p', '%p'): "
603 		    "named kstat is not of type KSTAT_DATA_STRING",
604 		    (void *)knp, (void *)src);
605 
606 	KSTAT_NAMED_STR_PTR(knp) = (char *)src;
607 	if (src != NULL)
608 		KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
609 	else
610 		KSTAT_NAMED_STR_BUFLEN(knp) = 0;
611 }
612 
/*
 * Copy src into the KSTAT_STRLEN-byte buffer dst, truncating it to
 * KSTAT_STRLEN - 1 characters if necessary.  The whole buffer is cleared
 * first, so the result is always NUL-terminated and every byte after the
 * string is zero.
 */
void
kstat_set_string(char *dst, const char *src)
{
	const size_t copy_max = KSTAT_STRLEN - 1;

	bzero(dst, copy_max + 1);
	(void) strncpy(dst, src, copy_max);
}
619 
620 void
621 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
622 {
623 	kstat_set_string(knp->name, name);
624 	knp->data_type = data_type;
625 
626 	if (data_type == KSTAT_DATA_STRING)
627 		kstat_named_setstr(knp, NULL);
628 }
629 
630 void
631 kstat_timer_init(kstat_timer_t *ktp, const char *name)
632 {
633 	kstat_set_string(ktp->name, name);
634 }
635 
636 /* ARGSUSED */
637 static int
638 default_kstat_update(kstat_t *ksp, int rw)
639 {
640 	uint_t i;
641 	size_t len = 0;
642 	kstat_named_t *knp;
643 
644 	/*
645 	 * Named kstats with variable-length long strings have a standard
646 	 * way of determining how much space is needed to hold the snapshot:
647 	 */
648 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
649 	    (ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
650 
651 		/*
652 		 * Add in the space required for the strings
653 		 */
654 		knp = KSTAT_NAMED_PTR(ksp);
655 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
656 			if (knp->data_type == KSTAT_DATA_STRING)
657 				len += KSTAT_NAMED_STR_BUFLEN(knp);
658 		}
659 		ksp->ks_data_size =
660 		    ksp->ks_ndata * sizeof (kstat_named_t) + len;
661 	}
662 	return (0);
663 }
664 
665 static int
666 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
667 {
668 	kstat_io_t *kiop;
669 	hrtime_t cur_time;
670 	size_t	namedsz;
671 
672 	ksp->ks_snaptime = cur_time = gethrtime();
673 
674 	if (rw == KSTAT_WRITE) {
675 		if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
676 			return (EACCES);
677 		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
678 		return (0);
679 	}
680 
681 	/*
682 	 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
683 	 * number of kstat_named_t structures, followed by an optional
684 	 * string segment. The ks_data generally holds only the
685 	 * kstat_named_t structures. So we copy it first. The strings,
686 	 * if any, are copied below. For other kstat types, ks_data holds the
687 	 * entire buffer.
688 	 */
689 
690 	namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
691 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
692 		bcopy(ksp->ks_data, buf, namedsz);
693 	else
694 		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
695 
696 	/*
697 	 * Apply kstat type-specific data massaging
698 	 */
699 	switch (ksp->ks_type) {
700 
701 	case KSTAT_TYPE_IO:
702 		/*
703 		 * Normalize time units and deal with incomplete transactions
704 		 */
705 		kiop = (kstat_io_t *)buf;
706 
707 		scalehrtime(&kiop->wtime);
708 		scalehrtime(&kiop->wlentime);
709 		scalehrtime(&kiop->wlastupdate);
710 		scalehrtime(&kiop->rtime);
711 		scalehrtime(&kiop->rlentime);
712 		scalehrtime(&kiop->rlastupdate);
713 
714 		if (kiop->wcnt != 0) {
715 			/* like kstat_waitq_exit */
716 			hrtime_t wfix = cur_time - kiop->wlastupdate;
717 			kiop->wlastupdate = cur_time;
718 			kiop->wlentime += kiop->wcnt * wfix;
719 			kiop->wtime += wfix;
720 		}
721 
722 		if (kiop->rcnt != 0) {
723 			/* like kstat_runq_exit */
724 			hrtime_t rfix = cur_time - kiop->rlastupdate;
725 			kiop->rlastupdate = cur_time;
726 			kiop->rlentime += kiop->rcnt * rfix;
727 			kiop->rtime += rfix;
728 		}
729 		break;
730 
731 	case KSTAT_TYPE_NAMED:
732 		/*
733 		 * Massage any long strings in at the end of the buffer
734 		 */
735 		if (ksp->ks_data_size > namedsz) {
736 			uint_t i;
737 			kstat_named_t *knp = buf;
738 			char *dst = (char *)(knp + ksp->ks_ndata);
739 			/*
740 			 * Copy strings and update pointers
741 			 */
742 			for (i = 0; i < ksp->ks_ndata; i++, knp++) {
743 				if (knp->data_type == KSTAT_DATA_STRING &&
744 				    KSTAT_NAMED_STR_PTR(knp) != NULL) {
745 					bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
746 					    KSTAT_NAMED_STR_BUFLEN(knp));
747 					KSTAT_NAMED_STR_PTR(knp) = dst;
748 					dst += KSTAT_NAMED_STR_BUFLEN(knp);
749 				}
750 			}
751 			ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
752 		}
753 		break;
754 	}
755 	return (0);
756 }
757 
758 static int
759 header_kstat_update(kstat_t *header_ksp, int rw)
760 {
761 	int nkstats = 0;
762 	ekstat_t *e;
763 	avl_tree_t *t = &kstat_avl_bykid;
764 	zoneid_t zoneid;
765 
766 	if (rw == KSTAT_WRITE)
767 		return (EACCES);
768 
769 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
770 
771 	zoneid = getzoneid();
772 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
773 		if (kstat_zone_find((kstat_t *)e, zoneid)) {
774 			nkstats++;
775 		}
776 	}
777 	header_ksp->ks_ndata = nkstats;
778 	header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
779 	return (0);
780 }
781 
782 /*
783  * Copy out the data section of kstat 0, which consists of the list
784  * of all kstat headers.  By specification, these headers must be
785  * copied out in order of increasing KID.
786  */
787 static int
788 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
789 {
790 	ekstat_t *e;
791 	avl_tree_t *t = &kstat_avl_bykid;
792 	zoneid_t zoneid;
793 
794 	header_ksp->ks_snaptime = gethrtime();
795 
796 	if (rw == KSTAT_WRITE)
797 		return (EACCES);
798 
799 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
800 
801 	zoneid = getzoneid();
802 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
803 		if (kstat_zone_find((kstat_t *)e, zoneid)) {
804 			bcopy(&e->e_ks, buf, sizeof (kstat_t));
805 			buf = (char *)buf + sizeof (kstat_t);
806 		}
807 	}
808 
809 	return (0);
810 }
811 
812 /* ARGSUSED */
813 static int
814 system_misc_kstat_update(kstat_t *ksp, int rw)
815 {
816 	int myncpus = ncpus;
817 	int *loadavgp = &avenrun[0];
818 	int loadavg[LOADAVG_NSTATS];
819 
820 	if (rw == KSTAT_WRITE)
821 		return (EACCES);
822 
823 	if (!INGLOBALZONE(curproc)) {
824 		/*
825 		 * Here we grab cpu_lock which is OK as long as no-one in the
826 		 * future attempts to lookup this particular kstat
827 		 * (unix:0:system_misc) while holding cpu_lock.
828 		 */
829 		mutex_enter(&cpu_lock);
830 		if (pool_pset_enabled()) {
831 			psetid_t mypsid = zone_pset_get(curproc->p_zone);
832 			int error;
833 
834 			myncpus = zone_ncpus_get(curproc->p_zone);
835 			ASSERT(myncpus > 0);
836 			error = cpupart_get_loadavg(mypsid, &loadavg[0],
837 			    LOADAVG_NSTATS);
838 			ASSERT(error == 0);
839 			loadavgp = &loadavg[0];
840 		}
841 		mutex_exit(&cpu_lock);
842 	}
843 
844 	system_misc_kstat.ncpus.value.ui32		= (uint32_t)myncpus;
845 	system_misc_kstat.lbolt.value.ui32		= (uint32_t)lbolt;
846 	system_misc_kstat.deficit.value.ui32		= (uint32_t)deficit;
847 	system_misc_kstat.clk_intr.value.ui32		= (uint32_t)lbolt;
848 	system_misc_kstat.vac.value.ui32		= (uint32_t)vac;
849 	system_misc_kstat.nproc.value.ui32		= (uint32_t)nproc;
850 	system_misc_kstat.avenrun_1min.value.ui32	= (uint32_t)loadavgp[0];
851 	system_misc_kstat.avenrun_5min.value.ui32	= (uint32_t)loadavgp[1];
852 	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];
853 	system_misc_kstat.boot_time.value.ui32		= (uint32_t)boot_time;
854 	return (0);
855 }
856 
/*
 * Address reported as "econtig" by system_pages_kstat_update(); the
 * variable that supplies it differs between sparc and other platforms.
 */
#ifdef	__sparc
extern caddr_t	econtig32;
#else	/* !__sparc */
extern caddr_t	econtig;
#endif	/* __sparc */

/* NOTE(review): kvp is not referenced in the part of the file reviewed. */
extern struct vnode kvp;
864 
865 /* ARGSUSED */
866 static int
867 system_pages_kstat_update(kstat_t *ksp, int rw)
868 {
869 	kobj_stat_t kobj_stat;
870 
871 	if (rw == KSTAT_WRITE) {
872 		return (EACCES);
873 	}
874 
875 	kobj_stat_get(&kobj_stat);
876 	system_pages_kstat.physmem.value.ul	= (ulong_t)physmem;
877 	system_pages_kstat.nalloc.value.ul	= kobj_stat.nalloc;
878 	system_pages_kstat.nfree.value.ul	= kobj_stat.nfree;
879 	system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
880 	system_pages_kstat.nfree_calls.value.ul	= kobj_stat.nfree_calls;
881 	system_pages_kstat.kernelbase.value.ul	= (ulong_t)KERNELBASE;
882 
883 #ifdef	__sparc
884 	/*
885 	 * kstat should REALLY be modified to also report kmem64_base and
886 	 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
887 	 * [ kernelbase .. econtig ] no longer is truly reflective of the
888 	 * kernel's vallocs...
889 	 */
890 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig32;
891 #else	/* !__sparc */
892 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig;
893 #endif	/* __sparc */
894 
895 	system_pages_kstat.freemem.value.ul	= (ulong_t)freemem;
896 	system_pages_kstat.availrmem.value.ul	= (ulong_t)availrmem;
897 	system_pages_kstat.lotsfree.value.ul	= (ulong_t)lotsfree;
898 	system_pages_kstat.desfree.value.ul	= (ulong_t)desfree;
899 	system_pages_kstat.minfree.value.ul	= (ulong_t)minfree;
900 	system_pages_kstat.fastscan.value.ul	= (ulong_t)fastscan;
901 	system_pages_kstat.slowscan.value.ul	= (ulong_t)slowscan;
902 	system_pages_kstat.nscan.value.ul	= (ulong_t)nscan;
903 	system_pages_kstat.desscan.value.ul	= (ulong_t)desscan;
904 	system_pages_kstat.pagesfree.value.ul	= (ulong_t)freemem;
905 	system_pages_kstat.pageslocked.value.ul	= (ulong_t)(availrmem_initial -
906 	    availrmem);
907 	system_pages_kstat.pagestotal.value.ul	= (ulong_t)total_pages;
908 	/*
909 	 * pp_kernel represents total pages used by the kernel since the
910 	 * startup. This formula takes into account the boottime kernel
911 	 * footprint and also considers the availrmem changes because of
912 	 * user explicit page locking.
913 	 */
914 	system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
915 	    obp_pages - availrmem - k_anoninfo.ani_mem_resv -
916 	    anon_segkp_pages_locked - pages_locked -
917 	    pages_claimed - pages_useclaim);
918 
919 	return (0);
920 }
921 
922 kstat_t *
923 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
924     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
925 {
926 	return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
927 	    ks_type, ks_ndata, ks_flags, ALL_ZONES));
928 }
929 
930 /*
931  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
932  * the specified name exists, reactivate it.  Returns a pointer to the kstat
933  * on success, NULL on failure.  The kstat will not be visible to the
934  * kstat driver until kstat_install().
935  */
936 kstat_t *
937 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
938     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
939     zoneid_t ks_zoneid)
940 {
941 	size_t ks_data_size;
942 	kstat_t *ksp;
943 	ekstat_t *e;
944 	avl_index_t where;
945 	char namebuf[KSTAT_STRLEN + 16];
946 
947 	if (avl_numnodes(&kstat_avl_bykid) == 0) {
948 		avl_create(&kstat_avl_bykid, kstat_compare_bykid,
949 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
950 
951 		avl_create(&kstat_avl_byname, kstat_compare_byname,
952 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
953 	}
954 
955 	/*
956 	 * If ks_name == NULL, set the ks_name to <module><instance>.
957 	 */
958 	if (ks_name == NULL) {
959 		char buf[KSTAT_STRLEN];
960 		kstat_set_string(buf, ks_module);
961 		(void) sprintf(namebuf, "%s%d", buf, ks_instance);
962 		ks_name = namebuf;
963 	}
964 
965 	/*
966 	 * Make sure it's a valid kstat data type
967 	 */
968 	if (ks_type >= KSTAT_NUM_TYPES) {
969 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
970 		    "invalid kstat type %d",
971 		    ks_module, ks_instance, ks_name, ks_type);
972 		return (NULL);
973 	}
974 
975 	/*
976 	 * Don't allow persistent virtual kstats -- it makes no sense.
977 	 * ks_data points to garbage when the client goes away.
978 	 */
979 	if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
980 	    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
981 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
982 		    "cannot create persistent virtual kstat",
983 		    ks_module, ks_instance, ks_name);
984 		return (NULL);
985 	}
986 
987 	/*
988 	 * Don't allow variable-size physical kstats, since the framework's
989 	 * memory allocation for physical kstat data is fixed at creation time.
990 	 */
991 	if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
992 	    !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
993 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
994 		    "cannot create variable-size physical kstat",
995 		    ks_module, ks_instance, ks_name);
996 		return (NULL);
997 	}
998 
999 	/*
1000 	 * Make sure the number of data fields is within legal range
1001 	 */
1002 	if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1003 	    ks_ndata > kstat_data_type[ks_type].max_ndata) {
1004 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1005 		    "ks_ndata=%d out of range [%d, %d]",
1006 		    ks_module, ks_instance, ks_name, (int)ks_ndata,
1007 		    kstat_data_type[ks_type].min_ndata,
1008 		    kstat_data_type[ks_type].max_ndata);
1009 		return (NULL);
1010 	}
1011 
1012 	ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1013 
1014 	/*
1015 	 * If the named kstat already exists and is dormant, reactivate it.
1016 	 */
1017 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1018 	if (ksp != NULL) {
1019 		if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1020 			/*
1021 			 * The named kstat exists but is not dormant --
1022 			 * this is a kstat namespace collision.
1023 			 */
1024 			kstat_rele(ksp);
1025 			cmn_err(CE_WARN,
1026 			    "kstat_create('%s', %d, '%s'): namespace collision",
1027 			    ks_module, ks_instance, ks_name);
1028 			return (NULL);
1029 		}
1030 		if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1031 		    (ksp->ks_type != ks_type) ||
1032 		    (ksp->ks_ndata != ks_ndata) ||
1033 		    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1034 			/*
1035 			 * The name is the same, but the other key parameters
1036 			 * differ from those of the dormant kstat -- bogus.
1037 			 */
1038 			kstat_rele(ksp);
1039 			cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1040 			    "invalid reactivation of dormant kstat",
1041 			    ks_module, ks_instance, ks_name);
1042 			return (NULL);
1043 		}
1044 		/*
1045 		 * Return dormant kstat pointer to caller.  As usual,
1046 		 * the kstat is marked invalid until kstat_install().
1047 		 */
1048 		ksp->ks_flags |= KSTAT_FLAG_INVALID;
1049 		kstat_rele(ksp);
1050 		return (ksp);
1051 	}
1052 
1053 	/*
1054 	 * Allocate memory for the new kstat header and, if this is a physical
1055 	 * kstat, the data section.
1056 	 */
1057 	e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1058 	if (e == NULL) {
1059 		cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1060 		    "insufficient kernel memory",
1061 		    ks_module, ks_instance, ks_name);
1062 		return (NULL);
1063 	}
1064 
1065 	/*
1066 	 * Initialize as many fields as we can.  The caller may reset
1067 	 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1068 	 * Creators of virtual kstats may also reset ks_data.  It is
1069 	 * also up to the caller to initialize the kstat data section,
1070 	 * if necessary.  All initialization must be complete before
1071 	 * calling kstat_install().
1072 	 */
1073 	e->e_zone.zoneid = ks_zoneid;
1074 	e->e_zone.next = NULL;
1075 
1076 	ksp = &e->e_ks;
1077 	ksp->ks_crtime		= gethrtime();
1078 	kstat_set_string(ksp->ks_module, ks_module);
1079 	ksp->ks_instance	= ks_instance;
1080 	kstat_set_string(ksp->ks_name, ks_name);
1081 	ksp->ks_type		= ks_type;
1082 	kstat_set_string(ksp->ks_class, ks_class);
1083 	ksp->ks_flags		= ks_flags | KSTAT_FLAG_INVALID;
1084 	if (ks_flags & KSTAT_FLAG_VIRTUAL)
1085 		ksp->ks_data	= NULL;
1086 	else
1087 		ksp->ks_data	= (void *)(e + 1);
1088 	ksp->ks_ndata		= ks_ndata;
1089 	ksp->ks_data_size	= ks_data_size;
1090 	ksp->ks_snaptime	= ksp->ks_crtime;
1091 	ksp->ks_update		= default_kstat_update;
1092 	ksp->ks_private		= NULL;
1093 	ksp->ks_snapshot	= default_kstat_snapshot;
1094 	ksp->ks_lock		= NULL;
1095 
1096 	mutex_enter(&kstat_chain_lock);
1097 
1098 	/*
1099 	 * Add our kstat to the AVL trees.
1100 	 */
1101 	if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1102 		mutex_exit(&kstat_chain_lock);
1103 		cmn_err(CE_WARN,
1104 		    "kstat_create('%s', %d, '%s'): namespace collision",
1105 		    ks_module, ks_instance, ks_name);
1106 		kstat_free(e);
1107 		return (NULL);
1108 	}
1109 	avl_insert(&kstat_avl_byname, e, where);
1110 
1111 	/*
1112 	 * Loop around until we find an unused KID.
1113 	 */
1114 	do {
1115 		ksp->ks_kid = kstat_chain_id++;
1116 	} while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1117 	avl_insert(&kstat_avl_bykid, e, where);
1118 
1119 	mutex_exit(&kstat_chain_lock);
1120 
1121 	return (ksp);
1122 }
1123 
1124 /*
1125  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1126  */
1127 void
1128 kstat_install(kstat_t *ksp)
1129 {
1130 	zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1131 
1132 	/*
1133 	 * If this is a variable-size kstat, it MUST provide kstat data locking
1134 	 * to prevent data-size races with kstat readers.
1135 	 */
1136 	if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1137 		panic("kstat_install('%s', %d, '%s'): "
1138 		    "cannot create variable-size kstat without data lock",
1139 		    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1140 	}
1141 
1142 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1143 		cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1144 		    (void *)ksp);
1145 		return;
1146 	}
1147 
1148 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1149 		int has_long_strings = 0;
1150 		uint_t i;
1151 		kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1152 
1153 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1154 			if (knp->data_type == KSTAT_DATA_STRING) {
1155 				has_long_strings = 1;
1156 				break;
1157 			}
1158 		}
1159 		/*
1160 		 * It is an error for a named kstat with fields of
1161 		 * KSTAT_DATA_STRING to be non-virtual.
1162 		 */
1163 		if (has_long_strings && !(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) {
1164 			panic("kstat_install('%s', %d, '%s'): "
1165 			    "named kstat containing KSTAT_DATA_STRING "
1166 			    "is not virtual",
1167 			    ksp->ks_module, ksp->ks_instance,
1168 			    ksp->ks_name);
1169 		}
1170 		/*
1171 		 * The default snapshot routine does not handle KSTAT_WRITE
1172 		 * for long strings.
1173 		 */
1174 		if (has_long_strings && (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1175 		    (ksp->ks_snapshot == default_kstat_snapshot)) {
1176 			panic("kstat_install('%s', %d, '%s'): "
1177 			    "named kstat containing KSTAT_DATA_STRING "
1178 			    "is writable but uses default snapshot routine",
1179 			    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1180 		}
1181 	}
1182 
1183 	if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1184 
1185 		/*
1186 		 * We are reactivating a dormant kstat.  Initialize the
1187 		 * caller's underlying data to the value it had when the
1188 		 * kstat went dormant, and mark the kstat as active.
1189 		 * Grab the provider's kstat lock if it's not already held.
1190 		 */
1191 		kmutex_t *lp = ksp->ks_lock;
1192 		if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1193 			mutex_enter(lp);
1194 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1195 			mutex_exit(lp);
1196 		} else {
1197 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1198 		}
1199 		ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1200 	}
1201 
1202 	/*
1203 	 * Now that the kstat is active, make it visible to the kstat driver.
1204 	 */
1205 	ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1206 	kstat_rele(ksp);
1207 }
1208 
1209 /*
1210  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1211  * just update the data and mark it as dormant.
1212  */
1213 void
1214 kstat_delete(kstat_t *ksp)
1215 {
1216 	kmutex_t *lp;
1217 	ekstat_t *e = (ekstat_t *)ksp;
1218 	zoneid_t zoneid = e->e_zone.zoneid;
1219 	kstat_zone_t *kz;
1220 
1221 	if (ksp == NULL)
1222 		return;
1223 
1224 	lp = ksp->ks_lock;
1225 
1226 	if (lp != NULL && MUTEX_HELD(lp)) {
1227 		panic("kstat_delete(%p): caller holds data lock %p",
1228 		    (void *)ksp, (void *)lp);
1229 	}
1230 
1231 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1232 		cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1233 		    (void *)ksp);
1234 		return;
1235 	}
1236 
1237 	if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1238 		/*
1239 		 * Update the data one last time, so that all activity
1240 		 * prior to going dormant has been accounted for.
1241 		 */
1242 		KSTAT_ENTER(ksp);
1243 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
1244 		KSTAT_EXIT(ksp);
1245 
1246 		/*
1247 		 * Mark the kstat as dormant and restore caller-modifiable
1248 		 * fields to default values, so the kstat is readable during
1249 		 * the dormant phase.
1250 		 */
1251 		ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1252 		ksp->ks_lock = NULL;
1253 		ksp->ks_update = default_kstat_update;
1254 		ksp->ks_private = NULL;
1255 		ksp->ks_snapshot = default_kstat_snapshot;
1256 		kstat_rele(ksp);
1257 		return;
1258 	}
1259 
1260 	/*
1261 	 * Remove the kstat from the framework's AVL trees,
1262 	 * free the allocated memory, and increment kstat_chain_id so
1263 	 * /dev/kstat clients can detect the event.
1264 	 */
1265 	mutex_enter(&kstat_chain_lock);
1266 	avl_remove(&kstat_avl_bykid, e);
1267 	avl_remove(&kstat_avl_byname, e);
1268 	kstat_chain_id++;
1269 	mutex_exit(&kstat_chain_lock);
1270 
1271 	kz = e->e_zone.next;
1272 	while (kz != NULL) {
1273 		kstat_zone_t *t = kz;
1274 
1275 		kz = kz->next;
1276 		kmem_free(t, sizeof (*t));
1277 	}
1278 	kstat_rele(ksp);
1279 	kstat_free(e);
1280 }
1281 
1282 void
1283 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1284     const char *ks_name, zoneid_t ks_zoneid)
1285 {
1286 	kstat_t *ksp;
1287 
1288 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1289 	if (ksp != NULL) {
1290 		kstat_rele(ksp);
1291 		kstat_delete(ksp);
1292 	}
1293 }
1294 
1295 void
1296 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1297 {
1298 	kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1299 }
1300 
1301 /*
1302  * The sparc V9 versions of these routines can be much cheaper than
1303  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1304  * For simplicity, however, we always feed the C versions to lint.
1305  */
1306 #if !defined(__sparc) || defined(lint) || defined(__lint)
1307 
1308 void
1309 kstat_waitq_enter(kstat_io_t *kiop)
1310 {
1311 	hrtime_t new, delta;
1312 	ulong_t wcnt;
1313 
1314 	new = gethrtime_unscaled();
1315 	delta = new - kiop->wlastupdate;
1316 	kiop->wlastupdate = new;
1317 	wcnt = kiop->wcnt++;
1318 	if (wcnt != 0) {
1319 		kiop->wlentime += delta * wcnt;
1320 		kiop->wtime += delta;
1321 	}
1322 }
1323 
1324 void
1325 kstat_waitq_exit(kstat_io_t *kiop)
1326 {
1327 	hrtime_t new, delta;
1328 	ulong_t wcnt;
1329 
1330 	new = gethrtime_unscaled();
1331 	delta = new - kiop->wlastupdate;
1332 	kiop->wlastupdate = new;
1333 	wcnt = kiop->wcnt--;
1334 	ASSERT((int)wcnt > 0);
1335 	kiop->wlentime += delta * wcnt;
1336 	kiop->wtime += delta;
1337 }
1338 
1339 void
1340 kstat_runq_enter(kstat_io_t *kiop)
1341 {
1342 	hrtime_t new, delta;
1343 	ulong_t rcnt;
1344 
1345 	new = gethrtime_unscaled();
1346 	delta = new - kiop->rlastupdate;
1347 	kiop->rlastupdate = new;
1348 	rcnt = kiop->rcnt++;
1349 	if (rcnt != 0) {
1350 		kiop->rlentime += delta * rcnt;
1351 		kiop->rtime += delta;
1352 	}
1353 }
1354 
1355 void
1356 kstat_runq_exit(kstat_io_t *kiop)
1357 {
1358 	hrtime_t new, delta;
1359 	ulong_t rcnt;
1360 
1361 	new = gethrtime_unscaled();
1362 	delta = new - kiop->rlastupdate;
1363 	kiop->rlastupdate = new;
1364 	rcnt = kiop->rcnt--;
1365 	ASSERT((int)rcnt > 0);
1366 	kiop->rlentime += delta * rcnt;
1367 	kiop->rtime += delta;
1368 }
1369 
1370 void
1371 kstat_waitq_to_runq(kstat_io_t *kiop)
1372 {
1373 	hrtime_t new, delta;
1374 	ulong_t wcnt, rcnt;
1375 
1376 	new = gethrtime_unscaled();
1377 
1378 	delta = new - kiop->wlastupdate;
1379 	kiop->wlastupdate = new;
1380 	wcnt = kiop->wcnt--;
1381 	ASSERT((int)wcnt > 0);
1382 	kiop->wlentime += delta * wcnt;
1383 	kiop->wtime += delta;
1384 
1385 	delta = new - kiop->rlastupdate;
1386 	kiop->rlastupdate = new;
1387 	rcnt = kiop->rcnt++;
1388 	if (rcnt != 0) {
1389 		kiop->rlentime += delta * rcnt;
1390 		kiop->rtime += delta;
1391 	}
1392 }
1393 
1394 void
1395 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1396 {
1397 	hrtime_t new, delta;
1398 	ulong_t wcnt, rcnt;
1399 
1400 	new = gethrtime_unscaled();
1401 
1402 	delta = new - kiop->rlastupdate;
1403 	kiop->rlastupdate = new;
1404 	rcnt = kiop->rcnt--;
1405 	ASSERT((int)rcnt > 0);
1406 	kiop->rlentime += delta * rcnt;
1407 	kiop->rtime += delta;
1408 
1409 	delta = new - kiop->wlastupdate;
1410 	kiop->wlastupdate = new;
1411 	wcnt = kiop->wcnt++;
1412 	if (wcnt != 0) {
1413 		kiop->wlentime += delta * wcnt;
1414 		kiop->wtime += delta;
1415 	}
1416 }
1417 
1418 #endif
1419 
1420 void
1421 kstat_timer_start(kstat_timer_t *ktp)
1422 {
1423 	ktp->start_time = gethrtime();
1424 }
1425 
1426 void
1427 kstat_timer_stop(kstat_timer_t *ktp)
1428 {
1429 	hrtime_t	etime;
1430 	u_longlong_t	num_events;
1431 
1432 	ktp->stop_time = etime = gethrtime();
1433 	etime -= ktp->start_time;
1434 	num_events = ktp->num_events;
1435 	if (etime < ktp->min_time || num_events == 0)
1436 		ktp->min_time = etime;
1437 	if (etime > ktp->max_time)
1438 		ktp->max_time = etime;
1439 	ktp->elapsed_time += etime;
1440 	ktp->num_events = num_events + 1;
1441 }
1442