xref: /illumos-gate/usr/src/uts/common/os/kstat_fr.c (revision 67d74cc3e7c9d9461311136a0b2069813a3fd927)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2014, Joyent, Inc. All rights reserved.
24  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25  */
26 
27 /*
28  * Kernel statistics framework
29  */
30 
31 #include <sys/types.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/t_lock.h>
36 #include <sys/param.h>
37 #include <sys/errno.h>
38 #include <sys/vmem.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/kstat.h>
42 #include <sys/sysinfo.h>
43 #include <sys/cpuvar.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
47 #include <sys/vfs.h>
48 #include <sys/dnlc.h>
49 #include <sys/var.h>
50 #include <sys/debug.h>
51 #include <sys/kobj.h>
52 #include <sys/avl.h>
53 #include <sys/pool_pset.h>
54 #include <sys/cpupart.h>
55 #include <sys/zone.h>
56 #include <sys/loadavg.h>
57 #include <vm/page.h>
58 #include <vm/anon.h>
59 #include <vm/seg_kmem.h>
60 
/*
 * Global lock to protect the AVL trees and kstat_chain_id.  It is also
 * the lock under which each kstat's e_owner field and exported-zone list
 * are changed (see kstat_hold(), kstat_rele(), kstat_zone_add() and
 * kstat_zone_remove()).
 */
static kmutex_t kstat_chain_lock;
65 
66 /*
67  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
68  *
69  * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
70  *
71  * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
72  *	/dev/kstat uses the KID as a cookie for kstat lookups.
73  *
74  * We reserve the first two IDs because some kstats are created before
75  * the well-known ones (kstat_headers = 0, kstat_types = 1).
76  *
77  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
78  * into a particular kstat, which is logically equivalent to a kstat being
79  * installed/deleted.
80  */
81 
kid_t kstat_chain_id = 2;	/* kstat_init() resets this to 0 */
83 
84 /*
85  * As far as zones are concerned, there are 3 types of kstat:
86  *
87  * 1) Those which have a well-known name, and which should return per-zone data
88  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
89  * is an example of this type of kstat.
90  *
91  * 2) Those which should only be exported to a particular list of zones.
92  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
93  * able to see NFS mounts associated with zone B, while we want the
94  * global zone to be able to see all mounts on the system.
95  *
96  * 3) Those that can be exported to all zones.  Most system-related
97  * kstats fall within this category.
98  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
105  *
106  * Writing to kstats is currently disallowed from within a non-global
107  * zone, although this restriction could be removed in the future.
108  */
109 typedef struct kstat_zone {
110 	zoneid_t zoneid;
111 	struct kstat_zone *next;
112 } kstat_zone_t;
113 
114 /*
115  * Extended kstat structure -- for internal use only.
116  */
117 typedef struct ekstat {
118 	kstat_t		e_ks;		/* the kstat itself */
119 	size_t		e_size;		/* total allocation size */
120 	kthread_t	*e_owner;	/* thread holding this kstat */
121 	kcondvar_t	e_cv;		/* wait for owner == NULL */
122 	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
123 	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
124 	kstat_zone_t	e_zone;		/* zone to export stats to */
125 } ekstat_t;
126 
127 static uint64_t kstat_initial[8192];
128 static void *kstat_initial_ptr = kstat_initial;
129 static size_t kstat_initial_avail = sizeof (kstat_initial);
130 static vmem_t *kstat_arena;
131 
132 #define	KSTAT_ALIGN	(sizeof (uint64_t))
133 
134 static avl_tree_t kstat_avl_bykid;
135 static avl_tree_t kstat_avl_byname;
136 
137 /*
138  * Various pointers we need to create kstats at boot time in kstat_init()
139  */
140 extern	kstat_named_t	*segmapcnt_ptr;
141 extern	uint_t		segmapcnt_ndata;
142 extern	int		segmap_kstat_update(kstat_t *, int);
143 extern	kstat_named_t	*biostats_ptr;
144 extern	uint_t		biostats_ndata;
145 extern	kstat_named_t	*pollstats_ptr;
146 extern	uint_t		pollstats_ndata;
147 
148 extern	int	vac;
149 extern	uint_t	nproc;
150 extern	time_t	boot_time;
151 extern	sysinfo_t	sysinfo;
152 extern	vminfo_t	vminfo;
153 
154 struct {
155 	kstat_named_t ncpus;
156 	kstat_named_t lbolt;
157 	kstat_named_t deficit;
158 	kstat_named_t clk_intr;
159 	kstat_named_t vac;
160 	kstat_named_t nproc;
161 	kstat_named_t avenrun_1min;
162 	kstat_named_t avenrun_5min;
163 	kstat_named_t avenrun_15min;
164 	kstat_named_t boot_time;
165 	kstat_named_t nsec_per_tick;
166 } system_misc_kstat = {
167 	{ "ncpus",		KSTAT_DATA_UINT32 },
168 	{ "lbolt",		KSTAT_DATA_UINT32 },
169 	{ "deficit",		KSTAT_DATA_UINT32 },
170 	{ "clk_intr",		KSTAT_DATA_UINT32 },
171 	{ "vac",		KSTAT_DATA_UINT32 },
172 	{ "nproc",		KSTAT_DATA_UINT32 },
173 	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
174 	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
175 	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
176 	{ "boot_time",		KSTAT_DATA_UINT32 },
177 	{ "nsec_per_tick",	KSTAT_DATA_UINT32 },
178 };
179 
180 struct {
181 	kstat_named_t physmem;
182 	kstat_named_t nalloc;
183 	kstat_named_t nfree;
184 	kstat_named_t nalloc_calls;
185 	kstat_named_t nfree_calls;
186 	kstat_named_t kernelbase;
187 	kstat_named_t econtig;
188 	kstat_named_t freemem;
189 	kstat_named_t availrmem;
190 	kstat_named_t lotsfree;
191 	kstat_named_t desfree;
192 	kstat_named_t minfree;
193 	kstat_named_t fastscan;
194 	kstat_named_t slowscan;
195 	kstat_named_t nscan;
196 	kstat_named_t desscan;
197 	kstat_named_t pp_kernel;
198 	kstat_named_t pagesfree;
199 	kstat_named_t pageslocked;
200 	kstat_named_t pagestotal;
201 } system_pages_kstat = {
202 	{ "physmem",		KSTAT_DATA_ULONG },
203 	{ "nalloc",		KSTAT_DATA_ULONG },
204 	{ "nfree",		KSTAT_DATA_ULONG },
205 	{ "nalloc_calls",	KSTAT_DATA_ULONG },
206 	{ "nfree_calls",	KSTAT_DATA_ULONG },
207 	{ "kernelbase",		KSTAT_DATA_ULONG },
208 	{ "econtig", 		KSTAT_DATA_ULONG },
209 	{ "freemem", 		KSTAT_DATA_ULONG },
210 	{ "availrmem", 		KSTAT_DATA_ULONG },
211 	{ "lotsfree", 		KSTAT_DATA_ULONG },
212 	{ "desfree", 		KSTAT_DATA_ULONG },
213 	{ "minfree", 		KSTAT_DATA_ULONG },
214 	{ "fastscan", 		KSTAT_DATA_ULONG },
215 	{ "slowscan", 		KSTAT_DATA_ULONG },
216 	{ "nscan", 		KSTAT_DATA_ULONG },
217 	{ "desscan", 		KSTAT_DATA_ULONG },
218 	{ "pp_kernel", 		KSTAT_DATA_ULONG },
219 	{ "pagesfree", 		KSTAT_DATA_ULONG },
220 	{ "pageslocked", 	KSTAT_DATA_ULONG },
221 	{ "pagestotal",		KSTAT_DATA_ULONG },
222 };
223 
224 static int header_kstat_update(kstat_t *, int);
225 static int header_kstat_snapshot(kstat_t *, void *, int);
226 static int system_misc_kstat_update(kstat_t *, int);
227 static int system_pages_kstat_update(kstat_t *, int);
228 
229 static struct {
230 	char	name[KSTAT_STRLEN];
231 	size_t	size;
232 	uint_t	min_ndata;
233 	uint_t	max_ndata;
234 } kstat_data_type[KSTAT_NUM_TYPES] = {
235 	{ "raw",		1,			0,	INT_MAX	},
236 	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
237 	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
238 	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
239 	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
240 };
241 
242 int
243 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
244 {
245 	ekstat_t *e = (ekstat_t *)k;
246 	kstat_zone_t *kz;
247 
248 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
249 	for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
250 		if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
251 			return (1);
252 		if (zoneid == kz->zoneid)
253 			return (1);
254 	}
255 	return (0);
256 }
257 
258 void
259 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
260 {
261 	ekstat_t *e = (ekstat_t *)k;
262 	kstat_zone_t *kz, *t = NULL;
263 
264 	mutex_enter(&kstat_chain_lock);
265 	if (zoneid == e->e_zone.zoneid) {
266 		kz = e->e_zone.next;
267 		ASSERT(kz != NULL);
268 		e->e_zone.zoneid = kz->zoneid;
269 		e->e_zone.next = kz->next;
270 		goto out;
271 	}
272 	for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
273 		if (kz->next->zoneid == zoneid) {
274 			t = kz->next;
275 			kz->next = t->next;
276 			break;
277 		}
278 	}
279 	ASSERT(t != NULL);	/* we removed something */
280 	kz = t;
281 out:
282 	kstat_chain_id++;
283 	mutex_exit(&kstat_chain_lock);
284 	kmem_free(kz, sizeof (*kz));
285 }
286 
287 void
288 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
289 {
290 	ekstat_t *e = (ekstat_t *)k;
291 	kstat_zone_t *kz;
292 
293 	kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
294 	if (kz == NULL)
295 		return;
296 	mutex_enter(&kstat_chain_lock);
297 	kz->zoneid = zoneid;
298 	kz->next = e->e_zone.next;
299 	e->e_zone.next = kz;
300 	kstat_chain_id++;
301 	mutex_exit(&kstat_chain_lock);
302 }
303 
304 /*
305  * Compare the list of zones for the given kstats, returning 0 if they match
306  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
307  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
308  * two lists always has one element, and this is an O(n) operation rather than
309  * O(n^2).
310  */
311 static int
312 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
313 {
314 	kstat_zone_t *kz1, *kz2;
315 
316 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
317 	for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
318 		for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
319 			if (kz1->zoneid == ALL_ZONES ||
320 			    kz2->zoneid == ALL_ZONES)
321 				return (0);
322 			if (kz1->zoneid == kz2->zoneid)
323 				return (0);
324 		}
325 	}
326 	return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
327 }
328 
329 /*
330  * Support for keeping kstats sorted in AVL trees for fast lookups.
331  */
332 static int
333 kstat_compare_bykid(const void *a1, const void *a2)
334 {
335 	const kstat_t *k1 = a1;
336 	const kstat_t *k2 = a2;
337 
338 	if (k1->ks_kid < k2->ks_kid)
339 		return (-1);
340 	if (k1->ks_kid > k2->ks_kid)
341 		return (1);
342 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
343 }
344 
345 static int
346 kstat_compare_byname(const void *a1, const void *a2)
347 {
348 	const kstat_t *k1 = a1;
349 	const kstat_t *k2 = a2;
350 	int s;
351 
352 	s = strcmp(k1->ks_module, k2->ks_module);
353 	if (s > 0)
354 		return (1);
355 	if (s < 0)
356 		return (-1);
357 
358 	if (k1->ks_instance < k2->ks_instance)
359 		return (-1);
360 	if (k1->ks_instance > k2->ks_instance)
361 		return (1);
362 
363 	s = strcmp(k1->ks_name, k2->ks_name);
364 	if (s > 0)
365 		return (1);
366 	if (s < 0)
367 		return (-1);
368 
369 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
370 }
371 
372 static kstat_t *
373 kstat_hold(avl_tree_t *t, ekstat_t *template)
374 {
375 	kstat_t *ksp;
376 	ekstat_t *e;
377 
378 	mutex_enter(&kstat_chain_lock);
379 	for (;;) {
380 		ksp = avl_find(t, template, NULL);
381 		if (ksp == NULL)
382 			break;
383 		e = (ekstat_t *)ksp;
384 		if (e->e_owner == NULL) {
385 			e->e_owner = curthread;
386 			break;
387 		}
388 		cv_wait(&e->e_cv, &kstat_chain_lock);
389 	}
390 	mutex_exit(&kstat_chain_lock);
391 	return (ksp);
392 }
393 
394 void
395 kstat_rele(kstat_t *ksp)
396 {
397 	ekstat_t *e = (ekstat_t *)ksp;
398 
399 	mutex_enter(&kstat_chain_lock);
400 	ASSERT(e->e_owner == curthread);
401 	e->e_owner = NULL;
402 	cv_broadcast(&e->e_cv);
403 	mutex_exit(&kstat_chain_lock);
404 }
405 
406 kstat_t *
407 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
408 {
409 	ekstat_t e;
410 
411 	e.e_ks.ks_kid = kid;
412 	e.e_zone.zoneid = zoneid;
413 	e.e_zone.next = NULL;
414 
415 	return (kstat_hold(&kstat_avl_bykid, &e));
416 }
417 
418 kstat_t *
419 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
420     zoneid_t ks_zoneid)
421 {
422 	ekstat_t e;
423 
424 	kstat_set_string(e.e_ks.ks_module, ks_module);
425 	e.e_ks.ks_instance = ks_instance;
426 	kstat_set_string(e.e_ks.ks_name, ks_name);
427 	e.e_zone.zoneid = ks_zoneid;
428 	e.e_zone.next = NULL;
429 	return (kstat_hold(&kstat_avl_byname, &e));
430 }
431 
432 static ekstat_t *
433 kstat_alloc(size_t size)
434 {
435 	ekstat_t *e = NULL;
436 
437 	size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
438 
439 	if (kstat_arena == NULL) {
440 		if (size <= kstat_initial_avail) {
441 			e = kstat_initial_ptr;
442 			kstat_initial_ptr = (char *)kstat_initial_ptr + size;
443 			kstat_initial_avail -= size;
444 		}
445 	} else {
446 		e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
447 	}
448 
449 	if (e != NULL) {
450 		bzero(e, size);
451 		e->e_size = size;
452 		cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
453 	}
454 
455 	return (e);
456 }
457 
458 static void
459 kstat_free(ekstat_t *e)
460 {
461 	cv_destroy(&e->e_cv);
462 	vmem_free(kstat_arena, e, e->e_size);
463 }
464 
465 /*
466  * Create various system kstats.
467  */
468 void
469 kstat_init(void)
470 {
471 	kstat_t *ksp;
472 	ekstat_t *e;
473 	avl_tree_t *t = &kstat_avl_bykid;
474 
475 	/*
476 	 * Set up the kstat vmem arena.
477 	 */
478 	kstat_arena = vmem_create("kstat",
479 	    kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
480 	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
481 
482 	/*
483 	 * Make initial kstats appear as though they were allocated.
484 	 */
485 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
486 		(void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
487 		    0, 0, e, (char *)e + e->e_size,
488 		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
489 
490 	/*
491 	 * The mother of all kstats.  The first kstat in the system, which
492 	 * always has KID 0, has the headers for all kstats (including itself)
493 	 * as its data.  Thus, the kstat driver does not need any special
494 	 * interface to extract the kstat chain.
495 	 */
496 	kstat_chain_id = 0;
497 	ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
498 	    0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
499 	if (ksp) {
500 		ksp->ks_lock = &kstat_chain_lock;
501 		ksp->ks_update = header_kstat_update;
502 		ksp->ks_snapshot = header_kstat_snapshot;
503 		kstat_install(ksp);
504 	} else {
505 		panic("cannot create kstat 'kstat_headers'");
506 	}
507 
508 	ksp = kstat_create("unix", 0, "kstat_types", "kstat",
509 	    KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
510 	if (ksp) {
511 		int i;
512 		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
513 
514 		for (i = 0; i < KSTAT_NUM_TYPES; i++) {
515 			kstat_named_init(&kn[i], kstat_data_type[i].name,
516 			    KSTAT_DATA_ULONG);
517 			kn[i].value.ul = i;
518 		}
519 		kstat_install(ksp);
520 	}
521 
522 	ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
523 	    sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
524 	if (ksp) {
525 		ksp->ks_data = (void *) &sysinfo;
526 		kstat_install(ksp);
527 	}
528 
529 	ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
530 	    sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
531 	if (ksp) {
532 		ksp->ks_data = (void *) &vminfo;
533 		kstat_install(ksp);
534 	}
535 
536 	ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
537 	    segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
538 	if (ksp) {
539 		ksp->ks_data = (void *) segmapcnt_ptr;
540 		ksp->ks_update = segmap_kstat_update;
541 		kstat_install(ksp);
542 	}
543 
544 	ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
545 	    biostats_ndata, KSTAT_FLAG_VIRTUAL);
546 	if (ksp) {
547 		ksp->ks_data = (void *) biostats_ptr;
548 		kstat_install(ksp);
549 	}
550 
551 	ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
552 	    sizeof (struct var), KSTAT_FLAG_VIRTUAL);
553 	if (ksp) {
554 		ksp->ks_data = (void *) &v;
555 		kstat_install(ksp);
556 	}
557 
558 	ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
559 	    sizeof (system_misc_kstat) / sizeof (kstat_named_t),
560 	    KSTAT_FLAG_VIRTUAL);
561 	if (ksp) {
562 		ksp->ks_data = (void *) &system_misc_kstat;
563 		ksp->ks_update = system_misc_kstat_update;
564 		kstat_install(ksp);
565 	}
566 
567 	ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
568 	    sizeof (system_pages_kstat) / sizeof (kstat_named_t),
569 	    KSTAT_FLAG_VIRTUAL);
570 	if (ksp) {
571 		ksp->ks_data = (void *) &system_pages_kstat;
572 		ksp->ks_update = system_pages_kstat_update;
573 		kstat_install(ksp);
574 	}
575 
576 	ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
577 	    pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
578 
579 	if (ksp) {
580 		ksp->ks_data = pollstats_ptr;
581 		kstat_install(ksp);
582 	}
583 }
584 
585 /*
586  * Caller of this should ensure that the string pointed by src
587  * doesn't change while kstat's lock is held. Not doing so defeats
588  * kstat's snapshot strategy as explained in <sys/kstat.h>
589  */
590 void
591 kstat_named_setstr(kstat_named_t *knp, const char *src)
592 {
593 	if (knp->data_type != KSTAT_DATA_STRING)
594 		panic("kstat_named_setstr('%p', '%p'): "
595 		    "named kstat is not of type KSTAT_DATA_STRING",
596 		    (void *)knp, (void *)src);
597 
598 	KSTAT_NAMED_STR_PTR(knp) = (char *)src;
599 	if (src != NULL)
600 		KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
601 	else
602 		KSTAT_NAMED_STR_BUFLEN(knp) = 0;
603 }
604 
605 void
606 kstat_set_string(char *dst, const char *src)
607 {
608 	bzero(dst, KSTAT_STRLEN);
609 	(void) strncpy(dst, src, KSTAT_STRLEN - 1);
610 }
611 
612 void
613 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
614 {
615 	kstat_set_string(knp->name, name);
616 	knp->data_type = data_type;
617 
618 	if (data_type == KSTAT_DATA_STRING)
619 		kstat_named_setstr(knp, NULL);
620 }
621 
622 void
623 kstat_timer_init(kstat_timer_t *ktp, const char *name)
624 {
625 	kstat_set_string(ktp->name, name);
626 }
627 
628 /* ARGSUSED */
629 static int
630 default_kstat_update(kstat_t *ksp, int rw)
631 {
632 	uint_t i;
633 	size_t len = 0;
634 	kstat_named_t *knp;
635 
636 	/*
637 	 * Named kstats with variable-length long strings have a standard
638 	 * way of determining how much space is needed to hold the snapshot:
639 	 */
640 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
641 	    (ksp->ks_flags & (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_LONGSTRINGS))) {
642 
643 		/*
644 		 * Add in the space required for the strings
645 		 */
646 		knp = KSTAT_NAMED_PTR(ksp);
647 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
648 			if (knp->data_type == KSTAT_DATA_STRING)
649 				len += KSTAT_NAMED_STR_BUFLEN(knp);
650 		}
651 		ksp->ks_data_size =
652 		    ksp->ks_ndata * sizeof (kstat_named_t) + len;
653 	}
654 	return (0);
655 }
656 
657 static int
658 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
659 {
660 	kstat_io_t *kiop;
661 	hrtime_t cur_time;
662 	size_t	namedsz;
663 
664 	ksp->ks_snaptime = cur_time = gethrtime();
665 
666 	if (rw == KSTAT_WRITE) {
667 		if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
668 			return (EACCES);
669 		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
670 		return (0);
671 	}
672 
673 	/*
674 	 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
675 	 * number of kstat_named_t structures, followed by an optional
676 	 * string segment. The ks_data generally holds only the
677 	 * kstat_named_t structures. So we copy it first. The strings,
678 	 * if any, are copied below. For other kstat types, ks_data holds the
679 	 * entire buffer.
680 	 */
681 
682 	namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
683 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
684 		bcopy(ksp->ks_data, buf, namedsz);
685 	else
686 		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
687 
688 	/*
689 	 * Apply kstat type-specific data massaging
690 	 */
691 	switch (ksp->ks_type) {
692 
693 	case KSTAT_TYPE_IO:
694 		/*
695 		 * Normalize time units and deal with incomplete transactions
696 		 */
697 		kiop = (kstat_io_t *)buf;
698 
699 		scalehrtime(&kiop->wtime);
700 		scalehrtime(&kiop->wlentime);
701 		scalehrtime(&kiop->wlastupdate);
702 		scalehrtime(&kiop->rtime);
703 		scalehrtime(&kiop->rlentime);
704 		scalehrtime(&kiop->rlastupdate);
705 
706 		if (kiop->wcnt != 0) {
707 			/* like kstat_waitq_exit */
708 			hrtime_t wfix = cur_time - kiop->wlastupdate;
709 			kiop->wlastupdate = cur_time;
710 			kiop->wlentime += kiop->wcnt * wfix;
711 			kiop->wtime += wfix;
712 		}
713 
714 		if (kiop->rcnt != 0) {
715 			/* like kstat_runq_exit */
716 			hrtime_t rfix = cur_time - kiop->rlastupdate;
717 			kiop->rlastupdate = cur_time;
718 			kiop->rlentime += kiop->rcnt * rfix;
719 			kiop->rtime += rfix;
720 		}
721 		break;
722 
723 	case KSTAT_TYPE_NAMED:
724 		/*
725 		 * Massage any long strings in at the end of the buffer
726 		 */
727 		if (ksp->ks_data_size > namedsz) {
728 			uint_t i;
729 			kstat_named_t *knp = buf;
730 			char *dst = (char *)(knp + ksp->ks_ndata);
731 			/*
732 			 * Copy strings and update pointers
733 			 */
734 			for (i = 0; i < ksp->ks_ndata; i++, knp++) {
735 				if (knp->data_type == KSTAT_DATA_STRING &&
736 				    KSTAT_NAMED_STR_PTR(knp) != NULL) {
737 					bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
738 					    KSTAT_NAMED_STR_BUFLEN(knp));
739 					KSTAT_NAMED_STR_PTR(knp) = dst;
740 					dst += KSTAT_NAMED_STR_BUFLEN(knp);
741 				}
742 			}
743 			ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
744 		}
745 		break;
746 	}
747 	return (0);
748 }
749 
750 static int
751 header_kstat_update(kstat_t *header_ksp, int rw)
752 {
753 	int nkstats = 0;
754 	ekstat_t *e;
755 	avl_tree_t *t = &kstat_avl_bykid;
756 	zoneid_t zoneid;
757 
758 	if (rw == KSTAT_WRITE)
759 		return (EACCES);
760 
761 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
762 
763 	zoneid = getzoneid();
764 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
765 		if (kstat_zone_find((kstat_t *)e, zoneid) &&
766 		    (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
767 			nkstats++;
768 		}
769 	}
770 	header_ksp->ks_ndata = nkstats;
771 	header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
772 	return (0);
773 }
774 
775 /*
776  * Copy out the data section of kstat 0, which consists of the list
777  * of all kstat headers.  By specification, these headers must be
778  * copied out in order of increasing KID.
779  */
780 static int
781 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
782 {
783 	ekstat_t *e;
784 	avl_tree_t *t = &kstat_avl_bykid;
785 	zoneid_t zoneid;
786 
787 	header_ksp->ks_snaptime = gethrtime();
788 
789 	if (rw == KSTAT_WRITE)
790 		return (EACCES);
791 
792 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
793 
794 	zoneid = getzoneid();
795 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
796 		if (kstat_zone_find((kstat_t *)e, zoneid) &&
797 		    (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
798 			bcopy(&e->e_ks, buf, sizeof (kstat_t));
799 			buf = (char *)buf + sizeof (kstat_t);
800 		}
801 	}
802 
803 	return (0);
804 }
805 
806 /* ARGSUSED */
807 static int
808 system_misc_kstat_update(kstat_t *ksp, int rw)
809 {
810 	int myncpus = ncpus;
811 	int *loadavgp = &avenrun[0];
812 	time_t zone_boot_time;
813 	clock_t zone_lbolt;
814 	hrtime_t zone_hrtime;
815 	size_t zone_nproc;
816 
817 	if (rw == KSTAT_WRITE)
818 		return (EACCES);
819 
820 	if (!INGLOBALZONE(curproc)) {
821 		/*
822 		 * Here we grab cpu_lock which is OK as long as no-one in the
823 		 * future attempts to lookup this particular kstat
824 		 * (unix:0:system_misc) while holding cpu_lock.
825 		 */
826 		mutex_enter(&cpu_lock);
827 		if (pool_pset_enabled()) {
828 			myncpus = zone_ncpus_get(curproc->p_zone);
829 			ASSERT(myncpus > 0);
830 		}
831 		mutex_exit(&cpu_lock);
832 		loadavgp = &curproc->p_zone->zone_avenrun[0];
833 	}
834 
835 	if (INGLOBALZONE(curproc)) {
836 		zone_boot_time = boot_time;
837 		zone_lbolt = ddi_get_lbolt();
838 		zone_nproc = nproc;
839 	} else {
840 		zone_boot_time = curproc->p_zone->zone_boot_time;
841 
842 		zone_hrtime = gethrtime();
843 		zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
844 		    NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
845 		mutex_enter(&curproc->p_zone->zone_nlwps_lock);
846 		zone_nproc = curproc->p_zone->zone_nprocs;
847 		mutex_exit(&curproc->p_zone->zone_nlwps_lock);
848 	}
849 
850 	system_misc_kstat.ncpus.value.ui32		= (uint32_t)myncpus;
851 	system_misc_kstat.lbolt.value.ui32		= (uint32_t)zone_lbolt;
852 	system_misc_kstat.deficit.value.ui32		= (uint32_t)deficit;
853 	system_misc_kstat.clk_intr.value.ui32		= (uint32_t)zone_lbolt;
854 	system_misc_kstat.vac.value.ui32		= (uint32_t)vac;
855 	system_misc_kstat.nproc.value.ui32		= (uint32_t)zone_nproc;
856 	system_misc_kstat.avenrun_1min.value.ui32	= (uint32_t)loadavgp[0];
857 	system_misc_kstat.avenrun_5min.value.ui32	= (uint32_t)loadavgp[1];
858 	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];
859 	system_misc_kstat.boot_time.value.ui32		= (uint32_t)
860 	    zone_boot_time;
861 	system_misc_kstat.nsec_per_tick.value.ui32	= (uint32_t)
862 	    nsec_per_tick;
863 	return (0);
864 }
865 
866 #ifdef	__sparc
867 extern caddr_t	econtig32;
868 #else	/* !__sparc */
869 extern caddr_t	econtig;
870 #endif	/* __sparc */
871 
872 /* ARGSUSED */
873 static int
874 system_pages_kstat_update(kstat_t *ksp, int rw)
875 {
876 	kobj_stat_t kobj_stat;
877 
878 	if (rw == KSTAT_WRITE) {
879 		return (EACCES);
880 	}
881 
882 	kobj_stat_get(&kobj_stat);
883 	system_pages_kstat.physmem.value.ul	= (ulong_t)physmem;
884 	system_pages_kstat.nalloc.value.ul	= kobj_stat.nalloc;
885 	system_pages_kstat.nfree.value.ul	= kobj_stat.nfree;
886 	system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
887 	system_pages_kstat.nfree_calls.value.ul	= kobj_stat.nfree_calls;
888 	system_pages_kstat.kernelbase.value.ul	= (ulong_t)KERNELBASE;
889 
890 #ifdef	__sparc
891 	/*
892 	 * kstat should REALLY be modified to also report kmem64_base and
893 	 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
894 	 * [ kernelbase .. econtig ] no longer is truly reflective of the
895 	 * kernel's vallocs...
896 	 */
897 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig32;
898 #else	/* !__sparc */
899 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig;
900 #endif	/* __sparc */
901 
902 	system_pages_kstat.freemem.value.ul	= (ulong_t)freemem;
903 	system_pages_kstat.availrmem.value.ul	= (ulong_t)availrmem;
904 	system_pages_kstat.lotsfree.value.ul	= (ulong_t)lotsfree;
905 	system_pages_kstat.desfree.value.ul	= (ulong_t)desfree;
906 	system_pages_kstat.minfree.value.ul	= (ulong_t)minfree;
907 	system_pages_kstat.fastscan.value.ul	= (ulong_t)fastscan;
908 	system_pages_kstat.slowscan.value.ul	= (ulong_t)slowscan;
909 	system_pages_kstat.nscan.value.ul	= (ulong_t)nscan;
910 	system_pages_kstat.desscan.value.ul	= (ulong_t)desscan;
911 	system_pages_kstat.pagesfree.value.ul	= (ulong_t)freemem;
912 	system_pages_kstat.pageslocked.value.ul	= (ulong_t)(availrmem_initial -
913 	    availrmem);
914 	system_pages_kstat.pagestotal.value.ul	= (ulong_t)total_pages;
915 	/*
916 	 * pp_kernel represents total pages used by the kernel since the
917 	 * startup. This formula takes into account the boottime kernel
918 	 * footprint and also considers the availrmem changes because of
919 	 * user explicit page locking.
920 	 */
921 	system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
922 	    obp_pages - availrmem - k_anoninfo.ani_mem_resv -
923 	    anon_segkp_pages_locked - pages_locked -
924 	    pages_claimed - pages_useclaim);
925 
926 	return (0);
927 }
928 
929 kstat_t *
930 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
931     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
932 {
933 	return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
934 	    ks_type, ks_ndata, ks_flags, ALL_ZONES));
935 }
936 
937 /*
938  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
939  * the specified name exists, reactivate it.  Returns a pointer to the kstat
940  * on success, NULL on failure.  The kstat will not be visible to the
941  * kstat driver until kstat_install().
942  */
943 kstat_t *
944 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
945     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
946     zoneid_t ks_zoneid)
947 {
948 	size_t ks_data_size;
949 	kstat_t *ksp;
950 	ekstat_t *e;
951 	avl_index_t where;
952 	char namebuf[KSTAT_STRLEN + 16];
953 
954 	if (avl_numnodes(&kstat_avl_bykid) == 0) {
955 		avl_create(&kstat_avl_bykid, kstat_compare_bykid,
956 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
957 
958 		avl_create(&kstat_avl_byname, kstat_compare_byname,
959 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
960 	}
961 
962 	/*
963 	 * If ks_name == NULL, set the ks_name to <module><instance>.
964 	 */
965 	if (ks_name == NULL) {
966 		char buf[KSTAT_STRLEN];
967 		kstat_set_string(buf, ks_module);
968 		(void) sprintf(namebuf, "%s%d", buf, ks_instance);
969 		ks_name = namebuf;
970 	}
971 
972 	/*
973 	 * Make sure it's a valid kstat data type
974 	 */
975 	if (ks_type >= KSTAT_NUM_TYPES) {
976 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
977 		    "invalid kstat type %d",
978 		    ks_module, ks_instance, ks_name, ks_type);
979 		return (NULL);
980 	}
981 
982 	/*
983 	 * Don't allow persistent virtual kstats -- it makes no sense.
984 	 * ks_data points to garbage when the client goes away.
985 	 */
986 	if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
987 	    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
988 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
989 		    "cannot create persistent virtual kstat",
990 		    ks_module, ks_instance, ks_name);
991 		return (NULL);
992 	}
993 
994 	/*
995 	 * Don't allow variable-size physical kstats, since the framework's
996 	 * memory allocation for physical kstat data is fixed at creation time.
997 	 */
998 	if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
999 	    !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1000 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1001 		    "cannot create variable-size physical kstat",
1002 		    ks_module, ks_instance, ks_name);
1003 		return (NULL);
1004 	}
1005 
1006 	/*
1007 	 * Make sure the number of data fields is within legal range
1008 	 */
1009 	if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1010 	    ks_ndata > kstat_data_type[ks_type].max_ndata) {
1011 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1012 		    "ks_ndata=%d out of range [%d, %d]",
1013 		    ks_module, ks_instance, ks_name, (int)ks_ndata,
1014 		    kstat_data_type[ks_type].min_ndata,
1015 		    kstat_data_type[ks_type].max_ndata);
1016 		return (NULL);
1017 	}
1018 
1019 	ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1020 
1021 	/*
1022 	 * If the named kstat already exists and is dormant, reactivate it.
1023 	 */
1024 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1025 	if (ksp != NULL) {
1026 		if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1027 			/*
1028 			 * The named kstat exists but is not dormant --
1029 			 * this is a kstat namespace collision.
1030 			 */
1031 			kstat_rele(ksp);
1032 			cmn_err(CE_WARN,
1033 			    "kstat_create('%s', %d, '%s'): namespace collision",
1034 			    ks_module, ks_instance, ks_name);
1035 			return (NULL);
1036 		}
1037 		if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1038 		    (ksp->ks_type != ks_type) ||
1039 		    (ksp->ks_ndata != ks_ndata) ||
1040 		    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1041 			/*
1042 			 * The name is the same, but the other key parameters
1043 			 * differ from those of the dormant kstat -- bogus.
1044 			 */
1045 			kstat_rele(ksp);
1046 			cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1047 			    "invalid reactivation of dormant kstat",
1048 			    ks_module, ks_instance, ks_name);
1049 			return (NULL);
1050 		}
1051 		/*
1052 		 * Return dormant kstat pointer to caller.  As usual,
1053 		 * the kstat is marked invalid until kstat_install().
1054 		 */
1055 		ksp->ks_flags |= KSTAT_FLAG_INVALID;
1056 		kstat_rele(ksp);
1057 		return (ksp);
1058 	}
1059 
1060 	/*
1061 	 * Allocate memory for the new kstat header and, if this is a physical
1062 	 * kstat, the data section.
1063 	 */
1064 	e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1065 	if (e == NULL) {
1066 		cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1067 		    "insufficient kernel memory",
1068 		    ks_module, ks_instance, ks_name);
1069 		return (NULL);
1070 	}
1071 
1072 	/*
1073 	 * Initialize as many fields as we can.  The caller may reset
1074 	 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1075 	 * Creators of virtual kstats may also reset ks_data.  It is
1076 	 * also up to the caller to initialize the kstat data section,
1077 	 * if necessary.  All initialization must be complete before
1078 	 * calling kstat_install().
1079 	 */
1080 	e->e_zone.zoneid = ks_zoneid;
1081 	e->e_zone.next = NULL;
1082 
1083 	ksp = &e->e_ks;
1084 	ksp->ks_crtime		= gethrtime();
1085 	kstat_set_string(ksp->ks_module, ks_module);
1086 	ksp->ks_instance	= ks_instance;
1087 	kstat_set_string(ksp->ks_name, ks_name);
1088 	ksp->ks_type		= ks_type;
1089 	kstat_set_string(ksp->ks_class, ks_class);
1090 	ksp->ks_flags		= ks_flags | KSTAT_FLAG_INVALID;
1091 	if (ks_flags & KSTAT_FLAG_VIRTUAL)
1092 		ksp->ks_data	= NULL;
1093 	else
1094 		ksp->ks_data	= (void *)(e + 1);
1095 	ksp->ks_ndata		= ks_ndata;
1096 	ksp->ks_data_size	= ks_data_size;
1097 	ksp->ks_snaptime	= ksp->ks_crtime;
1098 	ksp->ks_update		= default_kstat_update;
1099 	ksp->ks_private		= NULL;
1100 	ksp->ks_snapshot	= default_kstat_snapshot;
1101 	ksp->ks_lock		= NULL;
1102 
1103 	mutex_enter(&kstat_chain_lock);
1104 
1105 	/*
1106 	 * Add our kstat to the AVL trees.
1107 	 */
1108 	if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1109 		mutex_exit(&kstat_chain_lock);
1110 		cmn_err(CE_WARN,
1111 		    "kstat_create('%s', %d, '%s'): namespace collision",
1112 		    ks_module, ks_instance, ks_name);
1113 		kstat_free(e);
1114 		return (NULL);
1115 	}
1116 	avl_insert(&kstat_avl_byname, e, where);
1117 
1118 	/*
1119 	 * Loop around until we find an unused KID.
1120 	 */
1121 	do {
1122 		ksp->ks_kid = kstat_chain_id++;
1123 	} while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1124 	avl_insert(&kstat_avl_bykid, e, where);
1125 
1126 	mutex_exit(&kstat_chain_lock);
1127 
1128 	return (ksp);
1129 }
1130 
1131 /*
1132  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1133  */
1134 void
1135 kstat_install(kstat_t *ksp)
1136 {
1137 	zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1138 
1139 	/*
1140 	 * If this is a variable-size kstat, it MUST provide kstat data locking
1141 	 * to prevent data-size races with kstat readers.
1142 	 */
1143 	if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1144 		panic("kstat_install('%s', %d, '%s'): "
1145 		    "cannot create variable-size kstat without data lock",
1146 		    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1147 	}
1148 
1149 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1150 		cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1151 		    (void *)ksp);
1152 		return;
1153 	}
1154 
1155 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1156 		uint_t i;
1157 		kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1158 
1159 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1160 			if (knp->data_type == KSTAT_DATA_STRING) {
1161 				ksp->ks_flags |= KSTAT_FLAG_LONGSTRINGS;
1162 				break;
1163 			}
1164 		}
1165 		/*
1166 		 * The default snapshot routine does not handle KSTAT_WRITE
1167 		 * for long strings.
1168 		 */
1169 		if ((ksp->ks_flags & KSTAT_FLAG_LONGSTRINGS) &&
1170 		    (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1171 		    (ksp->ks_snapshot == default_kstat_snapshot)) {
1172 			panic("kstat_install('%s', %d, '%s'): "
1173 			    "named kstat containing KSTAT_DATA_STRING "
1174 			    "is writable but uses default snapshot routine",
1175 			    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1176 		}
1177 	}
1178 
1179 	if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1180 
1181 		/*
1182 		 * We are reactivating a dormant kstat.  Initialize the
1183 		 * caller's underlying data to the value it had when the
1184 		 * kstat went dormant, and mark the kstat as active.
1185 		 * Grab the provider's kstat lock if it's not already held.
1186 		 */
1187 		kmutex_t *lp = ksp->ks_lock;
1188 		if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1189 			mutex_enter(lp);
1190 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1191 			mutex_exit(lp);
1192 		} else {
1193 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1194 		}
1195 		ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1196 	}
1197 
1198 	/*
1199 	 * Now that the kstat is active, make it visible to the kstat driver.
1200 	 * When copying out kstats the count is determined in
1201 	 * header_kstat_update() and actually copied into kbuf in
1202 	 * header_kstat_snapshot(). kstat_chain_lock is held across the two
1203 	 * calls to ensure that this list doesn't change. Thus, we need to
1204 	 * also take the lock to ensure that the we don't copy the new kstat
1205 	 * in the 2nd pass and overrun the buf.
1206 	 */
1207 	mutex_enter(&kstat_chain_lock);
1208 	ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1209 	mutex_exit(&kstat_chain_lock);
1210 	kstat_rele(ksp);
1211 }
1212 
1213 /*
1214  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1215  * just update the data and mark it as dormant.
1216  */
1217 void
1218 kstat_delete(kstat_t *ksp)
1219 {
1220 	kmutex_t *lp;
1221 	ekstat_t *e = (ekstat_t *)ksp;
1222 	zoneid_t zoneid;
1223 	kstat_zone_t *kz;
1224 
1225 	ASSERT(ksp != NULL);
1226 
1227 	if (ksp == NULL)
1228 		return;
1229 
1230 	zoneid = e->e_zone.zoneid;
1231 
1232 	lp = ksp->ks_lock;
1233 
1234 	if (lp != NULL && MUTEX_HELD(lp)) {
1235 		panic("kstat_delete(%p): caller holds data lock %p",
1236 		    (void *)ksp, (void *)lp);
1237 	}
1238 
1239 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1240 		cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1241 		    (void *)ksp);
1242 		return;
1243 	}
1244 
1245 	if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1246 		/*
1247 		 * Update the data one last time, so that all activity
1248 		 * prior to going dormant has been accounted for.
1249 		 */
1250 		KSTAT_ENTER(ksp);
1251 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
1252 		KSTAT_EXIT(ksp);
1253 
1254 		/*
1255 		 * Mark the kstat as dormant and restore caller-modifiable
1256 		 * fields to default values, so the kstat is readable during
1257 		 * the dormant phase.
1258 		 */
1259 		ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1260 		ksp->ks_lock = NULL;
1261 		ksp->ks_update = default_kstat_update;
1262 		ksp->ks_private = NULL;
1263 		ksp->ks_snapshot = default_kstat_snapshot;
1264 		kstat_rele(ksp);
1265 		return;
1266 	}
1267 
1268 	/*
1269 	 * Remove the kstat from the framework's AVL trees,
1270 	 * free the allocated memory, and increment kstat_chain_id so
1271 	 * /dev/kstat clients can detect the event.
1272 	 */
1273 	mutex_enter(&kstat_chain_lock);
1274 	avl_remove(&kstat_avl_bykid, e);
1275 	avl_remove(&kstat_avl_byname, e);
1276 	kstat_chain_id++;
1277 	mutex_exit(&kstat_chain_lock);
1278 
1279 	kz = e->e_zone.next;
1280 	while (kz != NULL) {
1281 		kstat_zone_t *t = kz;
1282 
1283 		kz = kz->next;
1284 		kmem_free(t, sizeof (*t));
1285 	}
1286 	kstat_rele(ksp);
1287 	kstat_free(e);
1288 }
1289 
1290 void
1291 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1292     const char *ks_name, zoneid_t ks_zoneid)
1293 {
1294 	kstat_t *ksp;
1295 
1296 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1297 	if (ksp != NULL) {
1298 		kstat_rele(ksp);
1299 		kstat_delete(ksp);
1300 	}
1301 }
1302 
1303 void
1304 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1305 {
1306 	kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1307 }
1308 
1309 /*
1310  * The sparc V9 versions of these routines can be much cheaper than
1311  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1312  * For simplicity, however, we always feed the C versions to lint.
1313  */
1314 #if !defined(__sparc) || defined(lint) || defined(__lint)
1315 
1316 void
1317 kstat_waitq_enter(kstat_io_t *kiop)
1318 {
1319 	hrtime_t new, delta;
1320 	ulong_t wcnt;
1321 
1322 	new = gethrtime_unscaled();
1323 	delta = new - kiop->wlastupdate;
1324 	kiop->wlastupdate = new;
1325 	wcnt = kiop->wcnt++;
1326 	if (wcnt != 0) {
1327 		kiop->wlentime += delta * wcnt;
1328 		kiop->wtime += delta;
1329 	}
1330 }
1331 
1332 void
1333 kstat_waitq_exit(kstat_io_t *kiop)
1334 {
1335 	hrtime_t new, delta;
1336 	ulong_t wcnt;
1337 
1338 	new = gethrtime_unscaled();
1339 	delta = new - kiop->wlastupdate;
1340 	kiop->wlastupdate = new;
1341 	wcnt = kiop->wcnt--;
1342 	ASSERT((int)wcnt > 0);
1343 	kiop->wlentime += delta * wcnt;
1344 	kiop->wtime += delta;
1345 }
1346 
1347 void
1348 kstat_runq_enter(kstat_io_t *kiop)
1349 {
1350 	hrtime_t new, delta;
1351 	ulong_t rcnt;
1352 
1353 	new = gethrtime_unscaled();
1354 	delta = new - kiop->rlastupdate;
1355 	kiop->rlastupdate = new;
1356 	rcnt = kiop->rcnt++;
1357 	if (rcnt != 0) {
1358 		kiop->rlentime += delta * rcnt;
1359 		kiop->rtime += delta;
1360 	}
1361 }
1362 
1363 void
1364 kstat_runq_exit(kstat_io_t *kiop)
1365 {
1366 	hrtime_t new, delta;
1367 	ulong_t rcnt;
1368 
1369 	new = gethrtime_unscaled();
1370 	delta = new - kiop->rlastupdate;
1371 	kiop->rlastupdate = new;
1372 	rcnt = kiop->rcnt--;
1373 	ASSERT((int)rcnt > 0);
1374 	kiop->rlentime += delta * rcnt;
1375 	kiop->rtime += delta;
1376 }
1377 
1378 void
1379 kstat_waitq_to_runq(kstat_io_t *kiop)
1380 {
1381 	hrtime_t new, delta;
1382 	ulong_t wcnt, rcnt;
1383 
1384 	new = gethrtime_unscaled();
1385 
1386 	delta = new - kiop->wlastupdate;
1387 	kiop->wlastupdate = new;
1388 	wcnt = kiop->wcnt--;
1389 	ASSERT((int)wcnt > 0);
1390 	kiop->wlentime += delta * wcnt;
1391 	kiop->wtime += delta;
1392 
1393 	delta = new - kiop->rlastupdate;
1394 	kiop->rlastupdate = new;
1395 	rcnt = kiop->rcnt++;
1396 	if (rcnt != 0) {
1397 		kiop->rlentime += delta * rcnt;
1398 		kiop->rtime += delta;
1399 	}
1400 }
1401 
1402 void
1403 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1404 {
1405 	hrtime_t new, delta;
1406 	ulong_t wcnt, rcnt;
1407 
1408 	new = gethrtime_unscaled();
1409 
1410 	delta = new - kiop->rlastupdate;
1411 	kiop->rlastupdate = new;
1412 	rcnt = kiop->rcnt--;
1413 	ASSERT((int)rcnt > 0);
1414 	kiop->rlentime += delta * rcnt;
1415 	kiop->rtime += delta;
1416 
1417 	delta = new - kiop->wlastupdate;
1418 	kiop->wlastupdate = new;
1419 	wcnt = kiop->wcnt++;
1420 	if (wcnt != 0) {
1421 		kiop->wlentime += delta * wcnt;
1422 		kiop->wtime += delta;
1423 	}
1424 }
1425 
1426 #endif
1427 
1428 void
1429 kstat_timer_start(kstat_timer_t *ktp)
1430 {
1431 	ktp->start_time = gethrtime();
1432 }
1433 
1434 void
1435 kstat_timer_stop(kstat_timer_t *ktp)
1436 {
1437 	hrtime_t	etime;
1438 	u_longlong_t	num_events;
1439 
1440 	ktp->stop_time = etime = gethrtime();
1441 	etime -= ktp->start_time;
1442 	num_events = ktp->num_events;
1443 	if (etime < ktp->min_time || num_events == 0)
1444 		ktp->min_time = etime;
1445 	if (etime > ktp->max_time)
1446 		ktp->max_time = etime;
1447 	ktp->elapsed_time += etime;
1448 	ktp->num_events = num_events + 1;
1449 }
1450