xref: /illumos-gate/usr/src/uts/common/os/kstat_fr.c (revision 8d0c3d29bb99f6521f2dc5058a7e4debebad7899)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Kernel statistics framework
27  */
28 
29 #include <sys/types.h>
30 #include <sys/time.h>
31 #include <sys/systm.h>
32 #include <sys/vmsystm.h>
33 #include <sys/t_lock.h>
34 #include <sys/param.h>
35 #include <sys/errno.h>
36 #include <sys/vmem.h>
37 #include <sys/sysmacros.h>
38 #include <sys/cmn_err.h>
39 #include <sys/kstat.h>
40 #include <sys/sysinfo.h>
41 #include <sys/cpuvar.h>
42 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/vnode.h>
45 #include <sys/vfs.h>
46 #include <sys/dnlc.h>
47 #include <sys/var.h>
48 #include <sys/debug.h>
49 #include <sys/kobj.h>
50 #include <sys/avl.h>
51 #include <sys/pool_pset.h>
52 #include <sys/cpupart.h>
53 #include <sys/zone.h>
54 #include <sys/loadavg.h>
55 #include <vm/page.h>
56 #include <vm/anon.h>
57 #include <vm/seg_kmem.h>
58 
59 /*
60  * Global lock to protect the AVL trees and kstat_chain_id.
61  */
62 static kmutex_t kstat_chain_lock;
63 
64 /*
65  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
66  *
67  * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
68  *
69  * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
70  *	/dev/kstat uses the KID as a cookie for kstat lookups.
71  *
72  * We reserve the first two IDs because some kstats are created before
73  * the well-known ones (kstat_headers = 0, kstat_types = 1).
74  *
75  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
76  * into a particular kstat, which is logically equivalent to a kstat being
77  * installed/deleted.
78  */
79 
80 kid_t kstat_chain_id = 2;
81 
82 /*
83  * As far as zones are concerned, there are 3 types of kstat:
84  *
85  * 1) Those which have a well-known name, and which should return per-zone data
86  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
87  * is an example of this type of kstat.
88  *
89  * 2) Those which should only be exported to a particular list of zones.
90  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
91  * able to see NFS mounts associated with zone B, while we want the
92  * global zone to be able to see all mounts on the system.
93  *
94  * 3) Those that can be exported to all zones.  Most system-related
95  * kstats fall within this category.
96  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
103  *
104  * Writing to kstats is currently disallowed from within a non-global
105  * zone, although this restriction could be removed in the future.
106  */
107 typedef struct kstat_zone {
108 	zoneid_t zoneid;
109 	struct kstat_zone *next;
110 } kstat_zone_t;
111 
112 /*
113  * Extended kstat structure -- for internal use only.
114  */
115 typedef struct ekstat {
116 	kstat_t		e_ks;		/* the kstat itself */
117 	size_t		e_size;		/* total allocation size */
118 	kthread_t	*e_owner;	/* thread holding this kstat */
119 	kcondvar_t	e_cv;		/* wait for owner == NULL */
120 	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
121 	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
122 	kstat_zone_t	e_zone;		/* zone to export stats to */
123 } ekstat_t;
124 
125 static uint64_t kstat_initial[8192];
126 static void *kstat_initial_ptr = kstat_initial;
127 static size_t kstat_initial_avail = sizeof (kstat_initial);
128 static vmem_t *kstat_arena;
129 
130 #define	KSTAT_ALIGN	(sizeof (uint64_t))
131 
132 static avl_tree_t kstat_avl_bykid;
133 static avl_tree_t kstat_avl_byname;
134 
135 /*
136  * Various pointers we need to create kstats at boot time in kstat_init()
137  */
138 extern	kstat_named_t	*segmapcnt_ptr;
139 extern	uint_t		segmapcnt_ndata;
140 extern	int		segmap_kstat_update(kstat_t *, int);
141 extern	kstat_named_t	*biostats_ptr;
142 extern	uint_t		biostats_ndata;
143 extern	kstat_named_t	*pollstats_ptr;
144 extern	uint_t		pollstats_ndata;
145 
146 extern	int	vac;
147 extern	uint_t	nproc;
148 extern	time_t	boot_time;
149 extern	sysinfo_t	sysinfo;
150 extern	vminfo_t	vminfo;
151 
152 struct {
153 	kstat_named_t ncpus;
154 	kstat_named_t lbolt;
155 	kstat_named_t deficit;
156 	kstat_named_t clk_intr;
157 	kstat_named_t vac;
158 	kstat_named_t nproc;
159 	kstat_named_t avenrun_1min;
160 	kstat_named_t avenrun_5min;
161 	kstat_named_t avenrun_15min;
162 	kstat_named_t boot_time;
163 } system_misc_kstat = {
164 	{ "ncpus",		KSTAT_DATA_UINT32 },
165 	{ "lbolt",		KSTAT_DATA_UINT32 },
166 	{ "deficit",		KSTAT_DATA_UINT32 },
167 	{ "clk_intr",		KSTAT_DATA_UINT32 },
168 	{ "vac",		KSTAT_DATA_UINT32 },
169 	{ "nproc",		KSTAT_DATA_UINT32 },
170 	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
171 	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
172 	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
173 	{ "boot_time",		KSTAT_DATA_UINT32 },
174 };
175 
176 struct {
177 	kstat_named_t physmem;
178 	kstat_named_t nalloc;
179 	kstat_named_t nfree;
180 	kstat_named_t nalloc_calls;
181 	kstat_named_t nfree_calls;
182 	kstat_named_t kernelbase;
183 	kstat_named_t econtig;
184 	kstat_named_t freemem;
185 	kstat_named_t availrmem;
186 	kstat_named_t lotsfree;
187 	kstat_named_t desfree;
188 	kstat_named_t minfree;
189 	kstat_named_t fastscan;
190 	kstat_named_t slowscan;
191 	kstat_named_t nscan;
192 	kstat_named_t desscan;
193 	kstat_named_t pp_kernel;
194 	kstat_named_t pagesfree;
195 	kstat_named_t pageslocked;
196 	kstat_named_t pagestotal;
197 } system_pages_kstat = {
198 	{ "physmem",		KSTAT_DATA_ULONG },
199 	{ "nalloc",		KSTAT_DATA_ULONG },
200 	{ "nfree",		KSTAT_DATA_ULONG },
201 	{ "nalloc_calls",	KSTAT_DATA_ULONG },
202 	{ "nfree_calls",	KSTAT_DATA_ULONG },
203 	{ "kernelbase",		KSTAT_DATA_ULONG },
204 	{ "econtig", 		KSTAT_DATA_ULONG },
205 	{ "freemem", 		KSTAT_DATA_ULONG },
206 	{ "availrmem", 		KSTAT_DATA_ULONG },
207 	{ "lotsfree", 		KSTAT_DATA_ULONG },
208 	{ "desfree", 		KSTAT_DATA_ULONG },
209 	{ "minfree", 		KSTAT_DATA_ULONG },
210 	{ "fastscan", 		KSTAT_DATA_ULONG },
211 	{ "slowscan", 		KSTAT_DATA_ULONG },
212 	{ "nscan", 		KSTAT_DATA_ULONG },
213 	{ "desscan", 		KSTAT_DATA_ULONG },
214 	{ "pp_kernel", 		KSTAT_DATA_ULONG },
215 	{ "pagesfree", 		KSTAT_DATA_ULONG },
216 	{ "pageslocked", 	KSTAT_DATA_ULONG },
217 	{ "pagestotal",		KSTAT_DATA_ULONG },
218 };
219 
220 static int header_kstat_update(kstat_t *, int);
221 static int header_kstat_snapshot(kstat_t *, void *, int);
222 static int system_misc_kstat_update(kstat_t *, int);
223 static int system_pages_kstat_update(kstat_t *, int);
224 
225 static struct {
226 	char	name[KSTAT_STRLEN];
227 	size_t	size;
228 	uint_t	min_ndata;
229 	uint_t	max_ndata;
230 } kstat_data_type[KSTAT_NUM_TYPES] = {
231 	{ "raw",		1,			0,	INT_MAX	},
232 	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
233 	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
234 	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
235 	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
236 };
237 
238 int
239 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
240 {
241 	ekstat_t *e = (ekstat_t *)k;
242 	kstat_zone_t *kz;
243 
244 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
245 	for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
246 		if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
247 			return (1);
248 		if (zoneid == kz->zoneid)
249 			return (1);
250 	}
251 	return (0);
252 }
253 
254 void
255 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
256 {
257 	ekstat_t *e = (ekstat_t *)k;
258 	kstat_zone_t *kz, *t = NULL;
259 
260 	mutex_enter(&kstat_chain_lock);
261 	if (zoneid == e->e_zone.zoneid) {
262 		kz = e->e_zone.next;
263 		ASSERT(kz != NULL);
264 		e->e_zone.zoneid = kz->zoneid;
265 		e->e_zone.next = kz->next;
266 		goto out;
267 	}
268 	for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
269 		if (kz->next->zoneid == zoneid) {
270 			t = kz->next;
271 			kz->next = t->next;
272 			break;
273 		}
274 	}
275 	ASSERT(t != NULL);	/* we removed something */
276 	kz = t;
277 out:
278 	kstat_chain_id++;
279 	mutex_exit(&kstat_chain_lock);
280 	kmem_free(kz, sizeof (*kz));
281 }
282 
283 void
284 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
285 {
286 	ekstat_t *e = (ekstat_t *)k;
287 	kstat_zone_t *kz;
288 
289 	kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
290 	if (kz == NULL)
291 		return;
292 	mutex_enter(&kstat_chain_lock);
293 	kz->zoneid = zoneid;
294 	kz->next = e->e_zone.next;
295 	e->e_zone.next = kz;
296 	kstat_chain_id++;
297 	mutex_exit(&kstat_chain_lock);
298 }
299 
300 /*
301  * Compare the list of zones for the given kstats, returning 0 if they match
302  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
303  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
304  * two lists always has one element, and this is an O(n) operation rather than
305  * O(n^2).
306  */
307 static int
308 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
309 {
310 	kstat_zone_t *kz1, *kz2;
311 
312 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
313 	for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
314 		for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
315 			if (kz1->zoneid == ALL_ZONES ||
316 			    kz2->zoneid == ALL_ZONES)
317 				return (0);
318 			if (kz1->zoneid == kz2->zoneid)
319 				return (0);
320 		}
321 	}
322 	return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
323 }
324 
325 /*
326  * Support for keeping kstats sorted in AVL trees for fast lookups.
327  */
328 static int
329 kstat_compare_bykid(const void *a1, const void *a2)
330 {
331 	const kstat_t *k1 = a1;
332 	const kstat_t *k2 = a2;
333 
334 	if (k1->ks_kid < k2->ks_kid)
335 		return (-1);
336 	if (k1->ks_kid > k2->ks_kid)
337 		return (1);
338 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
339 }
340 
341 static int
342 kstat_compare_byname(const void *a1, const void *a2)
343 {
344 	const kstat_t *k1 = a1;
345 	const kstat_t *k2 = a2;
346 	int s;
347 
348 	s = strcmp(k1->ks_module, k2->ks_module);
349 	if (s > 0)
350 		return (1);
351 	if (s < 0)
352 		return (-1);
353 
354 	if (k1->ks_instance < k2->ks_instance)
355 		return (-1);
356 	if (k1->ks_instance > k2->ks_instance)
357 		return (1);
358 
359 	s = strcmp(k1->ks_name, k2->ks_name);
360 	if (s > 0)
361 		return (1);
362 	if (s < 0)
363 		return (-1);
364 
365 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
366 }
367 
368 static kstat_t *
369 kstat_hold(avl_tree_t *t, ekstat_t *template)
370 {
371 	kstat_t *ksp;
372 	ekstat_t *e;
373 
374 	mutex_enter(&kstat_chain_lock);
375 	for (;;) {
376 		ksp = avl_find(t, template, NULL);
377 		if (ksp == NULL)
378 			break;
379 		e = (ekstat_t *)ksp;
380 		if (e->e_owner == NULL) {
381 			e->e_owner = curthread;
382 			break;
383 		}
384 		cv_wait(&e->e_cv, &kstat_chain_lock);
385 	}
386 	mutex_exit(&kstat_chain_lock);
387 	return (ksp);
388 }
389 
390 void
391 kstat_rele(kstat_t *ksp)
392 {
393 	ekstat_t *e = (ekstat_t *)ksp;
394 
395 	mutex_enter(&kstat_chain_lock);
396 	ASSERT(e->e_owner == curthread);
397 	e->e_owner = NULL;
398 	cv_broadcast(&e->e_cv);
399 	mutex_exit(&kstat_chain_lock);
400 }
401 
402 kstat_t *
403 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
404 {
405 	ekstat_t e;
406 
407 	e.e_ks.ks_kid = kid;
408 	e.e_zone.zoneid = zoneid;
409 	e.e_zone.next = NULL;
410 
411 	return (kstat_hold(&kstat_avl_bykid, &e));
412 }
413 
414 kstat_t *
415 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
416     zoneid_t ks_zoneid)
417 {
418 	ekstat_t e;
419 
420 	kstat_set_string(e.e_ks.ks_module, ks_module);
421 	e.e_ks.ks_instance = ks_instance;
422 	kstat_set_string(e.e_ks.ks_name, ks_name);
423 	e.e_zone.zoneid = ks_zoneid;
424 	e.e_zone.next = NULL;
425 	return (kstat_hold(&kstat_avl_byname, &e));
426 }
427 
428 static ekstat_t *
429 kstat_alloc(size_t size)
430 {
431 	ekstat_t *e = NULL;
432 
433 	size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
434 
435 	if (kstat_arena == NULL) {
436 		if (size <= kstat_initial_avail) {
437 			e = kstat_initial_ptr;
438 			kstat_initial_ptr = (char *)kstat_initial_ptr + size;
439 			kstat_initial_avail -= size;
440 		}
441 	} else {
442 		e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
443 	}
444 
445 	if (e != NULL) {
446 		bzero(e, size);
447 		e->e_size = size;
448 		cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
449 	}
450 
451 	return (e);
452 }
453 
454 static void
455 kstat_free(ekstat_t *e)
456 {
457 	cv_destroy(&e->e_cv);
458 	vmem_free(kstat_arena, e, e->e_size);
459 }
460 
461 /*
462  * Create various system kstats.
463  */
464 void
465 kstat_init(void)
466 {
467 	kstat_t *ksp;
468 	ekstat_t *e;
469 	avl_tree_t *t = &kstat_avl_bykid;
470 
471 	/*
472 	 * Set up the kstat vmem arena.
473 	 */
474 	kstat_arena = vmem_create("kstat",
475 	    kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
476 	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
477 
478 	/*
479 	 * Make initial kstats appear as though they were allocated.
480 	 */
481 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
482 		(void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
483 		    0, 0, e, (char *)e + e->e_size,
484 		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
485 
486 	/*
487 	 * The mother of all kstats.  The first kstat in the system, which
488 	 * always has KID 0, has the headers for all kstats (including itself)
489 	 * as its data.  Thus, the kstat driver does not need any special
490 	 * interface to extract the kstat chain.
491 	 */
492 	kstat_chain_id = 0;
493 	ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
494 	    0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
495 	if (ksp) {
496 		ksp->ks_lock = &kstat_chain_lock;
497 		ksp->ks_update = header_kstat_update;
498 		ksp->ks_snapshot = header_kstat_snapshot;
499 		kstat_install(ksp);
500 	} else {
501 		panic("cannot create kstat 'kstat_headers'");
502 	}
503 
504 	ksp = kstat_create("unix", 0, "kstat_types", "kstat",
505 	    KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
506 	if (ksp) {
507 		int i;
508 		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
509 
510 		for (i = 0; i < KSTAT_NUM_TYPES; i++) {
511 			kstat_named_init(&kn[i], kstat_data_type[i].name,
512 			    KSTAT_DATA_ULONG);
513 			kn[i].value.ul = i;
514 		}
515 		kstat_install(ksp);
516 	}
517 
518 	ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
519 	    sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
520 	if (ksp) {
521 		ksp->ks_data = (void *) &sysinfo;
522 		kstat_install(ksp);
523 	}
524 
525 	ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
526 	    sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
527 	if (ksp) {
528 		ksp->ks_data = (void *) &vminfo;
529 		kstat_install(ksp);
530 	}
531 
532 	ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
533 	    segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
534 	if (ksp) {
535 		ksp->ks_data = (void *) segmapcnt_ptr;
536 		ksp->ks_update = segmap_kstat_update;
537 		kstat_install(ksp);
538 	}
539 
540 	ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
541 	    biostats_ndata, KSTAT_FLAG_VIRTUAL);
542 	if (ksp) {
543 		ksp->ks_data = (void *) biostats_ptr;
544 		kstat_install(ksp);
545 	}
546 
547 	ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
548 	    sizeof (struct var), KSTAT_FLAG_VIRTUAL);
549 	if (ksp) {
550 		ksp->ks_data = (void *) &v;
551 		kstat_install(ksp);
552 	}
553 
554 	ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
555 	    sizeof (system_misc_kstat) / sizeof (kstat_named_t),
556 	    KSTAT_FLAG_VIRTUAL);
557 	if (ksp) {
558 		ksp->ks_data = (void *) &system_misc_kstat;
559 		ksp->ks_update = system_misc_kstat_update;
560 		kstat_install(ksp);
561 	}
562 
563 	ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
564 	    sizeof (system_pages_kstat) / sizeof (kstat_named_t),
565 	    KSTAT_FLAG_VIRTUAL);
566 	if (ksp) {
567 		ksp->ks_data = (void *) &system_pages_kstat;
568 		ksp->ks_update = system_pages_kstat_update;
569 		kstat_install(ksp);
570 	}
571 
572 	ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
573 	    pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
574 
575 	if (ksp) {
576 		ksp->ks_data = pollstats_ptr;
577 		kstat_install(ksp);
578 	}
579 }
580 
581 /*
582  * Caller of this should ensure that the string pointed by src
583  * doesn't change while kstat's lock is held. Not doing so defeats
584  * kstat's snapshot strategy as explained in <sys/kstat.h>
585  */
586 void
587 kstat_named_setstr(kstat_named_t *knp, const char *src)
588 {
589 	if (knp->data_type != KSTAT_DATA_STRING)
590 		panic("kstat_named_setstr('%p', '%p'): "
591 		    "named kstat is not of type KSTAT_DATA_STRING",
592 		    (void *)knp, (void *)src);
593 
594 	KSTAT_NAMED_STR_PTR(knp) = (char *)src;
595 	if (src != NULL)
596 		KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
597 	else
598 		KSTAT_NAMED_STR_BUFLEN(knp) = 0;
599 }
600 
601 void
602 kstat_set_string(char *dst, const char *src)
603 {
604 	bzero(dst, KSTAT_STRLEN);
605 	(void) strncpy(dst, src, KSTAT_STRLEN - 1);
606 }
607 
608 void
609 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
610 {
611 	kstat_set_string(knp->name, name);
612 	knp->data_type = data_type;
613 
614 	if (data_type == KSTAT_DATA_STRING)
615 		kstat_named_setstr(knp, NULL);
616 }
617 
618 void
619 kstat_timer_init(kstat_timer_t *ktp, const char *name)
620 {
621 	kstat_set_string(ktp->name, name);
622 }
623 
624 /* ARGSUSED */
625 static int
626 default_kstat_update(kstat_t *ksp, int rw)
627 {
628 	uint_t i;
629 	size_t len = 0;
630 	kstat_named_t *knp;
631 
632 	/*
633 	 * Named kstats with variable-length long strings have a standard
634 	 * way of determining how much space is needed to hold the snapshot:
635 	 */
636 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
637 	    (ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
638 
639 		/*
640 		 * Add in the space required for the strings
641 		 */
642 		knp = KSTAT_NAMED_PTR(ksp);
643 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
644 			if (knp->data_type == KSTAT_DATA_STRING)
645 				len += KSTAT_NAMED_STR_BUFLEN(knp);
646 		}
647 		ksp->ks_data_size =
648 		    ksp->ks_ndata * sizeof (kstat_named_t) + len;
649 	}
650 	return (0);
651 }
652 
653 static int
654 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
655 {
656 	kstat_io_t *kiop;
657 	hrtime_t cur_time;
658 	size_t	namedsz;
659 
660 	ksp->ks_snaptime = cur_time = gethrtime();
661 
662 	if (rw == KSTAT_WRITE) {
663 		if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
664 			return (EACCES);
665 		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
666 		return (0);
667 	}
668 
669 	/*
670 	 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
671 	 * number of kstat_named_t structures, followed by an optional
672 	 * string segment. The ks_data generally holds only the
673 	 * kstat_named_t structures. So we copy it first. The strings,
674 	 * if any, are copied below. For other kstat types, ks_data holds the
675 	 * entire buffer.
676 	 */
677 
678 	namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
679 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
680 		bcopy(ksp->ks_data, buf, namedsz);
681 	else
682 		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
683 
684 	/*
685 	 * Apply kstat type-specific data massaging
686 	 */
687 	switch (ksp->ks_type) {
688 
689 	case KSTAT_TYPE_IO:
690 		/*
691 		 * Normalize time units and deal with incomplete transactions
692 		 */
693 		kiop = (kstat_io_t *)buf;
694 
695 		scalehrtime(&kiop->wtime);
696 		scalehrtime(&kiop->wlentime);
697 		scalehrtime(&kiop->wlastupdate);
698 		scalehrtime(&kiop->rtime);
699 		scalehrtime(&kiop->rlentime);
700 		scalehrtime(&kiop->rlastupdate);
701 
702 		if (kiop->wcnt != 0) {
703 			/* like kstat_waitq_exit */
704 			hrtime_t wfix = cur_time - kiop->wlastupdate;
705 			kiop->wlastupdate = cur_time;
706 			kiop->wlentime += kiop->wcnt * wfix;
707 			kiop->wtime += wfix;
708 		}
709 
710 		if (kiop->rcnt != 0) {
711 			/* like kstat_runq_exit */
712 			hrtime_t rfix = cur_time - kiop->rlastupdate;
713 			kiop->rlastupdate = cur_time;
714 			kiop->rlentime += kiop->rcnt * rfix;
715 			kiop->rtime += rfix;
716 		}
717 		break;
718 
719 	case KSTAT_TYPE_NAMED:
720 		/*
721 		 * Massage any long strings in at the end of the buffer
722 		 */
723 		if (ksp->ks_data_size > namedsz) {
724 			uint_t i;
725 			kstat_named_t *knp = buf;
726 			char *dst = (char *)(knp + ksp->ks_ndata);
727 			/*
728 			 * Copy strings and update pointers
729 			 */
730 			for (i = 0; i < ksp->ks_ndata; i++, knp++) {
731 				if (knp->data_type == KSTAT_DATA_STRING &&
732 				    KSTAT_NAMED_STR_PTR(knp) != NULL) {
733 					bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
734 					    KSTAT_NAMED_STR_BUFLEN(knp));
735 					KSTAT_NAMED_STR_PTR(knp) = dst;
736 					dst += KSTAT_NAMED_STR_BUFLEN(knp);
737 				}
738 			}
739 			ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
740 		}
741 		break;
742 	}
743 	return (0);
744 }
745 
746 static int
747 header_kstat_update(kstat_t *header_ksp, int rw)
748 {
749 	int nkstats = 0;
750 	ekstat_t *e;
751 	avl_tree_t *t = &kstat_avl_bykid;
752 	zoneid_t zoneid;
753 
754 	if (rw == KSTAT_WRITE)
755 		return (EACCES);
756 
757 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
758 
759 	zoneid = getzoneid();
760 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
761 		if (kstat_zone_find((kstat_t *)e, zoneid)) {
762 			nkstats++;
763 		}
764 	}
765 	header_ksp->ks_ndata = nkstats;
766 	header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
767 	return (0);
768 }
769 
770 /*
771  * Copy out the data section of kstat 0, which consists of the list
772  * of all kstat headers.  By specification, these headers must be
773  * copied out in order of increasing KID.
774  */
775 static int
776 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
777 {
778 	ekstat_t *e;
779 	avl_tree_t *t = &kstat_avl_bykid;
780 	zoneid_t zoneid;
781 
782 	header_ksp->ks_snaptime = gethrtime();
783 
784 	if (rw == KSTAT_WRITE)
785 		return (EACCES);
786 
787 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
788 
789 	zoneid = getzoneid();
790 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
791 		if (kstat_zone_find((kstat_t *)e, zoneid)) {
792 			bcopy(&e->e_ks, buf, sizeof (kstat_t));
793 			buf = (char *)buf + sizeof (kstat_t);
794 		}
795 	}
796 
797 	return (0);
798 }
799 
800 /* ARGSUSED */
801 static int
802 system_misc_kstat_update(kstat_t *ksp, int rw)
803 {
804 	int myncpus = ncpus;
805 	int *loadavgp = &avenrun[0];
806 	int loadavg[LOADAVG_NSTATS];
807 	time_t zone_boot_time;
808 	clock_t zone_lbolt;
809 	hrtime_t zone_hrtime;
810 
811 	if (rw == KSTAT_WRITE)
812 		return (EACCES);
813 
814 	if (!INGLOBALZONE(curproc)) {
815 		/*
816 		 * Here we grab cpu_lock which is OK as long as no-one in the
817 		 * future attempts to lookup this particular kstat
818 		 * (unix:0:system_misc) while holding cpu_lock.
819 		 */
820 		mutex_enter(&cpu_lock);
821 		if (pool_pset_enabled()) {
822 			psetid_t mypsid = zone_pset_get(curproc->p_zone);
823 			int error;
824 
825 			myncpus = zone_ncpus_get(curproc->p_zone);
826 			ASSERT(myncpus > 0);
827 			error = cpupart_get_loadavg(mypsid, &loadavg[0],
828 			    LOADAVG_NSTATS);
829 			ASSERT(error == 0);
830 			loadavgp = &loadavg[0];
831 		}
832 		mutex_exit(&cpu_lock);
833 	}
834 
835 	if (curproc->p_zone->zone_id == 0) {
836 		zone_boot_time = boot_time;
837 		zone_lbolt = ddi_get_lbolt();
838 	} else {
839 		struct timeval tvp;
840 		hrt2tv(curproc->p_zone->zone_zsched->p_mstart, &tvp);
841 		zone_boot_time = tvp.tv_sec;
842 
843 		zone_hrtime = gethrtime();
844 		zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
845 		    NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
846 	}
847 
848 	system_misc_kstat.ncpus.value.ui32		= (uint32_t)myncpus;
849 	system_misc_kstat.lbolt.value.ui32		= (uint32_t)zone_lbolt;
850 	system_misc_kstat.deficit.value.ui32		= (uint32_t)deficit;
851 	system_misc_kstat.clk_intr.value.ui32		= (uint32_t)zone_lbolt;
852 	system_misc_kstat.vac.value.ui32		= (uint32_t)vac;
853 	system_misc_kstat.nproc.value.ui32		= (uint32_t)nproc;
854 	system_misc_kstat.avenrun_1min.value.ui32	= (uint32_t)loadavgp[0];
855 	system_misc_kstat.avenrun_5min.value.ui32	= (uint32_t)loadavgp[1];
856 	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];
857 	system_misc_kstat.boot_time.value.ui32		= (uint32_t)
858 	    zone_boot_time;
859 	return (0);
860 }
861 
862 #ifdef	__sparc
863 extern caddr_t	econtig32;
864 #else	/* !__sparc */
865 extern caddr_t	econtig;
866 #endif	/* __sparc */
867 
868 /* ARGSUSED */
869 static int
870 system_pages_kstat_update(kstat_t *ksp, int rw)
871 {
872 	kobj_stat_t kobj_stat;
873 
874 	if (rw == KSTAT_WRITE) {
875 		return (EACCES);
876 	}
877 
878 	kobj_stat_get(&kobj_stat);
879 	system_pages_kstat.physmem.value.ul	= (ulong_t)physmem;
880 	system_pages_kstat.nalloc.value.ul	= kobj_stat.nalloc;
881 	system_pages_kstat.nfree.value.ul	= kobj_stat.nfree;
882 	system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
883 	system_pages_kstat.nfree_calls.value.ul	= kobj_stat.nfree_calls;
884 	system_pages_kstat.kernelbase.value.ul	= (ulong_t)KERNELBASE;
885 
886 #ifdef	__sparc
887 	/*
888 	 * kstat should REALLY be modified to also report kmem64_base and
889 	 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
890 	 * [ kernelbase .. econtig ] no longer is truly reflective of the
891 	 * kernel's vallocs...
892 	 */
893 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig32;
894 #else	/* !__sparc */
895 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig;
896 #endif	/* __sparc */
897 
898 	system_pages_kstat.freemem.value.ul	= (ulong_t)freemem;
899 	system_pages_kstat.availrmem.value.ul	= (ulong_t)availrmem;
900 	system_pages_kstat.lotsfree.value.ul	= (ulong_t)lotsfree;
901 	system_pages_kstat.desfree.value.ul	= (ulong_t)desfree;
902 	system_pages_kstat.minfree.value.ul	= (ulong_t)minfree;
903 	system_pages_kstat.fastscan.value.ul	= (ulong_t)fastscan;
904 	system_pages_kstat.slowscan.value.ul	= (ulong_t)slowscan;
905 	system_pages_kstat.nscan.value.ul	= (ulong_t)nscan;
906 	system_pages_kstat.desscan.value.ul	= (ulong_t)desscan;
907 	system_pages_kstat.pagesfree.value.ul	= (ulong_t)freemem;
908 	system_pages_kstat.pageslocked.value.ul	= (ulong_t)(availrmem_initial -
909 	    availrmem);
910 	system_pages_kstat.pagestotal.value.ul	= (ulong_t)total_pages;
911 	/*
912 	 * pp_kernel represents total pages used by the kernel since the
913 	 * startup. This formula takes into account the boottime kernel
914 	 * footprint and also considers the availrmem changes because of
915 	 * user explicit page locking.
916 	 */
917 	system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
918 	    obp_pages - availrmem - k_anoninfo.ani_mem_resv -
919 	    anon_segkp_pages_locked - pages_locked -
920 	    pages_claimed - pages_useclaim);
921 
922 	return (0);
923 }
924 
925 kstat_t *
926 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
927     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
928 {
929 	return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
930 	    ks_type, ks_ndata, ks_flags, ALL_ZONES));
931 }
932 
933 /*
934  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
935  * the specified name exists, reactivate it.  Returns a pointer to the kstat
936  * on success, NULL on failure.  The kstat will not be visible to the
937  * kstat driver until kstat_install().
938  */
939 kstat_t *
940 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
941     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
942     zoneid_t ks_zoneid)
943 {
944 	size_t ks_data_size;
945 	kstat_t *ksp;
946 	ekstat_t *e;
947 	avl_index_t where;
948 	char namebuf[KSTAT_STRLEN + 16];
949 
950 	if (avl_numnodes(&kstat_avl_bykid) == 0) {
951 		avl_create(&kstat_avl_bykid, kstat_compare_bykid,
952 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
953 
954 		avl_create(&kstat_avl_byname, kstat_compare_byname,
955 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
956 	}
957 
958 	/*
959 	 * If ks_name == NULL, set the ks_name to <module><instance>.
960 	 */
961 	if (ks_name == NULL) {
962 		char buf[KSTAT_STRLEN];
963 		kstat_set_string(buf, ks_module);
964 		(void) sprintf(namebuf, "%s%d", buf, ks_instance);
965 		ks_name = namebuf;
966 	}
967 
968 	/*
969 	 * Make sure it's a valid kstat data type
970 	 */
971 	if (ks_type >= KSTAT_NUM_TYPES) {
972 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
973 		    "invalid kstat type %d",
974 		    ks_module, ks_instance, ks_name, ks_type);
975 		return (NULL);
976 	}
977 
978 	/*
979 	 * Don't allow persistent virtual kstats -- it makes no sense.
980 	 * ks_data points to garbage when the client goes away.
981 	 */
982 	if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
983 	    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
984 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
985 		    "cannot create persistent virtual kstat",
986 		    ks_module, ks_instance, ks_name);
987 		return (NULL);
988 	}
989 
990 	/*
991 	 * Don't allow variable-size physical kstats, since the framework's
992 	 * memory allocation for physical kstat data is fixed at creation time.
993 	 */
994 	if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
995 	    !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
996 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
997 		    "cannot create variable-size physical kstat",
998 		    ks_module, ks_instance, ks_name);
999 		return (NULL);
1000 	}
1001 
1002 	/*
1003 	 * Make sure the number of data fields is within legal range
1004 	 */
1005 	if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1006 	    ks_ndata > kstat_data_type[ks_type].max_ndata) {
1007 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1008 		    "ks_ndata=%d out of range [%d, %d]",
1009 		    ks_module, ks_instance, ks_name, (int)ks_ndata,
1010 		    kstat_data_type[ks_type].min_ndata,
1011 		    kstat_data_type[ks_type].max_ndata);
1012 		return (NULL);
1013 	}
1014 
1015 	ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1016 
1017 	/*
1018 	 * If the named kstat already exists and is dormant, reactivate it.
1019 	 */
1020 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1021 	if (ksp != NULL) {
1022 		if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1023 			/*
1024 			 * The named kstat exists but is not dormant --
1025 			 * this is a kstat namespace collision.
1026 			 */
1027 			kstat_rele(ksp);
1028 			cmn_err(CE_WARN,
1029 			    "kstat_create('%s', %d, '%s'): namespace collision",
1030 			    ks_module, ks_instance, ks_name);
1031 			return (NULL);
1032 		}
1033 		if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1034 		    (ksp->ks_type != ks_type) ||
1035 		    (ksp->ks_ndata != ks_ndata) ||
1036 		    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1037 			/*
1038 			 * The name is the same, but the other key parameters
1039 			 * differ from those of the dormant kstat -- bogus.
1040 			 */
1041 			kstat_rele(ksp);
1042 			cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1043 			    "invalid reactivation of dormant kstat",
1044 			    ks_module, ks_instance, ks_name);
1045 			return (NULL);
1046 		}
1047 		/*
1048 		 * Return dormant kstat pointer to caller.  As usual,
1049 		 * the kstat is marked invalid until kstat_install().
1050 		 */
1051 		ksp->ks_flags |= KSTAT_FLAG_INVALID;
1052 		kstat_rele(ksp);
1053 		return (ksp);
1054 	}
1055 
1056 	/*
1057 	 * Allocate memory for the new kstat header and, if this is a physical
1058 	 * kstat, the data section.
1059 	 */
1060 	e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1061 	if (e == NULL) {
1062 		cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1063 		    "insufficient kernel memory",
1064 		    ks_module, ks_instance, ks_name);
1065 		return (NULL);
1066 	}
1067 
1068 	/*
1069 	 * Initialize as many fields as we can.  The caller may reset
1070 	 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1071 	 * Creators of virtual kstats may also reset ks_data.  It is
1072 	 * also up to the caller to initialize the kstat data section,
1073 	 * if necessary.  All initialization must be complete before
1074 	 * calling kstat_install().
1075 	 */
1076 	e->e_zone.zoneid = ks_zoneid;
1077 	e->e_zone.next = NULL;
1078 
1079 	ksp = &e->e_ks;
1080 	ksp->ks_crtime		= gethrtime();
1081 	kstat_set_string(ksp->ks_module, ks_module);
1082 	ksp->ks_instance	= ks_instance;
1083 	kstat_set_string(ksp->ks_name, ks_name);
1084 	ksp->ks_type		= ks_type;
1085 	kstat_set_string(ksp->ks_class, ks_class);
1086 	ksp->ks_flags		= ks_flags | KSTAT_FLAG_INVALID;
1087 	if (ks_flags & KSTAT_FLAG_VIRTUAL)
1088 		ksp->ks_data	= NULL;
1089 	else
1090 		ksp->ks_data	= (void *)(e + 1);
1091 	ksp->ks_ndata		= ks_ndata;
1092 	ksp->ks_data_size	= ks_data_size;
1093 	ksp->ks_snaptime	= ksp->ks_crtime;
1094 	ksp->ks_update		= default_kstat_update;
1095 	ksp->ks_private		= NULL;
1096 	ksp->ks_snapshot	= default_kstat_snapshot;
1097 	ksp->ks_lock		= NULL;
1098 
1099 	mutex_enter(&kstat_chain_lock);
1100 
1101 	/*
1102 	 * Add our kstat to the AVL trees.
1103 	 */
1104 	if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1105 		mutex_exit(&kstat_chain_lock);
1106 		cmn_err(CE_WARN,
1107 		    "kstat_create('%s', %d, '%s'): namespace collision",
1108 		    ks_module, ks_instance, ks_name);
1109 		kstat_free(e);
1110 		return (NULL);
1111 	}
1112 	avl_insert(&kstat_avl_byname, e, where);
1113 
1114 	/*
1115 	 * Loop around until we find an unused KID.
1116 	 */
1117 	do {
1118 		ksp->ks_kid = kstat_chain_id++;
1119 	} while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1120 	avl_insert(&kstat_avl_bykid, e, where);
1121 
1122 	mutex_exit(&kstat_chain_lock);
1123 
1124 	return (ksp);
1125 }
1126 
1127 /*
1128  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1129  */
1130 void
1131 kstat_install(kstat_t *ksp)
1132 {
1133 	zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1134 
1135 	/*
1136 	 * If this is a variable-size kstat, it MUST provide kstat data locking
1137 	 * to prevent data-size races with kstat readers.
1138 	 */
1139 	if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1140 		panic("kstat_install('%s', %d, '%s'): "
1141 		    "cannot create variable-size kstat without data lock",
1142 		    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1143 	}
1144 
1145 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1146 		cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1147 		    (void *)ksp);
1148 		return;
1149 	}
1150 
1151 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1152 		int has_long_strings = 0;
1153 		uint_t i;
1154 		kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1155 
1156 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1157 			if (knp->data_type == KSTAT_DATA_STRING) {
1158 				has_long_strings = 1;
1159 				break;
1160 			}
1161 		}
1162 		/*
1163 		 * It is an error for a named kstat with fields of
1164 		 * KSTAT_DATA_STRING to be non-virtual.
1165 		 */
1166 		if (has_long_strings && !(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) {
1167 			panic("kstat_install('%s', %d, '%s'): "
1168 			    "named kstat containing KSTAT_DATA_STRING "
1169 			    "is not virtual",
1170 			    ksp->ks_module, ksp->ks_instance,
1171 			    ksp->ks_name);
1172 		}
1173 		/*
1174 		 * The default snapshot routine does not handle KSTAT_WRITE
1175 		 * for long strings.
1176 		 */
1177 		if (has_long_strings && (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1178 		    (ksp->ks_snapshot == default_kstat_snapshot)) {
1179 			panic("kstat_install('%s', %d, '%s'): "
1180 			    "named kstat containing KSTAT_DATA_STRING "
1181 			    "is writable but uses default snapshot routine",
1182 			    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1183 		}
1184 	}
1185 
1186 	if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1187 
1188 		/*
1189 		 * We are reactivating a dormant kstat.  Initialize the
1190 		 * caller's underlying data to the value it had when the
1191 		 * kstat went dormant, and mark the kstat as active.
1192 		 * Grab the provider's kstat lock if it's not already held.
1193 		 */
1194 		kmutex_t *lp = ksp->ks_lock;
1195 		if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1196 			mutex_enter(lp);
1197 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1198 			mutex_exit(lp);
1199 		} else {
1200 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1201 		}
1202 		ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1203 	}
1204 
1205 	/*
1206 	 * Now that the kstat is active, make it visible to the kstat driver.
1207 	 */
1208 	ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1209 	kstat_rele(ksp);
1210 }
1211 
1212 /*
1213  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1214  * just update the data and mark it as dormant.
1215  */
1216 void
1217 kstat_delete(kstat_t *ksp)
1218 {
1219 	kmutex_t *lp;
1220 	ekstat_t *e = (ekstat_t *)ksp;
1221 	zoneid_t zoneid;
1222 	kstat_zone_t *kz;
1223 
1224 	ASSERT(ksp != NULL);
1225 
1226 	if (ksp == NULL)
1227 		return;
1228 
1229 	zoneid = e->e_zone.zoneid;
1230 
1231 	lp = ksp->ks_lock;
1232 
1233 	if (lp != NULL && MUTEX_HELD(lp)) {
1234 		panic("kstat_delete(%p): caller holds data lock %p",
1235 		    (void *)ksp, (void *)lp);
1236 	}
1237 
1238 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1239 		cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1240 		    (void *)ksp);
1241 		return;
1242 	}
1243 
1244 	if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1245 		/*
1246 		 * Update the data one last time, so that all activity
1247 		 * prior to going dormant has been accounted for.
1248 		 */
1249 		KSTAT_ENTER(ksp);
1250 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
1251 		KSTAT_EXIT(ksp);
1252 
1253 		/*
1254 		 * Mark the kstat as dormant and restore caller-modifiable
1255 		 * fields to default values, so the kstat is readable during
1256 		 * the dormant phase.
1257 		 */
1258 		ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1259 		ksp->ks_lock = NULL;
1260 		ksp->ks_update = default_kstat_update;
1261 		ksp->ks_private = NULL;
1262 		ksp->ks_snapshot = default_kstat_snapshot;
1263 		kstat_rele(ksp);
1264 		return;
1265 	}
1266 
1267 	/*
1268 	 * Remove the kstat from the framework's AVL trees,
1269 	 * free the allocated memory, and increment kstat_chain_id so
1270 	 * /dev/kstat clients can detect the event.
1271 	 */
1272 	mutex_enter(&kstat_chain_lock);
1273 	avl_remove(&kstat_avl_bykid, e);
1274 	avl_remove(&kstat_avl_byname, e);
1275 	kstat_chain_id++;
1276 	mutex_exit(&kstat_chain_lock);
1277 
1278 	kz = e->e_zone.next;
1279 	while (kz != NULL) {
1280 		kstat_zone_t *t = kz;
1281 
1282 		kz = kz->next;
1283 		kmem_free(t, sizeof (*t));
1284 	}
1285 	kstat_rele(ksp);
1286 	kstat_free(e);
1287 }
1288 
1289 void
1290 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1291     const char *ks_name, zoneid_t ks_zoneid)
1292 {
1293 	kstat_t *ksp;
1294 
1295 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1296 	if (ksp != NULL) {
1297 		kstat_rele(ksp);
1298 		kstat_delete(ksp);
1299 	}
1300 }
1301 
1302 void
1303 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1304 {
1305 	kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1306 }
1307 
1308 /*
1309  * The sparc V9 versions of these routines can be much cheaper than
1310  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1311  * For simplicity, however, we always feed the C versions to lint.
1312  */
1313 #if !defined(__sparc) || defined(lint) || defined(__lint)
1314 
1315 void
1316 kstat_waitq_enter(kstat_io_t *kiop)
1317 {
1318 	hrtime_t new, delta;
1319 	ulong_t wcnt;
1320 
1321 	new = gethrtime_unscaled();
1322 	delta = new - kiop->wlastupdate;
1323 	kiop->wlastupdate = new;
1324 	wcnt = kiop->wcnt++;
1325 	if (wcnt != 0) {
1326 		kiop->wlentime += delta * wcnt;
1327 		kiop->wtime += delta;
1328 	}
1329 }
1330 
1331 void
1332 kstat_waitq_exit(kstat_io_t *kiop)
1333 {
1334 	hrtime_t new, delta;
1335 	ulong_t wcnt;
1336 
1337 	new = gethrtime_unscaled();
1338 	delta = new - kiop->wlastupdate;
1339 	kiop->wlastupdate = new;
1340 	wcnt = kiop->wcnt--;
1341 	ASSERT((int)wcnt > 0);
1342 	kiop->wlentime += delta * wcnt;
1343 	kiop->wtime += delta;
1344 }
1345 
1346 void
1347 kstat_runq_enter(kstat_io_t *kiop)
1348 {
1349 	hrtime_t new, delta;
1350 	ulong_t rcnt;
1351 
1352 	new = gethrtime_unscaled();
1353 	delta = new - kiop->rlastupdate;
1354 	kiop->rlastupdate = new;
1355 	rcnt = kiop->rcnt++;
1356 	if (rcnt != 0) {
1357 		kiop->rlentime += delta * rcnt;
1358 		kiop->rtime += delta;
1359 	}
1360 }
1361 
1362 void
1363 kstat_runq_exit(kstat_io_t *kiop)
1364 {
1365 	hrtime_t new, delta;
1366 	ulong_t rcnt;
1367 
1368 	new = gethrtime_unscaled();
1369 	delta = new - kiop->rlastupdate;
1370 	kiop->rlastupdate = new;
1371 	rcnt = kiop->rcnt--;
1372 	ASSERT((int)rcnt > 0);
1373 	kiop->rlentime += delta * rcnt;
1374 	kiop->rtime += delta;
1375 }
1376 
1377 void
1378 kstat_waitq_to_runq(kstat_io_t *kiop)
1379 {
1380 	hrtime_t new, delta;
1381 	ulong_t wcnt, rcnt;
1382 
1383 	new = gethrtime_unscaled();
1384 
1385 	delta = new - kiop->wlastupdate;
1386 	kiop->wlastupdate = new;
1387 	wcnt = kiop->wcnt--;
1388 	ASSERT((int)wcnt > 0);
1389 	kiop->wlentime += delta * wcnt;
1390 	kiop->wtime += delta;
1391 
1392 	delta = new - kiop->rlastupdate;
1393 	kiop->rlastupdate = new;
1394 	rcnt = kiop->rcnt++;
1395 	if (rcnt != 0) {
1396 		kiop->rlentime += delta * rcnt;
1397 		kiop->rtime += delta;
1398 	}
1399 }
1400 
1401 void
1402 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1403 {
1404 	hrtime_t new, delta;
1405 	ulong_t wcnt, rcnt;
1406 
1407 	new = gethrtime_unscaled();
1408 
1409 	delta = new - kiop->rlastupdate;
1410 	kiop->rlastupdate = new;
1411 	rcnt = kiop->rcnt--;
1412 	ASSERT((int)rcnt > 0);
1413 	kiop->rlentime += delta * rcnt;
1414 	kiop->rtime += delta;
1415 
1416 	delta = new - kiop->wlastupdate;
1417 	kiop->wlastupdate = new;
1418 	wcnt = kiop->wcnt++;
1419 	if (wcnt != 0) {
1420 		kiop->wlentime += delta * wcnt;
1421 		kiop->wtime += delta;
1422 	}
1423 }
1424 
1425 #endif
1426 
1427 void
1428 kstat_timer_start(kstat_timer_t *ktp)
1429 {
1430 	ktp->start_time = gethrtime();
1431 }
1432 
1433 void
1434 kstat_timer_stop(kstat_timer_t *ktp)
1435 {
1436 	hrtime_t	etime;
1437 	u_longlong_t	num_events;
1438 
1439 	ktp->stop_time = etime = gethrtime();
1440 	etime -= ktp->start_time;
1441 	num_events = ktp->num_events;
1442 	if (etime < ktp->min_time || num_events == 0)
1443 		ktp->min_time = etime;
1444 	if (etime > ktp->max_time)
1445 		ktp->max_time = etime;
1446 	ktp->elapsed_time += etime;
1447 	ktp->num_events = num_events + 1;
1448 }
1449