xref: /titanic_52/usr/src/uts/common/os/kstat_fr.c (revision 2efb3bf9c7f4cf34038896f1431531c93d3f57c2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013, Joyent, Inc. All rights reserved.
24  */
25 
26 /*
27  * Kernel statistics framework
28  */
29 
30 #include <sys/types.h>
31 #include <sys/time.h>
32 #include <sys/systm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/t_lock.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/vmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/cmn_err.h>
40 #include <sys/kstat.h>
41 #include <sys/sysinfo.h>
42 #include <sys/cpuvar.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/vfs.h>
47 #include <sys/dnlc.h>
48 #include <sys/var.h>
49 #include <sys/debug.h>
50 #include <sys/kobj.h>
51 #include <sys/avl.h>
52 #include <sys/pool_pset.h>
53 #include <sys/cpupart.h>
54 #include <sys/zone.h>
55 #include <sys/loadavg.h>
56 #include <vm/page.h>
57 #include <vm/anon.h>
58 #include <vm/seg_kmem.h>
59 
60 /*
61  * Global lock to protect the AVL trees and kstat_chain_id.
62  */
63 static kmutex_t kstat_chain_lock;
64 
65 /*
66  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
67  *
68  * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
69  *
70  * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
71  *	/dev/kstat uses the KID as a cookie for kstat lookups.
72  *
73  * We reserve the first two IDs because some kstats are created before
74  * the well-known ones (kstat_headers = 0, kstat_types = 1).
75  *
76  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
77  * into a particular kstat, which is logically equivalent to a kstat being
78  * installed/deleted.
79  */
80 
81 kid_t kstat_chain_id = 2;
82 
83 /*
84  * As far as zones are concerned, there are 3 types of kstat:
85  *
86  * 1) Those which have a well-known name, and which should return per-zone data
87  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
88  * is an example of this type of kstat.
89  *
90  * 2) Those which should only be exported to a particular list of zones.
91  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
92  * able to see NFS mounts associated with zone B, while we want the
93  * global zone to be able to see all mounts on the system.
94  *
95  * 3) Those that can be exported to all zones.  Most system-related
96  * kstats fall within this category.
97  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
104  *
105  * Writing to kstats is currently disallowed from within a non-global
106  * zone, although this restriction could be removed in the future.
107  */
typedef struct kstat_zone {
	zoneid_t zoneid;		/* zone ID, or ALL_ZONES */
	struct kstat_zone *next;	/* next list entry; NULL at the tail */
} kstat_zone_t;
112 
113 /*
114  * Extended kstat structure -- for internal use only.
115  */
typedef struct ekstat {
	kstat_t		e_ks;		/* the kstat itself */
	size_t		e_size;		/* total allocation size */
	kthread_t	*e_owner;	/* thread holding this kstat, or NULL */
	kcondvar_t	e_cv;		/* wait for owner == NULL */
	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
	kstat_zone_t	e_zone;		/* head of list of zones to export to */
} ekstat_t;
125 
/*
 * Static buffer from which kstat_alloc() carves kstats while kstat_arena
 * is still NULL, i.e. before kstat_init() has created the arena.
 * kstat_initial_ptr is the next unused byte of the buffer and
 * kstat_initial_avail is the number of bytes remaining.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial;
static size_t kstat_initial_avail = sizeof (kstat_initial);
static vmem_t *kstat_arena;

/* kstat allocation sizes are rounded up to a multiple of this */
#define	KSTAT_ALIGN	(sizeof (uint64_t))

/*
 * AVL trees of ekstat_t, ordered by kstat_compare_bykid() and
 * kstat_compare_byname() respectively.
 */
static avl_tree_t kstat_avl_bykid;
static avl_tree_t kstat_avl_byname;
135 
136 /*
137  * Various pointers we need to create kstats at boot time in kstat_init()
138  */
extern	kstat_named_t	*segmapcnt_ptr;
extern	uint_t		segmapcnt_ndata;
extern	int		segmap_kstat_update(kstat_t *, int);
extern	kstat_named_t	*biostats_ptr;
extern	uint_t		biostats_ndata;
extern	kstat_named_t	*pollstats_ptr;
extern	uint_t		pollstats_ndata;

/*
 * Kernel globals exported through the system_misc, sysinfo and vminfo
 * kstats.
 */
extern	int	vac;
extern	uint_t	nproc;
extern	time_t	boot_time;
extern	sysinfo_t	sysinfo;
extern	vminfo_t	vminfo;
152 
/*
 * Data section of the unix:0:system_misc named kstat.  The values are
 * filled in by system_misc_kstat_update().
 */
struct {
	kstat_named_t ncpus;
	kstat_named_t lbolt;
	kstat_named_t deficit;
	kstat_named_t clk_intr;
	kstat_named_t vac;
	kstat_named_t nproc;
	kstat_named_t avenrun_1min;
	kstat_named_t avenrun_5min;
	kstat_named_t avenrun_15min;
	kstat_named_t boot_time;
} system_misc_kstat = {
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "lbolt",		KSTAT_DATA_UINT32 },
	{ "deficit",		KSTAT_DATA_UINT32 },
	{ "clk_intr",		KSTAT_DATA_UINT32 },
	{ "vac",		KSTAT_DATA_UINT32 },
	{ "nproc",		KSTAT_DATA_UINT32 },
	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
	{ "boot_time",		KSTAT_DATA_UINT32 },
};
176 
/*
 * Data section of the unix:0:system_pages named kstat.  The values are
 * filled in by system_pages_kstat_update().
 */
struct {
	kstat_named_t physmem;
	kstat_named_t nalloc;
	kstat_named_t nfree;
	kstat_named_t nalloc_calls;
	kstat_named_t nfree_calls;
	kstat_named_t kernelbase;
	kstat_named_t econtig;
	kstat_named_t freemem;
	kstat_named_t availrmem;
	kstat_named_t lotsfree;
	kstat_named_t desfree;
	kstat_named_t minfree;
	kstat_named_t fastscan;
	kstat_named_t slowscan;
	kstat_named_t nscan;
	kstat_named_t desscan;
	kstat_named_t pp_kernel;
	kstat_named_t pagesfree;
	kstat_named_t pageslocked;
	kstat_named_t pagestotal;
} system_pages_kstat = {
	{ "physmem",		KSTAT_DATA_ULONG },
	{ "nalloc",		KSTAT_DATA_ULONG },
	{ "nfree",		KSTAT_DATA_ULONG },
	{ "nalloc_calls",	KSTAT_DATA_ULONG },
	{ "nfree_calls",	KSTAT_DATA_ULONG },
	{ "kernelbase",		KSTAT_DATA_ULONG },
	{ "econtig", 		KSTAT_DATA_ULONG },
	{ "freemem", 		KSTAT_DATA_ULONG },
	{ "availrmem", 		KSTAT_DATA_ULONG },
	{ "lotsfree", 		KSTAT_DATA_ULONG },
	{ "desfree", 		KSTAT_DATA_ULONG },
	{ "minfree", 		KSTAT_DATA_ULONG },
	{ "fastscan", 		KSTAT_DATA_ULONG },
	{ "slowscan", 		KSTAT_DATA_ULONG },
	{ "nscan", 		KSTAT_DATA_ULONG },
	{ "desscan", 		KSTAT_DATA_ULONG },
	{ "pp_kernel", 		KSTAT_DATA_ULONG },
	{ "pagesfree", 		KSTAT_DATA_ULONG },
	{ "pageslocked", 	KSTAT_DATA_ULONG },
	{ "pagestotal",		KSTAT_DATA_ULONG },
};
220 
/*
 * ks_update and ks_snapshot callbacks that kstat_init() attaches to the
 * kstat_headers, system_misc and system_pages kstats.
 */
static int header_kstat_update(kstat_t *, int);
static int header_kstat_snapshot(kstat_t *, void *, int);
static int system_misc_kstat_update(kstat_t *, int);
static int system_pages_kstat_update(kstat_t *, int);
225 
/*
 * Per-type properties, indexed by ks_type.  kstat_create_zone() uses
 * min_ndata/max_ndata to range-check ks_ndata and multiplies size by
 * ks_ndata to get the data section size.  The names are also published
 * through the unix:0:kstat_types kstat.
 */
static struct {
	char	name[KSTAT_STRLEN];
	size_t	size;
	uint_t	min_ndata;
	uint_t	max_ndata;
} kstat_data_type[KSTAT_NUM_TYPES] = {
	{ "raw",		1,			0,	INT_MAX	},
	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
};
238 
239 int
240 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
241 {
242 	ekstat_t *e = (ekstat_t *)k;
243 	kstat_zone_t *kz;
244 
245 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
246 	for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
247 		if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
248 			return (1);
249 		if (zoneid == kz->zoneid)
250 			return (1);
251 	}
252 	return (0);
253 }
254 
255 void
256 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
257 {
258 	ekstat_t *e = (ekstat_t *)k;
259 	kstat_zone_t *kz, *t = NULL;
260 
261 	mutex_enter(&kstat_chain_lock);
262 	if (zoneid == e->e_zone.zoneid) {
263 		kz = e->e_zone.next;
264 		ASSERT(kz != NULL);
265 		e->e_zone.zoneid = kz->zoneid;
266 		e->e_zone.next = kz->next;
267 		goto out;
268 	}
269 	for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
270 		if (kz->next->zoneid == zoneid) {
271 			t = kz->next;
272 			kz->next = t->next;
273 			break;
274 		}
275 	}
276 	ASSERT(t != NULL);	/* we removed something */
277 	kz = t;
278 out:
279 	kstat_chain_id++;
280 	mutex_exit(&kstat_chain_lock);
281 	kmem_free(kz, sizeof (*kz));
282 }
283 
284 void
285 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
286 {
287 	ekstat_t *e = (ekstat_t *)k;
288 	kstat_zone_t *kz;
289 
290 	kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
291 	if (kz == NULL)
292 		return;
293 	mutex_enter(&kstat_chain_lock);
294 	kz->zoneid = zoneid;
295 	kz->next = e->e_zone.next;
296 	e->e_zone.next = kz;
297 	kstat_chain_id++;
298 	mutex_exit(&kstat_chain_lock);
299 }
300 
301 /*
302  * Compare the list of zones for the given kstats, returning 0 if they match
303  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
304  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
305  * two lists always has one element, and this is an O(n) operation rather than
306  * O(n^2).
307  */
308 static int
309 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
310 {
311 	kstat_zone_t *kz1, *kz2;
312 
313 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
314 	for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
315 		for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
316 			if (kz1->zoneid == ALL_ZONES ||
317 			    kz2->zoneid == ALL_ZONES)
318 				return (0);
319 			if (kz1->zoneid == kz2->zoneid)
320 				return (0);
321 		}
322 	}
323 	return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
324 }
325 
326 /*
327  * Support for keeping kstats sorted in AVL trees for fast lookups.
328  */
329 static int
330 kstat_compare_bykid(const void *a1, const void *a2)
331 {
332 	const kstat_t *k1 = a1;
333 	const kstat_t *k2 = a2;
334 
335 	if (k1->ks_kid < k2->ks_kid)
336 		return (-1);
337 	if (k1->ks_kid > k2->ks_kid)
338 		return (1);
339 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
340 }
341 
342 static int
343 kstat_compare_byname(const void *a1, const void *a2)
344 {
345 	const kstat_t *k1 = a1;
346 	const kstat_t *k2 = a2;
347 	int s;
348 
349 	s = strcmp(k1->ks_module, k2->ks_module);
350 	if (s > 0)
351 		return (1);
352 	if (s < 0)
353 		return (-1);
354 
355 	if (k1->ks_instance < k2->ks_instance)
356 		return (-1);
357 	if (k1->ks_instance > k2->ks_instance)
358 		return (1);
359 
360 	s = strcmp(k1->ks_name, k2->ks_name);
361 	if (s > 0)
362 		return (1);
363 	if (s < 0)
364 		return (-1);
365 
366 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
367 }
368 
369 static kstat_t *
370 kstat_hold(avl_tree_t *t, ekstat_t *template)
371 {
372 	kstat_t *ksp;
373 	ekstat_t *e;
374 
375 	mutex_enter(&kstat_chain_lock);
376 	for (;;) {
377 		ksp = avl_find(t, template, NULL);
378 		if (ksp == NULL)
379 			break;
380 		e = (ekstat_t *)ksp;
381 		if (e->e_owner == NULL) {
382 			e->e_owner = curthread;
383 			break;
384 		}
385 		cv_wait(&e->e_cv, &kstat_chain_lock);
386 	}
387 	mutex_exit(&kstat_chain_lock);
388 	return (ksp);
389 }
390 
391 void
392 kstat_rele(kstat_t *ksp)
393 {
394 	ekstat_t *e = (ekstat_t *)ksp;
395 
396 	mutex_enter(&kstat_chain_lock);
397 	ASSERT(e->e_owner == curthread);
398 	e->e_owner = NULL;
399 	cv_broadcast(&e->e_cv);
400 	mutex_exit(&kstat_chain_lock);
401 }
402 
403 kstat_t *
404 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
405 {
406 	ekstat_t e;
407 
408 	e.e_ks.ks_kid = kid;
409 	e.e_zone.zoneid = zoneid;
410 	e.e_zone.next = NULL;
411 
412 	return (kstat_hold(&kstat_avl_bykid, &e));
413 }
414 
415 kstat_t *
416 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
417     zoneid_t ks_zoneid)
418 {
419 	ekstat_t e;
420 
421 	kstat_set_string(e.e_ks.ks_module, ks_module);
422 	e.e_ks.ks_instance = ks_instance;
423 	kstat_set_string(e.e_ks.ks_name, ks_name);
424 	e.e_zone.zoneid = ks_zoneid;
425 	e.e_zone.next = NULL;
426 	return (kstat_hold(&kstat_avl_byname, &e));
427 }
428 
429 static ekstat_t *
430 kstat_alloc(size_t size)
431 {
432 	ekstat_t *e = NULL;
433 
434 	size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
435 
436 	if (kstat_arena == NULL) {
437 		if (size <= kstat_initial_avail) {
438 			e = kstat_initial_ptr;
439 			kstat_initial_ptr = (char *)kstat_initial_ptr + size;
440 			kstat_initial_avail -= size;
441 		}
442 	} else {
443 		e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
444 	}
445 
446 	if (e != NULL) {
447 		bzero(e, size);
448 		e->e_size = size;
449 		cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
450 	}
451 
452 	return (e);
453 }
454 
455 static void
456 kstat_free(ekstat_t *e)
457 {
458 	cv_destroy(&e->e_cv);
459 	vmem_free(kstat_arena, e, e->e_size);
460 }
461 
/*
 * Create the kstat vmem arena and various system kstats.
 *
 * Only a failure to create kstat_headers is fatal; for every other kstat
 * a NULL return from kstat_create() simply means that kstat is not
 * installed.
 */
void
kstat_init(void)
{
	kstat_t *ksp;
	ekstat_t *e;
	avl_tree_t *t = &kstat_avl_bykid;

	/*
	 * Set up the kstat vmem arena.
	 */
	kstat_arena = vmem_create("kstat",
	    kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);

	/*
	 * Make initial kstats appear as though they were allocated.
	 * Each kstat already in the by-KID tree has its exact address
	 * range [e, e + e_size) claimed from the arena.
	 */
	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
		(void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
		    0, 0, e, (char *)e + e->e_size,
		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);

	/*
	 * The mother of all kstats.  The first kstat in the system, which
	 * always has KID 0, has the headers for all kstats (including itself)
	 * as its data.  Thus, the kstat driver does not need any special
	 * interface to extract the kstat chain.
	 */
	kstat_chain_id = 0;
	ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
	    0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
	if (ksp) {
		/* the header callbacks ASSERT that kstat_chain_lock is held */
		ksp->ks_lock = &kstat_chain_lock;
		ksp->ks_update = header_kstat_update;
		ksp->ks_snapshot = header_kstat_snapshot;
		kstat_install(ksp);
	} else {
		panic("cannot create kstat 'kstat_headers'");
	}

	/*
	 * kstat_types: one named entry per kstat type, whose value is the
	 * numeric type.
	 */
	ksp = kstat_create("unix", 0, "kstat_types", "kstat",
	    KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
	if (ksp) {
		int i;
		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);

		for (i = 0; i < KSTAT_NUM_TYPES; i++) {
			kstat_named_init(&kn[i], kstat_data_type[i].name,
			    KSTAT_DATA_ULONG);
			kn[i].value.ul = i;
		}
		kstat_install(ksp);
	}

	/*
	 * The remaining kstats are all virtual: ks_data is pointed at
	 * storage that lives outside the kstat itself.
	 */
	ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
	    sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &sysinfo;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
	    sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &vminfo;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
	    segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) segmapcnt_ptr;
		ksp->ks_update = segmap_kstat_update;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
	    biostats_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) biostats_ptr;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
	    sizeof (struct var), KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &v;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
	    sizeof (system_misc_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &system_misc_kstat;
		ksp->ks_update = system_misc_kstat_update;
		kstat_install(ksp);
	}

	ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
	    sizeof (system_pages_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *) &system_pages_kstat;
		ksp->ks_update = system_pages_kstat_update;
		kstat_install(ksp);
	}

	/* pollstats is the only kstat created here with the writable flag */
	ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
	    pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

	if (ksp) {
		ksp->ks_data = pollstats_ptr;
		kstat_install(ksp);
	}
}
581 
582 /*
583  * Caller of this should ensure that the string pointed by src
584  * doesn't change while kstat's lock is held. Not doing so defeats
585  * kstat's snapshot strategy as explained in <sys/kstat.h>
586  */
587 void
588 kstat_named_setstr(kstat_named_t *knp, const char *src)
589 {
590 	if (knp->data_type != KSTAT_DATA_STRING)
591 		panic("kstat_named_setstr('%p', '%p'): "
592 		    "named kstat is not of type KSTAT_DATA_STRING",
593 		    (void *)knp, (void *)src);
594 
595 	KSTAT_NAMED_STR_PTR(knp) = (char *)src;
596 	if (src != NULL)
597 		KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
598 	else
599 		KSTAT_NAMED_STR_BUFLEN(knp) = 0;
600 }
601 
/*
 * Copy src into the KSTAT_STRLEN-byte buffer dst.  The buffer is zeroed
 * first and at most KSTAT_STRLEN - 1 bytes are copied, so the result is
 * always NUL-terminated and a longer src is truncated.
 */
void
kstat_set_string(char *dst, const char *src)
{
	bzero(dst, KSTAT_STRLEN);
	(void) strncpy(dst, src, KSTAT_STRLEN - 1);
}
608 
609 void
610 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
611 {
612 	kstat_set_string(knp->name, name);
613 	knp->data_type = data_type;
614 
615 	if (data_type == KSTAT_DATA_STRING)
616 		kstat_named_setstr(knp, NULL);
617 }
618 
/*
 * Set the name of a timer kstat entry.  No other field is touched.
 */
void
kstat_timer_init(kstat_timer_t *ktp, const char *name)
{
	kstat_set_string(ktp->name, name);
}
624 
625 /* ARGSUSED */
626 static int
627 default_kstat_update(kstat_t *ksp, int rw)
628 {
629 	uint_t i;
630 	size_t len = 0;
631 	kstat_named_t *knp;
632 
633 	/*
634 	 * Named kstats with variable-length long strings have a standard
635 	 * way of determining how much space is needed to hold the snapshot:
636 	 */
637 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
638 	    (ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
639 
640 		/*
641 		 * Add in the space required for the strings
642 		 */
643 		knp = KSTAT_NAMED_PTR(ksp);
644 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
645 			if (knp->data_type == KSTAT_DATA_STRING)
646 				len += KSTAT_NAMED_STR_BUFLEN(knp);
647 		}
648 		ksp->ks_data_size =
649 		    ksp->ks_ndata * sizeof (kstat_named_t) + len;
650 	}
651 	return (0);
652 }
653 
654 static int
655 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
656 {
657 	kstat_io_t *kiop;
658 	hrtime_t cur_time;
659 	size_t	namedsz;
660 
661 	ksp->ks_snaptime = cur_time = gethrtime();
662 
663 	if (rw == KSTAT_WRITE) {
664 		if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
665 			return (EACCES);
666 		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
667 		return (0);
668 	}
669 
670 	/*
671 	 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
672 	 * number of kstat_named_t structures, followed by an optional
673 	 * string segment. The ks_data generally holds only the
674 	 * kstat_named_t structures. So we copy it first. The strings,
675 	 * if any, are copied below. For other kstat types, ks_data holds the
676 	 * entire buffer.
677 	 */
678 
679 	namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
680 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
681 		bcopy(ksp->ks_data, buf, namedsz);
682 	else
683 		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
684 
685 	/*
686 	 * Apply kstat type-specific data massaging
687 	 */
688 	switch (ksp->ks_type) {
689 
690 	case KSTAT_TYPE_IO:
691 		/*
692 		 * Normalize time units and deal with incomplete transactions
693 		 */
694 		kiop = (kstat_io_t *)buf;
695 
696 		scalehrtime(&kiop->wtime);
697 		scalehrtime(&kiop->wlentime);
698 		scalehrtime(&kiop->wlastupdate);
699 		scalehrtime(&kiop->rtime);
700 		scalehrtime(&kiop->rlentime);
701 		scalehrtime(&kiop->rlastupdate);
702 
703 		if (kiop->wcnt != 0) {
704 			/* like kstat_waitq_exit */
705 			hrtime_t wfix = cur_time - kiop->wlastupdate;
706 			kiop->wlastupdate = cur_time;
707 			kiop->wlentime += kiop->wcnt * wfix;
708 			kiop->wtime += wfix;
709 		}
710 
711 		if (kiop->rcnt != 0) {
712 			/* like kstat_runq_exit */
713 			hrtime_t rfix = cur_time - kiop->rlastupdate;
714 			kiop->rlastupdate = cur_time;
715 			kiop->rlentime += kiop->rcnt * rfix;
716 			kiop->rtime += rfix;
717 		}
718 		break;
719 
720 	case KSTAT_TYPE_NAMED:
721 		/*
722 		 * Massage any long strings in at the end of the buffer
723 		 */
724 		if (ksp->ks_data_size > namedsz) {
725 			uint_t i;
726 			kstat_named_t *knp = buf;
727 			char *dst = (char *)(knp + ksp->ks_ndata);
728 			/*
729 			 * Copy strings and update pointers
730 			 */
731 			for (i = 0; i < ksp->ks_ndata; i++, knp++) {
732 				if (knp->data_type == KSTAT_DATA_STRING &&
733 				    KSTAT_NAMED_STR_PTR(knp) != NULL) {
734 					bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
735 					    KSTAT_NAMED_STR_BUFLEN(knp));
736 					KSTAT_NAMED_STR_PTR(knp) = dst;
737 					dst += KSTAT_NAMED_STR_BUFLEN(knp);
738 				}
739 			}
740 			ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
741 		}
742 		break;
743 	}
744 	return (0);
745 }
746 
747 static int
748 header_kstat_update(kstat_t *header_ksp, int rw)
749 {
750 	int nkstats = 0;
751 	ekstat_t *e;
752 	avl_tree_t *t = &kstat_avl_bykid;
753 	zoneid_t zoneid;
754 
755 	if (rw == KSTAT_WRITE)
756 		return (EACCES);
757 
758 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
759 
760 	zoneid = getzoneid();
761 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
762 		if (kstat_zone_find((kstat_t *)e, zoneid) &&
763 		    (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
764 			nkstats++;
765 		}
766 	}
767 	header_ksp->ks_ndata = nkstats;
768 	header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
769 	return (0);
770 }
771 
772 /*
773  * Copy out the data section of kstat 0, which consists of the list
774  * of all kstat headers.  By specification, these headers must be
775  * copied out in order of increasing KID.
776  */
777 static int
778 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
779 {
780 	ekstat_t *e;
781 	avl_tree_t *t = &kstat_avl_bykid;
782 	zoneid_t zoneid;
783 
784 	header_ksp->ks_snaptime = gethrtime();
785 
786 	if (rw == KSTAT_WRITE)
787 		return (EACCES);
788 
789 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
790 
791 	zoneid = getzoneid();
792 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
793 		if (kstat_zone_find((kstat_t *)e, zoneid) &&
794 		    (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
795 			bcopy(&e->e_ks, buf, sizeof (kstat_t));
796 			buf = (char *)buf + sizeof (kstat_t);
797 		}
798 	}
799 
800 	return (0);
801 }
802 
803 /* ARGSUSED */
804 static int
805 system_misc_kstat_update(kstat_t *ksp, int rw)
806 {
807 	int myncpus = ncpus;
808 	int *loadavgp = &avenrun[0];
809 	int loadavg[LOADAVG_NSTATS];
810 	time_t zone_boot_time;
811 	clock_t zone_lbolt;
812 	hrtime_t zone_hrtime;
813 	size_t zone_nproc;
814 
815 	if (rw == KSTAT_WRITE)
816 		return (EACCES);
817 
818 	if (!INGLOBALZONE(curproc)) {
819 		/*
820 		 * Here we grab cpu_lock which is OK as long as no-one in the
821 		 * future attempts to lookup this particular kstat
822 		 * (unix:0:system_misc) while holding cpu_lock.
823 		 */
824 		mutex_enter(&cpu_lock);
825 		if (pool_pset_enabled()) {
826 			psetid_t mypsid = zone_pset_get(curproc->p_zone);
827 			int error;
828 
829 			myncpus = zone_ncpus_get(curproc->p_zone);
830 			ASSERT(myncpus > 0);
831 			error = cpupart_get_loadavg(mypsid, &loadavg[0],
832 			    LOADAVG_NSTATS);
833 			ASSERT(error == 0);
834 			loadavgp = &loadavg[0];
835 		}
836 		mutex_exit(&cpu_lock);
837 	}
838 
839 	if (INGLOBALZONE(curproc)) {
840 		zone_boot_time = boot_time;
841 		zone_lbolt = ddi_get_lbolt();
842 		zone_nproc = nproc;
843 	} else {
844 		zone_boot_time = curproc->p_zone->zone_boot_time;
845 
846 		zone_hrtime = gethrtime();
847 		zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
848 		    NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
849 		mutex_enter(&curproc->p_zone->zone_nlwps_lock);
850 		zone_nproc = curproc->p_zone->zone_nprocs;
851 		mutex_exit(&curproc->p_zone->zone_nlwps_lock);
852 	}
853 
854 	system_misc_kstat.ncpus.value.ui32		= (uint32_t)myncpus;
855 	system_misc_kstat.lbolt.value.ui32		= (uint32_t)zone_lbolt;
856 	system_misc_kstat.deficit.value.ui32		= (uint32_t)deficit;
857 	system_misc_kstat.clk_intr.value.ui32		= (uint32_t)zone_lbolt;
858 	system_misc_kstat.vac.value.ui32		= (uint32_t)vac;
859 	system_misc_kstat.nproc.value.ui32		= (uint32_t)zone_nproc;
860 	system_misc_kstat.avenrun_1min.value.ui32	= (uint32_t)loadavgp[0];
861 	system_misc_kstat.avenrun_5min.value.ui32	= (uint32_t)loadavgp[1];
862 	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];
863 	system_misc_kstat.boot_time.value.ui32		= (uint32_t)
864 	    zone_boot_time;
865 	return (0);
866 }
867 
868 #ifdef	__sparc
869 extern caddr_t	econtig32;
870 #else	/* !__sparc */
871 extern caddr_t	econtig;
872 #endif	/* __sparc */
873 
/*
 * ks_update routine for unix:0:system_pages.  Refreshes system_pages_kstat
 * from the kernel object statistics returned by kobj_stat_get() and from
 * the kernel's paging variables.  Writes are rejected with EACCES.
 */
/* ARGSUSED */
static int
system_pages_kstat_update(kstat_t *ksp, int rw)
{
	kobj_stat_t kobj_stat;

	if (rw == KSTAT_WRITE) {
		return (EACCES);
	}

	kobj_stat_get(&kobj_stat);
	system_pages_kstat.physmem.value.ul	= (ulong_t)physmem;
	system_pages_kstat.nalloc.value.ul	= kobj_stat.nalloc;
	system_pages_kstat.nfree.value.ul	= kobj_stat.nfree;
	system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
	system_pages_kstat.nfree_calls.value.ul	= kobj_stat.nfree_calls;
	system_pages_kstat.kernelbase.value.ul	= (ulong_t)KERNELBASE;

#ifdef	__sparc
	/*
	 * kstat should REALLY be modified to also report kmem64_base and
	 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
	 * [ kernelbase .. econtig ] no longer is truly reflective of the
	 * kernel's vallocs...
	 */
	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig32;
#else	/* !__sparc */
	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig;
#endif	/* __sparc */

	system_pages_kstat.freemem.value.ul	= (ulong_t)freemem;
	system_pages_kstat.availrmem.value.ul	= (ulong_t)availrmem;
	system_pages_kstat.lotsfree.value.ul	= (ulong_t)lotsfree;
	system_pages_kstat.desfree.value.ul	= (ulong_t)desfree;
	system_pages_kstat.minfree.value.ul	= (ulong_t)minfree;
	system_pages_kstat.fastscan.value.ul	= (ulong_t)fastscan;
	system_pages_kstat.slowscan.value.ul	= (ulong_t)slowscan;
	system_pages_kstat.nscan.value.ul	= (ulong_t)nscan;
	system_pages_kstat.desscan.value.ul	= (ulong_t)desscan;
	/* pagesfree reports the same value as freemem */
	system_pages_kstat.pagesfree.value.ul	= (ulong_t)freemem;
	system_pages_kstat.pageslocked.value.ul	= (ulong_t)(availrmem_initial -
	    availrmem);
	system_pages_kstat.pagestotal.value.ul	= (ulong_t)total_pages;
	/*
	 * pp_kernel represents total pages used by the kernel since the
	 * startup. This formula takes into account the boottime kernel
	 * footprint and also considers the availrmem changes because of
	 * user explicit page locking.
	 */
	system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
	    obp_pages - availrmem - k_anoninfo.ani_mem_resv -
	    anon_segkp_pages_locked - pages_locked -
	    pages_claimed - pages_useclaim);

	return (0);
}
930 
931 kstat_t *
932 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
933     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
934 {
935 	return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
936 	    ks_type, ks_ndata, ks_flags, ALL_ZONES));
937 }
938 
939 /*
940  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
941  * the specified name exists, reactivate it.  Returns a pointer to the kstat
942  * on success, NULL on failure.  The kstat will not be visible to the
943  * kstat driver until kstat_install().
944  */
945 kstat_t *
946 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
947     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
948     zoneid_t ks_zoneid)
949 {
950 	size_t ks_data_size;
951 	kstat_t *ksp;
952 	ekstat_t *e;
953 	avl_index_t where;
954 	char namebuf[KSTAT_STRLEN + 16];
955 
956 	if (avl_numnodes(&kstat_avl_bykid) == 0) {
957 		avl_create(&kstat_avl_bykid, kstat_compare_bykid,
958 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
959 
960 		avl_create(&kstat_avl_byname, kstat_compare_byname,
961 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
962 	}
963 
964 	/*
965 	 * If ks_name == NULL, set the ks_name to <module><instance>.
966 	 */
967 	if (ks_name == NULL) {
968 		char buf[KSTAT_STRLEN];
969 		kstat_set_string(buf, ks_module);
970 		(void) sprintf(namebuf, "%s%d", buf, ks_instance);
971 		ks_name = namebuf;
972 	}
973 
974 	/*
975 	 * Make sure it's a valid kstat data type
976 	 */
977 	if (ks_type >= KSTAT_NUM_TYPES) {
978 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
979 		    "invalid kstat type %d",
980 		    ks_module, ks_instance, ks_name, ks_type);
981 		return (NULL);
982 	}
983 
984 	/*
985 	 * Don't allow persistent virtual kstats -- it makes no sense.
986 	 * ks_data points to garbage when the client goes away.
987 	 */
988 	if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
989 	    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
990 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
991 		    "cannot create persistent virtual kstat",
992 		    ks_module, ks_instance, ks_name);
993 		return (NULL);
994 	}
995 
996 	/*
997 	 * Don't allow variable-size physical kstats, since the framework's
998 	 * memory allocation for physical kstat data is fixed at creation time.
999 	 */
1000 	if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
1001 	    !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1002 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1003 		    "cannot create variable-size physical kstat",
1004 		    ks_module, ks_instance, ks_name);
1005 		return (NULL);
1006 	}
1007 
1008 	/*
1009 	 * Make sure the number of data fields is within legal range
1010 	 */
1011 	if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1012 	    ks_ndata > kstat_data_type[ks_type].max_ndata) {
1013 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1014 		    "ks_ndata=%d out of range [%d, %d]",
1015 		    ks_module, ks_instance, ks_name, (int)ks_ndata,
1016 		    kstat_data_type[ks_type].min_ndata,
1017 		    kstat_data_type[ks_type].max_ndata);
1018 		return (NULL);
1019 	}
1020 
1021 	ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1022 
1023 	/*
1024 	 * If the named kstat already exists and is dormant, reactivate it.
1025 	 */
1026 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1027 	if (ksp != NULL) {
1028 		if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1029 			/*
1030 			 * The named kstat exists but is not dormant --
1031 			 * this is a kstat namespace collision.
1032 			 */
1033 			kstat_rele(ksp);
1034 			cmn_err(CE_WARN,
1035 			    "kstat_create('%s', %d, '%s'): namespace collision",
1036 			    ks_module, ks_instance, ks_name);
1037 			return (NULL);
1038 		}
1039 		if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1040 		    (ksp->ks_type != ks_type) ||
1041 		    (ksp->ks_ndata != ks_ndata) ||
1042 		    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1043 			/*
1044 			 * The name is the same, but the other key parameters
1045 			 * differ from those of the dormant kstat -- bogus.
1046 			 */
1047 			kstat_rele(ksp);
1048 			cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1049 			    "invalid reactivation of dormant kstat",
1050 			    ks_module, ks_instance, ks_name);
1051 			return (NULL);
1052 		}
1053 		/*
1054 		 * Return dormant kstat pointer to caller.  As usual,
1055 		 * the kstat is marked invalid until kstat_install().
1056 		 */
1057 		ksp->ks_flags |= KSTAT_FLAG_INVALID;
1058 		kstat_rele(ksp);
1059 		return (ksp);
1060 	}
1061 
1062 	/*
1063 	 * Allocate memory for the new kstat header and, if this is a physical
1064 	 * kstat, the data section.
1065 	 */
1066 	e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1067 	if (e == NULL) {
1068 		cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1069 		    "insufficient kernel memory",
1070 		    ks_module, ks_instance, ks_name);
1071 		return (NULL);
1072 	}
1073 
1074 	/*
1075 	 * Initialize as many fields as we can.  The caller may reset
1076 	 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1077 	 * Creators of virtual kstats may also reset ks_data.  It is
1078 	 * also up to the caller to initialize the kstat data section,
1079 	 * if necessary.  All initialization must be complete before
1080 	 * calling kstat_install().
1081 	 */
1082 	e->e_zone.zoneid = ks_zoneid;
1083 	e->e_zone.next = NULL;
1084 
1085 	ksp = &e->e_ks;
1086 	ksp->ks_crtime		= gethrtime();
1087 	kstat_set_string(ksp->ks_module, ks_module);
1088 	ksp->ks_instance	= ks_instance;
1089 	kstat_set_string(ksp->ks_name, ks_name);
1090 	ksp->ks_type		= ks_type;
1091 	kstat_set_string(ksp->ks_class, ks_class);
1092 	ksp->ks_flags		= ks_flags | KSTAT_FLAG_INVALID;
1093 	if (ks_flags & KSTAT_FLAG_VIRTUAL)
1094 		ksp->ks_data	= NULL;
1095 	else
1096 		ksp->ks_data	= (void *)(e + 1);
1097 	ksp->ks_ndata		= ks_ndata;
1098 	ksp->ks_data_size	= ks_data_size;
1099 	ksp->ks_snaptime	= ksp->ks_crtime;
1100 	ksp->ks_update		= default_kstat_update;
1101 	ksp->ks_private		= NULL;
1102 	ksp->ks_snapshot	= default_kstat_snapshot;
1103 	ksp->ks_lock		= NULL;
1104 
1105 	mutex_enter(&kstat_chain_lock);
1106 
1107 	/*
1108 	 * Add our kstat to the AVL trees.
1109 	 */
1110 	if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1111 		mutex_exit(&kstat_chain_lock);
1112 		cmn_err(CE_WARN,
1113 		    "kstat_create('%s', %d, '%s'): namespace collision",
1114 		    ks_module, ks_instance, ks_name);
1115 		kstat_free(e);
1116 		return (NULL);
1117 	}
1118 	avl_insert(&kstat_avl_byname, e, where);
1119 
1120 	/*
1121 	 * Loop around until we find an unused KID.
1122 	 */
1123 	do {
1124 		ksp->ks_kid = kstat_chain_id++;
1125 	} while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1126 	avl_insert(&kstat_avl_bykid, e, where);
1127 
1128 	mutex_exit(&kstat_chain_lock);
1129 
1130 	return (ksp);
1131 }
1132 
1133 /*
1134  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1135  */
1136 void
1137 kstat_install(kstat_t *ksp)
1138 {
1139 	zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1140 
1141 	/*
1142 	 * If this is a variable-size kstat, it MUST provide kstat data locking
1143 	 * to prevent data-size races with kstat readers.
1144 	 */
1145 	if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1146 		panic("kstat_install('%s', %d, '%s'): "
1147 		    "cannot create variable-size kstat without data lock",
1148 		    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1149 	}
1150 
1151 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1152 		cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1153 		    (void *)ksp);
1154 		return;
1155 	}
1156 
1157 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1158 		int has_long_strings = 0;
1159 		uint_t i;
1160 		kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1161 
1162 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1163 			if (knp->data_type == KSTAT_DATA_STRING) {
1164 				has_long_strings = 1;
1165 				break;
1166 			}
1167 		}
1168 		/*
1169 		 * It is an error for a named kstat with fields of
1170 		 * KSTAT_DATA_STRING to be non-virtual.
1171 		 */
1172 		if (has_long_strings && !(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) {
1173 			panic("kstat_install('%s', %d, '%s'): "
1174 			    "named kstat containing KSTAT_DATA_STRING "
1175 			    "is not virtual",
1176 			    ksp->ks_module, ksp->ks_instance,
1177 			    ksp->ks_name);
1178 		}
1179 		/*
1180 		 * The default snapshot routine does not handle KSTAT_WRITE
1181 		 * for long strings.
1182 		 */
1183 		if (has_long_strings && (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1184 		    (ksp->ks_snapshot == default_kstat_snapshot)) {
1185 			panic("kstat_install('%s', %d, '%s'): "
1186 			    "named kstat containing KSTAT_DATA_STRING "
1187 			    "is writable but uses default snapshot routine",
1188 			    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1189 		}
1190 	}
1191 
1192 	if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1193 
1194 		/*
1195 		 * We are reactivating a dormant kstat.  Initialize the
1196 		 * caller's underlying data to the value it had when the
1197 		 * kstat went dormant, and mark the kstat as active.
1198 		 * Grab the provider's kstat lock if it's not already held.
1199 		 */
1200 		kmutex_t *lp = ksp->ks_lock;
1201 		if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1202 			mutex_enter(lp);
1203 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1204 			mutex_exit(lp);
1205 		} else {
1206 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1207 		}
1208 		ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1209 	}
1210 
1211 	/*
1212 	 * Now that the kstat is active, make it visible to the kstat driver.
1213 	 */
1214 	ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1215 	kstat_rele(ksp);
1216 }
1217 
1218 /*
1219  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1220  * just update the data and mark it as dormant.
1221  */
1222 void
1223 kstat_delete(kstat_t *ksp)
1224 {
1225 	kmutex_t *lp;
1226 	ekstat_t *e = (ekstat_t *)ksp;
1227 	zoneid_t zoneid;
1228 	kstat_zone_t *kz;
1229 
1230 	ASSERT(ksp != NULL);
1231 
1232 	if (ksp == NULL)
1233 		return;
1234 
1235 	zoneid = e->e_zone.zoneid;
1236 
1237 	lp = ksp->ks_lock;
1238 
1239 	if (lp != NULL && MUTEX_HELD(lp)) {
1240 		panic("kstat_delete(%p): caller holds data lock %p",
1241 		    (void *)ksp, (void *)lp);
1242 	}
1243 
1244 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1245 		cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1246 		    (void *)ksp);
1247 		return;
1248 	}
1249 
1250 	if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1251 		/*
1252 		 * Update the data one last time, so that all activity
1253 		 * prior to going dormant has been accounted for.
1254 		 */
1255 		KSTAT_ENTER(ksp);
1256 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
1257 		KSTAT_EXIT(ksp);
1258 
1259 		/*
1260 		 * Mark the kstat as dormant and restore caller-modifiable
1261 		 * fields to default values, so the kstat is readable during
1262 		 * the dormant phase.
1263 		 */
1264 		ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1265 		ksp->ks_lock = NULL;
1266 		ksp->ks_update = default_kstat_update;
1267 		ksp->ks_private = NULL;
1268 		ksp->ks_snapshot = default_kstat_snapshot;
1269 		kstat_rele(ksp);
1270 		return;
1271 	}
1272 
1273 	/*
1274 	 * Remove the kstat from the framework's AVL trees,
1275 	 * free the allocated memory, and increment kstat_chain_id so
1276 	 * /dev/kstat clients can detect the event.
1277 	 */
1278 	mutex_enter(&kstat_chain_lock);
1279 	avl_remove(&kstat_avl_bykid, e);
1280 	avl_remove(&kstat_avl_byname, e);
1281 	kstat_chain_id++;
1282 	mutex_exit(&kstat_chain_lock);
1283 
1284 	kz = e->e_zone.next;
1285 	while (kz != NULL) {
1286 		kstat_zone_t *t = kz;
1287 
1288 		kz = kz->next;
1289 		kmem_free(t, sizeof (*t));
1290 	}
1291 	kstat_rele(ksp);
1292 	kstat_free(e);
1293 }
1294 
1295 void
1296 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1297     const char *ks_name, zoneid_t ks_zoneid)
1298 {
1299 	kstat_t *ksp;
1300 
1301 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1302 	if (ksp != NULL) {
1303 		kstat_rele(ksp);
1304 		kstat_delete(ksp);
1305 	}
1306 }
1307 
1308 void
1309 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1310 {
1311 	kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1312 }
1313 
1314 /*
1315  * The sparc V9 versions of these routines can be much cheaper than
1316  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1317  * For simplicity, however, we always feed the C versions to lint.
1318  */
1319 #if !defined(__sparc) || defined(lint) || defined(__lint)
1320 
1321 void
1322 kstat_waitq_enter(kstat_io_t *kiop)
1323 {
1324 	hrtime_t new, delta;
1325 	ulong_t wcnt;
1326 
1327 	new = gethrtime_unscaled();
1328 	delta = new - kiop->wlastupdate;
1329 	kiop->wlastupdate = new;
1330 	wcnt = kiop->wcnt++;
1331 	if (wcnt != 0) {
1332 		kiop->wlentime += delta * wcnt;
1333 		kiop->wtime += delta;
1334 	}
1335 }
1336 
1337 void
1338 kstat_waitq_exit(kstat_io_t *kiop)
1339 {
1340 	hrtime_t new, delta;
1341 	ulong_t wcnt;
1342 
1343 	new = gethrtime_unscaled();
1344 	delta = new - kiop->wlastupdate;
1345 	kiop->wlastupdate = new;
1346 	wcnt = kiop->wcnt--;
1347 	ASSERT((int)wcnt > 0);
1348 	kiop->wlentime += delta * wcnt;
1349 	kiop->wtime += delta;
1350 }
1351 
1352 void
1353 kstat_runq_enter(kstat_io_t *kiop)
1354 {
1355 	hrtime_t new, delta;
1356 	ulong_t rcnt;
1357 
1358 	new = gethrtime_unscaled();
1359 	delta = new - kiop->rlastupdate;
1360 	kiop->rlastupdate = new;
1361 	rcnt = kiop->rcnt++;
1362 	if (rcnt != 0) {
1363 		kiop->rlentime += delta * rcnt;
1364 		kiop->rtime += delta;
1365 	}
1366 }
1367 
1368 void
1369 kstat_runq_exit(kstat_io_t *kiop)
1370 {
1371 	hrtime_t new, delta;
1372 	ulong_t rcnt;
1373 
1374 	new = gethrtime_unscaled();
1375 	delta = new - kiop->rlastupdate;
1376 	kiop->rlastupdate = new;
1377 	rcnt = kiop->rcnt--;
1378 	ASSERT((int)rcnt > 0);
1379 	kiop->rlentime += delta * rcnt;
1380 	kiop->rtime += delta;
1381 }
1382 
1383 void
1384 kstat_waitq_to_runq(kstat_io_t *kiop)
1385 {
1386 	hrtime_t new, delta;
1387 	ulong_t wcnt, rcnt;
1388 
1389 	new = gethrtime_unscaled();
1390 
1391 	delta = new - kiop->wlastupdate;
1392 	kiop->wlastupdate = new;
1393 	wcnt = kiop->wcnt--;
1394 	ASSERT((int)wcnt > 0);
1395 	kiop->wlentime += delta * wcnt;
1396 	kiop->wtime += delta;
1397 
1398 	delta = new - kiop->rlastupdate;
1399 	kiop->rlastupdate = new;
1400 	rcnt = kiop->rcnt++;
1401 	if (rcnt != 0) {
1402 		kiop->rlentime += delta * rcnt;
1403 		kiop->rtime += delta;
1404 	}
1405 }
1406 
1407 void
1408 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1409 {
1410 	hrtime_t new, delta;
1411 	ulong_t wcnt, rcnt;
1412 
1413 	new = gethrtime_unscaled();
1414 
1415 	delta = new - kiop->rlastupdate;
1416 	kiop->rlastupdate = new;
1417 	rcnt = kiop->rcnt--;
1418 	ASSERT((int)rcnt > 0);
1419 	kiop->rlentime += delta * rcnt;
1420 	kiop->rtime += delta;
1421 
1422 	delta = new - kiop->wlastupdate;
1423 	kiop->wlastupdate = new;
1424 	wcnt = kiop->wcnt++;
1425 	if (wcnt != 0) {
1426 		kiop->wlentime += delta * wcnt;
1427 		kiop->wtime += delta;
1428 	}
1429 }
1430 
1431 #endif
1432 
1433 void
1434 kstat_timer_start(kstat_timer_t *ktp)
1435 {
1436 	ktp->start_time = gethrtime();
1437 }
1438 
1439 void
1440 kstat_timer_stop(kstat_timer_t *ktp)
1441 {
1442 	hrtime_t	etime;
1443 	u_longlong_t	num_events;
1444 
1445 	ktp->stop_time = etime = gethrtime();
1446 	etime -= ktp->start_time;
1447 	num_events = ktp->num_events;
1448 	if (etime < ktp->min_time || num_events == 0)
1449 		ktp->min_time = etime;
1450 	if (etime > ktp->max_time)
1451 		ktp->max_time = etime;
1452 	ktp->elapsed_time += etime;
1453 	ktp->num_events = num_events + 1;
1454 }
1455