xref: /illumos-gate/usr/src/uts/common/os/kstat_fr.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Kernel statistics framework
30  */
31 
32 #include <sys/types.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/vmsystm.h>
36 #include <sys/t_lock.h>
37 #include <sys/param.h>
38 #include <sys/errno.h>
39 #include <sys/vmem.h>
40 #include <sys/sysmacros.h>
41 #include <sys/cmn_err.h>
42 #include <sys/kstat.h>
43 #include <sys/sysinfo.h>
44 #include <sys/cpuvar.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/vnode.h>
48 #include <sys/vfs.h>
49 #include <sys/dnlc.h>
50 #include <sys/var.h>
51 #include <sys/vmmeter.h>
52 #include <sys/debug.h>
53 #include <sys/kobj.h>
54 #include <sys/avl.h>
55 #include <sys/pool_pset.h>
56 #include <sys/cpupart.h>
57 #include <sys/zone.h>
58 #include <sys/loadavg.h>
59 #include <vm/page.h>
60 #include <vm/anon.h>
61 #include <vm/seg_kmem.h>
62 
/*
 * Global lock to protect the AVL trees and kstat_chain_id.
 * It also protects each kstat's zone list and e_owner field, and it is
 * installed as the ks_lock of the kstat_headers kstat by kstat_init().
 */
static kmutex_t kstat_chain_lock;
67 
68 /*
69  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
70  *
71  * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
72  *
73  * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
74  *	/dev/kstat uses the KID as a cookie for kstat lookups.
75  *
76  * We reserve the first two IDs because some kstats are created before
77  * the well-known ones (kstat_headers = 0, kstat_types = 1).
78  *
79  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
80  * into a particular kstat, which is logically equivalent to a kstat being
81  * installed/deleted.
82  */
83 
kid_t kstat_chain_id = 2;	/* IDs 0 and 1 are reserved */
85 
86 /*
87  * As far as zones are concerned, there are 3 types of kstat:
88  *
89  * 1) Those which have a well-known name, and which should return per-zone data
90  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
91  * is an example of this type of kstat.
92  *
93  * 2) Those which should only be exported to a particular list of zones.
94  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
95  * able to see NFS mounts associated with zone B, while we want the
96  * global zone to be able to see all mounts on the system.
97  *
98  * 3) Those that can be exported to all zones.  Most system-related
99  * kstats fall within this category.
100  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
107  *
108  * Writing to kstats is currently disallowed from within a non-global
109  * zone, although this restriction could be removed in the future.
110  */
typedef struct kstat_zone {
	zoneid_t zoneid;		/* zone ID, or ALL_ZONES */
	struct kstat_zone *next;	/* next list entry; NULL at the end */
} kstat_zone_t;
115 
/*
 * Extended kstat structure -- for internal use only.
 *
 * The public kstat_t is the first member, so the code in this file converts
 * between a kstat_t pointer and its enclosing ekstat_t with a plain cast.
 * For physical kstats the data section follows the ekstat_t directly in
 * the same allocation.
 */
typedef struct ekstat {
	kstat_t		e_ks;		/* the kstat itself */
	size_t		e_size;		/* total allocation size */
	kthread_t	*e_owner;	/* thread holding this kstat */
	kcondvar_t	e_cv;		/* wait for owner == NULL */
	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
	kstat_zone_t	e_zone;		/* head of list of zones exported to */
} ekstat_t;
128 
/*
 * Static buffer that kstat_alloc() carves kstats out of for as long as
 * kstat_arena is NULL, i.e. until kstat_init() has created the arena.
 * kstat_initial_ptr is the next unused byte of the buffer and
 * kstat_initial_avail is the number of bytes still unused.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial;
static size_t kstat_initial_avail = sizeof (kstat_initial);
static vmem_t *kstat_arena;

/* Every kstat allocation is rounded up to a multiple of this size. */
#define	KSTAT_ALIGN	(sizeof (uint64_t))

/* All kstats, ordered by kstat_compare_bykid() and kstat_compare_byname(). */
static avl_tree_t kstat_avl_bykid;
static avl_tree_t kstat_avl_byname;
138 
139 /*
140  * Various pointers we need to create kstats at boot time in kstat_init()
141  */
/* Data and element counts for the segmap, biostats and pollstats kstats. */
extern	kstat_named_t	*segmapcnt_ptr;
extern	uint_t		segmapcnt_ndata;
extern	int		segmap_kstat_update(kstat_t *, int);
extern	kstat_named_t	*biostats_ptr;
extern	uint_t		biostats_ndata;
extern	kstat_named_t	*pollstats_ptr;
extern	uint_t		pollstats_ndata;

/*
 * vac, nproc and boot_time are reported through unix:0:system_misc;
 * sysinfo and vminfo are exported as raw virtual kstats.
 */
extern	int	vac;
extern	uint_t	nproc;
extern	time_t	boot_time;
extern	sysinfo_t	sysinfo;
extern	vminfo_t	vminfo;
155 
/*
 * Data section of the virtual named kstat unix:0:system_misc.  kstat_init()
 * points ks_data at this structure and system_misc_kstat_update() refreshes
 * every value on each read.
 */
struct {
	kstat_named_t ncpus;
	kstat_named_t lbolt;
	kstat_named_t deficit;
	kstat_named_t clk_intr;
	kstat_named_t vac;
	kstat_named_t nproc;
	kstat_named_t avenrun_1min;
	kstat_named_t avenrun_5min;
	kstat_named_t avenrun_15min;
	kstat_named_t boot_time;
} system_misc_kstat = {
	{ "ncpus",		KSTAT_DATA_UINT32 },
	{ "lbolt",		KSTAT_DATA_UINT32 },
	{ "deficit",		KSTAT_DATA_UINT32 },
	{ "clk_intr",		KSTAT_DATA_UINT32 },
	{ "vac",		KSTAT_DATA_UINT32 },
	{ "nproc",		KSTAT_DATA_UINT32 },
	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
	{ "boot_time",		KSTAT_DATA_UINT32 },
};
179 
/*
 * Data section of the virtual named kstat unix:0:system_pages.  kstat_init()
 * points ks_data at this structure and system_pages_kstat_update() refreshes
 * every value on each read.
 */
struct {
	kstat_named_t physmem;
	kstat_named_t nalloc;
	kstat_named_t nfree;
	kstat_named_t nalloc_calls;
	kstat_named_t nfree_calls;
	kstat_named_t kernelbase;
	kstat_named_t econtig;
	kstat_named_t freemem;
	kstat_named_t availrmem;
	kstat_named_t lotsfree;
	kstat_named_t desfree;
	kstat_named_t minfree;
	kstat_named_t fastscan;
	kstat_named_t slowscan;
	kstat_named_t nscan;
	kstat_named_t desscan;
	kstat_named_t pp_kernel;
	kstat_named_t pagesfree;
	kstat_named_t pageslocked;
	kstat_named_t pagestotal;
} system_pages_kstat = {
	{ "physmem",		KSTAT_DATA_ULONG },
	{ "nalloc",		KSTAT_DATA_ULONG },
	{ "nfree",		KSTAT_DATA_ULONG },
	{ "nalloc_calls",	KSTAT_DATA_ULONG },
	{ "nfree_calls",	KSTAT_DATA_ULONG },
	{ "kernelbase",		KSTAT_DATA_ULONG },
	{ "econtig", 		KSTAT_DATA_ULONG },
	{ "freemem", 		KSTAT_DATA_ULONG },
	{ "availrmem", 		KSTAT_DATA_ULONG },
	{ "lotsfree", 		KSTAT_DATA_ULONG },
	{ "desfree", 		KSTAT_DATA_ULONG },
	{ "minfree", 		KSTAT_DATA_ULONG },
	{ "fastscan", 		KSTAT_DATA_ULONG },
	{ "slowscan", 		KSTAT_DATA_ULONG },
	{ "nscan", 		KSTAT_DATA_ULONG },
	{ "desscan", 		KSTAT_DATA_ULONG },
	{ "pp_kernel", 		KSTAT_DATA_ULONG },
	{ "pagesfree", 		KSTAT_DATA_ULONG },
	{ "pageslocked", 	KSTAT_DATA_ULONG },
	{ "pagestotal",		KSTAT_DATA_ULONG },
};
223 
/*
 * ks_update / ks_snapshot routines that kstat_init() installs on the
 * kstat_headers, system_misc and system_pages kstats.
 */
static int header_kstat_update(kstat_t *, int);
static int header_kstat_snapshot(kstat_t *, void *, int);
static int system_misc_kstat_update(kstat_t *, int);
static int system_pages_kstat_update(kstat_t *, int);
228 
/*
 * Per-type properties, indexed by ks_type: the printable type name (used
 * for the kstat_types kstat), the size in bytes of one data element, and
 * the inclusive range of ks_ndata values accepted when creating a kstat
 * of that type.
 */
static struct {
	char	name[KSTAT_STRLEN];
	size_t	size;
	uint_t	min_ndata;
	uint_t	max_ndata;
} kstat_data_type[KSTAT_NUM_TYPES] = {
	{ "raw",		1,			0,	INT_MAX	},
	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
};
241 
242 int
243 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
244 {
245 	ekstat_t *e = (ekstat_t *)k;
246 	kstat_zone_t *kz;
247 
248 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
249 	for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
250 		if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
251 			return (1);
252 		if (zoneid == kz->zoneid)
253 			return (1);
254 	}
255 	return (0);
256 }
257 
258 void
259 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
260 {
261 	ekstat_t *e = (ekstat_t *)k;
262 	kstat_zone_t *kz, *t = NULL;
263 
264 	mutex_enter(&kstat_chain_lock);
265 	if (zoneid == e->e_zone.zoneid) {
266 		kz = e->e_zone.next;
267 		ASSERT(kz != NULL);
268 		e->e_zone.zoneid = kz->zoneid;
269 		e->e_zone.next = kz->next;
270 		goto out;
271 	}
272 	for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
273 		if (kz->next->zoneid == zoneid) {
274 			t = kz->next;
275 			kz->next = t->next;
276 			break;
277 		}
278 	}
279 	ASSERT(t != NULL);	/* we removed something */
280 	kz = t;
281 out:
282 	kstat_chain_id++;
283 	mutex_exit(&kstat_chain_lock);
284 	kmem_free(kz, sizeof (*kz));
285 }
286 
287 void
288 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
289 {
290 	ekstat_t *e = (ekstat_t *)k;
291 	kstat_zone_t *kz;
292 
293 	kz = kmem_alloc(sizeof (*kz), KM_SLEEP);
294 	mutex_enter(&kstat_chain_lock);
295 	kz->zoneid = zoneid;
296 	kz->next = e->e_zone.next;
297 	e->e_zone.next = kz;
298 	kstat_chain_id++;
299 	mutex_exit(&kstat_chain_lock);
300 }
301 
302 /*
303  * Compare the list of zones for the given kstats, returning 0 if they match
304  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
305  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
306  * two lists always has one element, and this is an O(n) operation rather than
307  * O(n^2).
308  */
309 static int
310 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
311 {
312 	kstat_zone_t *kz1, *kz2;
313 
314 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
315 	for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
316 		for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
317 			if (kz1->zoneid == ALL_ZONES ||
318 			    kz2->zoneid == ALL_ZONES)
319 				return (0);
320 			if (kz1->zoneid == kz2->zoneid)
321 				return (0);
322 		}
323 	}
324 	return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
325 }
326 
327 /*
328  * Support for keeping kstats sorted in AVL trees for fast lookups.
329  */
330 static int
331 kstat_compare_bykid(const void *a1, const void *a2)
332 {
333 	const kstat_t *k1 = a1;
334 	const kstat_t *k2 = a2;
335 
336 	if (k1->ks_kid < k2->ks_kid)
337 		return (-1);
338 	if (k1->ks_kid > k2->ks_kid)
339 		return (1);
340 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
341 }
342 
343 static int
344 kstat_compare_byname(const void *a1, const void *a2)
345 {
346 	const kstat_t *k1 = a1;
347 	const kstat_t *k2 = a2;
348 	int s;
349 
350 	s = strcmp(k1->ks_module, k2->ks_module);
351 	if (s > 0)
352 		return (1);
353 	if (s < 0)
354 		return (-1);
355 
356 	if (k1->ks_instance < k2->ks_instance)
357 		return (-1);
358 	if (k1->ks_instance > k2->ks_instance)
359 		return (1);
360 
361 	s = strcmp(k1->ks_name, k2->ks_name);
362 	if (s > 0)
363 		return (1);
364 	if (s < 0)
365 		return (-1);
366 
367 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
368 }
369 
370 static kstat_t *
371 kstat_hold(avl_tree_t *t, ekstat_t *template)
372 {
373 	kstat_t *ksp;
374 	ekstat_t *e;
375 
376 	mutex_enter(&kstat_chain_lock);
377 	for (;;) {
378 		ksp = avl_find(t, template, NULL);
379 		if (ksp == NULL)
380 			break;
381 		e = (ekstat_t *)ksp;
382 		if (e->e_owner == NULL) {
383 			e->e_owner = curthread;
384 			break;
385 		}
386 		cv_wait(&e->e_cv, &kstat_chain_lock);
387 	}
388 	mutex_exit(&kstat_chain_lock);
389 	return (ksp);
390 }
391 
392 void
393 kstat_rele(kstat_t *ksp)
394 {
395 	ekstat_t *e = (ekstat_t *)ksp;
396 
397 	mutex_enter(&kstat_chain_lock);
398 	ASSERT(e->e_owner == curthread);
399 	e->e_owner = NULL;
400 	cv_broadcast(&e->e_cv);
401 	mutex_exit(&kstat_chain_lock);
402 }
403 
404 kstat_t *
405 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
406 {
407 	ekstat_t e;
408 
409 	e.e_ks.ks_kid = kid;
410 	e.e_zone.zoneid = zoneid;
411 	e.e_zone.next = NULL;
412 
413 	return (kstat_hold(&kstat_avl_bykid, &e));
414 }
415 
416 kstat_t *
417 kstat_hold_byname(char *ks_module, int ks_instance, char *ks_name,
418     zoneid_t ks_zoneid)
419 {
420 	ekstat_t e;
421 
422 	kstat_set_string(e.e_ks.ks_module, ks_module);
423 	e.e_ks.ks_instance = ks_instance;
424 	kstat_set_string(e.e_ks.ks_name, ks_name);
425 	e.e_zone.zoneid = ks_zoneid;
426 	e.e_zone.next = NULL;
427 	return (kstat_hold(&kstat_avl_byname, &e));
428 }
429 
430 static ekstat_t *
431 kstat_alloc(size_t size)
432 {
433 	ekstat_t *e = NULL;
434 
435 	size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
436 
437 	if (kstat_arena == NULL) {
438 		if (size <= kstat_initial_avail) {
439 			e = kstat_initial_ptr;
440 			kstat_initial_ptr = (char *)kstat_initial_ptr + size;
441 			kstat_initial_avail -= size;
442 		}
443 	} else {
444 		e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
445 	}
446 
447 	if (e != NULL) {
448 		bzero(e, size);
449 		e->e_size = size;
450 		cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
451 	}
452 
453 	return (e);
454 }
455 
456 static void
457 kstat_free(ekstat_t *e)
458 {
459 	cv_destroy(&e->e_cv);
460 	vmem_free(kstat_arena, e, e->e_size);
461 }
462 
463 /*
464  * Create various system kstats.
465  */
466 void
467 kstat_init(void)
468 {
469 	kstat_t *ksp;
470 	ekstat_t *e;
471 	avl_tree_t *t = &kstat_avl_bykid;
472 
473 	/*
474 	 * Set up the kstat vmem arena.
475 	 */
476 	kstat_arena = vmem_create("kstat",
477 	    kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
478 	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
479 
480 	/*
481 	 * Make initial kstats appear as though they were allocated.
482 	 */
483 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
484 		(void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
485 		    0, 0, e, (char *)e + e->e_size,
486 		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
487 
488 	/*
489 	 * The mother of all kstats.  The first kstat in the system, which
490 	 * always has KID 0, has the headers for all kstats (including itself)
491 	 * as its data.  Thus, the kstat driver does not need any special
492 	 * interface to extract the kstat chain.
493 	 */
494 	kstat_chain_id = 0;
495 	ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
496 		0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
497 	if (ksp) {
498 		ksp->ks_lock = &kstat_chain_lock;
499 		ksp->ks_update = header_kstat_update;
500 		ksp->ks_snapshot = header_kstat_snapshot;
501 		kstat_install(ksp);
502 	} else {
503 		panic("cannot create kstat 'kstat_headers'");
504 	}
505 
506 	ksp = kstat_create("unix", 0, "kstat_types", "kstat",
507 		KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
508 	if (ksp) {
509 		int i;
510 		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
511 
512 		for (i = 0; i < KSTAT_NUM_TYPES; i++) {
513 			kstat_named_init(&kn[i], kstat_data_type[i].name,
514 				KSTAT_DATA_ULONG);
515 			kn[i].value.ul = i;
516 		}
517 		kstat_install(ksp);
518 	}
519 
520 	ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
521 		sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
522 	if (ksp) {
523 		ksp->ks_data = (void *) &sysinfo;
524 		kstat_install(ksp);
525 	}
526 
527 	ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
528 		sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
529 	if (ksp) {
530 		ksp->ks_data = (void *) &vminfo;
531 		kstat_install(ksp);
532 	}
533 
534 	ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
535 		segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
536 	if (ksp) {
537 		ksp->ks_data = (void *) segmapcnt_ptr;
538 		ksp->ks_update = segmap_kstat_update;
539 		kstat_install(ksp);
540 	}
541 
542 	ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
543 		biostats_ndata, KSTAT_FLAG_VIRTUAL);
544 	if (ksp) {
545 		ksp->ks_data = (void *) biostats_ptr;
546 		kstat_install(ksp);
547 	}
548 
549 #ifdef VAC
550 	ksp = kstat_create("unix", 0, "flushmeter", "hat", KSTAT_TYPE_RAW,
551 		sizeof (struct flushmeter), KSTAT_FLAG_VIRTUAL);
552 	if (ksp) {
553 		ksp->ks_data = (void *) &flush_cnt;
554 		kstat_install(ksp);
555 	}
556 #endif	/* VAC */
557 
558 	ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
559 		sizeof (struct var), KSTAT_FLAG_VIRTUAL);
560 	if (ksp) {
561 		ksp->ks_data = (void *) &v;
562 		kstat_install(ksp);
563 	}
564 
565 	ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
566 		sizeof (system_misc_kstat) / sizeof (kstat_named_t),
567 		KSTAT_FLAG_VIRTUAL);
568 	if (ksp) {
569 		ksp->ks_data = (void *) &system_misc_kstat;
570 		ksp->ks_update = system_misc_kstat_update;
571 		kstat_install(ksp);
572 	}
573 
574 	ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
575 		sizeof (system_pages_kstat) / sizeof (kstat_named_t),
576 		KSTAT_FLAG_VIRTUAL);
577 	if (ksp) {
578 		ksp->ks_data = (void *) &system_pages_kstat;
579 		ksp->ks_update = system_pages_kstat_update;
580 		kstat_install(ksp);
581 	}
582 
583 	ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
584 	    pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
585 
586 	if (ksp) {
587 		ksp->ks_data = pollstats_ptr;
588 		kstat_install(ksp);
589 	}
590 }
591 
592 /*
593  * Caller of this should ensure that the string pointed by src
594  * doesn't change while kstat's lock is held. Not doing so defeats
595  * kstat's snapshot strategy as explained in <sys/kstat.h>
596  */
597 void
598 kstat_named_setstr(kstat_named_t *knp, const char *src)
599 {
600 	if (knp->data_type != KSTAT_DATA_STRING)
601 		panic("kstat_named_setstr('%p', '%p'): "
602 		    "named kstat is not of type KSTAT_DATA_STRING", knp, src);
603 
604 	KSTAT_NAMED_STR_PTR(knp) = (char *)src;
605 	if (src != NULL)
606 		KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
607 	else
608 		KSTAT_NAMED_STR_BUFLEN(knp) = 0;
609 }
610 
611 void
612 kstat_set_string(char *dst, char *src)
613 {
614 	bzero(dst, KSTAT_STRLEN);
615 	(void) strncpy(dst, src, KSTAT_STRLEN - 1);
616 }
617 
618 void
619 kstat_named_init(kstat_named_t *knp, char *name, uchar_t data_type)
620 {
621 	kstat_set_string(knp->name, name);
622 	knp->data_type = data_type;
623 
624 	if (data_type == KSTAT_DATA_STRING)
625 		kstat_named_setstr(knp, NULL);
626 }
627 
/*
 * Set the name of a timer kstat entry.  No other field of 'ktp' is touched.
 */
void
kstat_timer_init(kstat_timer_t *ktp, char *name)
{
	kstat_set_string(ktp->name, name);
}
633 
634 /* ARGSUSED */
635 static int
636 default_kstat_update(kstat_t *ksp, int rw)
637 {
638 	uint_t i;
639 	size_t len = 0;
640 	kstat_named_t *knp;
641 
642 	/*
643 	 * Named kstats with variable-length long strings have a standard
644 	 * way of determining how much space is needed to hold the snapshot:
645 	 */
646 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
647 	    (ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
648 
649 		/*
650 		 * Add in the space required for the strings
651 		 */
652 		knp = KSTAT_NAMED_PTR(ksp);
653 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
654 			if (knp->data_type == KSTAT_DATA_STRING)
655 				len += KSTAT_NAMED_STR_BUFLEN(knp);
656 		}
657 		ksp->ks_data_size =
658 		    ksp->ks_ndata * sizeof (kstat_named_t) + len;
659 	}
660 	return (0);
661 }
662 
663 static int
664 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
665 {
666 	kstat_io_t *kiop;
667 	hrtime_t cur_time;
668 	size_t	namedsz;
669 
670 	ksp->ks_snaptime = cur_time = gethrtime();
671 
672 	if (rw == KSTAT_WRITE) {
673 		if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
674 			return (EACCES);
675 		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
676 		return (0);
677 	}
678 
679 	/*
680 	 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
681 	 * number of kstat_named_t structures, followed by an optional
682 	 * string segment. The ks_data generally holds only the
683 	 * kstat_named_t structures. So we copy it first. The strings,
684 	 * if any, are copied below. For other kstat types, ks_data holds the
685 	 * entire buffer.
686 	 */
687 
688 	namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
689 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
690 		bcopy(ksp->ks_data, buf, namedsz);
691 	else
692 		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
693 
694 	/*
695 	 * Apply kstat type-specific data massaging
696 	 */
697 	switch (ksp->ks_type) {
698 
699 	case KSTAT_TYPE_IO:
700 		/*
701 		 * Normalize time units and deal with incomplete transactions
702 		 */
703 		kiop = (kstat_io_t *)buf;
704 
705 		scalehrtime(&kiop->wtime);
706 		scalehrtime(&kiop->wlentime);
707 		scalehrtime(&kiop->wlastupdate);
708 		scalehrtime(&kiop->rtime);
709 		scalehrtime(&kiop->rlentime);
710 		scalehrtime(&kiop->rlastupdate);
711 
712 		if (kiop->wcnt != 0) {
713 			hrtime_t wfix = cur_time - kiop->wlastupdate;
714 			kiop->wtime += wfix;
715 			kiop->wlentime += kiop->wcnt * wfix;
716 		}
717 		kiop->wlastupdate = cur_time;
718 		if (kiop->rcnt != 0) {
719 			hrtime_t rfix = cur_time - kiop->rlastupdate;
720 			kiop->rtime += rfix;
721 			kiop->rlentime += kiop->rcnt * rfix;
722 		}
723 		kiop->rlastupdate = cur_time;
724 		break;
725 
726 	case KSTAT_TYPE_NAMED:
727 		/*
728 		 * Massage any long strings in at the end of the buffer
729 		 */
730 		if (ksp->ks_data_size > namedsz) {
731 			uint_t i;
732 			kstat_named_t *knp = buf;
733 			char *dst = (char *)(knp + ksp->ks_ndata);
734 			/*
735 			 * Copy strings and update pointers
736 			 */
737 			for (i = 0; i < ksp->ks_ndata; i++, knp++) {
738 				if (knp->data_type == KSTAT_DATA_STRING &&
739 				    KSTAT_NAMED_STR_PTR(knp) != NULL) {
740 					bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
741 					    KSTAT_NAMED_STR_BUFLEN(knp));
742 					KSTAT_NAMED_STR_PTR(knp) = dst;
743 					dst += KSTAT_NAMED_STR_BUFLEN(knp);
744 				}
745 			}
746 			ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
747 		}
748 		break;
749 	}
750 	return (0);
751 }
752 
753 static int
754 header_kstat_update(kstat_t *header_ksp, int rw)
755 {
756 	int nkstats = 0;
757 	ekstat_t *e;
758 	avl_tree_t *t = &kstat_avl_bykid;
759 	zoneid_t zoneid;
760 
761 	if (rw == KSTAT_WRITE)
762 		return (EACCES);
763 
764 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
765 
766 	zoneid = getzoneid();
767 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
768 		if (kstat_zone_find((kstat_t *)e, zoneid)) {
769 			nkstats++;
770 		}
771 	}
772 	header_ksp->ks_ndata = nkstats;
773 	header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
774 	return (0);
775 }
776 
777 /*
778  * Copy out the data section of kstat 0, which consists of the list
779  * of all kstat headers.  By specification, these headers must be
780  * copied out in order of increasing KID.
781  */
782 static int
783 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
784 {
785 	ekstat_t *e;
786 	avl_tree_t *t = &kstat_avl_bykid;
787 	zoneid_t zoneid;
788 
789 	header_ksp->ks_snaptime = gethrtime();
790 
791 	if (rw == KSTAT_WRITE)
792 		return (EACCES);
793 
794 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
795 
796 	zoneid = getzoneid();
797 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
798 		if (kstat_zone_find((kstat_t *)e, zoneid)) {
799 			bcopy(&e->e_ks, buf, sizeof (kstat_t));
800 			buf = (char *)buf + sizeof (kstat_t);
801 		}
802 	}
803 
804 	return (0);
805 }
806 
807 /* ARGSUSED */
808 static int
809 system_misc_kstat_update(kstat_t *ksp, int rw)
810 {
811 	int myncpus = ncpus;
812 	int *loadavgp = &avenrun[0];
813 	int loadavg[LOADAVG_NSTATS];
814 
815 	if (rw == KSTAT_WRITE)
816 		return (EACCES);
817 
818 	if (!INGLOBALZONE(curproc)) {
819 		/*
820 		 * Here we grab cpu_lock which is OK as long as no-one in the
821 		 * future attempts to lookup this particular kstat
822 		 * (unix:0:system_misc) while holding cpu_lock.
823 		 */
824 		mutex_enter(&cpu_lock);
825 		if (pool_pset_enabled()) {
826 			psetid_t mypsid = zone_pset_get(curproc->p_zone);
827 			int error;
828 
829 			myncpus = zone_ncpus_get(curproc->p_zone);
830 			ASSERT(myncpus > 0);
831 			error = cpupart_get_loadavg(mypsid, &loadavg[0],
832 			    LOADAVG_NSTATS);
833 			ASSERT(error == 0);
834 			loadavgp = &loadavg[0];
835 		}
836 		mutex_exit(&cpu_lock);
837 	}
838 
839 	system_misc_kstat.ncpus.value.ui32		= (uint32_t)myncpus;
840 	system_misc_kstat.lbolt.value.ui32		= (uint32_t)lbolt;
841 	system_misc_kstat.deficit.value.ui32		= (uint32_t)deficit;
842 	system_misc_kstat.clk_intr.value.ui32		= (uint32_t)lbolt;
843 	system_misc_kstat.vac.value.ui32		= (uint32_t)vac;
844 	system_misc_kstat.nproc.value.ui32		= (uint32_t)nproc;
845 	system_misc_kstat.avenrun_1min.value.ui32	= (uint32_t)loadavgp[0];
846 	system_misc_kstat.avenrun_5min.value.ui32	= (uint32_t)loadavgp[1];
847 	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];
848 	system_misc_kstat.boot_time.value.ui32		= (uint32_t)boot_time;
849 	return (0);
850 }
851 
/*
 * Source of the "econtig" value in unix:0:system_pages: econtig32 on
 * sparc, econtig everywhere else.
 */
#ifdef	__sparc
extern caddr_t	econtig32;
#else	/* !__sparc */
extern caddr_t	econtig;
#endif	/* __sparc */

extern struct vnode kvp;
859 
860 /* ARGSUSED */
861 static int
862 system_pages_kstat_update(kstat_t *ksp, int rw)
863 {
864 	kobj_stat_t kobj_stat;
865 
866 	if (rw == KSTAT_WRITE) {
867 		return (EACCES);
868 	}
869 
870 	kobj_stat_get(&kobj_stat);
871 	system_pages_kstat.physmem.value.ul	= (ulong_t)physmem;
872 	system_pages_kstat.nalloc.value.ul	= kobj_stat.nalloc;
873 	system_pages_kstat.nfree.value.ul	= kobj_stat.nfree;
874 	system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
875 	system_pages_kstat.nfree_calls.value.ul	= kobj_stat.nfree_calls;
876 	system_pages_kstat.kernelbase.value.ul	= (ulong_t)KERNELBASE;
877 
878 #ifdef	__sparc
879 	/*
880 	 * kstat should REALLY be modified to also report kmem64_base and
881 	 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
882 	 * [ kernelbase .. econtig ] no longer is truly reflective of the
883 	 * kernel's vallocs...
884 	 */
885 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig32;
886 #else	/* !__sparc */
887 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig;
888 #endif	/* __sparc */
889 
890 	system_pages_kstat.freemem.value.ul	= (ulong_t)freemem;
891 	system_pages_kstat.availrmem.value.ul	= (ulong_t)availrmem;
892 	system_pages_kstat.lotsfree.value.ul	= (ulong_t)lotsfree;
893 	system_pages_kstat.desfree.value.ul	= (ulong_t)desfree;
894 	system_pages_kstat.minfree.value.ul	= (ulong_t)minfree;
895 	system_pages_kstat.fastscan.value.ul	= (ulong_t)fastscan;
896 	system_pages_kstat.slowscan.value.ul	= (ulong_t)slowscan;
897 	system_pages_kstat.nscan.value.ul	= (ulong_t)nscan;
898 	system_pages_kstat.desscan.value.ul	= (ulong_t)desscan;
899 	system_pages_kstat.pagesfree.value.ul	= (ulong_t)freemem;
900 	system_pages_kstat.pageslocked.value.ul	= (ulong_t)(availrmem_initial -
901 	    availrmem);
902 	system_pages_kstat.pagestotal.value.ul	= (ulong_t)total_pages;
903 	/*
904 	 * pp_kernel represents total pages used by the kernel since the
905 	 * startup. This formula takes into account the boottime kernel
906 	 * footprint and also considers the availrmem changes because of
907 	 * user explicit page locking.
908 	 */
909 	system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
910 		obp_pages - availrmem - k_anoninfo.ani_mem_resv -
911 		anon_segkp_pages_locked - segvn_pages_locked -
912 		pages_locked - pages_claimed - pages_useclaim);
913 
914 	return (0);
915 }
916 
/*
 * Create a kstat that is exported to all zones.  This is
 * kstat_create_zone() with ALL_ZONES as the zone ID; all other arguments
 * and the return value are passed through unchanged.
 */
kstat_t *
kstat_create(char *ks_module, int ks_instance, char *ks_name, char *ks_class,
    uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
{
	return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
		    ks_type, ks_ndata, ks_flags, ALL_ZONES));
}
924 
925 /*
926  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
927  * the specified name exists, reactivate it.  Returns a pointer to the kstat
928  * on success, NULL on failure.  The kstat will not be visible to the
929  * kstat driver until kstat_install().
930  */
931 kstat_t *
932 kstat_create_zone(char *ks_module, int ks_instance, char *ks_name,
933     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
934     zoneid_t ks_zoneid)
935 {
936 	size_t ks_data_size;
937 	kstat_t *ksp;
938 	ekstat_t *e;
939 	avl_index_t where;
940 	char namebuf[KSTAT_STRLEN + 16];
941 
942 	if (avl_numnodes(&kstat_avl_bykid) == 0) {
943 		avl_create(&kstat_avl_bykid, kstat_compare_bykid,
944 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
945 
946 		avl_create(&kstat_avl_byname, kstat_compare_byname,
947 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
948 	}
949 
950 	/*
951 	 * If ks_name == NULL, set the ks_name to <module><instance>.
952 	 */
953 	if (ks_name == NULL) {
954 		char buf[KSTAT_STRLEN];
955 		kstat_set_string(buf, ks_module);
956 		(void) sprintf(namebuf, "%s%d", buf, ks_instance);
957 		ks_name = namebuf;
958 	}
959 
960 	/*
961 	 * Make sure it's a valid kstat data type
962 	 */
963 	if (ks_type >= KSTAT_NUM_TYPES) {
964 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
965 			"invalid kstat type %d",
966 			ks_module, ks_instance, ks_name, ks_type);
967 		return (NULL);
968 	}
969 
970 	/*
971 	 * Don't allow persistent virtual kstats -- it makes no sense.
972 	 * ks_data points to garbage when the client goes away.
973 	 */
974 	if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
975 	    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
976 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
977 			"cannot create persistent virtual kstat",
978 			ks_module, ks_instance, ks_name);
979 		return (NULL);
980 	}
981 
982 	/*
983 	 * Don't allow variable-size physical kstats, since the framework's
984 	 * memory allocation for physical kstat data is fixed at creation time.
985 	 */
986 	if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
987 	    !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
988 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
989 			"cannot create variable-size physical kstat",
990 			ks_module, ks_instance, ks_name);
991 		return (NULL);
992 	}
993 
994 	/*
995 	 * Make sure the number of data fields is within legal range
996 	 */
997 	if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
998 	    ks_ndata > kstat_data_type[ks_type].max_ndata) {
999 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1000 			"ks_ndata=%d out of range [%d, %d]",
1001 			ks_module, ks_instance, ks_name, (int)ks_ndata,
1002 			kstat_data_type[ks_type].min_ndata,
1003 			kstat_data_type[ks_type].max_ndata);
1004 		return (NULL);
1005 	}
1006 
1007 	ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1008 
1009 	/*
1010 	 * If the named kstat already exists and is dormant, reactivate it.
1011 	 */
1012 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1013 	if (ksp != NULL) {
1014 		if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1015 			/*
1016 			 * The named kstat exists but is not dormant --
1017 			 * this is a kstat namespace collision.
1018 			 */
1019 			kstat_rele(ksp);
1020 			cmn_err(CE_WARN,
1021 			    "kstat_create('%s', %d, '%s'): namespace collision",
1022 			    ks_module, ks_instance, ks_name);
1023 			return (NULL);
1024 		}
1025 		if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1026 		    (ksp->ks_type != ks_type) ||
1027 		    (ksp->ks_ndata != ks_ndata) ||
1028 		    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1029 			/*
1030 			 * The name is the same, but the other key parameters
1031 			 * differ from those of the dormant kstat -- bogus.
1032 			 */
1033 			kstat_rele(ksp);
1034 			cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1035 				"invalid reactivation of dormant kstat",
1036 				ks_module, ks_instance, ks_name);
1037 			return (NULL);
1038 		}
1039 		/*
1040 		 * Return dormant kstat pointer to caller.  As usual,
1041 		 * the kstat is marked invalid until kstat_install().
1042 		 */
1043 		ksp->ks_flags |= KSTAT_FLAG_INVALID;
1044 		kstat_rele(ksp);
1045 		return (ksp);
1046 	}
1047 
1048 	/*
1049 	 * Allocate memory for the new kstat header and, if this is a physical
1050 	 * kstat, the data section.
1051 	 */
1052 	e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1053 	if (e == NULL) {
1054 		cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1055 			"insufficient kernel memory",
1056 			ks_module, ks_instance, ks_name);
1057 		return (NULL);
1058 	}
1059 
1060 	/*
1061 	 * Initialize as many fields as we can.  The caller may reset
1062 	 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1063 	 * Creators of virtual kstats may also reset ks_data.  It is
1064 	 * also up to the caller to initialize the kstat data section,
1065 	 * if necessary.  All initialization must be complete before
1066 	 * calling kstat_install().
1067 	 */
1068 	e->e_zone.zoneid = ks_zoneid;
1069 	e->e_zone.next = NULL;
1070 
1071 	ksp = &e->e_ks;
1072 	ksp->ks_crtime		= gethrtime();
1073 	kstat_set_string(ksp->ks_module, ks_module);
1074 	ksp->ks_instance	= ks_instance;
1075 	kstat_set_string(ksp->ks_name, ks_name);
1076 	ksp->ks_type		= ks_type;
1077 	kstat_set_string(ksp->ks_class, ks_class);
1078 	ksp->ks_flags		= ks_flags | KSTAT_FLAG_INVALID;
1079 	if (ks_flags & KSTAT_FLAG_VIRTUAL)
1080 		ksp->ks_data	= NULL;
1081 	else
1082 		ksp->ks_data	= (void *)(e + 1);
1083 	ksp->ks_ndata		= ks_ndata;
1084 	ksp->ks_data_size	= ks_data_size;
1085 	ksp->ks_snaptime	= ksp->ks_crtime;
1086 	ksp->ks_update		= default_kstat_update;
1087 	ksp->ks_private		= NULL;
1088 	ksp->ks_snapshot	= default_kstat_snapshot;
1089 	ksp->ks_lock		= NULL;
1090 
1091 	mutex_enter(&kstat_chain_lock);
1092 
1093 	/*
1094 	 * Add our kstat to the AVL trees.
1095 	 */
1096 	if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1097 		mutex_exit(&kstat_chain_lock);
1098 		cmn_err(CE_WARN,
1099 		    "kstat_create('%s', %d, '%s'): namespace collision",
1100 		    ks_module, ks_instance, ks_name);
1101 		kstat_free(e);
1102 		return (NULL);
1103 	}
1104 	avl_insert(&kstat_avl_byname, e, where);
1105 
1106 	/*
1107 	 * Loop around until we find an unused KID.
1108 	 */
1109 	do {
1110 		ksp->ks_kid = kstat_chain_id++;
1111 	} while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1112 	avl_insert(&kstat_avl_bykid, e, where);
1113 
1114 	mutex_exit(&kstat_chain_lock);
1115 
1116 	return (ksp);
1117 }
1118 
1119 /*
1120  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1121  */
1122 void
1123 kstat_install(kstat_t *ksp)
1124 {
1125 	zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1126 
1127 	/*
1128 	 * If this is a variable-size kstat, it MUST provide kstat data locking
1129 	 * to prevent data-size races with kstat readers.
1130 	 */
1131 	if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1132 		panic("kstat_install('%s', %d, '%s'): "
1133 		    "cannot create variable-size kstat without data lock",
1134 		    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1135 	}
1136 
1137 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1138 		cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1139 		    (void *)ksp);
1140 		return;
1141 	}
1142 
1143 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1144 		int has_long_strings = 0;
1145 		uint_t i;
1146 		kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1147 
1148 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1149 			if (knp->data_type == KSTAT_DATA_STRING) {
1150 				has_long_strings = 1;
1151 				break;
1152 			}
1153 		}
1154 		/*
1155 		 * It is an error for a named kstat with fields of
1156 		 * KSTAT_DATA_STRING to be non-virtual.
1157 		 */
1158 		if (has_long_strings && !(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) {
1159 			panic("kstat_install('%s', %d, '%s'): "
1160 			    "named kstat containing KSTAT_DATA_STRING "
1161 			    "is not virtual",
1162 			    ksp->ks_module, ksp->ks_instance,
1163 			    ksp->ks_name);
1164 		}
1165 		/*
1166 		 * The default snapshot routine does not handle KSTAT_WRITE
1167 		 * for long strings.
1168 		 */
1169 		if (has_long_strings && (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1170 		    (ksp->ks_snapshot == default_kstat_snapshot)) {
1171 			panic("kstat_install('%s', %d, '%s'): "
1172 			    "named kstat containing KSTAT_DATA_STRING "
1173 			    "is writable but uses default snapshot routine",
1174 			    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1175 		}
1176 	}
1177 
1178 	if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1179 
1180 		/*
1181 		 * We are reactivating a dormant kstat.  Initialize the
1182 		 * caller's underlying data to the value it had when the
1183 		 * kstat went dormant, and mark the kstat as active.
1184 		 * Grab the provider's kstat lock if it's not already held.
1185 		 */
1186 		kmutex_t *lp = ksp->ks_lock;
1187 		if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1188 			mutex_enter(lp);
1189 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1190 			mutex_exit(lp);
1191 		} else {
1192 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1193 		}
1194 		ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1195 	}
1196 
1197 	/*
1198 	 * Now that the kstat is active, make it visible to the kstat driver.
1199 	 */
1200 	ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1201 	kstat_rele(ksp);
1202 }
1203 
1204 /*
1205  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1206  * just update the data and mark it as dormant.
1207  */
1208 void
1209 kstat_delete(kstat_t *ksp)
1210 {
1211 	kmutex_t *lp;
1212 	ekstat_t *e = (ekstat_t *)ksp;
1213 	zoneid_t zoneid = e->e_zone.zoneid;
1214 	kstat_zone_t *kz;
1215 
1216 	if (ksp == NULL)
1217 		return;
1218 
1219 	lp = ksp->ks_lock;
1220 
1221 	if (lp != NULL && MUTEX_HELD(lp)) {
1222 		panic("kstat_delete(%p): caller holds data lock %p",
1223 		    (void *)ksp, (void *)lp);
1224 	}
1225 
1226 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1227 		cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1228 		    (void *)ksp);
1229 		return;
1230 	}
1231 
1232 	if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1233 		/*
1234 		 * Update the data one last time, so that all activity
1235 		 * prior to going dormant has been accounted for.
1236 		 */
1237 		KSTAT_ENTER(ksp);
1238 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
1239 		KSTAT_EXIT(ksp);
1240 
1241 		/*
1242 		 * Mark the kstat as dormant and restore caller-modifiable
1243 		 * fields to default values, so the kstat is readable during
1244 		 * the dormant phase.
1245 		 */
1246 		ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1247 		ksp->ks_lock = NULL;
1248 		ksp->ks_update = default_kstat_update;
1249 		ksp->ks_private = NULL;
1250 		ksp->ks_snapshot = default_kstat_snapshot;
1251 		kstat_rele(ksp);
1252 		return;
1253 	}
1254 
1255 	/*
1256 	 * Remove the kstat from the framework's AVL trees,
1257 	 * free the allocated memory, and increment kstat_chain_id so
1258 	 * /dev/kstat clients can detect the event.
1259 	 */
1260 	mutex_enter(&kstat_chain_lock);
1261 	avl_remove(&kstat_avl_bykid, e);
1262 	avl_remove(&kstat_avl_byname, e);
1263 	kstat_chain_id++;
1264 	mutex_exit(&kstat_chain_lock);
1265 
1266 	kz = e->e_zone.next;
1267 	while (kz != NULL) {
1268 		kstat_zone_t *t = kz;
1269 
1270 		kz = kz->next;
1271 		kmem_free(t, sizeof (*t));
1272 	}
1273 	kstat_rele(ksp);
1274 	kstat_free(e);
1275 }
1276 
1277 void
1278 kstat_delete_byname_zone(char *ks_module, int ks_instance, char *ks_name,
1279     zoneid_t ks_zoneid)
1280 {
1281 	kstat_t *ksp;
1282 
1283 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1284 	if (ksp != NULL) {
1285 		kstat_rele(ksp);
1286 		kstat_delete(ksp);
1287 	}
1288 }
1289 
1290 void
1291 kstat_delete_byname(char *ks_module, int ks_instance, char *ks_name)
1292 {
1293 	kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1294 }
1295 
1296 /*
1297  * The sparc V9 versions of these routines can be much cheaper than
1298  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1299  * For simplicity, however, we always feed the C versions to lint.
1300  */
1301 #if !defined(__sparc) || defined(lint) || defined(__lint)
1302 
1303 void
1304 kstat_waitq_enter(kstat_io_t *kiop)
1305 {
1306 	hrtime_t new, delta;
1307 	ulong_t wcnt;
1308 
1309 	new = gethrtime_unscaled();
1310 	delta = new - kiop->wlastupdate;
1311 	kiop->wlastupdate = new;
1312 	wcnt = kiop->wcnt++;
1313 	if (wcnt != 0) {
1314 		kiop->wlentime += delta * wcnt;
1315 		kiop->wtime += delta;
1316 	}
1317 }
1318 
1319 void
1320 kstat_waitq_exit(kstat_io_t *kiop)
1321 {
1322 	hrtime_t new, delta;
1323 	ulong_t wcnt;
1324 
1325 	new = gethrtime_unscaled();
1326 	delta = new - kiop->wlastupdate;
1327 	kiop->wlastupdate = new;
1328 	wcnt = kiop->wcnt--;
1329 	ASSERT((int)wcnt > 0);
1330 	kiop->wlentime += delta * wcnt;
1331 	kiop->wtime += delta;
1332 }
1333 
1334 void
1335 kstat_runq_enter(kstat_io_t *kiop)
1336 {
1337 	hrtime_t new, delta;
1338 	ulong_t rcnt;
1339 
1340 	new = gethrtime_unscaled();
1341 	delta = new - kiop->rlastupdate;
1342 	kiop->rlastupdate = new;
1343 	rcnt = kiop->rcnt++;
1344 	if (rcnt != 0) {
1345 		kiop->rlentime += delta * rcnt;
1346 		kiop->rtime += delta;
1347 	}
1348 }
1349 
1350 void
1351 kstat_runq_exit(kstat_io_t *kiop)
1352 {
1353 	hrtime_t new, delta;
1354 	ulong_t rcnt;
1355 
1356 	new = gethrtime_unscaled();
1357 	delta = new - kiop->rlastupdate;
1358 	kiop->rlastupdate = new;
1359 	rcnt = kiop->rcnt--;
1360 	ASSERT((int)rcnt > 0);
1361 	kiop->rlentime += delta * rcnt;
1362 	kiop->rtime += delta;
1363 }
1364 
1365 void
1366 kstat_waitq_to_runq(kstat_io_t *kiop)
1367 {
1368 	hrtime_t new, delta;
1369 	ulong_t wcnt, rcnt;
1370 
1371 	new = gethrtime_unscaled();
1372 
1373 	delta = new - kiop->wlastupdate;
1374 	kiop->wlastupdate = new;
1375 	wcnt = kiop->wcnt--;
1376 	ASSERT((int)wcnt > 0);
1377 	kiop->wlentime += delta * wcnt;
1378 	kiop->wtime += delta;
1379 
1380 	delta = new - kiop->rlastupdate;
1381 	kiop->rlastupdate = new;
1382 	rcnt = kiop->rcnt++;
1383 	if (rcnt != 0) {
1384 		kiop->rlentime += delta * rcnt;
1385 		kiop->rtime += delta;
1386 	}
1387 }
1388 
1389 void
1390 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1391 {
1392 	hrtime_t new, delta;
1393 	ulong_t wcnt, rcnt;
1394 
1395 	new = gethrtime_unscaled();
1396 
1397 	delta = new - kiop->rlastupdate;
1398 	kiop->rlastupdate = new;
1399 	rcnt = kiop->rcnt--;
1400 	ASSERT((int)rcnt > 0);
1401 	kiop->rlentime += delta * rcnt;
1402 	kiop->rtime += delta;
1403 
1404 	delta = new - kiop->wlastupdate;
1405 	kiop->wlastupdate = new;
1406 	wcnt = kiop->wcnt++;
1407 	if (wcnt != 0) {
1408 		kiop->wlentime += delta * wcnt;
1409 		kiop->wtime += delta;
1410 	}
1411 }
1412 
1413 #endif
1414 
1415 void
1416 kstat_timer_start(kstat_timer_t *ktp)
1417 {
1418 	ktp->start_time = gethrtime();
1419 }
1420 
1421 void
1422 kstat_timer_stop(kstat_timer_t *ktp)
1423 {
1424 	hrtime_t	etime;
1425 	u_longlong_t	num_events;
1426 
1427 	ktp->stop_time = etime = gethrtime();
1428 	etime -= ktp->start_time;
1429 	num_events = ktp->num_events;
1430 	if (etime < ktp->min_time || num_events == 0)
1431 		ktp->min_time = etime;
1432 	if (etime > ktp->max_time)
1433 		ktp->max_time = etime;
1434 	ktp->elapsed_time += etime;
1435 	ktp->num_events = num_events + 1;
1436 }
1437