xref: /illumos-gate/usr/src/uts/common/os/kstat_fr.c (revision 24f5a37652e188ebdcdd6da454511686935025df)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2014, Joyent, Inc. All rights reserved.
24  */
25 
26 /*
27  * Kernel statistics framework
28  */
29 
30 #include <sys/types.h>
31 #include <sys/time.h>
32 #include <sys/systm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/t_lock.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/vmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/cmn_err.h>
40 #include <sys/kstat.h>
41 #include <sys/sysinfo.h>
42 #include <sys/cpuvar.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/vfs.h>
47 #include <sys/dnlc.h>
48 #include <sys/var.h>
49 #include <sys/debug.h>
50 #include <sys/kobj.h>
51 #include <sys/avl.h>
52 #include <sys/pool_pset.h>
53 #include <sys/cpupart.h>
54 #include <sys/zone.h>
55 #include <sys/loadavg.h>
56 #include <vm/page.h>
57 #include <vm/anon.h>
58 #include <vm/seg_kmem.h>
59 
60 /*
61  * Global lock to protect the AVL trees and kstat_chain_id.
62  */
63 static kmutex_t kstat_chain_lock;
64 
65 /*
66  * Every install/delete kstat bumps kstat_chain_id.  This is used by:
67  *
68  * (1)	/dev/kstat, to detect changes in the kstat chain across ioctls;
69  *
70  * (2)	kstat_create(), to assign a KID (kstat ID) to each new kstat.
71  *	/dev/kstat uses the KID as a cookie for kstat lookups.
72  *
73  * We reserve the first two IDs because some kstats are created before
74  * the well-known ones (kstat_headers = 0, kstat_types = 1).
75  *
76  * We also bump the kstat_chain_id if a zone is gaining or losing visibility
77  * into a particular kstat, which is logically equivalent to a kstat being
78  * installed/deleted.
79  */
80 
81 kid_t kstat_chain_id = 2;
82 
83 /*
84  * As far as zones are concerned, there are 3 types of kstat:
85  *
86  * 1) Those which have a well-known name, and which should return per-zone data
87  * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
88  * is an example of this type of kstat.
89  *
90  * 2) Those which should only be exported to a particular list of zones.
91  * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
92  * able to see NFS mounts associated with zone B, while we want the
93  * global zone to be able to see all mounts on the system.
94  *
95  * 3) Those that can be exported to all zones.  Most system-related
96  * kstats fall within this category.
97  *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
104  *
105  * Writing to kstats is currently disallowed from within a non-global
106  * zone, although this restriction could be removed in the future.
107  */
108 typedef struct kstat_zone {
109 	zoneid_t zoneid;
110 	struct kstat_zone *next;
111 } kstat_zone_t;
112 
113 /*
114  * Extended kstat structure -- for internal use only.
115  */
116 typedef struct ekstat {
117 	kstat_t		e_ks;		/* the kstat itself */
118 	size_t		e_size;		/* total allocation size */
119 	kthread_t	*e_owner;	/* thread holding this kstat */
120 	kcondvar_t	e_cv;		/* wait for owner == NULL */
121 	avl_node_t	e_avl_bykid;	/* AVL tree to sort by KID */
122 	avl_node_t	e_avl_byname;	/* AVL tree to sort by name */
123 	kstat_zone_t	e_zone;		/* zone to export stats to */
124 } ekstat_t;
125 
126 static uint64_t kstat_initial[8192];
127 static void *kstat_initial_ptr = kstat_initial;
128 static size_t kstat_initial_avail = sizeof (kstat_initial);
129 static vmem_t *kstat_arena;
130 
131 #define	KSTAT_ALIGN	(sizeof (uint64_t))
132 
133 static avl_tree_t kstat_avl_bykid;
134 static avl_tree_t kstat_avl_byname;
135 
136 /*
137  * Various pointers we need to create kstats at boot time in kstat_init()
138  */
139 extern	kstat_named_t	*segmapcnt_ptr;
140 extern	uint_t		segmapcnt_ndata;
141 extern	int		segmap_kstat_update(kstat_t *, int);
142 extern	kstat_named_t	*biostats_ptr;
143 extern	uint_t		biostats_ndata;
144 extern	kstat_named_t	*pollstats_ptr;
145 extern	uint_t		pollstats_ndata;
146 
147 extern	int	vac;
148 extern	uint_t	nproc;
149 extern	time_t	boot_time;
150 extern	sysinfo_t	sysinfo;
151 extern	vminfo_t	vminfo;
152 
153 struct {
154 	kstat_named_t ncpus;
155 	kstat_named_t lbolt;
156 	kstat_named_t deficit;
157 	kstat_named_t clk_intr;
158 	kstat_named_t vac;
159 	kstat_named_t nproc;
160 	kstat_named_t avenrun_1min;
161 	kstat_named_t avenrun_5min;
162 	kstat_named_t avenrun_15min;
163 	kstat_named_t boot_time;
164 	kstat_named_t nsec_per_tick;
165 } system_misc_kstat = {
166 	{ "ncpus",		KSTAT_DATA_UINT32 },
167 	{ "lbolt",		KSTAT_DATA_UINT32 },
168 	{ "deficit",		KSTAT_DATA_UINT32 },
169 	{ "clk_intr",		KSTAT_DATA_UINT32 },
170 	{ "vac",		KSTAT_DATA_UINT32 },
171 	{ "nproc",		KSTAT_DATA_UINT32 },
172 	{ "avenrun_1min",	KSTAT_DATA_UINT32 },
173 	{ "avenrun_5min",	KSTAT_DATA_UINT32 },
174 	{ "avenrun_15min",	KSTAT_DATA_UINT32 },
175 	{ "boot_time",		KSTAT_DATA_UINT32 },
176 	{ "nsec_per_tick",	KSTAT_DATA_UINT32 },
177 };
178 
179 struct {
180 	kstat_named_t physmem;
181 	kstat_named_t nalloc;
182 	kstat_named_t nfree;
183 	kstat_named_t nalloc_calls;
184 	kstat_named_t nfree_calls;
185 	kstat_named_t kernelbase;
186 	kstat_named_t econtig;
187 	kstat_named_t freemem;
188 	kstat_named_t availrmem;
189 	kstat_named_t lotsfree;
190 	kstat_named_t desfree;
191 	kstat_named_t minfree;
192 	kstat_named_t fastscan;
193 	kstat_named_t slowscan;
194 	kstat_named_t nscan;
195 	kstat_named_t desscan;
196 	kstat_named_t pp_kernel;
197 	kstat_named_t pagesfree;
198 	kstat_named_t pageslocked;
199 	kstat_named_t pagestotal;
200 } system_pages_kstat = {
201 	{ "physmem",		KSTAT_DATA_ULONG },
202 	{ "nalloc",		KSTAT_DATA_ULONG },
203 	{ "nfree",		KSTAT_DATA_ULONG },
204 	{ "nalloc_calls",	KSTAT_DATA_ULONG },
205 	{ "nfree_calls",	KSTAT_DATA_ULONG },
206 	{ "kernelbase",		KSTAT_DATA_ULONG },
207 	{ "econtig", 		KSTAT_DATA_ULONG },
208 	{ "freemem", 		KSTAT_DATA_ULONG },
209 	{ "availrmem", 		KSTAT_DATA_ULONG },
210 	{ "lotsfree", 		KSTAT_DATA_ULONG },
211 	{ "desfree", 		KSTAT_DATA_ULONG },
212 	{ "minfree", 		KSTAT_DATA_ULONG },
213 	{ "fastscan", 		KSTAT_DATA_ULONG },
214 	{ "slowscan", 		KSTAT_DATA_ULONG },
215 	{ "nscan", 		KSTAT_DATA_ULONG },
216 	{ "desscan", 		KSTAT_DATA_ULONG },
217 	{ "pp_kernel", 		KSTAT_DATA_ULONG },
218 	{ "pagesfree", 		KSTAT_DATA_ULONG },
219 	{ "pageslocked", 	KSTAT_DATA_ULONG },
220 	{ "pagestotal",		KSTAT_DATA_ULONG },
221 };
222 
223 static int header_kstat_update(kstat_t *, int);
224 static int header_kstat_snapshot(kstat_t *, void *, int);
225 static int system_misc_kstat_update(kstat_t *, int);
226 static int system_pages_kstat_update(kstat_t *, int);
227 
228 static struct {
229 	char	name[KSTAT_STRLEN];
230 	size_t	size;
231 	uint_t	min_ndata;
232 	uint_t	max_ndata;
233 } kstat_data_type[KSTAT_NUM_TYPES] = {
234 	{ "raw",		1,			0,	INT_MAX	},
235 	{ "name=value",		sizeof (kstat_named_t),	0,	INT_MAX	},
236 	{ "interrupt",		sizeof (kstat_intr_t),	1,	1	},
237 	{ "i/o",		sizeof (kstat_io_t),	1,	1	},
238 	{ "event_timer",	sizeof (kstat_timer_t),	0,	INT_MAX	},
239 };
240 
241 int
242 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
243 {
244 	ekstat_t *e = (ekstat_t *)k;
245 	kstat_zone_t *kz;
246 
247 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
248 	for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
249 		if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
250 			return (1);
251 		if (zoneid == kz->zoneid)
252 			return (1);
253 	}
254 	return (0);
255 }
256 
257 void
258 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
259 {
260 	ekstat_t *e = (ekstat_t *)k;
261 	kstat_zone_t *kz, *t = NULL;
262 
263 	mutex_enter(&kstat_chain_lock);
264 	if (zoneid == e->e_zone.zoneid) {
265 		kz = e->e_zone.next;
266 		ASSERT(kz != NULL);
267 		e->e_zone.zoneid = kz->zoneid;
268 		e->e_zone.next = kz->next;
269 		goto out;
270 	}
271 	for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
272 		if (kz->next->zoneid == zoneid) {
273 			t = kz->next;
274 			kz->next = t->next;
275 			break;
276 		}
277 	}
278 	ASSERT(t != NULL);	/* we removed something */
279 	kz = t;
280 out:
281 	kstat_chain_id++;
282 	mutex_exit(&kstat_chain_lock);
283 	kmem_free(kz, sizeof (*kz));
284 }
285 
286 void
287 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
288 {
289 	ekstat_t *e = (ekstat_t *)k;
290 	kstat_zone_t *kz;
291 
292 	kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
293 	if (kz == NULL)
294 		return;
295 	mutex_enter(&kstat_chain_lock);
296 	kz->zoneid = zoneid;
297 	kz->next = e->e_zone.next;
298 	e->e_zone.next = kz;
299 	kstat_chain_id++;
300 	mutex_exit(&kstat_chain_lock);
301 }
302 
303 /*
304  * Compare the list of zones for the given kstats, returning 0 if they match
305  * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
306  * In practice, this is called indirectly by kstat_hold_byname(), so one of the
307  * two lists always has one element, and this is an O(n) operation rather than
308  * O(n^2).
309  */
310 static int
311 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
312 {
313 	kstat_zone_t *kz1, *kz2;
314 
315 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
316 	for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
317 		for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
318 			if (kz1->zoneid == ALL_ZONES ||
319 			    kz2->zoneid == ALL_ZONES)
320 				return (0);
321 			if (kz1->zoneid == kz2->zoneid)
322 				return (0);
323 		}
324 	}
325 	return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
326 }
327 
328 /*
329  * Support for keeping kstats sorted in AVL trees for fast lookups.
330  */
331 static int
332 kstat_compare_bykid(const void *a1, const void *a2)
333 {
334 	const kstat_t *k1 = a1;
335 	const kstat_t *k2 = a2;
336 
337 	if (k1->ks_kid < k2->ks_kid)
338 		return (-1);
339 	if (k1->ks_kid > k2->ks_kid)
340 		return (1);
341 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
342 }
343 
344 static int
345 kstat_compare_byname(const void *a1, const void *a2)
346 {
347 	const kstat_t *k1 = a1;
348 	const kstat_t *k2 = a2;
349 	int s;
350 
351 	s = strcmp(k1->ks_module, k2->ks_module);
352 	if (s > 0)
353 		return (1);
354 	if (s < 0)
355 		return (-1);
356 
357 	if (k1->ks_instance < k2->ks_instance)
358 		return (-1);
359 	if (k1->ks_instance > k2->ks_instance)
360 		return (1);
361 
362 	s = strcmp(k1->ks_name, k2->ks_name);
363 	if (s > 0)
364 		return (1);
365 	if (s < 0)
366 		return (-1);
367 
368 	return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
369 }
370 
371 static kstat_t *
372 kstat_hold(avl_tree_t *t, ekstat_t *template)
373 {
374 	kstat_t *ksp;
375 	ekstat_t *e;
376 
377 	mutex_enter(&kstat_chain_lock);
378 	for (;;) {
379 		ksp = avl_find(t, template, NULL);
380 		if (ksp == NULL)
381 			break;
382 		e = (ekstat_t *)ksp;
383 		if (e->e_owner == NULL) {
384 			e->e_owner = curthread;
385 			break;
386 		}
387 		cv_wait(&e->e_cv, &kstat_chain_lock);
388 	}
389 	mutex_exit(&kstat_chain_lock);
390 	return (ksp);
391 }
392 
393 void
394 kstat_rele(kstat_t *ksp)
395 {
396 	ekstat_t *e = (ekstat_t *)ksp;
397 
398 	mutex_enter(&kstat_chain_lock);
399 	ASSERT(e->e_owner == curthread);
400 	e->e_owner = NULL;
401 	cv_broadcast(&e->e_cv);
402 	mutex_exit(&kstat_chain_lock);
403 }
404 
405 kstat_t *
406 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
407 {
408 	ekstat_t e;
409 
410 	e.e_ks.ks_kid = kid;
411 	e.e_zone.zoneid = zoneid;
412 	e.e_zone.next = NULL;
413 
414 	return (kstat_hold(&kstat_avl_bykid, &e));
415 }
416 
417 kstat_t *
418 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
419     zoneid_t ks_zoneid)
420 {
421 	ekstat_t e;
422 
423 	kstat_set_string(e.e_ks.ks_module, ks_module);
424 	e.e_ks.ks_instance = ks_instance;
425 	kstat_set_string(e.e_ks.ks_name, ks_name);
426 	e.e_zone.zoneid = ks_zoneid;
427 	e.e_zone.next = NULL;
428 	return (kstat_hold(&kstat_avl_byname, &e));
429 }
430 
431 static ekstat_t *
432 kstat_alloc(size_t size)
433 {
434 	ekstat_t *e = NULL;
435 
436 	size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
437 
438 	if (kstat_arena == NULL) {
439 		if (size <= kstat_initial_avail) {
440 			e = kstat_initial_ptr;
441 			kstat_initial_ptr = (char *)kstat_initial_ptr + size;
442 			kstat_initial_avail -= size;
443 		}
444 	} else {
445 		e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
446 	}
447 
448 	if (e != NULL) {
449 		bzero(e, size);
450 		e->e_size = size;
451 		cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
452 	}
453 
454 	return (e);
455 }
456 
457 static void
458 kstat_free(ekstat_t *e)
459 {
460 	cv_destroy(&e->e_cv);
461 	vmem_free(kstat_arena, e, e->e_size);
462 }
463 
464 /*
465  * Create various system kstats.
466  */
467 void
468 kstat_init(void)
469 {
470 	kstat_t *ksp;
471 	ekstat_t *e;
472 	avl_tree_t *t = &kstat_avl_bykid;
473 
474 	/*
475 	 * Set up the kstat vmem arena.
476 	 */
477 	kstat_arena = vmem_create("kstat",
478 	    kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
479 	    segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
480 
481 	/*
482 	 * Make initial kstats appear as though they were allocated.
483 	 */
484 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
485 		(void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
486 		    0, 0, e, (char *)e + e->e_size,
487 		    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
488 
489 	/*
490 	 * The mother of all kstats.  The first kstat in the system, which
491 	 * always has KID 0, has the headers for all kstats (including itself)
492 	 * as its data.  Thus, the kstat driver does not need any special
493 	 * interface to extract the kstat chain.
494 	 */
495 	kstat_chain_id = 0;
496 	ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
497 	    0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
498 	if (ksp) {
499 		ksp->ks_lock = &kstat_chain_lock;
500 		ksp->ks_update = header_kstat_update;
501 		ksp->ks_snapshot = header_kstat_snapshot;
502 		kstat_install(ksp);
503 	} else {
504 		panic("cannot create kstat 'kstat_headers'");
505 	}
506 
507 	ksp = kstat_create("unix", 0, "kstat_types", "kstat",
508 	    KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
509 	if (ksp) {
510 		int i;
511 		kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
512 
513 		for (i = 0; i < KSTAT_NUM_TYPES; i++) {
514 			kstat_named_init(&kn[i], kstat_data_type[i].name,
515 			    KSTAT_DATA_ULONG);
516 			kn[i].value.ul = i;
517 		}
518 		kstat_install(ksp);
519 	}
520 
521 	ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
522 	    sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
523 	if (ksp) {
524 		ksp->ks_data = (void *) &sysinfo;
525 		kstat_install(ksp);
526 	}
527 
528 	ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
529 	    sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
530 	if (ksp) {
531 		ksp->ks_data = (void *) &vminfo;
532 		kstat_install(ksp);
533 	}
534 
535 	ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
536 	    segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
537 	if (ksp) {
538 		ksp->ks_data = (void *) segmapcnt_ptr;
539 		ksp->ks_update = segmap_kstat_update;
540 		kstat_install(ksp);
541 	}
542 
543 	ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
544 	    biostats_ndata, KSTAT_FLAG_VIRTUAL);
545 	if (ksp) {
546 		ksp->ks_data = (void *) biostats_ptr;
547 		kstat_install(ksp);
548 	}
549 
550 	ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
551 	    sizeof (struct var), KSTAT_FLAG_VIRTUAL);
552 	if (ksp) {
553 		ksp->ks_data = (void *) &v;
554 		kstat_install(ksp);
555 	}
556 
557 	ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
558 	    sizeof (system_misc_kstat) / sizeof (kstat_named_t),
559 	    KSTAT_FLAG_VIRTUAL);
560 	if (ksp) {
561 		ksp->ks_data = (void *) &system_misc_kstat;
562 		ksp->ks_update = system_misc_kstat_update;
563 		kstat_install(ksp);
564 	}
565 
566 	ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
567 	    sizeof (system_pages_kstat) / sizeof (kstat_named_t),
568 	    KSTAT_FLAG_VIRTUAL);
569 	if (ksp) {
570 		ksp->ks_data = (void *) &system_pages_kstat;
571 		ksp->ks_update = system_pages_kstat_update;
572 		kstat_install(ksp);
573 	}
574 
575 	ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
576 	    pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
577 
578 	if (ksp) {
579 		ksp->ks_data = pollstats_ptr;
580 		kstat_install(ksp);
581 	}
582 }
583 
584 /*
585  * Caller of this should ensure that the string pointed by src
586  * doesn't change while kstat's lock is held. Not doing so defeats
587  * kstat's snapshot strategy as explained in <sys/kstat.h>
588  */
589 void
590 kstat_named_setstr(kstat_named_t *knp, const char *src)
591 {
592 	if (knp->data_type != KSTAT_DATA_STRING)
593 		panic("kstat_named_setstr('%p', '%p'): "
594 		    "named kstat is not of type KSTAT_DATA_STRING",
595 		    (void *)knp, (void *)src);
596 
597 	KSTAT_NAMED_STR_PTR(knp) = (char *)src;
598 	if (src != NULL)
599 		KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
600 	else
601 		KSTAT_NAMED_STR_BUFLEN(knp) = 0;
602 }
603 
604 void
605 kstat_set_string(char *dst, const char *src)
606 {
607 	bzero(dst, KSTAT_STRLEN);
608 	(void) strncpy(dst, src, KSTAT_STRLEN - 1);
609 }
610 
611 void
612 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
613 {
614 	kstat_set_string(knp->name, name);
615 	knp->data_type = data_type;
616 
617 	if (data_type == KSTAT_DATA_STRING)
618 		kstat_named_setstr(knp, NULL);
619 }
620 
621 void
622 kstat_timer_init(kstat_timer_t *ktp, const char *name)
623 {
624 	kstat_set_string(ktp->name, name);
625 }
626 
627 /* ARGSUSED */
628 static int
629 default_kstat_update(kstat_t *ksp, int rw)
630 {
631 	uint_t i;
632 	size_t len = 0;
633 	kstat_named_t *knp;
634 
635 	/*
636 	 * Named kstats with variable-length long strings have a standard
637 	 * way of determining how much space is needed to hold the snapshot:
638 	 */
639 	if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
640 	    (ksp->ks_flags & KSTAT_FLAG_VAR_SIZE)) {
641 
642 		/*
643 		 * Add in the space required for the strings
644 		 */
645 		knp = KSTAT_NAMED_PTR(ksp);
646 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
647 			if (knp->data_type == KSTAT_DATA_STRING)
648 				len += KSTAT_NAMED_STR_BUFLEN(knp);
649 		}
650 		ksp->ks_data_size =
651 		    ksp->ks_ndata * sizeof (kstat_named_t) + len;
652 	}
653 	return (0);
654 }
655 
656 static int
657 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
658 {
659 	kstat_io_t *kiop;
660 	hrtime_t cur_time;
661 	size_t	namedsz;
662 
663 	ksp->ks_snaptime = cur_time = gethrtime();
664 
665 	if (rw == KSTAT_WRITE) {
666 		if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
667 			return (EACCES);
668 		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
669 		return (0);
670 	}
671 
672 	/*
673 	 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
674 	 * number of kstat_named_t structures, followed by an optional
675 	 * string segment. The ks_data generally holds only the
676 	 * kstat_named_t structures. So we copy it first. The strings,
677 	 * if any, are copied below. For other kstat types, ks_data holds the
678 	 * entire buffer.
679 	 */
680 
681 	namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
682 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
683 		bcopy(ksp->ks_data, buf, namedsz);
684 	else
685 		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
686 
687 	/*
688 	 * Apply kstat type-specific data massaging
689 	 */
690 	switch (ksp->ks_type) {
691 
692 	case KSTAT_TYPE_IO:
693 		/*
694 		 * Normalize time units and deal with incomplete transactions
695 		 */
696 		kiop = (kstat_io_t *)buf;
697 
698 		scalehrtime(&kiop->wtime);
699 		scalehrtime(&kiop->wlentime);
700 		scalehrtime(&kiop->wlastupdate);
701 		scalehrtime(&kiop->rtime);
702 		scalehrtime(&kiop->rlentime);
703 		scalehrtime(&kiop->rlastupdate);
704 
705 		if (kiop->wcnt != 0) {
706 			/* like kstat_waitq_exit */
707 			hrtime_t wfix = cur_time - kiop->wlastupdate;
708 			kiop->wlastupdate = cur_time;
709 			kiop->wlentime += kiop->wcnt * wfix;
710 			kiop->wtime += wfix;
711 		}
712 
713 		if (kiop->rcnt != 0) {
714 			/* like kstat_runq_exit */
715 			hrtime_t rfix = cur_time - kiop->rlastupdate;
716 			kiop->rlastupdate = cur_time;
717 			kiop->rlentime += kiop->rcnt * rfix;
718 			kiop->rtime += rfix;
719 		}
720 		break;
721 
722 	case KSTAT_TYPE_NAMED:
723 		/*
724 		 * Massage any long strings in at the end of the buffer
725 		 */
726 		if (ksp->ks_data_size > namedsz) {
727 			uint_t i;
728 			kstat_named_t *knp = buf;
729 			char *dst = (char *)(knp + ksp->ks_ndata);
730 			/*
731 			 * Copy strings and update pointers
732 			 */
733 			for (i = 0; i < ksp->ks_ndata; i++, knp++) {
734 				if (knp->data_type == KSTAT_DATA_STRING &&
735 				    KSTAT_NAMED_STR_PTR(knp) != NULL) {
736 					bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
737 					    KSTAT_NAMED_STR_BUFLEN(knp));
738 					KSTAT_NAMED_STR_PTR(knp) = dst;
739 					dst += KSTAT_NAMED_STR_BUFLEN(knp);
740 				}
741 			}
742 			ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
743 		}
744 		break;
745 	}
746 	return (0);
747 }
748 
749 static int
750 header_kstat_update(kstat_t *header_ksp, int rw)
751 {
752 	int nkstats = 0;
753 	ekstat_t *e;
754 	avl_tree_t *t = &kstat_avl_bykid;
755 	zoneid_t zoneid;
756 
757 	if (rw == KSTAT_WRITE)
758 		return (EACCES);
759 
760 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
761 
762 	zoneid = getzoneid();
763 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
764 		if (kstat_zone_find((kstat_t *)e, zoneid) &&
765 		    (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
766 			nkstats++;
767 		}
768 	}
769 	header_ksp->ks_ndata = nkstats;
770 	header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
771 	return (0);
772 }
773 
774 /*
775  * Copy out the data section of kstat 0, which consists of the list
776  * of all kstat headers.  By specification, these headers must be
777  * copied out in order of increasing KID.
778  */
779 static int
780 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
781 {
782 	ekstat_t *e;
783 	avl_tree_t *t = &kstat_avl_bykid;
784 	zoneid_t zoneid;
785 
786 	header_ksp->ks_snaptime = gethrtime();
787 
788 	if (rw == KSTAT_WRITE)
789 		return (EACCES);
790 
791 	ASSERT(MUTEX_HELD(&kstat_chain_lock));
792 
793 	zoneid = getzoneid();
794 	for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
795 		if (kstat_zone_find((kstat_t *)e, zoneid) &&
796 		    (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
797 			bcopy(&e->e_ks, buf, sizeof (kstat_t));
798 			buf = (char *)buf + sizeof (kstat_t);
799 		}
800 	}
801 
802 	return (0);
803 }
804 
805 /* ARGSUSED */
806 static int
807 system_misc_kstat_update(kstat_t *ksp, int rw)
808 {
809 	int myncpus = ncpus;
810 	int *loadavgp = &avenrun[0];
811 	time_t zone_boot_time;
812 	clock_t zone_lbolt;
813 	hrtime_t zone_hrtime;
814 	size_t zone_nproc;
815 
816 	if (rw == KSTAT_WRITE)
817 		return (EACCES);
818 
819 	if (!INGLOBALZONE(curproc)) {
820 		/*
821 		 * Here we grab cpu_lock which is OK as long as no-one in the
822 		 * future attempts to lookup this particular kstat
823 		 * (unix:0:system_misc) while holding cpu_lock.
824 		 */
825 		mutex_enter(&cpu_lock);
826 		if (pool_pset_enabled()) {
827 			myncpus = zone_ncpus_get(curproc->p_zone);
828 			ASSERT(myncpus > 0);
829 		}
830 		mutex_exit(&cpu_lock);
831 		loadavgp = &curproc->p_zone->zone_avenrun[0];
832 	}
833 
834 	if (INGLOBALZONE(curproc)) {
835 		zone_boot_time = boot_time;
836 		zone_lbolt = ddi_get_lbolt();
837 		zone_nproc = nproc;
838 	} else {
839 		zone_boot_time = curproc->p_zone->zone_boot_time;
840 
841 		zone_hrtime = gethrtime();
842 		zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
843 		    NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
844 		mutex_enter(&curproc->p_zone->zone_nlwps_lock);
845 		zone_nproc = curproc->p_zone->zone_nprocs;
846 		mutex_exit(&curproc->p_zone->zone_nlwps_lock);
847 	}
848 
849 	system_misc_kstat.ncpus.value.ui32		= (uint32_t)myncpus;
850 	system_misc_kstat.lbolt.value.ui32		= (uint32_t)zone_lbolt;
851 	system_misc_kstat.deficit.value.ui32		= (uint32_t)deficit;
852 	system_misc_kstat.clk_intr.value.ui32		= (uint32_t)zone_lbolt;
853 	system_misc_kstat.vac.value.ui32		= (uint32_t)vac;
854 	system_misc_kstat.nproc.value.ui32		= (uint32_t)zone_nproc;
855 	system_misc_kstat.avenrun_1min.value.ui32	= (uint32_t)loadavgp[0];
856 	system_misc_kstat.avenrun_5min.value.ui32	= (uint32_t)loadavgp[1];
857 	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];
858 	system_misc_kstat.boot_time.value.ui32		= (uint32_t)
859 	    zone_boot_time;
860 	system_misc_kstat.nsec_per_tick.value.ui32	= (uint32_t)
861 	    nsec_per_tick;
862 	return (0);
863 }
864 
865 #ifdef	__sparc
866 extern caddr_t	econtig32;
867 #else	/* !__sparc */
868 extern caddr_t	econtig;
869 #endif	/* __sparc */
870 
871 /* ARGSUSED */
872 static int
873 system_pages_kstat_update(kstat_t *ksp, int rw)
874 {
875 	kobj_stat_t kobj_stat;
876 
877 	if (rw == KSTAT_WRITE) {
878 		return (EACCES);
879 	}
880 
881 	kobj_stat_get(&kobj_stat);
882 	system_pages_kstat.physmem.value.ul	= (ulong_t)physmem;
883 	system_pages_kstat.nalloc.value.ul	= kobj_stat.nalloc;
884 	system_pages_kstat.nfree.value.ul	= kobj_stat.nfree;
885 	system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
886 	system_pages_kstat.nfree_calls.value.ul	= kobj_stat.nfree_calls;
887 	system_pages_kstat.kernelbase.value.ul	= (ulong_t)KERNELBASE;
888 
889 #ifdef	__sparc
890 	/*
891 	 * kstat should REALLY be modified to also report kmem64_base and
892 	 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
893 	 * [ kernelbase .. econtig ] no longer is truly reflective of the
894 	 * kernel's vallocs...
895 	 */
896 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig32;
897 #else	/* !__sparc */
898 	system_pages_kstat.econtig.value.ul	= (ulong_t)econtig;
899 #endif	/* __sparc */
900 
901 	system_pages_kstat.freemem.value.ul	= (ulong_t)freemem;
902 	system_pages_kstat.availrmem.value.ul	= (ulong_t)availrmem;
903 	system_pages_kstat.lotsfree.value.ul	= (ulong_t)lotsfree;
904 	system_pages_kstat.desfree.value.ul	= (ulong_t)desfree;
905 	system_pages_kstat.minfree.value.ul	= (ulong_t)minfree;
906 	system_pages_kstat.fastscan.value.ul	= (ulong_t)fastscan;
907 	system_pages_kstat.slowscan.value.ul	= (ulong_t)slowscan;
908 	system_pages_kstat.nscan.value.ul	= (ulong_t)nscan;
909 	system_pages_kstat.desscan.value.ul	= (ulong_t)desscan;
910 	system_pages_kstat.pagesfree.value.ul	= (ulong_t)freemem;
911 	system_pages_kstat.pageslocked.value.ul	= (ulong_t)(availrmem_initial -
912 	    availrmem);
913 	system_pages_kstat.pagestotal.value.ul	= (ulong_t)total_pages;
914 	/*
915 	 * pp_kernel represents total pages used by the kernel since the
916 	 * startup. This formula takes into account the boottime kernel
917 	 * footprint and also considers the availrmem changes because of
918 	 * user explicit page locking.
919 	 */
920 	system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
921 	    obp_pages - availrmem - k_anoninfo.ani_mem_resv -
922 	    anon_segkp_pages_locked - pages_locked -
923 	    pages_claimed - pages_useclaim);
924 
925 	return (0);
926 }
927 
928 kstat_t *
929 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
930     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
931 {
932 	return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
933 	    ks_type, ks_ndata, ks_flags, ALL_ZONES));
934 }
935 
936 /*
937  * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
938  * the specified name exists, reactivate it.  Returns a pointer to the kstat
939  * on success, NULL on failure.  The kstat will not be visible to the
940  * kstat driver until kstat_install().
941  */
942 kstat_t *
943 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
944     const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
945     zoneid_t ks_zoneid)
946 {
947 	size_t ks_data_size;
948 	kstat_t *ksp;
949 	ekstat_t *e;
950 	avl_index_t where;
951 	char namebuf[KSTAT_STRLEN + 16];
952 
953 	if (avl_numnodes(&kstat_avl_bykid) == 0) {
954 		avl_create(&kstat_avl_bykid, kstat_compare_bykid,
955 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
956 
957 		avl_create(&kstat_avl_byname, kstat_compare_byname,
958 		    sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
959 	}
960 
961 	/*
962 	 * If ks_name == NULL, set the ks_name to <module><instance>.
963 	 */
964 	if (ks_name == NULL) {
965 		char buf[KSTAT_STRLEN];
966 		kstat_set_string(buf, ks_module);
967 		(void) sprintf(namebuf, "%s%d", buf, ks_instance);
968 		ks_name = namebuf;
969 	}
970 
971 	/*
972 	 * Make sure it's a valid kstat data type
973 	 */
974 	if (ks_type >= KSTAT_NUM_TYPES) {
975 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
976 		    "invalid kstat type %d",
977 		    ks_module, ks_instance, ks_name, ks_type);
978 		return (NULL);
979 	}
980 
981 	/*
982 	 * Don't allow persistent virtual kstats -- it makes no sense.
983 	 * ks_data points to garbage when the client goes away.
984 	 */
985 	if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
986 	    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
987 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
988 		    "cannot create persistent virtual kstat",
989 		    ks_module, ks_instance, ks_name);
990 		return (NULL);
991 	}
992 
993 	/*
994 	 * Don't allow variable-size physical kstats, since the framework's
995 	 * memory allocation for physical kstat data is fixed at creation time.
996 	 */
997 	if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
998 	    !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
999 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1000 		    "cannot create variable-size physical kstat",
1001 		    ks_module, ks_instance, ks_name);
1002 		return (NULL);
1003 	}
1004 
1005 	/*
1006 	 * Make sure the number of data fields is within legal range
1007 	 */
1008 	if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1009 	    ks_ndata > kstat_data_type[ks_type].max_ndata) {
1010 		cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1011 		    "ks_ndata=%d out of range [%d, %d]",
1012 		    ks_module, ks_instance, ks_name, (int)ks_ndata,
1013 		    kstat_data_type[ks_type].min_ndata,
1014 		    kstat_data_type[ks_type].max_ndata);
1015 		return (NULL);
1016 	}
1017 
1018 	ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1019 
1020 	/*
1021 	 * If the named kstat already exists and is dormant, reactivate it.
1022 	 */
1023 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1024 	if (ksp != NULL) {
1025 		if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1026 			/*
1027 			 * The named kstat exists but is not dormant --
1028 			 * this is a kstat namespace collision.
1029 			 */
1030 			kstat_rele(ksp);
1031 			cmn_err(CE_WARN,
1032 			    "kstat_create('%s', %d, '%s'): namespace collision",
1033 			    ks_module, ks_instance, ks_name);
1034 			return (NULL);
1035 		}
1036 		if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1037 		    (ksp->ks_type != ks_type) ||
1038 		    (ksp->ks_ndata != ks_ndata) ||
1039 		    (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1040 			/*
1041 			 * The name is the same, but the other key parameters
1042 			 * differ from those of the dormant kstat -- bogus.
1043 			 */
1044 			kstat_rele(ksp);
1045 			cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1046 			    "invalid reactivation of dormant kstat",
1047 			    ks_module, ks_instance, ks_name);
1048 			return (NULL);
1049 		}
1050 		/*
1051 		 * Return dormant kstat pointer to caller.  As usual,
1052 		 * the kstat is marked invalid until kstat_install().
1053 		 */
1054 		ksp->ks_flags |= KSTAT_FLAG_INVALID;
1055 		kstat_rele(ksp);
1056 		return (ksp);
1057 	}
1058 
1059 	/*
1060 	 * Allocate memory for the new kstat header and, if this is a physical
1061 	 * kstat, the data section.
1062 	 */
1063 	e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1064 	if (e == NULL) {
1065 		cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1066 		    "insufficient kernel memory",
1067 		    ks_module, ks_instance, ks_name);
1068 		return (NULL);
1069 	}
1070 
1071 	/*
1072 	 * Initialize as many fields as we can.  The caller may reset
1073 	 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1074 	 * Creators of virtual kstats may also reset ks_data.  It is
1075 	 * also up to the caller to initialize the kstat data section,
1076 	 * if necessary.  All initialization must be complete before
1077 	 * calling kstat_install().
1078 	 */
1079 	e->e_zone.zoneid = ks_zoneid;
1080 	e->e_zone.next = NULL;
1081 
1082 	ksp = &e->e_ks;
1083 	ksp->ks_crtime		= gethrtime();
1084 	kstat_set_string(ksp->ks_module, ks_module);
1085 	ksp->ks_instance	= ks_instance;
1086 	kstat_set_string(ksp->ks_name, ks_name);
1087 	ksp->ks_type		= ks_type;
1088 	kstat_set_string(ksp->ks_class, ks_class);
1089 	ksp->ks_flags		= ks_flags | KSTAT_FLAG_INVALID;
1090 	if (ks_flags & KSTAT_FLAG_VIRTUAL)
1091 		ksp->ks_data	= NULL;
1092 	else
1093 		ksp->ks_data	= (void *)(e + 1);
1094 	ksp->ks_ndata		= ks_ndata;
1095 	ksp->ks_data_size	= ks_data_size;
1096 	ksp->ks_snaptime	= ksp->ks_crtime;
1097 	ksp->ks_update		= default_kstat_update;
1098 	ksp->ks_private		= NULL;
1099 	ksp->ks_snapshot	= default_kstat_snapshot;
1100 	ksp->ks_lock		= NULL;
1101 
1102 	mutex_enter(&kstat_chain_lock);
1103 
1104 	/*
1105 	 * Add our kstat to the AVL trees.
1106 	 */
1107 	if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1108 		mutex_exit(&kstat_chain_lock);
1109 		cmn_err(CE_WARN,
1110 		    "kstat_create('%s', %d, '%s'): namespace collision",
1111 		    ks_module, ks_instance, ks_name);
1112 		kstat_free(e);
1113 		return (NULL);
1114 	}
1115 	avl_insert(&kstat_avl_byname, e, where);
1116 
1117 	/*
1118 	 * Loop around until we find an unused KID.
1119 	 */
1120 	do {
1121 		ksp->ks_kid = kstat_chain_id++;
1122 	} while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1123 	avl_insert(&kstat_avl_bykid, e, where);
1124 
1125 	mutex_exit(&kstat_chain_lock);
1126 
1127 	return (ksp);
1128 }
1129 
1130 /*
1131  * Activate a fully initialized kstat and make it visible to /dev/kstat.
1132  */
1133 void
1134 kstat_install(kstat_t *ksp)
1135 {
1136 	zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1137 
1138 	/*
1139 	 * If this is a variable-size kstat, it MUST provide kstat data locking
1140 	 * to prevent data-size races with kstat readers.
1141 	 */
1142 	if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1143 		panic("kstat_install('%s', %d, '%s'): "
1144 		    "cannot create variable-size kstat without data lock",
1145 		    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1146 	}
1147 
1148 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1149 		cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1150 		    (void *)ksp);
1151 		return;
1152 	}
1153 
1154 	if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1155 		int has_long_strings = 0;
1156 		uint_t i;
1157 		kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1158 
1159 		for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1160 			if (knp->data_type == KSTAT_DATA_STRING) {
1161 				has_long_strings = 1;
1162 				break;
1163 			}
1164 		}
1165 		/*
1166 		 * It is an error for a named kstat with fields of
1167 		 * KSTAT_DATA_STRING to be non-virtual.
1168 		 */
1169 		if (has_long_strings && !(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) {
1170 			panic("kstat_install('%s', %d, '%s'): "
1171 			    "named kstat containing KSTAT_DATA_STRING "
1172 			    "is not virtual",
1173 			    ksp->ks_module, ksp->ks_instance,
1174 			    ksp->ks_name);
1175 		}
1176 		/*
1177 		 * The default snapshot routine does not handle KSTAT_WRITE
1178 		 * for long strings.
1179 		 */
1180 		if (has_long_strings && (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1181 		    (ksp->ks_snapshot == default_kstat_snapshot)) {
1182 			panic("kstat_install('%s', %d, '%s'): "
1183 			    "named kstat containing KSTAT_DATA_STRING "
1184 			    "is writable but uses default snapshot routine",
1185 			    ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1186 		}
1187 	}
1188 
1189 	if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1190 
1191 		/*
1192 		 * We are reactivating a dormant kstat.  Initialize the
1193 		 * caller's underlying data to the value it had when the
1194 		 * kstat went dormant, and mark the kstat as active.
1195 		 * Grab the provider's kstat lock if it's not already held.
1196 		 */
1197 		kmutex_t *lp = ksp->ks_lock;
1198 		if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1199 			mutex_enter(lp);
1200 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1201 			mutex_exit(lp);
1202 		} else {
1203 			(void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1204 		}
1205 		ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1206 	}
1207 
1208 	/*
1209 	 * Now that the kstat is active, make it visible to the kstat driver.
1210 	 * When copying out kstats the count is determined in
1211 	 * header_kstat_update() and actually copied into kbuf in
1212 	 * header_kstat_snapshot(). kstat_chain_lock is held across the two
1213 	 * calls to ensure that this list doesn't change. Thus, we need to
1214 	 * also take the lock to ensure that the we don't copy the new kstat
1215 	 * in the 2nd pass and overrun the buf.
1216 	 */
1217 	mutex_enter(&kstat_chain_lock);
1218 	ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1219 	mutex_exit(&kstat_chain_lock);
1220 	kstat_rele(ksp);
1221 }
1222 
1223 /*
1224  * Remove a kstat from the system.  Or, if it's a persistent kstat,
1225  * just update the data and mark it as dormant.
1226  */
1227 void
1228 kstat_delete(kstat_t *ksp)
1229 {
1230 	kmutex_t *lp;
1231 	ekstat_t *e = (ekstat_t *)ksp;
1232 	zoneid_t zoneid;
1233 	kstat_zone_t *kz;
1234 
1235 	ASSERT(ksp != NULL);
1236 
1237 	if (ksp == NULL)
1238 		return;
1239 
1240 	zoneid = e->e_zone.zoneid;
1241 
1242 	lp = ksp->ks_lock;
1243 
1244 	if (lp != NULL && MUTEX_HELD(lp)) {
1245 		panic("kstat_delete(%p): caller holds data lock %p",
1246 		    (void *)ksp, (void *)lp);
1247 	}
1248 
1249 	if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1250 		cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1251 		    (void *)ksp);
1252 		return;
1253 	}
1254 
1255 	if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1256 		/*
1257 		 * Update the data one last time, so that all activity
1258 		 * prior to going dormant has been accounted for.
1259 		 */
1260 		KSTAT_ENTER(ksp);
1261 		(void) KSTAT_UPDATE(ksp, KSTAT_READ);
1262 		KSTAT_EXIT(ksp);
1263 
1264 		/*
1265 		 * Mark the kstat as dormant and restore caller-modifiable
1266 		 * fields to default values, so the kstat is readable during
1267 		 * the dormant phase.
1268 		 */
1269 		ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1270 		ksp->ks_lock = NULL;
1271 		ksp->ks_update = default_kstat_update;
1272 		ksp->ks_private = NULL;
1273 		ksp->ks_snapshot = default_kstat_snapshot;
1274 		kstat_rele(ksp);
1275 		return;
1276 	}
1277 
1278 	/*
1279 	 * Remove the kstat from the framework's AVL trees,
1280 	 * free the allocated memory, and increment kstat_chain_id so
1281 	 * /dev/kstat clients can detect the event.
1282 	 */
1283 	mutex_enter(&kstat_chain_lock);
1284 	avl_remove(&kstat_avl_bykid, e);
1285 	avl_remove(&kstat_avl_byname, e);
1286 	kstat_chain_id++;
1287 	mutex_exit(&kstat_chain_lock);
1288 
1289 	kz = e->e_zone.next;
1290 	while (kz != NULL) {
1291 		kstat_zone_t *t = kz;
1292 
1293 		kz = kz->next;
1294 		kmem_free(t, sizeof (*t));
1295 	}
1296 	kstat_rele(ksp);
1297 	kstat_free(e);
1298 }
1299 
1300 void
1301 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1302     const char *ks_name, zoneid_t ks_zoneid)
1303 {
1304 	kstat_t *ksp;
1305 
1306 	ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1307 	if (ksp != NULL) {
1308 		kstat_rele(ksp);
1309 		kstat_delete(ksp);
1310 	}
1311 }
1312 
1313 void
1314 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1315 {
1316 	kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1317 }
1318 
1319 /*
1320  * The sparc V9 versions of these routines can be much cheaper than
1321  * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1322  * For simplicity, however, we always feed the C versions to lint.
1323  */
1324 #if !defined(__sparc) || defined(lint) || defined(__lint)
1325 
1326 void
1327 kstat_waitq_enter(kstat_io_t *kiop)
1328 {
1329 	hrtime_t new, delta;
1330 	ulong_t wcnt;
1331 
1332 	new = gethrtime_unscaled();
1333 	delta = new - kiop->wlastupdate;
1334 	kiop->wlastupdate = new;
1335 	wcnt = kiop->wcnt++;
1336 	if (wcnt != 0) {
1337 		kiop->wlentime += delta * wcnt;
1338 		kiop->wtime += delta;
1339 	}
1340 }
1341 
1342 void
1343 kstat_waitq_exit(kstat_io_t *kiop)
1344 {
1345 	hrtime_t new, delta;
1346 	ulong_t wcnt;
1347 
1348 	new = gethrtime_unscaled();
1349 	delta = new - kiop->wlastupdate;
1350 	kiop->wlastupdate = new;
1351 	wcnt = kiop->wcnt--;
1352 	ASSERT((int)wcnt > 0);
1353 	kiop->wlentime += delta * wcnt;
1354 	kiop->wtime += delta;
1355 }
1356 
1357 void
1358 kstat_runq_enter(kstat_io_t *kiop)
1359 {
1360 	hrtime_t new, delta;
1361 	ulong_t rcnt;
1362 
1363 	new = gethrtime_unscaled();
1364 	delta = new - kiop->rlastupdate;
1365 	kiop->rlastupdate = new;
1366 	rcnt = kiop->rcnt++;
1367 	if (rcnt != 0) {
1368 		kiop->rlentime += delta * rcnt;
1369 		kiop->rtime += delta;
1370 	}
1371 }
1372 
1373 void
1374 kstat_runq_exit(kstat_io_t *kiop)
1375 {
1376 	hrtime_t new, delta;
1377 	ulong_t rcnt;
1378 
1379 	new = gethrtime_unscaled();
1380 	delta = new - kiop->rlastupdate;
1381 	kiop->rlastupdate = new;
1382 	rcnt = kiop->rcnt--;
1383 	ASSERT((int)rcnt > 0);
1384 	kiop->rlentime += delta * rcnt;
1385 	kiop->rtime += delta;
1386 }
1387 
1388 void
1389 kstat_waitq_to_runq(kstat_io_t *kiop)
1390 {
1391 	hrtime_t new, delta;
1392 	ulong_t wcnt, rcnt;
1393 
1394 	new = gethrtime_unscaled();
1395 
1396 	delta = new - kiop->wlastupdate;
1397 	kiop->wlastupdate = new;
1398 	wcnt = kiop->wcnt--;
1399 	ASSERT((int)wcnt > 0);
1400 	kiop->wlentime += delta * wcnt;
1401 	kiop->wtime += delta;
1402 
1403 	delta = new - kiop->rlastupdate;
1404 	kiop->rlastupdate = new;
1405 	rcnt = kiop->rcnt++;
1406 	if (rcnt != 0) {
1407 		kiop->rlentime += delta * rcnt;
1408 		kiop->rtime += delta;
1409 	}
1410 }
1411 
1412 void
1413 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1414 {
1415 	hrtime_t new, delta;
1416 	ulong_t wcnt, rcnt;
1417 
1418 	new = gethrtime_unscaled();
1419 
1420 	delta = new - kiop->rlastupdate;
1421 	kiop->rlastupdate = new;
1422 	rcnt = kiop->rcnt--;
1423 	ASSERT((int)rcnt > 0);
1424 	kiop->rlentime += delta * rcnt;
1425 	kiop->rtime += delta;
1426 
1427 	delta = new - kiop->wlastupdate;
1428 	kiop->wlastupdate = new;
1429 	wcnt = kiop->wcnt++;
1430 	if (wcnt != 0) {
1431 		kiop->wlentime += delta * wcnt;
1432 		kiop->wtime += delta;
1433 	}
1434 }
1435 
1436 #endif
1437 
1438 void
1439 kstat_timer_start(kstat_timer_t *ktp)
1440 {
1441 	ktp->start_time = gethrtime();
1442 }
1443 
1444 void
1445 kstat_timer_stop(kstat_timer_t *ktp)
1446 {
1447 	hrtime_t	etime;
1448 	u_longlong_t	num_events;
1449 
1450 	ktp->stop_time = etime = gethrtime();
1451 	etime -= ktp->start_time;
1452 	num_events = ktp->num_events;
1453 	if (etime < ktp->min_time || num_events == 0)
1454 		ktp->min_time = etime;
1455 	if (etime > ktp->max_time)
1456 		ktp->max_time = etime;
1457 	ktp->elapsed_time += etime;
1458 	ktp->num_events = num_events + 1;
1459 }
1460