1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014, Joyent, Inc. All rights reserved.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * Kernel statistics framework
29 */
30
31 #include <sys/types.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/t_lock.h>
36 #include <sys/param.h>
37 #include <sys/errno.h>
38 #include <sys/vmem.h>
39 #include <sys/sysmacros.h>
40 #include <sys/cmn_err.h>
41 #include <sys/kstat.h>
42 #include <sys/sysinfo.h>
43 #include <sys/cpuvar.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/vnode.h>
47 #include <sys/vfs.h>
48 #include <sys/dnlc.h>
49 #include <sys/var.h>
50 #include <sys/debug.h>
51 #include <sys/kobj.h>
52 #include <sys/avl.h>
53 #include <sys/pool_pset.h>
54 #include <sys/cpupart.h>
55 #include <sys/zone.h>
56 #include <sys/loadavg.h>
57 #include <vm/page.h>
58 #include <vm/anon.h>
59 #include <vm/seg_kmem.h>
60
61 /*
62 * Global lock to protect the AVL trees and kstat_chain_id.
63 */
64 static kmutex_t kstat_chain_lock;
65
66 /*
67 * Every install/delete kstat bumps kstat_chain_id. This is used by:
68 *
69 * (1) /dev/kstat, to detect changes in the kstat chain across ioctls;
70 *
71 * (2) kstat_create(), to assign a KID (kstat ID) to each new kstat.
72 * /dev/kstat uses the KID as a cookie for kstat lookups.
73 *
74 * We reserve the first two IDs because some kstats are created before
75 * the well-known ones (kstat_headers = 0, kstat_types = 1).
76 *
77 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
78 * into a particular kstat, which is logically equivalent to a kstat being
79 * installed/deleted.
80 */
81
82 kid_t kstat_chain_id = 2;
83
84 /*
85 * As far as zones are concerned, there are 3 types of kstat:
86 *
87 * 1) Those which have a well-known name, and which should return per-zone data
88 * depending on which zone is doing the kstat_read(). sockfs:0:sock_unix_list
89 * is an example of this type of kstat.
90 *
91 * 2) Those which should only be exported to a particular list of zones.
92 * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
93 * able to see NFS mounts associated with zone B, while we want the
94 * global zone to be able to see all mounts on the system.
95 *
96 * 3) Those that can be exported to all zones. Most system-related
97 * kstats fall within this category.
98 *
 * An ekstat_t thus contains a list of zones that the kstat is to be
 * exported to. The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
105 *
106 * Writing to kstats is currently disallowed from within a non-global
107 * zone, although this restriction could be removed in the future.
108 */
109 typedef struct kstat_zone {
110 zoneid_t zoneid;
111 struct kstat_zone *next;
112 } kstat_zone_t;
113
114 /*
115 * Extended kstat structure -- for internal use only.
116 */
117 typedef struct ekstat {
118 kstat_t e_ks; /* the kstat itself */
119 size_t e_size; /* total allocation size */
120 kthread_t *e_owner; /* thread holding this kstat */
121 kcondvar_t e_cv; /* wait for owner == NULL */
122 avl_node_t e_avl_bykid; /* AVL tree to sort by KID */
123 avl_node_t e_avl_byname; /* AVL tree to sort by name */
124 kstat_zone_t e_zone; /* zone to export stats to */
125 } ekstat_t;
126
127 static uint64_t kstat_initial[8192];
128 static void *kstat_initial_ptr = kstat_initial;
129 static size_t kstat_initial_avail = sizeof (kstat_initial);
130 static vmem_t *kstat_arena;
131
132 #define KSTAT_ALIGN (sizeof (uint64_t))
133
134 static avl_tree_t kstat_avl_bykid;
135 static avl_tree_t kstat_avl_byname;
136
137 /*
138 * Various pointers we need to create kstats at boot time in kstat_init()
139 */
140 extern kstat_named_t *segmapcnt_ptr;
141 extern uint_t segmapcnt_ndata;
142 extern int segmap_kstat_update(kstat_t *, int);
143 extern kstat_named_t *biostats_ptr;
144 extern uint_t biostats_ndata;
145 extern kstat_named_t *pollstats_ptr;
146 extern uint_t pollstats_ndata;
147
148 extern int vac;
149 extern uint_t nproc;
150 extern time_t boot_time;
151 extern sysinfo_t sysinfo;
152 extern vminfo_t vminfo;
153
154 struct {
155 kstat_named_t ncpus;
156 kstat_named_t lbolt;
157 kstat_named_t deficit;
158 kstat_named_t clk_intr;
159 kstat_named_t vac;
160 kstat_named_t nproc;
161 kstat_named_t avenrun_1min;
162 kstat_named_t avenrun_5min;
163 kstat_named_t avenrun_15min;
164 kstat_named_t boot_time;
165 kstat_named_t nsec_per_tick;
166 } system_misc_kstat = {
167 { "ncpus", KSTAT_DATA_UINT32 },
168 { "lbolt", KSTAT_DATA_UINT32 },
169 { "deficit", KSTAT_DATA_UINT32 },
170 { "clk_intr", KSTAT_DATA_UINT32 },
171 { "vac", KSTAT_DATA_UINT32 },
172 { "nproc", KSTAT_DATA_UINT32 },
173 { "avenrun_1min", KSTAT_DATA_UINT32 },
174 { "avenrun_5min", KSTAT_DATA_UINT32 },
175 { "avenrun_15min", KSTAT_DATA_UINT32 },
176 { "boot_time", KSTAT_DATA_UINT32 },
177 { "nsec_per_tick", KSTAT_DATA_UINT32 },
178 };
179
180 struct {
181 kstat_named_t physmem;
182 kstat_named_t nalloc;
183 kstat_named_t nfree;
184 kstat_named_t nalloc_calls;
185 kstat_named_t nfree_calls;
186 kstat_named_t kernelbase;
187 kstat_named_t econtig;
188 kstat_named_t freemem;
189 kstat_named_t availrmem;
190 kstat_named_t lotsfree;
191 kstat_named_t desfree;
192 kstat_named_t minfree;
193 kstat_named_t fastscan;
194 kstat_named_t slowscan;
195 kstat_named_t nscan;
196 kstat_named_t desscan;
197 kstat_named_t pp_kernel;
198 kstat_named_t pagesfree;
199 kstat_named_t pageslocked;
200 kstat_named_t pagestotal;
201 kstat_named_t lowmemscan;
202 kstat_named_t nthrottle;
203 } system_pages_kstat = {
204 { "physmem", KSTAT_DATA_ULONG },
205 { "nalloc", KSTAT_DATA_ULONG },
206 { "nfree", KSTAT_DATA_ULONG },
207 { "nalloc_calls", KSTAT_DATA_ULONG },
208 { "nfree_calls", KSTAT_DATA_ULONG },
209 { "kernelbase", KSTAT_DATA_ULONG },
210 { "econtig", KSTAT_DATA_ULONG },
211 { "freemem", KSTAT_DATA_ULONG },
212 { "availrmem", KSTAT_DATA_ULONG },
213 { "lotsfree", KSTAT_DATA_ULONG },
214 { "desfree", KSTAT_DATA_ULONG },
215 { "minfree", KSTAT_DATA_ULONG },
216 { "fastscan", KSTAT_DATA_ULONG },
217 { "slowscan", KSTAT_DATA_ULONG },
218 { "nscan", KSTAT_DATA_ULONG },
219 { "desscan", KSTAT_DATA_ULONG },
220 { "pp_kernel", KSTAT_DATA_ULONG },
221 { "pagesfree", KSTAT_DATA_ULONG },
222 { "pageslocked", KSTAT_DATA_ULONG },
223 { "pagestotal", KSTAT_DATA_ULONG },
224 { "low_mem_scan", KSTAT_DATA_ULONG },
225 { "n_throttle", KSTAT_DATA_ULONG },
226 };
227
228 static int header_kstat_update(kstat_t *, int);
229 static int header_kstat_snapshot(kstat_t *, void *, int);
230 static int system_misc_kstat_update(kstat_t *, int);
231 static int system_pages_kstat_update(kstat_t *, int);
232
233 static struct {
234 char name[KSTAT_STRLEN];
235 size_t size;
236 uint_t min_ndata;
237 uint_t max_ndata;
238 } kstat_data_type[KSTAT_NUM_TYPES] = {
239 { "raw", 1, 0, INT_MAX },
240 { "name=value", sizeof (kstat_named_t), 0, INT_MAX },
241 { "interrupt", sizeof (kstat_intr_t), 1, 1 },
242 { "i/o", sizeof (kstat_io_t), 1, 1 },
243 { "event_timer", sizeof (kstat_timer_t), 0, INT_MAX },
244 };
245
246 int
kstat_zone_find(kstat_t * k,zoneid_t zoneid)247 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
248 {
249 ekstat_t *e = (ekstat_t *)k;
250 kstat_zone_t *kz;
251
252 ASSERT(MUTEX_HELD(&kstat_chain_lock));
253 for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
254 if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
255 return (1);
256 if (zoneid == kz->zoneid)
257 return (1);
258 }
259 return (0);
260 }
261
262 void
kstat_zone_remove(kstat_t * k,zoneid_t zoneid)263 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
264 {
265 ekstat_t *e = (ekstat_t *)k;
266 kstat_zone_t *kz, *t = NULL;
267
268 mutex_enter(&kstat_chain_lock);
269 if (zoneid == e->e_zone.zoneid) {
270 kz = e->e_zone.next;
271 ASSERT(kz != NULL);
272 e->e_zone.zoneid = kz->zoneid;
273 e->e_zone.next = kz->next;
274 goto out;
275 }
276 for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
277 if (kz->next->zoneid == zoneid) {
278 t = kz->next;
279 kz->next = t->next;
280 break;
281 }
282 }
283 ASSERT(t != NULL); /* we removed something */
284 kz = t;
285 out:
286 kstat_chain_id++;
287 mutex_exit(&kstat_chain_lock);
288 kmem_free(kz, sizeof (*kz));
289 }
290
291 void
kstat_zone_add(kstat_t * k,zoneid_t zoneid)292 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
293 {
294 ekstat_t *e = (ekstat_t *)k;
295 kstat_zone_t *kz;
296
297 kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
298 if (kz == NULL)
299 return;
300 mutex_enter(&kstat_chain_lock);
301 kz->zoneid = zoneid;
302 kz->next = e->e_zone.next;
303 e->e_zone.next = kz;
304 kstat_chain_id++;
305 mutex_exit(&kstat_chain_lock);
306 }
307
308 /*
309 * Compare the list of zones for the given kstats, returning 0 if they match
310 * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
311 * In practice, this is called indirectly by kstat_hold_byname(), so one of the
312 * two lists always has one element, and this is an O(n) operation rather than
313 * O(n^2).
314 */
315 static int
kstat_zone_compare(ekstat_t * e1,ekstat_t * e2)316 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
317 {
318 kstat_zone_t *kz1, *kz2;
319
320 ASSERT(MUTEX_HELD(&kstat_chain_lock));
321 for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
322 for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
323 if (kz1->zoneid == ALL_ZONES ||
324 kz2->zoneid == ALL_ZONES)
325 return (0);
326 if (kz1->zoneid == kz2->zoneid)
327 return (0);
328 }
329 }
330 return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
331 }
332
333 /*
334 * Support for keeping kstats sorted in AVL trees for fast lookups.
335 */
336 static int
kstat_compare_bykid(const void * a1,const void * a2)337 kstat_compare_bykid(const void *a1, const void *a2)
338 {
339 const kstat_t *k1 = a1;
340 const kstat_t *k2 = a2;
341
342 if (k1->ks_kid < k2->ks_kid)
343 return (-1);
344 if (k1->ks_kid > k2->ks_kid)
345 return (1);
346 return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
347 }
348
349 static int
kstat_compare_byname(const void * a1,const void * a2)350 kstat_compare_byname(const void *a1, const void *a2)
351 {
352 const kstat_t *k1 = a1;
353 const kstat_t *k2 = a2;
354 int s;
355
356 s = strcmp(k1->ks_module, k2->ks_module);
357 if (s > 0)
358 return (1);
359 if (s < 0)
360 return (-1);
361
362 if (k1->ks_instance < k2->ks_instance)
363 return (-1);
364 if (k1->ks_instance > k2->ks_instance)
365 return (1);
366
367 s = strcmp(k1->ks_name, k2->ks_name);
368 if (s > 0)
369 return (1);
370 if (s < 0)
371 return (-1);
372
373 return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
374 }
375
376 static kstat_t *
kstat_hold(avl_tree_t * t,ekstat_t * template)377 kstat_hold(avl_tree_t *t, ekstat_t *template)
378 {
379 kstat_t *ksp;
380 ekstat_t *e;
381
382 mutex_enter(&kstat_chain_lock);
383 for (;;) {
384 ksp = avl_find(t, template, NULL);
385 if (ksp == NULL)
386 break;
387 e = (ekstat_t *)ksp;
388 if (e->e_owner == NULL) {
389 e->e_owner = curthread;
390 break;
391 }
392 cv_wait(&e->e_cv, &kstat_chain_lock);
393 }
394 mutex_exit(&kstat_chain_lock);
395 return (ksp);
396 }
397
398 void
kstat_rele(kstat_t * ksp)399 kstat_rele(kstat_t *ksp)
400 {
401 ekstat_t *e = (ekstat_t *)ksp;
402
403 mutex_enter(&kstat_chain_lock);
404 ASSERT(e->e_owner == curthread);
405 e->e_owner = NULL;
406 cv_broadcast(&e->e_cv);
407 mutex_exit(&kstat_chain_lock);
408 }
409
410 kstat_t *
kstat_hold_bykid(kid_t kid,zoneid_t zoneid)411 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
412 {
413 ekstat_t e;
414
415 e.e_ks.ks_kid = kid;
416 e.e_zone.zoneid = zoneid;
417 e.e_zone.next = NULL;
418
419 return (kstat_hold(&kstat_avl_bykid, &e));
420 }
421
422 kstat_t *
kstat_hold_byname(const char * ks_module,int ks_instance,const char * ks_name,zoneid_t ks_zoneid)423 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
424 zoneid_t ks_zoneid)
425 {
426 ekstat_t e;
427
428 kstat_set_string(e.e_ks.ks_module, ks_module);
429 e.e_ks.ks_instance = ks_instance;
430 kstat_set_string(e.e_ks.ks_name, ks_name);
431 e.e_zone.zoneid = ks_zoneid;
432 e.e_zone.next = NULL;
433 return (kstat_hold(&kstat_avl_byname, &e));
434 }
435
436 static ekstat_t *
kstat_alloc(size_t size)437 kstat_alloc(size_t size)
438 {
439 ekstat_t *e = NULL;
440
441 size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
442
443 if (kstat_arena == NULL) {
444 if (size <= kstat_initial_avail) {
445 e = kstat_initial_ptr;
446 kstat_initial_ptr = (char *)kstat_initial_ptr + size;
447 kstat_initial_avail -= size;
448 }
449 } else {
450 e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
451 }
452
453 if (e != NULL) {
454 bzero(e, size);
455 e->e_size = size;
456 cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
457 }
458
459 return (e);
460 }
461
462 static void
kstat_free(ekstat_t * e)463 kstat_free(ekstat_t *e)
464 {
465 cv_destroy(&e->e_cv);
466 vmem_free(kstat_arena, e, e->e_size);
467 }
468
469 /*
470 * Create various system kstats.
471 */
472 void
kstat_init(void)473 kstat_init(void)
474 {
475 kstat_t *ksp;
476 ekstat_t *e;
477 avl_tree_t *t = &kstat_avl_bykid;
478
479 /*
480 * Set up the kstat vmem arena.
481 */
482 kstat_arena = vmem_create("kstat",
483 kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
484 segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
485
486 /*
487 * Make initial kstats appear as though they were allocated.
488 */
489 for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
490 (void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
491 0, 0, e, (char *)e + e->e_size,
492 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
493
494 /*
495 * The mother of all kstats. The first kstat in the system, which
496 * always has KID 0, has the headers for all kstats (including itself)
497 * as its data. Thus, the kstat driver does not need any special
498 * interface to extract the kstat chain.
499 */
500 kstat_chain_id = 0;
501 ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
502 0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
503 if (ksp) {
504 ksp->ks_lock = &kstat_chain_lock;
505 ksp->ks_update = header_kstat_update;
506 ksp->ks_snapshot = header_kstat_snapshot;
507 kstat_install(ksp);
508 } else {
509 panic("cannot create kstat 'kstat_headers'");
510 }
511
512 ksp = kstat_create("unix", 0, "kstat_types", "kstat",
513 KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
514 if (ksp) {
515 int i;
516 kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
517
518 for (i = 0; i < KSTAT_NUM_TYPES; i++) {
519 kstat_named_init(&kn[i], kstat_data_type[i].name,
520 KSTAT_DATA_ULONG);
521 kn[i].value.ul = i;
522 }
523 kstat_install(ksp);
524 }
525
526 ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
527 sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
528 if (ksp) {
529 ksp->ks_data = (void *) &sysinfo;
530 kstat_install(ksp);
531 }
532
533 ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
534 sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
535 if (ksp) {
536 ksp->ks_data = (void *) &vminfo;
537 kstat_install(ksp);
538 }
539
540 ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
541 segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
542 if (ksp) {
543 ksp->ks_data = (void *) segmapcnt_ptr;
544 ksp->ks_update = segmap_kstat_update;
545 kstat_install(ksp);
546 }
547
548 ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
549 biostats_ndata, KSTAT_FLAG_VIRTUAL);
550 if (ksp) {
551 ksp->ks_data = (void *) biostats_ptr;
552 kstat_install(ksp);
553 }
554
555 ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
556 sizeof (struct var), KSTAT_FLAG_VIRTUAL);
557 if (ksp) {
558 ksp->ks_data = (void *) &v;
559 kstat_install(ksp);
560 }
561
562 ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
563 sizeof (system_misc_kstat) / sizeof (kstat_named_t),
564 KSTAT_FLAG_VIRTUAL);
565 if (ksp) {
566 ksp->ks_data = (void *) &system_misc_kstat;
567 ksp->ks_update = system_misc_kstat_update;
568 kstat_install(ksp);
569 }
570
571 ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
572 sizeof (system_pages_kstat) / sizeof (kstat_named_t),
573 KSTAT_FLAG_VIRTUAL);
574 if (ksp) {
575 ksp->ks_data = (void *) &system_pages_kstat;
576 ksp->ks_update = system_pages_kstat_update;
577 kstat_install(ksp);
578 }
579
580 ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
581 pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
582
583 if (ksp) {
584 ksp->ks_data = pollstats_ptr;
585 kstat_install(ksp);
586 }
587 }
588
589 /*
590 * Caller of this should ensure that the string pointed by src
591 * doesn't change while kstat's lock is held. Not doing so defeats
592 * kstat's snapshot strategy as explained in <sys/kstat.h>
593 */
594 void
kstat_named_setstr(kstat_named_t * knp,const char * src)595 kstat_named_setstr(kstat_named_t *knp, const char *src)
596 {
597 if (knp->data_type != KSTAT_DATA_STRING)
598 panic("kstat_named_setstr('%p', '%p'): "
599 "named kstat is not of type KSTAT_DATA_STRING",
600 (void *)knp, (void *)src);
601
602 KSTAT_NAMED_STR_PTR(knp) = (char *)src;
603 if (src != NULL)
604 KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
605 else
606 KSTAT_NAMED_STR_BUFLEN(knp) = 0;
607 }
608
609 void
kstat_set_string(char * dst,const char * src)610 kstat_set_string(char *dst, const char *src)
611 {
612 bzero(dst, KSTAT_STRLEN);
613 (void) strncpy(dst, src, KSTAT_STRLEN - 1);
614 }
615
616 void
kstat_named_init(kstat_named_t * knp,const char * name,uchar_t data_type)617 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
618 {
619 kstat_set_string(knp->name, name);
620 knp->data_type = data_type;
621
622 if (data_type == KSTAT_DATA_STRING)
623 kstat_named_setstr(knp, NULL);
624 }
625
626 void
kstat_timer_init(kstat_timer_t * ktp,const char * name)627 kstat_timer_init(kstat_timer_t *ktp, const char *name)
628 {
629 kstat_set_string(ktp->name, name);
630 }
631
632 /* ARGSUSED */
633 static int
default_kstat_update(kstat_t * ksp,int rw)634 default_kstat_update(kstat_t *ksp, int rw)
635 {
636 uint_t i;
637 size_t len = 0;
638 kstat_named_t *knp;
639
640 /*
641 * Named kstats with variable-length long strings have a standard
642 * way of determining how much space is needed to hold the snapshot:
643 */
644 if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
645 (ksp->ks_flags & (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_LONGSTRINGS))) {
646
647 /*
648 * Add in the space required for the strings
649 */
650 knp = KSTAT_NAMED_PTR(ksp);
651 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
652 if (knp->data_type == KSTAT_DATA_STRING)
653 len += KSTAT_NAMED_STR_BUFLEN(knp);
654 }
655 ksp->ks_data_size =
656 ksp->ks_ndata * sizeof (kstat_named_t) + len;
657 }
658 return (0);
659 }
660
661 static int
default_kstat_snapshot(kstat_t * ksp,void * buf,int rw)662 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
663 {
664 kstat_io_t *kiop;
665 hrtime_t cur_time;
666 size_t namedsz;
667
668 ksp->ks_snaptime = cur_time = gethrtime();
669
670 if (rw == KSTAT_WRITE) {
671 if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
672 return (EACCES);
673 bcopy(buf, ksp->ks_data, ksp->ks_data_size);
674 return (0);
675 }
676
677 /*
678 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
679 * number of kstat_named_t structures, followed by an optional
680 * string segment. The ks_data generally holds only the
681 * kstat_named_t structures. So we copy it first. The strings,
682 * if any, are copied below. For other kstat types, ks_data holds the
683 * entire buffer.
684 */
685
686 namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
687 if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
688 bcopy(ksp->ks_data, buf, namedsz);
689 else
690 bcopy(ksp->ks_data, buf, ksp->ks_data_size);
691
692 /*
693 * Apply kstat type-specific data massaging
694 */
695 switch (ksp->ks_type) {
696
697 case KSTAT_TYPE_IO:
698 /*
699 * Normalize time units and deal with incomplete transactions
700 */
701 kiop = (kstat_io_t *)buf;
702
703 scalehrtime(&kiop->wtime);
704 scalehrtime(&kiop->wlentime);
705 scalehrtime(&kiop->wlastupdate);
706 scalehrtime(&kiop->rtime);
707 scalehrtime(&kiop->rlentime);
708 scalehrtime(&kiop->rlastupdate);
709
710 if (kiop->wcnt != 0) {
711 /* like kstat_waitq_exit */
712 hrtime_t wfix = cur_time - kiop->wlastupdate;
713 kiop->wlastupdate = cur_time;
714 kiop->wlentime += kiop->wcnt * wfix;
715 kiop->wtime += wfix;
716 }
717
718 if (kiop->rcnt != 0) {
719 /* like kstat_runq_exit */
720 hrtime_t rfix = cur_time - kiop->rlastupdate;
721 kiop->rlastupdate = cur_time;
722 kiop->rlentime += kiop->rcnt * rfix;
723 kiop->rtime += rfix;
724 }
725 break;
726
727 case KSTAT_TYPE_NAMED:
728 /*
729 * Massage any long strings in at the end of the buffer
730 */
731 if (ksp->ks_data_size > namedsz) {
732 uint_t i;
733 kstat_named_t *knp = buf;
734 char *dst = (char *)(knp + ksp->ks_ndata);
735 /*
736 * Copy strings and update pointers
737 */
738 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
739 if (knp->data_type == KSTAT_DATA_STRING &&
740 KSTAT_NAMED_STR_PTR(knp) != NULL) {
741 bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
742 KSTAT_NAMED_STR_BUFLEN(knp));
743 KSTAT_NAMED_STR_PTR(knp) = dst;
744 dst += KSTAT_NAMED_STR_BUFLEN(knp);
745 }
746 }
747 ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
748 }
749 break;
750 }
751 return (0);
752 }
753
754 static int
header_kstat_update(kstat_t * header_ksp,int rw)755 header_kstat_update(kstat_t *header_ksp, int rw)
756 {
757 int nkstats = 0;
758 ekstat_t *e;
759 avl_tree_t *t = &kstat_avl_bykid;
760 zoneid_t zoneid;
761
762 if (rw == KSTAT_WRITE)
763 return (EACCES);
764
765 ASSERT(MUTEX_HELD(&kstat_chain_lock));
766
767 zoneid = getzoneid();
768 for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
769 if (kstat_zone_find((kstat_t *)e, zoneid) &&
770 (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
771 nkstats++;
772 }
773 }
774 header_ksp->ks_ndata = nkstats;
775 header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
776 return (0);
777 }
778
779 /*
780 * Copy out the data section of kstat 0, which consists of the list
781 * of all kstat headers. By specification, these headers must be
782 * copied out in order of increasing KID.
783 */
784 static int
header_kstat_snapshot(kstat_t * header_ksp,void * buf,int rw)785 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
786 {
787 ekstat_t *e;
788 avl_tree_t *t = &kstat_avl_bykid;
789 zoneid_t zoneid;
790
791 header_ksp->ks_snaptime = gethrtime();
792
793 if (rw == KSTAT_WRITE)
794 return (EACCES);
795
796 ASSERT(MUTEX_HELD(&kstat_chain_lock));
797
798 zoneid = getzoneid();
799 for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
800 if (kstat_zone_find((kstat_t *)e, zoneid) &&
801 (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
802 bcopy(&e->e_ks, buf, sizeof (kstat_t));
803 buf = (char *)buf + sizeof (kstat_t);
804 }
805 }
806
807 return (0);
808 }
809
810 /* ARGSUSED */
811 static int
system_misc_kstat_update(kstat_t * ksp,int rw)812 system_misc_kstat_update(kstat_t *ksp, int rw)
813 {
814 int myncpus = ncpus;
815 int *loadavgp = &avenrun[0];
816 time_t zone_boot_time;
817 clock_t zone_lbolt;
818 hrtime_t zone_hrtime;
819 size_t zone_nproc;
820
821 if (rw == KSTAT_WRITE)
822 return (EACCES);
823
824 if (!INGLOBALZONE(curproc)) {
825 /*
826 * Here we grab cpu_lock which is OK as long as no-one in the
827 * future attempts to lookup this particular kstat
828 * (unix:0:system_misc) while holding cpu_lock.
829 */
830 mutex_enter(&cpu_lock);
831 if (pool_pset_enabled()) {
832 myncpus = zone_ncpus_get(curproc->p_zone);
833 ASSERT(myncpus > 0);
834 }
835 mutex_exit(&cpu_lock);
836 loadavgp = &curproc->p_zone->zone_avenrun[0];
837 }
838
839 if (INGLOBALZONE(curproc)) {
840 zone_boot_time = boot_time;
841 zone_lbolt = ddi_get_lbolt();
842 zone_nproc = nproc;
843 } else {
844 zone_boot_time = curproc->p_zone->zone_boot_time;
845
846 zone_hrtime = gethrtime();
847 zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
848 NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
849 mutex_enter(&curproc->p_zone->zone_nlwps_lock);
850 zone_nproc = curproc->p_zone->zone_nprocs;
851 mutex_exit(&curproc->p_zone->zone_nlwps_lock);
852 }
853
854 system_misc_kstat.ncpus.value.ui32 = (uint32_t)myncpus;
855 system_misc_kstat.lbolt.value.ui32 = (uint32_t)zone_lbolt;
856 system_misc_kstat.deficit.value.ui32 = (uint32_t)deficit;
857 system_misc_kstat.clk_intr.value.ui32 = (uint32_t)zone_lbolt;
858 system_misc_kstat.vac.value.ui32 = (uint32_t)vac;
859 system_misc_kstat.nproc.value.ui32 = (uint32_t)zone_nproc;
860 system_misc_kstat.avenrun_1min.value.ui32 = (uint32_t)loadavgp[0];
861 system_misc_kstat.avenrun_5min.value.ui32 = (uint32_t)loadavgp[1];
862 system_misc_kstat.avenrun_15min.value.ui32 = (uint32_t)loadavgp[2];
863 system_misc_kstat.boot_time.value.ui32 = (uint32_t)
864 zone_boot_time;
865 system_misc_kstat.nsec_per_tick.value.ui32 = (uint32_t)
866 nsec_per_tick;
867 return (0);
868 }
869
870 #ifdef __sparc
871 extern caddr_t econtig32;
872 #else /* !__sparc */
873 extern caddr_t econtig;
874 #endif /* __sparc */
875
876 /* ARGSUSED */
877 static int
system_pages_kstat_update(kstat_t * ksp,int rw)878 system_pages_kstat_update(kstat_t *ksp, int rw)
879 {
880 kobj_stat_t kobj_stat;
881
882 if (rw == KSTAT_WRITE) {
883 return (EACCES);
884 }
885
886 kobj_stat_get(&kobj_stat);
887 system_pages_kstat.physmem.value.ul = (ulong_t)physmem;
888 system_pages_kstat.nalloc.value.ul = kobj_stat.nalloc;
889 system_pages_kstat.nfree.value.ul = kobj_stat.nfree;
890 system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
891 system_pages_kstat.nfree_calls.value.ul = kobj_stat.nfree_calls;
892 system_pages_kstat.kernelbase.value.ul = (ulong_t)KERNELBASE;
893
894 #ifdef __sparc
895 /*
896 * kstat should REALLY be modified to also report kmem64_base and
897 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
898 * [ kernelbase .. econtig ] no longer is truly reflective of the
899 * kernel's vallocs...
900 */
901 system_pages_kstat.econtig.value.ul = (ulong_t)econtig32;
902 #else /* !__sparc */
903 system_pages_kstat.econtig.value.ul = (ulong_t)econtig;
904 #endif /* __sparc */
905
906 system_pages_kstat.freemem.value.ul = (ulong_t)freemem;
907 system_pages_kstat.availrmem.value.ul = (ulong_t)availrmem;
908 system_pages_kstat.lotsfree.value.ul = (ulong_t)lotsfree;
909 system_pages_kstat.desfree.value.ul = (ulong_t)desfree;
910 system_pages_kstat.minfree.value.ul = (ulong_t)minfree;
911 system_pages_kstat.fastscan.value.ul = (ulong_t)fastscan;
912 system_pages_kstat.slowscan.value.ul = (ulong_t)slowscan;
913 system_pages_kstat.nscan.value.ul = (ulong_t)nscan;
914 system_pages_kstat.desscan.value.ul = (ulong_t)desscan;
915 system_pages_kstat.pagesfree.value.ul = (ulong_t)freemem;
916 system_pages_kstat.pageslocked.value.ul = (ulong_t)(availrmem_initial -
917 availrmem);
918 system_pages_kstat.pagestotal.value.ul = (ulong_t)total_pages;
919 system_pages_kstat.lowmemscan.value.ul = (ulong_t)low_mem_scan;
920 system_pages_kstat.nthrottle.value.ul = (ulong_t)n_throttle;
921 /*
922 * pp_kernel represents total pages used by the kernel since the
923 * startup. This formula takes into account the boottime kernel
924 * footprint and also considers the availrmem changes because of
925 * user explicit page locking.
926 */
927 system_pages_kstat.pp_kernel.value.ul = (ulong_t)(physinstalled -
928 obp_pages - availrmem - k_anoninfo.ani_mem_resv -
929 anon_segkp_pages_locked - pages_locked -
930 pages_claimed - pages_useclaim);
931
932 return (0);
933 }
934
935 kstat_t *
kstat_create(const char * ks_module,int ks_instance,const char * ks_name,const char * ks_class,uchar_t ks_type,uint_t ks_ndata,uchar_t ks_flags)936 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
937 const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
938 {
939 return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
940 ks_type, ks_ndata, ks_flags, ALL_ZONES));
941 }
942
943 /*
944 * Allocate and initialize a kstat structure. Or, if a dormant kstat with
945 * the specified name exists, reactivate it. Returns a pointer to the kstat
946 * on success, NULL on failure. The kstat will not be visible to the
947 * kstat driver until kstat_install().
948 */
949 kstat_t *
kstat_create_zone(const char * ks_module,int ks_instance,const char * ks_name,const char * ks_class,uchar_t ks_type,uint_t ks_ndata,uchar_t ks_flags,zoneid_t ks_zoneid)950 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
951 const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
952 zoneid_t ks_zoneid)
953 {
954 size_t ks_data_size;
955 kstat_t *ksp;
956 ekstat_t *e;
957 avl_index_t where;
958 char namebuf[KSTAT_STRLEN + 16];
959
960 if (avl_numnodes(&kstat_avl_bykid) == 0) {
961 avl_create(&kstat_avl_bykid, kstat_compare_bykid,
962 sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
963
964 avl_create(&kstat_avl_byname, kstat_compare_byname,
965 sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
966 }
967
968 /*
969 * If ks_name == NULL, set the ks_name to <module><instance>.
970 */
971 if (ks_name == NULL) {
972 char buf[KSTAT_STRLEN];
973 kstat_set_string(buf, ks_module);
974 (void) sprintf(namebuf, "%s%d", buf, ks_instance);
975 ks_name = namebuf;
976 }
977
978 /*
979 * Make sure it's a valid kstat data type
980 */
981 if (ks_type >= KSTAT_NUM_TYPES) {
982 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
983 "invalid kstat type %d",
984 ks_module, ks_instance, ks_name, ks_type);
985 return (NULL);
986 }
987
988 /*
989 * Don't allow persistent virtual kstats -- it makes no sense.
990 * ks_data points to garbage when the client goes away.
991 */
992 if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
993 (ks_flags & KSTAT_FLAG_VIRTUAL)) {
994 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
995 "cannot create persistent virtual kstat",
996 ks_module, ks_instance, ks_name);
997 return (NULL);
998 }
999
1000 /*
1001 * Don't allow variable-size physical kstats, since the framework's
1002 * memory allocation for physical kstat data is fixed at creation time.
1003 */
1004 if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
1005 !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1006 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1007 "cannot create variable-size physical kstat",
1008 ks_module, ks_instance, ks_name);
1009 return (NULL);
1010 }
1011
1012 /*
1013 * Make sure the number of data fields is within legal range
1014 */
1015 if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1016 ks_ndata > kstat_data_type[ks_type].max_ndata) {
1017 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1018 "ks_ndata=%d out of range [%d, %d]",
1019 ks_module, ks_instance, ks_name, (int)ks_ndata,
1020 kstat_data_type[ks_type].min_ndata,
1021 kstat_data_type[ks_type].max_ndata);
1022 return (NULL);
1023 }
1024
1025 ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1026
1027 /*
1028 * If the named kstat already exists and is dormant, reactivate it.
1029 */
1030 ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1031 if (ksp != NULL) {
1032 if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1033 /*
1034 * The named kstat exists but is not dormant --
1035 * this is a kstat namespace collision.
1036 */
1037 kstat_rele(ksp);
1038 cmn_err(CE_WARN,
1039 "kstat_create('%s', %d, '%s'): namespace collision",
1040 ks_module, ks_instance, ks_name);
1041 return (NULL);
1042 }
1043 if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1044 (ksp->ks_type != ks_type) ||
1045 (ksp->ks_ndata != ks_ndata) ||
1046 (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1047 /*
1048 * The name is the same, but the other key parameters
1049 * differ from those of the dormant kstat -- bogus.
1050 */
1051 kstat_rele(ksp);
1052 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1053 "invalid reactivation of dormant kstat",
1054 ks_module, ks_instance, ks_name);
1055 return (NULL);
1056 }
1057 /*
1058 * Return dormant kstat pointer to caller. As usual,
1059 * the kstat is marked invalid until kstat_install().
1060 */
1061 ksp->ks_flags |= KSTAT_FLAG_INVALID;
1062 kstat_rele(ksp);
1063 return (ksp);
1064 }
1065
1066 /*
1067 * Allocate memory for the new kstat header and, if this is a physical
1068 * kstat, the data section.
1069 */
1070 e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1071 if (e == NULL) {
1072 cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1073 "insufficient kernel memory",
1074 ks_module, ks_instance, ks_name);
1075 return (NULL);
1076 }
1077
1078 /*
1079 * Initialize as many fields as we can. The caller may reset
1080 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1081 * Creators of virtual kstats may also reset ks_data. It is
1082 * also up to the caller to initialize the kstat data section,
1083 * if necessary. All initialization must be complete before
1084 * calling kstat_install().
1085 */
1086 e->e_zone.zoneid = ks_zoneid;
1087 e->e_zone.next = NULL;
1088
1089 ksp = &e->e_ks;
1090 ksp->ks_crtime = gethrtime();
1091 kstat_set_string(ksp->ks_module, ks_module);
1092 ksp->ks_instance = ks_instance;
1093 kstat_set_string(ksp->ks_name, ks_name);
1094 ksp->ks_type = ks_type;
1095 kstat_set_string(ksp->ks_class, ks_class);
1096 ksp->ks_flags = ks_flags | KSTAT_FLAG_INVALID;
1097 if (ks_flags & KSTAT_FLAG_VIRTUAL)
1098 ksp->ks_data = NULL;
1099 else
1100 ksp->ks_data = (void *)(e + 1);
1101 ksp->ks_ndata = ks_ndata;
1102 ksp->ks_data_size = ks_data_size;
1103 ksp->ks_snaptime = ksp->ks_crtime;
1104 ksp->ks_update = default_kstat_update;
1105 ksp->ks_private = NULL;
1106 ksp->ks_snapshot = default_kstat_snapshot;
1107 ksp->ks_lock = NULL;
1108
1109 mutex_enter(&kstat_chain_lock);
1110
1111 /*
1112 * Add our kstat to the AVL trees.
1113 */
1114 if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1115 mutex_exit(&kstat_chain_lock);
1116 cmn_err(CE_WARN,
1117 "kstat_create('%s', %d, '%s'): namespace collision",
1118 ks_module, ks_instance, ks_name);
1119 kstat_free(e);
1120 return (NULL);
1121 }
1122 avl_insert(&kstat_avl_byname, e, where);
1123
1124 /*
1125 * Loop around until we find an unused KID.
1126 */
1127 do {
1128 ksp->ks_kid = kstat_chain_id++;
1129 } while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1130 avl_insert(&kstat_avl_bykid, e, where);
1131
1132 mutex_exit(&kstat_chain_lock);
1133
1134 return (ksp);
1135 }
1136
1137 /*
1138 * Activate a fully initialized kstat and make it visible to /dev/kstat.
1139 */
1140 void
kstat_install(kstat_t * ksp)1141 kstat_install(kstat_t *ksp)
1142 {
1143 zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1144
1145 /*
1146 * If this is a variable-size kstat, it MUST provide kstat data locking
1147 * to prevent data-size races with kstat readers.
1148 */
1149 if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1150 panic("kstat_install('%s', %d, '%s'): "
1151 "cannot create variable-size kstat without data lock",
1152 ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1153 }
1154
1155 if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1156 cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1157 (void *)ksp);
1158 return;
1159 }
1160
1161 if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1162 uint_t i;
1163 kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1164
1165 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1166 if (knp->data_type == KSTAT_DATA_STRING) {
1167 ksp->ks_flags |= KSTAT_FLAG_LONGSTRINGS;
1168 break;
1169 }
1170 }
1171 /*
1172 * The default snapshot routine does not handle KSTAT_WRITE
1173 * for long strings.
1174 */
1175 if ((ksp->ks_flags & KSTAT_FLAG_LONGSTRINGS) &&
1176 (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1177 (ksp->ks_snapshot == default_kstat_snapshot)) {
1178 panic("kstat_install('%s', %d, '%s'): "
1179 "named kstat containing KSTAT_DATA_STRING "
1180 "is writable but uses default snapshot routine",
1181 ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1182 }
1183 }
1184
1185 if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1186
1187 /*
1188 * We are reactivating a dormant kstat. Initialize the
1189 * caller's underlying data to the value it had when the
1190 * kstat went dormant, and mark the kstat as active.
1191 * Grab the provider's kstat lock if it's not already held.
1192 */
1193 kmutex_t *lp = ksp->ks_lock;
1194 if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1195 mutex_enter(lp);
1196 (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1197 mutex_exit(lp);
1198 } else {
1199 (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1200 }
1201 ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1202 }
1203
1204 /*
1205 * Now that the kstat is active, make it visible to the kstat driver.
1206 * When copying out kstats the count is determined in
1207 * header_kstat_update() and actually copied into kbuf in
1208 * header_kstat_snapshot(). kstat_chain_lock is held across the two
1209 * calls to ensure that this list doesn't change. Thus, we need to
1210 * also take the lock to ensure that the we don't copy the new kstat
1211 * in the 2nd pass and overrun the buf.
1212 */
1213 mutex_enter(&kstat_chain_lock);
1214 ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1215 mutex_exit(&kstat_chain_lock);
1216 kstat_rele(ksp);
1217 }
1218
1219 /*
1220 * Remove a kstat from the system. Or, if it's a persistent kstat,
1221 * just update the data and mark it as dormant.
1222 */
1223 void
kstat_delete(kstat_t * ksp)1224 kstat_delete(kstat_t *ksp)
1225 {
1226 kmutex_t *lp;
1227 ekstat_t *e = (ekstat_t *)ksp;
1228 zoneid_t zoneid;
1229 kstat_zone_t *kz;
1230
1231 ASSERT(ksp != NULL);
1232
1233 if (ksp == NULL)
1234 return;
1235
1236 zoneid = e->e_zone.zoneid;
1237
1238 lp = ksp->ks_lock;
1239
1240 if (lp != NULL && MUTEX_HELD(lp)) {
1241 panic("kstat_delete(%p): caller holds data lock %p",
1242 (void *)ksp, (void *)lp);
1243 }
1244
1245 if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1246 cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1247 (void *)ksp);
1248 return;
1249 }
1250
1251 if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1252 /*
1253 * Update the data one last time, so that all activity
1254 * prior to going dormant has been accounted for.
1255 */
1256 KSTAT_ENTER(ksp);
1257 (void) KSTAT_UPDATE(ksp, KSTAT_READ);
1258 KSTAT_EXIT(ksp);
1259
1260 /*
1261 * Mark the kstat as dormant and restore caller-modifiable
1262 * fields to default values, so the kstat is readable during
1263 * the dormant phase.
1264 */
1265 ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1266 ksp->ks_lock = NULL;
1267 ksp->ks_update = default_kstat_update;
1268 ksp->ks_private = NULL;
1269 ksp->ks_snapshot = default_kstat_snapshot;
1270 kstat_rele(ksp);
1271 return;
1272 }
1273
1274 /*
1275 * Remove the kstat from the framework's AVL trees,
1276 * free the allocated memory, and increment kstat_chain_id so
1277 * /dev/kstat clients can detect the event.
1278 */
1279 mutex_enter(&kstat_chain_lock);
1280 avl_remove(&kstat_avl_bykid, e);
1281 avl_remove(&kstat_avl_byname, e);
1282 kstat_chain_id++;
1283 mutex_exit(&kstat_chain_lock);
1284
1285 kz = e->e_zone.next;
1286 while (kz != NULL) {
1287 kstat_zone_t *t = kz;
1288
1289 kz = kz->next;
1290 kmem_free(t, sizeof (*t));
1291 }
1292 kstat_rele(ksp);
1293 kstat_free(e);
1294 }
1295
1296 void
kstat_delete_byname_zone(const char * ks_module,int ks_instance,const char * ks_name,zoneid_t ks_zoneid)1297 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1298 const char *ks_name, zoneid_t ks_zoneid)
1299 {
1300 kstat_t *ksp;
1301
1302 ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1303 if (ksp != NULL) {
1304 kstat_rele(ksp);
1305 kstat_delete(ksp);
1306 }
1307 }
1308
1309 void
kstat_delete_byname(const char * ks_module,int ks_instance,const char * ks_name)1310 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1311 {
1312 kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1313 }
1314
1315 /*
1316 * The sparc V9 versions of these routines can be much cheaper than
1317 * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1318 * For simplicity, however, we always feed the C versions to lint.
1319 */
1320 #if !defined(__sparc) || defined(lint) || defined(__lint)
1321
1322 void
kstat_waitq_enter(kstat_io_t * kiop)1323 kstat_waitq_enter(kstat_io_t *kiop)
1324 {
1325 hrtime_t new, delta;
1326 ulong_t wcnt;
1327
1328 new = gethrtime_unscaled();
1329 delta = new - kiop->wlastupdate;
1330 kiop->wlastupdate = new;
1331 wcnt = kiop->wcnt++;
1332 if (wcnt != 0) {
1333 kiop->wlentime += delta * wcnt;
1334 kiop->wtime += delta;
1335 }
1336 }
1337
1338 void
kstat_waitq_exit(kstat_io_t * kiop)1339 kstat_waitq_exit(kstat_io_t *kiop)
1340 {
1341 hrtime_t new, delta;
1342 ulong_t wcnt;
1343
1344 new = gethrtime_unscaled();
1345 delta = new - kiop->wlastupdate;
1346 kiop->wlastupdate = new;
1347 wcnt = kiop->wcnt--;
1348 ASSERT((int)wcnt > 0);
1349 kiop->wlentime += delta * wcnt;
1350 kiop->wtime += delta;
1351 }
1352
1353 void
kstat_runq_enter(kstat_io_t * kiop)1354 kstat_runq_enter(kstat_io_t *kiop)
1355 {
1356 hrtime_t new, delta;
1357 ulong_t rcnt;
1358
1359 new = gethrtime_unscaled();
1360 delta = new - kiop->rlastupdate;
1361 kiop->rlastupdate = new;
1362 rcnt = kiop->rcnt++;
1363 if (rcnt != 0) {
1364 kiop->rlentime += delta * rcnt;
1365 kiop->rtime += delta;
1366 }
1367 }
1368
1369 void
kstat_runq_exit(kstat_io_t * kiop)1370 kstat_runq_exit(kstat_io_t *kiop)
1371 {
1372 hrtime_t new, delta;
1373 ulong_t rcnt;
1374
1375 new = gethrtime_unscaled();
1376 delta = new - kiop->rlastupdate;
1377 kiop->rlastupdate = new;
1378 rcnt = kiop->rcnt--;
1379 ASSERT((int)rcnt > 0);
1380 kiop->rlentime += delta * rcnt;
1381 kiop->rtime += delta;
1382 }
1383
1384 void
kstat_waitq_to_runq(kstat_io_t * kiop)1385 kstat_waitq_to_runq(kstat_io_t *kiop)
1386 {
1387 hrtime_t new, delta;
1388 ulong_t wcnt, rcnt;
1389
1390 new = gethrtime_unscaled();
1391
1392 delta = new - kiop->wlastupdate;
1393 kiop->wlastupdate = new;
1394 wcnt = kiop->wcnt--;
1395 ASSERT((int)wcnt > 0);
1396 kiop->wlentime += delta * wcnt;
1397 kiop->wtime += delta;
1398
1399 delta = new - kiop->rlastupdate;
1400 kiop->rlastupdate = new;
1401 rcnt = kiop->rcnt++;
1402 if (rcnt != 0) {
1403 kiop->rlentime += delta * rcnt;
1404 kiop->rtime += delta;
1405 }
1406 }
1407
1408 void
kstat_runq_back_to_waitq(kstat_io_t * kiop)1409 kstat_runq_back_to_waitq(kstat_io_t *kiop)
1410 {
1411 hrtime_t new, delta;
1412 ulong_t wcnt, rcnt;
1413
1414 new = gethrtime_unscaled();
1415
1416 delta = new - kiop->rlastupdate;
1417 kiop->rlastupdate = new;
1418 rcnt = kiop->rcnt--;
1419 ASSERT((int)rcnt > 0);
1420 kiop->rlentime += delta * rcnt;
1421 kiop->rtime += delta;
1422
1423 delta = new - kiop->wlastupdate;
1424 kiop->wlastupdate = new;
1425 wcnt = kiop->wcnt++;
1426 if (wcnt != 0) {
1427 kiop->wlentime += delta * wcnt;
1428 kiop->wtime += delta;
1429 }
1430 }
1431
1432 #endif
1433
1434 void
kstat_timer_start(kstat_timer_t * ktp)1435 kstat_timer_start(kstat_timer_t *ktp)
1436 {
1437 ktp->start_time = gethrtime();
1438 }
1439
1440 void
kstat_timer_stop(kstat_timer_t * ktp)1441 kstat_timer_stop(kstat_timer_t *ktp)
1442 {
1443 hrtime_t etime;
1444 u_longlong_t num_events;
1445
1446 ktp->stop_time = etime = gethrtime();
1447 etime -= ktp->start_time;
1448 num_events = ktp->num_events;
1449 if (etime < ktp->min_time || num_events == 0)
1450 ktp->min_time = etime;
1451 if (etime > ktp->max_time)
1452 ktp->max_time = etime;
1453 ktp->elapsed_time += etime;
1454 ktp->num_events = num_events + 1;
1455 }
1456