xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/kmem.c (revision a73c0fe4e90b82a478f821ef3adb5cf34f6a9346)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <mdb/mdb_param.h>
29 #include <mdb/mdb_modapi.h>
30 #include <mdb/mdb_ctf.h>
31 #include <sys/cpuvar.h>
32 #include <sys/kmem_impl.h>
33 #include <sys/vmem_impl.h>
34 #include <sys/machelf.h>
35 #include <sys/modctl.h>
36 #include <sys/kobj.h>
37 #include <sys/panic.h>
38 #include <sys/stack.h>
39 #include <sys/sysmacros.h>
40 #include <vm/page.h>
41 
42 #include "avl.h"
43 #include "combined.h"
44 #include "dist.h"
45 #include "kmem.h"
46 #include "leaky.h"
47 #include "list.h"
48 
49 #define	dprintf(x) if (mdb_debug_level) { \
50 	mdb_printf("kmem debug: ");  \
51 	/*CSTYLED*/\
52 	mdb_printf x ;\
53 }
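
/*
 * The macro above takes its mdb_printf() arguments as a single
 * parenthesized macro argument, so call sites use doubled parentheses,
 * e.g. dprintf(("walking %p\n", addr)).  Output appears only after the
 * debug dcmd below (kmem_debug()) has toggled mdb_debug_level on.
 */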
54 
55 #define	KM_ALLOCATED		0x01
56 #define	KM_FREE			0x02
57 #define	KM_BUFCTL		0x04
58 #define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
59 #define	KM_HASH			0x10
60 
61 static int mdb_debug_level = 0;
62 
63 /*ARGSUSED*/
64 static int
65 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
66 {
67 	mdb_walker_t w;
68 	char descr[64];
69 
70 	(void) mdb_snprintf(descr, sizeof (descr),
71 	    "walk the %s cache", c->cache_name);
72 
73 	w.walk_name = c->cache_name;
74 	w.walk_descr = descr;
75 	w.walk_init = kmem_walk_init;
76 	w.walk_step = kmem_walk_step;
77 	w.walk_fini = kmem_walk_fini;
78 	w.walk_init_arg = (void *)addr;
79 
80 	if (mdb_add_walker(&w) == -1)
81 		mdb_warn("failed to add %s walker", c->cache_name);
82 
83 	return (WALK_NEXT);
84 }
85 
86 /*ARGSUSED*/
87 int
88 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
89 {
90 	mdb_debug_level ^= 1;
91 
92 	mdb_printf("kmem: debugging is now %s\n",
93 	    mdb_debug_level ? "on" : "off");
94 
95 	return (DCMD_OK);
96 }
97 
98 int
99 kmem_cache_walk_init(mdb_walk_state_t *wsp)
100 {
101 	GElf_Sym sym;
102 
103 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
104 		mdb_warn("couldn't find kmem_caches");
105 		return (WALK_ERR);
106 	}
107 
108 	wsp->walk_addr = (uintptr_t)sym.st_value;
109 
110 	return (list_walk_init_named(wsp, "cache list", "cache"));
111 }
112 
113 int
114 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
115 {
116 	if (wsp->walk_addr == NULL) {
117 		mdb_warn("kmem_cpu_cache doesn't support global walks");
118 		return (WALK_ERR);
119 	}
120 
121 	if (mdb_layered_walk("cpu", wsp) == -1) {
122 		mdb_warn("couldn't walk 'cpu'");
123 		return (WALK_ERR);
124 	}
125 
126 	wsp->walk_data = (void *)wsp->walk_addr;
127 
128 	return (WALK_NEXT);
129 }
130 
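/*
 * Each step receives one cpu_t from the layered "cpu" walk; the matching
 * kmem_cpu_cache_t lives at (cache address + cpu_cache_offset), where the
 * cache address was stashed in walk_data by the init function above.
 */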
131 int
132 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
133 {
134 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
135 	const cpu_t *cpu = wsp->walk_layer;
136 	kmem_cpu_cache_t cc;
137 
138 	caddr += cpu->cpu_cache_offset;
139 
140 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
141 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
142 		return (WALK_ERR);
143 	}
144 
145 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
146 }
147 
148 static int
149 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
150 {
151 	kmem_slab_t *sp = p;
152 	uintptr_t caddr = (uintptr_t)arg;
153 	if ((uintptr_t)sp->slab_cache != caddr) {
154 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
155 		    saddr, caddr, sp->slab_cache);
156 		return (-1);
157 	}
158 
159 	return (0);
160 }
161 
162 static int
163 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
164 {
165 	kmem_slab_t *sp = p;
166 
167 	int rc = kmem_slab_check(p, saddr, arg);
168 	if (rc != 0) {
169 		return (rc);
170 	}
171 
172 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
173 		mdb_warn("slab %p is not a partial slab\n", saddr);
174 		return (-1);
175 	}
176 
177 	return (0);
178 }
179 
180 static int
181 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
182 {
183 	kmem_slab_t *sp = p;
184 
185 	int rc = kmem_slab_check(p, saddr, arg);
186 	if (rc != 0) {
187 		return (rc);
188 	}
189 
190 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
191 		mdb_warn("slab %p is not completely allocated\n", saddr);
192 		return (-1);
193 	}
194 
195 	return (0);
196 }
197 
198 typedef struct {
199 	uintptr_t kns_cache_addr;
200 	int kns_nslabs;
201 } kmem_nth_slab_t;
202 
203 static int
204 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
205 {
206 	kmem_nth_slab_t *chkp = arg;
207 
208 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
209 	if (rc != 0) {
210 		return (rc);
211 	}
212 
213 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
214 }
215 
216 static int
217 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
218 {
219 	uintptr_t caddr = wsp->walk_addr;
220 
221 	wsp->walk_addr = (uintptr_t)(caddr +
222 	    offsetof(kmem_cache_t, cache_complete_slabs));
223 
224 	return (list_walk_init_checked(wsp, "slab list", "slab",
225 	    kmem_complete_slab_check, (void *)caddr));
226 }
227 
228 static int
229 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
230 {
231 	uintptr_t caddr = wsp->walk_addr;
232 
233 	wsp->walk_addr = (uintptr_t)(caddr +
234 	    offsetof(kmem_cache_t, cache_partial_slabs));
235 
236 	return (avl_walk_init_checked(wsp, "slab list", "slab",
237 	    kmem_partial_slab_check, (void *)caddr));
238 }
239 
240 int
241 kmem_slab_walk_init(mdb_walk_state_t *wsp)
242 {
243 	uintptr_t caddr = wsp->walk_addr;
244 
245 	if (caddr == NULL) {
246 		mdb_warn("kmem_slab doesn't support global walks\n");
247 		return (WALK_ERR);
248 	}
249 
250 	combined_walk_init(wsp);
251 	combined_walk_add(wsp,
252 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
253 	combined_walk_add(wsp,
254 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
255 
256 	return (WALK_NEXT);
257 }
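
/*
 * A "kmem_slab" walk therefore visits every slab in the cache: first the
 * completely allocated slabs (kept on a list), then the partial slabs
 * (kept in an AVL tree).  An illustrative invocation, given a cache
 * address:
 *
 *	> <cache addr>::walk kmem_slab
 */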
258 
259 static int
260 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
261 {
262 	uintptr_t caddr = wsp->walk_addr;
263 	kmem_nth_slab_t *chk;
264 
265 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
266 	    UM_SLEEP | UM_GC);
267 	chk->kns_cache_addr = caddr;
268 	chk->kns_nslabs = 1;
269 	wsp->walk_addr = (uintptr_t)(caddr +
270 	    offsetof(kmem_cache_t, cache_complete_slabs));
271 
272 	return (list_walk_init_checked(wsp, "slab list", "slab",
273 	    kmem_nth_slab_check, chk));
274 }
275 
276 int
277 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
278 {
279 	uintptr_t caddr = wsp->walk_addr;
280 	kmem_cache_t c;
281 
282 	if (caddr == NULL) {
283 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
284 		return (WALK_ERR);
285 	}
286 
287 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
288 		mdb_warn("couldn't read kmem_cache at %p", caddr);
289 		return (WALK_ERR);
290 	}
291 
292 	combined_walk_init(wsp);
293 
294 	/*
295 	 * Some consumers (kmem_walk_step(), in particular) require at
296 	 * least one callback if there are any buffers in the cache.  So
297 	 * if there are *no* partial slabs, report the first full slab, if
298 	 * any.
299 	 *
300 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
301 	 */
302 	if (c.cache_partial_slabs.avl_numnodes == 0) {
303 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
304 		    list_walk_step, list_walk_fini);
305 	} else {
306 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
307 		    avl_walk_step, avl_walk_fini);
308 	}
309 
310 	return (WALK_NEXT);
311 }
312 
313 int
314 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
315 {
316 	kmem_cache_t c;
317 	const char *filter = NULL;
318 
319 	if (mdb_getopts(ac, argv,
320 	    'n', MDB_OPT_STR, &filter,
321 	    NULL) != ac) {
322 		return (DCMD_USAGE);
323 	}
324 
325 	if (!(flags & DCMD_ADDRSPEC)) {
326 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
327 			mdb_warn("can't walk kmem_cache");
328 			return (DCMD_ERR);
329 		}
330 		return (DCMD_OK);
331 	}
332 
333 	if (DCMD_HDRSPEC(flags))
334 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
335 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
336 
337 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
338 		mdb_warn("couldn't read kmem_cache at %p", addr);
339 		return (DCMD_ERR);
340 	}
341 
342 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
343 		return (DCMD_OK);
344 
345 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
346 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
347 
348 	return (DCMD_OK);
349 }
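
/*
 * Illustrative usage: with no address, ::kmem_cache walks and prints every
 * cache; -n restricts output to caches whose name contains the given
 * substring (the strstr() match above), e.g.
 *
 *	> ::kmem_cache -n kmem_alloc
 */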
350 
351 void
352 kmem_cache_help(void)
353 {
354 	mdb_printf("%s", "Print kernel memory caches.\n\n");
355 	mdb_dec_indent(2);
356 	mdb_printf("%<b>OPTIONS%</b>\n");
357 	mdb_inc_indent(2);
358 	mdb_printf("%s",
359 "  -n name\n"
360 "        name of kmem cache (or matching partial name)\n"
361 "\n"
362 "Column\tDescription\n"
363 "\n"
364 "ADDR\t\taddress of kmem cache\n"
365 "NAME\t\tname of kmem cache\n"
366 "FLAG\t\tvarious cache state flags\n"
367 "CFLAG\t\tcache creation flags\n"
368 "BUFSIZE\tobject size in bytes\n"
369 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
370 }
371 
372 #define	LABEL_WIDTH	11
373 static void
374 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
375     size_t maxbuckets, size_t minbucketsize)
376 {
377 	uint64_t total;
378 	int buckets;
379 	int i;
380 	const int *distarray;
381 	int complete[2];
382 
383 	buckets = buffers_per_slab;
384 
385 	total = 0;
386 	for (i = 0; i <= buffers_per_slab; i++)
387 		total += ks_bucket[i];
388 
389 	if (maxbuckets > 1)
390 		buckets = MIN(buckets, maxbuckets);
391 
392 	if (minbucketsize > 1) {
393 		/*
394 		 * minbucketsize does not apply to the first bucket reserved
395 		 * for completely allocated slabs
396 		 */
397 		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
398 		    minbucketsize));
399 		if ((buckets < 2) && (buffers_per_slab > 1)) {
400 			buckets = 2;
401 			minbucketsize = (buffers_per_slab - 1);
402 		}
403 	}
404 
405 	/*
406 	 * The first printed bucket is reserved for completely allocated slabs.
407 	 * Passing (buckets - 1) excludes that bucket from the generated
408 	 * distribution, since we're handling it as a special case.
409 	 */
410 	complete[0] = buffers_per_slab;
411 	complete[1] = buffers_per_slab + 1;
412 	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
413 
414 	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
415 	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
416 
417 	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
418 	/*
419 	 * Print bucket ranges in descending order after the first bucket for
420 	 * completely allocated slabs, so a person can see immediately whether
421 	 * or not there is fragmentation without having to scan possibly
422 	 * multiple screens of output. Starting at (buckets - 2) excludes the
423 	 * extra terminating bucket.
424 	 */
425 	for (i = buckets - 2; i >= 0; i--) {
426 		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
427 	}
428 	mdb_printf("\n");
429 }
430 #undef LABEL_WIDTH
431 
432 /*ARGSUSED*/
433 static int
434 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
435 {
436 	*is_slab = B_TRUE;
437 	return (WALK_DONE);
438 }
439 
440 /*ARGSUSED*/
441 static int
442 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
443     boolean_t *is_slab)
444 {
445 	/*
446 	 * The "kmem_partial_slab" walker reports the first full slab if there
447 	 * are no partial slabs (for the sake of consumers that require at least
448 	 * one callback if there are any buffers in the cache).
449 	 */
450 	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
451 	return (WALK_DONE);
452 }
453 
454 typedef struct kmem_slab_usage {
455 	int ksu_refcnt;			/* count of allocated buffers on slab */
456 	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
457 } kmem_slab_usage_t;
458 
459 typedef struct kmem_slab_stats {
460 	const kmem_cache_t *ks_cp;
461 	int ks_slabs;			/* slabs in cache */
462 	int ks_partial_slabs;		/* partially allocated slabs in cache */
463 	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
464 	int ks_max_buffers_per_slab;	/* max buffers per slab */
465 	int ks_usage_len;		/* ks_usage array length */
466 	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
467 	uint_t *ks_bucket;		/* slab usage distribution */
468 } kmem_slab_stats_t;
469 
470 /*ARGSUSED*/
471 static int
472 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
473     kmem_slab_stats_t *ks)
474 {
475 	kmem_slab_usage_t *ksu;
476 	long unused;
477 
478 	ks->ks_slabs++;
479 	ks->ks_bucket[sp->slab_refcnt]++;
480 
481 	unused = (sp->slab_chunks - sp->slab_refcnt);
482 	if (unused == 0) {
483 		return (WALK_NEXT);
484 	}
485 
486 	ks->ks_partial_slabs++;
487 	ks->ks_unused_buffers += unused;
488 
489 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
490 		kmem_slab_usage_t *usage;
491 		int len = ks->ks_usage_len;
492 
493 		len = (len == 0 ? 16 : len * 2);
494 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
495 		if (ks->ks_usage != NULL) {
496 			bcopy(ks->ks_usage, usage,
497 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
498 			mdb_free(ks->ks_usage,
499 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
500 		}
501 		ks->ks_usage = usage;
502 		ks->ks_usage_len = len;
503 	}
504 
505 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
506 	ksu->ksu_refcnt = sp->slab_refcnt;
507 	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
508 	return (WALK_NEXT);
509 }
510 
511 static void
512 kmem_slabs_header()
513 {
514 	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
515 	    "", "", "Partial", "", "Unused", "");
516 	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
517 	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
518 	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
519 	    "-------------------------", "--------", "--------", "---------",
520 	    "---------", "------");
521 }
522 
523 int
524 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
525 {
526 	kmem_cache_t c;
527 	kmem_slab_stats_t stats;
528 	mdb_walk_cb_t cb;
529 	int pct;
530 	int tenths_pct;
531 	size_t maxbuckets = 1;
532 	size_t minbucketsize = 0;
533 	const char *filter = NULL;
534 	const char *name = NULL;
535 	uint_t opt_v = FALSE;
536 	boolean_t buckets = B_FALSE;
537 	boolean_t skip = B_FALSE;
538 
539 	if (mdb_getopts(argc, argv,
540 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
541 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
542 	    'n', MDB_OPT_STR, &filter,
543 	    'N', MDB_OPT_STR, &name,
544 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
545 	    NULL) != argc) {
546 		return (DCMD_USAGE);
547 	}
548 
549 	if ((maxbuckets != 1) || (minbucketsize != 0)) {
550 		buckets = B_TRUE;
551 	}
552 
553 	if (!(flags & DCMD_ADDRSPEC)) {
554 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
555 		    argv) == -1) {
556 			mdb_warn("can't walk kmem_cache");
557 			return (DCMD_ERR);
558 		}
559 		return (DCMD_OK);
560 	}
561 
562 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
563 		mdb_warn("couldn't read kmem_cache at %p", addr);
564 		return (DCMD_ERR);
565 	}
566 
567 	if (name == NULL) {
568 		skip = ((filter != NULL) &&
569 		    (strstr(c.cache_name, filter) == NULL));
570 	} else if (filter == NULL) {
571 		skip = (strcmp(c.cache_name, name) != 0);
572 	} else {
573 		/* match either -n or -N */
574 		skip = ((strcmp(c.cache_name, name) != 0) &&
575 		    (strstr(c.cache_name, filter) == NULL));
576 	}
577 
578 	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
579 		kmem_slabs_header();
580 	} else if ((opt_v || buckets) && !skip) {
581 		if (DCMD_HDRSPEC(flags)) {
582 			kmem_slabs_header();
583 		} else {
584 			boolean_t is_slab = B_FALSE;
585 			const char *walker_name;
586 			if (opt_v) {
587 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
588 				walker_name = "kmem_slab_partial";
589 			} else {
590 				cb = (mdb_walk_cb_t)kmem_first_slab;
591 				walker_name = "kmem_slab";
592 			}
593 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
594 			if (is_slab) {
595 				kmem_slabs_header();
596 			}
597 		}
598 	}
599 
600 	if (skip) {
601 		return (DCMD_OK);
602 	}
603 
604 	bzero(&stats, sizeof (kmem_slab_stats_t));
605 	stats.ks_cp = &c;
606 	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
607 	/* +1 to include a zero bucket */
608 	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
609 	    sizeof (*stats.ks_bucket), UM_SLEEP);
610 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
611 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
612 
613 	if (c.cache_buftotal == 0) {
614 		pct = 0;
615 		tenths_pct = 0;
616 	} else {
617 		uint64_t n = stats.ks_unused_buffers * 10000;
618 		pct = (int)(n / c.cache_buftotal);
619 		tenths_pct = pct - ((pct / 100) * 100);
620 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
621 		if (tenths_pct == 10) {
622 			pct += 100;
623 			tenths_pct = 0;
624 		}
625 	}
626 
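	/*
	 * pct currently holds the waste in hundredths of a percent.  As an
	 * illustrative example, 123 unused buffers out of a 1000-buffer
	 * total gives n = 1230000, pct = 1230 and tenths_pct = 3; the
	 * division below reduces pct to 12, and the line prints as "12.3%".
	 */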
627 	pct /= 100;
628 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
629 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
630 	    stats.ks_unused_buffers, pct, tenths_pct);
631 
632 	if (maxbuckets == 0) {
633 		maxbuckets = stats.ks_max_buffers_per_slab;
634 	}
635 
636 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
637 	    (stats.ks_slabs > 0)) {
638 		mdb_printf("\n");
639 		kmem_slabs_print_dist(stats.ks_bucket,
640 		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
641 	}
642 
643 	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
644 	    sizeof (*stats.ks_bucket));
645 
646 	if (!opt_v) {
647 		return (DCMD_OK);
648 	}
649 
650 	if (opt_v && (stats.ks_partial_slabs > 0)) {
651 		int i;
652 		kmem_slab_usage_t *ksu;
653 
654 		mdb_printf("  %d complete, %d partial",
655 		    (stats.ks_slabs - stats.ks_partial_slabs),
656 		    stats.ks_partial_slabs);
657 		if (stats.ks_partial_slabs > 0) {
658 			mdb_printf(" (%d):", stats.ks_max_buffers_per_slab);
659 		}
660 		for (i = 0; i < stats.ks_partial_slabs; i++) {
661 			ksu = &stats.ks_usage[i];
662 			if (ksu->ksu_nomove) {
663 				const char *symbol = "*";
664 				mdb_printf(" %d%s", ksu->ksu_refcnt, symbol);
665 			} else {
666 				mdb_printf(" %d", ksu->ksu_refcnt);
667 			}
668 		}
669 		mdb_printf("\n\n");
670 	}
671 
672 	if (stats.ks_usage_len > 0) {
673 		mdb_free(stats.ks_usage,
674 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
675 	}
676 
677 	return (DCMD_OK);
678 }
679 
680 void
681 kmem_slabs_help(void)
682 {
683 	mdb_printf("%s",
684 "Display slab usage per kmem cache.\n\n");
685 	mdb_dec_indent(2);
686 	mdb_printf("%<b>OPTIONS%</b>\n");
687 	mdb_inc_indent(2);
688 	mdb_printf("%s",
689 "  -n name\n"
690 "        name of kmem cache (or matching partial name)\n"
691 "  -N name\n"
692 "        exact name of kmem cache\n"
693 "  -b maxbins\n"
694 "        Print a distribution of allocated buffers per slab using at\n"
695 "        most maxbins bins. The first bin is reserved for completely\n"
696 "        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
697 "        effect as specifying the maximum allocated buffers per slab\n"
698 "        or setting minbinsize to 1 (-B 1).\n"
699 "  -B minbinsize\n"
700 "        Print a distribution of allocated buffers per slab, making\n"
701 "        all bins (except the first, reserved for completely allocated\n"
702 "        slabs) at least minbinsize buffers apart.\n"
703 "  -v    verbose output: List the allocated buffer count of each partial\n"
704 "        slab on the free list in order from front to back to show how\n"
705 "        closely the slabs are ordered by usage. For example\n"
706 "\n"
707 "          10 complete, 3 partial (8): 7 3 1\n"
708 "\n"
709 "        means there are thirteen slabs with eight buffers each, including\n"
710 "        three partially allocated slabs with fewer than eight buffers\n"
711 "        allocated.\n"
712 "\n"
713 "        Buffer allocations are always from the front of the partial slab\n"
714 "        list. When a buffer is freed from a completely used slab, that\n"
715 "        slab is added to the front of the partial slab list. Assuming\n"
716 "        that all buffers are equally likely to be freed soon, the\n"
717 "        desired order of partial slabs is most-used at the front of the\n"
718 "        list and least-used at the back (as in the example above).\n"
719 "        However, if a slab contains an allocated buffer that will not\n"
720 "        soon be freed, it would be better for that slab to be at the\n"
721 "        front where all of its buffers can be allocated. Taking a slab\n"
722 "        off the partial slab list (either with all buffers freed or all\n"
723 "        buffers allocated) reduces cache fragmentation.\n"
724 "\n"
725 "        The allocated buffer count shown for a partial slab (9 in the\n"
726 "        example below) may be marked as follows:\n"
727 "\n"
728 "        9*   An asterisk indicates that kmem has marked the slab non-\n"
729 "        reclaimable because the kmem client refused to move one of the\n"
730 "        slab's buffers. Since kmem does not expect to completely free the\n"
731 "        slab, it moves it to the front of the list in the hope of\n"
732 "        completely allocating it instead. A slab marked with an asterisk\n"
733 "        stays marked for as long as it remains on the partial slab list.\n"
734 "\n"
735 "Column\t\tDescription\n"
736 "\n"
737 "Cache Name\t\tname of kmem cache\n"
738 "Slabs\t\t\ttotal slab count\n"
739 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
740 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
741 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
742 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
743 "\t\t\t  for accounting structures (debug mode), slab\n"
744 "\t\t\t  coloring (incremental small offsets to stagger\n"
745 "\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
746 }
747 
748 static int
749 addrcmp(const void *lhs, const void *rhs)
750 {
751 	uintptr_t p1 = *((uintptr_t *)lhs);
752 	uintptr_t p2 = *((uintptr_t *)rhs);
753 
754 	if (p1 < p2)
755 		return (-1);
756 	if (p1 > p2)
757 		return (1);
758 	return (0);
759 }
760 
761 static int
762 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
763 {
764 	const kmem_bufctl_audit_t *bcp1 = *lhs;
765 	const kmem_bufctl_audit_t *bcp2 = *rhs;
766 
767 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
768 		return (-1);
769 
770 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
771 		return (1);
772 
773 	return (0);
774 }
775 
776 typedef struct kmem_hash_walk {
777 	uintptr_t *kmhw_table;
778 	size_t kmhw_nelems;
779 	size_t kmhw_pos;
780 	kmem_bufctl_t kmhw_cur;
781 } kmem_hash_walk_t;
782 
783 int
784 kmem_hash_walk_init(mdb_walk_state_t *wsp)
785 {
786 	kmem_hash_walk_t *kmhw;
787 	uintptr_t *hash;
788 	kmem_cache_t c;
789 	uintptr_t haddr, addr = wsp->walk_addr;
790 	size_t nelems;
791 	size_t hsize;
792 
793 	if (addr == NULL) {
794 		mdb_warn("kmem_hash doesn't support global walks\n");
795 		return (WALK_ERR);
796 	}
797 
798 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
799 		mdb_warn("couldn't read cache at addr %p", addr);
800 		return (WALK_ERR);
801 	}
802 
803 	if (!(c.cache_flags & KMF_HASH)) {
804 		mdb_warn("cache %p doesn't have a hash table\n", addr);
805 		return (WALK_DONE);		/* nothing to do */
806 	}
807 
808 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
809 	kmhw->kmhw_cur.bc_next = NULL;
810 	kmhw->kmhw_pos = 0;
811 
812 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
813 	hsize = nelems * sizeof (uintptr_t);
814 	haddr = (uintptr_t)c.cache_hash_table;
815 
816 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
817 	if (mdb_vread(hash, hsize, haddr) == -1) {
818 		mdb_warn("failed to read hash table at %p", haddr);
819 		mdb_free(hash, hsize);
820 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
821 		return (WALK_ERR);
822 	}
823 
824 	wsp->walk_data = kmhw;
825 
826 	return (WALK_NEXT);
827 }
828 
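/*
 * Each step returns the next bufctl in the hash: follow the current
 * bucket's bc_next chain if possible, otherwise advance to the next
 * non-empty bucket in the snapshot of the table taken at init time.
 */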
829 int
830 kmem_hash_walk_step(mdb_walk_state_t *wsp)
831 {
832 	kmem_hash_walk_t *kmhw = wsp->walk_data;
833 	uintptr_t addr = NULL;
834 
835 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
836 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
837 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
838 				break;
839 		}
840 	}
841 	if (addr == NULL)
842 		return (WALK_DONE);
843 
844 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
845 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
846 		return (WALK_ERR);
847 	}
848 
849 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
850 }
851 
852 void
853 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
854 {
855 	kmem_hash_walk_t *kmhw = wsp->walk_data;
856 
857 	if (kmhw == NULL)
858 		return;
859 
860 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
861 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
862 }
863 
864 /*
865  * Find the address of the bufctl structure for the address 'buf' in cache
866  * 'cp', which is at address caddr, and place it in *out.
867  */
868 static int
869 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
870 {
871 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
872 	kmem_bufctl_t *bcp;
873 	kmem_bufctl_t bc;
874 
875 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
876 		mdb_warn("unable to read hash bucket for %p in cache %p",
877 		    buf, caddr);
878 		return (-1);
879 	}
880 
881 	while (bcp != NULL) {
882 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
883 		    (uintptr_t)bcp) == -1) {
884 			mdb_warn("unable to read bufctl at %p", bcp);
885 			return (-1);
886 		}
887 		if (bc.bc_addr == buf) {
888 			*out = (uintptr_t)bcp;
889 			return (0);
890 		}
891 		bcp = bc.bc_next;
892 	}
893 
894 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
895 	return (-1);
896 }
897 
898 int
899 kmem_get_magsize(const kmem_cache_t *cp)
900 {
901 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
902 	GElf_Sym mt_sym;
903 	kmem_magtype_t mt;
904 	int res;
905 
906 	/*
907 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
908 	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
909 	 * it is okay to return 0 for them.
910 	 */
911 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
912 	    (cp->cache_flags & KMF_NOMAGAZINE))
913 		return (res);
914 
915 	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
916 		mdb_warn("unable to read 'kmem_magtype'");
917 	} else if (addr < mt_sym.st_value ||
918 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
919 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
920 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
921 		    cp->cache_name, addr);
922 		return (0);
923 	}
924 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
925 		mdb_warn("unable to read magtype at %a", addr);
926 		return (0);
927 	}
928 	return (mt.mt_magsize);
929 }
930 
931 /*ARGSUSED*/
932 static int
933 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
934 {
935 	*est -= (sp->slab_chunks - sp->slab_refcnt);
936 
937 	return (WALK_NEXT);
938 }
939 
940 /*
941  * Returns an upper bound on the number of allocated buffers in a given
942  * cache.
943  */
944 size_t
945 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
946 {
947 	int magsize;
948 	size_t cache_est;
949 
950 	cache_est = cp->cache_buftotal;
951 
952 	(void) mdb_pwalk("kmem_slab_partial",
953 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
954 
955 	if ((magsize = kmem_get_magsize(cp)) != 0) {
956 		size_t mag_est = cp->cache_full.ml_total * magsize;
957 
958 		if (cache_est >= mag_est) {
959 			cache_est -= mag_est;
960 		} else {
961 			mdb_warn("cache %p's magazine layer holds more buffers "
962 			    "than the slab layer.\n", addr);
963 		}
964 	}
965 	return (cache_est);
966 }
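
/*
 * In other words, the estimate above is
 *
 *	cache_buftotal
 *	    - (unallocated buffers on partial slabs)
 *	    - (full magazines in the depot * rounds per magazine)
 *
 * It remains an upper bound because buffers sitting in the per-CPU loaded
 * and previously-loaded magazines are still counted as allocated.
 */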
967 
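/*
 * READMAG_ROUNDS() expects kmp to point at the magazine to read and uses
 * mp, magbsize, maglist, magcnt, magmax and i from the enclosing function:
 * it reads the magazine into mp and appends the requested number of rounds
 * (buffer pointers) to maglist, jumping to the fail: label on any error.
 */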
968 #define	READMAG_ROUNDS(rounds) { \
969 	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
970 		mdb_warn("couldn't read magazine at %p", kmp); \
971 		goto fail; \
972 	} \
973 	for (i = 0; i < rounds; i++) { \
974 		maglist[magcnt++] = mp->mag_round[i]; \
975 		if (magcnt == magmax) { \
976 			mdb_warn("%d magazines exceeds fudge factor\n", \
977 			    magcnt); \
978 			goto fail; \
979 		} \
980 	} \
981 }
982 
983 int
984 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
985     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
986 {
987 	kmem_magazine_t *kmp, *mp;
988 	void **maglist = NULL;
989 	int i, cpu;
990 	size_t magsize, magmax, magbsize;
991 	size_t magcnt = 0;
992 
993 	/*
994 	 * Read the magtype out of the cache, after verifying the pointer's
995 	 * correctness.
996 	 */
997 	magsize = kmem_get_magsize(cp);
998 	if (magsize == 0) {
999 		*maglistp = NULL;
1000 		*magcntp = 0;
1001 		*magmaxp = 0;
1002 		return (WALK_NEXT);
1003 	}
1004 
1005 	/*
1006 	 * There are several places where we need to go buffer hunting:
1007 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1008 	 * and the full magazine list in the depot.
1009 	 *
1010 	 * For an upper bound on the number of buffers in the magazine
1011 	 * layer, we have the number of magazines on the cache_full
1012 	 * list plus at most two magazines per CPU (the loaded and the
1013 	 * spare).  Toss in 100 magazines as a fudge factor in case this
1014 	 * is live (the number "100" comes from the same fudge factor in
1015 	 * crash(1M)).
1016 	 */
1017 	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1018 	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1019 
1020 	if (magbsize >= PAGESIZE / 2) {
1021 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1022 		    addr, magbsize);
1023 		return (WALK_ERR);
1024 	}
1025 
1026 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1027 	mp = mdb_alloc(magbsize, alloc_flags);
1028 	if (mp == NULL || maglist == NULL)
1029 		goto fail;
1030 
1031 	/*
1032 	 * First up: the magazines in the depot (i.e. on the cache_full list).
1033 	 */
1034 	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1035 		READMAG_ROUNDS(magsize);
1036 		kmp = mp->mag_next;
1037 
1038 		if (kmp == cp->cache_full.ml_list)
1039 			break; /* cache_full list loop detected */
1040 	}
1041 
1042 	dprintf(("cache_full list done\n"));
1043 
1044 	/*
1045 	 * Now whip through the CPUs, snagging the loaded magazines
1046 	 * and full spares.
1047 	 */
1048 	for (cpu = 0; cpu < ncpus; cpu++) {
1049 		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1050 
1051 		dprintf(("reading cpu cache %p\n",
1052 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
1053 
1054 		if (ccp->cc_rounds > 0 &&
1055 		    (kmp = ccp->cc_loaded) != NULL) {
1056 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
1057 			READMAG_ROUNDS(ccp->cc_rounds);
1058 		}
1059 
1060 		if (ccp->cc_prounds > 0 &&
1061 		    (kmp = ccp->cc_ploaded) != NULL) {
1062 			dprintf(("reading %d previously loaded rounds\n",
1063 			    ccp->cc_prounds));
1064 			READMAG_ROUNDS(ccp->cc_prounds);
1065 		}
1066 	}
1067 
1068 	dprintf(("magazine layer: %d buffers\n", magcnt));
1069 
1070 	if (!(alloc_flags & UM_GC))
1071 		mdb_free(mp, magbsize);
1072 
1073 	*maglistp = maglist;
1074 	*magcntp = magcnt;
1075 	*magmaxp = magmax;
1076 
1077 	return (WALK_NEXT);
1078 
1079 fail:
1080 	if (!(alloc_flags & UM_GC)) {
1081 		if (mp)
1082 			mdb_free(mp, magbsize);
1083 		if (maglist)
1084 			mdb_free(maglist, magmax * sizeof (void *));
1085 	}
1086 	return (WALK_ERR);
1087 }
1088 
1089 static int
1090 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1091 {
1092 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1093 }
1094 
1095 static int
1096 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1097 {
1098 	kmem_bufctl_audit_t b;
1099 
1100 	/*
1101 	 * if KMF_AUDIT is not set, we know that we're looking at a
1102 	 * kmem_bufctl_t.
1103 	 */
1104 	if (!(cp->cache_flags & KMF_AUDIT) ||
1105 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1106 		(void) memset(&b, 0, sizeof (b));
1107 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1108 			mdb_warn("unable to read bufctl at %p", buf);
1109 			return (WALK_ERR);
1110 		}
1111 	}
1112 
1113 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1114 }
1115 
1116 typedef struct kmem_walk {
1117 	int kmw_type;
1118 
1119 	uintptr_t kmw_addr;		/* cache address */
1120 	kmem_cache_t *kmw_cp;
1121 	size_t kmw_csize;
1122 
1123 	/*
1124 	 * magazine layer
1125 	 */
1126 	void **kmw_maglist;
1127 	size_t kmw_max;
1128 	size_t kmw_count;
1129 	size_t kmw_pos;
1130 
1131 	/*
1132 	 * slab layer
1133 	 */
1134 	char *kmw_valid;	/* to keep track of freed buffers */
1135 	char *kmw_ubase;	/* buffer for slab data */
1136 } kmem_walk_t;
1137 
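/*
 * The generic kmem walk works in two stages: the init function below
 * snapshots the cache and every buffer held in its magazine layer, then
 * layers the walk over "kmem_hash" (allocated walks of KMF_HASH caches) or
 * over "kmem_slab"/"kmem_slab_partial".  kmem_walk_step() reports each
 * buffer, consulting the magazine snapshot and the slab freelists to decide
 * whether a given buffer is currently allocated or free.
 */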
1138 static int
1139 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1140 {
1141 	kmem_walk_t *kmw;
1142 	int ncpus, csize;
1143 	kmem_cache_t *cp;
1144 	size_t vm_quantum;
1145 
1146 	size_t magmax, magcnt;
1147 	void **maglist = NULL;
1148 	uint_t chunksize, slabsize;
1149 	int status = WALK_ERR;
1150 	uintptr_t addr = wsp->walk_addr;
1151 	const char *layered;
1152 
1153 	type &= ~KM_HASH;
1154 
1155 	if (addr == NULL) {
1156 		mdb_warn("kmem walk doesn't support global walks\n");
1157 		return (WALK_ERR);
1158 	}
1159 
1160 	dprintf(("walking %p\n", addr));
1161 
1162 	/*
1163 	 * First we need to figure out how many CPUs are configured in the
1164 	 * system to know how much to slurp out.
1165 	 */
1166 	mdb_readvar(&ncpus, "max_ncpus");
1167 
1168 	csize = KMEM_CACHE_SIZE(ncpus);
1169 	cp = mdb_alloc(csize, UM_SLEEP);
1170 
1171 	if (mdb_vread(cp, csize, addr) == -1) {
1172 		mdb_warn("couldn't read cache at addr %p", addr);
1173 		goto out2;
1174 	}
1175 
1176 	/*
1177 	 * It's easy for someone to hand us an invalid cache address.
1178 	 * Unfortunately, it is hard for this walker to survive an
1179 	 * invalid cache cleanly.  So we make sure that:
1180 	 *
1181 	 *	1. the vmem arena for the cache is readable,
1182 	 *	2. the vmem arena's quantum is a power of 2,
1183 	 *	3. our slabsize is a multiple of the quantum, and
1184 	 *	4. our chunksize is >0 and less than our slabsize.
1185 	 */
1186 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1187 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1188 	    vm_quantum == 0 ||
1189 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1190 	    cp->cache_slabsize < vm_quantum ||
1191 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1192 	    cp->cache_chunksize == 0 ||
1193 	    cp->cache_chunksize > cp->cache_slabsize) {
1194 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1195 		goto out2;
1196 	}
1197 
1198 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1199 
1200 	if (cp->cache_buftotal == 0) {
1201 		mdb_free(cp, csize);
1202 		return (WALK_DONE);
1203 	}
1204 
1205 	/*
1206 	 * If they ask for bufctls, but it's a small-slab cache,
1207 	 * there is nothing to report.
1208 	 */
1209 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1210 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1211 		    cp->cache_flags));
1212 		mdb_free(cp, csize);
1213 		return (WALK_DONE);
1214 	}
1215 
1216 	/*
1217 	 * If they want constructed buffers, but there's no constructor or
1218 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1219 	 */
1220 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1221 	    cp->cache_constructor == NULL ||
1222 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1223 		mdb_free(cp, csize);
1224 		return (WALK_DONE);
1225 	}
1226 
1227 	/*
1228 	 * Read in the contents of the magazine layer
1229 	 */
1230 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1231 	    &magmax, UM_SLEEP) == WALK_ERR)
1232 		goto out2;
1233 
1234 	/*
1235 	 * We have all of the buffers from the magazines;  if we are walking
1236 	 * allocated buffers, sort them so we can bsearch them later.
1237 	 */
1238 	if (type & KM_ALLOCATED)
1239 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1240 
1241 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1242 
1243 	kmw->kmw_type = type;
1244 	kmw->kmw_addr = addr;
1245 	kmw->kmw_cp = cp;
1246 	kmw->kmw_csize = csize;
1247 	kmw->kmw_maglist = maglist;
1248 	kmw->kmw_max = magmax;
1249 	kmw->kmw_count = magcnt;
1250 	kmw->kmw_pos = 0;
1251 
1252 	/*
1253 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1254 	 * hash table instead of the slab layer.
1255 	 */
1256 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1257 		layered = "kmem_hash";
1258 
1259 		kmw->kmw_type |= KM_HASH;
1260 	} else {
1261 		/*
1262 		 * If we are walking freed buffers, we only need the
1263 		 * magazine layer plus the partially allocated slabs.
1264 		 * To walk allocated buffers, we need all of the slabs.
1265 		 */
1266 		if (type & KM_ALLOCATED)
1267 			layered = "kmem_slab";
1268 		else
1269 			layered = "kmem_slab_partial";
1270 
1271 		/*
1272 		 * for small-slab caches, we read in the entire slab.  For
1273 		 * freed buffers, we can just walk the freelist.  For
1274 		 * allocated buffers, we use a 'valid' array to track
1275 		 * the freed buffers.
1276 		 */
1277 		if (!(cp->cache_flags & KMF_HASH)) {
1278 			chunksize = cp->cache_chunksize;
1279 			slabsize = cp->cache_slabsize;
1280 
1281 			kmw->kmw_ubase = mdb_alloc(slabsize +
1282 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1283 
1284 			if (type & KM_ALLOCATED)
1285 				kmw->kmw_valid =
1286 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1287 		}
1288 	}
1289 
1290 	status = WALK_NEXT;
1291 
1292 	if (mdb_layered_walk(layered, wsp) == -1) {
1293 		mdb_warn("unable to start layered '%s' walk", layered);
1294 		status = WALK_ERR;
1295 	}
1296 
1297 out1:
1298 	if (status == WALK_ERR) {
1299 		if (kmw->kmw_valid)
1300 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1301 
1302 		if (kmw->kmw_ubase)
1303 			mdb_free(kmw->kmw_ubase, slabsize +
1304 			    sizeof (kmem_bufctl_t));
1305 
1306 		if (kmw->kmw_maglist)
1307 			mdb_free(kmw->kmw_maglist,
1308 			    kmw->kmw_max * sizeof (uintptr_t));
1309 
1310 		mdb_free(kmw, sizeof (kmem_walk_t));
1311 		wsp->walk_data = NULL;
1312 	}
1313 
1314 out2:
1315 	if (status == WALK_ERR)
1316 		mdb_free(cp, csize);
1317 
1318 	return (status);
1319 }
1320 
1321 int
1322 kmem_walk_step(mdb_walk_state_t *wsp)
1323 {
1324 	kmem_walk_t *kmw = wsp->walk_data;
1325 	int type = kmw->kmw_type;
1326 	kmem_cache_t *cp = kmw->kmw_cp;
1327 
1328 	void **maglist = kmw->kmw_maglist;
1329 	int magcnt = kmw->kmw_count;
1330 
1331 	uintptr_t chunksize, slabsize;
1332 	uintptr_t addr;
1333 	const kmem_slab_t *sp;
1334 	const kmem_bufctl_t *bcp;
1335 	kmem_bufctl_t bc;
1336 
1337 	int chunks;
1338 	char *kbase;
1339 	void *buf;
1340 	int i, ret;
1341 
1342 	char *valid, *ubase;
1343 
1344 	/*
1345 	 * first, handle the 'kmem_hash' layered walk case
1346 	 */
1347 	if (type & KM_HASH) {
1348 		/*
1349 		 * We have a buffer which has been allocated out of the
1350 		 * global layer. We need to make sure that it's not
1351 		 * actually sitting in a magazine before we report it as
1352 		 * an allocated buffer.
1353 		 */
1354 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1355 
1356 		if (magcnt > 0 &&
1357 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1358 		    addrcmp) != NULL)
1359 			return (WALK_NEXT);
1360 
1361 		if (type & KM_BUFCTL)
1362 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1363 
1364 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1365 	}
1366 
1367 	ret = WALK_NEXT;
1368 
1369 	addr = kmw->kmw_addr;
1370 
1371 	/*
1372 	 * If we're walking freed buffers, report everything in the
1373 	 * magazine layer before processing the first slab.
1374 	 */
1375 	if ((type & KM_FREE) && magcnt != 0) {
1376 		kmw->kmw_count = 0;		/* only do this once */
1377 		for (i = 0; i < magcnt; i++) {
1378 			buf = maglist[i];
1379 
1380 			if (type & KM_BUFCTL) {
1381 				uintptr_t out;
1382 
1383 				if (cp->cache_flags & KMF_BUFTAG) {
1384 					kmem_buftag_t *btp;
1385 					kmem_buftag_t tag;
1386 
1387 					/* LINTED - alignment */
1388 					btp = KMEM_BUFTAG(cp, buf);
1389 					if (mdb_vread(&tag, sizeof (tag),
1390 					    (uintptr_t)btp) == -1) {
1391 						mdb_warn("reading buftag for "
1392 						    "%p at %p", buf, btp);
1393 						continue;
1394 					}
1395 					out = (uintptr_t)tag.bt_bufctl;
1396 				} else {
1397 					if (kmem_hash_lookup(cp, addr, buf,
1398 					    &out) == -1)
1399 						continue;
1400 				}
1401 				ret = bufctl_walk_callback(cp, wsp, out);
1402 			} else {
1403 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1404 			}
1405 
1406 			if (ret != WALK_NEXT)
1407 				return (ret);
1408 		}
1409 	}
1410 
1411 	/*
1412 	 * If they want constructed buffers, we're finished, since the
1413 	 * magazine layer holds them all.
1414 	 */
1415 	if (type & KM_CONSTRUCTED)
1416 		return (WALK_DONE);
1417 
1418 	/*
1419 	 * Handle the buffers in the current slab
1420 	 */
1421 	chunksize = cp->cache_chunksize;
1422 	slabsize = cp->cache_slabsize;
1423 
1424 	sp = wsp->walk_layer;
1425 	chunks = sp->slab_chunks;
1426 	kbase = sp->slab_base;
1427 
1428 	dprintf(("kbase is %p\n", kbase));
1429 
1430 	if (!(cp->cache_flags & KMF_HASH)) {
1431 		valid = kmw->kmw_valid;
1432 		ubase = kmw->kmw_ubase;
1433 
1434 		if (mdb_vread(ubase, chunks * chunksize,
1435 		    (uintptr_t)kbase) == -1) {
1436 			mdb_warn("failed to read slab contents at %p", kbase);
1437 			return (WALK_ERR);
1438 		}
1439 
1440 		/*
1441 		 * Set up the valid map as fully allocated -- we'll punch
1442 		 * out the freelist.
1443 		 */
1444 		if (type & KM_ALLOCATED)
1445 			(void) memset(valid, 1, chunks);
1446 	} else {
1447 		valid = NULL;
1448 		ubase = NULL;
1449 	}
1450 
1451 	/*
1452 	 * walk the slab's freelist
1453 	 */
1454 	bcp = sp->slab_head;
1455 
1456 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1457 
1458 	/*
1459 	 * Since we could be in the middle of allocating a buffer,
1460 	 * our refcnt could be one higher than it ought to be.  So we
1461 	 * check one entry further on the freelist than the count allows.
1462 	 */
1463 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1464 		uint_t ndx;
1465 
1466 		dprintf(("bcp is %p\n", bcp));
1467 
1468 		if (bcp == NULL) {
1469 			if (i == chunks)
1470 				break;
1471 			mdb_warn(
1472 			    "slab %p in cache %p freelist too short by %d\n",
1473 			    sp, addr, chunks - i);
1474 			break;
1475 		}
1476 
1477 		if (cp->cache_flags & KMF_HASH) {
1478 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1479 				mdb_warn("failed to read bufctl ptr at %p",
1480 				    bcp);
1481 				break;
1482 			}
1483 			buf = bc.bc_addr;
1484 		} else {
1485 			/*
1486 			 * Otherwise the buffer is in the slab which
1487 			 * we've read in;  we just need to determine
1488 			 * its offset in the slab to find the
1489 			 * kmem_bufctl_t.
1490 			 */
1491 			bc = *((kmem_bufctl_t *)
1492 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1493 			    (uintptr_t)ubase));
1494 
1495 			buf = KMEM_BUF(cp, bcp);
1496 		}
1497 
1498 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1499 
1500 		if (ndx > slabsize / cp->cache_bufsize) {
1501 			/*
1502 			 * This is very wrong; we have managed to find
1503 			 * a buffer in the slab which shouldn't
1504 			 * actually be here.  Emit a warning, and
1505 			 * try to continue.
1506 			 */
1507 			mdb_warn("buf %p is out of range for "
1508 			    "slab %p, cache %p\n", buf, sp, addr);
1509 		} else if (type & KM_ALLOCATED) {
1510 			/*
1511 			 * we have found a buffer on the slab's freelist;
1512 			 * clear its entry
1513 			 */
1514 			valid[ndx] = 0;
1515 		} else {
1516 			/*
1517 			 * Report this freed buffer
1518 			 */
1519 			if (type & KM_BUFCTL) {
1520 				ret = bufctl_walk_callback(cp, wsp,
1521 				    (uintptr_t)bcp);
1522 			} else {
1523 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1524 			}
1525 			if (ret != WALK_NEXT)
1526 				return (ret);
1527 		}
1528 
1529 		bcp = bc.bc_next;
1530 	}
1531 
1532 	if (bcp != NULL) {
1533 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1534 		    sp, addr, bcp));
1535 	}
1536 
1537 	/*
1538 	 * If we are walking freed buffers, the loop above handled reporting
1539 	 * them.
1540 	 */
1541 	if (type & KM_FREE)
1542 		return (WALK_NEXT);
1543 
1544 	if (type & KM_BUFCTL) {
1545 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1546 		    "cache %p\n", addr);
1547 		return (WALK_ERR);
1548 	}
1549 
1550 	/*
1551 	 * Report allocated buffers, skipping buffers in the magazine layer.
1552 	 * We only get this far for small-slab caches.
1553 	 */
1554 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1555 		buf = (char *)kbase + i * chunksize;
1556 
1557 		if (!valid[i])
1558 			continue;		/* on slab freelist */
1559 
1560 		if (magcnt > 0 &&
1561 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1562 		    addrcmp) != NULL)
1563 			continue;		/* in magazine layer */
1564 
1565 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1566 	}
1567 	return (ret);
1568 }
1569 
1570 void
1571 kmem_walk_fini(mdb_walk_state_t *wsp)
1572 {
1573 	kmem_walk_t *kmw = wsp->walk_data;
1574 	uintptr_t chunksize;
1575 	uintptr_t slabsize;
1576 
1577 	if (kmw == NULL)
1578 		return;
1579 
1580 	if (kmw->kmw_maglist != NULL)
1581 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1582 
1583 	chunksize = kmw->kmw_cp->cache_chunksize;
1584 	slabsize = kmw->kmw_cp->cache_slabsize;
1585 
1586 	if (kmw->kmw_valid != NULL)
1587 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1588 	if (kmw->kmw_ubase != NULL)
1589 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1590 
1591 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1592 	mdb_free(kmw, sizeof (kmem_walk_t));
1593 }
1594 
1595 /*ARGSUSED*/
1596 static int
1597 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1598 {
1599 	/*
1600 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1601 	 * memory in other caches.  This can be a little confusing, so we
1602 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1603 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1604 	 */
1605 	if (c->cache_cflags & KMC_NOTOUCH)
1606 		return (WALK_NEXT);
1607 
1608 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1609 	    wsp->walk_cbdata, addr) == -1)
1610 		return (WALK_DONE);
1611 
1612 	return (WALK_NEXT);
1613 }
1614 
1615 #define	KMEM_WALK_ALL(name, wsp) { \
1616 	wsp->walk_data = (name); \
1617 	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1618 		return (WALK_ERR); \
1619 	return (WALK_DONE); \
1620 }
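
/*
 * KMEM_WALK_ALL() implements the global (no address) form of the walks
 * below: it records the walker name in walk_data and re-invokes that walker
 * on every cache via kmem_walk_all() above, which skips KMC_NOTOUCH caches.
 */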
1621 
1622 int
1623 kmem_walk_init(mdb_walk_state_t *wsp)
1624 {
1625 	if (wsp->walk_arg != NULL)
1626 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1627 
1628 	if (wsp->walk_addr == NULL)
1629 		KMEM_WALK_ALL("kmem", wsp);
1630 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1631 }
1632 
1633 int
1634 bufctl_walk_init(mdb_walk_state_t *wsp)
1635 {
1636 	if (wsp->walk_addr == NULL)
1637 		KMEM_WALK_ALL("bufctl", wsp);
1638 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1639 }
1640 
1641 int
1642 freemem_walk_init(mdb_walk_state_t *wsp)
1643 {
1644 	if (wsp->walk_addr == NULL)
1645 		KMEM_WALK_ALL("freemem", wsp);
1646 	return (kmem_walk_init_common(wsp, KM_FREE));
1647 }
1648 
1649 int
1650 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1651 {
1652 	if (wsp->walk_addr == NULL)
1653 		KMEM_WALK_ALL("freemem_constructed", wsp);
1654 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1655 }
1656 
1657 int
1658 freectl_walk_init(mdb_walk_state_t *wsp)
1659 {
1660 	if (wsp->walk_addr == NULL)
1661 		KMEM_WALK_ALL("freectl", wsp);
1662 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1663 }
1664 
1665 int
1666 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1667 {
1668 	if (wsp->walk_addr == NULL)
1669 		KMEM_WALK_ALL("freectl_constructed", wsp);
1670 	return (kmem_walk_init_common(wsp,
1671 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1672 }
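
/*
 * Summary of the per-cache walk variants defined above:
 *
 *	kmem			KM_ALLOCATED
 *	bufctl			KM_ALLOCATED | KM_BUFCTL
 *	freemem			KM_FREE
 *	freemem_constructed	KM_FREE | KM_CONSTRUCTED
 *	freectl			KM_FREE | KM_BUFCTL
 *	freectl_constructed	KM_FREE | KM_BUFCTL | KM_CONSTRUCTED
 */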
1673 
1674 typedef struct bufctl_history_walk {
1675 	void		*bhw_next;
1676 	kmem_cache_t	*bhw_cache;
1677 	kmem_slab_t	*bhw_slab;
1678 	hrtime_t	bhw_timestamp;
1679 } bufctl_history_walk_t;
1680 
1681 int
1682 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1683 {
1684 	bufctl_history_walk_t *bhw;
1685 	kmem_bufctl_audit_t bc;
1686 	kmem_bufctl_audit_t bcn;
1687 
1688 	if (wsp->walk_addr == NULL) {
1689 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1690 		return (WALK_ERR);
1691 	}
1692 
1693 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1694 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1695 		return (WALK_ERR);
1696 	}
1697 
1698 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1699 	bhw->bhw_timestamp = 0;
1700 	bhw->bhw_cache = bc.bc_cache;
1701 	bhw->bhw_slab = bc.bc_slab;
1702 
1703 	/*
1704 	 * sometimes the first log entry matches the base bufctl;  in that
1705 	 * case, skip the base bufctl.
1706 	 */
1707 	if (bc.bc_lastlog != NULL &&
1708 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1709 	    bc.bc_addr == bcn.bc_addr &&
1710 	    bc.bc_cache == bcn.bc_cache &&
1711 	    bc.bc_slab == bcn.bc_slab &&
1712 	    bc.bc_timestamp == bcn.bc_timestamp &&
1713 	    bc.bc_thread == bcn.bc_thread)
1714 		bhw->bhw_next = bc.bc_lastlog;
1715 	else
1716 		bhw->bhw_next = (void *)wsp->walk_addr;
1717 
1718 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1719 	wsp->walk_data = bhw;
1720 
1721 	return (WALK_NEXT);
1722 }
1723 
1724 int
1725 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1726 {
1727 	bufctl_history_walk_t *bhw = wsp->walk_data;
1728 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1729 	uintptr_t baseaddr = wsp->walk_addr;
1730 	kmem_bufctl_audit_t bc;
1731 
1732 	if (addr == NULL)
1733 		return (WALK_DONE);
1734 
1735 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1736 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1737 		return (WALK_ERR);
1738 	}
1739 
1740 	/*
1741 	 * The bufctl is only valid if the address, cache, and slab are
1742 	 * correct.  We also check that the timestamp is decreasing, to
1743 	 * prevent infinite loops.
1744 	 */
1745 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1746 	    bc.bc_cache != bhw->bhw_cache ||
1747 	    bc.bc_slab != bhw->bhw_slab ||
1748 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1749 		return (WALK_DONE);
1750 
1751 	bhw->bhw_next = bc.bc_lastlog;
1752 	bhw->bhw_timestamp = bc.bc_timestamp;
1753 
1754 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1755 }
1756 
1757 void
1758 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1759 {
1760 	bufctl_history_walk_t *bhw = wsp->walk_data;
1761 
1762 	mdb_free(bhw, sizeof (*bhw));
1763 }
1764 
1765 typedef struct kmem_log_walk {
1766 	kmem_bufctl_audit_t *klw_base;
1767 	kmem_bufctl_audit_t **klw_sorted;
1768 	kmem_log_header_t klw_lh;
1769 	size_t klw_size;
1770 	size_t klw_maxndx;
1771 	size_t klw_ndx;
1772 } kmem_log_walk_t;
1773 
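/*
 * The transaction log consists of lh_nchunks chunks of lh_chunksize bytes,
 * each holding (lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1) records.
 * The init function below reads the entire log, gathers a pointer to every
 * record, and sorts them by timestamp, most recent first (bufctlcmp());
 * each step then reports one record along with its address in the target.
 */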
1774 int
1775 kmem_log_walk_init(mdb_walk_state_t *wsp)
1776 {
1777 	uintptr_t lp = wsp->walk_addr;
1778 	kmem_log_walk_t *klw;
1779 	kmem_log_header_t *lhp;
1780 	int maxndx, i, j, k;
1781 
1782 	/*
1783 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1784 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1785 	 */
1786 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1787 		mdb_warn("failed to read 'kmem_transaction_log'");
1788 		return (WALK_ERR);
1789 	}
1790 
1791 	if (lp == NULL) {
1792 		mdb_warn("log is disabled\n");
1793 		return (WALK_ERR);
1794 	}
1795 
1796 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1797 	lhp = &klw->klw_lh;
1798 
1799 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1800 		mdb_warn("failed to read log header at %p", lp);
1801 		mdb_free(klw, sizeof (kmem_log_walk_t));
1802 		return (WALK_ERR);
1803 	}
1804 
1805 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1806 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1807 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1808 
1809 	if (mdb_vread(klw->klw_base, klw->klw_size,
1810 	    (uintptr_t)lhp->lh_base) == -1) {
1811 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1812 		mdb_free(klw->klw_base, klw->klw_size);
1813 		mdb_free(klw, sizeof (kmem_log_walk_t));
1814 		return (WALK_ERR);
1815 	}
1816 
1817 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1818 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1819 
1820 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1821 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1822 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1823 
1824 		for (j = 0; j < maxndx; j++)
1825 			klw->klw_sorted[k++] = &chunk[j];
1826 	}
1827 
1828 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1829 	    (int(*)(const void *, const void *))bufctlcmp);
1830 
1831 	klw->klw_maxndx = k;
1832 	wsp->walk_data = klw;
1833 
1834 	return (WALK_NEXT);
1835 }
1836 
1837 int
1838 kmem_log_walk_step(mdb_walk_state_t *wsp)
1839 {
1840 	kmem_log_walk_t *klw = wsp->walk_data;
1841 	kmem_bufctl_audit_t *bcp;
1842 
1843 	if (klw->klw_ndx == klw->klw_maxndx)
1844 		return (WALK_DONE);
1845 
1846 	bcp = klw->klw_sorted[klw->klw_ndx++];
1847 
1848 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1849 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1850 }
1851 
1852 void
1853 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1854 {
1855 	kmem_log_walk_t *klw = wsp->walk_data;
1856 
1857 	mdb_free(klw->klw_base, klw->klw_size);
1858 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1859 	    sizeof (kmem_bufctl_audit_t *));
1860 	mdb_free(klw, sizeof (kmem_log_walk_t));
1861 }
1862 
1863 typedef struct allocdby_bufctl {
1864 	uintptr_t abb_addr;
1865 	hrtime_t abb_ts;
1866 } allocdby_bufctl_t;
1867 
1868 typedef struct allocdby_walk {
1869 	const char *abw_walk;
1870 	uintptr_t abw_thread;
1871 	size_t abw_nbufs;
1872 	size_t abw_size;
1873 	allocdby_bufctl_t *abw_buf;
1874 	size_t abw_ndx;
1875 } allocdby_walk_t;
1876 
1877 int
1878 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1879     allocdby_walk_t *abw)
1880 {
1881 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1882 		return (WALK_NEXT);
1883 
1884 	if (abw->abw_nbufs == abw->abw_size) {
1885 		allocdby_bufctl_t *buf;
1886 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1887 
1888 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1889 
1890 		bcopy(abw->abw_buf, buf, oldsize);
1891 		mdb_free(abw->abw_buf, oldsize);
1892 
1893 		abw->abw_size <<= 1;
1894 		abw->abw_buf = buf;
1895 	}
1896 
1897 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1898 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1899 	abw->abw_nbufs++;
1900 
1901 	return (WALK_NEXT);
1902 }
1903 
1904 /*ARGSUSED*/
1905 int
1906 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1907 {
1908 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1909 	    abw, addr) == -1) {
1910 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1911 		return (WALK_DONE);
1912 	}
1913 
1914 	return (WALK_NEXT);
1915 }
1916 
1917 static int
1918 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1919 {
1920 	if (lhs->abb_ts < rhs->abb_ts)
1921 		return (1);
1922 	if (lhs->abb_ts > rhs->abb_ts)
1923 		return (-1);
1924 	return (0);
1925 }
1926 
1927 static int
1928 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1929 {
1930 	allocdby_walk_t *abw;
1931 
1932 	if (wsp->walk_addr == NULL) {
1933 		mdb_warn("allocdby walk doesn't support global walks\n");
1934 		return (WALK_ERR);
1935 	}
1936 
1937 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1938 
1939 	abw->abw_thread = wsp->walk_addr;
1940 	abw->abw_walk = walk;
1941 	abw->abw_size = 128;	/* something reasonable */
1942 	abw->abw_buf =
1943 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1944 
1945 	wsp->walk_data = abw;
1946 
1947 	if (mdb_walk("kmem_cache",
1948 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1949 		mdb_warn("couldn't walk kmem_cache");
1950 		allocdby_walk_fini(wsp);
1951 		return (WALK_ERR);
1952 	}
1953 
1954 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1955 	    (int(*)(const void *, const void *))allocdby_cmp);
1956 
1957 	return (WALK_NEXT);
1958 }
1959 
1960 int
1961 allocdby_walk_init(mdb_walk_state_t *wsp)
1962 {
1963 	return (allocdby_walk_init_common(wsp, "bufctl"));
1964 }
1965 
1966 int
1967 freedby_walk_init(mdb_walk_state_t *wsp)
1968 {
1969 	return (allocdby_walk_init_common(wsp, "freectl"));
1970 }
1971 
1972 int
1973 allocdby_walk_step(mdb_walk_state_t *wsp)
1974 {
1975 	allocdby_walk_t *abw = wsp->walk_data;
1976 	kmem_bufctl_audit_t bc;
1977 	uintptr_t addr;
1978 
1979 	if (abw->abw_ndx == abw->abw_nbufs)
1980 		return (WALK_DONE);
1981 
1982 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1983 
1984 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1985 		mdb_warn("couldn't read bufctl at %p", addr);
1986 		return (WALK_DONE);
1987 	}
1988 
1989 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1990 }
1991 
1992 void
1993 allocdby_walk_fini(mdb_walk_state_t *wsp)
1994 {
1995 	allocdby_walk_t *abw = wsp->walk_data;
1996 
1997 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1998 	mdb_free(abw, sizeof (allocdby_walk_t));
1999 }
2000 
2001 /*ARGSUSED*/
2002 int
2003 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2004 {
2005 	char c[MDB_SYM_NAMLEN];
2006 	GElf_Sym sym;
2007 	int i;
2008 
2009 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2010 	for (i = 0; i < bcp->bc_depth; i++) {
2011 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2012 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2013 			continue;
2014 		if (strncmp(c, "kmem_", 5) == 0)
2015 			continue;
2016 		mdb_printf("%s+0x%lx",
2017 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2018 		break;
2019 	}
2020 	mdb_printf("\n");
2021 
2022 	return (WALK_NEXT);
2023 }
2024 
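/*
 * ::allocdby and ::freedby format each bufctl found by the corresponding
 * walker, using the thread address supplied to the dcmd.  An illustrative
 * invocation (the thread address is hypothetical):
 *
 *	thread_addr::allocdby
 */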
2025 static int
2026 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2027 {
2028 	if (!(flags & DCMD_ADDRSPEC))
2029 		return (DCMD_USAGE);
2030 
2031 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2032 
2033 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2034 		mdb_warn("can't walk '%s' for %p", w, addr);
2035 		return (DCMD_ERR);
2036 	}
2037 
2038 	return (DCMD_OK);
2039 }
2040 
2041 /*ARGSUSED*/
2042 int
2043 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2044 {
2045 	return (allocdby_common(addr, flags, "allocdby"));
2046 }
2047 
2048 /*ARGSUSED*/
2049 int
2050 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2051 {
2052 	return (allocdby_common(addr, flags, "freedby"));
2053 }
2054 
2055 /*
2056  * Return a string describing the address in relation to the given thread's
2057  * stack.
2058  *
2059  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2060  *
2061  * - If the address is above the stack pointer, return an empty string
2062  *   signifying that the address is active.
2063  *
2064  * - If the address is below the stack pointer, and the thread is not on proc,
2065  *   return " (below sp)".
2066  *
2067  * - If the address is below the stack pointer, and the thread is on proc,
2068  *   return " (possibly below sp)".  Depending on context, we may or may not
2069  *   have an accurate t_sp.
2070  */
2071 static const char *
2072 stack_active(const kthread_t *t, uintptr_t addr)
2073 {
2074 	uintptr_t panicstk;
2075 	GElf_Sym sym;
2076 
2077 	if (t->t_state == TS_FREE)
2078 		return (" (inactive interrupt thread)");
2079 
2080 	/*
2081 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2082 	 * no longer relates to the thread's real stack.
2083 	 */
2084 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2085 		panicstk = (uintptr_t)sym.st_value;
2086 
2087 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2088 			return ("");
2089 	}
2090 
2091 	if (addr >= t->t_sp + STACK_BIAS)
2092 		return ("");
2093 
2094 	if (t->t_state == TS_ONPROC)
2095 		return (" (possibly below sp)");
2096 
2097 	return (" (below sp)");
2098 }
2099 
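/*
 * whatis_t carries the state of a single ::whatis search: the address being
 * looked up, the cache or arena currently being examined, and the flags
 * parsed from the -v/-a/-i/-b options.
 */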
2100 typedef struct whatis {
2101 	uintptr_t w_addr;
2102 	const kmem_cache_t *w_cache;
2103 	const vmem_t *w_vmem;
2104 	size_t w_slab_align;
2105 	int w_slab_found;
2106 	int w_found;
2107 	int w_kmem_lite_count;
2108 	uint_t w_verbose;
2109 	uint_t w_freemem;
2110 	uint_t w_all;
2111 	uint_t w_bufctl;
2112 	uint_t w_idspace;
2113 } whatis_t;
2114 
2115 static void
2116 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
2117 {
2118 	/* LINTED pointer cast may result in improper alignment */
2119 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr);
2120 	intptr_t stat;
2121 	int count = 0;
2122 	int i;
2123 	pc_t callers[16];
2124 
2125 	if (w->w_cache->cache_flags & KMF_REDZONE) {
2126 		kmem_buftag_t bt;
2127 
2128 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2129 			goto done;
2130 
2131 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2132 
2133 		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2134 			goto done;
2135 
2136 		/*
2137 		 * provide the bufctl ptr if it has useful information
2138 		 */
2139 		if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT))
2140 			baddr = (uintptr_t)bt.bt_bufctl;
2141 
2142 		if (w->w_cache->cache_flags & KMF_LITE) {
2143 			count = w->w_kmem_lite_count;
2144 
2145 			if (count * sizeof (pc_t) > sizeof (callers))
2146 				count = 0;
2147 
2148 			if (count > 0 &&
2149 			    mdb_vread(callers, count * sizeof (pc_t),
2150 			    btaddr +
2151 			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2152 				count = 0;
2153 
2154 			/*
2155 			 * skip unused callers
2156 			 */
2157 			while (count > 0 && callers[count - 1] ==
2158 			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
2159 				count--;
2160 		}
2161 	}
2162 
2163 done:
2164 	if (baddr == 0)
2165 		mdb_printf("%p is %p+%p, %s from %s\n",
2166 		    w->w_addr, addr, w->w_addr - addr,
2167 		    w->w_freemem == FALSE ? "allocated" : "freed",
2168 		    w->w_cache->cache_name);
2169 	else
2170 		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
2171 		    w->w_addr, addr, w->w_addr - addr, baddr,
2172 		    w->w_freemem == FALSE ? "allocated" : "freed",
2173 		    w->w_cache->cache_name);
2174 
2175 	if (count > 0) {
2176 		mdb_inc_indent(8);
2177 		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
2178 		    callers[0], (count != 1)? ", ":"\n");
2179 		for (i = 1; i < count; i++)
2180 			mdb_printf("%a%s", callers[i],
2181 			    (i + 1 < count)? ", ":"\n");
2182 		mdb_dec_indent(8);
2183 	}
2184 }
2185 
2186 /*ARGSUSED*/
2187 static int
2188 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
2189 {
2190 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2191 		return (WALK_NEXT);
2192 
2193 	whatis_print_kmem(addr, 0, w);
2194 	w->w_found++;
2195 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2196 }
2197 
2198 static int
2199 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
2200 {
2201 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
2202 		return (WALK_NEXT);
2203 
2204 	mdb_printf("%p is %p+%p ", w->w_addr,
2205 	    vs->vs_start, w->w_addr - vs->vs_start);
2206 
2207 	/*
2208 	 * Always provide the vmem_seg pointer if it has a stack trace.
2209 	 */
2210 	if (w->w_bufctl == TRUE ||
2211 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
2212 		mdb_printf("(vmem_seg %p) ", addr);
2213 	}
2214 
2215 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
2216 	    "freed " : "", w->w_vmem->vm_name);
2217 
2218 	w->w_found++;
2219 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2220 }
2221 
2222 static int
2223 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
2224 {
2225 	const char *nm = vmem->vm_name;
2226 	w->w_vmem = vmem;
2227 	w->w_freemem = FALSE;
2228 
2229 	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2230 		return (WALK_NEXT);
2231 
2232 	if (w->w_verbose)
2233 		mdb_printf("Searching vmem arena %s...\n", nm);
2234 
2235 	if (mdb_pwalk("vmem_alloc",
2236 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2237 		mdb_warn("can't walk vmem seg for %p", addr);
2238 		return (WALK_NEXT);
2239 	}
2240 
2241 	if (w->w_found && w->w_all == FALSE)
2242 		return (WALK_DONE);
2243 
2244 	if (w->w_verbose)
2245 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
2246 
2247 	w->w_freemem = TRUE;
2248 
2249 	if (mdb_pwalk("vmem_free",
2250 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2251 		mdb_warn("can't walk vmem seg for %p", addr);
2252 		return (WALK_NEXT);
2253 	}
2254 
2255 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2256 }
2257 
2258 /*ARGSUSED*/
2259 static int
2260 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
2261 {
2262 	uintptr_t addr;
2263 
2264 	if (bcp == NULL)
2265 		return (WALK_NEXT);
2266 
2267 	addr = (uintptr_t)bcp->bc_addr;
2268 
2269 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2270 		return (WALK_NEXT);
2271 
2272 	whatis_print_kmem(addr, baddr, w);
2273 	w->w_found++;
2274 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2275 }
2276 
2277 /*ARGSUSED*/
2278 static int
2279 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
2280 {
2281 	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);
2282 
2283 	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
2284 		return (WALK_NEXT);
2285 
2286 	w->w_slab_found++;
2287 	return (WALK_DONE);
2288 }
2289 
2290 static int
2291 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2292 {
2293 	char *walk, *freewalk;
2294 	mdb_walk_cb_t func;
2295 	vmem_t *vmp = c->cache_arena;
2296 
2297 	if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2298 		return (WALK_NEXT);
2299 
2300 	if (w->w_bufctl == FALSE) {
2301 		walk = "kmem";
2302 		freewalk = "freemem";
2303 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2304 	} else {
2305 		walk = "bufctl";
2306 		freewalk = "freectl";
2307 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2308 	}
2309 
2310 	w->w_cache = c;
2311 
2312 	if (w->w_verbose)
2313 		mdb_printf("Searching %s's slabs...\n", c->cache_name);
2314 
2315 	/*
2316 	 * Verify that the address is in one of the cache's slabs.  If not,
2317 	 * we can skip the more expensive walkers.  (this is purely a
2318 	 * heuristic -- as long as there are no false-negatives, we'll be fine)
2319 	 *
2320 	 * We try to get the cache's arena's quantum, since to accurately
2321 	 * get the base of a slab, you have to align it to the quantum.  If
2322 	 * it doesn't look sensible, we fall back to not aligning.
2323 	 */
2324 	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
2325 	    (uintptr_t)&vmp->vm_quantum) == -1) {
2326 		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
2327 		w->w_slab_align = 1;
2328 	}
2329 
2330 	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
2331 	    (w->w_slab_align & (w->w_slab_align - 1))) {
2332 		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
2333 		    w->w_slab_align);
2334 		w->w_slab_align = 1;
2335 	}
2336 
2337 	w->w_slab_found = 0;
2338 	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
2339 	    addr) == -1) {
2340 		mdb_warn("can't find kmem_slab walker");
2341 		return (WALK_DONE);
2342 	}
2343 	if (w->w_slab_found == 0)
2344 		return (WALK_NEXT);
2345 
2346 	if (c->cache_flags & KMF_LITE) {
2347 		if (mdb_readvar(&w->w_kmem_lite_count,
2348 		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
2349 			w->w_kmem_lite_count = 0;
2350 	}
2351 
2352 	if (w->w_verbose)
2353 		mdb_printf("Searching %s...\n", c->cache_name);
2354 
2355 	w->w_freemem = FALSE;
2356 
2357 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2358 		mdb_warn("can't find %s walker", walk);
2359 		return (WALK_DONE);
2360 	}
2361 
2362 	if (w->w_found && w->w_all == FALSE)
2363 		return (WALK_DONE);
2364 
2365 	/*
2366 	 * We have searched for allocated memory; now search for freed memory.
2367 	 */
2368 	if (w->w_verbose)
2369 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2370 
2371 	w->w_freemem = TRUE;
2372 
2373 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2374 		mdb_warn("can't find %s walker", freewalk);
2375 		return (WALK_DONE);
2376 	}
2377 
2378 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2379 }
2380 
2381 static int
2382 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2383 {
2384 	if (c->cache_cflags & KMC_NOTOUCH)
2385 		return (WALK_NEXT);
2386 
2387 	return (whatis_walk_cache(addr, c, w));
2388 }
2389 
2390 static int
2391 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2392 {
2393 	if (!(c->cache_cflags & KMC_NOTOUCH))
2394 		return (WALK_NEXT);
2395 
2396 	return (whatis_walk_cache(addr, c, w));
2397 }
2398 
2399 static int
2400 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
2401 {
2402 	/*
2403 	 * Often, one calls ::whatis on an address from a thread structure.
2404 	 * We use this opportunity to short circuit this case...
2405 	 */
2406 	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
2407 		mdb_printf("%p is %p+%p, allocated as a thread structure\n",
2408 		    w->w_addr, addr, w->w_addr - addr);
2409 		w->w_found++;
2410 		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2411 	}
2412 
2413 	if (w->w_addr < (uintptr_t)t->t_stkbase ||
2414 	    w->w_addr > (uintptr_t)t->t_stk)
2415 		return (WALK_NEXT);
2416 
2417 	if (t->t_stkbase == NULL)
2418 		return (WALK_NEXT);
2419 
2420 	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
2421 	    stack_active(t, w->w_addr));
2422 
2423 	w->w_found++;
2424 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2425 }
2426 
2427 static int
2428 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
2429 {
2430 	struct module mod;
2431 	char name[MODMAXNAMELEN], *where;
2432 	char c[MDB_SYM_NAMLEN];
2433 	Shdr shdr;
2434 	GElf_Sym sym;
2435 
2436 	if (m->mod_mp == NULL)
2437 		return (WALK_NEXT);
2438 
2439 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2440 		mdb_warn("couldn't read modctl %p's module", addr);
2441 		return (WALK_NEXT);
2442 	}
2443 
2444 	if (w->w_addr >= (uintptr_t)mod.text &&
2445 	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
2446 		where = "text segment";
2447 		goto found;
2448 	}
2449 
2450 	if (w->w_addr >= (uintptr_t)mod.data &&
2451 	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
2452 		where = "data segment";
2453 		goto found;
2454 	}
2455 
2456 	if (w->w_addr >= (uintptr_t)mod.bss &&
2457 	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
2458 		where = "bss";
2459 		goto found;
2460 	}
2461 
2462 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2463 		mdb_warn("couldn't read symbol header for %p's module", addr);
2464 		return (WALK_NEXT);
2465 	}
2466 
2467 	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
2468 	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
2469 		where = "symtab";
2470 		goto found;
2471 	}
2472 
2473 	if (w->w_addr >= (uintptr_t)mod.symspace &&
2474 	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
2475 		where = "symspace";
2476 		goto found;
2477 	}
2478 
2479 	return (WALK_NEXT);
2480 
2481 found:
2482 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2483 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2484 
2485 	mdb_printf("%p is ", w->w_addr);
2486 
2487 	/*
2488 	 * If we found this address in a module, then there's a chance that
2489 	 * it's actually a named symbol.  Try the symbol lookup.
2490 	 */
2491 	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c),
2492 	    &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value &&
2493 	    w->w_addr < (uintptr_t)sym.st_value + sym.st_size) {
2494 		mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value);
2495 	}
2496 
2497 	mdb_printf("in %s's %s\n", name, where);
2498 
2499 	w->w_found++;
2500 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2501 }
2502 
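/*
 * page_t is machine-dependent, so we determine its size in the target via
 * CTF rather than relying on our own compiled-in sizeof (page_t), which is
 * used only as a fallback.
 */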
2503 /*ARGSUSED*/
2504 static int
2505 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
2506 {
2507 	static int machsize = 0;
2508 	mdb_ctf_id_t id;
2509 
2510 	if (machsize == 0) {
2511 		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
2512 			machsize = mdb_ctf_type_size(id);
2513 		else {
2514 			mdb_warn("could not get size of page_t");
2515 			machsize = sizeof (page_t);
2516 		}
2517 	}
2518 
2519 	if (w->w_addr < addr || w->w_addr >= addr + machsize)
2520 		return (WALK_NEXT);
2521 
2522 	mdb_printf("%p is %p+%p, allocated as a page structure\n",
2523 	    w->w_addr, addr, w->w_addr - addr);
2524 
2525 	w->w_found++;
2526 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2527 }
2528 
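/*
 * ::whatis searches, in order, kernel modules, thread stacks, page
 * structures, kmem caches (touched caches before no-touch caches), and
 * finally the vmem arenas; unless -a is given, it stops at the first match.
 * An illustrative invocation (the address is hypothetical):
 *
 *	addr::whatis -v
 */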
2529 int
2530 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2531 {
2532 	whatis_t w;
2533 
2534 	if (!(flags & DCMD_ADDRSPEC))
2535 		return (DCMD_USAGE);
2536 
2537 	w.w_verbose = FALSE;
2538 	w.w_bufctl = FALSE;
2539 	w.w_all = FALSE;
2540 	w.w_idspace = FALSE;
2541 
2542 	if (mdb_getopts(argc, argv,
2543 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2544 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2545 	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
2546 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
2547 		return (DCMD_USAGE);
2548 
2549 	w.w_addr = addr;
2550 	w.w_found = 0;
2551 
2552 	if (w.w_verbose)
2553 		mdb_printf("Searching modules...\n");
2554 
2555 	if (!w.w_idspace) {
2556 		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
2557 		    == -1) {
2558 			mdb_warn("couldn't find modctl walker");
2559 			return (DCMD_ERR);
2560 		}
2561 
2562 		if (w.w_found && w.w_all == FALSE)
2563 			return (DCMD_OK);
2564 
2565 		/*
2566 		 * Now search all thread stacks.  Yes, this is a little weak; we
2567 		 * can save a lot of work by first checking to see if the
2568 		 * address is in segkp vs. segkmem.  But hey, computers are
2569 		 * fast.
2570 		 */
2571 		if (w.w_verbose)
2572 			mdb_printf("Searching threads...\n");
2573 
2574 		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
2575 		    == -1) {
2576 			mdb_warn("couldn't find thread walker");
2577 			return (DCMD_ERR);
2578 		}
2579 
2580 		if (w.w_found && w.w_all == FALSE)
2581 			return (DCMD_OK);
2582 
2583 		if (w.w_verbose)
2584 			mdb_printf("Searching page structures...\n");
2585 
2586 		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
2587 		    == -1) {
2588 			mdb_warn("couldn't find page walker");
2589 			return (DCMD_ERR);
2590 		}
2591 
2592 		if (w.w_found && w.w_all == FALSE)
2593 			return (DCMD_OK);
2594 	}
2595 
2596 	if (mdb_walk("kmem_cache",
2597 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2598 		mdb_warn("couldn't find kmem_cache walker");
2599 		return (DCMD_ERR);
2600 	}
2601 
2602 	if (w.w_found && w.w_all == FALSE)
2603 		return (DCMD_OK);
2604 
2605 	if (mdb_walk("kmem_cache",
2606 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2607 		mdb_warn("couldn't find kmem_cache walker");
2608 		return (DCMD_ERR);
2609 	}
2610 
2611 	if (w.w_found && w.w_all == FALSE)
2612 		return (DCMD_OK);
2613 
2614 	if (mdb_walk("vmem_postfix",
2615 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2616 		mdb_warn("couldn't find vmem_postfix walker");
2617 		return (DCMD_ERR);
2618 	}
2619 
2620 	if (w.w_found == 0)
2621 		mdb_printf("%p is unknown\n", addr);
2622 
2623 	return (DCMD_OK);
2624 }
2625 
2626 void
2627 whatis_help(void)
2628 {
2629 	mdb_printf(
2630 	    "Given a virtual address, attempt to determine where it came\n"
2631 	    "from.\n"
2632 	    "\n"
2633 	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
2634 	    "\t\tsearched\n"
2635 	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
2636 	    "\t\tthe first (most specific) source.\n"
2637 	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
2638 	    "\t\tthese are ignored.\n"
2639 	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
2640 	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
2641 	    "\t\thave a bufctl, it will not be reported.\n");
2642 }
2643 
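/*
 * ::kmem_log displays the kmem transaction log.  kmem_log_cpu_t records each
 * CPU's chunk of the log so that every entry can be labeled with the CPU
 * that logged it; kmd_addr optionally holds a buffer address used to filter
 * the output.
 */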
2644 typedef struct kmem_log_cpu {
2645 	uintptr_t kmc_low;
2646 	uintptr_t kmc_high;
2647 } kmem_log_cpu_t;
2648 
2649 typedef struct kmem_log_data {
2650 	uintptr_t kmd_addr;
2651 	kmem_log_cpu_t *kmd_cpu;
2652 } kmem_log_data_t;
2653 
2654 int
2655 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2656     kmem_log_data_t *kmd)
2657 {
2658 	int i;
2659 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2660 	size_t bufsize;
2661 
2662 	for (i = 0; i < NCPU; i++) {
2663 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2664 			break;
2665 	}
2666 
2667 	if (kmd->kmd_addr) {
2668 		if (b->bc_cache == NULL)
2669 			return (WALK_NEXT);
2670 
2671 		if (mdb_vread(&bufsize, sizeof (bufsize),
2672 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2673 			mdb_warn(
2674 			    "failed to read cache_bufsize for cache at %p",
2675 			    b->bc_cache);
2676 			return (WALK_ERR);
2677 		}
2678 
2679 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2680 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2681 			return (WALK_NEXT);
2682 	}
2683 
2684 	if (i == NCPU)
2685 		mdb_printf("   ");
2686 	else
2687 		mdb_printf("%3d", i);
2688 
2689 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2690 	    b->bc_timestamp, b->bc_thread);
2691 
2692 	return (WALK_NEXT);
2693 }
2694 
2695 /*ARGSUSED*/
2696 int
2697 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2698 {
2699 	kmem_log_header_t lh;
2700 	kmem_cpu_log_header_t clh;
2701 	uintptr_t lhp, clhp;
2702 	int ncpus;
2703 	uintptr_t *cpu;
2704 	GElf_Sym sym;
2705 	kmem_log_cpu_t *kmc;
2706 	int i;
2707 	kmem_log_data_t kmd;
2708 	uint_t opt_b = FALSE;
2709 
2710 	if (mdb_getopts(argc, argv,
2711 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2712 		return (DCMD_USAGE);
2713 
2714 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2715 		mdb_warn("failed to read 'kmem_transaction_log'");
2716 		return (DCMD_ERR);
2717 	}
2718 
2719 	if (lhp == NULL) {
2720 		mdb_warn("no kmem transaction log\n");
2721 		return (DCMD_ERR);
2722 	}
2723 
2724 	mdb_readvar(&ncpus, "ncpus");
2725 
2726 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2727 		mdb_warn("failed to read log header at %p", lhp);
2728 		return (DCMD_ERR);
2729 	}
2730 
2731 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2732 
2733 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2734 
2735 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2736 		mdb_warn("couldn't find 'cpu' array");
2737 		return (DCMD_ERR);
2738 	}
2739 
2740 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2741 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2742 		    NCPU * sizeof (uintptr_t), sym.st_size);
2743 		return (DCMD_ERR);
2744 	}
2745 
2746 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2747 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2748 		return (DCMD_ERR);
2749 	}
2750 
2751 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2752 	kmd.kmd_addr = NULL;
2753 	kmd.kmd_cpu = kmc;
2754 
2755 	for (i = 0; i < NCPU; i++) {
2756 
2757 		if (cpu[i] == NULL)
2758 			continue;
2759 
2760 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2761 			mdb_warn("cannot read cpu %d's log header at %p",
2762 			    i, clhp);
2763 			return (DCMD_ERR);
2764 		}
2765 
2766 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2767 		    (uintptr_t)lh.lh_base;
2768 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2769 
2770 		clhp += sizeof (kmem_cpu_log_header_t);
2771 	}
2772 
2773 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2774 	    "TIMESTAMP", "THREAD");
2775 
2776 	/*
2777 	 * If we have been passed an address, print out only log entries
2778 	 * corresponding to that address.  With -b, the address is treated as a
2779 	 * buffer address filter; without -b, it is read as a bufctl.
2780 	 */
2781 	if (flags & DCMD_ADDRSPEC) {
2782 		kmem_bufctl_audit_t b;
2783 
2784 		if (opt_b) {
2785 			kmd.kmd_addr = addr;
2786 		} else {
2787 			if (mdb_vread(&b,
2788 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2789 				mdb_warn("failed to read bufctl at %p", addr);
2790 				return (DCMD_ERR);
2791 			}
2792 
2793 			(void) kmem_log_walk(addr, &b, &kmd);
2794 
2795 			return (DCMD_OK);
2796 		}
2797 	}
2798 
2799 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2800 		mdb_warn("can't find kmem log walker");
2801 		return (DCMD_ERR);
2802 	}
2803 
2804 	return (DCMD_OK);
2805 }
2806 
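/*
 * ::bufctl -h walks a buffer's "bufctl_history" and re-invokes ::bufctl on
 * each entry; bufctl_history_cb_t carries the original flags and arguments
 * (with -H prepended to prevent recursion) across those calls.
 */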
2807 typedef struct bufctl_history_cb {
2808 	int		bhc_flags;
2809 	int		bhc_argc;
2810 	const mdb_arg_t	*bhc_argv;
2811 	int		bhc_ret;
2812 } bufctl_history_cb_t;
2813 
2814 /*ARGSUSED*/
2815 static int
2816 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2817 {
2818 	bufctl_history_cb_t *bhc = arg;
2819 
2820 	bhc->bhc_ret =
2821 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2822 
2823 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2824 
2825 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2826 }
2827 
2828 void
2829 bufctl_help(void)
2830 {
2831 	mdb_printf("%s",
2832 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2833 	mdb_dec_indent(2);
2834 	mdb_printf("%<b>OPTIONS%</b>\n");
2835 	mdb_inc_indent(2);
2836 	mdb_printf("%s",
2837 "  -v    Display the full content of the bufctl, including its stack trace\n"
2838 "  -h    retrieve the bufctl's transaction history, if available\n"
2839 "  -a addr\n"
2840 "        filter out bufctls not involving the buffer at addr\n"
2841 "  -c caller\n"
2842 "        filter out bufctls without the function/PC in their stack trace\n"
2843 "  -e earliest\n"
2844 "        filter out bufctls timestamped before earliest\n"
2845 "  -l latest\n"
2846 "        filter out bufctls timestamped after latest\n"
2847 "  -t thread\n"
2848 "        filter out bufctls not involving thread\n");
2849 }
2850 
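/*
 * Illustrative invocations (the bufctl address and caller are hypothetical):
 *
 *	bufctl_addr::bufctl -v
 *	bufctl_addr::bufctl -h -c some_caller
 */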
2851 int
2852 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2853 {
2854 	kmem_bufctl_audit_t bc;
2855 	uint_t verbose = FALSE;
2856 	uint_t history = FALSE;
2857 	uint_t in_history = FALSE;
2858 	uintptr_t caller = NULL, thread = NULL;
2859 	uintptr_t laddr, haddr, baddr = NULL;
2860 	hrtime_t earliest = 0, latest = 0;
2861 	int i, depth;
2862 	char c[MDB_SYM_NAMLEN];
2863 	GElf_Sym sym;
2864 
2865 	if (mdb_getopts(argc, argv,
2866 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2867 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2868 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2869 	    'c', MDB_OPT_UINTPTR, &caller,
2870 	    't', MDB_OPT_UINTPTR, &thread,
2871 	    'e', MDB_OPT_UINT64, &earliest,
2872 	    'l', MDB_OPT_UINT64, &latest,
2873 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2874 		return (DCMD_USAGE);
2875 
2876 	if (!(flags & DCMD_ADDRSPEC))
2877 		return (DCMD_USAGE);
2878 
2879 	if (in_history && !history)
2880 		return (DCMD_USAGE);
2881 
2882 	if (history && !in_history) {
2883 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2884 		    UM_SLEEP | UM_GC);
2885 		bufctl_history_cb_t bhc;
2886 
2887 		nargv[0].a_type = MDB_TYPE_STRING;
2888 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2889 
2890 		for (i = 0; i < argc; i++)
2891 			nargv[i + 1] = argv[i];
2892 
2893 		/*
2894 		 * When in history mode, we treat each element as if it
2895 		 * were in a separate loop, so that the headers group
2896 		 * bufctls with similar histories.
2897 		 */
2898 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2899 		bhc.bhc_argc = argc + 1;
2900 		bhc.bhc_argv = nargv;
2901 		bhc.bhc_ret = DCMD_OK;
2902 
2903 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2904 		    addr) == -1) {
2905 			mdb_warn("unable to walk bufctl_history");
2906 			return (DCMD_ERR);
2907 		}
2908 
2909 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2910 			mdb_printf("\n");
2911 
2912 		return (bhc.bhc_ret);
2913 	}
2914 
2915 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2916 		if (verbose) {
2917 			mdb_printf("%16s %16s %16s %16s\n"
2918 			    "%<u>%16s %16s %16s %16s%</u>\n",
2919 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2920 			    "", "CACHE", "LASTLOG", "CONTENTS");
2921 		} else {
2922 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2923 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2924 		}
2925 	}
2926 
2927 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2928 		mdb_warn("couldn't read bufctl at %p", addr);
2929 		return (DCMD_ERR);
2930 	}
2931 
2932 	/*
2933 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2934 	 * the address does not really refer to a bufctl.
2935 	 */
2936 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2937 
2938 	if (caller != NULL) {
2939 		laddr = caller;
2940 		haddr = caller + sizeof (caller);
2941 
2942 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2943 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2944 			/*
2945 			 * We were provided an exact symbol value; any
2946 			 * address in the function is valid.
2947 			 */
2948 			laddr = (uintptr_t)sym.st_value;
2949 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2950 		}
2951 
2952 		for (i = 0; i < depth; i++)
2953 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2954 				break;
2955 
2956 		if (i == depth)
2957 			return (DCMD_OK);
2958 	}
2959 
2960 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2961 		return (DCMD_OK);
2962 
2963 	if (earliest != 0 && bc.bc_timestamp < earliest)
2964 		return (DCMD_OK);
2965 
2966 	if (latest != 0 && bc.bc_timestamp > latest)
2967 		return (DCMD_OK);
2968 
2969 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2970 		return (DCMD_OK);
2971 
2972 	if (flags & DCMD_PIPE_OUT) {
2973 		mdb_printf("%#lr\n", addr);
2974 		return (DCMD_OK);
2975 	}
2976 
2977 	if (verbose) {
2978 		mdb_printf(
2979 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2980 		    "%16s %16p %16p %16p\n",
2981 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2982 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2983 
2984 		mdb_inc_indent(17);
2985 		for (i = 0; i < depth; i++)
2986 			mdb_printf("%a\n", bc.bc_stack[i]);
2987 		mdb_dec_indent(17);
2988 		mdb_printf("\n");
2989 	} else {
2990 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2991 		    bc.bc_timestamp, bc.bc_thread);
2992 
2993 		for (i = 0; i < depth; i++) {
2994 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2995 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2996 				continue;
2997 			if (strncmp(c, "kmem_", 5) == 0)
2998 				continue;
2999 			mdb_printf(" %a\n", bc.bc_stack[i]);
3000 			break;
3001 		}
3002 
3003 		if (i >= depth)
3004 			mdb_printf("\n");
3005 	}
3006 
3007 	return (DCMD_OK);
3008 }
3009 
3010 typedef struct kmem_verify {
3011 	uint64_t *kmv_buf;		/* buffer to read cache contents into */
3012 	size_t kmv_size;		/* number of bytes in kmv_buf */
3013 	int kmv_corruption;		/* > 0 if corruption found. */
3014 	int kmv_besilent;		/* report actual corruption sites */
3015 	struct kmem_cache kmv_cache;	/* the cache we're operating on */
3016 } kmem_verify_t;
3017 
3018 /*
3019  * verify_pattern()
3020  * 	verify that buf is filled with the pattern pat.
3021  */
3022 static int64_t
3023 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3024 {
3025 	/*LINTED*/
3026 	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3027 	uint64_t *buf;
3028 
3029 	for (buf = buf_arg; buf < bufend; buf++)
3030 		if (*buf != pat)
3031 			return ((uintptr_t)buf - (uintptr_t)buf_arg);
3032 	return (-1);
3033 }
3034 
3035 /*
3036  * verify_buftag()
3037  *	verify that btp->bt_bxstat == (bcp ^ pat)
3038  */
3039 static int
3040 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3041 {
3042 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3043 }
3044 
3045 /*
3046  * verify_free()
3047  * 	verify the integrity of a free block of memory by checking
3048  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3049  */
3050 /*ARGSUSED1*/
3051 static int
3052 verify_free(uintptr_t addr, const void *data, void *private)
3053 {
3054 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3055 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3056 	int64_t corrupt;		/* corruption offset */
3057 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3058 	kmem_cache_t *cp = &kmv->kmv_cache;
3059 	int besilent = kmv->kmv_besilent;
3060 
3061 	/*LINTED*/
3062 	buftagp = KMEM_BUFTAG(cp, buf);
3063 
3064 	/*
3065 	 * Read the buffer to check.
3066 	 */
3067 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3068 		if (!besilent)
3069 			mdb_warn("couldn't read %p", addr);
3070 		return (WALK_NEXT);
3071 	}
3072 
3073 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3074 	    KMEM_FREE_PATTERN)) >= 0) {
3075 		if (!besilent)
3076 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3077 			    addr, (uintptr_t)addr + corrupt);
3078 		goto corrupt;
3079 	}
3080 	/*
3081 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3082 	 * the first bytes of the buffer, hence we cannot check for red
3083 	 * zone corruption.
3084 	 */
3085 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3086 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3087 		if (!besilent)
3088 			mdb_printf("buffer %p (free) seems to "
3089 			    "have a corrupt redzone pattern\n", addr);
3090 		goto corrupt;
3091 	}
3092 
3093 	/*
3094 	 * confirm bufctl pointer integrity.
3095 	 */
3096 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3097 		if (!besilent)
3098 			mdb_printf("buffer %p (free) has a corrupt "
3099 			    "buftag\n", addr);
3100 		goto corrupt;
3101 	}
3102 
3103 	return (WALK_NEXT);
3104 corrupt:
3105 	kmv->kmv_corruption++;
3106 	return (WALK_NEXT);
3107 }
3108 
3109 /*
3110  * verify_alloc()
3111  * 	Verify that the buftag of an allocated buffer makes sense with respect
3112  * 	to the buffer.
3113  */
3114 /*ARGSUSED1*/
3115 static int
3116 verify_alloc(uintptr_t addr, const void *data, void *private)
3117 {
3118 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3119 	kmem_cache_t *cp = &kmv->kmv_cache;
3120 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3121 	/*LINTED*/
3122 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3123 	uint32_t *ip = (uint32_t *)buftagp;
3124 	uint8_t *bp = (uint8_t *)buf;
3125 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3126 	int besilent = kmv->kmv_besilent;
3127 
3128 	/*
3129 	 * Read the buffer to check.
3130 	 */
3131 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3132 		if (!besilent)
3133 			mdb_warn("couldn't read %p", addr);
3134 		return (WALK_NEXT);
3135 	}
3136 
3137 	/*
3138 	 * There are two cases to handle:
3139 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3140 	 *    0xfeedfacefeedface at the end of it
3141 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3142 	 *    0xbb just past the end of the region in use.  At the buftag,
3143 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3144 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3145 	 *    endianness), followed by 32 bits containing the offset of the
3146 	 *    0xbb byte in the buffer.
3147 	 *
3148 	 * Finally, the two 32-bit words that comprise the second half of the
3149 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3150 	 */
3151 
3152 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3153 		looks_ok = 1;
3154 	else if (!KMEM_SIZE_VALID(ip[1]))
3155 		size_ok = 0;
3156 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3157 		looks_ok = 1;
3158 	else
3159 		size_ok = 0;
3160 
3161 	if (!size_ok) {
3162 		if (!besilent)
3163 			mdb_printf("buffer %p (allocated) has a corrupt "
3164 			    "redzone size encoding\n", addr);
3165 		goto corrupt;
3166 	}
3167 
3168 	if (!looks_ok) {
3169 		if (!besilent)
3170 			mdb_printf("buffer %p (allocated) has a corrupt "
3171 			    "redzone signature\n", addr);
3172 		goto corrupt;
3173 	}
3174 
3175 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3176 		if (!besilent)
3177 			mdb_printf("buffer %p (allocated) has a "
3178 			    "corrupt buftag\n", addr);
3179 		goto corrupt;
3180 	}
3181 
3182 	return (WALK_NEXT);
3183 corrupt:
3184 	kmv->kmv_corruption++;
3185 	return (WALK_NEXT);
3186 }
3187 
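/*
 * ::kmem_verify checks a cache's buffers against their redzone patterns and,
 * for caches with KMF_DEADBEEF set, the 0xdeadbeef free pattern.  With no
 * address it walks every kmem cache and prints a one-line summary per cache;
 * with a cache address it reports each corrupt buffer individually.
 */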
3188 /*ARGSUSED2*/
3189 int
3190 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3191 {
3192 	if (flags & DCMD_ADDRSPEC) {
3193 		int check_alloc = 0, check_free = 0;
3194 		kmem_verify_t kmv;
3195 
3196 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3197 		    addr) == -1) {
3198 			mdb_warn("couldn't read kmem_cache %p", addr);
3199 			return (DCMD_ERR);
3200 		}
3201 
3202 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3203 		    sizeof (kmem_buftag_t);
3204 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3205 		kmv.kmv_corruption = 0;
3206 
3207 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3208 			check_alloc = 1;
3209 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3210 				check_free = 1;
3211 		} else {
3212 			if (!(flags & DCMD_LOOP)) {
3213 				mdb_warn("cache %p (%s) does not have "
3214 				    "redzone checking enabled\n", addr,
3215 				    kmv.kmv_cache.cache_name);
3216 			}
3217 			return (DCMD_ERR);
3218 		}
3219 
3220 		if (flags & DCMD_LOOP) {
3221 			/*
3222 			 * table mode, don't print out every corrupt buffer
3223 			 */
3224 			kmv.kmv_besilent = 1;
3225 		} else {
3226 			mdb_printf("Summary for cache '%s'\n",
3227 			    kmv.kmv_cache.cache_name);
3228 			mdb_inc_indent(2);
3229 			kmv.kmv_besilent = 0;
3230 		}
3231 
3232 		if (check_alloc)
3233 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3234 		if (check_free)
3235 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3236 
3237 		if (flags & DCMD_LOOP) {
3238 			if (kmv.kmv_corruption == 0) {
3239 				mdb_printf("%-*s %?p clean\n",
3240 				    KMEM_CACHE_NAMELEN,
3241 				    kmv.kmv_cache.cache_name, addr);
3242 			} else {
3243 				char *s = "";	/* optional s in "buffer[s]" */
3244 				if (kmv.kmv_corruption > 1)
3245 					s = "s";
3246 
3247 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3248 				    KMEM_CACHE_NAMELEN,
3249 				    kmv.kmv_cache.cache_name, addr,
3250 				    kmv.kmv_corruption, s);
3251 			}
3252 		} else {
3253 			/*
3254 			 * This is the more verbose mode, when the user has
3255 			 * typed addr::kmem_verify.  If the cache was clean,
3256 			 * nothing will have yet been printed. So say something.
3257 			 */
3258 			if (kmv.kmv_corruption == 0)
3259 				mdb_printf("clean\n");
3260 
3261 			mdb_dec_indent(2);
3262 		}
3263 	} else {
3264 		/*
3265 		 * If the user didn't specify a cache to verify, we'll walk all
3266 		 * kmem_cache's, specifying ourselves as a callback for each...
3267 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3268 		 */
3269 		mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN,
3270 		    "Cache Name", "Addr", "Cache Integrity");
3271 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3272 	}
3273 
3274 	return (DCMD_OK);
3275 }
3276 
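/*
 * The vmem walkers build an in-memory tree of all arenas, linking each
 * vmem_node_t to its parent via the arena's vm_source pointer.  The "vmem"
 * walk visits an arena before its children; the "vmem_postfix" walk (below)
 * visits children first.
 */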
3277 typedef struct vmem_node {
3278 	struct vmem_node *vn_next;
3279 	struct vmem_node *vn_parent;
3280 	struct vmem_node *vn_sibling;
3281 	struct vmem_node *vn_children;
3282 	uintptr_t vn_addr;
3283 	int vn_marked;
3284 	vmem_t vn_vmem;
3285 } vmem_node_t;
3286 
3287 typedef struct vmem_walk {
3288 	vmem_node_t *vw_root;
3289 	vmem_node_t *vw_current;
3290 } vmem_walk_t;
3291 
3292 int
3293 vmem_walk_init(mdb_walk_state_t *wsp)
3294 {
3295 	uintptr_t vaddr, paddr;
3296 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3297 	vmem_walk_t *vw;
3298 
3299 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3300 		mdb_warn("couldn't read 'vmem_list'");
3301 		return (WALK_ERR);
3302 	}
3303 
3304 	while (vaddr != NULL) {
3305 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3306 		vp->vn_addr = vaddr;
3307 		vp->vn_next = head;
3308 		head = vp;
3309 
3310 		if (vaddr == wsp->walk_addr)
3311 			current = vp;
3312 
3313 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3314 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3315 			goto err;
3316 		}
3317 
3318 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3319 	}
3320 
3321 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3322 
3323 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3324 			vp->vn_sibling = root;
3325 			root = vp;
3326 			continue;
3327 		}
3328 
3329 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3330 			if (parent->vn_addr != paddr)
3331 				continue;
3332 			vp->vn_sibling = parent->vn_children;
3333 			parent->vn_children = vp;
3334 			vp->vn_parent = parent;
3335 			break;
3336 		}
3337 
3338 		if (parent == NULL) {
3339 			mdb_warn("couldn't find %p's parent (%p)\n",
3340 			    vp->vn_addr, paddr);
3341 			goto err;
3342 		}
3343 	}
3344 
3345 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3346 	vw->vw_root = root;
3347 
3348 	if (current != NULL)
3349 		vw->vw_current = current;
3350 	else
3351 		vw->vw_current = root;
3352 
3353 	wsp->walk_data = vw;
3354 	return (WALK_NEXT);
3355 err:
3356 	for (vp = head; head != NULL; vp = head) {
3357 		head = vp->vn_next;
3358 		mdb_free(vp, sizeof (vmem_node_t));
3359 	}
3360 
3361 	return (WALK_ERR);
3362 }
3363 
3364 int
3365 vmem_walk_step(mdb_walk_state_t *wsp)
3366 {
3367 	vmem_walk_t *vw = wsp->walk_data;
3368 	vmem_node_t *vp;
3369 	int rval;
3370 
3371 	if ((vp = vw->vw_current) == NULL)
3372 		return (WALK_DONE);
3373 
3374 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3375 
3376 	if (vp->vn_children != NULL) {
3377 		vw->vw_current = vp->vn_children;
3378 		return (rval);
3379 	}
3380 
3381 	do {
3382 		vw->vw_current = vp->vn_sibling;
3383 		vp = vp->vn_parent;
3384 	} while (vw->vw_current == NULL && vp != NULL);
3385 
3386 	return (rval);
3387 }
3388 
3389 /*
3390  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3391  * children are visited before their parent.  We perform the postfix walk
3392  * iteratively (rather than recursively) to allow mdb to regain control
3393  * after each callback.
3394  */
3395 int
3396 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3397 {
3398 	vmem_walk_t *vw = wsp->walk_data;
3399 	vmem_node_t *vp = vw->vw_current;
3400 	int rval;
3401 
3402 	/*
3403 	 * If this node is marked, then we know that we have already visited
3404 	 * all of its children.  If the node has any siblings, they need to
3405 	 * be visited next; otherwise, we need to visit the parent.  Note
3406 	 * that vp->vn_marked will only be zero on the first invocation of
3407 	 * the step function.
3408 	 */
3409 	if (vp->vn_marked) {
3410 		if (vp->vn_sibling != NULL)
3411 			vp = vp->vn_sibling;
3412 		else if (vp->vn_parent != NULL)
3413 			vp = vp->vn_parent;
3414 		else {
3415 			/*
3416 			 * We have neither a parent, nor a sibling, and we
3417 			 * have already been visited; we're done.
3418 			 */
3419 			return (WALK_DONE);
3420 		}
3421 	}
3422 
3423 	/*
3424 	 * Before we visit this node, visit its children.
3425 	 */
3426 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3427 		vp = vp->vn_children;
3428 
3429 	vp->vn_marked = 1;
3430 	vw->vw_current = vp;
3431 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3432 
3433 	return (rval);
3434 }
3435 
3436 void
3437 vmem_walk_fini(mdb_walk_state_t *wsp)
3438 {
3439 	vmem_walk_t *vw = wsp->walk_data;
3440 	vmem_node_t *root = vw->vw_root;
3441 	int done;
3442 
3443 	if (root == NULL)
3444 		return;
3445 
3446 	if ((vw->vw_root = root->vn_children) != NULL)
3447 		vmem_walk_fini(wsp);
3448 
3449 	vw->vw_root = root->vn_sibling;
3450 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3451 	mdb_free(root, sizeof (vmem_node_t));
3452 
3453 	if (done) {
3454 		mdb_free(vw, sizeof (vmem_walk_t));
3455 	} else {
3456 		vmem_walk_fini(wsp);
3457 	}
3458 }
3459 
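/*
 * The vmem_seg walkers traverse an arena's circular segment list, which is
 * threaded through vs_anext and anchored at the arena's vm_seg0.  vsw_type
 * selects which segment type to report; VMEM_NONE (defined below) matches
 * them all.
 */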
3460 typedef struct vmem_seg_walk {
3461 	uint8_t vsw_type;
3462 	uintptr_t vsw_start;
3463 	uintptr_t vsw_current;
3464 } vmem_seg_walk_t;
3465 
3466 /*ARGSUSED*/
3467 int
3468 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3469 {
3470 	vmem_seg_walk_t *vsw;
3471 
3472 	if (wsp->walk_addr == NULL) {
3473 		mdb_warn("vmem_%s does not support global walks\n", name);
3474 		return (WALK_ERR);
3475 	}
3476 
3477 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3478 
3479 	vsw->vsw_type = type;
3480 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3481 	vsw->vsw_current = vsw->vsw_start;
3482 
3483 	return (WALK_NEXT);
3484 }
3485 
3486 /*
3487  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3488  */
3489 #define	VMEM_NONE	0
3490 
3491 int
3492 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3493 {
3494 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3495 }
3496 
3497 int
3498 vmem_free_walk_init(mdb_walk_state_t *wsp)
3499 {
3500 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3501 }
3502 
3503 int
3504 vmem_span_walk_init(mdb_walk_state_t *wsp)
3505 {
3506 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3507 }
3508 
3509 int
3510 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3511 {
3512 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3513 }
3514 
3515 int
3516 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3517 {
3518 	vmem_seg_t seg;
3519 	vmem_seg_walk_t *vsw = wsp->walk_data;
3520 	uintptr_t addr = vsw->vsw_current;
3521 	static size_t seg_size = 0;
3522 	int rval;
3523 
3524 	if (!seg_size) {
3525 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3526 			mdb_warn("failed to read 'vmem_seg_size'");
3527 			seg_size = sizeof (vmem_seg_t);
3528 		}
3529 	}
3530 
3531 	if (seg_size < sizeof (seg))
3532 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3533 
3534 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3535 		mdb_warn("couldn't read vmem_seg at %p", addr);
3536 		return (WALK_ERR);
3537 	}
3538 
3539 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3540 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3541 		rval = WALK_NEXT;
3542 	} else {
3543 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3544 	}
3545 
3546 	if (vsw->vsw_current == vsw->vsw_start)
3547 		return (WALK_DONE);
3548 
3549 	return (rval);
3550 }
3551 
3552 void
3553 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3554 {
3555 	vmem_seg_walk_t *vsw = wsp->walk_data;
3556 
3557 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3558 }
3559 
3560 #define	VMEM_NAMEWIDTH	22
3561 
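/*
 * ::vmem prints one line per arena: the in-use and total bytes along with
 * the allocation success and failure counts from the arena's kstats.  Names
 * are indented two spaces per level of ancestry; with no address, the dcmd
 * applies itself to every arena via the "vmem" walker.
 */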
3562 int
3563 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3564 {
3565 	vmem_t v, parent;
3566 	vmem_kstat_t *vkp = &v.vm_kstat;
3567 	uintptr_t paddr;
3568 	int ident = 0;
3569 	char c[VMEM_NAMEWIDTH];
3570 
3571 	if (!(flags & DCMD_ADDRSPEC)) {
3572 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3573 			mdb_warn("can't walk vmem");
3574 			return (DCMD_ERR);
3575 		}
3576 		return (DCMD_OK);
3577 	}
3578 
3579 	if (DCMD_HDRSPEC(flags))
3580 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3581 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3582 		    "TOTAL", "SUCCEED", "FAIL");
3583 
3584 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3585 		mdb_warn("couldn't read vmem at %p", addr);
3586 		return (DCMD_ERR);
3587 	}
3588 
3589 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3590 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3591 			mdb_warn("couldn't trace %p's ancestry", addr);
3592 			ident = 0;
3593 			break;
3594 		}
3595 		paddr = (uintptr_t)parent.vm_source;
3596 	}
3597 
3598 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3599 
3600 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3601 	    addr, VMEM_NAMEWIDTH, c,
3602 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3603 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3604 
3605 	return (DCMD_OK);
3606 }
3607 
3608 void
3609 vmem_seg_help(void)
3610 {
3611 	mdb_printf("%s",
3612 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3613 "\n"
3614 "A vmem_seg_t represents a range of addresses (or arbitrary numbers)\n"
3615 "that makes up a single chunk of data.  Only ALLOC segments have debugging\n"
3616 "information.\n");
3617 	mdb_dec_indent(2);
3618 	mdb_printf("%<b>OPTIONS%</b>\n");
3619 	mdb_inc_indent(2);
3620 	mdb_printf("%s",
3621 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3622 "  -s    report the size of the segment, instead of the end address\n"
3623 "  -c caller\n"
3624 "        filter out segments without the function/PC in their stack trace\n"
3625 "  -e earliest\n"
3626 "        filter out segments timestamped before earliest\n"
3627 "  -l latest\n"
3628 "        filter out segments timestamped after latest\n"
3629 "  -m minsize\n"
3630 "        filter out segments smaller than minsize\n"
3631 "  -M maxsize\n"
3632 "        filter out segments larger than maxsize\n"
3633 "  -t thread\n"
3634 "        filter out segments not involving thread\n"
3635 "  -T type\n"
3636 "        filter out segments not of type 'type'\n"
3637 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3638 }
3639 
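/*
 * Illustrative invocations (the arena address is hypothetical):
 *
 *	arena_addr::walk vmem_seg | ::vmem_seg -T SPAN
 *	arena_addr::walk vmem_alloc | ::vmem_seg -m 0x1000 -v
 */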
3640 /*ARGSUSED*/
3641 int
3642 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3643 {
3644 	vmem_seg_t vs;
3645 	pc_t *stk = vs.vs_stack;
3646 	uintptr_t sz;
3647 	uint8_t t;
3648 	const char *type = NULL;
3649 	GElf_Sym sym;
3650 	char c[MDB_SYM_NAMLEN];
3651 	int no_debug;
3652 	int i;
3653 	int depth;
3654 	uintptr_t laddr, haddr;
3655 
3656 	uintptr_t caller = NULL, thread = NULL;
3657 	uintptr_t minsize = 0, maxsize = 0;
3658 
3659 	hrtime_t earliest = 0, latest = 0;
3660 
3661 	uint_t size = 0;
3662 	uint_t verbose = 0;
3663 
3664 	if (!(flags & DCMD_ADDRSPEC))
3665 		return (DCMD_USAGE);
3666 
3667 	if (mdb_getopts(argc, argv,
3668 	    'c', MDB_OPT_UINTPTR, &caller,
3669 	    'e', MDB_OPT_UINT64, &earliest,
3670 	    'l', MDB_OPT_UINT64, &latest,
3671 	    's', MDB_OPT_SETBITS, TRUE, &size,
3672 	    'm', MDB_OPT_UINTPTR, &minsize,
3673 	    'M', MDB_OPT_UINTPTR, &maxsize,
3674 	    't', MDB_OPT_UINTPTR, &thread,
3675 	    'T', MDB_OPT_STR, &type,
3676 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3677 	    NULL) != argc)
3678 		return (DCMD_USAGE);
3679 
3680 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3681 		if (verbose) {
3682 			mdb_printf("%16s %4s %16s %16s %16s\n"
3683 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3684 			    "ADDR", "TYPE", "START", "END", "SIZE",
3685 			    "", "", "THREAD", "TIMESTAMP", "");
3686 		} else {
3687 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3688 			    "START", size? "SIZE" : "END", "WHO");
3689 		}
3690 	}
3691 
3692 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3693 		mdb_warn("couldn't read vmem_seg at %p", addr);
3694 		return (DCMD_ERR);
3695 	}
3696 
3697 	if (type != NULL) {
3698 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3699 			t = VMEM_ALLOC;
3700 		else if (strcmp(type, "FREE") == 0)
3701 			t = VMEM_FREE;
3702 		else if (strcmp(type, "SPAN") == 0)
3703 			t = VMEM_SPAN;
3704 		else if (strcmp(type, "ROTR") == 0 ||
3705 		    strcmp(type, "ROTOR") == 0)
3706 			t = VMEM_ROTOR;
3707 		else if (strcmp(type, "WLKR") == 0 ||
3708 		    strcmp(type, "WALKER") == 0)
3709 			t = VMEM_WALKER;
3710 		else {
3711 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3712 			    type);
3713 			return (DCMD_ERR);
3714 		}
3715 
3716 		if (vs.vs_type != t)
3717 			return (DCMD_OK);
3718 	}
3719 
3720 	sz = vs.vs_end - vs.vs_start;
3721 
3722 	if (minsize != 0 && sz < minsize)
3723 		return (DCMD_OK);
3724 
3725 	if (maxsize != 0 && sz > maxsize)
3726 		return (DCMD_OK);
3727 
3728 	t = vs.vs_type;
3729 	depth = vs.vs_depth;
3730 
3731 	/*
3732 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3733 	 */
3734 	no_debug = (t != VMEM_ALLOC) ||
3735 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3736 
3737 	if (no_debug) {
3738 		if (caller != NULL || thread != NULL || earliest != 0 ||
3739 		    latest != 0)
3740 			return (DCMD_OK);		/* not enough info */
3741 	} else {
3742 		if (caller != NULL) {
3743 			laddr = caller;
3744 			haddr = caller + sizeof (caller);
3745 
3746 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3747 			    sizeof (c), &sym) != -1 &&
3748 			    caller == (uintptr_t)sym.st_value) {
3749 				/*
3750 				 * We were provided an exact symbol value; any
3751 				 * address in the function is valid.
3752 				 */
3753 				laddr = (uintptr_t)sym.st_value;
3754 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3755 			}
3756 
3757 			for (i = 0; i < depth; i++)
3758 				if (vs.vs_stack[i] >= laddr &&
3759 				    vs.vs_stack[i] < haddr)
3760 					break;
3761 
3762 			if (i == depth)
3763 				return (DCMD_OK);
3764 		}
3765 
3766 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3767 			return (DCMD_OK);
3768 
3769 		if (earliest != 0 && vs.vs_timestamp < earliest)
3770 			return (DCMD_OK);
3771 
3772 		if (latest != 0 && vs.vs_timestamp > latest)
3773 			return (DCMD_OK);
3774 	}
3775 
3776 	type = (t == VMEM_ALLOC ? "ALLC" :
3777 	    t == VMEM_FREE ? "FREE" :
3778 	    t == VMEM_SPAN ? "SPAN" :
3779 	    t == VMEM_ROTOR ? "ROTR" :
3780 	    t == VMEM_WALKER ? "WLKR" :
3781 	    "????");
3782 
3783 	if (flags & DCMD_PIPE_OUT) {
3784 		mdb_printf("%#lr\n", addr);
3785 		return (DCMD_OK);
3786 	}
3787 
3788 	if (verbose) {
3789 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3790 		    addr, type, vs.vs_start, vs.vs_end, sz);
3791 
3792 		if (no_debug)
3793 			return (DCMD_OK);
3794 
3795 		mdb_printf("%16s %4s %16p %16llx\n",
3796 		    "", "", vs.vs_thread, vs.vs_timestamp);
3797 
3798 		mdb_inc_indent(17);
3799 		for (i = 0; i < depth; i++) {
3800 			mdb_printf("%a\n", stk[i]);
3801 		}
3802 		mdb_dec_indent(17);
3803 		mdb_printf("\n");
3804 	} else {
3805 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3806 		    vs.vs_start, size? sz : vs.vs_end);
3807 
3808 		if (no_debug) {
3809 			mdb_printf("\n");
3810 			return (DCMD_OK);
3811 		}
3812 
3813 		for (i = 0; i < depth; i++) {
3814 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3815 			    c, sizeof (c), &sym) == -1)
3816 				continue;
3817 			if (strncmp(c, "vmem_", 5) == 0)
3818 				continue;
3819 			break;
3820 		}
3821 		mdb_printf(" %a\n", stk[i]);
3822 	}
3823 	return (DCMD_OK);
3824 }
3825 
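/*
 * ::kmalog replays a kmem log as a human-readable trace: each record is
 * printed with a T-minus offset from the most recent transaction, the buffer
 * address, the cache name, and a stack trace.  An optional "fail" or "slab"
 * argument selects the failure or slab log instead of the transaction log,
 * and an address restricts the output to transactions on that buffer.
 */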
3826 typedef struct kmalog_data {
3827 	uintptr_t	kma_addr;
3828 	hrtime_t	kma_newest;
3829 } kmalog_data_t;
3830 
3831 /*ARGSUSED*/
3832 static int
3833 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3834 {
3835 	char name[KMEM_CACHE_NAMELEN + 1];
3836 	hrtime_t delta;
3837 	int i, depth;
3838 	size_t bufsize;
3839 
3840 	if (bcp->bc_timestamp == 0)
3841 		return (WALK_DONE);
3842 
3843 	if (kma->kma_newest == 0)
3844 		kma->kma_newest = bcp->bc_timestamp;
3845 
3846 	if (kma->kma_addr) {
3847 		if (mdb_vread(&bufsize, sizeof (bufsize),
3848 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3849 			mdb_warn(
3850 			    "failed to read cache_bufsize for cache at %p",
3851 			    bcp->bc_cache);
3852 			return (WALK_ERR);
3853 		}
3854 
3855 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3856 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3857 			return (WALK_NEXT);
3858 	}
3859 
3860 	delta = kma->kma_newest - bcp->bc_timestamp;
3861 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3862 
3863 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3864 	    &bcp->bc_cache->cache_name) <= 0)
3865 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3866 
3867 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3868 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3869 
3870 	for (i = 0; i < depth; i++)
3871 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3872 
3873 	return (WALK_NEXT);
3874 }
3875 
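/*
 * ::kmalog displays entries from one of the kmem logs.  With no argument it
 * reads the transaction log; "fail" and "slab" select the failure and slab
 * logs instead.  When given an address, only entries whose buffer contains
 * that address are shown.  Illustrative invocations (the address below is
 * purely an example):
 *
 *	> ::kmalog
 *	> ::kmalog fail
 *	> fffff00012345678::kmalog
 */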
3876 int
3877 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3878 {
3879 	const char *logname = "kmem_transaction_log";
3880 	kmalog_data_t kma;
3881 
3882 	if (argc > 1)
3883 		return (DCMD_USAGE);
3884 
3885 	kma.kma_newest = 0;
3886 	if (flags & DCMD_ADDRSPEC)
3887 		kma.kma_addr = addr;
3888 	else
3889 		kma.kma_addr = NULL;
3890 
3891 	if (argc > 0) {
3892 		if (argv->a_type != MDB_TYPE_STRING)
3893 			return (DCMD_USAGE);
3894 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3895 			logname = "kmem_failure_log";
3896 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3897 			logname = "kmem_slab_log";
3898 		else
3899 			return (DCMD_USAGE);
3900 	}
3901 
3902 	if (mdb_readvar(&addr, logname) == -1) {
3903 		mdb_warn("failed to read %s log header pointer", logname);
3904 		return (DCMD_ERR);
3905 	}
3906 
3907 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3908 		mdb_warn("failed to walk kmem log");
3909 		return (DCMD_ERR);
3910 	}
3911 
3912 	return (DCMD_OK);
3913 }
3914 
3915 /*
3916  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3917  * The first piece is a structure which we use to accumulate kmem_cache_t
3918  * addresses of interest.  kmc_add() is used as a callback for the kmem_cache
3919  * walker; we either add all caches, or only those named as arguments.
3920  */
3921 
3922 typedef struct kmclist {
3923 	const char *kmc_name;			/* Name to match (or NULL) */
3924 	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
3925 	int kmc_nelems;				/* Num entries in kmc_caches */
3926 	int kmc_size;				/* Size of kmc_caches array */
3927 } kmclist_t;
3928 
3929 static int
3930 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3931 {
3932 	void *p;
3933 	int s;
3934 
3935 	if (kmc->kmc_name == NULL ||
3936 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3937 		/*
3938 		 * If we have a match, grow our array (if necessary), and then
3939 		 * add the virtual address of the matching cache to our list.
3940 		 */
3941 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3942 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3943 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3944 
3945 			bcopy(kmc->kmc_caches, p,
3946 			    sizeof (uintptr_t) * kmc->kmc_size);
3947 
3948 			kmc->kmc_caches = p;
3949 			kmc->kmc_size = s;
3950 		}
3951 
3952 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3953 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3954 	}
3955 
3956 	return (WALK_NEXT);
3957 }
3958 
3959 /*
3960  * The second piece of ::kmausers is a hash table of allocations.  Each
3961  * allocation owner is identified by its stack trace and data_size.  We then
3962  * track the total bytes of all such allocations, and the number of allocations
3963  * to report at the end.  Once we have a list of caches, we walk through the
3964  * allocated bufctls of each, and update our hash table accordingly.
3965  */
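/*
 * Concretely, for an allocation of data_size 256 whose audit record has the
 * three-frame stack {pc0, pc1, pc2}, the signature computed by kmu_add() is
 * 256 + pc0 + pc1 + pc2, and the owner lands in bucket
 * (signature & (kmu_size - 1)).  The values here are illustrative; the exact
 * arithmetic appears in kmu_add() below.
 */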
3966 
3967 typedef struct kmowner {
3968 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3969 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3970 	size_t kmo_signature;			/* Hash table signature */
3971 	uint_t kmo_num;				/* Number of allocations */
3972 	size_t kmo_data_size;			/* Size of each allocation */
3973 	size_t kmo_total_size;			/* Total bytes of allocation */
3974 	int kmo_depth;				/* Depth of stack trace */
3975 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
3976 } kmowner_t;
3977 
3978 typedef struct kmusers {
3979 	uintptr_t kmu_addr;			/* address of interest */
3980 	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
3981 	kmowner_t *kmu_hash;			/* Hash table of owners */
3982 	int kmu_nelems;				/* Number of entries in use */
3983 	int kmu_size;				/* Total number of entries */
3984 } kmusers_t;
3985 
3986 static void
3987 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3988     size_t size, size_t data_size)
3989 {
3990 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3991 	size_t bucket, signature = data_size;
3992 	kmowner_t *kmo, *kmoend;
3993 
3994 	/*
3995 	 * If the hash table is full, double its size and rehash everything.
3996 	 */
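	/*
	 * Note that the table size stays a power of two (1024, 2048, ...),
	 * so (signature & (kmu_size - 1)) below remains a valid bucket mask,
	 * and the hash chains are rebuilt from scratch after the copy.
	 */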
3997 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3998 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3999 
4000 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4001 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4002 		kmu->kmu_hash = kmo;
4003 		kmu->kmu_size = s;
4004 
4005 		kmoend = kmu->kmu_hash + kmu->kmu_size;
4006 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4007 			kmo->kmo_head = NULL;
4008 
4009 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4010 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4011 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4012 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4013 			kmu->kmu_hash[bucket].kmo_head = kmo;
4014 		}
4015 	}
4016 
4017 	/*
4018 	 * Finish computing the hash signature from the stack trace, and then
4019 	 * see if the owner is in the hash table.  If so, update our stats.
4020 	 */
4021 	for (i = 0; i < depth; i++)
4022 		signature += bcp->bc_stack[i];
4023 
4024 	bucket = signature & (kmu->kmu_size - 1);
4025 
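	/*
	 * A matching signature may still be a collision, so verify the owner
	 * exactly: "difference" ORs together any mismatch in data_size,
	 * depth, or an individual stack frame, and is zero only for a true
	 * match.
	 */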
4026 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4027 		if (kmo->kmo_signature == signature) {
4028 			size_t difference = 0;
4029 
4030 			difference |= kmo->kmo_data_size - data_size;
4031 			difference |= kmo->kmo_depth - depth;
4032 
4033 			for (i = 0; i < depth; i++) {
4034 				difference |= kmo->kmo_stack[i] -
4035 				    bcp->bc_stack[i];
4036 			}
4037 
4038 			if (difference == 0) {
4039 				kmo->kmo_total_size += size;
4040 				kmo->kmo_num++;
4041 				return;
4042 			}
4043 		}
4044 	}
4045 
4046 	/*
4047 	 * If the owner is not yet hashed, grab the next element and fill it
4048 	 * in based on the allocation information.
4049 	 */
4050 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4051 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4052 	kmu->kmu_hash[bucket].kmo_head = kmo;
4053 
4054 	kmo->kmo_signature = signature;
4055 	kmo->kmo_num = 1;
4056 	kmo->kmo_data_size = data_size;
4057 	kmo->kmo_total_size = size;
4058 	kmo->kmo_depth = depth;
4059 
4060 	for (i = 0; i < depth; i++)
4061 		kmo->kmo_stack[i] = bcp->bc_stack[i];
4062 }
4063 
4064 /*
4065  * When ::kmausers is invoked without the -f flag, we simply update our hash
4066  * table with the information from each allocated bufctl.
4067  */
4068 /*ARGSUSED*/
4069 static int
4070 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4071 {
4072 	const kmem_cache_t *cp = kmu->kmu_cache;
4073 
4074 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4075 	return (WALK_NEXT);
4076 }
4077 
4078 /*
4079  * When ::kmausers is invoked with the -f flag, we print out the information
4080  * for each bufctl as well as updating the hash table.
4081  */
4082 static int
4083 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4084 {
4085 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4086 	const kmem_cache_t *cp = kmu->kmu_cache;
4087 	kmem_bufctl_t bufctl;
4088 
4089 	if (kmu->kmu_addr) {
4090 		if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4091 			mdb_warn("couldn't read bufctl at %p", addr);
4092 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4093 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4094 		    cp->cache_bufsize)
4095 			return (WALK_NEXT);
4096 	}
4097 
4098 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4099 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4100 
4101 	for (i = 0; i < depth; i++)
4102 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4103 
4104 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4105 	return (WALK_NEXT);
4106 }
4107 
4108 /*
4109  * We sort our results by allocation size before printing them.
4110  */
4111 static int
4112 kmownercmp(const void *lp, const void *rp)
4113 {
4114 	const kmowner_t *lhs = lp;
4115 	const kmowner_t *rhs = rp;
4116 
4117 	/* compare explicitly; a size_t difference can overflow an int */
	if (lhs->kmo_total_size != rhs->kmo_total_size)
		return (rhs->kmo_total_size > lhs->kmo_total_size ? 1 : -1);
	return (0);
4118 }
4119 
4120 /*
4121  * The main engine of ::kmausers is relatively straightforward: First we
4122  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4123  * iterate over the allocated bufctls of each cache in the list.  Finally,
4124  * we sort and print our results.
4125  */
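/*
 * Illustrative invocations (the cache name below is only an example):
 *
 *	> ::kmausers			summarize all KMF_AUDIT caches
 *	> ::kmausers -e kmem_alloc_256	include "small" users of one cache
 *	> <addr>::kmausers		only allocations containing <addr>
 *					(an address implies -f)
 */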
4126 /*ARGSUSED*/
4127 int
4128 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4129 {
4130 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4131 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4132 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4133 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4134 	int opt_e = FALSE;		/* Include "small" users */
4135 	int opt_f = FALSE;		/* Print stack traces */
4136 
4137 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4138 	kmowner_t *kmo, *kmoend;
4139 	int i, oelems;
4140 
4141 	kmclist_t kmc;
4142 	kmusers_t kmu;
4143 
4144 	bzero(&kmc, sizeof (kmc));
4145 	bzero(&kmu, sizeof (kmu));
4146 
4147 	while ((i = mdb_getopts(argc, argv,
4148 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4149 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4150 
4151 		argv += i;	/* skip past options we just processed */
4152 		argc -= i;	/* adjust argc */
4153 
4154 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4155 			return (DCMD_USAGE);
4156 
4157 		oelems = kmc.kmc_nelems;
4158 		kmc.kmc_name = argv->a_un.a_str;
4159 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4160 
4161 		if (kmc.kmc_nelems == oelems) {
4162 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4163 			return (DCMD_ERR);
4164 		}
4165 
4166 		do_all_caches = 0;
4167 		argv++;
4168 		argc--;
4169 	}
4170 
4171 	if (flags & DCMD_ADDRSPEC) {
4172 		opt_f = TRUE;
4173 		kmu.kmu_addr = addr;
4174 	} else {
4175 		kmu.kmu_addr = NULL;
4176 	}
4177 
4178 	if (opt_e)
4179 		mem_threshold = cnt_threshold = 0;
4180 
4181 	if (opt_f)
4182 		callback = (mdb_walk_cb_t)kmause2;
4183 
4184 	if (do_all_caches) {
4185 		kmc.kmc_name = NULL; /* match all cache names */
4186 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4187 	}
4188 
4189 	for (i = 0; i < kmc.kmc_nelems; i++) {
4190 		uintptr_t cp = kmc.kmc_caches[i];
4191 		kmem_cache_t c;
4192 
4193 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4194 			mdb_warn("failed to read cache at %p", cp);
4195 			continue;
4196 		}
4197 
4198 		if (!(c.cache_flags & KMF_AUDIT)) {
4199 			if (!do_all_caches) {
4200 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4201 				    c.cache_name);
4202 			}
4203 			continue;
4204 		}
4205 
4206 		kmu.kmu_cache = &c;
4207 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4208 		audited_caches++;
4209 	}
4210 
4211 	if (audited_caches == 0 && do_all_caches) {
4212 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4213 		return (DCMD_ERR);
4214 	}
4215 
4216 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4217 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4218 
4219 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4220 		if (kmo->kmo_total_size < mem_threshold &&
4221 		    kmo->kmo_num < cnt_threshold)
4222 			continue;
4223 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4224 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4225 		for (i = 0; i < kmo->kmo_depth; i++)
4226 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4227 	}
4228 
4229 	return (DCMD_OK);
4230 }
4231 
4232 void
4233 kmausers_help(void)
4234 {
4235 	mdb_printf(
4236 	    "Displays the largest users of the kmem allocator, sorted by \n"
4237 	    "trace.  If one or more caches is specified, only those caches\n"
4238 	    "will be searched.  By default, all caches are searched.  If an\n"
4239 	    "address is specified, then only those allocations which include\n"
4240 	    "the given address are displayed.  Specifying an address implies\n"
4241 	    "-f.\n"
4242 	    "\n"
4243 	    "\t-e\tInclude all users, not just the largest\n"
4244 	    "\t-f\tDisplay individual allocations.  By default, users are\n"
4245 	    "\t\tgrouped by stack\n");
4246 }
4247 
4248 static int
4249 kmem_ready_check(void)
4250 {
4251 	int ready;
4252 
4253 	if (mdb_readvar(&ready, "kmem_ready") < 0)
4254 		return (-1); /* errno is set for us */
4255 
4256 	return (ready);
4257 }
4258 
4259 /*ARGSUSED*/
4260 static void
4261 kmem_statechange_cb(void *arg)
4262 {
4263 	static int been_ready = 0;
4264 
4265 	leaky_cleanup(1);	/* state changes invalidate leaky state */
4266 
4267 	if (been_ready)
4268 		return;
4269 
4270 	if (kmem_ready_check() <= 0)
4271 		return;
4272 
4273 	been_ready = 1;
4274 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4275 }
4276 
4277 void
4278 kmem_init(void)
4279 {
4280 	mdb_walker_t w = {
4281 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4282 		list_walk_step, list_walk_fini
4283 	};
4284 
4285 	/*
4286 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
4287 	 * immediately.  Walkers in the linkage structure won't be ready until
4288 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
4289 	 * is ready, we'll use the walker to initialize the caches.  If kmem
4290 	 * isn't ready, we'll register a callback that will allow us to defer
4291 	 * cache walking until it is.
4292 	 */
4293 	if (mdb_add_walker(&w) != 0) {
4294 		mdb_warn("failed to add kmem_cache walker");
4295 		return;
4296 	}
4297 
4298 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, kmem_statechange_cb, NULL);
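	/*
	 * kmem may already be ready (e.g. when examining a crash dump), in
	 * which case no state change will arrive; invoke the handler once by
	 * hand to cover that case.
	 */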
4299 	kmem_statechange_cb(NULL);
4300 }
4301 
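/*
 * ::whatthread - given an address, print each kernel thread whose stack
 * contains a pointer to that address.  With -v, the location of each
 * reference within the stack is shown as well (via stack_active()).  For
 * example (the address is illustrative):
 *
 *	> fffff00012345678::whatthread -v
 */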
4302 typedef struct whatthread {
4303 	uintptr_t	wt_target;
4304 	int		wt_verbose;
4305 } whatthread_t;
4306 
4307 static int
4308 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4309 {
4310 	uintptr_t current, data;
4311 
4312 	if (t->t_stkbase == NULL)
4313 		return (WALK_NEXT);
4314 
4315 	/*
4316 	 * Warn about swapped out threads, but drive on anyway
4317 	 */
4318 	if (!(t->t_schedflag & TS_LOAD)) {
4319 		mdb_warn("thread %p's stack swapped out\n", addr);
4320 		return (WALK_NEXT);
4321 	}
4322 
4323 	/*
4324 	 * Search the thread's stack for the given pointer.  Note that it would
4325 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4326 	 * chunks, but this routine is already fast and simple.
4327 	 */
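	/*
	 * A rough sketch of the batched approach alluded to above, purely
	 * illustrative and not used here (buffer sizing and the final
	 * partial chunk would need care):
	 *
	 *	uintptr_t buf[1024];
	 *	for (cur = stkbase; cur < stktop; cur += nread) {
	 *		nread = MIN(sizeof (buf), stktop - cur);
	 *		if (mdb_vread(buf, nread, cur) == -1)
	 *			break;
	 *		... scan buf[0 .. nread / sizeof (uintptr_t) - 1] ...
	 *	}
	 */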
4328 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4329 	    current += sizeof (uintptr_t)) {
4330 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4331 			mdb_warn("couldn't read thread %p's stack at %p",
4332 			    addr, current);
4333 			return (WALK_ERR);
4334 		}
4335 
4336 		if (data == w->wt_target) {
4337 			if (w->wt_verbose) {
4338 				mdb_printf("%p in thread %p's stack%s\n",
4339 				    current, addr, stack_active(t, current));
4340 			} else {
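				/*
				 * Without -v we print each matching thread
				 * once, so stop scanning this stack after
				 * the first hit.
				 */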
4341 				mdb_printf("%#lr\n", addr);
4342 				return (WALK_NEXT);
4343 			}
4344 		}
4345 	}
4346 
4347 	return (WALK_NEXT);
4348 }
4349 
4350 int
4351 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4352 {
4353 	whatthread_t w;
4354 
4355 	if (!(flags & DCMD_ADDRSPEC))
4356 		return (DCMD_USAGE);
4357 
4358 	w.wt_verbose = FALSE;
4359 	w.wt_target = addr;
4360 
4361 	if (mdb_getopts(argc, argv,
4362 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4363 		return (DCMD_USAGE);
4364 
4365 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4366 	    == -1) {
4367 		mdb_warn("couldn't walk threads");
4368 		return (DCMD_ERR);
4369 	}
4370 
4371 	return (DCMD_OK);
4372 }
4373