xref: /titanic_50/usr/src/cmd/mdb/common/modules/genunix/kmem.c (revision 49f9b365248ee858ee91baa36eab27c5200f6dca)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2011 Joyent, Inc.  All rights reserved.
28  */
29 
30 #include <mdb/mdb_param.h>
31 #include <mdb/mdb_modapi.h>
32 #include <mdb/mdb_ctf.h>
33 #include <mdb/mdb_whatis.h>
34 #include <sys/cpuvar.h>
35 #include <sys/kmem_impl.h>
36 #include <sys/vmem_impl.h>
37 #include <sys/machelf.h>
38 #include <sys/modctl.h>
39 #include <sys/kobj.h>
40 #include <sys/panic.h>
41 #include <sys/stack.h>
42 #include <sys/sysmacros.h>
43 #include <vm/page.h>
44 
45 #include "avl.h"
46 #include "combined.h"
47 #include "dist.h"
48 #include "kmem.h"
49 #include "list.h"
50 
/*
 * Emit a "kmem debug: "-prefixed message when mdb_debug_level is nonzero
 * (kmem_debug() toggles it).  The argument must be a complete, parenthesized
 * mdb_printf() argument list, e.g. dprintf(("read %d rounds\n", n)).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; a bare "if (...) { }" would capture a following
 * "else" or break an unbraced if/else at the call site.
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: ");  \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
56 
/*
 * KM_* flag bits.  Each flag is a distinct power of two, so any combination
 * of them can be OR'ed into a single integer.
 */
#define	KM_ALLOCATED		(1 << 0)
#define	KM_FREE			(1 << 1)
#define	KM_BUFCTL		(1 << 2)
#define	KM_CONSTRUCTED		(1 << 3)	/* only constructed free buffers */
#define	KM_HASH			(1 << 4)
62 
/* Nonzero while dprintf() output is enabled; starts out disabled. */
static int mdb_debug_level;
64 
65 /*ARGSUSED*/
66 static int
67 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
68 {
69 	mdb_walker_t w;
70 	char descr[64];
71 
72 	(void) mdb_snprintf(descr, sizeof (descr),
73 	    "walk the %s cache", c->cache_name);
74 
75 	w.walk_name = c->cache_name;
76 	w.walk_descr = descr;
77 	w.walk_init = kmem_walk_init;
78 	w.walk_step = kmem_walk_step;
79 	w.walk_fini = kmem_walk_fini;
80 	w.walk_init_arg = (void *)addr;
81 
82 	if (mdb_add_walker(&w) == -1)
83 		mdb_warn("failed to add %s walker", c->cache_name);
84 
85 	return (WALK_NEXT);
86 }
87 
88 /*ARGSUSED*/
89 int
90 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
91 {
92 	mdb_debug_level ^= 1;
93 
94 	mdb_printf("kmem: debugging is now %s\n",
95 	    mdb_debug_level ? "on" : "off");
96 
97 	return (DCMD_OK);
98 }
99 
100 int
101 kmem_cache_walk_init(mdb_walk_state_t *wsp)
102 {
103 	GElf_Sym sym;
104 
105 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
106 		mdb_warn("couldn't find kmem_caches");
107 		return (WALK_ERR);
108 	}
109 
110 	wsp->walk_addr = (uintptr_t)sym.st_value;
111 
112 	return (list_walk_init_named(wsp, "cache list", "cache"));
113 }
114 
115 int
116 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
117 {
118 	if (wsp->walk_addr == NULL) {
119 		mdb_warn("kmem_cpu_cache doesn't support global walks");
120 		return (WALK_ERR);
121 	}
122 
123 	if (mdb_layered_walk("cpu", wsp) == -1) {
124 		mdb_warn("couldn't walk 'cpu'");
125 		return (WALK_ERR);
126 	}
127 
128 	wsp->walk_data = (void *)wsp->walk_addr;
129 
130 	return (WALK_NEXT);
131 }
132 
133 int
134 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
135 {
136 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
137 	const cpu_t *cpu = wsp->walk_layer;
138 	kmem_cpu_cache_t cc;
139 
140 	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
141 
142 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
143 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
144 		return (WALK_ERR);
145 	}
146 
147 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
148 }
149 
150 static int
151 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
152 {
153 	kmem_slab_t *sp = p;
154 	uintptr_t caddr = (uintptr_t)arg;
155 	if ((uintptr_t)sp->slab_cache != caddr) {
156 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
157 		    saddr, caddr, sp->slab_cache);
158 		return (-1);
159 	}
160 
161 	return (0);
162 }
163 
164 static int
165 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
166 {
167 	kmem_slab_t *sp = p;
168 
169 	int rc = kmem_slab_check(p, saddr, arg);
170 	if (rc != 0) {
171 		return (rc);
172 	}
173 
174 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
175 		mdb_warn("slab %p is not a partial slab\n", saddr);
176 		return (-1);
177 	}
178 
179 	return (0);
180 }
181 
182 static int
183 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
184 {
185 	kmem_slab_t *sp = p;
186 
187 	int rc = kmem_slab_check(p, saddr, arg);
188 	if (rc != 0) {
189 		return (rc);
190 	}
191 
192 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
193 		mdb_warn("slab %p is not completely allocated\n", saddr);
194 		return (-1);
195 	}
196 
197 	return (0);
198 }
199 
/*
 * State handed to kmem_nth_slab_check().
 */
typedef struct kmem_nth_slab {
	uintptr_t kns_cache_addr;	/* cache each slab must point back to */
	int kns_nslabs;			/* slabs still to accept before stopping */
} kmem_nth_slab_t;
204 
205 static int
206 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
207 {
208 	kmem_nth_slab_t *chkp = arg;
209 
210 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
211 	if (rc != 0) {
212 		return (rc);
213 	}
214 
215 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
216 }
217 
218 static int
219 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
220 {
221 	uintptr_t caddr = wsp->walk_addr;
222 
223 	wsp->walk_addr = (uintptr_t)(caddr +
224 	    offsetof(kmem_cache_t, cache_complete_slabs));
225 
226 	return (list_walk_init_checked(wsp, "slab list", "slab",
227 	    kmem_complete_slab_check, (void *)caddr));
228 }
229 
230 static int
231 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
232 {
233 	uintptr_t caddr = wsp->walk_addr;
234 
235 	wsp->walk_addr = (uintptr_t)(caddr +
236 	    offsetof(kmem_cache_t, cache_partial_slabs));
237 
238 	return (avl_walk_init_checked(wsp, "slab list", "slab",
239 	    kmem_partial_slab_check, (void *)caddr));
240 }
241 
242 int
243 kmem_slab_walk_init(mdb_walk_state_t *wsp)
244 {
245 	uintptr_t caddr = wsp->walk_addr;
246 
247 	if (caddr == NULL) {
248 		mdb_warn("kmem_slab doesn't support global walks\n");
249 		return (WALK_ERR);
250 	}
251 
252 	combined_walk_init(wsp);
253 	combined_walk_add(wsp,
254 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
255 	combined_walk_add(wsp,
256 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
257 
258 	return (WALK_NEXT);
259 }
260 
261 static int
262 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
263 {
264 	uintptr_t caddr = wsp->walk_addr;
265 	kmem_nth_slab_t *chk;
266 
267 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
268 	    UM_SLEEP | UM_GC);
269 	chk->kns_cache_addr = caddr;
270 	chk->kns_nslabs = 1;
271 	wsp->walk_addr = (uintptr_t)(caddr +
272 	    offsetof(kmem_cache_t, cache_complete_slabs));
273 
274 	return (list_walk_init_checked(wsp, "slab list", "slab",
275 	    kmem_nth_slab_check, chk));
276 }
277 
278 int
279 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
280 {
281 	uintptr_t caddr = wsp->walk_addr;
282 	kmem_cache_t c;
283 
284 	if (caddr == NULL) {
285 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
286 		return (WALK_ERR);
287 	}
288 
289 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
290 		mdb_warn("couldn't read kmem_cache at %p", caddr);
291 		return (WALK_ERR);
292 	}
293 
294 	combined_walk_init(wsp);
295 
296 	/*
297 	 * Some consumers (umem_walk_step(), in particular) require at
298 	 * least one callback if there are any buffers in the cache.  So
299 	 * if there are *no* partial slabs, report the first full slab, if
300 	 * any.
301 	 *
302 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
303 	 */
304 	if (c.cache_partial_slabs.avl_numnodes == 0) {
305 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
306 		    list_walk_step, list_walk_fini);
307 	} else {
308 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
309 		    avl_walk_step, avl_walk_fini);
310 	}
311 
312 	return (WALK_NEXT);
313 }
314 
315 int
316 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
317 {
318 	kmem_cache_t c;
319 	const char *filter = NULL;
320 
321 	if (mdb_getopts(ac, argv,
322 	    'n', MDB_OPT_STR, &filter,
323 	    NULL) != ac) {
324 		return (DCMD_USAGE);
325 	}
326 
327 	if (!(flags & DCMD_ADDRSPEC)) {
328 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
329 			mdb_warn("can't walk kmem_cache");
330 			return (DCMD_ERR);
331 		}
332 		return (DCMD_OK);
333 	}
334 
335 	if (DCMD_HDRSPEC(flags))
336 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
337 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
338 
339 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
340 		mdb_warn("couldn't read kmem_cache at %p", addr);
341 		return (DCMD_ERR);
342 	}
343 
344 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
345 		return (DCMD_OK);
346 
347 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
348 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
349 
350 	return (DCMD_OK);
351 }
352 
/*
 * Print the help text for the kmem_cache dcmd: a one-line summary, an
 * OPTIONS heading, and the option and column descriptions.
 */
void
kmem_cache_help(void)
{
	static const char details[] =
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n";

	mdb_printf("%s", "Print kernel memory caches.\n\n");

	/* The heading is printed two columns left of the body text. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", details);
}
373 
374 #define	LABEL_WIDTH	11
375 static void
376 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
377     size_t maxbuckets, size_t minbucketsize)
378 {
379 	uint64_t total;
380 	int buckets;
381 	int i;
382 	const int *distarray;
383 	int complete[2];
384 
385 	buckets = buffers_per_slab;
386 
387 	total = 0;
388 	for (i = 0; i <= buffers_per_slab; i++)
389 		total += ks_bucket[i];
390 
391 	if (maxbuckets > 1)
392 		buckets = MIN(buckets, maxbuckets);
393 
394 	if (minbucketsize > 1) {
395 		/*
396 		 * minbucketsize does not apply to the first bucket reserved
397 		 * for completely allocated slabs
398 		 */
399 		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
400 		    minbucketsize));
401 		if ((buckets < 2) && (buffers_per_slab > 1)) {
402 			buckets = 2;
403 			minbucketsize = (buffers_per_slab - 1);
404 		}
405 	}
406 
407 	/*
408 	 * The first printed bucket is reserved for completely allocated slabs.
409 	 * Passing (buckets - 1) excludes that bucket from the generated
410 	 * distribution, since we're handling it as a special case.
411 	 */
412 	complete[0] = buffers_per_slab;
413 	complete[1] = buffers_per_slab + 1;
414 	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
415 
416 	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
417 	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
418 
419 	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
420 	/*
421 	 * Print bucket ranges in descending order after the first bucket for
422 	 * completely allocated slabs, so a person can see immediately whether
423 	 * or not there is fragmentation without having to scan possibly
424 	 * multiple screens of output. Starting at (buckets - 2) excludes the
425 	 * extra terminating bucket.
426 	 */
427 	for (i = buckets - 2; i >= 0; i--) {
428 		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
429 	}
430 	mdb_printf("\n");
431 }
432 #undef LABEL_WIDTH
433 
434 /*ARGSUSED*/
435 static int
436 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
437 {
438 	*is_slab = B_TRUE;
439 	return (WALK_DONE);
440 }
441 
442 /*ARGSUSED*/
443 static int
444 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
445     boolean_t *is_slab)
446 {
447 	/*
448 	 * The "kmem_partial_slab" walker reports the first full slab if there
449 	 * are no partial slabs (for the sake of consumers that require at least
450 	 * one callback if there are any buffers in the cache).
451 	 */
452 	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
453 	return (WALK_DONE);
454 }
455 
456 typedef struct kmem_slab_usage {
457 	int ksu_refcnt;			/* count of allocated buffers on slab */
458 	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
459 } kmem_slab_usage_t;
460 
461 typedef struct kmem_slab_stats {
462 	const kmem_cache_t *ks_cp;
463 	int ks_slabs;			/* slabs in cache */
464 	int ks_partial_slabs;		/* partially allocated slabs in cache */
465 	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
466 	int ks_max_buffers_per_slab;	/* max buffers per slab */
467 	int ks_usage_len;		/* ks_usage array length */
468 	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
469 	uint_t *ks_bucket;		/* slab usage distribution */
470 } kmem_slab_stats_t;
471 
472 /*ARGSUSED*/
473 static int
474 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
475     kmem_slab_stats_t *ks)
476 {
477 	kmem_slab_usage_t *ksu;
478 	long unused;
479 
480 	ks->ks_slabs++;
481 	ks->ks_bucket[sp->slab_refcnt]++;
482 
483 	unused = (sp->slab_chunks - sp->slab_refcnt);
484 	if (unused == 0) {
485 		return (WALK_NEXT);
486 	}
487 
488 	ks->ks_partial_slabs++;
489 	ks->ks_unused_buffers += unused;
490 
491 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
492 		kmem_slab_usage_t *usage;
493 		int len = ks->ks_usage_len;
494 
495 		len = (len == 0 ? 16 : len * 2);
496 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
497 		if (ks->ks_usage != NULL) {
498 			bcopy(ks->ks_usage, usage,
499 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
500 			mdb_free(ks->ks_usage,
501 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
502 		}
503 		ks->ks_usage = usage;
504 		ks->ks_usage_len = len;
505 	}
506 
507 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
508 	ksu->ksu_refcnt = sp->slab_refcnt;
509 	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
510 	return (WALK_NEXT);
511 }
512 
/*
 * Print the three-line column header for the kmem_slabs dcmd output.  All
 * three lines share one format so the columns line up.
 *
 * Declared with (void) so the definition provides a real prototype; an
 * empty parameter list would leave calls with arguments undiagnosed.
 */
static void
kmem_slabs_header(void)
{
	static const char fmt[] = "%-25s %8s %8s %9s %9s %6s\n";

	mdb_printf(fmt,
	    "", "", "Partial", "", "Unused", "");
	mdb_printf(fmt,
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf(fmt,
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
524 
525 int
526 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
527 {
528 	kmem_cache_t c;
529 	kmem_slab_stats_t stats;
530 	mdb_walk_cb_t cb;
531 	int pct;
532 	int tenths_pct;
533 	size_t maxbuckets = 1;
534 	size_t minbucketsize = 0;
535 	const char *filter = NULL;
536 	const char *name = NULL;
537 	uint_t opt_v = FALSE;
538 	boolean_t buckets = B_FALSE;
539 	boolean_t skip = B_FALSE;
540 
541 	if (mdb_getopts(argc, argv,
542 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
543 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
544 	    'n', MDB_OPT_STR, &filter,
545 	    'N', MDB_OPT_STR, &name,
546 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
547 	    NULL) != argc) {
548 		return (DCMD_USAGE);
549 	}
550 
551 	if ((maxbuckets != 1) || (minbucketsize != 0)) {
552 		buckets = B_TRUE;
553 	}
554 
555 	if (!(flags & DCMD_ADDRSPEC)) {
556 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
557 		    argv) == -1) {
558 			mdb_warn("can't walk kmem_cache");
559 			return (DCMD_ERR);
560 		}
561 		return (DCMD_OK);
562 	}
563 
564 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
565 		mdb_warn("couldn't read kmem_cache at %p", addr);
566 		return (DCMD_ERR);
567 	}
568 
569 	if (name == NULL) {
570 		skip = ((filter != NULL) &&
571 		    (strstr(c.cache_name, filter) == NULL));
572 	} else if (filter == NULL) {
573 		skip = (strcmp(c.cache_name, name) != 0);
574 	} else {
575 		/* match either -n or -N */
576 		skip = ((strcmp(c.cache_name, name) != 0) &&
577 		    (strstr(c.cache_name, filter) == NULL));
578 	}
579 
580 	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
581 		kmem_slabs_header();
582 	} else if ((opt_v || buckets) && !skip) {
583 		if (DCMD_HDRSPEC(flags)) {
584 			kmem_slabs_header();
585 		} else {
586 			boolean_t is_slab = B_FALSE;
587 			const char *walker_name;
588 			if (opt_v) {
589 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
590 				walker_name = "kmem_slab_partial";
591 			} else {
592 				cb = (mdb_walk_cb_t)kmem_first_slab;
593 				walker_name = "kmem_slab";
594 			}
595 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
596 			if (is_slab) {
597 				kmem_slabs_header();
598 			}
599 		}
600 	}
601 
602 	if (skip) {
603 		return (DCMD_OK);
604 	}
605 
606 	bzero(&stats, sizeof (kmem_slab_stats_t));
607 	stats.ks_cp = &c;
608 	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
609 	/* +1 to include a zero bucket */
610 	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
611 	    sizeof (*stats.ks_bucket), UM_SLEEP);
612 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
613 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
614 
615 	if (c.cache_buftotal == 0) {
616 		pct = 0;
617 		tenths_pct = 0;
618 	} else {
619 		uint64_t n = stats.ks_unused_buffers * 10000;
620 		pct = (int)(n / c.cache_buftotal);
621 		tenths_pct = pct - ((pct / 100) * 100);
622 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
623 		if (tenths_pct == 10) {
624 			pct += 100;
625 			tenths_pct = 0;
626 		}
627 	}
628 
629 	pct /= 100;
630 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
631 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
632 	    stats.ks_unused_buffers, pct, tenths_pct);
633 
634 	if (maxbuckets == 0) {
635 		maxbuckets = stats.ks_max_buffers_per_slab;
636 	}
637 
638 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
639 	    (stats.ks_slabs > 0)) {
640 		mdb_printf("\n");
641 		kmem_slabs_print_dist(stats.ks_bucket,
642 		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
643 	}
644 
645 	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
646 	    sizeof (*stats.ks_bucket));
647 
648 	if (!opt_v) {
649 		return (DCMD_OK);
650 	}
651 
652 	if (opt_v && (stats.ks_partial_slabs > 0)) {
653 		int i;
654 		kmem_slab_usage_t *ksu;
655 
656 		mdb_printf("  %d complete (%d), %d partial:",
657 		    (stats.ks_slabs - stats.ks_partial_slabs),
658 		    stats.ks_max_buffers_per_slab,
659 		    stats.ks_partial_slabs);
660 
661 		for (i = 0; i < stats.ks_partial_slabs; i++) {
662 			ksu = &stats.ks_usage[i];
663 			mdb_printf(" %d%s", ksu->ksu_refcnt,
664 			    (ksu->ksu_nomove ? "*" : ""));
665 		}
666 		mdb_printf("\n\n");
667 	}
668 
669 	if (stats.ks_usage_len > 0) {
670 		mdb_free(stats.ks_usage,
671 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
672 	}
673 
674 	return (DCMD_OK);
675 }
676 
/*
 * Print the help text for the kmem_slabs dcmd: a one-line summary, an
 * OPTIONS heading, and the option and column descriptions.
 */
void
kmem_slabs_help(void)
{
	static const char details[] =
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete, 3 partial (8): 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n";

	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");

	/* The heading is printed two columns left of the body text. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", details);
}
744 
/*
 * Three-way comparison of two uintptr_t values passed by address.  Returns
 * -1, 0 or 1 when *lhs is less than, equal to, or greater than *rhs.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t a = *(const uintptr_t *)lhs;
	const uintptr_t b = *(const uintptr_t *)rhs;

	return ((a > b) - (a < b));
}
757 
758 static int
759 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
760 {
761 	const kmem_bufctl_audit_t *bcp1 = *lhs;
762 	const kmem_bufctl_audit_t *bcp2 = *rhs;
763 
764 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
765 		return (-1);
766 
767 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
768 		return (1);
769 
770 	return (0);
771 }
772 
773 typedef struct kmem_hash_walk {
774 	uintptr_t *kmhw_table;
775 	size_t kmhw_nelems;
776 	size_t kmhw_pos;
777 	kmem_bufctl_t kmhw_cur;
778 } kmem_hash_walk_t;
779 
780 int
781 kmem_hash_walk_init(mdb_walk_state_t *wsp)
782 {
783 	kmem_hash_walk_t *kmhw;
784 	uintptr_t *hash;
785 	kmem_cache_t c;
786 	uintptr_t haddr, addr = wsp->walk_addr;
787 	size_t nelems;
788 	size_t hsize;
789 
790 	if (addr == NULL) {
791 		mdb_warn("kmem_hash doesn't support global walks\n");
792 		return (WALK_ERR);
793 	}
794 
795 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
796 		mdb_warn("couldn't read cache at addr %p", addr);
797 		return (WALK_ERR);
798 	}
799 
800 	if (!(c.cache_flags & KMF_HASH)) {
801 		mdb_warn("cache %p doesn't have a hash table\n", addr);
802 		return (WALK_DONE);		/* nothing to do */
803 	}
804 
805 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
806 	kmhw->kmhw_cur.bc_next = NULL;
807 	kmhw->kmhw_pos = 0;
808 
809 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
810 	hsize = nelems * sizeof (uintptr_t);
811 	haddr = (uintptr_t)c.cache_hash_table;
812 
813 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
814 	if (mdb_vread(hash, hsize, haddr) == -1) {
815 		mdb_warn("failed to read hash table at %p", haddr);
816 		mdb_free(hash, hsize);
817 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
818 		return (WALK_ERR);
819 	}
820 
821 	wsp->walk_data = kmhw;
822 
823 	return (WALK_NEXT);
824 }
825 
826 int
827 kmem_hash_walk_step(mdb_walk_state_t *wsp)
828 {
829 	kmem_hash_walk_t *kmhw = wsp->walk_data;
830 	uintptr_t addr = NULL;
831 
832 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
833 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
834 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
835 				break;
836 		}
837 	}
838 	if (addr == NULL)
839 		return (WALK_DONE);
840 
841 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
842 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
843 		return (WALK_ERR);
844 	}
845 
846 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
847 }
848 
849 void
850 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
851 {
852 	kmem_hash_walk_t *kmhw = wsp->walk_data;
853 
854 	if (kmhw == NULL)
855 		return;
856 
857 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
858 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
859 }
860 
861 /*
862  * Find the address of the bufctl structure for the address 'buf' in cache
863  * 'cp', which is at address caddr, and place it in *out.
864  */
865 static int
866 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
867 {
868 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
869 	kmem_bufctl_t *bcp;
870 	kmem_bufctl_t bc;
871 
872 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
873 		mdb_warn("unable to read hash bucket for %p in cache %p",
874 		    buf, caddr);
875 		return (-1);
876 	}
877 
878 	while (bcp != NULL) {
879 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
880 		    (uintptr_t)bcp) == -1) {
881 			mdb_warn("unable to read bufctl at %p", bcp);
882 			return (-1);
883 		}
884 		if (bc.bc_addr == buf) {
885 			*out = (uintptr_t)bcp;
886 			return (0);
887 		}
888 		bcp = bc.bc_next;
889 	}
890 
891 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
892 	return (-1);
893 }
894 
895 int
896 kmem_get_magsize(const kmem_cache_t *cp)
897 {
898 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
899 	GElf_Sym mt_sym;
900 	kmem_magtype_t mt;
901 	int res;
902 
903 	/*
904 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
905 	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
906 	 * it is okay to return 0 for them.
907 	 */
908 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
909 	    (cp->cache_flags & KMF_NOMAGAZINE))
910 		return (res);
911 
912 	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
913 		mdb_warn("unable to read 'kmem_magtype'");
914 	} else if (addr < mt_sym.st_value ||
915 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
916 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
917 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
918 		    cp->cache_name, addr);
919 		return (0);
920 	}
921 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
922 		mdb_warn("unable to read magtype at %a", addr);
923 		return (0);
924 	}
925 	return (mt.mt_magsize);
926 }
927 
928 /*ARGSUSED*/
929 static int
930 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
931 {
932 	*est -= (sp->slab_chunks - sp->slab_refcnt);
933 
934 	return (WALK_NEXT);
935 }
936 
937 /*
938  * Returns an upper bound on the number of allocated buffers in a given
939  * cache.
940  */
941 size_t
942 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
943 {
944 	int magsize;
945 	size_t cache_est;
946 
947 	cache_est = cp->cache_buftotal;
948 
949 	(void) mdb_pwalk("kmem_slab_partial",
950 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
951 
952 	if ((magsize = kmem_get_magsize(cp)) != 0) {
953 		size_t mag_est = cp->cache_full.ml_total * magsize;
954 
955 		if (cache_est >= mag_est) {
956 			cache_est -= mag_est;
957 		} else {
958 			mdb_warn("cache %p's magazine layer holds more buffers "
959 			    "than the slab layer.\n", addr);
960 		}
961 	}
962 	return (cache_est);
963 }
964 
/*
 * Read the magazine at target address 'kmp' into the local buffer 'mp'
 * (magbsize bytes) and append its first 'rounds' entries to maglist[],
 * advancing magcnt.  Jumps to the caller's 'fail' label if the read fails
 * or if magcnt reaches magmax.
 *
 * The macro relies on these names being in scope at the call site: mp, kmp,
 * magbsize, maglist, magcnt, magmax, i, and a 'fail' label.
 *
 * It is wrapped in do { } while (0) so it behaves as a single statement,
 * 'rounds' is parenthesized so an expression argument is evaluated as a
 * unit, and the count is cast to match its format specifier.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%lu magazines exceeds fudge factor\n", \
			    (unsigned long)magcnt); \
			goto fail; \
		} \
	} \
} while (0)
979 
980 int
981 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
982     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
983 {
984 	kmem_magazine_t *kmp, *mp;
985 	void **maglist = NULL;
986 	int i, cpu;
987 	size_t magsize, magmax, magbsize;
988 	size_t magcnt = 0;
989 
990 	/*
991 	 * Read the magtype out of the cache, after verifying the pointer's
992 	 * correctness.
993 	 */
994 	magsize = kmem_get_magsize(cp);
995 	if (magsize == 0) {
996 		*maglistp = NULL;
997 		*magcntp = 0;
998 		*magmaxp = 0;
999 		return (WALK_NEXT);
1000 	}
1001 
1002 	/*
1003 	 * There are several places where we need to go buffer hunting:
1004 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1005 	 * and the full magazine list in the depot.
1006 	 *
1007 	 * For an upper bound on the number of buffers in the magazine
1008 	 * layer, we have the number of magazines on the cache_full
1009 	 * list plus at most two magazines per CPU (the loaded and the
1010 	 * spare).  Toss in 100 magazines as a fudge factor in case this
1011 	 * is live (the number "100" comes from the same fudge factor in
1012 	 * crash(1M)).
1013 	 */
1014 	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1015 	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1016 
1017 	if (magbsize >= PAGESIZE / 2) {
1018 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1019 		    addr, magbsize);
1020 		return (WALK_ERR);
1021 	}
1022 
1023 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1024 	mp = mdb_alloc(magbsize, alloc_flags);
1025 	if (mp == NULL || maglist == NULL)
1026 		goto fail;
1027 
1028 	/*
1029 	 * First up: the magazines in the depot (i.e. on the cache_full list).
1030 	 */
1031 	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1032 		READMAG_ROUNDS(magsize);
1033 		kmp = mp->mag_next;
1034 
1035 		if (kmp == cp->cache_full.ml_list)
1036 			break; /* cache_full list loop detected */
1037 	}
1038 
1039 	dprintf(("cache_full list done\n"));
1040 
1041 	/*
1042 	 * Now whip through the CPUs, snagging the loaded magazines
1043 	 * and full spares.
1044 	 *
1045 	 * In order to prevent inconsistent dumps, rounds and prounds
1046 	 * are copied aside before dumping begins.
1047 	 */
1048 	for (cpu = 0; cpu < ncpus; cpu++) {
1049 		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1050 		short rounds, prounds;
1051 
1052 		if (KMEM_DUMPCC(ccp)) {
1053 			rounds = ccp->cc_dump_rounds;
1054 			prounds = ccp->cc_dump_prounds;
1055 		} else {
1056 			rounds = ccp->cc_rounds;
1057 			prounds = ccp->cc_prounds;
1058 		}
1059 
1060 		dprintf(("reading cpu cache %p\n",
1061 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
1062 
1063 		if (rounds > 0 &&
1064 		    (kmp = ccp->cc_loaded) != NULL) {
1065 			dprintf(("reading %d loaded rounds\n", rounds));
1066 			READMAG_ROUNDS(rounds);
1067 		}
1068 
1069 		if (prounds > 0 &&
1070 		    (kmp = ccp->cc_ploaded) != NULL) {
1071 			dprintf(("reading %d previously loaded rounds\n",
1072 			    prounds));
1073 			READMAG_ROUNDS(prounds);
1074 		}
1075 	}
1076 
1077 	dprintf(("magazine layer: %d buffers\n", magcnt));
1078 
1079 	if (!(alloc_flags & UM_GC))
1080 		mdb_free(mp, magbsize);
1081 
1082 	*maglistp = maglist;
1083 	*magcntp = magcnt;
1084 	*magmaxp = magmax;
1085 
1086 	return (WALK_NEXT);
1087 
1088 fail:
1089 	if (!(alloc_flags & UM_GC)) {
1090 		if (mp)
1091 			mdb_free(mp, magbsize);
1092 		if (maglist)
1093 			mdb_free(maglist, magmax * sizeof (void *));
1094 	}
1095 	return (WALK_ERR);
1096 }
1097 
1098 static int
1099 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1100 {
1101 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1102 }
1103 
1104 static int
1105 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1106 {
1107 	kmem_bufctl_audit_t b;
1108 
1109 	/*
1110 	 * if KMF_AUDIT is not set, we know that we're looking at a
1111 	 * kmem_bufctl_t.
1112 	 */
1113 	if (!(cp->cache_flags & KMF_AUDIT) ||
1114 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1115 		(void) memset(&b, 0, sizeof (b));
1116 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1117 			mdb_warn("unable to read bufctl at %p", buf);
1118 			return (WALK_ERR);
1119 		}
1120 	}
1121 
1122 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1123 }
1124 
1125 typedef struct kmem_walk {
1126 	int kmw_type;
1127 
1128 	uintptr_t kmw_addr;		/* cache address */
1129 	kmem_cache_t *kmw_cp;
1130 	size_t kmw_csize;
1131 
1132 	/*
1133 	 * magazine layer
1134 	 */
1135 	void **kmw_maglist;
1136 	size_t kmw_max;
1137 	size_t kmw_count;
1138 	size_t kmw_pos;
1139 
1140 	/*
1141 	 * slab layer
1142 	 */
1143 	char *kmw_valid;	/* to keep track of freed buffers */
1144 	char *kmw_ubase;	/* buffer for slab data */
1145 } kmem_walk_t;
1146 
1147 static int
1148 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1149 {
1150 	kmem_walk_t *kmw;
1151 	int ncpus, csize;
1152 	kmem_cache_t *cp;
1153 	size_t vm_quantum;
1154 
1155 	size_t magmax, magcnt;
1156 	void **maglist = NULL;
1157 	uint_t chunksize, slabsize;
1158 	int status = WALK_ERR;
1159 	uintptr_t addr = wsp->walk_addr;
1160 	const char *layered;
1161 
1162 	type &= ~KM_HASH;
1163 
1164 	if (addr == NULL) {
1165 		mdb_warn("kmem walk doesn't support global walks\n");
1166 		return (WALK_ERR);
1167 	}
1168 
1169 	dprintf(("walking %p\n", addr));
1170 
1171 	/*
1172 	 * First we need to figure out how many CPUs are configured in the
1173 	 * system to know how much to slurp out.
1174 	 */
1175 	mdb_readvar(&ncpus, "max_ncpus");
1176 
1177 	csize = KMEM_CACHE_SIZE(ncpus);
1178 	cp = mdb_alloc(csize, UM_SLEEP);
1179 
1180 	if (mdb_vread(cp, csize, addr) == -1) {
1181 		mdb_warn("couldn't read cache at addr %p", addr);
1182 		goto out2;
1183 	}
1184 
1185 	/*
1186 	 * It's easy for someone to hand us an invalid cache address.
1187 	 * Unfortunately, it is hard for this walker to survive an
1188 	 * invalid cache cleanly.  So we make sure that:
1189 	 *
1190 	 *	1. the vmem arena for the cache is readable,
1191 	 *	2. the vmem arena's quantum is a power of 2,
1192 	 *	3. our slabsize is a multiple of the quantum, and
1193 	 *	4. our chunksize is >0 and less than our slabsize.
1194 	 */
1195 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1196 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1197 	    vm_quantum == 0 ||
1198 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1199 	    cp->cache_slabsize < vm_quantum ||
1200 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1201 	    cp->cache_chunksize == 0 ||
1202 	    cp->cache_chunksize > cp->cache_slabsize) {
1203 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1204 		goto out2;
1205 	}
1206 
1207 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1208 
1209 	if (cp->cache_buftotal == 0) {
1210 		mdb_free(cp, csize);
1211 		return (WALK_DONE);
1212 	}
1213 
1214 	/*
1215 	 * If they ask for bufctls, but it's a small-slab cache,
1216 	 * there is nothing to report.
1217 	 */
1218 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1219 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1220 		    cp->cache_flags));
1221 		mdb_free(cp, csize);
1222 		return (WALK_DONE);
1223 	}
1224 
1225 	/*
1226 	 * If they want constructed buffers, but there's no constructor or
1227 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1228 	 */
1229 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1230 	    cp->cache_constructor == NULL ||
1231 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1232 		mdb_free(cp, csize);
1233 		return (WALK_DONE);
1234 	}
1235 
1236 	/*
1237 	 * Read in the contents of the magazine layer
1238 	 */
1239 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1240 	    &magmax, UM_SLEEP) == WALK_ERR)
1241 		goto out2;
1242 
1243 	/*
1244 	 * We have all of the buffers from the magazines;  if we are walking
1245 	 * allocated buffers, sort them so we can bsearch them later.
1246 	 */
1247 	if (type & KM_ALLOCATED)
1248 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1249 
1250 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1251 
1252 	kmw->kmw_type = type;
1253 	kmw->kmw_addr = addr;
1254 	kmw->kmw_cp = cp;
1255 	kmw->kmw_csize = csize;
1256 	kmw->kmw_maglist = maglist;
1257 	kmw->kmw_max = magmax;
1258 	kmw->kmw_count = magcnt;
1259 	kmw->kmw_pos = 0;
1260 
1261 	/*
1262 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1263 	 * hash table instead of the slab layer.
1264 	 */
1265 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1266 		layered = "kmem_hash";
1267 
1268 		kmw->kmw_type |= KM_HASH;
1269 	} else {
1270 		/*
1271 		 * If we are walking freed buffers, we only need the
1272 		 * magazine layer plus the partially allocated slabs.
1273 		 * To walk allocated buffers, we need all of the slabs.
1274 		 */
1275 		if (type & KM_ALLOCATED)
1276 			layered = "kmem_slab";
1277 		else
1278 			layered = "kmem_slab_partial";
1279 
1280 		/*
1281 		 * for small-slab caches, we read in the entire slab.  For
1282 		 * freed buffers, we can just walk the freelist.  For
1283 		 * allocated buffers, we use a 'valid' array to track
1284 		 * the freed buffers.
1285 		 */
1286 		if (!(cp->cache_flags & KMF_HASH)) {
1287 			chunksize = cp->cache_chunksize;
1288 			slabsize = cp->cache_slabsize;
1289 
1290 			kmw->kmw_ubase = mdb_alloc(slabsize +
1291 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1292 
1293 			if (type & KM_ALLOCATED)
1294 				kmw->kmw_valid =
1295 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1296 		}
1297 	}
1298 
1299 	status = WALK_NEXT;
1300 
1301 	if (mdb_layered_walk(layered, wsp) == -1) {
1302 		mdb_warn("unable to start layered '%s' walk", layered);
1303 		status = WALK_ERR;
1304 	}
1305 
1306 out1:
1307 	if (status == WALK_ERR) {
1308 		if (kmw->kmw_valid)
1309 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1310 
1311 		if (kmw->kmw_ubase)
1312 			mdb_free(kmw->kmw_ubase, slabsize +
1313 			    sizeof (kmem_bufctl_t));
1314 
1315 		if (kmw->kmw_maglist)
1316 			mdb_free(kmw->kmw_maglist,
1317 			    kmw->kmw_max * sizeof (uintptr_t));
1318 
1319 		mdb_free(kmw, sizeof (kmem_walk_t));
1320 		wsp->walk_data = NULL;
1321 	}
1322 
1323 out2:
1324 	if (status == WALK_ERR)
1325 		mdb_free(cp, csize);
1326 
1327 	return (status);
1328 }
1329 
1330 int
1331 kmem_walk_step(mdb_walk_state_t *wsp)
1332 {
1333 	kmem_walk_t *kmw = wsp->walk_data;
1334 	int type = kmw->kmw_type;
1335 	kmem_cache_t *cp = kmw->kmw_cp;
1336 
1337 	void **maglist = kmw->kmw_maglist;
1338 	int magcnt = kmw->kmw_count;
1339 
1340 	uintptr_t chunksize, slabsize;
1341 	uintptr_t addr;
1342 	const kmem_slab_t *sp;
1343 	const kmem_bufctl_t *bcp;
1344 	kmem_bufctl_t bc;
1345 
1346 	int chunks;
1347 	char *kbase;
1348 	void *buf;
1349 	int i, ret;
1350 
1351 	char *valid, *ubase;
1352 
1353 	/*
1354 	 * first, handle the 'kmem_hash' layered walk case
1355 	 */
1356 	if (type & KM_HASH) {
1357 		/*
1358 		 * We have a buffer which has been allocated out of the
1359 		 * global layer. We need to make sure that it's not
1360 		 * actually sitting in a magazine before we report it as
1361 		 * an allocated buffer.
1362 		 */
1363 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1364 
1365 		if (magcnt > 0 &&
1366 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1367 		    addrcmp) != NULL)
1368 			return (WALK_NEXT);
1369 
1370 		if (type & KM_BUFCTL)
1371 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1372 
1373 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1374 	}
1375 
1376 	ret = WALK_NEXT;
1377 
1378 	addr = kmw->kmw_addr;
1379 
1380 	/*
1381 	 * If we're walking freed buffers, report everything in the
1382 	 * magazine layer before processing the first slab.
1383 	 */
1384 	if ((type & KM_FREE) && magcnt != 0) {
1385 		kmw->kmw_count = 0;		/* only do this once */
1386 		for (i = 0; i < magcnt; i++) {
1387 			buf = maglist[i];
1388 
1389 			if (type & KM_BUFCTL) {
1390 				uintptr_t out;
1391 
1392 				if (cp->cache_flags & KMF_BUFTAG) {
1393 					kmem_buftag_t *btp;
1394 					kmem_buftag_t tag;
1395 
1396 					/* LINTED - alignment */
1397 					btp = KMEM_BUFTAG(cp, buf);
1398 					if (mdb_vread(&tag, sizeof (tag),
1399 					    (uintptr_t)btp) == -1) {
1400 						mdb_warn("reading buftag for "
1401 						    "%p at %p", buf, btp);
1402 						continue;
1403 					}
1404 					out = (uintptr_t)tag.bt_bufctl;
1405 				} else {
1406 					if (kmem_hash_lookup(cp, addr, buf,
1407 					    &out) == -1)
1408 						continue;
1409 				}
1410 				ret = bufctl_walk_callback(cp, wsp, out);
1411 			} else {
1412 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1413 			}
1414 
1415 			if (ret != WALK_NEXT)
1416 				return (ret);
1417 		}
1418 	}
1419 
1420 	/*
1421 	 * If they want constructed buffers, we're finished, since the
1422 	 * magazine layer holds them all.
1423 	 */
1424 	if (type & KM_CONSTRUCTED)
1425 		return (WALK_DONE);
1426 
1427 	/*
1428 	 * Handle the buffers in the current slab
1429 	 */
1430 	chunksize = cp->cache_chunksize;
1431 	slabsize = cp->cache_slabsize;
1432 
1433 	sp = wsp->walk_layer;
1434 	chunks = sp->slab_chunks;
1435 	kbase = sp->slab_base;
1436 
1437 	dprintf(("kbase is %p\n", kbase));
1438 
1439 	if (!(cp->cache_flags & KMF_HASH)) {
1440 		valid = kmw->kmw_valid;
1441 		ubase = kmw->kmw_ubase;
1442 
1443 		if (mdb_vread(ubase, chunks * chunksize,
1444 		    (uintptr_t)kbase) == -1) {
1445 			mdb_warn("failed to read slab contents at %p", kbase);
1446 			return (WALK_ERR);
1447 		}
1448 
1449 		/*
1450 		 * Set up the valid map as fully allocated -- we'll punch
1451 		 * out the freelist.
1452 		 */
1453 		if (type & KM_ALLOCATED)
1454 			(void) memset(valid, 1, chunks);
1455 	} else {
1456 		valid = NULL;
1457 		ubase = NULL;
1458 	}
1459 
1460 	/*
1461 	 * walk the slab's freelist
1462 	 */
1463 	bcp = sp->slab_head;
1464 
1465 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1466 
1467 	/*
1468 	 * since we could be in the middle of allocating a buffer,
1469 	 * our refcnt could be one higher than it aught.  So we
1470 	 * check one further on the freelist than the count allows.
1471 	 */
1472 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1473 		uint_t ndx;
1474 
1475 		dprintf(("bcp is %p\n", bcp));
1476 
1477 		if (bcp == NULL) {
1478 			if (i == chunks)
1479 				break;
1480 			mdb_warn(
1481 			    "slab %p in cache %p freelist too short by %d\n",
1482 			    sp, addr, chunks - i);
1483 			break;
1484 		}
1485 
1486 		if (cp->cache_flags & KMF_HASH) {
1487 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1488 				mdb_warn("failed to read bufctl ptr at %p",
1489 				    bcp);
1490 				break;
1491 			}
1492 			buf = bc.bc_addr;
1493 		} else {
1494 			/*
1495 			 * Otherwise the buffer is (or should be) in the slab
1496 			 * that we've read in; determine its offset in the
1497 			 * slab, validate that it's not corrupt, and add to
1498 			 * our base address to find the umem_bufctl_t.  (Note
1499 			 * that we don't need to add the size of the bufctl
1500 			 * to our offset calculation because of the slop that's
1501 			 * allocated for the buffer at ubase.)
1502 			 */
1503 			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1504 
1505 			if (offs > chunks * chunksize) {
1506 				mdb_warn("found corrupt bufctl ptr %p"
1507 				    " in slab %p in cache %p\n", bcp,
1508 				    wsp->walk_addr, addr);
1509 				break;
1510 			}
1511 
1512 			bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1513 			buf = KMEM_BUF(cp, bcp);
1514 		}
1515 
1516 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1517 
1518 		if (ndx > slabsize / cp->cache_bufsize) {
1519 			/*
1520 			 * This is very wrong; we have managed to find
1521 			 * a buffer in the slab which shouldn't
1522 			 * actually be here.  Emit a warning, and
1523 			 * try to continue.
1524 			 */
1525 			mdb_warn("buf %p is out of range for "
1526 			    "slab %p, cache %p\n", buf, sp, addr);
1527 		} else if (type & KM_ALLOCATED) {
1528 			/*
1529 			 * we have found a buffer on the slab's freelist;
1530 			 * clear its entry
1531 			 */
1532 			valid[ndx] = 0;
1533 		} else {
1534 			/*
1535 			 * Report this freed buffer
1536 			 */
1537 			if (type & KM_BUFCTL) {
1538 				ret = bufctl_walk_callback(cp, wsp,
1539 				    (uintptr_t)bcp);
1540 			} else {
1541 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1542 			}
1543 			if (ret != WALK_NEXT)
1544 				return (ret);
1545 		}
1546 
1547 		bcp = bc.bc_next;
1548 	}
1549 
1550 	if (bcp != NULL) {
1551 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1552 		    sp, addr, bcp));
1553 	}
1554 
1555 	/*
1556 	 * If we are walking freed buffers, the loop above handled reporting
1557 	 * them.
1558 	 */
1559 	if (type & KM_FREE)
1560 		return (WALK_NEXT);
1561 
1562 	if (type & KM_BUFCTL) {
1563 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1564 		    "cache %p\n", addr);
1565 		return (WALK_ERR);
1566 	}
1567 
1568 	/*
1569 	 * Report allocated buffers, skipping buffers in the magazine layer.
1570 	 * We only get this far for small-slab caches.
1571 	 */
1572 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1573 		buf = (char *)kbase + i * chunksize;
1574 
1575 		if (!valid[i])
1576 			continue;		/* on slab freelist */
1577 
1578 		if (magcnt > 0 &&
1579 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1580 		    addrcmp) != NULL)
1581 			continue;		/* in magazine layer */
1582 
1583 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1584 	}
1585 	return (ret);
1586 }
1587 
1588 void
1589 kmem_walk_fini(mdb_walk_state_t *wsp)
1590 {
1591 	kmem_walk_t *kmw = wsp->walk_data;
1592 	uintptr_t chunksize;
1593 	uintptr_t slabsize;
1594 
1595 	if (kmw == NULL)
1596 		return;
1597 
1598 	if (kmw->kmw_maglist != NULL)
1599 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1600 
1601 	chunksize = kmw->kmw_cp->cache_chunksize;
1602 	slabsize = kmw->kmw_cp->cache_slabsize;
1603 
1604 	if (kmw->kmw_valid != NULL)
1605 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1606 	if (kmw->kmw_ubase != NULL)
1607 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1608 
1609 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1610 	mdb_free(kmw, sizeof (kmem_walk_t));
1611 }
1612 
1613 /*ARGSUSED*/
1614 static int
1615 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1616 {
1617 	/*
1618 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1619 	 * memory in other caches.  This can be a little confusing, so we
1620 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1621 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1622 	 */
1623 	if (c->cache_cflags & KMC_NOTOUCH)
1624 		return (WALK_NEXT);
1625 
1626 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1627 	    wsp->walk_cbdata, addr) == -1)
1628 		return (WALK_DONE);
1629 
1630 	return (WALK_NEXT);
1631 }
1632 
/*
 * Implement a global (address-less) walk by running the walker called
 * 'name' over every cache visited by the "kmem_cache" walk.
 *
 * NOTE: this expands to statements that return from the enclosing
 * function: WALK_ERR if the kmem_cache walk fails, WALK_DONE otherwise.
 * The 'wsp' argument is parenthesized so that any pointer-valued
 * expression may be passed safely.
 */
#define	KMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1639 
1640 int
1641 kmem_walk_init(mdb_walk_state_t *wsp)
1642 {
1643 	if (wsp->walk_arg != NULL)
1644 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1645 
1646 	if (wsp->walk_addr == NULL)
1647 		KMEM_WALK_ALL("kmem", wsp);
1648 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1649 }
1650 
1651 int
1652 bufctl_walk_init(mdb_walk_state_t *wsp)
1653 {
1654 	if (wsp->walk_addr == NULL)
1655 		KMEM_WALK_ALL("bufctl", wsp);
1656 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1657 }
1658 
1659 int
1660 freemem_walk_init(mdb_walk_state_t *wsp)
1661 {
1662 	if (wsp->walk_addr == NULL)
1663 		KMEM_WALK_ALL("freemem", wsp);
1664 	return (kmem_walk_init_common(wsp, KM_FREE));
1665 }
1666 
1667 int
1668 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1669 {
1670 	if (wsp->walk_addr == NULL)
1671 		KMEM_WALK_ALL("freemem_constructed", wsp);
1672 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1673 }
1674 
1675 int
1676 freectl_walk_init(mdb_walk_state_t *wsp)
1677 {
1678 	if (wsp->walk_addr == NULL)
1679 		KMEM_WALK_ALL("freectl", wsp);
1680 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1681 }
1682 
1683 int
1684 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1685 {
1686 	if (wsp->walk_addr == NULL)
1687 		KMEM_WALK_ALL("freectl_constructed", wsp);
1688 	return (kmem_walk_init_common(wsp,
1689 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1690 }
1691 
1692 typedef struct bufctl_history_walk {
1693 	void		*bhw_next;
1694 	kmem_cache_t	*bhw_cache;
1695 	kmem_slab_t	*bhw_slab;
1696 	hrtime_t	bhw_timestamp;
1697 } bufctl_history_walk_t;
1698 
1699 int
1700 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1701 {
1702 	bufctl_history_walk_t *bhw;
1703 	kmem_bufctl_audit_t bc;
1704 	kmem_bufctl_audit_t bcn;
1705 
1706 	if (wsp->walk_addr == NULL) {
1707 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1708 		return (WALK_ERR);
1709 	}
1710 
1711 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1712 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1713 		return (WALK_ERR);
1714 	}
1715 
1716 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1717 	bhw->bhw_timestamp = 0;
1718 	bhw->bhw_cache = bc.bc_cache;
1719 	bhw->bhw_slab = bc.bc_slab;
1720 
1721 	/*
1722 	 * sometimes the first log entry matches the base bufctl;  in that
1723 	 * case, skip the base bufctl.
1724 	 */
1725 	if (bc.bc_lastlog != NULL &&
1726 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1727 	    bc.bc_addr == bcn.bc_addr &&
1728 	    bc.bc_cache == bcn.bc_cache &&
1729 	    bc.bc_slab == bcn.bc_slab &&
1730 	    bc.bc_timestamp == bcn.bc_timestamp &&
1731 	    bc.bc_thread == bcn.bc_thread)
1732 		bhw->bhw_next = bc.bc_lastlog;
1733 	else
1734 		bhw->bhw_next = (void *)wsp->walk_addr;
1735 
1736 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1737 	wsp->walk_data = bhw;
1738 
1739 	return (WALK_NEXT);
1740 }
1741 
1742 int
1743 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1744 {
1745 	bufctl_history_walk_t *bhw = wsp->walk_data;
1746 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1747 	uintptr_t baseaddr = wsp->walk_addr;
1748 	kmem_bufctl_audit_t bc;
1749 
1750 	if (addr == NULL)
1751 		return (WALK_DONE);
1752 
1753 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1754 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1755 		return (WALK_ERR);
1756 	}
1757 
1758 	/*
1759 	 * The bufctl is only valid if the address, cache, and slab are
1760 	 * correct.  We also check that the timestamp is decreasing, to
1761 	 * prevent infinite loops.
1762 	 */
1763 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1764 	    bc.bc_cache != bhw->bhw_cache ||
1765 	    bc.bc_slab != bhw->bhw_slab ||
1766 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1767 		return (WALK_DONE);
1768 
1769 	bhw->bhw_next = bc.bc_lastlog;
1770 	bhw->bhw_timestamp = bc.bc_timestamp;
1771 
1772 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1773 }
1774 
1775 void
1776 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1777 {
1778 	bufctl_history_walk_t *bhw = wsp->walk_data;
1779 
1780 	mdb_free(bhw, sizeof (*bhw));
1781 }
1782 
1783 typedef struct kmem_log_walk {
1784 	kmem_bufctl_audit_t *klw_base;
1785 	kmem_bufctl_audit_t **klw_sorted;
1786 	kmem_log_header_t klw_lh;
1787 	size_t klw_size;
1788 	size_t klw_maxndx;
1789 	size_t klw_ndx;
1790 } kmem_log_walk_t;
1791 
1792 int
1793 kmem_log_walk_init(mdb_walk_state_t *wsp)
1794 {
1795 	uintptr_t lp = wsp->walk_addr;
1796 	kmem_log_walk_t *klw;
1797 	kmem_log_header_t *lhp;
1798 	int maxndx, i, j, k;
1799 
1800 	/*
1801 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1802 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1803 	 */
1804 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1805 		mdb_warn("failed to read 'kmem_transaction_log'");
1806 		return (WALK_ERR);
1807 	}
1808 
1809 	if (lp == NULL) {
1810 		mdb_warn("log is disabled\n");
1811 		return (WALK_ERR);
1812 	}
1813 
1814 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1815 	lhp = &klw->klw_lh;
1816 
1817 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1818 		mdb_warn("failed to read log header at %p", lp);
1819 		mdb_free(klw, sizeof (kmem_log_walk_t));
1820 		return (WALK_ERR);
1821 	}
1822 
1823 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1824 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1825 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1826 
1827 	if (mdb_vread(klw->klw_base, klw->klw_size,
1828 	    (uintptr_t)lhp->lh_base) == -1) {
1829 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1830 		mdb_free(klw->klw_base, klw->klw_size);
1831 		mdb_free(klw, sizeof (kmem_log_walk_t));
1832 		return (WALK_ERR);
1833 	}
1834 
1835 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1836 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1837 
1838 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1839 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1840 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1841 
1842 		for (j = 0; j < maxndx; j++)
1843 			klw->klw_sorted[k++] = &chunk[j];
1844 	}
1845 
1846 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1847 	    (int(*)(const void *, const void *))bufctlcmp);
1848 
1849 	klw->klw_maxndx = k;
1850 	wsp->walk_data = klw;
1851 
1852 	return (WALK_NEXT);
1853 }
1854 
1855 int
1856 kmem_log_walk_step(mdb_walk_state_t *wsp)
1857 {
1858 	kmem_log_walk_t *klw = wsp->walk_data;
1859 	kmem_bufctl_audit_t *bcp;
1860 
1861 	if (klw->klw_ndx == klw->klw_maxndx)
1862 		return (WALK_DONE);
1863 
1864 	bcp = klw->klw_sorted[klw->klw_ndx++];
1865 
1866 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1867 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1868 }
1869 
1870 void
1871 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1872 {
1873 	kmem_log_walk_t *klw = wsp->walk_data;
1874 
1875 	mdb_free(klw->klw_base, klw->klw_size);
1876 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1877 	    sizeof (kmem_bufctl_audit_t *));
1878 	mdb_free(klw, sizeof (kmem_log_walk_t));
1879 }
1880 
1881 typedef struct allocdby_bufctl {
1882 	uintptr_t abb_addr;
1883 	hrtime_t abb_ts;
1884 } allocdby_bufctl_t;
1885 
1886 typedef struct allocdby_walk {
1887 	const char *abw_walk;
1888 	uintptr_t abw_thread;
1889 	size_t abw_nbufs;
1890 	size_t abw_size;
1891 	allocdby_bufctl_t *abw_buf;
1892 	size_t abw_ndx;
1893 } allocdby_walk_t;
1894 
1895 int
1896 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1897     allocdby_walk_t *abw)
1898 {
1899 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1900 		return (WALK_NEXT);
1901 
1902 	if (abw->abw_nbufs == abw->abw_size) {
1903 		allocdby_bufctl_t *buf;
1904 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1905 
1906 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1907 
1908 		bcopy(abw->abw_buf, buf, oldsize);
1909 		mdb_free(abw->abw_buf, oldsize);
1910 
1911 		abw->abw_size <<= 1;
1912 		abw->abw_buf = buf;
1913 	}
1914 
1915 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1916 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1917 	abw->abw_nbufs++;
1918 
1919 	return (WALK_NEXT);
1920 }
1921 
1922 /*ARGSUSED*/
1923 int
1924 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1925 {
1926 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1927 	    abw, addr) == -1) {
1928 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1929 		return (WALK_DONE);
1930 	}
1931 
1932 	return (WALK_NEXT);
1933 }
1934 
1935 static int
1936 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1937 {
1938 	if (lhs->abb_ts < rhs->abb_ts)
1939 		return (1);
1940 	if (lhs->abb_ts > rhs->abb_ts)
1941 		return (-1);
1942 	return (0);
1943 }
1944 
1945 static int
1946 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1947 {
1948 	allocdby_walk_t *abw;
1949 
1950 	if (wsp->walk_addr == NULL) {
1951 		mdb_warn("allocdby walk doesn't support global walks\n");
1952 		return (WALK_ERR);
1953 	}
1954 
1955 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1956 
1957 	abw->abw_thread = wsp->walk_addr;
1958 	abw->abw_walk = walk;
1959 	abw->abw_size = 128;	/* something reasonable */
1960 	abw->abw_buf =
1961 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1962 
1963 	wsp->walk_data = abw;
1964 
1965 	if (mdb_walk("kmem_cache",
1966 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1967 		mdb_warn("couldn't walk kmem_cache");
1968 		allocdby_walk_fini(wsp);
1969 		return (WALK_ERR);
1970 	}
1971 
1972 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1973 	    (int(*)(const void *, const void *))allocdby_cmp);
1974 
1975 	return (WALK_NEXT);
1976 }
1977 
1978 int
1979 allocdby_walk_init(mdb_walk_state_t *wsp)
1980 {
1981 	return (allocdby_walk_init_common(wsp, "bufctl"));
1982 }
1983 
1984 int
1985 freedby_walk_init(mdb_walk_state_t *wsp)
1986 {
1987 	return (allocdby_walk_init_common(wsp, "freectl"));
1988 }
1989 
1990 int
1991 allocdby_walk_step(mdb_walk_state_t *wsp)
1992 {
1993 	allocdby_walk_t *abw = wsp->walk_data;
1994 	kmem_bufctl_audit_t bc;
1995 	uintptr_t addr;
1996 
1997 	if (abw->abw_ndx == abw->abw_nbufs)
1998 		return (WALK_DONE);
1999 
2000 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2001 
2002 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2003 		mdb_warn("couldn't read bufctl at %p", addr);
2004 		return (WALK_DONE);
2005 	}
2006 
2007 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2008 }
2009 
2010 void
2011 allocdby_walk_fini(mdb_walk_state_t *wsp)
2012 {
2013 	allocdby_walk_t *abw = wsp->walk_data;
2014 
2015 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2016 	mdb_free(abw, sizeof (allocdby_walk_t));
2017 }
2018 
2019 /*ARGSUSED*/
2020 int
2021 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2022 {
2023 	char c[MDB_SYM_NAMLEN];
2024 	GElf_Sym sym;
2025 	int i;
2026 
2027 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2028 	for (i = 0; i < bcp->bc_depth; i++) {
2029 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2030 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2031 			continue;
2032 		if (strncmp(c, "kmem_", 5) == 0)
2033 			continue;
2034 		mdb_printf("%s+0x%lx",
2035 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2036 		break;
2037 	}
2038 	mdb_printf("\n");
2039 
2040 	return (WALK_NEXT);
2041 }
2042 
2043 static int
2044 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2045 {
2046 	if (!(flags & DCMD_ADDRSPEC))
2047 		return (DCMD_USAGE);
2048 
2049 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2050 
2051 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2052 		mdb_warn("can't walk '%s' for %p", w, addr);
2053 		return (DCMD_ERR);
2054 	}
2055 
2056 	return (DCMD_OK);
2057 }
2058 
2059 /*ARGSUSED*/
2060 int
2061 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2062 {
2063 	return (allocdby_common(addr, flags, "allocdby"));
2064 }
2065 
2066 /*ARGSUSED*/
2067 int
2068 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2069 {
2070 	return (allocdby_common(addr, flags, "freedby"));
2071 }
2072 
2073 /*
2074  * Return a string describing the address in relation to the given thread's
2075  * stack.
2076  *
2077  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2078  *
2079  * - If the address is above the stack pointer, return an empty string
2080  *   signifying that the address is active.
2081  *
2082  * - If the address is below the stack pointer, and the thread is not on proc,
2083  *   return " (below sp)".
2084  *
2085  * - If the address is below the stack pointer, and the thread is on proc,
2086  *   return " (possibly below sp)".  Depending on context, we may or may not
2087  *   have an accurate t_sp.
2088  */
2089 static const char *
2090 stack_active(const kthread_t *t, uintptr_t addr)
2091 {
2092 	uintptr_t panicstk;
2093 	GElf_Sym sym;
2094 
2095 	if (t->t_state == TS_FREE)
2096 		return (" (inactive interrupt thread)");
2097 
2098 	/*
2099 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2100 	 * no longer relates to the thread's real stack.
2101 	 */
2102 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2103 		panicstk = (uintptr_t)sym.st_value;
2104 
2105 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2106 			return ("");
2107 	}
2108 
2109 	if (addr >= t->t_sp + STACK_BIAS)
2110 		return ("");
2111 
2112 	if (t->t_state == TS_ONPROC)
2113 		return (" (possibly below sp)");
2114 
2115 	return (" (below sp)");
2116 }
2117 
2118 /*
2119  * Additional state for the kmem and vmem ::whatis handlers
2120  */
2121 typedef struct whatis_info {
2122 	mdb_whatis_t *wi_w;
2123 	const kmem_cache_t *wi_cache;
2124 	const vmem_t *wi_vmem;
2125 	vmem_t *wi_msb_arena;
2126 	size_t wi_slab_size;
2127 	uint_t wi_slab_found;
2128 	uint_t wi_kmem_lite_count;
2129 	uint_t wi_freemem;
2130 } whatis_info_t;
2131 
2132 /* call one of our dcmd functions with "-v" and the provided address */
2133 static void
2134 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2135 {
2136 	mdb_arg_t a;
2137 	a.a_type = MDB_TYPE_STRING;
2138 	a.a_un.a_str = "-v";
2139 
2140 	mdb_printf(":\n");
2141 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2142 }
2143 
2144 static void
2145 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2146 {
2147 #define	KMEM_LITE_MAX	16
2148 	pc_t callers[KMEM_LITE_MAX];
2149 	pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2150 
2151 	kmem_buftag_t bt;
2152 	intptr_t stat;
2153 	const char *plural = "";
2154 	int i;
2155 
2156 	/* validate our arguments and read in the buftag */
2157 	if (count == 0 || count > KMEM_LITE_MAX ||
2158 	    mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2159 		return;
2160 
2161 	/* validate the buffer state and read in the callers */
2162 	stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2163 
2164 	if (stat != KMEM_BUFTAG_ALLOC || stat != KMEM_BUFTAG_FREE ||
2165 	    mdb_vread(callers, count * sizeof (pc_t),
2166 	    btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2167 		return;
2168 
2169 	/* If there aren't any filled in callers, bail */
2170 	if (callers[0] == uninit)
2171 		return;
2172 
2173 	plural = (callers[1] == uninit) ? "" : "s";
2174 
2175 	/* Everything's done and checked; print them out */
2176 	mdb_printf(":\n");
2177 
2178 	mdb_inc_indent(8);
2179 	mdb_printf("recent caller%s: %a", plural, callers[0]);
2180 	for (i = 1; i < count; i++) {
2181 		if (callers[i] == uninit)
2182 			break;
2183 		mdb_printf(", %a", callers[i]);
2184 	}
2185 	mdb_dec_indent(8);
2186 }
2187 
2188 static void
2189 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2190     uintptr_t baddr)
2191 {
2192 	mdb_whatis_t *w = wi->wi_w;
2193 
2194 	const kmem_cache_t *cp = wi->wi_cache;
2195 	/* LINTED pointer cast may result in improper alignment */
2196 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2197 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2198 	int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2199 
2200 	mdb_whatis_report_object(w, maddr, addr, "");
2201 
2202 	if (baddr != 0 && !call_printer)
2203 		mdb_printf("bufctl %p ", baddr);
2204 
2205 	mdb_printf("%s from %s",
2206 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2207 
2208 	if (baddr != 0 && call_printer) {
2209 		whatis_call_printer(bufctl, baddr);
2210 		return;
2211 	}
2212 
2213 	/* for KMF_LITE caches, try to print out the previous callers */
2214 	if (!quiet && (cp->cache_flags & KMF_LITE))
2215 		whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2216 
2217 	mdb_printf("\n");
2218 }
2219 
2220 /*ARGSUSED*/
2221 static int
2222 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2223 {
2224 	mdb_whatis_t *w = wi->wi_w;
2225 
2226 	uintptr_t cur;
2227 	size_t size = wi->wi_cache->cache_bufsize;
2228 
2229 	while (mdb_whatis_match(w, addr, size, &cur))
2230 		whatis_print_kmem(wi, cur, addr, NULL);
2231 
2232 	return (WHATIS_WALKRET(w));
2233 }
2234 
2235 /*ARGSUSED*/
2236 static int
2237 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2238 {
2239 	mdb_whatis_t *w = wi->wi_w;
2240 
2241 	uintptr_t cur;
2242 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
2243 	size_t size = wi->wi_cache->cache_bufsize;
2244 
2245 	while (mdb_whatis_match(w, addr, size, &cur))
2246 		whatis_print_kmem(wi, cur, addr, baddr);
2247 
2248 	return (WHATIS_WALKRET(w));
2249 }
2250 
2251 static int
2252 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2253 {
2254 	mdb_whatis_t *w = wi->wi_w;
2255 
2256 	size_t size = vs->vs_end - vs->vs_start;
2257 	uintptr_t cur;
2258 
2259 	/* We're not interested in anything but alloc and free segments */
2260 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2261 		return (WALK_NEXT);
2262 
2263 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2264 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
2265 
2266 		/*
2267 		 * If we're not printing it seperately, provide the vmem_seg
2268 		 * pointer if it has a stack trace.
2269 		 */
2270 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2271 		    (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2272 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2273 			mdb_printf("vmem_seg %p ", addr);
2274 		}
2275 
2276 		mdb_printf("%s from the %s vmem arena",
2277 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2278 		    wi->wi_vmem->vm_name);
2279 
2280 		if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2281 			whatis_call_printer(vmem_seg, addr);
2282 		else
2283 			mdb_printf("\n");
2284 	}
2285 
2286 	return (WHATIS_WALKRET(w));
2287 }
2288 
2289 static int
2290 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2291 {
2292 	mdb_whatis_t *w = wi->wi_w;
2293 	const char *nm = vmem->vm_name;
2294 
2295 	int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2296 	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2297 
2298 	if (identifier != idspace)
2299 		return (WALK_NEXT);
2300 
2301 	wi->wi_vmem = vmem;
2302 
2303 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2304 		mdb_printf("Searching vmem arena %s...\n", nm);
2305 
2306 	if (mdb_pwalk("vmem_seg",
2307 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2308 		mdb_warn("can't walk vmem_seg for %p", addr);
2309 		return (WALK_NEXT);
2310 	}
2311 
2312 	return (WHATIS_WALKRET(w));
2313 }
2314 
2315 /*ARGSUSED*/
2316 static int
2317 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2318 {
2319 	mdb_whatis_t *w = wi->wi_w;
2320 
2321 	/* It must overlap with the slab data, or it's not interesting */
2322 	if (mdb_whatis_overlaps(w,
2323 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2324 		wi->wi_slab_found++;
2325 		return (WALK_DONE);
2326 	}
2327 	return (WALK_NEXT);
2328 }
2329 
2330 static int
2331 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2332 {
2333 	mdb_whatis_t *w = wi->wi_w;
2334 
2335 	char *walk, *freewalk;
2336 	mdb_walk_cb_t func;
2337 	int do_bufctl;
2338 
2339 	int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2340 	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2341 
2342 	if (identifier != idspace)
2343 		return (WALK_NEXT);
2344 
2345 	/* Override the '-b' flag as necessary */
2346 	if (!(c->cache_flags & KMF_HASH))
2347 		do_bufctl = FALSE;	/* no bufctls to walk */
2348 	else if (c->cache_flags & KMF_AUDIT)
2349 		do_bufctl = TRUE;	/* we always want debugging info */
2350 	else
2351 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2352 
2353 	if (do_bufctl) {
2354 		walk = "bufctl";
2355 		freewalk = "freectl";
2356 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2357 	} else {
2358 		walk = "kmem";
2359 		freewalk = "freemem";
2360 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2361 	}
2362 
2363 	wi->wi_cache = c;
2364 
2365 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2366 		mdb_printf("Searching %s...\n", c->cache_name);
2367 
2368 	/*
2369 	 * If more then two buffers live on each slab, figure out if we're
2370 	 * interested in anything in any slab before doing the more expensive
2371 	 * kmem/freemem (bufctl/freectl) walkers.
2372 	 */
2373 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2374 	if (!(c->cache_flags & KMF_HASH))
2375 		wi->wi_slab_size -= sizeof (kmem_slab_t);
2376 
2377 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2378 		wi->wi_slab_found = 0;
2379 		if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2380 		    addr) == -1) {
2381 			mdb_warn("can't find kmem_slab walker");
2382 			return (WALK_DONE);
2383 		}
2384 		if (wi->wi_slab_found == 0)
2385 			return (WALK_NEXT);
2386 	}
2387 
2388 	wi->wi_freemem = FALSE;
2389 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2390 		mdb_warn("can't find %s walker", walk);
2391 		return (WALK_DONE);
2392 	}
2393 
2394 	if (mdb_whatis_done(w))
2395 		return (WALK_DONE);
2396 
2397 	/*
2398 	 * We have searched for allocated memory; now search for freed memory.
2399 	 */
2400 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2401 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2402 
2403 	wi->wi_freemem = TRUE;
2404 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2405 		mdb_warn("can't find %s walker", freewalk);
2406 		return (WALK_DONE);
2407 	}
2408 
2409 	return (WHATIS_WALKRET(w));
2410 }
2411 
2412 static int
2413 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2414 {
2415 	if (c->cache_arena == wi->wi_msb_arena ||
2416 	    (c->cache_cflags & KMC_NOTOUCH))
2417 		return (WALK_NEXT);
2418 
2419 	return (whatis_walk_cache(addr, c, wi));
2420 }
2421 
2422 static int
2423 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2424 {
2425 	if (c->cache_arena != wi->wi_msb_arena)
2426 		return (WALK_NEXT);
2427 
2428 	return (whatis_walk_cache(addr, c, wi));
2429 }
2430 
2431 static int
2432 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2433 {
2434 	if (c->cache_arena == wi->wi_msb_arena ||
2435 	    !(c->cache_cflags & KMC_NOTOUCH))
2436 		return (WALK_NEXT);
2437 
2438 	return (whatis_walk_cache(addr, c, wi));
2439 }
2440 
2441 static int
2442 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2443 {
2444 	uintptr_t cur;
2445 	uintptr_t saddr;
2446 	size_t size;
2447 
2448 	/*
2449 	 * Often, one calls ::whatis on an address from a thread structure.
2450 	 * We use this opportunity to short circuit this case...
2451 	 */
2452 	while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2453 		mdb_whatis_report_object(w, cur, addr,
2454 		    "allocated as a thread structure\n");
2455 
2456 	/*
2457 	 * Now check the stack
2458 	 */
2459 	if (t->t_stkbase == NULL)
2460 		return (WALK_NEXT);
2461 
2462 	/*
2463 	 * This assumes that t_stk is the end of the stack, but it's really
2464 	 * only the initial stack pointer for the thread.  Arguments to the
2465 	 * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
2466 	 * that 't->t_stk::whatis' reports "part of t's stack", we include
2467 	 * t_stk in the range (the "+ 1", below), but the kernel should
2468 	 * really include the full stack bounds where we can find it.
2469 	 */
2470 	saddr = (uintptr_t)t->t_stkbase;
2471 	size = (uintptr_t)t->t_stk - saddr + 1;
2472 	while (mdb_whatis_match(w, saddr, size, &cur))
2473 		mdb_whatis_report_object(w, cur, cur,
2474 		    "in thread %p's stack%s\n", addr, stack_active(t, cur));
2475 
2476 	return (WHATIS_WALKRET(w));
2477 }
2478 
2479 static void
2480 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2481     uintptr_t base, size_t size, const char *where)
2482 {
2483 	uintptr_t cur;
2484 
2485 	/*
2486 	 * Since we're searching for addresses inside a module, we report
2487 	 * them as symbols.
2488 	 */
2489 	while (mdb_whatis_match(w, base, size, &cur))
2490 		mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2491 }
2492 
2493 static int
2494 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2495 {
2496 	char name[MODMAXNAMELEN];
2497 	struct module mod;
2498 	Shdr shdr;
2499 
2500 	if (m->mod_mp == NULL)
2501 		return (WALK_NEXT);
2502 
2503 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2504 		mdb_warn("couldn't read modctl %p's module", addr);
2505 		return (WALK_NEXT);
2506 	}
2507 
2508 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2509 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2510 
2511 	whatis_modctl_match(w, name,
2512 	    (uintptr_t)mod.text, mod.text_size, "text segment");
2513 	whatis_modctl_match(w, name,
2514 	    (uintptr_t)mod.data, mod.data_size, "data segment");
2515 	whatis_modctl_match(w, name,
2516 	    (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2517 
2518 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2519 		mdb_warn("couldn't read symbol header for %p's module", addr);
2520 		return (WALK_NEXT);
2521 	}
2522 
2523 	whatis_modctl_match(w, name,
2524 	    (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2525 	whatis_modctl_match(w, name,
2526 	    (uintptr_t)mod.symspace, mod.symsize, "symtab");
2527 
2528 	return (WHATIS_WALKRET(w));
2529 }
2530 
2531 /*ARGSUSED*/
2532 static int
2533 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2534 {
2535 	uintptr_t cur;
2536 
2537 	uintptr_t base = (uintptr_t)seg->pages;
2538 	size_t size = (uintptr_t)seg->epages - base;
2539 
2540 	while (mdb_whatis_match(w, base, size, &cur)) {
2541 		/* round our found pointer down to the page_t base. */
2542 		size_t offset = (cur - base) % sizeof (page_t);
2543 
2544 		mdb_whatis_report_object(w, cur, cur - offset,
2545 		    "allocated as a page structure\n");
2546 	}
2547 
2548 	return (WHATIS_WALKRET(w));
2549 }
2550 
2551 /*ARGSUSED*/
2552 static int
2553 whatis_run_modules(mdb_whatis_t *w, void *arg)
2554 {
2555 	if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2556 		mdb_warn("couldn't find modctl walker");
2557 		return (1);
2558 	}
2559 	return (0);
2560 }
2561 
2562 /*ARGSUSED*/
2563 static int
2564 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2565 {
2566 	/*
2567 	 * Now search all thread stacks.  Yes, this is a little weak; we
2568 	 * can save a lot of work by first checking to see if the
2569 	 * address is in segkp vs. segkmem.  But hey, computers are
2570 	 * fast.
2571 	 */
2572 	if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2573 		mdb_warn("couldn't find thread walker");
2574 		return (1);
2575 	}
2576 	return (0);
2577 }
2578 
2579 /*ARGSUSED*/
2580 static int
2581 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2582 {
2583 	if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2584 		mdb_warn("couldn't find memseg walker");
2585 		return (1);
2586 	}
2587 	return (0);
2588 }
2589 
2590 /*ARGSUSED*/
2591 static int
2592 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2593 {
2594 	whatis_info_t wi;
2595 
2596 	bzero(&wi, sizeof (wi));
2597 	wi.wi_w = w;
2598 
2599 	if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2600 		mdb_warn("unable to readvar \"kmem_msb_arena\"");
2601 
2602 	if (mdb_readvar(&wi.wi_kmem_lite_count,
2603 	    "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2604 		wi.wi_kmem_lite_count = 0;
2605 
2606 	/*
2607 	 * We process kmem caches in the following order:
2608 	 *
2609 	 *	non-KMC_NOTOUCH, non-metadata	(typically the most interesting)
2610 	 *	metadata			(can be huge with KMF_AUDIT)
2611 	 *	KMC_NOTOUCH, non-metadata	(see kmem_walk_all())
2612 	 */
2613 	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2614 	    &wi) == -1 ||
2615 	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2616 	    &wi) == -1 ||
2617 	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2618 	    &wi) == -1) {
2619 		mdb_warn("couldn't find kmem_cache walker");
2620 		return (1);
2621 	}
2622 	return (0);
2623 }
2624 
2625 /*ARGSUSED*/
2626 static int
2627 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2628 {
2629 	whatis_info_t wi;
2630 
2631 	bzero(&wi, sizeof (wi));
2632 	wi.wi_w = w;
2633 
2634 	if (mdb_walk("vmem_postfix",
2635 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2636 		mdb_warn("couldn't find vmem_postfix walker");
2637 		return (1);
2638 	}
2639 	return (0);
2640 }
2641 
2642 typedef struct kmem_log_cpu {
2643 	uintptr_t kmc_low;
2644 	uintptr_t kmc_high;
2645 } kmem_log_cpu_t;
2646 
2647 typedef struct kmem_log_data {
2648 	uintptr_t kmd_addr;
2649 	kmem_log_cpu_t *kmd_cpu;
2650 } kmem_log_data_t;
2651 
2652 int
2653 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2654     kmem_log_data_t *kmd)
2655 {
2656 	int i;
2657 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2658 	size_t bufsize;
2659 
2660 	for (i = 0; i < NCPU; i++) {
2661 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2662 			break;
2663 	}
2664 
2665 	if (kmd->kmd_addr) {
2666 		if (b->bc_cache == NULL)
2667 			return (WALK_NEXT);
2668 
2669 		if (mdb_vread(&bufsize, sizeof (bufsize),
2670 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2671 			mdb_warn(
2672 			    "failed to read cache_bufsize for cache at %p",
2673 			    b->bc_cache);
2674 			return (WALK_ERR);
2675 		}
2676 
2677 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2678 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2679 			return (WALK_NEXT);
2680 	}
2681 
2682 	if (i == NCPU)
2683 		mdb_printf("   ");
2684 	else
2685 		mdb_printf("%3d", i);
2686 
2687 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2688 	    b->bc_timestamp, b->bc_thread);
2689 
2690 	return (WALK_NEXT);
2691 }
2692 
2693 /*ARGSUSED*/
2694 int
2695 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2696 {
2697 	kmem_log_header_t lh;
2698 	kmem_cpu_log_header_t clh;
2699 	uintptr_t lhp, clhp;
2700 	int ncpus;
2701 	uintptr_t *cpu;
2702 	GElf_Sym sym;
2703 	kmem_log_cpu_t *kmc;
2704 	int i;
2705 	kmem_log_data_t kmd;
2706 	uint_t opt_b = FALSE;
2707 
2708 	if (mdb_getopts(argc, argv,
2709 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2710 		return (DCMD_USAGE);
2711 
2712 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2713 		mdb_warn("failed to read 'kmem_transaction_log'");
2714 		return (DCMD_ERR);
2715 	}
2716 
2717 	if (lhp == NULL) {
2718 		mdb_warn("no kmem transaction log\n");
2719 		return (DCMD_ERR);
2720 	}
2721 
2722 	mdb_readvar(&ncpus, "ncpus");
2723 
2724 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2725 		mdb_warn("failed to read log header at %p", lhp);
2726 		return (DCMD_ERR);
2727 	}
2728 
2729 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2730 
2731 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2732 
2733 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2734 		mdb_warn("couldn't find 'cpu' array");
2735 		return (DCMD_ERR);
2736 	}
2737 
2738 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2739 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2740 		    NCPU * sizeof (uintptr_t), sym.st_size);
2741 		return (DCMD_ERR);
2742 	}
2743 
2744 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2745 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2746 		return (DCMD_ERR);
2747 	}
2748 
2749 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2750 	kmd.kmd_addr = NULL;
2751 	kmd.kmd_cpu = kmc;
2752 
2753 	for (i = 0; i < NCPU; i++) {
2754 
2755 		if (cpu[i] == NULL)
2756 			continue;
2757 
2758 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2759 			mdb_warn("cannot read cpu %d's log header at %p",
2760 			    i, clhp);
2761 			return (DCMD_ERR);
2762 		}
2763 
2764 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2765 		    (uintptr_t)lh.lh_base;
2766 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2767 
2768 		clhp += sizeof (kmem_cpu_log_header_t);
2769 	}
2770 
2771 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2772 	    "TIMESTAMP", "THREAD");
2773 
2774 	/*
2775 	 * If we have been passed an address, print out only log entries
2776 	 * corresponding to that address.  If opt_b is specified, then interpret
2777 	 * the address as a bufctl.
2778 	 */
2779 	if (flags & DCMD_ADDRSPEC) {
2780 		kmem_bufctl_audit_t b;
2781 
2782 		if (opt_b) {
2783 			kmd.kmd_addr = addr;
2784 		} else {
2785 			if (mdb_vread(&b,
2786 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2787 				mdb_warn("failed to read bufctl at %p", addr);
2788 				return (DCMD_ERR);
2789 			}
2790 
2791 			(void) kmem_log_walk(addr, &b, &kmd);
2792 
2793 			return (DCMD_OK);
2794 		}
2795 	}
2796 
2797 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2798 		mdb_warn("can't find kmem log walker");
2799 		return (DCMD_ERR);
2800 	}
2801 
2802 	return (DCMD_OK);
2803 }
2804 
2805 typedef struct bufctl_history_cb {
2806 	int		bhc_flags;
2807 	int		bhc_argc;
2808 	const mdb_arg_t	*bhc_argv;
2809 	int		bhc_ret;
2810 } bufctl_history_cb_t;
2811 
2812 /*ARGSUSED*/
2813 static int
2814 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2815 {
2816 	bufctl_history_cb_t *bhc = arg;
2817 
2818 	bhc->bhc_ret =
2819 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2820 
2821 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2822 
2823 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2824 }
2825 
/*
 * Print the help text for the ::bufctl dcmd: a one-line summary followed
 * by the option list under an "OPTIONS" heading.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s", summary);

	/* the heading is printed two columns left of the body text */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2848 
2849 int
2850 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2851 {
2852 	kmem_bufctl_audit_t bc;
2853 	uint_t verbose = FALSE;
2854 	uint_t history = FALSE;
2855 	uint_t in_history = FALSE;
2856 	uintptr_t caller = NULL, thread = NULL;
2857 	uintptr_t laddr, haddr, baddr = NULL;
2858 	hrtime_t earliest = 0, latest = 0;
2859 	int i, depth;
2860 	char c[MDB_SYM_NAMLEN];
2861 	GElf_Sym sym;
2862 
2863 	if (mdb_getopts(argc, argv,
2864 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2865 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2866 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2867 	    'c', MDB_OPT_UINTPTR, &caller,
2868 	    't', MDB_OPT_UINTPTR, &thread,
2869 	    'e', MDB_OPT_UINT64, &earliest,
2870 	    'l', MDB_OPT_UINT64, &latest,
2871 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2872 		return (DCMD_USAGE);
2873 
2874 	if (!(flags & DCMD_ADDRSPEC))
2875 		return (DCMD_USAGE);
2876 
2877 	if (in_history && !history)
2878 		return (DCMD_USAGE);
2879 
2880 	if (history && !in_history) {
2881 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2882 		    UM_SLEEP | UM_GC);
2883 		bufctl_history_cb_t bhc;
2884 
2885 		nargv[0].a_type = MDB_TYPE_STRING;
2886 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2887 
2888 		for (i = 0; i < argc; i++)
2889 			nargv[i + 1] = argv[i];
2890 
2891 		/*
2892 		 * When in history mode, we treat each element as if it
2893 		 * were in a seperate loop, so that the headers group
2894 		 * bufctls with similar histories.
2895 		 */
2896 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2897 		bhc.bhc_argc = argc + 1;
2898 		bhc.bhc_argv = nargv;
2899 		bhc.bhc_ret = DCMD_OK;
2900 
2901 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2902 		    addr) == -1) {
2903 			mdb_warn("unable to walk bufctl_history");
2904 			return (DCMD_ERR);
2905 		}
2906 
2907 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2908 			mdb_printf("\n");
2909 
2910 		return (bhc.bhc_ret);
2911 	}
2912 
2913 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2914 		if (verbose) {
2915 			mdb_printf("%16s %16s %16s %16s\n"
2916 			    "%<u>%16s %16s %16s %16s%</u>\n",
2917 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2918 			    "", "CACHE", "LASTLOG", "CONTENTS");
2919 		} else {
2920 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2921 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2922 		}
2923 	}
2924 
2925 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2926 		mdb_warn("couldn't read bufctl at %p", addr);
2927 		return (DCMD_ERR);
2928 	}
2929 
2930 	/*
2931 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2932 	 * the address does not really refer to a bufctl.
2933 	 */
2934 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2935 
2936 	if (caller != NULL) {
2937 		laddr = caller;
2938 		haddr = caller + sizeof (caller);
2939 
2940 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2941 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2942 			/*
2943 			 * We were provided an exact symbol value; any
2944 			 * address in the function is valid.
2945 			 */
2946 			laddr = (uintptr_t)sym.st_value;
2947 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2948 		}
2949 
2950 		for (i = 0; i < depth; i++)
2951 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2952 				break;
2953 
2954 		if (i == depth)
2955 			return (DCMD_OK);
2956 	}
2957 
2958 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2959 		return (DCMD_OK);
2960 
2961 	if (earliest != 0 && bc.bc_timestamp < earliest)
2962 		return (DCMD_OK);
2963 
2964 	if (latest != 0 && bc.bc_timestamp > latest)
2965 		return (DCMD_OK);
2966 
2967 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2968 		return (DCMD_OK);
2969 
2970 	if (flags & DCMD_PIPE_OUT) {
2971 		mdb_printf("%#lr\n", addr);
2972 		return (DCMD_OK);
2973 	}
2974 
2975 	if (verbose) {
2976 		mdb_printf(
2977 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2978 		    "%16s %16p %16p %16p\n",
2979 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2980 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2981 
2982 		mdb_inc_indent(17);
2983 		for (i = 0; i < depth; i++)
2984 			mdb_printf("%a\n", bc.bc_stack[i]);
2985 		mdb_dec_indent(17);
2986 		mdb_printf("\n");
2987 	} else {
2988 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2989 		    bc.bc_timestamp, bc.bc_thread);
2990 
2991 		for (i = 0; i < depth; i++) {
2992 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2993 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2994 				continue;
2995 			if (strncmp(c, "kmem_", 5) == 0)
2996 				continue;
2997 			mdb_printf(" %a\n", bc.bc_stack[i]);
2998 			break;
2999 		}
3000 
3001 		if (i >= depth)
3002 			mdb_printf("\n");
3003 	}
3004 
3005 	return (DCMD_OK);
3006 }
3007 
3008 typedef struct kmem_verify {
3009 	uint64_t *kmv_buf;		/* buffer to read cache contents into */
3010 	size_t kmv_size;		/* number of bytes in kmv_buf */
3011 	int kmv_corruption;		/* > 0 if corruption found. */
3012 	int kmv_besilent;		/* report actual corruption sites */
3013 	struct kmem_cache kmv_cache;	/* the cache we're operating on */
3014 } kmem_verify_t;
3015 
/*
 * verify_pattern()
 *	Check that the first size bytes of buf_arg consist of the 64-bit
 *	pattern pat, comparing one 64-bit word at a time.
 *
 *	Returns the byte offset of the first word that differs from pat, or
 *	-1 if every word matches (including when size is 0).
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;

	for (off = 0; off < size; off += sizeof (uint64_t)) {
		if (buf_arg[off / sizeof (uint64_t)] != pat)
			return ((int64_t)off);
	}

	return (-1);
}
3032 
3033 /*
3034  * verify_buftag()
3035  *	verify that btp->bt_bxstat == (bcp ^ pat)
3036  */
3037 static int
3038 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3039 {
3040 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3041 }
3042 
3043 /*
3044  * verify_free()
3045  * 	verify the integrity of a free block of memory by checking
3046  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3047  */
3048 /*ARGSUSED1*/
3049 static int
3050 verify_free(uintptr_t addr, const void *data, void *private)
3051 {
3052 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3053 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3054 	int64_t corrupt;		/* corruption offset */
3055 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3056 	kmem_cache_t *cp = &kmv->kmv_cache;
3057 	int besilent = kmv->kmv_besilent;
3058 
3059 	/*LINTED*/
3060 	buftagp = KMEM_BUFTAG(cp, buf);
3061 
3062 	/*
3063 	 * Read the buffer to check.
3064 	 */
3065 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3066 		if (!besilent)
3067 			mdb_warn("couldn't read %p", addr);
3068 		return (WALK_NEXT);
3069 	}
3070 
3071 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3072 	    KMEM_FREE_PATTERN)) >= 0) {
3073 		if (!besilent)
3074 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3075 			    addr, (uintptr_t)addr + corrupt);
3076 		goto corrupt;
3077 	}
3078 	/*
3079 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3080 	 * the first bytes of the buffer, hence we cannot check for red
3081 	 * zone corruption.
3082 	 */
3083 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3084 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3085 		if (!besilent)
3086 			mdb_printf("buffer %p (free) seems to "
3087 			    "have a corrupt redzone pattern\n", addr);
3088 		goto corrupt;
3089 	}
3090 
3091 	/*
3092 	 * confirm bufctl pointer integrity.
3093 	 */
3094 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3095 		if (!besilent)
3096 			mdb_printf("buffer %p (free) has a corrupt "
3097 			    "buftag\n", addr);
3098 		goto corrupt;
3099 	}
3100 
3101 	return (WALK_NEXT);
3102 corrupt:
3103 	kmv->kmv_corruption++;
3104 	return (WALK_NEXT);
3105 }
3106 
3107 /*
3108  * verify_alloc()
3109  * 	Verify that the buftag of an allocated buffer makes sense with respect
3110  * 	to the buffer.
3111  */
3112 /*ARGSUSED1*/
3113 static int
3114 verify_alloc(uintptr_t addr, const void *data, void *private)
3115 {
3116 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3117 	kmem_cache_t *cp = &kmv->kmv_cache;
3118 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3119 	/*LINTED*/
3120 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3121 	uint32_t *ip = (uint32_t *)buftagp;
3122 	uint8_t *bp = (uint8_t *)buf;
3123 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3124 	int besilent = kmv->kmv_besilent;
3125 
3126 	/*
3127 	 * Read the buffer to check.
3128 	 */
3129 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3130 		if (!besilent)
3131 			mdb_warn("couldn't read %p", addr);
3132 		return (WALK_NEXT);
3133 	}
3134 
3135 	/*
3136 	 * There are two cases to handle:
3137 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3138 	 *    0xfeedfacefeedface at the end of it
3139 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3140 	 *    0xbb just past the end of the region in use.  At the buftag,
3141 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3142 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3143 	 *    endianness), followed by 32 bits containing the offset of the
3144 	 *    0xbb byte in the buffer.
3145 	 *
3146 	 * Finally, the two 32-bit words that comprise the second half of the
3147 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3148 	 */
3149 
3150 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3151 		looks_ok = 1;
3152 	else if (!KMEM_SIZE_VALID(ip[1]))
3153 		size_ok = 0;
3154 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3155 		looks_ok = 1;
3156 	else
3157 		size_ok = 0;
3158 
3159 	if (!size_ok) {
3160 		if (!besilent)
3161 			mdb_printf("buffer %p (allocated) has a corrupt "
3162 			    "redzone size encoding\n", addr);
3163 		goto corrupt;
3164 	}
3165 
3166 	if (!looks_ok) {
3167 		if (!besilent)
3168 			mdb_printf("buffer %p (allocated) has a corrupt "
3169 			    "redzone signature\n", addr);
3170 		goto corrupt;
3171 	}
3172 
3173 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3174 		if (!besilent)
3175 			mdb_printf("buffer %p (allocated) has a "
3176 			    "corrupt buftag\n", addr);
3177 		goto corrupt;
3178 	}
3179 
3180 	return (WALK_NEXT);
3181 corrupt:
3182 	kmv->kmv_corruption++;
3183 	return (WALK_NEXT);
3184 }
3185 
3186 /*ARGSUSED2*/
3187 int
3188 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3189 {
3190 	if (flags & DCMD_ADDRSPEC) {
3191 		int check_alloc = 0, check_free = 0;
3192 		kmem_verify_t kmv;
3193 
3194 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3195 		    addr) == -1) {
3196 			mdb_warn("couldn't read kmem_cache %p", addr);
3197 			return (DCMD_ERR);
3198 		}
3199 
3200 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3201 		    sizeof (kmem_buftag_t);
3202 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3203 		kmv.kmv_corruption = 0;
3204 
3205 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3206 			check_alloc = 1;
3207 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3208 				check_free = 1;
3209 		} else {
3210 			if (!(flags & DCMD_LOOP)) {
3211 				mdb_warn("cache %p (%s) does not have "
3212 				    "redzone checking enabled\n", addr,
3213 				    kmv.kmv_cache.cache_name);
3214 			}
3215 			return (DCMD_ERR);
3216 		}
3217 
3218 		if (flags & DCMD_LOOP) {
3219 			/*
3220 			 * table mode, don't print out every corrupt buffer
3221 			 */
3222 			kmv.kmv_besilent = 1;
3223 		} else {
3224 			mdb_printf("Summary for cache '%s'\n",
3225 			    kmv.kmv_cache.cache_name);
3226 			mdb_inc_indent(2);
3227 			kmv.kmv_besilent = 0;
3228 		}
3229 
3230 		if (check_alloc)
3231 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3232 		if (check_free)
3233 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3234 
3235 		if (flags & DCMD_LOOP) {
3236 			if (kmv.kmv_corruption == 0) {
3237 				mdb_printf("%-*s %?p clean\n",
3238 				    KMEM_CACHE_NAMELEN,
3239 				    kmv.kmv_cache.cache_name, addr);
3240 			} else {
3241 				char *s = "";	/* optional s in "buffer[s]" */
3242 				if (kmv.kmv_corruption > 1)
3243 					s = "s";
3244 
3245 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3246 				    KMEM_CACHE_NAMELEN,
3247 				    kmv.kmv_cache.cache_name, addr,
3248 				    kmv.kmv_corruption, s);
3249 			}
3250 		} else {
3251 			/*
3252 			 * This is the more verbose mode, when the user has
3253 			 * type addr::kmem_verify.  If the cache was clean,
3254 			 * nothing will have yet been printed. So say something.
3255 			 */
3256 			if (kmv.kmv_corruption == 0)
3257 				mdb_printf("clean\n");
3258 
3259 			mdb_dec_indent(2);
3260 		}
3261 	} else {
3262 		/*
3263 		 * If the user didn't specify a cache to verify, we'll walk all
3264 		 * kmem_cache's, specifying ourself as a callback for each...
3265 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3266 		 */
3267 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3268 		    "Cache Name", "Addr", "Cache Integrity");
3269 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3270 	}
3271 
3272 	return (DCMD_OK);
3273 }
3274 
/*
 * Debugger-side record of one vmem arena.  vmem_walk_init() first chains
 * every node through vn_next as it reads the target's arena list, and then
 * links the nodes into a tree using vn_parent, vn_sibling and vn_children.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node in the flat list */
	struct vmem_node *vn_parent;	/* node of our source arena, if any */
	struct vmem_node *vn_sibling;	/* next child of parent (or next root) */
	struct vmem_node *vn_children;	/* head of our list of children */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once the postfix walk visits us */
	vmem_t vn_vmem;			/* local copy of the vmem_t */
} vmem_node_t;
3284 
/*
 * Per-walk state for the vmem arena walkers.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* head of the list of root arenas */
	vmem_node_t *vw_current;	/* walk position used by the steps */
} vmem_walk_t;
3289 
3290 int
3291 vmem_walk_init(mdb_walk_state_t *wsp)
3292 {
3293 	uintptr_t vaddr, paddr;
3294 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3295 	vmem_walk_t *vw;
3296 
3297 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3298 		mdb_warn("couldn't read 'vmem_list'");
3299 		return (WALK_ERR);
3300 	}
3301 
3302 	while (vaddr != NULL) {
3303 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3304 		vp->vn_addr = vaddr;
3305 		vp->vn_next = head;
3306 		head = vp;
3307 
3308 		if (vaddr == wsp->walk_addr)
3309 			current = vp;
3310 
3311 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3312 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3313 			goto err;
3314 		}
3315 
3316 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3317 	}
3318 
3319 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3320 
3321 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3322 			vp->vn_sibling = root;
3323 			root = vp;
3324 			continue;
3325 		}
3326 
3327 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3328 			if (parent->vn_addr != paddr)
3329 				continue;
3330 			vp->vn_sibling = parent->vn_children;
3331 			parent->vn_children = vp;
3332 			vp->vn_parent = parent;
3333 			break;
3334 		}
3335 
3336 		if (parent == NULL) {
3337 			mdb_warn("couldn't find %p's parent (%p)\n",
3338 			    vp->vn_addr, paddr);
3339 			goto err;
3340 		}
3341 	}
3342 
3343 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3344 	vw->vw_root = root;
3345 
3346 	if (current != NULL)
3347 		vw->vw_current = current;
3348 	else
3349 		vw->vw_current = root;
3350 
3351 	wsp->walk_data = vw;
3352 	return (WALK_NEXT);
3353 err:
3354 	for (vp = head; head != NULL; vp = head) {
3355 		head = vp->vn_next;
3356 		mdb_free(vp, sizeof (vmem_node_t));
3357 	}
3358 
3359 	return (WALK_ERR);
3360 }
3361 
3362 int
3363 vmem_walk_step(mdb_walk_state_t *wsp)
3364 {
3365 	vmem_walk_t *vw = wsp->walk_data;
3366 	vmem_node_t *vp;
3367 	int rval;
3368 
3369 	if ((vp = vw->vw_current) == NULL)
3370 		return (WALK_DONE);
3371 
3372 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3373 
3374 	if (vp->vn_children != NULL) {
3375 		vw->vw_current = vp->vn_children;
3376 		return (rval);
3377 	}
3378 
3379 	do {
3380 		vw->vw_current = vp->vn_sibling;
3381 		vp = vp->vn_parent;
3382 	} while (vw->vw_current == NULL && vp != NULL);
3383 
3384 	return (rval);
3385 }
3386 
3387 /*
3388  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3389  * children are visited before their parent.  We perform the postfix walk
3390  * iteratively (rather than recursively) to allow mdb to regain control
3391  * after each callback.
3392  */
3393 int
3394 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3395 {
3396 	vmem_walk_t *vw = wsp->walk_data;
3397 	vmem_node_t *vp = vw->vw_current;
3398 	int rval;
3399 
3400 	/*
3401 	 * If this node is marked, then we know that we have already visited
3402 	 * all of its children.  If the node has any siblings, they need to
3403 	 * be visited next; otherwise, we need to visit the parent.  Note
3404 	 * that vp->vn_marked will only be zero on the first invocation of
3405 	 * the step function.
3406 	 */
3407 	if (vp->vn_marked) {
3408 		if (vp->vn_sibling != NULL)
3409 			vp = vp->vn_sibling;
3410 		else if (vp->vn_parent != NULL)
3411 			vp = vp->vn_parent;
3412 		else {
3413 			/*
3414 			 * We have neither a parent, nor a sibling, and we
3415 			 * have already been visited; we're done.
3416 			 */
3417 			return (WALK_DONE);
3418 		}
3419 	}
3420 
3421 	/*
3422 	 * Before we visit this node, visit its children.
3423 	 */
3424 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3425 		vp = vp->vn_children;
3426 
3427 	vp->vn_marked = 1;
3428 	vw->vw_current = vp;
3429 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3430 
3431 	return (rval);
3432 }
3433 
3434 void
3435 vmem_walk_fini(mdb_walk_state_t *wsp)
3436 {
3437 	vmem_walk_t *vw = wsp->walk_data;
3438 	vmem_node_t *root = vw->vw_root;
3439 	int done;
3440 
3441 	if (root == NULL)
3442 		return;
3443 
3444 	if ((vw->vw_root = root->vn_children) != NULL)
3445 		vmem_walk_fini(wsp);
3446 
3447 	vw->vw_root = root->vn_sibling;
3448 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3449 	mdb_free(root, sizeof (vmem_node_t));
3450 
3451 	if (done) {
3452 		mdb_free(vw, sizeof (vmem_walk_t));
3453 	} else {
3454 		vmem_walk_fini(wsp);
3455 	}
3456 }
3457 
/*
 * Per-walk state for the vmem segment walkers.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* type to report; VMEM_NONE reports all */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
} vmem_seg_walk_t;
3463 
3464 /*ARGSUSED*/
3465 int
3466 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3467 {
3468 	vmem_seg_walk_t *vsw;
3469 
3470 	if (wsp->walk_addr == NULL) {
3471 		mdb_warn("vmem_%s does not support global walks\n", name);
3472 		return (WALK_ERR);
3473 	}
3474 
3475 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3476 
3477 	vsw->vsw_type = type;
3478 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3479 	vsw->vsw_current = vsw->vsw_start;
3480 
3481 	return (WALK_NEXT);
3482 }
3483 
/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h), so
 * the segment walkers use 0 as a wildcard: a walk whose type is VMEM_NONE
 * reports segments of every type.
 */
#define	VMEM_NONE	0
3488 
/*
 * Walker init: walk the segments of the arena at wsp->walk_addr, reporting
 * only those of type VMEM_ALLOC.
 */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
3494 
/*
 * Walker init: walk the segments of the arena at wsp->walk_addr, reporting
 * only those of type VMEM_FREE.
 */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
3500 
/*
 * Walker init: walk the segments of the arena at wsp->walk_addr, reporting
 * only those of type VMEM_SPAN.
 */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
3506 
/*
 * Walker init: walk the segments of the arena at wsp->walk_addr, reporting
 * segments of every type (VMEM_NONE disables the type filter).
 */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
3512 
3513 int
3514 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3515 {
3516 	vmem_seg_t seg;
3517 	vmem_seg_walk_t *vsw = wsp->walk_data;
3518 	uintptr_t addr = vsw->vsw_current;
3519 	static size_t seg_size = 0;
3520 	int rval;
3521 
3522 	if (!seg_size) {
3523 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3524 			mdb_warn("failed to read 'vmem_seg_size'");
3525 			seg_size = sizeof (vmem_seg_t);
3526 		}
3527 	}
3528 
3529 	if (seg_size < sizeof (seg))
3530 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3531 
3532 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3533 		mdb_warn("couldn't read vmem_seg at %p", addr);
3534 		return (WALK_ERR);
3535 	}
3536 
3537 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3538 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3539 		rval = WALK_NEXT;
3540 	} else {
3541 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3542 	}
3543 
3544 	if (vsw->vsw_current == vsw->vsw_start)
3545 		return (WALK_DONE);
3546 
3547 	return (rval);
3548 }
3549 
3550 void
3551 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3552 {
3553 	vmem_seg_walk_t *vsw = wsp->walk_data;
3554 
3555 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3556 }
3557 
3558 #define	VMEM_NAMEWIDTH	22
3559 
3560 int
3561 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3562 {
3563 	vmem_t v, parent;
3564 	vmem_kstat_t *vkp = &v.vm_kstat;
3565 	uintptr_t paddr;
3566 	int ident = 0;
3567 	char c[VMEM_NAMEWIDTH];
3568 
3569 	if (!(flags & DCMD_ADDRSPEC)) {
3570 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3571 			mdb_warn("can't walk vmem");
3572 			return (DCMD_ERR);
3573 		}
3574 		return (DCMD_OK);
3575 	}
3576 
3577 	if (DCMD_HDRSPEC(flags))
3578 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3579 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3580 		    "TOTAL", "SUCCEED", "FAIL");
3581 
3582 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3583 		mdb_warn("couldn't read vmem at %p", addr);
3584 		return (DCMD_ERR);
3585 	}
3586 
3587 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3588 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3589 			mdb_warn("couldn't trace %p's ancestry", addr);
3590 			ident = 0;
3591 			break;
3592 		}
3593 		paddr = (uintptr_t)parent.vm_source;
3594 	}
3595 
3596 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3597 
3598 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3599 	    addr, VMEM_NAMEWIDTH, c,
3600 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3601 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3602 
3603 	return (DCMD_OK);
3604 }
3605 
/*
 * Help text for ::vmem_seg.  The indentation is dropped around the OPTIONS
 * heading and then restored for the option descriptions.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3637 
3638 /*ARGSUSED*/
3639 int
3640 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3641 {
3642 	vmem_seg_t vs;
3643 	pc_t *stk = vs.vs_stack;
3644 	uintptr_t sz;
3645 	uint8_t t;
3646 	const char *type = NULL;
3647 	GElf_Sym sym;
3648 	char c[MDB_SYM_NAMLEN];
3649 	int no_debug;
3650 	int i;
3651 	int depth;
3652 	uintptr_t laddr, haddr;
3653 
3654 	uintptr_t caller = NULL, thread = NULL;
3655 	uintptr_t minsize = 0, maxsize = 0;
3656 
3657 	hrtime_t earliest = 0, latest = 0;
3658 
3659 	uint_t size = 0;
3660 	uint_t verbose = 0;
3661 
3662 	if (!(flags & DCMD_ADDRSPEC))
3663 		return (DCMD_USAGE);
3664 
3665 	if (mdb_getopts(argc, argv,
3666 	    'c', MDB_OPT_UINTPTR, &caller,
3667 	    'e', MDB_OPT_UINT64, &earliest,
3668 	    'l', MDB_OPT_UINT64, &latest,
3669 	    's', MDB_OPT_SETBITS, TRUE, &size,
3670 	    'm', MDB_OPT_UINTPTR, &minsize,
3671 	    'M', MDB_OPT_UINTPTR, &maxsize,
3672 	    't', MDB_OPT_UINTPTR, &thread,
3673 	    'T', MDB_OPT_STR, &type,
3674 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3675 	    NULL) != argc)
3676 		return (DCMD_USAGE);
3677 
3678 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3679 		if (verbose) {
3680 			mdb_printf("%16s %4s %16s %16s %16s\n"
3681 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3682 			    "ADDR", "TYPE", "START", "END", "SIZE",
3683 			    "", "", "THREAD", "TIMESTAMP", "");
3684 		} else {
3685 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3686 			    "START", size? "SIZE" : "END", "WHO");
3687 		}
3688 	}
3689 
3690 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3691 		mdb_warn("couldn't read vmem_seg at %p", addr);
3692 		return (DCMD_ERR);
3693 	}
3694 
3695 	if (type != NULL) {
3696 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3697 			t = VMEM_ALLOC;
3698 		else if (strcmp(type, "FREE") == 0)
3699 			t = VMEM_FREE;
3700 		else if (strcmp(type, "SPAN") == 0)
3701 			t = VMEM_SPAN;
3702 		else if (strcmp(type, "ROTR") == 0 ||
3703 		    strcmp(type, "ROTOR") == 0)
3704 			t = VMEM_ROTOR;
3705 		else if (strcmp(type, "WLKR") == 0 ||
3706 		    strcmp(type, "WALKER") == 0)
3707 			t = VMEM_WALKER;
3708 		else {
3709 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3710 			    type);
3711 			return (DCMD_ERR);
3712 		}
3713 
3714 		if (vs.vs_type != t)
3715 			return (DCMD_OK);
3716 	}
3717 
3718 	sz = vs.vs_end - vs.vs_start;
3719 
3720 	if (minsize != 0 && sz < minsize)
3721 		return (DCMD_OK);
3722 
3723 	if (maxsize != 0 && sz > maxsize)
3724 		return (DCMD_OK);
3725 
3726 	t = vs.vs_type;
3727 	depth = vs.vs_depth;
3728 
3729 	/*
3730 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3731 	 */
3732 	no_debug = (t != VMEM_ALLOC) ||
3733 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3734 
3735 	if (no_debug) {
3736 		if (caller != NULL || thread != NULL || earliest != 0 ||
3737 		    latest != 0)
3738 			return (DCMD_OK);		/* not enough info */
3739 	} else {
3740 		if (caller != NULL) {
3741 			laddr = caller;
3742 			haddr = caller + sizeof (caller);
3743 
3744 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3745 			    sizeof (c), &sym) != -1 &&
3746 			    caller == (uintptr_t)sym.st_value) {
3747 				/*
3748 				 * We were provided an exact symbol value; any
3749 				 * address in the function is valid.
3750 				 */
3751 				laddr = (uintptr_t)sym.st_value;
3752 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3753 			}
3754 
3755 			for (i = 0; i < depth; i++)
3756 				if (vs.vs_stack[i] >= laddr &&
3757 				    vs.vs_stack[i] < haddr)
3758 					break;
3759 
3760 			if (i == depth)
3761 				return (DCMD_OK);
3762 		}
3763 
3764 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3765 			return (DCMD_OK);
3766 
3767 		if (earliest != 0 && vs.vs_timestamp < earliest)
3768 			return (DCMD_OK);
3769 
3770 		if (latest != 0 && vs.vs_timestamp > latest)
3771 			return (DCMD_OK);
3772 	}
3773 
3774 	type = (t == VMEM_ALLOC ? "ALLC" :
3775 	    t == VMEM_FREE ? "FREE" :
3776 	    t == VMEM_SPAN ? "SPAN" :
3777 	    t == VMEM_ROTOR ? "ROTR" :
3778 	    t == VMEM_WALKER ? "WLKR" :
3779 	    "????");
3780 
3781 	if (flags & DCMD_PIPE_OUT) {
3782 		mdb_printf("%#lr\n", addr);
3783 		return (DCMD_OK);
3784 	}
3785 
3786 	if (verbose) {
3787 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3788 		    addr, type, vs.vs_start, vs.vs_end, sz);
3789 
3790 		if (no_debug)
3791 			return (DCMD_OK);
3792 
3793 		mdb_printf("%16s %4s %16p %16llx\n",
3794 		    "", "", vs.vs_thread, vs.vs_timestamp);
3795 
3796 		mdb_inc_indent(17);
3797 		for (i = 0; i < depth; i++) {
3798 			mdb_printf("%a\n", stk[i]);
3799 		}
3800 		mdb_dec_indent(17);
3801 		mdb_printf("\n");
3802 	} else {
3803 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3804 		    vs.vs_start, size? sz : vs.vs_end);
3805 
3806 		if (no_debug) {
3807 			mdb_printf("\n");
3808 			return (DCMD_OK);
3809 		}
3810 
3811 		for (i = 0; i < depth; i++) {
3812 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3813 			    c, sizeof (c), &sym) == -1)
3814 				continue;
3815 			if (strncmp(c, "vmem_", 5) == 0)
3816 				continue;
3817 			break;
3818 		}
3819 		mdb_printf(" %a\n", stk[i]);
3820 	}
3821 	return (DCMD_OK);
3822 }
3823 
/*
 * State shared between ::kmalog and its log walk callback.
 */
typedef struct kmalog_data {
	uintptr_t	kma_addr;	/* only show buffers containing this */
	hrtime_t	kma_newest;	/* timestamp of first record seen */
} kmalog_data_t;
3828 
3829 /*ARGSUSED*/
3830 static int
3831 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3832 {
3833 	char name[KMEM_CACHE_NAMELEN + 1];
3834 	hrtime_t delta;
3835 	int i, depth;
3836 	size_t bufsize;
3837 
3838 	if (bcp->bc_timestamp == 0)
3839 		return (WALK_DONE);
3840 
3841 	if (kma->kma_newest == 0)
3842 		kma->kma_newest = bcp->bc_timestamp;
3843 
3844 	if (kma->kma_addr) {
3845 		if (mdb_vread(&bufsize, sizeof (bufsize),
3846 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3847 			mdb_warn(
3848 			    "failed to read cache_bufsize for cache at %p",
3849 			    bcp->bc_cache);
3850 			return (WALK_ERR);
3851 		}
3852 
3853 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3854 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3855 			return (WALK_NEXT);
3856 	}
3857 
3858 	delta = kma->kma_newest - bcp->bc_timestamp;
3859 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3860 
3861 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3862 	    &bcp->bc_cache->cache_name) <= 0)
3863 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3864 
3865 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3866 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3867 
3868 	for (i = 0; i < depth; i++)
3869 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3870 
3871 	return (WALK_NEXT);
3872 }
3873 
3874 int
3875 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3876 {
3877 	const char *logname = "kmem_transaction_log";
3878 	kmalog_data_t kma;
3879 
3880 	if (argc > 1)
3881 		return (DCMD_USAGE);
3882 
3883 	kma.kma_newest = 0;
3884 	if (flags & DCMD_ADDRSPEC)
3885 		kma.kma_addr = addr;
3886 	else
3887 		kma.kma_addr = NULL;
3888 
3889 	if (argc > 0) {
3890 		if (argv->a_type != MDB_TYPE_STRING)
3891 			return (DCMD_USAGE);
3892 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3893 			logname = "kmem_failure_log";
3894 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3895 			logname = "kmem_slab_log";
3896 		else
3897 			return (DCMD_USAGE);
3898 	}
3899 
3900 	if (mdb_readvar(&addr, logname) == -1) {
3901 		mdb_warn("failed to read %s log header pointer");
3902 		return (DCMD_ERR);
3903 	}
3904 
3905 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3906 		mdb_warn("failed to walk kmem log");
3907 		return (DCMD_ERR);
3908 	}
3909 
3910 	return (DCMD_OK);
3911 }
3912 
3913 /*
3914  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3915  * The first piece is a structure which we use to accumulate kmem_cache_t
3916  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3917  * walker; we either add all caches, or ones named explicitly as arguments.
3918  */
3919 
/*
 * Growable array of kmem_cache_t addresses, filled in by kmc_add().
 */
typedef struct kmclist {
	const char *kmc_name;			/* Name to match (or NULL) */
	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
	int kmc_nelems;				/* Num entries in kmc_caches */
	int kmc_size;				/* Size of kmc_caches array */
} kmclist_t;
3926 
3927 static int
3928 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3929 {
3930 	void *p;
3931 	int s;
3932 
3933 	if (kmc->kmc_name == NULL ||
3934 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3935 		/*
3936 		 * If we have a match, grow our array (if necessary), and then
3937 		 * add the virtual address of the matching cache to our list.
3938 		 */
3939 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3940 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3941 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3942 
3943 			bcopy(kmc->kmc_caches, p,
3944 			    sizeof (uintptr_t) * kmc->kmc_size);
3945 
3946 			kmc->kmc_caches = p;
3947 			kmc->kmc_size = s;
3948 		}
3949 
3950 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3951 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3952 	}
3953 
3954 	return (WALK_NEXT);
3955 }
3956 
3957 /*
3958  * The second piece of ::kmausers is a hash table of allocations.  Each
3959  * allocation owner is identified by its stack trace and data_size.  We then
3960  * track the total bytes of all such allocations, and the number of allocations
3961  * to report at the end.  Once we have a list of caches, we walk through the
3962  * allocated bufctls of each, and update our hash table accordingly.
3963  */
3964 
/*
 * One entry of the ::kmausers owner table.  The table is a single array in
 * which element i serves both as the i'th owner record and, through
 * kmo_head, as the head of hash bucket i.
 */
typedef struct kmowner {
	struct kmowner *kmo_head;		/* First hash elt in bucket */
	struct kmowner *kmo_next;		/* Next hash elt in chain */
	size_t kmo_signature;			/* Hash table signature */
	uint_t kmo_num;				/* Number of allocations */
	size_t kmo_data_size;			/* Size of each allocation */
	size_t kmo_total_size;			/* Total bytes of allocation */
	int kmo_depth;				/* Depth of stack trace */
	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
} kmowner_t;
3975 
/*
 * State for one ::kmausers invocation, passed to the bufctl walk callbacks.
 */
typedef struct kmusers {
	uintptr_t kmu_addr;			/* address of interest */
	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
	kmowner_t *kmu_hash;			/* Hash table of owners */
	int kmu_nelems;				/* Number of entries in use */
	int kmu_size;				/* Total number of entries */
} kmusers_t;
3983 
3984 static void
3985 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3986     size_t size, size_t data_size)
3987 {
3988 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3989 	size_t bucket, signature = data_size;
3990 	kmowner_t *kmo, *kmoend;
3991 
3992 	/*
3993 	 * If the hash table is full, double its size and rehash everything.
3994 	 */
3995 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3996 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3997 
3998 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3999 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4000 		kmu->kmu_hash = kmo;
4001 		kmu->kmu_size = s;
4002 
4003 		kmoend = kmu->kmu_hash + kmu->kmu_size;
4004 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4005 			kmo->kmo_head = NULL;
4006 
4007 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4008 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4009 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4010 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4011 			kmu->kmu_hash[bucket].kmo_head = kmo;
4012 		}
4013 	}
4014 
4015 	/*
4016 	 * Finish computing the hash signature from the stack trace, and then
4017 	 * see if the owner is in the hash table.  If so, update our stats.
4018 	 */
4019 	for (i = 0; i < depth; i++)
4020 		signature += bcp->bc_stack[i];
4021 
4022 	bucket = signature & (kmu->kmu_size - 1);
4023 
4024 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4025 		if (kmo->kmo_signature == signature) {
4026 			size_t difference = 0;
4027 
4028 			difference |= kmo->kmo_data_size - data_size;
4029 			difference |= kmo->kmo_depth - depth;
4030 
4031 			for (i = 0; i < depth; i++) {
4032 				difference |= kmo->kmo_stack[i] -
4033 				    bcp->bc_stack[i];
4034 			}
4035 
4036 			if (difference == 0) {
4037 				kmo->kmo_total_size += size;
4038 				kmo->kmo_num++;
4039 				return;
4040 			}
4041 		}
4042 	}
4043 
4044 	/*
4045 	 * If the owner is not yet hashed, grab the next element and fill it
4046 	 * in based on the allocation information.
4047 	 */
4048 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4049 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4050 	kmu->kmu_hash[bucket].kmo_head = kmo;
4051 
4052 	kmo->kmo_signature = signature;
4053 	kmo->kmo_num = 1;
4054 	kmo->kmo_data_size = data_size;
4055 	kmo->kmo_total_size = size;
4056 	kmo->kmo_depth = depth;
4057 
4058 	for (i = 0; i < depth; i++)
4059 		kmo->kmo_stack[i] = bcp->bc_stack[i];
4060 }
4061 
4062 /*
4063  * When ::kmausers is invoked without the -f flag, we simply update our hash
4064  * table with the information from each allocated bufctl.
4065  */
4066 /*ARGSUSED*/
4067 static int
4068 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4069 {
4070 	const kmem_cache_t *cp = kmu->kmu_cache;
4071 
4072 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4073 	return (WALK_NEXT);
4074 }
4075 
4076 /*
4077  * When ::kmausers is invoked with the -f flag, we print out the information
4078  * for each bufctl as well as updating the hash table.
4079  */
4080 static int
4081 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4082 {
4083 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4084 	const kmem_cache_t *cp = kmu->kmu_cache;
4085 	kmem_bufctl_t bufctl;
4086 
4087 	if (kmu->kmu_addr) {
4088 		if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
4089 			mdb_warn("couldn't read bufctl at %p", addr);
4090 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4091 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4092 		    cp->cache_bufsize)
4093 			return (WALK_NEXT);
4094 	}
4095 
4096 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4097 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4098 
4099 	for (i = 0; i < depth; i++)
4100 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4101 
4102 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4103 	return (WALK_NEXT);
4104 }
4105 
4106 /*
4107  * We sort our results by allocation size before printing them.
4108  */
4109 static int
4110 kmownercmp(const void *lp, const void *rp)
4111 {
4112 	const kmowner_t *lhs = lp;
4113 	const kmowner_t *rhs = rp;
4114 
4115 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4116 }
4117 
4118 /*
4119  * The main engine of ::kmausers is relatively straightforward: First we
4120  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4121  * iterate over the allocated bufctls of each cache in the list.  Finally,
4122  * we sort and print our results.
4123  */
4124 /*ARGSUSED*/
4125 int
4126 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4127 {
4128 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4129 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4130 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4131 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4132 	int opt_e = FALSE;		/* Include "small" users */
4133 	int opt_f = FALSE;		/* Print stack traces */
4134 
4135 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4136 	kmowner_t *kmo, *kmoend;
4137 	int i, oelems;
4138 
4139 	kmclist_t kmc;
4140 	kmusers_t kmu;
4141 
4142 	bzero(&kmc, sizeof (kmc));
4143 	bzero(&kmu, sizeof (kmu));
4144 
4145 	while ((i = mdb_getopts(argc, argv,
4146 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4147 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4148 
4149 		argv += i;	/* skip past options we just processed */
4150 		argc -= i;	/* adjust argc */
4151 
4152 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4153 			return (DCMD_USAGE);
4154 
4155 		oelems = kmc.kmc_nelems;
4156 		kmc.kmc_name = argv->a_un.a_str;
4157 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4158 
4159 		if (kmc.kmc_nelems == oelems) {
4160 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4161 			return (DCMD_ERR);
4162 		}
4163 
4164 		do_all_caches = 0;
4165 		argv++;
4166 		argc--;
4167 	}
4168 
4169 	if (flags & DCMD_ADDRSPEC) {
4170 		opt_f = TRUE;
4171 		kmu.kmu_addr = addr;
4172 	} else {
4173 		kmu.kmu_addr = NULL;
4174 	}
4175 
4176 	if (opt_e)
4177 		mem_threshold = cnt_threshold = 0;
4178 
4179 	if (opt_f)
4180 		callback = (mdb_walk_cb_t)kmause2;
4181 
4182 	if (do_all_caches) {
4183 		kmc.kmc_name = NULL; /* match all cache names */
4184 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4185 	}
4186 
4187 	for (i = 0; i < kmc.kmc_nelems; i++) {
4188 		uintptr_t cp = kmc.kmc_caches[i];
4189 		kmem_cache_t c;
4190 
4191 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4192 			mdb_warn("failed to read cache at %p", cp);
4193 			continue;
4194 		}
4195 
4196 		if (!(c.cache_flags & KMF_AUDIT)) {
4197 			if (!do_all_caches) {
4198 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4199 				    c.cache_name);
4200 			}
4201 			continue;
4202 		}
4203 
4204 		kmu.kmu_cache = &c;
4205 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4206 		audited_caches++;
4207 	}
4208 
4209 	if (audited_caches == 0 && do_all_caches) {
4210 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4211 		return (DCMD_ERR);
4212 	}
4213 
4214 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4215 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4216 
4217 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4218 		if (kmo->kmo_total_size < mem_threshold &&
4219 		    kmo->kmo_num < cnt_threshold)
4220 			continue;
4221 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4222 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4223 		for (i = 0; i < kmo->kmo_depth; i++)
4224 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4225 	}
4226 
4227 	return (DCMD_OK);
4228 }
4229 
/*
 * Help text for ::kmausers.
 */
void
kmausers_help(void)
{
	mdb_printf("%s",
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n");
}
4245 
/*
 * Return the target's 'kmem_ready' value, or -1 if it could not be read (in
 * which case errno has been set for us).
 */
static int
kmem_ready_check(void)
{
	int ready;

	return ((mdb_readvar(&ready, "kmem_ready") < 0) ? -1 : ready);
}
4256 
4257 void
4258 kmem_statechange(void)
4259 {
4260 	static int been_ready = 0;
4261 
4262 	if (been_ready)
4263 		return;
4264 
4265 	if (kmem_ready_check() <= 0)
4266 		return;
4267 
4268 	been_ready = 1;
4269 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4270 }
4271 
4272 void
4273 kmem_init(void)
4274 {
4275 	mdb_walker_t w = {
4276 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4277 		list_walk_step, list_walk_fini
4278 	};
4279 
4280 	/*
4281 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
4282 	 * immediately.  Walkers in the linkage structure won't be ready until
4283 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
4284 	 * is ready, we'll use the walker to initialize the caches.  If kmem
4285 	 * isn't ready, we'll register a callback that will allow us to defer
4286 	 * cache walking until it is.
4287 	 */
4288 	if (mdb_add_walker(&w) != 0) {
4289 		mdb_warn("failed to add kmem_cache walker");
4290 		return;
4291 	}
4292 
4293 	kmem_statechange();
4294 
4295 	/* register our ::whatis handlers */
4296 	mdb_whatis_register("modules", whatis_run_modules, NULL,
4297 	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4298 	mdb_whatis_register("threads", whatis_run_threads, NULL,
4299 	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4300 	mdb_whatis_register("pages", whatis_run_pages, NULL,
4301 	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4302 	mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4303 	    WHATIS_PRIO_ALLOCATOR, 0);
4304 	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4305 	    WHATIS_PRIO_ALLOCATOR, 0);
4306 }
4307 
/*
 * Search state handed to whatthread_walk_thread() for each thread visited
 * by the ::whatthread dcmd.
 */
struct whatthread {
	uintptr_t	wt_target;	/* pointer value to look for on stacks */
	int		wt_verbose;	/* nonzero: report every matching slot */
};

typedef struct whatthread whatthread_t;
4312 
4313 static int
4314 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4315 {
4316 	uintptr_t current, data;
4317 
4318 	if (t->t_stkbase == NULL)
4319 		return (WALK_NEXT);
4320 
4321 	/*
4322 	 * Warn about swapped out threads, but drive on anyway
4323 	 */
4324 	if (!(t->t_schedflag & TS_LOAD)) {
4325 		mdb_warn("thread %p's stack swapped out\n", addr);
4326 		return (WALK_NEXT);
4327 	}
4328 
4329 	/*
4330 	 * Search the thread's stack for the given pointer.  Note that it would
4331 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4332 	 * chunks, but this routine is already fast and simple.
4333 	 */
4334 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4335 	    current += sizeof (uintptr_t)) {
4336 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4337 			mdb_warn("couldn't read thread %p's stack at %p",
4338 			    addr, current);
4339 			return (WALK_ERR);
4340 		}
4341 
4342 		if (data == w->wt_target) {
4343 			if (w->wt_verbose) {
4344 				mdb_printf("%p in thread %p's stack%s\n",
4345 				    current, addr, stack_active(t, current));
4346 			} else {
4347 				mdb_printf("%#lr\n", addr);
4348 				return (WALK_NEXT);
4349 			}
4350 		}
4351 	}
4352 
4353 	return (WALK_NEXT);
4354 }
4355 
4356 int
4357 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4358 {
4359 	whatthread_t w;
4360 
4361 	if (!(flags & DCMD_ADDRSPEC))
4362 		return (DCMD_USAGE);
4363 
4364 	w.wt_verbose = FALSE;
4365 	w.wt_target = addr;
4366 
4367 	if (mdb_getopts(argc, argv,
4368 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4369 		return (DCMD_USAGE);
4370 
4371 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4372 	    == -1) {
4373 		mdb_warn("couldn't walk threads");
4374 		return (DCMD_ERR);
4375 	}
4376 
4377 	return (DCMD_OK);
4378 }
4379