xref: /titanic_50/usr/src/cmd/mdb/common/modules/genunix/kmem.c (revision 129ce2569122928967f6c4d23eebbc1489419d6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <sys/cpuvar.h>
30 #include <sys/kmem_impl.h>
31 #include <sys/vmem_impl.h>
32 #include <sys/machelf.h>
33 #include <sys/modctl.h>
34 #include <sys/kobj.h>
35 #include <sys/panic.h>
36 #include <sys/stack.h>
37 #include <sys/sysmacros.h>
38 #include <vm/page.h>
39 
40 #include "avl.h"
41 #include "combined.h"
42 #include "dist.h"
43 #include "kmem.h"
44 #include "list.h"
45 
/*
 * Debug tracing: emits its printf-style argument list (passed as one
 * parenthesized argument, e.g. dprintf(("n = %d\n", n))) only when the
 * module-private mdb_debug_level flag has been toggled on via ::kmem_debug.
 *
 * Wrapped in do { ... } while (0) so the macro expands to exactly one
 * statement; the previous bare if-block form could capture a following
 * "else" belonging to an enclosing if (dangling-else hazard).
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: ");  \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
51 
/*
 * Buffer-selection flags for the kmem walkers; presumably combined into the
 * 'type' argument of kmem_walk_init_common() below -- confirm against the
 * walker implementations later in this file.
 */
#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

/* toggled by the ::kmem_debug dcmd; gates the dprintf() tracing above */
static int mdb_debug_level = 0;
59 
60 /*ARGSUSED*/
61 static int
62 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
63 {
64 	mdb_walker_t w;
65 	char descr[64];
66 
67 	(void) mdb_snprintf(descr, sizeof (descr),
68 	    "walk the %s cache", c->cache_name);
69 
70 	w.walk_name = c->cache_name;
71 	w.walk_descr = descr;
72 	w.walk_init = kmem_walk_init;
73 	w.walk_step = kmem_walk_step;
74 	w.walk_fini = kmem_walk_fini;
75 	w.walk_init_arg = (void *)addr;
76 
77 	if (mdb_add_walker(&w) == -1)
78 		mdb_warn("failed to add %s walker", c->cache_name);
79 
80 	return (WALK_NEXT);
81 }
82 
83 /*ARGSUSED*/
84 int
85 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
86 {
87 	mdb_debug_level ^= 1;
88 
89 	mdb_printf("kmem: debugging is now %s\n",
90 	    mdb_debug_level ? "on" : "off");
91 
92 	return (DCMD_OK);
93 }
94 
95 int
96 kmem_cache_walk_init(mdb_walk_state_t *wsp)
97 {
98 	GElf_Sym sym;
99 
100 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
101 		mdb_warn("couldn't find kmem_caches");
102 		return (WALK_ERR);
103 	}
104 
105 	wsp->walk_addr = (uintptr_t)sym.st_value;
106 
107 	return (list_walk_init_named(wsp, "cache list", "cache"));
108 }
109 
/*
 * Walk the per-CPU magazine caches of one kmem cache.  Requires a cache
 * address (no global walk); layers over the "cpu" walker so we get one
 * callback per CPU.  The cache address is stashed in walk_data for use by
 * kmem_cpu_cache_walk_step().
 */
int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");
		return (WALK_ERR);
	}

	/* remember the cache's address; walk_addr is owned by the layer now */
	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}
127 
128 int
129 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
130 {
131 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
132 	const cpu_t *cpu = wsp->walk_layer;
133 	kmem_cpu_cache_t cc;
134 
135 	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
136 
137 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
138 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
139 		return (WALK_ERR);
140 	}
141 
142 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
143 }
144 
145 static int
146 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
147 {
148 	kmem_slab_t *sp = p;
149 	uintptr_t caddr = (uintptr_t)arg;
150 	if ((uintptr_t)sp->slab_cache != caddr) {
151 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
152 		    saddr, caddr, sp->slab_cache);
153 		return (-1);
154 	}
155 
156 	return (0);
157 }
158 
159 static int
160 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
161 {
162 	kmem_slab_t *sp = p;
163 
164 	int rc = kmem_slab_check(p, saddr, arg);
165 	if (rc != 0) {
166 		return (rc);
167 	}
168 
169 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
170 		mdb_warn("slab %p is not a partial slab\n", saddr);
171 		return (-1);
172 	}
173 
174 	return (0);
175 }
176 
177 static int
178 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
179 {
180 	kmem_slab_t *sp = p;
181 
182 	int rc = kmem_slab_check(p, saddr, arg);
183 	if (rc != 0) {
184 		return (rc);
185 	}
186 
187 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
188 		mdb_warn("slab %p is not completely allocated\n", saddr);
189 		return (-1);
190 	}
191 
192 	return (0);
193 }
194 
/*
 * State for kmem_nth_slab_check(): verifies cache membership and stops the
 * walk (returns nonzero) once kns_nslabs slabs have been accepted.
 */
typedef struct {
	uintptr_t kns_cache_addr;	/* cache the slabs must belong to */
	int kns_nslabs;			/* slabs still to accept */
} kmem_nth_slab_t;
199 
200 static int
201 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
202 {
203 	kmem_nth_slab_t *chkp = arg;
204 
205 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
206 	if (rc != 0) {
207 		return (rc);
208 	}
209 
210 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
211 }
212 
213 static int
214 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
215 {
216 	uintptr_t caddr = wsp->walk_addr;
217 
218 	wsp->walk_addr = (uintptr_t)(caddr +
219 	    offsetof(kmem_cache_t, cache_complete_slabs));
220 
221 	return (list_walk_init_checked(wsp, "slab list", "slab",
222 	    kmem_complete_slab_check, (void *)caddr));
223 }
224 
225 static int
226 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
227 {
228 	uintptr_t caddr = wsp->walk_addr;
229 
230 	wsp->walk_addr = (uintptr_t)(caddr +
231 	    offsetof(kmem_cache_t, cache_partial_slabs));
232 
233 	return (avl_walk_init_checked(wsp, "slab list", "slab",
234 	    kmem_partial_slab_check, (void *)caddr));
235 }
236 
237 int
238 kmem_slab_walk_init(mdb_walk_state_t *wsp)
239 {
240 	uintptr_t caddr = wsp->walk_addr;
241 
242 	if (caddr == NULL) {
243 		mdb_warn("kmem_slab doesn't support global walks\n");
244 		return (WALK_ERR);
245 	}
246 
247 	combined_walk_init(wsp);
248 	combined_walk_add(wsp,
249 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
250 	combined_walk_add(wsp,
251 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
252 
253 	return (WALK_NEXT);
254 }
255 
256 static int
257 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
258 {
259 	uintptr_t caddr = wsp->walk_addr;
260 	kmem_nth_slab_t *chk;
261 
262 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
263 	    UM_SLEEP | UM_GC);
264 	chk->kns_cache_addr = caddr;
265 	chk->kns_nslabs = 1;
266 	wsp->walk_addr = (uintptr_t)(caddr +
267 	    offsetof(kmem_cache_t, cache_complete_slabs));
268 
269 	return (list_walk_init_checked(wsp, "slab list", "slab",
270 	    kmem_nth_slab_check, chk));
271 }
272 
273 int
274 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
275 {
276 	uintptr_t caddr = wsp->walk_addr;
277 	kmem_cache_t c;
278 
279 	if (caddr == NULL) {
280 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
281 		return (WALK_ERR);
282 	}
283 
284 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
285 		mdb_warn("couldn't read kmem_cache at %p", caddr);
286 		return (WALK_ERR);
287 	}
288 
289 	combined_walk_init(wsp);
290 
291 	/*
292 	 * Some consumers (umem_walk_step(), in particular) require at
293 	 * least one callback if there are any buffers in the cache.  So
294 	 * if there are *no* partial slabs, report the first full slab, if
295 	 * any.
296 	 *
297 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
298 	 */
299 	if (c.cache_partial_slabs.avl_numnodes == 0) {
300 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
301 		    list_walk_step, list_walk_fini);
302 	} else {
303 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
304 		    avl_walk_step, avl_walk_fini);
305 	}
306 
307 	return (WALK_NEXT);
308 }
309 
310 int
311 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
312 {
313 	kmem_cache_t c;
314 	const char *filter = NULL;
315 
316 	if (mdb_getopts(ac, argv,
317 	    'n', MDB_OPT_STR, &filter,
318 	    NULL) != ac) {
319 		return (DCMD_USAGE);
320 	}
321 
322 	if (!(flags & DCMD_ADDRSPEC)) {
323 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
324 			mdb_warn("can't walk kmem_cache");
325 			return (DCMD_ERR);
326 		}
327 		return (DCMD_OK);
328 	}
329 
330 	if (DCMD_HDRSPEC(flags))
331 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
332 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
333 
334 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
335 		mdb_warn("couldn't read kmem_cache at %p", addr);
336 		return (DCMD_ERR);
337 	}
338 
339 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
340 		return (DCMD_OK);
341 
342 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
343 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
344 
345 	return (DCMD_OK);
346 }
347 
/*
 * Help text for the ::kmem_cache dcmd.
 */
void
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}
368 
#define	LABEL_WIDTH	11
/*
 * Print a distribution of allocated-buffers-per-slab for one cache.
 *
 * ks_bucket[i] holds the number of slabs with exactly i allocated buffers,
 * for i in 0 .. buffers_per_slab (filled by kmem_slablist_stat()).
 * maxbuckets and minbucketsize come from the ::kmem_slabs -b/-B options
 * and bound the number and width of the printed bins.
 */
static void
kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
	uint64_t total;
	int buckets;
	int i;
	const int *distarray;
	int complete[2];

	buckets = buffers_per_slab;

	/* total slab count across all refcnt buckets, for percentages */
	total = 0;
	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	if (maxbuckets > 1)
		buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		    minbucketsize));
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			buckets = 2;
			minbucketsize = (buffers_per_slab - 1);
		}
	}

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output. Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
	}
	mdb_printf("\n");
}
#undef LABEL_WIDTH
428 
/*
 * Walker callback used solely to detect whether a cache has any slabs at
 * all: the first invocation sets the flag and terminates the walk.
 */
/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
	*is_slab = B_TRUE;
	return (WALK_DONE);
}
436 
/*
 * Walker callback used to detect whether a cache has any *partial* slabs:
 * inspects only the first slab reported and terminates the walk.
 */
/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
	/*
	 * The "kmem_partial_slab" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at least
	 * one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
	return (WALK_DONE);
}
450 
/* per-slab usage record collected for each partial slab (see below) */
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;

/* aggregate slab statistics for one cache, filled by kmem_slablist_stat() */
typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;
466 
/*
 * Walker callback: accumulate slab statistics for one cache.  Counts every
 * slab into the refcnt histogram (ks_bucket); for partial slabs it also
 * records a kmem_slab_usage_t entry in the dynamically grown ks_usage array.
 */
/*ARGSUSED*/
static int
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
	kmem_slab_usage_t *ksu;
	long unused;

	ks->ks_slabs++;
	/* histogram bucket indexed by allocated-buffer count */
	ks->ks_bucket[sp->slab_refcnt]++;

	unused = (sp->slab_chunks - sp->slab_refcnt);
	if (unused == 0) {
		/* fully allocated slab: nothing more to record */
		return (WALK_NEXT);
	}

	ks->ks_partial_slabs++;
	ks->ks_unused_buffers += unused;

	/* grow ks_usage by doubling (initial size 16) when it fills up */
	if (ks->ks_partial_slabs > ks->ks_usage_len) {
		kmem_slab_usage_t *usage;
		int len = ks->ks_usage_len;

		len = (len == 0 ? 16 : len * 2);
		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
		if (ks->ks_usage != NULL) {
			bcopy(ks->ks_usage, usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
			mdb_free(ks->ks_usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
		}
		ks->ks_usage = usage;
		ks->ks_usage_len = len;
	}

	/* record this partial slab's usage in walk order */
	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
	ksu->ksu_refcnt = sp->slab_refcnt;
	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
	return (WALK_NEXT);
}
507 
/*
 * Print the three header lines (labels and underlines) for the ::kmem_slabs
 * summary table.
 *
 * Declared (void) rather than with an old-style empty parameter list so the
 * compiler checks that no arguments are passed.
 */
static void
kmem_slabs_header(void)
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
519 
/*
 * ::kmem_slabs dcmd: display slab usage for a cache (or all caches when no
 * address is given).  Options:
 *   -n/-N  filter caches by partial/exact name,
 *   -b/-B  print an allocated-buffers-per-slab distribution,
 *   -v     additionally list each partial slab's allocated-buffer count.
 */
int
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	kmem_slab_stats_t stats;
	mdb_walk_cb_t cb;
	int pct;
	int tenths_pct;
	size_t maxbuckets = 1;
	size_t minbucketsize = 0;
	const char *filter = NULL;
	const char *name = NULL;
	uint_t opt_v = FALSE;
	boolean_t buckets = B_FALSE;
	boolean_t skip = B_FALSE;

	if (mdb_getopts(argc, argv,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'n', MDB_OPT_STR, &filter,
	    'N', MDB_OPT_STR, &name,
	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
	    NULL) != argc) {
		return (DCMD_USAGE);
	}

	/* either -b or -B requests the distribution output */
	if ((maxbuckets != 1) || (minbucketsize != 0)) {
		buckets = B_TRUE;
	}

	/* no address: apply ourselves to every cache */
	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
		    argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	/* decide whether the -n/-N filters exclude this cache */
	if (name == NULL) {
		skip = ((filter != NULL) &&
		    (strstr(c.cache_name, filter) == NULL));
	} else if (filter == NULL) {
		skip = (strcmp(c.cache_name, name) != 0);
	} else {
		/* match either -n or -N */
		skip = ((strcmp(c.cache_name, name) != 0) &&
		    (strstr(c.cache_name, filter) == NULL));
	}

	/*
	 * Print the table header.  In verbose/bucket mode after the first
	 * row, re-print it before each cache that actually has slabs to
	 * report (detected with the kmem_first_*slab probe walks), so each
	 * cache's verbose output is visually separated.
	 */
	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
		kmem_slabs_header();
	} else if ((opt_v || buckets) && !skip) {
		if (DCMD_HDRSPEC(flags)) {
			kmem_slabs_header();
		} else {
			boolean_t is_slab = B_FALSE;
			const char *walker_name;
			if (opt_v) {
				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
				walker_name = "kmem_slab_partial";
			} else {
				cb = (mdb_walk_cb_t)kmem_first_slab;
				walker_name = "kmem_slab";
			}
			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
			if (is_slab) {
				kmem_slabs_header();
			}
		}
	}

	if (skip) {
		return (DCMD_OK);
	}

	/* gather per-slab statistics for this cache */
	bzero(&stats, sizeof (kmem_slab_stats_t));
	stats.ks_cp = &c;
	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
	/* +1 to include a zero bucket */
	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket), UM_SLEEP);
	cb = (mdb_walk_cb_t)kmem_slablist_stat;
	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);

	/*
	 * Waste percentage: compute unused/total in hundredths of a percent,
	 * then round to the nearest tenth (carrying into the whole percent
	 * when the tenths round up to 10).
	 */
	if (c.cache_buftotal == 0) {
		pct = 0;
		tenths_pct = 0;
	} else {
		uint64_t n = stats.ks_unused_buffers * 10000;
		pct = (int)(n / c.cache_buftotal);
		tenths_pct = pct - ((pct / 100) * 100);
		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
		if (tenths_pct == 10) {
			pct += 100;
			tenths_pct = 0;
		}
	}

	pct /= 100;
	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
	    stats.ks_unused_buffers, pct, tenths_pct);

	/* -b 0 means "use the maximum buffers per slab as the bin count" */
	if (maxbuckets == 0) {
		maxbuckets = stats.ks_max_buffers_per_slab;
	}

	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
	    (stats.ks_slabs > 0)) {
		mdb_printf("\n");
		kmem_slabs_print_dist(stats.ks_bucket,
		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
	}

	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket));

	if (!opt_v) {
		return (DCMD_OK);
	}

	/* verbose: list each partial slab's allocated count in walk order */
	if (opt_v && (stats.ks_partial_slabs > 0)) {
		int i;
		kmem_slab_usage_t *ksu;

		mdb_printf("  %d complete (%d), %d partial:",
		    (stats.ks_slabs - stats.ks_partial_slabs),
		    stats.ks_max_buffers_per_slab,
		    stats.ks_partial_slabs);

		for (i = 0; i < stats.ks_partial_slabs; i++) {
			ksu = &stats.ks_usage[i];
			mdb_printf(" %d%s", ksu->ksu_refcnt,
			    (ksu->ksu_nomove ? "*" : ""));
		}
		mdb_printf("\n\n");
	}

	if (stats.ks_usage_len > 0) {
		mdb_free(stats.ks_usage,
		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
	}

	return (DCMD_OK);
}
671 
/*
 * Help text for the ::kmem_slabs dcmd.
 */
void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete, 3 partial (8): 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}
739 
/*
 * qsort-style comparator: order two uintptr_t values ascending.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t a = *((uintptr_t *)lhs);
	uintptr_t b = *((uintptr_t *)rhs);

	if (a == b)
		return (0);
	return (a < b ? -1 : 1);
}
752 
753 static int
754 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
755 {
756 	const kmem_bufctl_audit_t *bcp1 = *lhs;
757 	const kmem_bufctl_audit_t *bcp2 = *rhs;
758 
759 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
760 		return (-1);
761 
762 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
763 		return (1);
764 
765 	return (0);
766 }
767 
/*
 * State for the "kmem_hash" walk: a debugger-side copy of the cache's hash
 * table plus the bufctl currently being traversed along a hash chain.
 */
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;		/* local copy of the hash table */
	size_t kmhw_nelems;		/* number of hash buckets */
	size_t kmhw_pos;		/* next bucket index to scan */
	kmem_bufctl_t kmhw_cur;		/* current bufctl (bc_next chains) */
} kmem_hash_walk_t;
774 
/*
 * Begin a walk of the allocated-buffer hash table for one cache.  Reads the
 * entire hash table into debugger memory up front; per-chain traversal
 * happens in kmem_hash_walk_step().  Caches without KMF_HASH (small-buffer
 * caches) have no hash table, so the walk ends immediately.
 */
int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw;
	uintptr_t *hash;
	kmem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == NULL) {
		mdb_warn("kmem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;
	kmhw->kmhw_pos = 0;

	/* hash mask is (table size - 1), so the table has mask+1 buckets */
	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		/* unwind both allocations on failure */
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = kmhw;

	return (WALK_NEXT);
}
820 
821 int
822 kmem_hash_walk_step(mdb_walk_state_t *wsp)
823 {
824 	kmem_hash_walk_t *kmhw = wsp->walk_data;
825 	uintptr_t addr = NULL;
826 
827 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
828 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
829 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
830 				break;
831 		}
832 	}
833 	if (addr == NULL)
834 		return (WALK_DONE);
835 
836 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
837 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
838 		return (WALK_ERR);
839 	}
840 
841 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
842 }
843 
844 void
845 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
846 {
847 	kmem_hash_walk_t *kmhw = wsp->walk_data;
848 
849 	if (kmhw == NULL)
850 		return;
851 
852 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
853 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
854 }
855 
856 /*
857  * Find the address of the bufctl structure for the address 'buf' in cache
858  * 'cp', which is at address caddr, and place it in *out.
859  */
860 static int
861 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
862 {
863 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
864 	kmem_bufctl_t *bcp;
865 	kmem_bufctl_t bc;
866 
867 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
868 		mdb_warn("unable to read hash bucket for %p in cache %p",
869 		    buf, caddr);
870 		return (-1);
871 	}
872 
873 	while (bcp != NULL) {
874 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
875 		    (uintptr_t)bcp) == -1) {
876 			mdb_warn("unable to read bufctl at %p", bcp);
877 			return (-1);
878 		}
879 		if (bc.bc_addr == buf) {
880 			*out = (uintptr_t)bcp;
881 			return (0);
882 		}
883 		bcp = bc.bc_next;
884 	}
885 
886 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
887 	return (-1);
888 }
889 
/*
 * Determine the magazine size (rounds per magazine) for a cache.  Prefers
 * the live per-CPU value; otherwise validates the cache's magtype pointer
 * against the kmem_magtype[] array bounds and reads the magtype from the
 * target.  Returns 0 when the cache has no magazine layer or on error.
 */
int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	/*
	 * If the symbol lookup fails we only warn and still attempt the
	 * read below; the bounds check is simply skipped in that case.
	 */
	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		/* pointer outside or misaligned within kmem_magtype[] */
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}
922 
/*
 * Walker callback for kmem_estimate_allocated(): subtract this slab's
 * free (unallocated) buffer count from the running estimate.
 */
/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}
931 
932 /*
933  * Returns an upper bound on the number of allocated buffers in a given
934  * cache.
935  */
936 size_t
937 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
938 {
939 	int magsize;
940 	size_t cache_est;
941 
942 	cache_est = cp->cache_buftotal;
943 
944 	(void) mdb_pwalk("kmem_slab_partial",
945 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
946 
947 	if ((magsize = kmem_get_magsize(cp)) != 0) {
948 		size_t mag_est = cp->cache_full.ml_total * magsize;
949 
950 		if (cache_est >= mag_est) {
951 			cache_est -= mag_est;
952 		} else {
953 			mdb_warn("cache %p's magazine layer holds more buffers "
954 			    "than the slab layer.\n", addr);
955 		}
956 	}
957 	return (cache_est);
958 }
959 
/*
 * Read one magazine from the target and append its first 'rounds' buffer
 * pointers to the maglist array.  NOTE: this macro deliberately relies on
 * locals of the enclosing function (mp, kmp, magbsize, maglist, magcnt,
 * magmax, i) and jumps to its 'fail' label on error; it is only usable
 * inside kmem_read_magazines().  The magmax check fires after each store,
 * so the last valid slot is written before overflow is reported.
 */
#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
974 
/*
 * Collect every buffer pointer held in a cache's magazine layer: the depot's
 * full-magazine list plus each CPU's loaded and previously-loaded magazines.
 *
 * On success returns WALK_NEXT with *maglistp pointing to an array of
 * *magcntp buffer pointers (capacity *magmaxp), allocated with alloc_flags;
 * the caller owns the array unless UM_GC was passed.  Returns WALK_ERR on
 * failure (all allocations are released unless UM_GC).  A cache with no
 * magazine layer yields an empty list and WALK_NEXT.
 */
int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		/* no magazine layer: report an empty list */
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	/* scratch magazine no longer needed (UM_GC frees it automatically) */
	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}
1080 
1081 static int
1082 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1083 {
1084 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1085 }
1086 
1087 static int
1088 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1089 {
1090 	kmem_bufctl_audit_t b;
1091 
1092 	/*
1093 	 * if KMF_AUDIT is not set, we know that we're looking at a
1094 	 * kmem_bufctl_t.
1095 	 */
1096 	if (!(cp->cache_flags & KMF_AUDIT) ||
1097 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1098 		(void) memset(&b, 0, sizeof (b));
1099 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1100 			mdb_warn("unable to read bufctl at %p", buf);
1101 			return (WALK_ERR);
1102 		}
1103 	}
1104 
1105 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1106 }
1107 
1108 typedef struct kmem_walk {
1109 	int kmw_type;
1110 
1111 	int kmw_addr;			/* cache address */
1112 	kmem_cache_t *kmw_cp;
1113 	size_t kmw_csize;
1114 
1115 	/*
1116 	 * magazine layer
1117 	 */
1118 	void **kmw_maglist;
1119 	size_t kmw_max;
1120 	size_t kmw_count;
1121 	size_t kmw_pos;
1122 
1123 	/*
1124 	 * slab layer
1125 	 */
1126 	char *kmw_valid;	/* to keep track of freed buffers */
1127 	char *kmw_ubase;	/* buffer for slab data */
1128 } kmem_walk_t;
1129 
/*
 * Common initialization for the kmem/bufctl/freemem/freectl walker family.
 * 'type' is a mask of KM_* flags selecting allocated vs. freed buffers,
 * bufctls vs. raw buffers, and constructed-only free buffers.  We snapshot
 * the cache, sanity-check it, slurp the magazine layer, and then start a
 * layered walk over either the hash table (allocated, KMF_HASH) or the
 * slab list; kmem_walk_step() does the per-slab work.
 */
static int
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	kmem_walk_t *kmw;
	int ncpus, csize;
	kmem_cache_t *cp;
	size_t vm_quantum;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	/* KM_HASH is an internal flag; callers may not request it */
	type &= ~KM_HASH;

	if (addr == NULL) {
		mdb_warn("kmem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * First we need to figure out how many CPUs are configured in the
	 * system to know how much to slurp out.
	 * NOTE(review): the return value is unchecked; ncpus would be
	 * uninitialized if the read failed — TODO confirm intended.
	 */
	mdb_readvar(&ncpus, "max_ncpus");

	csize = KMEM_CACHE_SIZE(ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    vm_quantum == 0 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	/* empty cache: nothing to walk */
	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they want constructed buffers, but there's no constructor or
	 * the cache has DEADBEEF checking enabled, there is nothing to report.
	 */
	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
	    cp->cache_constructor == NULL ||
	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
	    &magmax, UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines;  if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & KM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

	kmw->kmw_type = type;
	kmw->kmw_addr = addr;
	kmw->kmw_cp = cp;
	kmw->kmw_csize = csize;
	kmw->kmw_maglist = maglist;
	kmw->kmw_max = magmax;
	kmw->kmw_count = magcnt;
	kmw->kmw_pos = 0;

	/*
	 * When walking allocated buffers in a KMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
		layered = "kmem_hash";

		kmw->kmw_type |= KM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & KM_ALLOCATED)
			layered = "kmem_slab";
		else
			layered = "kmem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & KMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			/* extra bufctl space for the freelist-overrun check */
			kmw->kmw_ubase = mdb_alloc(slabsize +
			    sizeof (kmem_bufctl_t), UM_SLEEP);

			if (type & KM_ALLOCATED)
				kmw->kmw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

	/*
	 * Error unwinding: out1 frees the walk state (chunksize/slabsize are
	 * only read here when kmw_valid was set, which implies they were
	 * initialized above); out2 frees the cache snapshot.
	 */
out1:
	if (status == WALK_ERR) {
		if (kmw->kmw_valid)
			mdb_free(kmw->kmw_valid, slabsize / chunksize);

		if (kmw->kmw_ubase)
			mdb_free(kmw->kmw_ubase, slabsize +
			    sizeof (kmem_bufctl_t));

		if (kmw->kmw_maglist)
			mdb_free(kmw->kmw_maglist,
			    kmw->kmw_max * sizeof (uintptr_t));

		mdb_free(kmw, sizeof (kmem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}
1312 
/*
 * Step function for the kmem/bufctl/freemem/freectl walkers.  The layered
 * walk invokes us once per hash-table bufctl (KM_HASH walks) or once per
 * slab (all other walks).  On the first slab we also report everything
 * captured from the magazine layer.
 */
int
kmem_walk_step(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	int type = kmw->kmw_type;
	kmem_cache_t *cp = kmw->kmw_cp;

	void **maglist = kmw->kmw_maglist;
	int magcnt = kmw->kmw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const kmem_slab_t *sp;
	const kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'kmem_hash' layered walk case
	 */
	if (type & KM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & KM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (kmem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = kmw->kmw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & KM_FREE) && magcnt != 0) {
		kmw->kmw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & KM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & KMF_BUFTAG) {
					kmem_buftag_t *btp;
					kmem_buftag_t tag;

					/* LINTED - alignment */
					btp = KMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					/* no buftag: find bufctl via hash */
					if (kmem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * If they want constructed buffers, we're finished, since the
	 * magazine layer holds them all.
	 */
	if (type & KM_CONSTRUCTED)
		return (WALK_DONE);

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		/* small-slab cache: pull the whole slab into local memory */
		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in;  we just need to determine
			 * its offset in the slab to find the
			 * kmem_bufctl_t.
			 */
			bc = *((kmem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}
1561 
1562 void
1563 kmem_walk_fini(mdb_walk_state_t *wsp)
1564 {
1565 	kmem_walk_t *kmw = wsp->walk_data;
1566 	uintptr_t chunksize;
1567 	uintptr_t slabsize;
1568 
1569 	if (kmw == NULL)
1570 		return;
1571 
1572 	if (kmw->kmw_maglist != NULL)
1573 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1574 
1575 	chunksize = kmw->kmw_cp->cache_chunksize;
1576 	slabsize = kmw->kmw_cp->cache_slabsize;
1577 
1578 	if (kmw->kmw_valid != NULL)
1579 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1580 	if (kmw->kmw_ubase != NULL)
1581 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1582 
1583 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1584 	mdb_free(kmw, sizeof (kmem_walk_t));
1585 }
1586 
1587 /*ARGSUSED*/
1588 static int
1589 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1590 {
1591 	/*
1592 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1593 	 * memory in other caches.  This can be a little confusing, so we
1594 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1595 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1596 	 */
1597 	if (c->cache_cflags & KMC_NOTOUCH)
1598 		return (WALK_NEXT);
1599 
1600 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1601 	    wsp->walk_cbdata, addr) == -1)
1602 		return (WALK_DONE);
1603 
1604 	return (WALK_NEXT);
1605 }
1606 
/*
 * Run the named per-cache walker over every kmem cache, then return from
 * the *enclosing* walker-init function.  Note the embedded returns: this
 * macro is only usable inside a function returning WALK_* codes.
 */
#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1613 
1614 int
1615 kmem_walk_init(mdb_walk_state_t *wsp)
1616 {
1617 	if (wsp->walk_arg != NULL)
1618 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1619 
1620 	if (wsp->walk_addr == NULL)
1621 		KMEM_WALK_ALL("kmem", wsp);
1622 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1623 }
1624 
1625 int
1626 bufctl_walk_init(mdb_walk_state_t *wsp)
1627 {
1628 	if (wsp->walk_addr == NULL)
1629 		KMEM_WALK_ALL("bufctl", wsp);
1630 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1631 }
1632 
1633 int
1634 freemem_walk_init(mdb_walk_state_t *wsp)
1635 {
1636 	if (wsp->walk_addr == NULL)
1637 		KMEM_WALK_ALL("freemem", wsp);
1638 	return (kmem_walk_init_common(wsp, KM_FREE));
1639 }
1640 
1641 int
1642 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1643 {
1644 	if (wsp->walk_addr == NULL)
1645 		KMEM_WALK_ALL("freemem_constructed", wsp);
1646 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1647 }
1648 
1649 int
1650 freectl_walk_init(mdb_walk_state_t *wsp)
1651 {
1652 	if (wsp->walk_addr == NULL)
1653 		KMEM_WALK_ALL("freectl", wsp);
1654 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1655 }
1656 
1657 int
1658 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1659 {
1660 	if (wsp->walk_addr == NULL)
1661 		KMEM_WALK_ALL("freectl_constructed", wsp);
1662 	return (kmem_walk_init_common(wsp,
1663 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1664 }
1665 
/* Per-walk state for the bufctl_history walker. */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* next log entry to visit */
	kmem_cache_t	*bhw_cache;	/* cache entries must belong to */
	kmem_slab_t	*bhw_slab;	/* slab entries must belong to */
	hrtime_t	bhw_timestamp;	/* last timestamp seen; must decrease */
} bufctl_history_walk_t;
1672 
/*
 * Walk the transaction-log history of a bufctl: start from the bufctl given
 * as walk_addr and follow bc_lastlog backwards through time, yielding one
 * audit record per transaction.
 */
int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	kmem_bufctl_audit_t bc;
	kmem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl;  in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	/* walk_addr becomes the buffer address each entry must match */
	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}
1715 
/*
 * Yield the next (older) log entry for this bufctl, stopping when the chain
 * ends, leaves the buffer/cache/slab of interest, or stops moving backwards
 * in time.
 */
int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	kmem_bufctl_audit_t bc;

	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)bc.bc_addr != baseaddr ||
	    bc.bc_cache != bhw->bhw_cache ||
	    bc.bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = bc.bc_lastlog;
	bhw->bhw_timestamp = bc.bc_timestamp;

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}
1748 
1749 void
1750 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1751 {
1752 	bufctl_history_walk_t *bhw = wsp->walk_data;
1753 
1754 	mdb_free(bhw, sizeof (*bhw));
1755 }
1756 
/* Per-walk state for the kmem_log walker. */
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;		/* local copy of the log */
	kmem_bufctl_audit_t **klw_sorted;	/* entries sorted by bufctlcmp */
	kmem_log_header_t klw_lh;		/* copy of the log header */
	size_t klw_size;			/* bytes at klw_base */
	size_t klw_maxndx;			/* entries in klw_sorted */
	size_t klw_ndx;				/* next entry to yield */
} kmem_log_walk_t;
1765 
/*
 * Initialize a walk over a kmem transaction log: copy the whole log into
 * local memory, build an array of pointers to every record, and sort it
 * (by bufctlcmp) so the step function can yield records in order.
 */
int
kmem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	kmem_log_walk_t *klw;
	kmem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
	 * read the log whose kmem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == NULL) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
	lhp = &klw->klw_lh;

	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
	/*
	 * records per chunk; the final slot of each chunk is excluded —
	 * presumably reserved by the in-kernel logger, TODO confirm.
	 */
	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;

	if (mdb_vread(klw->klw_base, klw->klw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(klw->klw_base, klw->klw_size);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++)
			klw->klw_sorted[k++] = &chunk[j];
	}

	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	klw->klw_maxndx = k;
	wsp->walk_data = klw;

	return (WALK_NEXT);
}
1828 
1829 int
1830 kmem_log_walk_step(mdb_walk_state_t *wsp)
1831 {
1832 	kmem_log_walk_t *klw = wsp->walk_data;
1833 	kmem_bufctl_audit_t *bcp;
1834 
1835 	if (klw->klw_ndx == klw->klw_maxndx)
1836 		return (WALK_DONE);
1837 
1838 	bcp = klw->klw_sorted[klw->klw_ndx++];
1839 
1840 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1841 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1842 }
1843 
1844 void
1845 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1846 {
1847 	kmem_log_walk_t *klw = wsp->walk_data;
1848 
1849 	mdb_free(klw->klw_base, klw->klw_size);
1850 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1851 	    sizeof (kmem_bufctl_audit_t *));
1852 	mdb_free(klw, sizeof (kmem_log_walk_t));
1853 }
1854 
/* One recorded transaction: a bufctl address and its timestamp. */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;
	hrtime_t abb_ts;
} allocdby_bufctl_t;

/* State for ::allocdby / ::freedby, accumulated across all caches. */
typedef struct allocdby_walk {
	const char *abw_walk;		/* per-cache walker: bufctl/freectl */
	uintptr_t abw_thread;		/* thread of interest */
	size_t abw_nbufs;		/* entries in use */
	size_t abw_size;		/* entries allocated */
	allocdby_bufctl_t *abw_buf;	/* record array, doubled on demand */
	size_t abw_ndx;			/* step cursor */
} allocdby_walk_t;
1868 
1869 int
1870 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1871     allocdby_walk_t *abw)
1872 {
1873 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1874 		return (WALK_NEXT);
1875 
1876 	if (abw->abw_nbufs == abw->abw_size) {
1877 		allocdby_bufctl_t *buf;
1878 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1879 
1880 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1881 
1882 		bcopy(abw->abw_buf, buf, oldsize);
1883 		mdb_free(abw->abw_buf, oldsize);
1884 
1885 		abw->abw_size <<= 1;
1886 		abw->abw_buf = buf;
1887 	}
1888 
1889 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1890 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1891 	abw->abw_nbufs++;
1892 
1893 	return (WALK_NEXT);
1894 }
1895 
1896 /*ARGSUSED*/
1897 int
1898 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1899 {
1900 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1901 	    abw, addr) == -1) {
1902 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1903 		return (WALK_DONE);
1904 	}
1905 
1906 	return (WALK_NEXT);
1907 }
1908 
1909 static int
1910 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1911 {
1912 	if (lhs->abb_ts < rhs->abb_ts)
1913 		return (1);
1914 	if (lhs->abb_ts > rhs->abb_ts)
1915 		return (-1);
1916 	return (0);
1917 }
1918 
/*
 * Common init for the allocdby/freedby walkers: gather every bufctl (or
 * freectl) transacted by the thread at walk_addr across all caches, then
 * sort the records newest-first for the step function.
 */
static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	/* newest transactions first (see allocdby_cmp) */
	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}
1951 
1952 int
1953 allocdby_walk_init(mdb_walk_state_t *wsp)
1954 {
1955 	return (allocdby_walk_init_common(wsp, "bufctl"));
1956 }
1957 
1958 int
1959 freedby_walk_init(mdb_walk_state_t *wsp)
1960 {
1961 	return (allocdby_walk_init_common(wsp, "freectl"));
1962 }
1963 
1964 int
1965 allocdby_walk_step(mdb_walk_state_t *wsp)
1966 {
1967 	allocdby_walk_t *abw = wsp->walk_data;
1968 	kmem_bufctl_audit_t bc;
1969 	uintptr_t addr;
1970 
1971 	if (abw->abw_ndx == abw->abw_nbufs)
1972 		return (WALK_DONE);
1973 
1974 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1975 
1976 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1977 		mdb_warn("couldn't read bufctl at %p", addr);
1978 		return (WALK_DONE);
1979 	}
1980 
1981 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1982 }
1983 
1984 void
1985 allocdby_walk_fini(mdb_walk_state_t *wsp)
1986 {
1987 	allocdby_walk_t *abw = wsp->walk_data;
1988 
1989 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1990 	mdb_free(abw, sizeof (allocdby_walk_t));
1991 }
1992 
1993 /*ARGSUSED*/
1994 int
1995 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
1996 {
1997 	char c[MDB_SYM_NAMLEN];
1998 	GElf_Sym sym;
1999 	int i;
2000 
2001 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2002 	for (i = 0; i < bcp->bc_depth; i++) {
2003 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2004 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2005 			continue;
2006 		if (strncmp(c, "kmem_", 5) == 0)
2007 			continue;
2008 		mdb_printf("%s+0x%lx",
2009 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2010 		break;
2011 	}
2012 	mdb_printf("\n");
2013 
2014 	return (WALK_NEXT);
2015 }
2016 
2017 static int
2018 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2019 {
2020 	if (!(flags & DCMD_ADDRSPEC))
2021 		return (DCMD_USAGE);
2022 
2023 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2024 
2025 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2026 		mdb_warn("can't walk '%s' for %p", w, addr);
2027 		return (DCMD_ERR);
2028 	}
2029 
2030 	return (DCMD_OK);
2031 }
2032 
2033 /*ARGSUSED*/
2034 int
2035 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2036 {
2037 	return (allocdby_common(addr, flags, "allocdby"));
2038 }
2039 
2040 /*ARGSUSED*/
2041 int
2042 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2043 {
2044 	return (allocdby_common(addr, flags, "freedby"));
2045 }
2046 
2047 /*
2048  * Return a string describing the address in relation to the given thread's
2049  * stack.
2050  *
2051  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2052  *
2053  * - If the address is above the stack pointer, return an empty string
2054  *   signifying that the address is active.
2055  *
2056  * - If the address is below the stack pointer, and the thread is not on proc,
2057  *   return " (below sp)".
2058  *
2059  * - If the address is below the stack pointer, and the thread is on proc,
2060  *   return " (possibly below sp)".  Depending on context, we may or may not
2061  *   have an accurate t_sp.
2062  */
2063 static const char *
2064 stack_active(const kthread_t *t, uintptr_t addr)
2065 {
2066 	uintptr_t panicstk;
2067 	GElf_Sym sym;
2068 
2069 	if (t->t_state == TS_FREE)
2070 		return (" (inactive interrupt thread)");
2071 
2072 	/*
2073 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2074 	 * no longer relates to the thread's real stack.
2075 	 */
2076 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2077 		panicstk = (uintptr_t)sym.st_value;
2078 
2079 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2080 			return ("");
2081 	}
2082 
2083 	if (addr >= t->t_sp + STACK_BIAS)
2084 		return ("");
2085 
2086 	if (t->t_state == TS_ONPROC)
2087 		return (" (possibly below sp)");
2088 
2089 	return (" (below sp)");
2090 }
2091 
/* State shared across the ::whatis search callbacks. */
typedef struct whatis {
	uintptr_t w_addr;		/* address being identified */
	const kmem_cache_t *w_cache;	/* cache currently being searched */
	const vmem_t *w_vmem;		/* arena currently being searched */
	size_t w_slab_align;
	int w_slab_found;
	int w_found;			/* number of matches so far */
	int w_kmem_lite_count;		/* callers recorded per KMF_LITE tag */
	uint_t w_verbose;		/* -v: narrate the search */
	uint_t w_freemem;		/* currently searching freed memory */
	uint_t w_all;			/* -a: report all matches */
	uint_t w_bufctl;		/* -b: always print bufctl/seg ptrs */
	uint_t w_idspace;		/* -i: search identifier arenas */
} whatis_t;
2106 
/*
 * Print the identification line for a kmem buffer containing w->w_addr.
 * 'addr' is the buffer base; 'baddr' is the bufctl address if the caller
 * already knows it (0 otherwise).  For KMF_REDZONE caches we try to pull
 * the bufctl — and, for KMF_LITE, the recent-caller history — out of the
 * buffer's buftag.
 */
static void
whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
{
	/* LINTED pointer cast may result in improper alignment */
	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr);
	intptr_t stat;
	int count = 0;
	int i;
	pc_t callers[16];

	if (w->w_cache->cache_flags & KMF_REDZONE) {
		kmem_buftag_t bt;

		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
			goto done;

		/* bt_bufctl ^ bt_bxstat encodes the buffer's alloc state */
		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;

		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
			goto done;

		/*
		 * provide the bufctl ptr if it has useful information
		 */
		if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT))
			baddr = (uintptr_t)bt.bt_bufctl;

		if (w->w_cache->cache_flags & KMF_LITE) {
			count = w->w_kmem_lite_count;

			if (count * sizeof (pc_t) > sizeof (callers))
				count = 0;

			if (count > 0 &&
			    mdb_vread(callers, count * sizeof (pc_t),
			    btaddr +
			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
				count = 0;

			/*
			 * skip unused callers
			 */
			while (count > 0 && callers[count - 1] ==
			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
				count--;
		}
	}

done:
	if (baddr == 0)
		mdb_printf("%p is %p+%p, %s from %s\n",
		    w->w_addr, addr, w->w_addr - addr,
		    w->w_freemem == FALSE ? "allocated" : "freed",
		    w->w_cache->cache_name);
	else
		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
		    w->w_addr, addr, w->w_addr - addr, baddr,
		    w->w_freemem == FALSE ? "allocated" : "freed",
		    w->w_cache->cache_name);

	if (count > 0) {
		mdb_inc_indent(8);
		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
		    callers[0], (count != 1)? ", ":"\n");
		for (i = 1; i < count; i++)
			mdb_printf("%a%s", callers[i],
			    (i + 1 < count)? ", ":"\n");
		mdb_dec_indent(8);
	}
}
2177 
2178 /*ARGSUSED*/
2179 static int
2180 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
2181 {
2182 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2183 		return (WALK_NEXT);
2184 
2185 	whatis_print_kmem(addr, 0, w);
2186 	w->w_found++;
2187 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2188 }
2189 
2190 static int
2191 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
2192 {
2193 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
2194 		return (WALK_NEXT);
2195 
2196 	mdb_printf("%p is %p+%p ", w->w_addr,
2197 	    vs->vs_start, w->w_addr - vs->vs_start);
2198 
2199 	/*
2200 	 * Always provide the vmem_seg pointer if it has a stack trace.
2201 	 */
2202 	if (w->w_bufctl == TRUE ||
2203 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
2204 		mdb_printf("(vmem_seg %p) ", addr);
2205 	}
2206 
2207 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
2208 	    "freed " : "", w->w_vmem->vm_name);
2209 
2210 	w->w_found++;
2211 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2212 }
2213 
2214 static int
2215 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
2216 {
2217 	const char *nm = vmem->vm_name;
2218 	w->w_vmem = vmem;
2219 	w->w_freemem = FALSE;
2220 
2221 	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2222 		return (WALK_NEXT);
2223 
2224 	if (w->w_verbose)
2225 		mdb_printf("Searching vmem arena %s...\n", nm);
2226 
2227 	if (mdb_pwalk("vmem_alloc",
2228 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2229 		mdb_warn("can't walk vmem seg for %p", addr);
2230 		return (WALK_NEXT);
2231 	}
2232 
2233 	if (w->w_found && w->w_all == FALSE)
2234 		return (WALK_DONE);
2235 
2236 	if (w->w_verbose)
2237 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
2238 
2239 	w->w_freemem = TRUE;
2240 
2241 	if (mdb_pwalk("vmem_free",
2242 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2243 		mdb_warn("can't walk vmem seg for %p", addr);
2244 		return (WALK_NEXT);
2245 	}
2246 
2247 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2248 }
2249 
2250 /*ARGSUSED*/
2251 static int
2252 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
2253 {
2254 	uintptr_t addr;
2255 
2256 	if (bcp == NULL)
2257 		return (WALK_NEXT);
2258 
2259 	addr = (uintptr_t)bcp->bc_addr;
2260 
2261 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2262 		return (WALK_NEXT);
2263 
2264 	whatis_print_kmem(addr, baddr, w);
2265 	w->w_found++;
2266 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2267 }
2268 
2269 /*ARGSUSED*/
2270 static int
2271 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
2272 {
2273 	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);
2274 
2275 	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
2276 		return (WALK_NEXT);
2277 
2278 	w->w_slab_found++;
2279 	return (WALK_DONE);
2280 }
2281 
/*
 * Search a single kmem cache for the target address: first a cheap slab
 * containment check, then walks of the allocated and free buffers (or of
 * the bufctls, when -b was given).
 */
static int
whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	char *walk, *freewalk;
	mdb_walk_cb_t func;
	vmem_t *vmp = c->cache_arena;

	/*
	 * NOTE(review): this tests cache_flags against VMC_IDENTIFIER,
	 * which is a vmem arena flag (cf. whatis_walk_vmem, which applies
	 * it to vm_cflags).  Presumably the bit value lines up with the
	 * intended kmem identifier-cache flag -- confirm against kmem_impl.h.
	 */
	if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
		return (WALK_NEXT);

	/* choose buffer-based or bufctl-based walkers, per the -b option */
	if (w->w_bufctl == FALSE) {
		walk = "kmem";
		freewalk = "freemem";
		func = (mdb_walk_cb_t)whatis_walk_kmem;
	} else {
		walk = "bufctl";
		freewalk = "freectl";
		func = (mdb_walk_cb_t)whatis_walk_bufctl;
	}

	w->w_cache = c;

	if (w->w_verbose)
		mdb_printf("Searching %s's slabs...\n", c->cache_name);

	/*
	 * Verify that the address is in one of the cache's slabs.  If not,
	 * we can skip the more expensive walkers.  (this is purely a
	 * heuristic -- as long as there are no false-negatives, we'll be fine)
	 *
	 * We try to get the cache's arena's quantum, since to accurately
	 * get the base of a slab, you have to align it to the quantum.  If
	 * it doesn't look sensible, we fall back to not aligning.
	 */
	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
	    (uintptr_t)&vmp->vm_quantum) == -1) {
		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
		w->w_slab_align = 1;
	}

	/* quantum must be a nonzero power of two no larger than the slab */
	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
	    (w->w_slab_align & (w->w_slab_align - 1))) {
		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
		    w->w_slab_align);
		w->w_slab_align = 1;
	}

	w->w_slab_found = 0;
	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
	    addr) == -1) {
		mdb_warn("can't find kmem_slab walker");
		return (WALK_DONE);
	}
	if (w->w_slab_found == 0)
		return (WALK_NEXT);

	/*
	 * For KMF_LITE caches, fetch the number of recorded callers so
	 * whatis_print_kmem() can print the bt_history entries; a bogus
	 * value (> 16) is treated as "no history".
	 */
	if (c->cache_flags & KMF_LITE) {
		if (mdb_readvar(&w->w_kmem_lite_count,
		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
			w->w_kmem_lite_count = 0;
	}

	if (w->w_verbose)
		mdb_printf("Searching %s...\n", c->cache_name);

	w->w_freemem = FALSE;

	if (mdb_pwalk(walk, func, w, addr) == -1) {
		mdb_warn("can't find %s walker", walk);
		return (WALK_DONE);
	}

	if (w->w_found && w->w_all == FALSE)
		return (WALK_DONE);

	/*
	 * We have searched for allocated memory; now search for freed memory.
	 */
	if (w->w_verbose)
		mdb_printf("Searching %s for free memory...\n", c->cache_name);

	w->w_freemem = TRUE;

	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
		mdb_warn("can't find %s walker", freewalk);
		return (WALK_DONE);
	}

	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
}
2372 
2373 static int
2374 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2375 {
2376 	if (c->cache_cflags & KMC_NOTOUCH)
2377 		return (WALK_NEXT);
2378 
2379 	return (whatis_walk_cache(addr, c, w));
2380 }
2381 
2382 static int
2383 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2384 {
2385 	if (!(c->cache_cflags & KMC_NOTOUCH))
2386 		return (WALK_NEXT);
2387 
2388 	return (whatis_walk_cache(addr, c, w));
2389 }
2390 
2391 static int
2392 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
2393 {
2394 	/*
2395 	 * Often, one calls ::whatis on an address from a thread structure.
2396 	 * We use this opportunity to short circuit this case...
2397 	 */
2398 	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
2399 		mdb_printf("%p is %p+%p, allocated as a thread structure\n",
2400 		    w->w_addr, addr, w->w_addr - addr);
2401 		w->w_found++;
2402 		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2403 	}
2404 
2405 	if (w->w_addr < (uintptr_t)t->t_stkbase ||
2406 	    w->w_addr > (uintptr_t)t->t_stk)
2407 		return (WALK_NEXT);
2408 
2409 	if (t->t_stkbase == NULL)
2410 		return (WALK_NEXT);
2411 
2412 	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
2413 	    stack_active(t, w->w_addr));
2414 
2415 	w->w_found++;
2416 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2417 }
2418 
/*
 * Check whether the target address falls inside one of a module's
 * segments (text, data, bss, symtab, symspace); if so, print where it is
 * -- including a symbol name, when the address resolves to one.
 */
static int
whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
{
	struct module mod;
	char name[MODMAXNAMELEN], *where;
	char c[MDB_SYM_NAMLEN];
	Shdr shdr;
	GElf_Sym sym;

	/* unloaded module: nothing to search */
	if (m->mod_mp == NULL)
		return (WALK_NEXT);

	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
		mdb_warn("couldn't read modctl %p's module", addr);
		return (WALK_NEXT);
	}

	if (w->w_addr >= (uintptr_t)mod.text &&
	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
		where = "text segment";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.data &&
	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
		where = "data segment";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.bss &&
	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
		where = "bss";
		goto found;
	}

	/* need the section header to compute the symtab's extent */
	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
		mdb_warn("couldn't read symbol header for %p's module", addr);
		return (WALK_NEXT);
	}

	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
		where = "symtab";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.symspace &&
	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
		where = "symspace";
		goto found;
	}

	return (WALK_NEXT);

found:
	/* fall back to the modctl address if the name can't be read */
	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);

	mdb_printf("%p is ", w->w_addr);

	/*
	 * If we found this address in a module, then there's a chance that
	 * it's actually a named symbol.  Try the symbol lookup.
	 */
	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c),
	    &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value &&
	    w->w_addr < (uintptr_t)sym.st_value + sym.st_size) {
		mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value);
	}

	mdb_printf("in %s's %s\n", name, where);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}
2494 
2495 /*ARGSUSED*/
2496 static int
2497 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
2498 {
2499 	static int machsize = 0;
2500 	mdb_ctf_id_t id;
2501 
2502 	if (machsize == 0) {
2503 		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
2504 			machsize = mdb_ctf_type_size(id);
2505 		else {
2506 			mdb_warn("could not get size of page_t");
2507 			machsize = sizeof (page_t);
2508 		}
2509 	}
2510 
2511 	if (w->w_addr < addr || w->w_addr >= addr + machsize)
2512 		return (WALK_NEXT);
2513 
2514 	mdb_printf("%p is %p+%p, allocated as a page structure\n",
2515 	    w->w_addr, addr, w->w_addr - addr);
2516 
2517 	w->w_found++;
2518 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2519 }
2520 
/*
 * ::whatis dcmd: given a virtual address, determine where it came from.
 * Searches, in order: module segments, thread stacks, page structures
 * (all skipped under -i), then kmem caches (touched first, KMC_NOTOUCH
 * second), and finally vmem arenas.  Stops at the first match unless -a
 * was given.
 */
int
whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	whatis_t w;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	w.w_verbose = FALSE;
	w.w_bufctl = FALSE;
	w.w_all = FALSE;
	w.w_idspace = FALSE;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
		return (DCMD_USAGE);

	w.w_addr = addr;
	w.w_found = 0;

	if (w.w_verbose)
		mdb_printf("Searching modules...\n");

	/* modules, threads and pages are not identifier spaces; skip on -i */
	if (!w.w_idspace) {
		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
		    == -1) {
			mdb_warn("couldn't find modctl walker");
			return (DCMD_ERR);
		}

		if (w.w_found && w.w_all == FALSE)
			return (DCMD_OK);

		/*
		 * Now search all thread stacks.  Yes, this is a little weak; we
		 * can save a lot of work by first checking to see if the
		 * address is in segkp vs. segkmem.  But hey, computers are
		 * fast.
		 */
		if (w.w_verbose)
			mdb_printf("Searching threads...\n");

		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
		    == -1) {
			mdb_warn("couldn't find thread walker");
			return (DCMD_ERR);
		}

		if (w.w_found && w.w_all == FALSE)
			return (DCMD_OK);

		if (w.w_verbose)
			mdb_printf("Searching page structures...\n");

		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
		    == -1) {
			mdb_warn("couldn't find page walker");
			return (DCMD_ERR);
		}

		if (w.w_found && w.w_all == FALSE)
			return (DCMD_OK);
	}

	/* kmem caches whose buffers the allocator may touch... */
	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
		mdb_warn("couldn't find kmem_cache walker");
		return (DCMD_ERR);
	}

	if (w.w_found && w.w_all == FALSE)
		return (DCMD_OK);

	/* ...then the KMC_NOTOUCH caches */
	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
		mdb_warn("couldn't find kmem_cache walker");
		return (DCMD_ERR);
	}

	if (w.w_found && w.w_all == FALSE)
		return (DCMD_OK);

	/* finally, the vmem arenas (children before parents) */
	if (mdb_walk("vmem_postfix",
	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
		mdb_warn("couldn't find vmem_postfix walker");
		return (DCMD_ERR);
	}

	if (w.w_found == 0)
		mdb_printf("%p is unknown\n", addr);

	return (DCMD_OK);
}
2617 
/*
 * Print the extended help text for the ::whatis dcmd.
 */
void
whatis_help(void)
{
	mdb_printf(
	    "Given a virtual address, attempt to determine where it came\n"
	    "from.\n"
	    "\n"
	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
	    "\t\tsearched\n"
	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
	    "\t\tthe first (most specific) source.\n"
	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
	    "\t\tthese are ignored.\n"
	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
	    "\t\thave a bufctl, it will not be reported.\n");
}
2635 
/* per-CPU address range of the kmem transaction log, for ::kmem_log */
typedef struct kmem_log_cpu {
	uintptr_t kmc_low;		/* start of this CPU's log chunk */
	uintptr_t kmc_high;		/* current fill point of the chunk */
} kmem_log_cpu_t;

/* state passed to kmem_log_walk() */
typedef struct kmem_log_data {
	uintptr_t kmd_addr;		/* buffer address filter (0 == none) */
	kmem_log_cpu_t *kmd_cpu;	/* NCPU-entry array of chunk bounds */
} kmem_log_data_t;
2645 
/*
 * Print one kmem transaction log entry: the CPU whose log chunk contains
 * it (blank if none matches), the bufctl address, buffer address,
 * timestamp and thread.  If kmd->kmd_addr is set, entries whose buffer
 * does not contain that address are skipped.
 */
int
kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
    kmem_log_data_t *kmd)
{
	int i;
	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
	size_t bufsize;

	/* attribute this log entry to a CPU by its chunk bounds */
	for (i = 0; i < NCPU; i++) {
		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
			break;
	}

	if (kmd->kmd_addr) {
		if (b->bc_cache == NULL)
			return (WALK_NEXT);

		/* need the cache's bufsize to bound the buffer */
		if (mdb_vread(&bufsize, sizeof (bufsize),
		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
			mdb_warn(
			    "failed to read cache_bufsize for cache at %p",
			    b->bc_cache);
			return (WALK_ERR);
		}

		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
			return (WALK_NEXT);
	}

	/* i == NCPU means the entry was in no CPU's chunk */
	if (i == NCPU)
		mdb_printf("   ");
	else
		mdb_printf("%3d", i);

	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
	    b->bc_timestamp, b->bc_thread);

	return (WALK_NEXT);
}
2686 
/*
 * ::kmem_log dcmd: display the kmem transaction log.  With an address,
 * show only entries for that bufctl (or, under -b, for the buffer at
 * that address).  Each per-CPU chunk of the log is bounded so entries
 * can be attributed to their CPU.
 */
/*ARGSUSED*/
int
kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_log_header_t lh;
	kmem_cpu_log_header_t clh;
	uintptr_t lhp, clhp;
	int ncpus;
	uintptr_t *cpu;
	GElf_Sym sym;
	kmem_log_cpu_t *kmc;
	int i;
	kmem_log_data_t kmd;
	uint_t opt_b = FALSE;

	if (mdb_getopts(argc, argv,
	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");
		return (DCMD_ERR);
	}

	/* logging is only enabled when kmem_flags requests it */
	if (lhp == NULL) {
		mdb_warn("no kmem transaction log\n");
		return (DCMD_ERR);
	}

	mdb_readvar(&ncpus, "ncpus");

	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
		mdb_warn("failed to read log header at %p", lhp);
		return (DCMD_ERR);
	}

	/* address of the first per-CPU log header within the log header */
	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);

	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);

	if (mdb_lookup_by_name("cpu", &sym) == -1) {
		mdb_warn("couldn't find 'cpu' array");
		return (DCMD_ERR);
	}

	/* sanity-check that the target's cpu[] matches our NCPU */
	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
		    NCPU * sizeof (uintptr_t), sym.st_size);
		return (DCMD_ERR);
	}

	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
		mdb_warn("failed to read cpu array at %p", sym.st_value);
		return (DCMD_ERR);
	}

	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
	kmd.kmd_addr = NULL;
	kmd.kmd_cpu = kmc;

	/* compute each present CPU's [low, high) chunk of the log */
	for (i = 0; i < NCPU; i++) {

		if (cpu[i] == NULL)
			continue;

		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
			mdb_warn("cannot read cpu %d's log header at %p",
			    i, clhp);
			return (DCMD_ERR);
		}

		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
		    (uintptr_t)lh.lh_base;
		kmc[i].kmc_high = (uintptr_t)clh.clh_current;

		clhp += sizeof (kmem_cpu_log_header_t);
	}

	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
	    "TIMESTAMP", "THREAD");

	/*
	 * If we have been passed an address, print out only log entries
	 * corresponding to that address.  If opt_b is specified, then interpret
	 * the address as a bufctl.
	 */
	if (flags & DCMD_ADDRSPEC) {
		kmem_bufctl_audit_t b;

		if (opt_b) {
			kmd.kmd_addr = addr;
		} else {
			if (mdb_vread(&b,
			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
				mdb_warn("failed to read bufctl at %p", addr);
				return (DCMD_ERR);
			}

			/* print just this one entry and we're done */
			(void) kmem_log_walk(addr, &b, &kmd);

			return (DCMD_OK);
		}
	}

	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
		mdb_warn("can't find kmem log walker");
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}
2798 
/* state passed to bufctl_history_callback() when ::bufctl -h recurses */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags to pass to bufctl() */
	int		bhc_argc;	/* argument count for bufctl() */
	const mdb_arg_t	*bhc_argv;	/* arguments (with -H prepended) */
	int		bhc_ret;	/* last return value from bufctl() */
} bufctl_history_cb_t;
2805 
2806 /*ARGSUSED*/
2807 static int
2808 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2809 {
2810 	bufctl_history_cb_t *bhc = arg;
2811 
2812 	bhc->bhc_ret =
2813 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2814 
2815 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2816 
2817 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2818 }
2819 
/*
 * Print the extended help text for the ::bufctl dcmd.
 */
void
bufctl_help(void)
{
	mdb_printf("%s",
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n");
}
2842 
/*
 * ::bufctl dcmd: display a kmem_bufctl_audit_t, subject to the filters
 * described in bufctl_help().  With -h, walk the bufctl's transaction
 * history and run ::bufctl (with -H, to prevent recursion) on each entry.
 */
int
bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_bufctl_audit_t bc;
	uint_t verbose = FALSE;
	uint_t history = FALSE;
	uint_t in_history = FALSE;
	uintptr_t caller = NULL, thread = NULL;
	uintptr_t laddr, haddr, baddr = NULL;
	hrtime_t earliest = 0, latest = 0;
	int i, depth;
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
	    'h', MDB_OPT_SETBITS, TRUE, &history,
	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
	    'c', MDB_OPT_UINTPTR, &caller,
	    't', MDB_OPT_UINTPTR, &thread,
	    'e', MDB_OPT_UINT64, &earliest,
	    'l', MDB_OPT_UINT64, &latest,
	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
		return (DCMD_USAGE);

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	/* -H is internal-only; it must come from a -h invocation */
	if (in_history && !history)
		return (DCMD_USAGE);

	if (history && !in_history) {
		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
		    UM_SLEEP | UM_GC);
		bufctl_history_cb_t bhc;

		nargv[0].a_type = MDB_TYPE_STRING;
		nargv[0].a_un.a_str = "-H";		/* prevent recursion */

		for (i = 0; i < argc; i++)
			nargv[i + 1] = argv[i];

		/*
		 * When in history mode, we treat each element as if it
		 * were in a separate loop, so that the headers group
		 * bufctls with similar histories.
		 */
		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
		bhc.bhc_argc = argc + 1;
		bhc.bhc_argv = nargv;
		bhc.bhc_ret = DCMD_OK;

		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
		    addr) == -1) {
			mdb_warn("unable to walk bufctl_history");
			return (DCMD_ERR);
		}

		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
			mdb_printf("\n");

		return (bhc.bhc_ret);
	}

	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
		if (verbose) {
			mdb_printf("%16s %16s %16s %16s\n"
			    "%<u>%16s %16s %16s %16s%</u>\n",
			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
			    "", "CACHE", "LASTLOG", "CONTENTS");
		} else {
			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
		}
	}

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (DCMD_ERR);
	}

	/*
	 * Guard against bogus bc_depth in case the bufctl is corrupt or
	 * the address does not really refer to a bufctl.
	 */
	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);

	if (caller != NULL) {
		/* default match window: the pointer-sized range at caller */
		laddr = caller;
		haddr = caller + sizeof (caller);

		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
			/*
			 * We were provided an exact symbol value; any
			 * address in the function is valid.
			 */
			laddr = (uintptr_t)sym.st_value;
			haddr = (uintptr_t)sym.st_value + sym.st_size;
		}

		for (i = 0; i < depth; i++)
			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
				break;

		/* caller not in the stack trace: filter this bufctl out */
		if (i == depth)
			return (DCMD_OK);
	}

	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
		return (DCMD_OK);

	if (earliest != 0 && bc.bc_timestamp < earliest)
		return (DCMD_OK);

	if (latest != 0 && bc.bc_timestamp > latest)
		return (DCMD_OK);

	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
		return (DCMD_OK);

	/* in a pipeline, emit only the (filtered) bufctl address */
	if (flags & DCMD_PIPE_OUT) {
		mdb_printf("%#lr\n", addr);
		return (DCMD_OK);
	}

	if (verbose) {
		mdb_printf(
		    "%<b>%16p%</b> %16p %16llx %16p\n"
		    "%16s %16p %16p %16p\n",
		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);

		mdb_inc_indent(17);
		for (i = 0; i < depth; i++)
			mdb_printf("%a\n", bc.bc_stack[i]);
		mdb_dec_indent(17);
		mdb_printf("\n");
	} else {
		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
		    bc.bc_timestamp, bc.bc_thread);

		/*
		 * Print the first stack frame that isn't a kmem_ internal
		 * function -- i.e. the interesting caller.
		 */
		for (i = 0; i < depth; i++) {
			if (mdb_lookup_by_addr(bc.bc_stack[i],
			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
				continue;
			if (strncmp(c, "kmem_", 5) == 0)
				continue;
			mdb_printf(" %a\n", bc.bc_stack[i]);
			break;
		}

		if (i >= depth)
			mdb_printf("\n");
	}

	return (DCMD_OK);
}
3001 
typedef struct kmem_verify {
	uint64_t *kmv_buf;		/* buffer to read cache contents into */
	size_t kmv_size;		/* number of bytes in kmv_buf */
	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* if set, suppress per-buffer reports */
	struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;
3009 
3010 /*
3011  * verify_pattern()
3012  * 	verify that buf is filled with the pattern pat.
3013  */
3014 static int64_t
3015 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3016 {
3017 	/*LINTED*/
3018 	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3019 	uint64_t *buf;
3020 
3021 	for (buf = buf_arg; buf < bufend; buf++)
3022 		if (*buf != pat)
3023 			return ((uintptr_t)buf - (uintptr_t)buf_arg);
3024 	return (-1);
3025 }
3026 
3027 /*
3028  * verify_buftag()
3029  *	verify that btp->bt_bxstat == (bcp ^ pat)
3030  */
3031 static int
3032 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3033 {
3034 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3035 }
3036 
3037 /*
3038  * verify_free()
3039  * 	verify the integrity of a free block of memory by checking
3040  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3041  */
3042 /*ARGSUSED1*/
3043 static int
3044 verify_free(uintptr_t addr, const void *data, void *private)
3045 {
3046 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3047 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3048 	int64_t corrupt;		/* corruption offset */
3049 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3050 	kmem_cache_t *cp = &kmv->kmv_cache;
3051 	int besilent = kmv->kmv_besilent;
3052 
3053 	/*LINTED*/
3054 	buftagp = KMEM_BUFTAG(cp, buf);
3055 
3056 	/*
3057 	 * Read the buffer to check.
3058 	 */
3059 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3060 		if (!besilent)
3061 			mdb_warn("couldn't read %p", addr);
3062 		return (WALK_NEXT);
3063 	}
3064 
3065 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3066 	    KMEM_FREE_PATTERN)) >= 0) {
3067 		if (!besilent)
3068 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3069 			    addr, (uintptr_t)addr + corrupt);
3070 		goto corrupt;
3071 	}
3072 	/*
3073 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3074 	 * the first bytes of the buffer, hence we cannot check for red
3075 	 * zone corruption.
3076 	 */
3077 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3078 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3079 		if (!besilent)
3080 			mdb_printf("buffer %p (free) seems to "
3081 			    "have a corrupt redzone pattern\n", addr);
3082 		goto corrupt;
3083 	}
3084 
3085 	/*
3086 	 * confirm bufctl pointer integrity.
3087 	 */
3088 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3089 		if (!besilent)
3090 			mdb_printf("buffer %p (free) has a corrupt "
3091 			    "buftag\n", addr);
3092 		goto corrupt;
3093 	}
3094 
3095 	return (WALK_NEXT);
3096 corrupt:
3097 	kmv->kmv_corruption++;
3098 	return (WALK_NEXT);
3099 }
3100 
3101 /*
3102  * verify_alloc()
3103  * 	Verify that the buftag of an allocated buffer makes sense with respect
3104  * 	to the buffer.
3105  */
3106 /*ARGSUSED1*/
3107 static int
3108 verify_alloc(uintptr_t addr, const void *data, void *private)
3109 {
3110 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3111 	kmem_cache_t *cp = &kmv->kmv_cache;
3112 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3113 	/*LINTED*/
3114 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3115 	uint32_t *ip = (uint32_t *)buftagp;
3116 	uint8_t *bp = (uint8_t *)buf;
3117 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3118 	int besilent = kmv->kmv_besilent;
3119 
3120 	/*
3121 	 * Read the buffer to check.
3122 	 */
3123 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3124 		if (!besilent)
3125 			mdb_warn("couldn't read %p", addr);
3126 		return (WALK_NEXT);
3127 	}
3128 
3129 	/*
3130 	 * There are two cases to handle:
3131 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3132 	 *    0xfeedfacefeedface at the end of it
3133 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3134 	 *    0xbb just past the end of the region in use.  At the buftag,
3135 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3136 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3137 	 *    endianness), followed by 32 bits containing the offset of the
3138 	 *    0xbb byte in the buffer.
3139 	 *
3140 	 * Finally, the two 32-bit words that comprise the second half of the
3141 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3142 	 */
3143 
3144 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3145 		looks_ok = 1;
3146 	else if (!KMEM_SIZE_VALID(ip[1]))
3147 		size_ok = 0;
3148 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3149 		looks_ok = 1;
3150 	else
3151 		size_ok = 0;
3152 
3153 	if (!size_ok) {
3154 		if (!besilent)
3155 			mdb_printf("buffer %p (allocated) has a corrupt "
3156 			    "redzone size encoding\n", addr);
3157 		goto corrupt;
3158 	}
3159 
3160 	if (!looks_ok) {
3161 		if (!besilent)
3162 			mdb_printf("buffer %p (allocated) has a corrupt "
3163 			    "redzone signature\n", addr);
3164 		goto corrupt;
3165 	}
3166 
3167 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3168 		if (!besilent)
3169 			mdb_printf("buffer %p (allocated) has a "
3170 			    "corrupt buftag\n", addr);
3171 		goto corrupt;
3172 	}
3173 
3174 	return (WALK_NEXT);
3175 corrupt:
3176 	kmv->kmv_corruption++;
3177 	return (WALK_NEXT);
3178 }
3179 
3180 /*ARGSUSED2*/
3181 int
3182 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3183 {
3184 	if (flags & DCMD_ADDRSPEC) {
3185 		int check_alloc = 0, check_free = 0;
3186 		kmem_verify_t kmv;
3187 
3188 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3189 		    addr) == -1) {
3190 			mdb_warn("couldn't read kmem_cache %p", addr);
3191 			return (DCMD_ERR);
3192 		}
3193 
3194 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3195 		    sizeof (kmem_buftag_t);
3196 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3197 		kmv.kmv_corruption = 0;
3198 
3199 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3200 			check_alloc = 1;
3201 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3202 				check_free = 1;
3203 		} else {
3204 			if (!(flags & DCMD_LOOP)) {
3205 				mdb_warn("cache %p (%s) does not have "
3206 				    "redzone checking enabled\n", addr,
3207 				    kmv.kmv_cache.cache_name);
3208 			}
3209 			return (DCMD_ERR);
3210 		}
3211 
3212 		if (flags & DCMD_LOOP) {
3213 			/*
3214 			 * table mode, don't print out every corrupt buffer
3215 			 */
3216 			kmv.kmv_besilent = 1;
3217 		} else {
3218 			mdb_printf("Summary for cache '%s'\n",
3219 			    kmv.kmv_cache.cache_name);
3220 			mdb_inc_indent(2);
3221 			kmv.kmv_besilent = 0;
3222 		}
3223 
3224 		if (check_alloc)
3225 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3226 		if (check_free)
3227 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3228 
3229 		if (flags & DCMD_LOOP) {
3230 			if (kmv.kmv_corruption == 0) {
3231 				mdb_printf("%-*s %?p clean\n",
3232 				    KMEM_CACHE_NAMELEN,
3233 				    kmv.kmv_cache.cache_name, addr);
3234 			} else {
3235 				char *s = "";	/* optional s in "buffer[s]" */
3236 				if (kmv.kmv_corruption > 1)
3237 					s = "s";
3238 
3239 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3240 				    KMEM_CACHE_NAMELEN,
3241 				    kmv.kmv_cache.cache_name, addr,
3242 				    kmv.kmv_corruption, s);
3243 			}
3244 		} else {
3245 			/*
3246 			 * This is the more verbose mode, when the user has
3247 			 * type addr::kmem_verify.  If the cache was clean,
3248 			 * nothing will have yet been printed. So say something.
3249 			 */
3250 			if (kmv.kmv_corruption == 0)
3251 				mdb_printf("clean\n");
3252 
3253 			mdb_dec_indent(2);
3254 		}
3255 	} else {
3256 		/*
3257 		 * If the user didn't specify a cache to verify, we'll walk all
3258 		 * kmem_cache's, specifying ourself as a callback for each...
3259 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3260 		 */
3261 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3262 		    "Cache Name", "Addr", "Cache Integrity");
3263 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3264 	}
3265 
3266 	return (DCMD_OK);
3267 }
3268 
/*
 * In-memory copy of the target's vmem arenas, built by vmem_walk_init()
 * from 'vmem_list' and arranged into a forest via each arena's source
 * (parent) arena.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next on the flat list snapshot */
	struct vmem_node *vn_parent;	/* source arena's node, if any */
	struct vmem_node *vn_sibling;	/* next child of our parent */
	struct vmem_node *vn_children;	/* head of our child list */
	uintptr_t vn_addr;		/* target address of this vmem_t */
	int vn_marked;			/* visited (used by postfix walk) */
	vmem_t vn_vmem;			/* local copy of the arena */
} vmem_node_t;

/* Walk state shared by the vmem and vmem_postfix walkers. */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* root(s) of the arena forest */
	vmem_node_t *vw_current;	/* next node to visit */
} vmem_walk_t;
3283 
/*
 * Initialize a walk over all vmem arenas: snapshot the target's
 * 'vmem_list' into local vmem_node_t's, then link each node under its
 * source (parent) arena, forming a forest rooted at source-less arenas.
 */
int
vmem_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t vaddr, paddr;
	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
	vmem_walk_t *vw;

	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
		mdb_warn("couldn't read 'vmem_list'");
		return (WALK_ERR);
	}

	/* First pass: copy every arena on the global list. */
	while (vaddr != NULL) {
		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
		vp->vn_addr = vaddr;
		vp->vn_next = head;
		head = vp;

		/* Remember the node matching a caller-specified start. */
		if (vaddr == wsp->walk_addr)
			current = vp;

		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
			mdb_warn("couldn't read vmem_t at %p", vaddr);
			goto err;
		}

		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
	}

	/* Second pass: link each arena under its source arena. */
	for (vp = head; vp != NULL; vp = vp->vn_next) {

		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
			/* No source arena: this node is a root. */
			vp->vn_sibling = root;
			root = vp;
			continue;
		}

		for (parent = head; parent != NULL; parent = parent->vn_next) {
			if (parent->vn_addr != paddr)
				continue;
			vp->vn_sibling = parent->vn_children;
			parent->vn_children = vp;
			vp->vn_parent = parent;
			break;
		}

		if (parent == NULL) {
			mdb_warn("couldn't find %p's parent (%p)\n",
			    vp->vn_addr, paddr);
			goto err;
		}
	}

	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
	vw->vw_root = root;

	/* Start from the requested arena, or from the roots. */
	if (current != NULL)
		vw->vw_current = current;
	else
		vw->vw_current = root;

	wsp->walk_data = vw;
	return (WALK_NEXT);
err:
	/* Tear down the partially-built node list. */
	for (vp = head; head != NULL; vp = head) {
		head = vp->vn_next;
		mdb_free(vp, sizeof (vmem_node_t));
	}

	return (WALK_ERR);
}
3355 
/*
 * Step the pre-order arena walk: visit the current node, then descend to
 * its first child; when a subtree is exhausted, resume at the nearest
 * ancestor's next sibling.
 */
int
vmem_walk_step(mdb_walk_state_t *wsp)
{
	vmem_walk_t *vw = wsp->walk_data;
	vmem_node_t *vp;
	int rval;

	if ((vp = vw->vw_current) == NULL)
		return (WALK_DONE);

	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);

	if (vp->vn_children != NULL) {
		vw->vw_current = vp->vn_children;
		return (rval);
	}

	/* No children: climb until an ancestor has an unvisited sibling. */
	do {
		vw->vw_current = vp->vn_sibling;
		vp = vp->vn_parent;
	} while (vw->vw_current == NULL && vp != NULL);

	return (rval);
}
3380 
3381 /*
3382  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3383  * children are visited before their parent.  We perform the postfix walk
3384  * iteratively (rather than recursively) to allow mdb to regain control
3385  * after each callback.
3386  */
3387 int
3388 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3389 {
3390 	vmem_walk_t *vw = wsp->walk_data;
3391 	vmem_node_t *vp = vw->vw_current;
3392 	int rval;
3393 
3394 	/*
3395 	 * If this node is marked, then we know that we have already visited
3396 	 * all of its children.  If the node has any siblings, they need to
3397 	 * be visited next; otherwise, we need to visit the parent.  Note
3398 	 * that vp->vn_marked will only be zero on the first invocation of
3399 	 * the step function.
3400 	 */
3401 	if (vp->vn_marked) {
3402 		if (vp->vn_sibling != NULL)
3403 			vp = vp->vn_sibling;
3404 		else if (vp->vn_parent != NULL)
3405 			vp = vp->vn_parent;
3406 		else {
3407 			/*
3408 			 * We have neither a parent, nor a sibling, and we
3409 			 * have already been visited; we're done.
3410 			 */
3411 			return (WALK_DONE);
3412 		}
3413 	}
3414 
3415 	/*
3416 	 * Before we visit this node, visit its children.
3417 	 */
3418 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3419 		vp = vp->vn_children;
3420 
3421 	vp->vn_marked = 1;
3422 	vw->vw_current = vp;
3423 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3424 
3425 	return (rval);
3426 }
3427 
/*
 * Recursively free the arena forest: a node's subtree is freed first,
 * then the node itself, then its siblings.  The walk state itself is
 * freed along with the final node (a root with no remaining siblings).
 */
void
vmem_walk_fini(mdb_walk_state_t *wsp)
{
	vmem_walk_t *vw = wsp->walk_data;
	vmem_node_t *root = vw->vw_root;
	int done;

	if (root == NULL)
		return;

	/* Free root's children before root itself. */
	if ((vw->vw_root = root->vn_children) != NULL)
		vmem_walk_fini(wsp);

	vw->vw_root = root->vn_sibling;
	/* Last node overall: a root with no siblings left. */
	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
	mdb_free(root, sizeof (vmem_node_t));

	if (done) {
		mdb_free(vw, sizeof (vmem_walk_t));
	} else {
		vmem_walk_fini(wsp);
	}
}
3451 
/* State for the vmem_seg family of walkers. */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type filter (VMEM_NONE: all) */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 sentinel */
	uintptr_t vsw_current;	/* next segment to read */
} vmem_seg_walk_t;
3457 
3458 /*ARGSUSED*/
3459 int
3460 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3461 {
3462 	vmem_seg_walk_t *vsw;
3463 
3464 	if (wsp->walk_addr == NULL) {
3465 		mdb_warn("vmem_%s does not support global walks\n", name);
3466 		return (WALK_ERR);
3467 	}
3468 
3469 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3470 
3471 	vsw->vsw_type = type;
3472 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3473 	vsw->vsw_current = vsw->vsw_start;
3474 
3475 	return (WALK_NEXT);
3476 }
3477 
3478 /*
3479  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3480  */
3481 #define	VMEM_NONE	0
3482 
/* Walk only allocated (VMEM_ALLOC) segments of an arena. */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
3488 
/* Walk only free (VMEM_FREE) segments of an arena. */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
3494 
/* Walk only span (VMEM_SPAN) segments of an arena. */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
3500 
/* Walk all segments of an arena, regardless of type. */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
3506 
3507 int
3508 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3509 {
3510 	vmem_seg_t seg;
3511 	vmem_seg_walk_t *vsw = wsp->walk_data;
3512 	uintptr_t addr = vsw->vsw_current;
3513 	static size_t seg_size = 0;
3514 	int rval;
3515 
3516 	if (!seg_size) {
3517 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3518 			mdb_warn("failed to read 'vmem_seg_size'");
3519 			seg_size = sizeof (vmem_seg_t);
3520 		}
3521 	}
3522 
3523 	if (seg_size < sizeof (seg))
3524 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3525 
3526 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3527 		mdb_warn("couldn't read vmem_seg at %p", addr);
3528 		return (WALK_ERR);
3529 	}
3530 
3531 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3532 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3533 		rval = WALK_NEXT;
3534 	} else {
3535 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3536 	}
3537 
3538 	if (vsw->vsw_current == vsw->vsw_start)
3539 		return (WALK_DONE);
3540 
3541 	return (rval);
3542 }
3543 
3544 void
3545 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3546 {
3547 	vmem_seg_walk_t *vsw = wsp->walk_data;
3548 
3549 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3550 }
3551 
3552 #define	VMEM_NAMEWIDTH	22
3553 
3554 int
3555 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3556 {
3557 	vmem_t v, parent;
3558 	vmem_kstat_t *vkp = &v.vm_kstat;
3559 	uintptr_t paddr;
3560 	int ident = 0;
3561 	char c[VMEM_NAMEWIDTH];
3562 
3563 	if (!(flags & DCMD_ADDRSPEC)) {
3564 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3565 			mdb_warn("can't walk vmem");
3566 			return (DCMD_ERR);
3567 		}
3568 		return (DCMD_OK);
3569 	}
3570 
3571 	if (DCMD_HDRSPEC(flags))
3572 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3573 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3574 		    "TOTAL", "SUCCEED", "FAIL");
3575 
3576 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3577 		mdb_warn("couldn't read vmem at %p", addr);
3578 		return (DCMD_ERR);
3579 	}
3580 
3581 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3582 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3583 			mdb_warn("couldn't trace %p's ancestry", addr);
3584 			ident = 0;
3585 			break;
3586 		}
3587 		paddr = (uintptr_t)parent.vm_source;
3588 	}
3589 
3590 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3591 
3592 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3593 	    addr, VMEM_NAMEWIDTH, c,
3594 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3595 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3596 
3597 	return (DCMD_OK);
3598 }
3599 
/* Help text for the ::vmem_seg dcmd. */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3631 
/*
 * ::vmem_seg dcmd: print one vmem_seg_t, subject to the type, size,
 * timestamp, caller, and thread filters given as options (see
 * vmem_seg_help() for the option summary).
 */
/*ARGSUSED*/
int
vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	vmem_seg_t vs;
	pc_t *stk = vs.vs_stack;
	uintptr_t sz;
	uint8_t t;
	const char *type = NULL;
	GElf_Sym sym;
	char c[MDB_SYM_NAMLEN];
	int no_debug;
	int i;
	int depth;
	uintptr_t laddr, haddr;

	uintptr_t caller = NULL, thread = NULL;
	uintptr_t minsize = 0, maxsize = 0;

	hrtime_t earliest = 0, latest = 0;

	uint_t size = 0;
	uint_t verbose = 0;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv,
	    'c', MDB_OPT_UINTPTR, &caller,
	    'e', MDB_OPT_UINT64, &earliest,
	    'l', MDB_OPT_UINT64, &latest,
	    's', MDB_OPT_SETBITS, TRUE, &size,
	    'm', MDB_OPT_UINTPTR, &minsize,
	    'M', MDB_OPT_UINTPTR, &maxsize,
	    't', MDB_OPT_UINTPTR, &thread,
	    'T', MDB_OPT_STR, &type,
	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
	    NULL) != argc)
		return (DCMD_USAGE);

	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
		if (verbose) {
			mdb_printf("%16s %4s %16s %16s %16s\n"
			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
			    "ADDR", "TYPE", "START", "END", "SIZE",
			    "", "", "THREAD", "TIMESTAMP", "");
		} else {
			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
			    "START", size? "SIZE" : "END", "WHO");
		}
	}

	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
		mdb_warn("couldn't read vmem_seg at %p", addr);
		return (DCMD_ERR);
	}

	/* -T: map the type name (short or long form) to its constant. */
	if (type != NULL) {
		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
			t = VMEM_ALLOC;
		else if (strcmp(type, "FREE") == 0)
			t = VMEM_FREE;
		else if (strcmp(type, "SPAN") == 0)
			t = VMEM_SPAN;
		else if (strcmp(type, "ROTR") == 0 ||
		    strcmp(type, "ROTOR") == 0)
			t = VMEM_ROTOR;
		else if (strcmp(type, "WLKR") == 0 ||
		    strcmp(type, "WALKER") == 0)
			t = VMEM_WALKER;
		else {
			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
			    type);
			return (DCMD_ERR);
		}

		if (vs.vs_type != t)
			return (DCMD_OK);
	}

	sz = vs.vs_end - vs.vs_start;

	/* -m/-M: size filters. */
	if (minsize != 0 && sz < minsize)
		return (DCMD_OK);

	if (maxsize != 0 && sz > maxsize)
		return (DCMD_OK);

	t = vs.vs_type;
	depth = vs.vs_depth;

	/*
	 * debug info, when present, is only accurate for VMEM_ALLOC segments
	 */
	no_debug = (t != VMEM_ALLOC) ||
	    (depth == 0 || depth > VMEM_STACK_DEPTH);

	if (no_debug) {
		/* Debug-dependent filters can never match without it. */
		if (caller != NULL || thread != NULL || earliest != 0 ||
		    latest != 0)
			return (DCMD_OK);		/* not enough info */
	} else {
		if (caller != NULL) {
			laddr = caller;
			haddr = caller + sizeof (caller);

			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
			    sizeof (c), &sym) != -1 &&
			    caller == (uintptr_t)sym.st_value) {
				/*
				 * We were provided an exact symbol value; any
				 * address in the function is valid.
				 */
				laddr = (uintptr_t)sym.st_value;
				haddr = (uintptr_t)sym.st_value + sym.st_size;
			}

			/* Keep only segments with a frame in [laddr, haddr) */
			for (i = 0; i < depth; i++)
				if (vs.vs_stack[i] >= laddr &&
				    vs.vs_stack[i] < haddr)
					break;

			if (i == depth)
				return (DCMD_OK);
		}

		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
			return (DCMD_OK);

		if (earliest != 0 && vs.vs_timestamp < earliest)
			return (DCMD_OK);

		if (latest != 0 && vs.vs_timestamp > latest)
			return (DCMD_OK);
	}

	type = (t == VMEM_ALLOC ? "ALLC" :
	    t == VMEM_FREE ? "FREE" :
	    t == VMEM_SPAN ? "SPAN" :
	    t == VMEM_ROTOR ? "ROTR" :
	    t == VMEM_WALKER ? "WLKR" :
	    "????");

	/* In a pipeline, emit only the segment's address. */
	if (flags & DCMD_PIPE_OUT) {
		mdb_printf("%#lr\n", addr);
		return (DCMD_OK);
	}

	if (verbose) {
		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
		    addr, type, vs.vs_start, vs.vs_end, sz);

		if (no_debug)
			return (DCMD_OK);

		mdb_printf("%16s %4s %16p %16llx\n",
		    "", "", vs.vs_thread, vs.vs_timestamp);

		mdb_inc_indent(17);
		for (i = 0; i < depth; i++) {
			mdb_printf("%a\n", stk[i]);
		}
		mdb_dec_indent(17);
		mdb_printf("\n");
	} else {
		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
		    vs.vs_start, size? sz : vs.vs_end);

		if (no_debug) {
			mdb_printf("\n");
			return (DCMD_OK);
		}

		/*
		 * Show the first stack frame that isn't vmem-internal.
		 * NOTE(review): if every frame resolves to a vmem_* symbol,
		 * the loop exits with i == depth and stk[i] reads one slot
		 * past the saved frames -- confirm depth < VMEM_STACK_DEPTH
		 * always holds here.
		 */
		for (i = 0; i < depth; i++) {
			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
			    c, sizeof (c), &sym) == -1)
				continue;
			if (strncmp(c, "vmem_", 5) == 0)
				continue;
			break;
		}
		mdb_printf(" %a\n", stk[i]);
	}
	return (DCMD_OK);
}
3817 
/* Parameters passed from ::kmalog to its showbc() callback. */
typedef struct kmalog_data {
	uintptr_t	kma_addr;	/* show only events touching this addr */
	hrtime_t	kma_newest;	/* timestamp of newest entry seen */
} kmalog_data_t;
3822 
/*
 * Print one kmem log entry: a timestamp relative to the newest entry
 * seen, the buffer address, the cache name, and the saved stack trace.
 * When kma->kma_addr is set, entries whose buffer does not contain that
 * address are skipped.
 */
/*ARGSUSED*/
static int
showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
{
	char name[KMEM_CACHE_NAMELEN + 1];
	hrtime_t delta;
	int i, depth;
	size_t bufsize;

	/* A zero timestamp marks the end of the used part of the log. */
	if (bcp->bc_timestamp == 0)
		return (WALK_DONE);

	/* The first (newest) entry anchors the relative timestamps. */
	if (kma->kma_newest == 0)
		kma->kma_newest = bcp->bc_timestamp;

	if (kma->kma_addr) {
		if (mdb_vread(&bufsize, sizeof (bufsize),
		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
			mdb_warn(
			    "failed to read cache_bufsize for cache at %p",
			    bcp->bc_cache);
			return (WALK_ERR);
		}

		/* Skip entries whose buffer does not contain kma_addr. */
		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
			return (WALK_NEXT);
	}

	delta = kma->kma_newest - bcp->bc_timestamp;
	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);

	/* Fall back to the cache's address if its name can't be read. */
	if (mdb_readstr(name, sizeof (name), (uintptr_t)
	    &bcp->bc_cache->cache_name) <= 0)
		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);

	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);

	for (i = 0; i < depth; i++)
		mdb_printf("\t %a\n", bcp->bc_stack[i]);

	return (WALK_NEXT);
}
3867 
3868 int
3869 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3870 {
3871 	const char *logname = "kmem_transaction_log";
3872 	kmalog_data_t kma;
3873 
3874 	if (argc > 1)
3875 		return (DCMD_USAGE);
3876 
3877 	kma.kma_newest = 0;
3878 	if (flags & DCMD_ADDRSPEC)
3879 		kma.kma_addr = addr;
3880 	else
3881 		kma.kma_addr = NULL;
3882 
3883 	if (argc > 0) {
3884 		if (argv->a_type != MDB_TYPE_STRING)
3885 			return (DCMD_USAGE);
3886 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3887 			logname = "kmem_failure_log";
3888 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3889 			logname = "kmem_slab_log";
3890 		else
3891 			return (DCMD_USAGE);
3892 	}
3893 
3894 	if (mdb_readvar(&addr, logname) == -1) {
3895 		mdb_warn("failed to read %s log header pointer");
3896 		return (DCMD_ERR);
3897 	}
3898 
3899 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3900 		mdb_warn("failed to walk kmem log");
3901 		return (DCMD_ERR);
3902 	}
3903 
3904 	return (DCMD_OK);
3905 }
3906 
3907 /*
3908  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3909  * The first piece is a structure which we use to accumulate kmem_cache_t
3910  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3911  * walker; we either add all caches, or ones named explicitly as arguments.
3912  */
3913 
/* Accumulator for cache addresses; kmc_caches is UM_GC-allocated. */
typedef struct kmclist {
	const char *kmc_name;			/* Name to match (or NULL) */
	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
	int kmc_nelems;				/* Num entries in kmc_caches */
	int kmc_size;				/* Size of kmc_caches array */
} kmclist_t;
3920 
3921 static int
3922 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3923 {
3924 	void *p;
3925 	int s;
3926 
3927 	if (kmc->kmc_name == NULL ||
3928 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3929 		/*
3930 		 * If we have a match, grow our array (if necessary), and then
3931 		 * add the virtual address of the matching cache to our list.
3932 		 */
3933 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3934 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3935 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3936 
3937 			bcopy(kmc->kmc_caches, p,
3938 			    sizeof (uintptr_t) * kmc->kmc_size);
3939 
3940 			kmc->kmc_caches = p;
3941 			kmc->kmc_size = s;
3942 		}
3943 
3944 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3945 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3946 	}
3947 
3948 	return (WALK_NEXT);
3949 }
3950 
3951 /*
3952  * The second piece of ::kmausers is a hash table of allocations.  Each
3953  * allocation owner is identified by its stack trace and data_size.  We then
3954  * track the total bytes of all such allocations, and the number of allocations
3955  * to report at the end.  Once we have a list of caches, we walk through the
3956  * allocated bufctls of each, and update our hash table accordingly.
3957  */
3958 
/* One allocation owner: a unique (stack trace, data_size) pair. */
typedef struct kmowner {
	struct kmowner *kmo_head;		/* First hash elt in bucket */
	struct kmowner *kmo_next;		/* Next hash elt in chain */
	size_t kmo_signature;			/* Hash table signature */
	uint_t kmo_num;				/* Number of allocations */
	size_t kmo_data_size;			/* Size of each allocation */
	size_t kmo_total_size;			/* Total bytes of allocation */
	int kmo_depth;				/* Depth of stack trace */
	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
} kmowner_t;

/* Aggregate state for the ::kmausers owner hash. */
typedef struct kmusers {
	uintptr_t kmu_addr;			/* address of interest */
	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
	kmowner_t *kmu_hash;			/* Hash table of owners */
	int kmu_nelems;				/* Number of entries in use */
	int kmu_size;				/* Total number of entries */
} kmusers_t;
3977 
/*
 * Record one allocation in the ::kmausers owner hash.  Owners are keyed
 * by a signature computed from data_size plus the stack trace; 'size'
 * accumulates into the matching owner's running total.
 */
static void
kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
    size_t size, size_t data_size)
{
	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
	size_t bucket, signature = data_size;
	kmowner_t *kmo, *kmoend;

	/*
	 * If the hash table is full, double its size and rehash everything.
	 */
	if (kmu->kmu_nelems >= kmu->kmu_size) {
		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;

		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
		kmu->kmu_hash = kmo;
		kmu->kmu_size = s;

		/* Clear all bucket heads, then relink the live entries. */
		kmoend = kmu->kmu_hash + kmu->kmu_size;
		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
			kmo->kmo_head = NULL;

		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
			/* kmu_size is a power of two, so mask for bucket */
			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
			kmu->kmu_hash[bucket].kmo_head = kmo;
		}
	}

	/*
	 * Finish computing the hash signature from the stack trace, and then
	 * see if the owner is in the hash table.  If so, update our stats.
	 */
	for (i = 0; i < depth; i++)
		signature += bcp->bc_stack[i];

	bucket = signature & (kmu->kmu_size - 1);

	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
		if (kmo->kmo_signature == signature) {
			/* difference ends up nonzero if any field differs */
			size_t difference = 0;

			difference |= kmo->kmo_data_size - data_size;
			difference |= kmo->kmo_depth - depth;

			for (i = 0; i < depth; i++) {
				difference |= kmo->kmo_stack[i] -
				    bcp->bc_stack[i];
			}

			if (difference == 0) {
				kmo->kmo_total_size += size;
				kmo->kmo_num++;
				return;
			}
		}
	}

	/*
	 * If the owner is not yet hashed, grab the next element and fill it
	 * in based on the allocation information.
	 */
	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
	kmu->kmu_hash[bucket].kmo_head = kmo;

	kmo->kmo_signature = signature;
	kmo->kmo_num = 1;
	kmo->kmo_data_size = data_size;
	kmo->kmo_total_size = size;
	kmo->kmo_depth = depth;

	for (i = 0; i < depth; i++)
		kmo->kmo_stack[i] = bcp->bc_stack[i];
}
4055 
4056 /*
4057  * When ::kmausers is invoked without the -f flag, we simply update our hash
4058  * table with the information from each allocated bufctl.
4059  */
4060 /*ARGSUSED*/
4061 static int
4062 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4063 {
4064 	const kmem_cache_t *cp = kmu->kmu_cache;
4065 
4066 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4067 	return (WALK_NEXT);
4068 }
4069 
4070 /*
4071  * When ::kmausers is invoked with the -f flag, we print out the information
4072  * for each bufctl as well as updating the hash table.
4073  */
/*
 * When ::kmausers is invoked with the -f flag, we print out the information
 * for each bufctl as well as updating the hash table.
 */
static int
kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
{
	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
	const kmem_cache_t *cp = kmu->kmu_cache;
	kmem_bufctl_t bufctl;

	/* With an address filter, skip bufctls whose buffer misses it. */
	if (kmu->kmu_addr) {
		if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
			mdb_warn("couldn't read bufctl at %p", addr);
		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
		    cp->cache_bufsize)
			return (WALK_NEXT);
	}

	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);

	for (i = 0; i < depth; i++)
		mdb_printf("\t %a\n", bcp->bc_stack[i]);

	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
	return (WALK_NEXT);
}
4099 
4100 /*
4101  * We sort our results by allocation size before printing them.
4102  */
4103 static int
4104 kmownercmp(const void *lp, const void *rp)
4105 {
4106 	const kmowner_t *lhs = lp;
4107 	const kmowner_t *rhs = rp;
4108 
4109 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4110 }
4111 
4112 /*
4113  * The main engine of ::kmausers is relatively straightforward: First we
4114  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4115  * iterate over the allocated bufctls of each cache in the list.  Finally,
4116  * we sort and print our results.
4117  */
/*
 * ::kmausers dcmd: report the largest consumers of the kmem allocator.
 * Options: -e includes small users; -f prints each allocation; cache
 * names restrict the search; an address restricts output and implies -f.
 */
/*ARGSUSED*/
int
kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	int mem_threshold = 8192;	/* Minimum # bytes for printing */
	int cnt_threshold = 100;	/* Minimum # blocks for printing */
	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
	int do_all_caches = 1;		/* Do all caches (no arguments) */
	int opt_e = FALSE;		/* Include "small" users */
	int opt_f = FALSE;		/* Print stack traces */

	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
	kmowner_t *kmo, *kmoend;
	int i, oelems;

	kmclist_t kmc;
	kmusers_t kmu;

	bzero(&kmc, sizeof (kmc));
	bzero(&kmu, sizeof (kmu));

	/*
	 * Consume options, then treat each remaining argument as a cache
	 * name to accumulate (via kmc_add) into kmc_caches.
	 */
	while ((i = mdb_getopts(argc, argv,
	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {

		argv += i;	/* skip past options we just processed */
		argc -= i;	/* adjust argc */

		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
			return (DCMD_USAGE);

		oelems = kmc.kmc_nelems;
		kmc.kmc_name = argv->a_un.a_str;
		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);

		/* No new element means the name matched no cache. */
		if (kmc.kmc_nelems == oelems) {
			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
			return (DCMD_ERR);
		}

		do_all_caches = 0;
		argv++;
		argc--;
	}

	/* An explicit address restricts output and implies -f. */
	if (flags & DCMD_ADDRSPEC) {
		opt_f = TRUE;
		kmu.kmu_addr = addr;
	} else {
		kmu.kmu_addr = NULL;
	}

	if (opt_e)
		mem_threshold = cnt_threshold = 0;

	if (opt_f)
		callback = (mdb_walk_cb_t)kmause2;

	if (do_all_caches) {
		kmc.kmc_name = NULL; /* match all cache names */
		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
	}

	/* Accumulate the allocated bufctls of each audited cache. */
	for (i = 0; i < kmc.kmc_nelems; i++) {
		uintptr_t cp = kmc.kmc_caches[i];
		kmem_cache_t c;

		if (mdb_vread(&c, sizeof (c), cp) == -1) {
			mdb_warn("failed to read cache at %p", cp);
			continue;
		}

		if (!(c.cache_flags & KMF_AUDIT)) {
			if (!do_all_caches) {
				mdb_warn("KMF_AUDIT is not enabled for %s\n",
				    c.cache_name);
			}
			continue;
		}

		kmu.kmu_cache = &c;
		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
		audited_caches++;
	}

	if (audited_caches == 0 && do_all_caches) {
		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
		return (DCMD_ERR);
	}

	/* Sort owners by total size; print those above the thresholds. */
	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
	kmoend = kmu.kmu_hash + kmu.kmu_nelems;

	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
		if (kmo->kmo_total_size < mem_threshold &&
		    kmo->kmo_num < cnt_threshold)
			continue;
		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
		for (i = 0; i < kmo->kmo_depth; i++)
			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
	}

	return (DCMD_OK);
}
4223 
/* Help text for the ::kmausers dcmd. */
void
kmausers_help(void)
{
	mdb_printf(
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n");
}
4239 
/*
 * Returns the target's kmem_ready value, or -1 (with errno set) if the
 * variable cannot be read.
 */
static int
kmem_ready_check(void)
{
	int ready;

	if (mdb_readvar(&ready, "kmem_ready") < 0)
		return (-1); /* errno is set for us */

	return (ready);
}
4250 
4251 void
4252 kmem_statechange(void)
4253 {
4254 	static int been_ready = 0;
4255 
4256 	if (been_ready)
4257 		return;
4258 
4259 	if (kmem_ready_check() <= 0)
4260 		return;
4261 
4262 	been_ready = 1;
4263 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4264 }
4265 
/* Module initialization: register the kmem_cache walker by hand. */
void
kmem_init(void)
{
	/* Walker over the target's global list of kmem caches. */
	mdb_walker_t w = {
		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
		list_walk_step, list_walk_fini
	};

	/*
	 * If kmem is ready, we'll need to invoke the kmem_cache walker
	 * immediately.  Walkers in the linkage structure won't be ready until
	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
	 * is ready, we'll use the walker to initialize the caches.  If kmem
	 * isn't ready, we'll register a callback that will allow us to defer
	 * cache walking until it is.
	 */
	if (mdb_add_walker(&w) != 0) {
		mdb_warn("failed to add kmem_cache walker");
		return;
	}

	kmem_statechange();
}
4289 
/* Parameters for the ::whatthread stack scan. */
typedef struct whatthread {
	uintptr_t	wt_target;	/* pointer value to search for */
	int		wt_verbose;	/* -v: report each matching location */
} whatthread_t;
4294 
/*
 * Scan one thread's stack, word by word, for w->wt_target.  In verbose
 * mode each matching stack location is reported; otherwise the thread's
 * address is printed once (pipe-friendly) at the first match.
 */
static int
whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
{
	uintptr_t current, data;

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	/*
	 * Warn about swapped out threads, but drive on anyway
	 */
	if (!(t->t_schedflag & TS_LOAD)) {
		mdb_warn("thread %p's stack swapped out\n", addr);
		return (WALK_NEXT);
	}

	/*
	 * Search the thread's stack for the given pointer.  Note that it would
	 * be more efficient to follow ::kgrep's lead and read in page-sized
	 * chunks, but this routine is already fast and simple.
	 */
	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
	    current += sizeof (uintptr_t)) {
		if (mdb_vread(&data, sizeof (data), current) == -1) {
			mdb_warn("couldn't read thread %p's stack at %p",
			    addr, current);
			return (WALK_ERR);
		}

		if (data == w->wt_target) {
			if (w->wt_verbose) {
				mdb_printf("%p in thread %p's stack%s\n",
				    current, addr, stack_active(t, current));
			} else {
				/* One line per matching thread is enough. */
				mdb_printf("%#lr\n", addr);
				return (WALK_NEXT);
			}
		}
	}

	return (WALK_NEXT);
}
4337 
4338 int
4339 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4340 {
4341 	whatthread_t w;
4342 
4343 	if (!(flags & DCMD_ADDRSPEC))
4344 		return (DCMD_USAGE);
4345 
4346 	w.wt_verbose = FALSE;
4347 	w.wt_target = addr;
4348 
4349 	if (mdb_getopts(argc, argv,
4350 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4351 		return (DCMD_USAGE);
4352 
4353 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4354 	    == -1) {
4355 		mdb_warn("couldn't walk threads");
4356 		return (DCMD_ERR);
4357 	}
4358 
4359 	return (DCMD_OK);
4360 }
4361