xref: /titanic_51/usr/src/cmd/mdb/common/modules/genunix/kmem.c (revision fc51f9bbbff02dbd8c3adf640b1a184ceeb58fa5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <sys/cpuvar.h>
30 #include <sys/kmem_impl.h>
31 #include <sys/vmem_impl.h>
32 #include <sys/machelf.h>
33 #include <sys/modctl.h>
34 #include <sys/kobj.h>
35 #include <sys/panic.h>
36 #include <sys/stack.h>
37 #include <sys/sysmacros.h>
38 #include <vm/page.h>
39 
40 #include "avl.h"
41 #include "combined.h"
42 #include "dist.h"
43 #include "kmem.h"
44 #include "list.h"
45 
/*
 * Emit a debugging message, prefixed with "kmem debug: ", when
 * mdb_debug_level is non-zero.  The argument is a complete, parenthesized
 * mdb_printf() argument list, e.g. dprintf(("read %d rounds\n", n));
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it requires the trailing semicolon and composes safely
 * with an unbraced if/else at the call site.
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: "); \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
51 
/*
 * KM_* bit flags.  NOTE(review): their consumers are outside this section of
 * the file; they appear to select which buffers a kmem walk reports and how
 * it reports them -- confirm against the walker code.
 */
#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

/* Non-zero enables dprintf() output; toggled by the kmem_debug() dcmd. */
static int mdb_debug_level = 0;
59 
60 /*ARGSUSED*/
61 static int
62 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
63 {
64 	mdb_walker_t w;
65 	char descr[64];
66 
67 	(void) mdb_snprintf(descr, sizeof (descr),
68 	    "walk the %s cache", c->cache_name);
69 
70 	w.walk_name = c->cache_name;
71 	w.walk_descr = descr;
72 	w.walk_init = kmem_walk_init;
73 	w.walk_step = kmem_walk_step;
74 	w.walk_fini = kmem_walk_fini;
75 	w.walk_init_arg = (void *)addr;
76 
77 	if (mdb_add_walker(&w) == -1)
78 		mdb_warn("failed to add %s walker", c->cache_name);
79 
80 	return (WALK_NEXT);
81 }
82 
83 /*ARGSUSED*/
84 int
85 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
86 {
87 	mdb_debug_level ^= 1;
88 
89 	mdb_printf("kmem: debugging is now %s\n",
90 	    mdb_debug_level ? "on" : "off");
91 
92 	return (DCMD_OK);
93 }
94 
95 int
96 kmem_cache_walk_init(mdb_walk_state_t *wsp)
97 {
98 	GElf_Sym sym;
99 
100 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
101 		mdb_warn("couldn't find kmem_caches");
102 		return (WALK_ERR);
103 	}
104 
105 	wsp->walk_addr = (uintptr_t)sym.st_value;
106 
107 	return (list_walk_init_named(wsp, "cache list", "cache"));
108 }
109 
110 int
111 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
112 {
113 	if (wsp->walk_addr == NULL) {
114 		mdb_warn("kmem_cpu_cache doesn't support global walks");
115 		return (WALK_ERR);
116 	}
117 
118 	if (mdb_layered_walk("cpu", wsp) == -1) {
119 		mdb_warn("couldn't walk 'cpu'");
120 		return (WALK_ERR);
121 	}
122 
123 	wsp->walk_data = (void *)wsp->walk_addr;
124 
125 	return (WALK_NEXT);
126 }
127 
128 int
129 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
130 {
131 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
132 	const cpu_t *cpu = wsp->walk_layer;
133 	kmem_cpu_cache_t cc;
134 
135 	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
136 
137 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
138 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
139 		return (WALK_ERR);
140 	}
141 
142 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
143 }
144 
145 static int
146 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
147 {
148 	kmem_slab_t *sp = p;
149 	uintptr_t caddr = (uintptr_t)arg;
150 	if ((uintptr_t)sp->slab_cache != caddr) {
151 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
152 		    saddr, caddr, sp->slab_cache);
153 		return (-1);
154 	}
155 
156 	return (0);
157 }
158 
159 static int
160 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
161 {
162 	kmem_slab_t *sp = p;
163 
164 	int rc = kmem_slab_check(p, saddr, arg);
165 	if (rc != 0) {
166 		return (rc);
167 	}
168 
169 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
170 		mdb_warn("slab %p is not a partial slab\n", saddr);
171 		return (-1);
172 	}
173 
174 	return (0);
175 }
176 
177 static int
178 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
179 {
180 	kmem_slab_t *sp = p;
181 
182 	int rc = kmem_slab_check(p, saddr, arg);
183 	if (rc != 0) {
184 		return (rc);
185 	}
186 
187 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
188 		mdb_warn("slab %p is not completely allocated\n", saddr);
189 		return (-1);
190 	}
191 
192 	return (0);
193 }
194 
195 typedef struct {
196 	uintptr_t kns_cache_addr;
197 	int kns_nslabs;
198 } kmem_nth_slab_t;
199 
200 static int
201 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
202 {
203 	kmem_nth_slab_t *chkp = arg;
204 
205 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
206 	if (rc != 0) {
207 		return (rc);
208 	}
209 
210 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
211 }
212 
213 static int
214 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
215 {
216 	uintptr_t caddr = wsp->walk_addr;
217 
218 	wsp->walk_addr = (uintptr_t)(caddr +
219 	    offsetof(kmem_cache_t, cache_complete_slabs));
220 
221 	return (list_walk_init_checked(wsp, "slab list", "slab",
222 	    kmem_complete_slab_check, (void *)caddr));
223 }
224 
225 static int
226 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
227 {
228 	uintptr_t caddr = wsp->walk_addr;
229 
230 	wsp->walk_addr = (uintptr_t)(caddr +
231 	    offsetof(kmem_cache_t, cache_partial_slabs));
232 
233 	return (avl_walk_init_checked(wsp, "slab list", "slab",
234 	    kmem_partial_slab_check, (void *)caddr));
235 }
236 
237 int
238 kmem_slab_walk_init(mdb_walk_state_t *wsp)
239 {
240 	uintptr_t caddr = wsp->walk_addr;
241 
242 	if (caddr == NULL) {
243 		mdb_warn("kmem_slab doesn't support global walks\n");
244 		return (WALK_ERR);
245 	}
246 
247 	combined_walk_init(wsp);
248 	combined_walk_add(wsp,
249 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
250 	combined_walk_add(wsp,
251 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
252 
253 	return (WALK_NEXT);
254 }
255 
256 static int
257 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
258 {
259 	uintptr_t caddr = wsp->walk_addr;
260 	kmem_nth_slab_t *chk;
261 
262 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
263 	    UM_SLEEP | UM_GC);
264 	chk->kns_cache_addr = caddr;
265 	chk->kns_nslabs = 1;
266 	wsp->walk_addr = (uintptr_t)(caddr +
267 	    offsetof(kmem_cache_t, cache_complete_slabs));
268 
269 	return (list_walk_init_checked(wsp, "slab list", "slab",
270 	    kmem_nth_slab_check, chk));
271 }
272 
273 int
274 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
275 {
276 	uintptr_t caddr = wsp->walk_addr;
277 	kmem_cache_t c;
278 
279 	if (caddr == NULL) {
280 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
281 		return (WALK_ERR);
282 	}
283 
284 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
285 		mdb_warn("couldn't read kmem_cache at %p", caddr);
286 		return (WALK_ERR);
287 	}
288 
289 	combined_walk_init(wsp);
290 
291 	/*
292 	 * Some consumers (umem_walk_step(), in particular) require at
293 	 * least one callback if there are any buffers in the cache.  So
294 	 * if there are *no* partial slabs, report the first full slab, if
295 	 * any.
296 	 *
297 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
298 	 */
299 	if (c.cache_partial_slabs.avl_numnodes == 0) {
300 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
301 		    list_walk_step, list_walk_fini);
302 	} else {
303 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
304 		    avl_walk_step, avl_walk_fini);
305 	}
306 
307 	return (WALK_NEXT);
308 }
309 
310 int
311 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
312 {
313 	kmem_cache_t c;
314 	const char *filter = NULL;
315 
316 	if (mdb_getopts(ac, argv,
317 	    'n', MDB_OPT_STR, &filter,
318 	    NULL) != ac) {
319 		return (DCMD_USAGE);
320 	}
321 
322 	if (!(flags & DCMD_ADDRSPEC)) {
323 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
324 			mdb_warn("can't walk kmem_cache");
325 			return (DCMD_ERR);
326 		}
327 		return (DCMD_OK);
328 	}
329 
330 	if (DCMD_HDRSPEC(flags))
331 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
332 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
333 
334 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
335 		mdb_warn("couldn't read kmem_cache at %p", addr);
336 		return (DCMD_ERR);
337 	}
338 
339 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
340 		return (DCMD_OK);
341 
342 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
343 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
344 
345 	return (DCMD_OK);
346 }
347 
/*
 * Help text for the kmem_cache dcmd: a one-line summary, the -n option, and
 * a description of each output column.
 */
void
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	/* temporarily outdent so the OPTIONS heading stands out */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}
368 
369 #define	LABEL_WIDTH	11
370 static void
371 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
372     size_t maxbuckets, size_t minbucketsize)
373 {
374 	uint64_t total;
375 	int buckets;
376 	int i;
377 	const int *distarray;
378 	int complete[2];
379 
380 	buckets = buffers_per_slab;
381 
382 	total = 0;
383 	for (i = 0; i <= buffers_per_slab; i++)
384 		total += ks_bucket[i];
385 
386 	if (maxbuckets > 1)
387 		buckets = MIN(buckets, maxbuckets);
388 
389 	if (minbucketsize > 1) {
390 		/*
391 		 * minbucketsize does not apply to the first bucket reserved
392 		 * for completely allocated slabs
393 		 */
394 		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
395 		    minbucketsize));
396 		if ((buckets < 2) && (buffers_per_slab > 1)) {
397 			buckets = 2;
398 			minbucketsize = (buffers_per_slab - 1);
399 		}
400 	}
401 
402 	/*
403 	 * The first printed bucket is reserved for completely allocated slabs.
404 	 * Passing (buckets - 1) excludes that bucket from the generated
405 	 * distribution, since we're handling it as a special case.
406 	 */
407 	complete[0] = buffers_per_slab;
408 	complete[1] = buffers_per_slab + 1;
409 	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
410 
411 	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
412 	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
413 
414 	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
415 	/*
416 	 * Print bucket ranges in descending order after the first bucket for
417 	 * completely allocated slabs, so a person can see immediately whether
418 	 * or not there is fragmentation without having to scan possibly
419 	 * multiple screens of output. Starting at (buckets - 2) excludes the
420 	 * extra terminating bucket.
421 	 */
422 	for (i = buckets - 2; i >= 0; i--) {
423 		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
424 	}
425 	mdb_printf("\n");
426 }
427 #undef LABEL_WIDTH
428 
/*
 * Walk callback used to detect whether a cache has at least one slab: it
 * sets *is_slab to B_TRUE on the first slab visited and ends the walk.
 */
/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
	*is_slab = B_TRUE;
	return (WALK_DONE);	/* one slab is enough; stop walking */
}
436 
437 /*ARGSUSED*/
438 static int
439 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
440     boolean_t *is_slab)
441 {
442 	/*
443 	 * The "kmem_partial_slab" walker reports the first full slab if there
444 	 * are no partial slabs (for the sake of consumers that require at least
445 	 * one callback if there are any buffers in the cache).
446 	 */
447 	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
448 	return (WALK_DONE);
449 }
450 
/*
 * Per-slab record kept for each partially allocated slab seen by
 * kmem_slablist_stat().
 */
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;

/*
 * Accumulator for one cache's slab statistics, filled in by
 * kmem_slablist_stat() as the cache's slabs are walked.  ks_usage is a
 * growable array holding one entry per partial slab (ks_usage_len is its
 * capacity); ks_bucket is indexed by a slab's allocated buffer count.
 */
typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;
466 
467 /*ARGSUSED*/
468 static int
469 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
470     kmem_slab_stats_t *ks)
471 {
472 	kmem_slab_usage_t *ksu;
473 	long unused;
474 
475 	ks->ks_slabs++;
476 	ks->ks_bucket[sp->slab_refcnt]++;
477 
478 	unused = (sp->slab_chunks - sp->slab_refcnt);
479 	if (unused == 0) {
480 		return (WALK_NEXT);
481 	}
482 
483 	ks->ks_partial_slabs++;
484 	ks->ks_unused_buffers += unused;
485 
486 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
487 		kmem_slab_usage_t *usage;
488 		int len = ks->ks_usage_len;
489 
490 		len = (len == 0 ? 16 : len * 2);
491 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
492 		if (ks->ks_usage != NULL) {
493 			bcopy(ks->ks_usage, usage,
494 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
495 			mdb_free(ks->ks_usage,
496 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
497 		}
498 		ks->ks_usage = usage;
499 		ks->ks_usage_len = len;
500 	}
501 
502 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
503 	ksu->ksu_refcnt = sp->slab_refcnt;
504 	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
505 	return (WALK_NEXT);
506 }
507 
/*
 * Print the three-line column header for the kmem_slabs dcmd.  All three
 * rows share one layout so the columns line up.
 */
static void
kmem_slabs_header()
{
	static const char layout[] = "%-25s %8s %8s %9s %9s %6s\n";

	mdb_printf(layout,
	    "", "", "Partial", "", "Unused", "");
	mdb_printf(layout,
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf(layout,
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
519 
520 int
521 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
522 {
523 	kmem_cache_t c;
524 	kmem_slab_stats_t stats;
525 	mdb_walk_cb_t cb;
526 	int pct;
527 	int tenths_pct;
528 	size_t maxbuckets = 1;
529 	size_t minbucketsize = 0;
530 	const char *filter = NULL;
531 	const char *name = NULL;
532 	uint_t opt_v = FALSE;
533 	boolean_t buckets = B_FALSE;
534 	boolean_t skip = B_FALSE;
535 
536 	if (mdb_getopts(argc, argv,
537 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
538 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
539 	    'n', MDB_OPT_STR, &filter,
540 	    'N', MDB_OPT_STR, &name,
541 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
542 	    NULL) != argc) {
543 		return (DCMD_USAGE);
544 	}
545 
546 	if ((maxbuckets != 1) || (minbucketsize != 0)) {
547 		buckets = B_TRUE;
548 	}
549 
550 	if (!(flags & DCMD_ADDRSPEC)) {
551 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
552 		    argv) == -1) {
553 			mdb_warn("can't walk kmem_cache");
554 			return (DCMD_ERR);
555 		}
556 		return (DCMD_OK);
557 	}
558 
559 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
560 		mdb_warn("couldn't read kmem_cache at %p", addr);
561 		return (DCMD_ERR);
562 	}
563 
564 	if (name == NULL) {
565 		skip = ((filter != NULL) &&
566 		    (strstr(c.cache_name, filter) == NULL));
567 	} else if (filter == NULL) {
568 		skip = (strcmp(c.cache_name, name) != 0);
569 	} else {
570 		/* match either -n or -N */
571 		skip = ((strcmp(c.cache_name, name) != 0) &&
572 		    (strstr(c.cache_name, filter) == NULL));
573 	}
574 
575 	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
576 		kmem_slabs_header();
577 	} else if ((opt_v || buckets) && !skip) {
578 		if (DCMD_HDRSPEC(flags)) {
579 			kmem_slabs_header();
580 		} else {
581 			boolean_t is_slab = B_FALSE;
582 			const char *walker_name;
583 			if (opt_v) {
584 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
585 				walker_name = "kmem_slab_partial";
586 			} else {
587 				cb = (mdb_walk_cb_t)kmem_first_slab;
588 				walker_name = "kmem_slab";
589 			}
590 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
591 			if (is_slab) {
592 				kmem_slabs_header();
593 			}
594 		}
595 	}
596 
597 	if (skip) {
598 		return (DCMD_OK);
599 	}
600 
601 	bzero(&stats, sizeof (kmem_slab_stats_t));
602 	stats.ks_cp = &c;
603 	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
604 	/* +1 to include a zero bucket */
605 	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
606 	    sizeof (*stats.ks_bucket), UM_SLEEP);
607 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
608 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
609 
610 	if (c.cache_buftotal == 0) {
611 		pct = 0;
612 		tenths_pct = 0;
613 	} else {
614 		uint64_t n = stats.ks_unused_buffers * 10000;
615 		pct = (int)(n / c.cache_buftotal);
616 		tenths_pct = pct - ((pct / 100) * 100);
617 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
618 		if (tenths_pct == 10) {
619 			pct += 100;
620 			tenths_pct = 0;
621 		}
622 	}
623 
624 	pct /= 100;
625 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
626 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
627 	    stats.ks_unused_buffers, pct, tenths_pct);
628 
629 	if (maxbuckets == 0) {
630 		maxbuckets = stats.ks_max_buffers_per_slab;
631 	}
632 
633 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
634 	    (stats.ks_slabs > 0)) {
635 		mdb_printf("\n");
636 		kmem_slabs_print_dist(stats.ks_bucket,
637 		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
638 	}
639 
640 	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
641 	    sizeof (*stats.ks_bucket));
642 
643 	if (!opt_v) {
644 		return (DCMD_OK);
645 	}
646 
647 	if (opt_v && (stats.ks_partial_slabs > 0)) {
648 		int i;
649 		kmem_slab_usage_t *ksu;
650 
651 		mdb_printf("  %d complete, %d partial",
652 		    (stats.ks_slabs - stats.ks_partial_slabs),
653 		    stats.ks_partial_slabs);
654 		if (stats.ks_partial_slabs > 0) {
655 			mdb_printf(" (%d):", stats.ks_max_buffers_per_slab);
656 		}
657 		for (i = 0; i < stats.ks_partial_slabs; i++) {
658 			ksu = &stats.ks_usage[i];
659 			if (ksu->ksu_nomove) {
660 				const char *symbol = "*";
661 				mdb_printf(" %d%s", ksu->ksu_refcnt, symbol);
662 			} else {
663 				mdb_printf(" %d", ksu->ksu_refcnt);
664 			}
665 		}
666 		mdb_printf("\n\n");
667 	}
668 
669 	if (stats.ks_usage_len > 0) {
670 		mdb_free(stats.ks_usage,
671 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
672 	}
673 
674 	return (DCMD_OK);
675 }
676 
/*
 * Help text for the kmem_slabs dcmd: a one-line summary, a description of
 * each option (with a worked example for -v), and a description of each
 * output column.
 */
void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	/* temporarily outdent so the OPTIONS heading stands out */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete, 3 partial (8): 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}
744 
/*
 * Comparison function ordering two uintptr_t values ascending.  'lhs' and
 * 'rhs' each point at a uintptr_t.  Returns -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t *a = lhs;
	const uintptr_t *b = rhs;

	if (*a == *b)
		return (0);

	return ((*a < *b) ? -1 : 1);
}
757 
758 static int
759 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
760 {
761 	const kmem_bufctl_audit_t *bcp1 = *lhs;
762 	const kmem_bufctl_audit_t *bcp2 = *rhs;
763 
764 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
765 		return (-1);
766 
767 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
768 		return (1);
769 
770 	return (0);
771 }
772 
/*
 * State for the kmem_hash walker, which visits every bufctl in a cache's
 * hash table.
 */
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;	/* local copy of the cache's hash table */
	size_t kmhw_nelems;	/* number of buckets in kmhw_table */
	size_t kmhw_pos;	/* index of the next bucket to examine */
	kmem_bufctl_t kmhw_cur;	/* most recently read bufctl */
} kmem_hash_walk_t;
779 
780 int
781 kmem_hash_walk_init(mdb_walk_state_t *wsp)
782 {
783 	kmem_hash_walk_t *kmhw;
784 	uintptr_t *hash;
785 	kmem_cache_t c;
786 	uintptr_t haddr, addr = wsp->walk_addr;
787 	size_t nelems;
788 	size_t hsize;
789 
790 	if (addr == NULL) {
791 		mdb_warn("kmem_hash doesn't support global walks\n");
792 		return (WALK_ERR);
793 	}
794 
795 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
796 		mdb_warn("couldn't read cache at addr %p", addr);
797 		return (WALK_ERR);
798 	}
799 
800 	if (!(c.cache_flags & KMF_HASH)) {
801 		mdb_warn("cache %p doesn't have a hash table\n", addr);
802 		return (WALK_DONE);		/* nothing to do */
803 	}
804 
805 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
806 	kmhw->kmhw_cur.bc_next = NULL;
807 	kmhw->kmhw_pos = 0;
808 
809 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
810 	hsize = nelems * sizeof (uintptr_t);
811 	haddr = (uintptr_t)c.cache_hash_table;
812 
813 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
814 	if (mdb_vread(hash, hsize, haddr) == -1) {
815 		mdb_warn("failed to read hash table at %p", haddr);
816 		mdb_free(hash, hsize);
817 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
818 		return (WALK_ERR);
819 	}
820 
821 	wsp->walk_data = kmhw;
822 
823 	return (WALK_NEXT);
824 }
825 
826 int
827 kmem_hash_walk_step(mdb_walk_state_t *wsp)
828 {
829 	kmem_hash_walk_t *kmhw = wsp->walk_data;
830 	uintptr_t addr = NULL;
831 
832 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
833 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
834 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
835 				break;
836 		}
837 	}
838 	if (addr == NULL)
839 		return (WALK_DONE);
840 
841 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
842 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
843 		return (WALK_ERR);
844 	}
845 
846 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
847 }
848 
849 void
850 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
851 {
852 	kmem_hash_walk_t *kmhw = wsp->walk_data;
853 
854 	if (kmhw == NULL)
855 		return;
856 
857 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
858 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
859 }
860 
861 /*
862  * Find the address of the bufctl structure for the address 'buf' in cache
863  * 'cp', which is at address caddr, and place it in *out.
864  */
865 static int
866 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
867 {
868 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
869 	kmem_bufctl_t *bcp;
870 	kmem_bufctl_t bc;
871 
872 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
873 		mdb_warn("unable to read hash bucket for %p in cache %p",
874 		    buf, caddr);
875 		return (-1);
876 	}
877 
878 	while (bcp != NULL) {
879 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
880 		    (uintptr_t)bcp) == -1) {
881 			mdb_warn("unable to read bufctl at %p", bcp);
882 			return (-1);
883 		}
884 		if (bc.bc_addr == buf) {
885 			*out = (uintptr_t)bcp;
886 			return (0);
887 		}
888 		bcp = bc.bc_next;
889 	}
890 
891 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
892 	return (-1);
893 }
894 
895 int
896 kmem_get_magsize(const kmem_cache_t *cp)
897 {
898 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
899 	GElf_Sym mt_sym;
900 	kmem_magtype_t mt;
901 	int res;
902 
903 	/*
904 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
905 	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
906 	 * it is okay to return 0 for them.
907 	 */
908 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
909 	    (cp->cache_flags & KMF_NOMAGAZINE))
910 		return (res);
911 
912 	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
913 		mdb_warn("unable to read 'kmem_magtype'");
914 	} else if (addr < mt_sym.st_value ||
915 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
916 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
917 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
918 		    cp->cache_name, addr);
919 		return (0);
920 	}
921 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
922 		mdb_warn("unable to read magtype at %a", addr);
923 		return (0);
924 	}
925 	return (mt.mt_magsize);
926 }
927 
928 /*ARGSUSED*/
929 static int
930 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
931 {
932 	*est -= (sp->slab_chunks - sp->slab_refcnt);
933 
934 	return (WALK_NEXT);
935 }
936 
937 /*
938  * Returns an upper bound on the number of allocated buffers in a given
939  * cache.
940  */
941 size_t
942 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
943 {
944 	int magsize;
945 	size_t cache_est;
946 
947 	cache_est = cp->cache_buftotal;
948 
949 	(void) mdb_pwalk("kmem_slab_partial",
950 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
951 
952 	if ((magsize = kmem_get_magsize(cp)) != 0) {
953 		size_t mag_est = cp->cache_full.ml_total * magsize;
954 
955 		if (cache_est >= mag_est) {
956 			cache_est -= mag_est;
957 		} else {
958 			mdb_warn("cache %p's magazine layer holds more buffers "
959 			    "than the slab layer.\n", addr);
960 		}
961 	}
962 	return (cache_est);
963 }
964 
/*
 * Helper for kmem_read_magazines(): read the magazine at target address
 * 'kmp' into the local buffer 'mp' and append its first 'rounds' rounds to
 * maglist.  Relies on the enclosing function's locals (mp, kmp, magbsize,
 * maglist, magcnt, magmax, i) and jumps to its 'fail' label if the read
 * fails or the list reaches magmax entries.
 */
#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}

/*
 * Collect the addresses of all buffers held in the magazine layer of the
 * cache 'cp' (a local copy of the cache at target address 'addr'), looking
 * at the depot's full magazine list and at the first 'ncpus' per-CPU caches.
 *
 * On success WALK_NEXT is returned with *maglistp set to the array of
 * buffer addresses, *magcntp to the number of entries used, and *magmaxp to
 * the array's capacity in entries.  The array is not freed here; it is
 * handed to the caller.  A cache with a magazine size of zero yields a NULL
 * list with zero counts.
 *
 * On failure WALK_ERR is returned and the output parameters are left
 * untouched.  'alloc_flags' is passed to mdb_alloc(); when it includes
 * UM_GC nothing is freed explicitly here.
 */
int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	/* size in bytes of one magazine holding magsize rounds */
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	/* sanity check before trusting magsize for the allocations below */
	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		/* translate the local ccp back to its target address */
		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	/* the scratch magazine is no longer needed; maglist is returned */
	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	/* with UM_GC set, nothing is freed explicitly */
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}
1085 
1086 static int
1087 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1088 {
1089 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1090 }
1091 
1092 static int
1093 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1094 {
1095 	kmem_bufctl_audit_t b;
1096 
1097 	/*
1098 	 * if KMF_AUDIT is not set, we know that we're looking at a
1099 	 * kmem_bufctl_t.
1100 	 */
1101 	if (!(cp->cache_flags & KMF_AUDIT) ||
1102 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1103 		(void) memset(&b, 0, sizeof (b));
1104 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1105 			mdb_warn("unable to read bufctl at %p", buf);
1106 			return (WALK_ERR);
1107 		}
1108 	}
1109 
1110 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1111 }
1112 
1113 typedef struct kmem_walk {
1114 	int kmw_type;
1115 
1116 	int kmw_addr;			/* cache address */
1117 	kmem_cache_t *kmw_cp;
1118 	size_t kmw_csize;
1119 
1120 	/*
1121 	 * magazine layer
1122 	 */
1123 	void **kmw_maglist;
1124 	size_t kmw_max;
1125 	size_t kmw_count;
1126 	size_t kmw_pos;
1127 
1128 	/*
1129 	 * slab layer
1130 	 */
1131 	char *kmw_valid;	/* to keep track of freed buffers */
1132 	char *kmw_ubase;	/* buffer for slab data */
1133 } kmem_walk_t;
1134 
1135 static int
1136 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1137 {
1138 	kmem_walk_t *kmw;
1139 	int ncpus, csize;
1140 	kmem_cache_t *cp;
1141 	size_t vm_quantum;
1142 
1143 	size_t magmax, magcnt;
1144 	void **maglist = NULL;
1145 	uint_t chunksize, slabsize;
1146 	int status = WALK_ERR;
1147 	uintptr_t addr = wsp->walk_addr;
1148 	const char *layered;
1149 
1150 	type &= ~KM_HASH;
1151 
1152 	if (addr == NULL) {
1153 		mdb_warn("kmem walk doesn't support global walks\n");
1154 		return (WALK_ERR);
1155 	}
1156 
1157 	dprintf(("walking %p\n", addr));
1158 
1159 	/*
1160 	 * First we need to figure out how many CPUs are configured in the
1161 	 * system to know how much to slurp out.
1162 	 */
1163 	mdb_readvar(&ncpus, "max_ncpus");
1164 
1165 	csize = KMEM_CACHE_SIZE(ncpus);
1166 	cp = mdb_alloc(csize, UM_SLEEP);
1167 
1168 	if (mdb_vread(cp, csize, addr) == -1) {
1169 		mdb_warn("couldn't read cache at addr %p", addr);
1170 		goto out2;
1171 	}
1172 
1173 	/*
1174 	 * It's easy for someone to hand us an invalid cache address.
1175 	 * Unfortunately, it is hard for this walker to survive an
1176 	 * invalid cache cleanly.  So we make sure that:
1177 	 *
1178 	 *	1. the vmem arena for the cache is readable,
1179 	 *	2. the vmem arena's quantum is a power of 2,
1180 	 *	3. our slabsize is a multiple of the quantum, and
1181 	 *	4. our chunksize is >0 and less than our slabsize.
1182 	 */
1183 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1184 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1185 	    vm_quantum == 0 ||
1186 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1187 	    cp->cache_slabsize < vm_quantum ||
1188 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1189 	    cp->cache_chunksize == 0 ||
1190 	    cp->cache_chunksize > cp->cache_slabsize) {
1191 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1192 		goto out2;
1193 	}
1194 
1195 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1196 
1197 	if (cp->cache_buftotal == 0) {
1198 		mdb_free(cp, csize);
1199 		return (WALK_DONE);
1200 	}
1201 
1202 	/*
1203 	 * If they ask for bufctls, but it's a small-slab cache,
1204 	 * there is nothing to report.
1205 	 */
1206 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1207 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1208 		    cp->cache_flags));
1209 		mdb_free(cp, csize);
1210 		return (WALK_DONE);
1211 	}
1212 
1213 	/*
1214 	 * If they want constructed buffers, but there's no constructor or
1215 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1216 	 */
1217 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1218 	    cp->cache_constructor == NULL ||
1219 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1220 		mdb_free(cp, csize);
1221 		return (WALK_DONE);
1222 	}
1223 
1224 	/*
1225 	 * Read in the contents of the magazine layer
1226 	 */
1227 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1228 	    &magmax, UM_SLEEP) == WALK_ERR)
1229 		goto out2;
1230 
1231 	/*
1232 	 * We have all of the buffers from the magazines;  if we are walking
1233 	 * allocated buffers, sort them so we can bsearch them later.
1234 	 */
1235 	if (type & KM_ALLOCATED)
1236 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1237 
1238 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1239 
1240 	kmw->kmw_type = type;
1241 	kmw->kmw_addr = addr;
1242 	kmw->kmw_cp = cp;
1243 	kmw->kmw_csize = csize;
1244 	kmw->kmw_maglist = maglist;
1245 	kmw->kmw_max = magmax;
1246 	kmw->kmw_count = magcnt;
1247 	kmw->kmw_pos = 0;
1248 
1249 	/*
1250 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1251 	 * hash table instead of the slab layer.
1252 	 */
1253 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1254 		layered = "kmem_hash";
1255 
1256 		kmw->kmw_type |= KM_HASH;
1257 	} else {
1258 		/*
1259 		 * If we are walking freed buffers, we only need the
1260 		 * magazine layer plus the partially allocated slabs.
1261 		 * To walk allocated buffers, we need all of the slabs.
1262 		 */
1263 		if (type & KM_ALLOCATED)
1264 			layered = "kmem_slab";
1265 		else
1266 			layered = "kmem_slab_partial";
1267 
1268 		/*
1269 		 * for small-slab caches, we read in the entire slab.  For
1270 		 * freed buffers, we can just walk the freelist.  For
1271 		 * allocated buffers, we use a 'valid' array to track
1272 		 * the freed buffers.
1273 		 */
1274 		if (!(cp->cache_flags & KMF_HASH)) {
1275 			chunksize = cp->cache_chunksize;
1276 			slabsize = cp->cache_slabsize;
1277 
1278 			kmw->kmw_ubase = mdb_alloc(slabsize +
1279 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1280 
1281 			if (type & KM_ALLOCATED)
1282 				kmw->kmw_valid =
1283 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1284 		}
1285 	}
1286 
1287 	status = WALK_NEXT;
1288 
1289 	if (mdb_layered_walk(layered, wsp) == -1) {
1290 		mdb_warn("unable to start layered '%s' walk", layered);
1291 		status = WALK_ERR;
1292 	}
1293 
1294 out1:
1295 	if (status == WALK_ERR) {
1296 		if (kmw->kmw_valid)
1297 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1298 
1299 		if (kmw->kmw_ubase)
1300 			mdb_free(kmw->kmw_ubase, slabsize +
1301 			    sizeof (kmem_bufctl_t));
1302 
1303 		if (kmw->kmw_maglist)
1304 			mdb_free(kmw->kmw_maglist,
1305 			    kmw->kmw_max * sizeof (uintptr_t));
1306 
1307 		mdb_free(kmw, sizeof (kmem_walk_t));
1308 		wsp->walk_data = NULL;
1309 	}
1310 
1311 out2:
1312 	if (status == WALK_ERR)
1313 		mdb_free(cp, csize);
1314 
1315 	return (status);
1316 }
1317 
1318 int
1319 kmem_walk_step(mdb_walk_state_t *wsp)
1320 {
1321 	kmem_walk_t *kmw = wsp->walk_data;
1322 	int type = kmw->kmw_type;
1323 	kmem_cache_t *cp = kmw->kmw_cp;
1324 
1325 	void **maglist = kmw->kmw_maglist;
1326 	int magcnt = kmw->kmw_count;
1327 
1328 	uintptr_t chunksize, slabsize;
1329 	uintptr_t addr;
1330 	const kmem_slab_t *sp;
1331 	const kmem_bufctl_t *bcp;
1332 	kmem_bufctl_t bc;
1333 
1334 	int chunks;
1335 	char *kbase;
1336 	void *buf;
1337 	int i, ret;
1338 
1339 	char *valid, *ubase;
1340 
1341 	/*
1342 	 * first, handle the 'kmem_hash' layered walk case
1343 	 */
1344 	if (type & KM_HASH) {
1345 		/*
1346 		 * We have a buffer which has been allocated out of the
1347 		 * global layer. We need to make sure that it's not
1348 		 * actually sitting in a magazine before we report it as
1349 		 * an allocated buffer.
1350 		 */
1351 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1352 
1353 		if (magcnt > 0 &&
1354 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1355 		    addrcmp) != NULL)
1356 			return (WALK_NEXT);
1357 
1358 		if (type & KM_BUFCTL)
1359 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1360 
1361 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1362 	}
1363 
1364 	ret = WALK_NEXT;
1365 
1366 	addr = kmw->kmw_addr;
1367 
1368 	/*
1369 	 * If we're walking freed buffers, report everything in the
1370 	 * magazine layer before processing the first slab.
1371 	 */
1372 	if ((type & KM_FREE) && magcnt != 0) {
1373 		kmw->kmw_count = 0;		/* only do this once */
1374 		for (i = 0; i < magcnt; i++) {
1375 			buf = maglist[i];
1376 
1377 			if (type & KM_BUFCTL) {
1378 				uintptr_t out;
1379 
1380 				if (cp->cache_flags & KMF_BUFTAG) {
1381 					kmem_buftag_t *btp;
1382 					kmem_buftag_t tag;
1383 
1384 					/* LINTED - alignment */
1385 					btp = KMEM_BUFTAG(cp, buf);
1386 					if (mdb_vread(&tag, sizeof (tag),
1387 					    (uintptr_t)btp) == -1) {
1388 						mdb_warn("reading buftag for "
1389 						    "%p at %p", buf, btp);
1390 						continue;
1391 					}
1392 					out = (uintptr_t)tag.bt_bufctl;
1393 				} else {
1394 					if (kmem_hash_lookup(cp, addr, buf,
1395 					    &out) == -1)
1396 						continue;
1397 				}
1398 				ret = bufctl_walk_callback(cp, wsp, out);
1399 			} else {
1400 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1401 			}
1402 
1403 			if (ret != WALK_NEXT)
1404 				return (ret);
1405 		}
1406 	}
1407 
1408 	/*
1409 	 * If they want constructed buffers, we're finished, since the
1410 	 * magazine layer holds them all.
1411 	 */
1412 	if (type & KM_CONSTRUCTED)
1413 		return (WALK_DONE);
1414 
1415 	/*
1416 	 * Handle the buffers in the current slab
1417 	 */
1418 	chunksize = cp->cache_chunksize;
1419 	slabsize = cp->cache_slabsize;
1420 
1421 	sp = wsp->walk_layer;
1422 	chunks = sp->slab_chunks;
1423 	kbase = sp->slab_base;
1424 
1425 	dprintf(("kbase is %p\n", kbase));
1426 
1427 	if (!(cp->cache_flags & KMF_HASH)) {
1428 		valid = kmw->kmw_valid;
1429 		ubase = kmw->kmw_ubase;
1430 
1431 		if (mdb_vread(ubase, chunks * chunksize,
1432 		    (uintptr_t)kbase) == -1) {
1433 			mdb_warn("failed to read slab contents at %p", kbase);
1434 			return (WALK_ERR);
1435 		}
1436 
1437 		/*
1438 		 * Set up the valid map as fully allocated -- we'll punch
1439 		 * out the freelist.
1440 		 */
1441 		if (type & KM_ALLOCATED)
1442 			(void) memset(valid, 1, chunks);
1443 	} else {
1444 		valid = NULL;
1445 		ubase = NULL;
1446 	}
1447 
1448 	/*
1449 	 * walk the slab's freelist
1450 	 */
1451 	bcp = sp->slab_head;
1452 
1453 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1454 
1455 	/*
1456 	 * since we could be in the middle of allocating a buffer,
1457 	 * our refcnt could be one higher than it aught.  So we
1458 	 * check one further on the freelist than the count allows.
1459 	 */
1460 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1461 		uint_t ndx;
1462 
1463 		dprintf(("bcp is %p\n", bcp));
1464 
1465 		if (bcp == NULL) {
1466 			if (i == chunks)
1467 				break;
1468 			mdb_warn(
1469 			    "slab %p in cache %p freelist too short by %d\n",
1470 			    sp, addr, chunks - i);
1471 			break;
1472 		}
1473 
1474 		if (cp->cache_flags & KMF_HASH) {
1475 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1476 				mdb_warn("failed to read bufctl ptr at %p",
1477 				    bcp);
1478 				break;
1479 			}
1480 			buf = bc.bc_addr;
1481 		} else {
1482 			/*
1483 			 * Otherwise the buffer is in the slab which
1484 			 * we've read in;  we just need to determine
1485 			 * its offset in the slab to find the
1486 			 * kmem_bufctl_t.
1487 			 */
1488 			bc = *((kmem_bufctl_t *)
1489 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1490 			    (uintptr_t)ubase));
1491 
1492 			buf = KMEM_BUF(cp, bcp);
1493 		}
1494 
1495 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1496 
1497 		if (ndx > slabsize / cp->cache_bufsize) {
1498 			/*
1499 			 * This is very wrong; we have managed to find
1500 			 * a buffer in the slab which shouldn't
1501 			 * actually be here.  Emit a warning, and
1502 			 * try to continue.
1503 			 */
1504 			mdb_warn("buf %p is out of range for "
1505 			    "slab %p, cache %p\n", buf, sp, addr);
1506 		} else if (type & KM_ALLOCATED) {
1507 			/*
1508 			 * we have found a buffer on the slab's freelist;
1509 			 * clear its entry
1510 			 */
1511 			valid[ndx] = 0;
1512 		} else {
1513 			/*
1514 			 * Report this freed buffer
1515 			 */
1516 			if (type & KM_BUFCTL) {
1517 				ret = bufctl_walk_callback(cp, wsp,
1518 				    (uintptr_t)bcp);
1519 			} else {
1520 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1521 			}
1522 			if (ret != WALK_NEXT)
1523 				return (ret);
1524 		}
1525 
1526 		bcp = bc.bc_next;
1527 	}
1528 
1529 	if (bcp != NULL) {
1530 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1531 		    sp, addr, bcp));
1532 	}
1533 
1534 	/*
1535 	 * If we are walking freed buffers, the loop above handled reporting
1536 	 * them.
1537 	 */
1538 	if (type & KM_FREE)
1539 		return (WALK_NEXT);
1540 
1541 	if (type & KM_BUFCTL) {
1542 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1543 		    "cache %p\n", addr);
1544 		return (WALK_ERR);
1545 	}
1546 
1547 	/*
1548 	 * Report allocated buffers, skipping buffers in the magazine layer.
1549 	 * We only get this far for small-slab caches.
1550 	 */
1551 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1552 		buf = (char *)kbase + i * chunksize;
1553 
1554 		if (!valid[i])
1555 			continue;		/* on slab freelist */
1556 
1557 		if (magcnt > 0 &&
1558 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1559 		    addrcmp) != NULL)
1560 			continue;		/* in magazine layer */
1561 
1562 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1563 	}
1564 	return (ret);
1565 }
1566 
1567 void
1568 kmem_walk_fini(mdb_walk_state_t *wsp)
1569 {
1570 	kmem_walk_t *kmw = wsp->walk_data;
1571 	uintptr_t chunksize;
1572 	uintptr_t slabsize;
1573 
1574 	if (kmw == NULL)
1575 		return;
1576 
1577 	if (kmw->kmw_maglist != NULL)
1578 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1579 
1580 	chunksize = kmw->kmw_cp->cache_chunksize;
1581 	slabsize = kmw->kmw_cp->cache_slabsize;
1582 
1583 	if (kmw->kmw_valid != NULL)
1584 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1585 	if (kmw->kmw_ubase != NULL)
1586 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1587 
1588 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1589 	mdb_free(kmw, sizeof (kmem_walk_t));
1590 }
1591 
1592 /*ARGSUSED*/
1593 static int
1594 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1595 {
1596 	/*
1597 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1598 	 * memory in other caches.  This can be a little confusing, so we
1599 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1600 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1601 	 */
1602 	if (c->cache_cflags & KMC_NOTOUCH)
1603 		return (WALK_NEXT);
1604 
1605 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1606 	    wsp->walk_cbdata, addr) == -1)
1607 		return (WALK_DONE);
1608 
1609 	return (WALK_NEXT);
1610 }
1611 
/*
 * Perform a global walk: apply the walker called 'name' to every cache
 * found by the "kmem_cache" walk, then return from the *calling* function
 * (WALK_ERR if the cache walk fails, WALK_DONE otherwise).  The 'wsp'
 * argument is parenthesized so that any pointer expression may be passed.
 */
#define	KMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1618 
1619 int
1620 kmem_walk_init(mdb_walk_state_t *wsp)
1621 {
1622 	if (wsp->walk_arg != NULL)
1623 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1624 
1625 	if (wsp->walk_addr == NULL)
1626 		KMEM_WALK_ALL("kmem", wsp);
1627 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1628 }
1629 
1630 int
1631 bufctl_walk_init(mdb_walk_state_t *wsp)
1632 {
1633 	if (wsp->walk_addr == NULL)
1634 		KMEM_WALK_ALL("bufctl", wsp);
1635 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1636 }
1637 
1638 int
1639 freemem_walk_init(mdb_walk_state_t *wsp)
1640 {
1641 	if (wsp->walk_addr == NULL)
1642 		KMEM_WALK_ALL("freemem", wsp);
1643 	return (kmem_walk_init_common(wsp, KM_FREE));
1644 }
1645 
1646 int
1647 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1648 {
1649 	if (wsp->walk_addr == NULL)
1650 		KMEM_WALK_ALL("freemem_constructed", wsp);
1651 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1652 }
1653 
1654 int
1655 freectl_walk_init(mdb_walk_state_t *wsp)
1656 {
1657 	if (wsp->walk_addr == NULL)
1658 		KMEM_WALK_ALL("freectl", wsp);
1659 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1660 }
1661 
1662 int
1663 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1664 {
1665 	if (wsp->walk_addr == NULL)
1666 		KMEM_WALK_ALL("freectl_constructed", wsp);
1667 	return (kmem_walk_init_common(wsp,
1668 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1669 }
1670 
/*
 * State for the bufctl_history walker, which follows a bufctl's chain of
 * bc_lastlog pointers.
 */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* address of next bufctl to visit */
	kmem_cache_t	*bhw_cache;	/* cache of the base bufctl */
	kmem_slab_t	*bhw_slab;	/* slab of the base bufctl */
	hrtime_t	bhw_timestamp;	/* last reported timestamp; 0 = none */
} bufctl_history_walk_t;
1677 
1678 int
1679 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1680 {
1681 	bufctl_history_walk_t *bhw;
1682 	kmem_bufctl_audit_t bc;
1683 	kmem_bufctl_audit_t bcn;
1684 
1685 	if (wsp->walk_addr == NULL) {
1686 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1687 		return (WALK_ERR);
1688 	}
1689 
1690 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1691 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1692 		return (WALK_ERR);
1693 	}
1694 
1695 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1696 	bhw->bhw_timestamp = 0;
1697 	bhw->bhw_cache = bc.bc_cache;
1698 	bhw->bhw_slab = bc.bc_slab;
1699 
1700 	/*
1701 	 * sometimes the first log entry matches the base bufctl;  in that
1702 	 * case, skip the base bufctl.
1703 	 */
1704 	if (bc.bc_lastlog != NULL &&
1705 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1706 	    bc.bc_addr == bcn.bc_addr &&
1707 	    bc.bc_cache == bcn.bc_cache &&
1708 	    bc.bc_slab == bcn.bc_slab &&
1709 	    bc.bc_timestamp == bcn.bc_timestamp &&
1710 	    bc.bc_thread == bcn.bc_thread)
1711 		bhw->bhw_next = bc.bc_lastlog;
1712 	else
1713 		bhw->bhw_next = (void *)wsp->walk_addr;
1714 
1715 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1716 	wsp->walk_data = bhw;
1717 
1718 	return (WALK_NEXT);
1719 }
1720 
1721 int
1722 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1723 {
1724 	bufctl_history_walk_t *bhw = wsp->walk_data;
1725 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1726 	uintptr_t baseaddr = wsp->walk_addr;
1727 	kmem_bufctl_audit_t bc;
1728 
1729 	if (addr == NULL)
1730 		return (WALK_DONE);
1731 
1732 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1733 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1734 		return (WALK_ERR);
1735 	}
1736 
1737 	/*
1738 	 * The bufctl is only valid if the address, cache, and slab are
1739 	 * correct.  We also check that the timestamp is decreasing, to
1740 	 * prevent infinite loops.
1741 	 */
1742 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1743 	    bc.bc_cache != bhw->bhw_cache ||
1744 	    bc.bc_slab != bhw->bhw_slab ||
1745 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1746 		return (WALK_DONE);
1747 
1748 	bhw->bhw_next = bc.bc_lastlog;
1749 	bhw->bhw_timestamp = bc.bc_timestamp;
1750 
1751 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1752 }
1753 
1754 void
1755 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1756 {
1757 	bufctl_history_walk_t *bhw = wsp->walk_data;
1758 
1759 	mdb_free(bhw, sizeof (*bhw));
1760 }
1761 
/*
 * State for the kmem_log walker: a local copy of an entire kmem log plus an
 * array of pointers into that copy, sorted with bufctlcmp().
 */
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;		/* local copy of the log */
	kmem_bufctl_audit_t **klw_sorted;	/* sorted entry pointers */
	kmem_log_header_t klw_lh;		/* copy of the log header */
	size_t klw_size;			/* bytes at klw_base */
	size_t klw_maxndx;			/* entries in klw_sorted */
	size_t klw_ndx;				/* next entry to report */
} kmem_log_walk_t;
1770 
1771 int
1772 kmem_log_walk_init(mdb_walk_state_t *wsp)
1773 {
1774 	uintptr_t lp = wsp->walk_addr;
1775 	kmem_log_walk_t *klw;
1776 	kmem_log_header_t *lhp;
1777 	int maxndx, i, j, k;
1778 
1779 	/*
1780 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1781 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1782 	 */
1783 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1784 		mdb_warn("failed to read 'kmem_transaction_log'");
1785 		return (WALK_ERR);
1786 	}
1787 
1788 	if (lp == NULL) {
1789 		mdb_warn("log is disabled\n");
1790 		return (WALK_ERR);
1791 	}
1792 
1793 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1794 	lhp = &klw->klw_lh;
1795 
1796 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1797 		mdb_warn("failed to read log header at %p", lp);
1798 		mdb_free(klw, sizeof (kmem_log_walk_t));
1799 		return (WALK_ERR);
1800 	}
1801 
1802 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1803 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1804 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1805 
1806 	if (mdb_vread(klw->klw_base, klw->klw_size,
1807 	    (uintptr_t)lhp->lh_base) == -1) {
1808 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1809 		mdb_free(klw->klw_base, klw->klw_size);
1810 		mdb_free(klw, sizeof (kmem_log_walk_t));
1811 		return (WALK_ERR);
1812 	}
1813 
1814 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1815 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1816 
1817 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1818 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1819 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1820 
1821 		for (j = 0; j < maxndx; j++)
1822 			klw->klw_sorted[k++] = &chunk[j];
1823 	}
1824 
1825 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1826 	    (int(*)(const void *, const void *))bufctlcmp);
1827 
1828 	klw->klw_maxndx = k;
1829 	wsp->walk_data = klw;
1830 
1831 	return (WALK_NEXT);
1832 }
1833 
1834 int
1835 kmem_log_walk_step(mdb_walk_state_t *wsp)
1836 {
1837 	kmem_log_walk_t *klw = wsp->walk_data;
1838 	kmem_bufctl_audit_t *bcp;
1839 
1840 	if (klw->klw_ndx == klw->klw_maxndx)
1841 		return (WALK_DONE);
1842 
1843 	bcp = klw->klw_sorted[klw->klw_ndx++];
1844 
1845 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1846 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1847 }
1848 
1849 void
1850 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1851 {
1852 	kmem_log_walk_t *klw = wsp->walk_data;
1853 
1854 	mdb_free(klw->klw_base, klw->klw_size);
1855 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1856 	    sizeof (kmem_bufctl_audit_t *));
1857 	mdb_free(klw, sizeof (kmem_log_walk_t));
1858 }
1859 
/*
 * One bufctl recorded by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* address of the bufctl */
	hrtime_t abb_ts;	/* its bc_timestamp, used as the sort key */
} allocdby_bufctl_t;
1864 
/*
 * State for the allocdby/freedby walkers: a growable array of the bufctls
 * whose bc_thread matches the thread of interest.
 */
typedef struct allocdby_walk {
	const char *abw_walk;		/* walker run against each cache */
	uintptr_t abw_thread;		/* thread address being matched */
	size_t abw_nbufs;		/* entries in use in abw_buf */
	size_t abw_size;		/* entries allocated in abw_buf */
	allocdby_bufctl_t *abw_buf;	/* collected bufctls */
	size_t abw_ndx;			/* next entry to report */
} allocdby_walk_t;
1873 
1874 int
1875 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1876     allocdby_walk_t *abw)
1877 {
1878 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1879 		return (WALK_NEXT);
1880 
1881 	if (abw->abw_nbufs == abw->abw_size) {
1882 		allocdby_bufctl_t *buf;
1883 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1884 
1885 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1886 
1887 		bcopy(abw->abw_buf, buf, oldsize);
1888 		mdb_free(abw->abw_buf, oldsize);
1889 
1890 		abw->abw_size <<= 1;
1891 		abw->abw_buf = buf;
1892 	}
1893 
1894 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1895 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1896 	abw->abw_nbufs++;
1897 
1898 	return (WALK_NEXT);
1899 }
1900 
1901 /*ARGSUSED*/
1902 int
1903 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1904 {
1905 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1906 	    abw, addr) == -1) {
1907 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1908 		return (WALK_DONE);
1909 	}
1910 
1911 	return (WALK_NEXT);
1912 }
1913 
1914 static int
1915 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1916 {
1917 	if (lhs->abb_ts < rhs->abb_ts)
1918 		return (1);
1919 	if (lhs->abb_ts > rhs->abb_ts)
1920 		return (-1);
1921 	return (0);
1922 }
1923 
1924 static int
1925 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1926 {
1927 	allocdby_walk_t *abw;
1928 
1929 	if (wsp->walk_addr == NULL) {
1930 		mdb_warn("allocdby walk doesn't support global walks\n");
1931 		return (WALK_ERR);
1932 	}
1933 
1934 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1935 
1936 	abw->abw_thread = wsp->walk_addr;
1937 	abw->abw_walk = walk;
1938 	abw->abw_size = 128;	/* something reasonable */
1939 	abw->abw_buf =
1940 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1941 
1942 	wsp->walk_data = abw;
1943 
1944 	if (mdb_walk("kmem_cache",
1945 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1946 		mdb_warn("couldn't walk kmem_cache");
1947 		allocdby_walk_fini(wsp);
1948 		return (WALK_ERR);
1949 	}
1950 
1951 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1952 	    (int(*)(const void *, const void *))allocdby_cmp);
1953 
1954 	return (WALK_NEXT);
1955 }
1956 
1957 int
1958 allocdby_walk_init(mdb_walk_state_t *wsp)
1959 {
1960 	return (allocdby_walk_init_common(wsp, "bufctl"));
1961 }
1962 
1963 int
1964 freedby_walk_init(mdb_walk_state_t *wsp)
1965 {
1966 	return (allocdby_walk_init_common(wsp, "freectl"));
1967 }
1968 
1969 int
1970 allocdby_walk_step(mdb_walk_state_t *wsp)
1971 {
1972 	allocdby_walk_t *abw = wsp->walk_data;
1973 	kmem_bufctl_audit_t bc;
1974 	uintptr_t addr;
1975 
1976 	if (abw->abw_ndx == abw->abw_nbufs)
1977 		return (WALK_DONE);
1978 
1979 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1980 
1981 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1982 		mdb_warn("couldn't read bufctl at %p", addr);
1983 		return (WALK_DONE);
1984 	}
1985 
1986 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1987 }
1988 
1989 void
1990 allocdby_walk_fini(mdb_walk_state_t *wsp)
1991 {
1992 	allocdby_walk_t *abw = wsp->walk_data;
1993 
1994 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1995 	mdb_free(abw, sizeof (allocdby_walk_t));
1996 }
1997 
1998 /*ARGSUSED*/
1999 int
2000 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2001 {
2002 	char c[MDB_SYM_NAMLEN];
2003 	GElf_Sym sym;
2004 	int i;
2005 
2006 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2007 	for (i = 0; i < bcp->bc_depth; i++) {
2008 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2009 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2010 			continue;
2011 		if (strncmp(c, "kmem_", 5) == 0)
2012 			continue;
2013 		mdb_printf("%s+0x%lx",
2014 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2015 		break;
2016 	}
2017 	mdb_printf("\n");
2018 
2019 	return (WALK_NEXT);
2020 }
2021 
2022 static int
2023 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2024 {
2025 	if (!(flags & DCMD_ADDRSPEC))
2026 		return (DCMD_USAGE);
2027 
2028 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2029 
2030 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2031 		mdb_warn("can't walk '%s' for %p", w, addr);
2032 		return (DCMD_ERR);
2033 	}
2034 
2035 	return (DCMD_OK);
2036 }
2037 
2038 /*ARGSUSED*/
2039 int
2040 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2041 {
2042 	return (allocdby_common(addr, flags, "allocdby"));
2043 }
2044 
2045 /*ARGSUSED*/
2046 int
2047 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2048 {
2049 	return (allocdby_common(addr, flags, "freedby"));
2050 }
2051 
2052 /*
2053  * Return a string describing the address in relation to the given thread's
2054  * stack.
2055  *
2056  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2057  *
2058  * - If the address is above the stack pointer, return an empty string
2059  *   signifying that the address is active.
2060  *
2061  * - If the address is below the stack pointer, and the thread is not on proc,
2062  *   return " (below sp)".
2063  *
2064  * - If the address is below the stack pointer, and the thread is on proc,
2065  *   return " (possibly below sp)".  Depending on context, we may or may not
2066  *   have an accurate t_sp.
2067  */
2068 static const char *
2069 stack_active(const kthread_t *t, uintptr_t addr)
2070 {
2071 	uintptr_t panicstk;
2072 	GElf_Sym sym;
2073 
2074 	if (t->t_state == TS_FREE)
2075 		return (" (inactive interrupt thread)");
2076 
2077 	/*
2078 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2079 	 * no longer relates to the thread's real stack.
2080 	 */
2081 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2082 		panicstk = (uintptr_t)sym.st_value;
2083 
2084 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2085 			return ("");
2086 	}
2087 
2088 	if (addr >= t->t_sp + STACK_BIAS)
2089 		return ("");
2090 
2091 	if (t->t_state == TS_ONPROC)
2092 		return (" (possibly below sp)");
2093 
2094 	return (" (below sp)");
2095 }
2096 
/*
 * State shared by the whatis walk callbacks while searching for the owner
 * of a single address.
 */
typedef struct whatis {
	uintptr_t w_addr;		/* address being identified */
	const kmem_cache_t *w_cache;	/* cache currently being searched */
	const vmem_t *w_vmem;		/* arena currently being searched */
	size_t w_slab_align;	/* NOTE(review): not used in visible code */
	int w_slab_found;	/* NOTE(review): not used in visible code */
	int w_found;			/* number of matches so far */
	int w_kmem_lite_count;	/* caller PCs to read from a lite buftag */
	uint_t w_verbose;		/* print progress messages */
	uint_t w_freemem;		/* TRUE while searching freed memory */
	uint_t w_all;			/* keep going after the first match */
	uint_t w_bufctl;		/* always print the vmem_seg pointer */
	uint_t w_idspace;	/* search only VMC_IDENTIFIER arenas */
} whatis_t;
2111 
2112 static void
2113 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
2114 {
2115 	/* LINTED pointer cast may result in improper alignment */
2116 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr);
2117 	intptr_t stat;
2118 	int count = 0;
2119 	int i;
2120 	pc_t callers[16];
2121 
2122 	if (w->w_cache->cache_flags & KMF_REDZONE) {
2123 		kmem_buftag_t bt;
2124 
2125 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2126 			goto done;
2127 
2128 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2129 
2130 		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2131 			goto done;
2132 
2133 		/*
2134 		 * provide the bufctl ptr if it has useful information
2135 		 */
2136 		if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT))
2137 			baddr = (uintptr_t)bt.bt_bufctl;
2138 
2139 		if (w->w_cache->cache_flags & KMF_LITE) {
2140 			count = w->w_kmem_lite_count;
2141 
2142 			if (count * sizeof (pc_t) > sizeof (callers))
2143 				count = 0;
2144 
2145 			if (count > 0 &&
2146 			    mdb_vread(callers, count * sizeof (pc_t),
2147 			    btaddr +
2148 			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2149 				count = 0;
2150 
2151 			/*
2152 			 * skip unused callers
2153 			 */
2154 			while (count > 0 && callers[count - 1] ==
2155 			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
2156 				count--;
2157 		}
2158 	}
2159 
2160 done:
2161 	if (baddr == 0)
2162 		mdb_printf("%p is %p+%p, %s from %s\n",
2163 		    w->w_addr, addr, w->w_addr - addr,
2164 		    w->w_freemem == FALSE ? "allocated" : "freed",
2165 		    w->w_cache->cache_name);
2166 	else
2167 		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
2168 		    w->w_addr, addr, w->w_addr - addr, baddr,
2169 		    w->w_freemem == FALSE ? "allocated" : "freed",
2170 		    w->w_cache->cache_name);
2171 
2172 	if (count > 0) {
2173 		mdb_inc_indent(8);
2174 		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
2175 		    callers[0], (count != 1)? ", ":"\n");
2176 		for (i = 1; i < count; i++)
2177 			mdb_printf("%a%s", callers[i],
2178 			    (i + 1 < count)? ", ":"\n");
2179 		mdb_dec_indent(8);
2180 	}
2181 }
2182 
2183 /*ARGSUSED*/
2184 static int
2185 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
2186 {
2187 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2188 		return (WALK_NEXT);
2189 
2190 	whatis_print_kmem(addr, 0, w);
2191 	w->w_found++;
2192 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2193 }
2194 
2195 static int
2196 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
2197 {
2198 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
2199 		return (WALK_NEXT);
2200 
2201 	mdb_printf("%p is %p+%p ", w->w_addr,
2202 	    vs->vs_start, w->w_addr - vs->vs_start);
2203 
2204 	/*
2205 	 * Always provide the vmem_seg pointer if it has a stack trace.
2206 	 */
2207 	if (w->w_bufctl == TRUE ||
2208 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
2209 		mdb_printf("(vmem_seg %p) ", addr);
2210 	}
2211 
2212 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
2213 	    "freed " : "", w->w_vmem->vm_name);
2214 
2215 	w->w_found++;
2216 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2217 }
2218 
2219 static int
2220 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
2221 {
2222 	const char *nm = vmem->vm_name;
2223 	w->w_vmem = vmem;
2224 	w->w_freemem = FALSE;
2225 
2226 	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2227 		return (WALK_NEXT);
2228 
2229 	if (w->w_verbose)
2230 		mdb_printf("Searching vmem arena %s...\n", nm);
2231 
2232 	if (mdb_pwalk("vmem_alloc",
2233 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2234 		mdb_warn("can't walk vmem seg for %p", addr);
2235 		return (WALK_NEXT);
2236 	}
2237 
2238 	if (w->w_found && w->w_all == FALSE)
2239 		return (WALK_DONE);
2240 
2241 	if (w->w_verbose)
2242 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
2243 
2244 	w->w_freemem = TRUE;
2245 
2246 	if (mdb_pwalk("vmem_free",
2247 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2248 		mdb_warn("can't walk vmem seg for %p", addr);
2249 		return (WALK_NEXT);
2250 	}
2251 
2252 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2253 }
2254 
2255 /*ARGSUSED*/
2256 static int
2257 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
2258 {
2259 	uintptr_t addr;
2260 
2261 	if (bcp == NULL)
2262 		return (WALK_NEXT);
2263 
2264 	addr = (uintptr_t)bcp->bc_addr;
2265 
2266 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2267 		return (WALK_NEXT);
2268 
2269 	whatis_print_kmem(addr, baddr, w);
2270 	w->w_found++;
2271 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2272 }
2273 
2274 /*ARGSUSED*/
2275 static int
2276 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
2277 {
2278 	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);
2279 
2280 	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
2281 		return (WALK_NEXT);
2282 
2283 	w->w_slab_found++;
2284 	return (WALK_DONE);
2285 }
2286 
2287 static int
2288 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2289 {
2290 	char *walk, *freewalk;
2291 	mdb_walk_cb_t func;
2292 	vmem_t *vmp = c->cache_arena;
2293 
2294 	if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2295 		return (WALK_NEXT);
2296 
2297 	if (w->w_bufctl == FALSE) {
2298 		walk = "kmem";
2299 		freewalk = "freemem";
2300 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2301 	} else {
2302 		walk = "bufctl";
2303 		freewalk = "freectl";
2304 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2305 	}
2306 
2307 	w->w_cache = c;
2308 
2309 	if (w->w_verbose)
2310 		mdb_printf("Searching %s's slabs...\n", c->cache_name);
2311 
2312 	/*
2313 	 * Verify that the address is in one of the cache's slabs.  If not,
2314 	 * we can skip the more expensive walkers.  (this is purely a
2315 	 * heuristic -- as long as there are no false-negatives, we'll be fine)
2316 	 *
2317 	 * We try to get the cache's arena's quantum, since to accurately
2318 	 * get the base of a slab, you have to align it to the quantum.  If
2319 	 * it doesn't look sensible, we fall back to not aligning.
2320 	 */
2321 	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
2322 	    (uintptr_t)&vmp->vm_quantum) == -1) {
2323 		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
2324 		w->w_slab_align = 1;
2325 	}
2326 
2327 	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
2328 	    (w->w_slab_align & (w->w_slab_align - 1))) {
2329 		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
2330 		    w->w_slab_align);
2331 		w->w_slab_align = 1;
2332 	}
2333 
2334 	w->w_slab_found = 0;
2335 	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
2336 	    addr) == -1) {
2337 		mdb_warn("can't find kmem_slab walker");
2338 		return (WALK_DONE);
2339 	}
2340 	if (w->w_slab_found == 0)
2341 		return (WALK_NEXT);
2342 
2343 	if (c->cache_flags & KMF_LITE) {
2344 		if (mdb_readvar(&w->w_kmem_lite_count,
2345 		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
2346 			w->w_kmem_lite_count = 0;
2347 	}
2348 
2349 	if (w->w_verbose)
2350 		mdb_printf("Searching %s...\n", c->cache_name);
2351 
2352 	w->w_freemem = FALSE;
2353 
2354 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2355 		mdb_warn("can't find %s walker", walk);
2356 		return (WALK_DONE);
2357 	}
2358 
2359 	if (w->w_found && w->w_all == FALSE)
2360 		return (WALK_DONE);
2361 
2362 	/*
2363 	 * We have searched for allocated memory; now search for freed memory.
2364 	 */
2365 	if (w->w_verbose)
2366 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2367 
2368 	w->w_freemem = TRUE;
2369 
2370 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2371 		mdb_warn("can't find %s walker", freewalk);
2372 		return (WALK_DONE);
2373 	}
2374 
2375 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2376 }
2377 
2378 static int
2379 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2380 {
2381 	if (c->cache_cflags & KMC_NOTOUCH)
2382 		return (WALK_NEXT);
2383 
2384 	return (whatis_walk_cache(addr, c, w));
2385 }
2386 
2387 static int
2388 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2389 {
2390 	if (!(c->cache_cflags & KMC_NOTOUCH))
2391 		return (WALK_NEXT);
2392 
2393 	return (whatis_walk_cache(addr, c, w));
2394 }
2395 
2396 static int
2397 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
2398 {
2399 	/*
2400 	 * Often, one calls ::whatis on an address from a thread structure.
2401 	 * We use this opportunity to short circuit this case...
2402 	 */
2403 	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
2404 		mdb_printf("%p is %p+%p, allocated as a thread structure\n",
2405 		    w->w_addr, addr, w->w_addr - addr);
2406 		w->w_found++;
2407 		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2408 	}
2409 
2410 	if (w->w_addr < (uintptr_t)t->t_stkbase ||
2411 	    w->w_addr > (uintptr_t)t->t_stk)
2412 		return (WALK_NEXT);
2413 
2414 	if (t->t_stkbase == NULL)
2415 		return (WALK_NEXT);
2416 
2417 	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
2418 	    stack_active(t, w->w_addr));
2419 
2420 	w->w_found++;
2421 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2422 }
2423 
2424 static int
2425 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
2426 {
2427 	struct module mod;
2428 	char name[MODMAXNAMELEN], *where;
2429 	char c[MDB_SYM_NAMLEN];
2430 	Shdr shdr;
2431 	GElf_Sym sym;
2432 
2433 	if (m->mod_mp == NULL)
2434 		return (WALK_NEXT);
2435 
2436 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2437 		mdb_warn("couldn't read modctl %p's module", addr);
2438 		return (WALK_NEXT);
2439 	}
2440 
2441 	if (w->w_addr >= (uintptr_t)mod.text &&
2442 	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
2443 		where = "text segment";
2444 		goto found;
2445 	}
2446 
2447 	if (w->w_addr >= (uintptr_t)mod.data &&
2448 	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
2449 		where = "data segment";
2450 		goto found;
2451 	}
2452 
2453 	if (w->w_addr >= (uintptr_t)mod.bss &&
2454 	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
2455 		where = "bss";
2456 		goto found;
2457 	}
2458 
2459 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2460 		mdb_warn("couldn't read symbol header for %p's module", addr);
2461 		return (WALK_NEXT);
2462 	}
2463 
2464 	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
2465 	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
2466 		where = "symtab";
2467 		goto found;
2468 	}
2469 
2470 	if (w->w_addr >= (uintptr_t)mod.symspace &&
2471 	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
2472 		where = "symspace";
2473 		goto found;
2474 	}
2475 
2476 	return (WALK_NEXT);
2477 
2478 found:
2479 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2480 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2481 
2482 	mdb_printf("%p is ", w->w_addr);
2483 
2484 	/*
2485 	 * If we found this address in a module, then there's a chance that
2486 	 * it's actually a named symbol.  Try the symbol lookup.
2487 	 */
2488 	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c),
2489 	    &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value &&
2490 	    w->w_addr < (uintptr_t)sym.st_value + sym.st_size) {
2491 		mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value);
2492 	}
2493 
2494 	mdb_printf("in %s's %s\n", name, where);
2495 
2496 	w->w_found++;
2497 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2498 }
2499 
2500 /*ARGSUSED*/
2501 static int
2502 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
2503 {
2504 	static int machsize = 0;
2505 	mdb_ctf_id_t id;
2506 
2507 	if (machsize == 0) {
2508 		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
2509 			machsize = mdb_ctf_type_size(id);
2510 		else {
2511 			mdb_warn("could not get size of page_t");
2512 			machsize = sizeof (page_t);
2513 		}
2514 	}
2515 
2516 	if (w->w_addr < addr || w->w_addr >= addr + machsize)
2517 		return (WALK_NEXT);
2518 
2519 	mdb_printf("%p is %p+%p, allocated as a page structure\n",
2520 	    w->w_addr, addr, w->w_addr - addr);
2521 
2522 	w->w_found++;
2523 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2524 }
2525 
2526 int
2527 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2528 {
2529 	whatis_t w;
2530 
2531 	if (!(flags & DCMD_ADDRSPEC))
2532 		return (DCMD_USAGE);
2533 
2534 	w.w_verbose = FALSE;
2535 	w.w_bufctl = FALSE;
2536 	w.w_all = FALSE;
2537 	w.w_idspace = FALSE;
2538 
2539 	if (mdb_getopts(argc, argv,
2540 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2541 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2542 	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
2543 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
2544 		return (DCMD_USAGE);
2545 
2546 	w.w_addr = addr;
2547 	w.w_found = 0;
2548 
2549 	if (w.w_verbose)
2550 		mdb_printf("Searching modules...\n");
2551 
2552 	if (!w.w_idspace) {
2553 		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
2554 		    == -1) {
2555 			mdb_warn("couldn't find modctl walker");
2556 			return (DCMD_ERR);
2557 		}
2558 
2559 		if (w.w_found && w.w_all == FALSE)
2560 			return (DCMD_OK);
2561 
2562 		/*
2563 		 * Now search all thread stacks.  Yes, this is a little weak; we
2564 		 * can save a lot of work by first checking to see if the
2565 		 * address is in segkp vs. segkmem.  But hey, computers are
2566 		 * fast.
2567 		 */
2568 		if (w.w_verbose)
2569 			mdb_printf("Searching threads...\n");
2570 
2571 		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
2572 		    == -1) {
2573 			mdb_warn("couldn't find thread walker");
2574 			return (DCMD_ERR);
2575 		}
2576 
2577 		if (w.w_found && w.w_all == FALSE)
2578 			return (DCMD_OK);
2579 
2580 		if (w.w_verbose)
2581 			mdb_printf("Searching page structures...\n");
2582 
2583 		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
2584 		    == -1) {
2585 			mdb_warn("couldn't find page walker");
2586 			return (DCMD_ERR);
2587 		}
2588 
2589 		if (w.w_found && w.w_all == FALSE)
2590 			return (DCMD_OK);
2591 	}
2592 
2593 	if (mdb_walk("kmem_cache",
2594 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2595 		mdb_warn("couldn't find kmem_cache walker");
2596 		return (DCMD_ERR);
2597 	}
2598 
2599 	if (w.w_found && w.w_all == FALSE)
2600 		return (DCMD_OK);
2601 
2602 	if (mdb_walk("kmem_cache",
2603 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2604 		mdb_warn("couldn't find kmem_cache walker");
2605 		return (DCMD_ERR);
2606 	}
2607 
2608 	if (w.w_found && w.w_all == FALSE)
2609 		return (DCMD_OK);
2610 
2611 	if (mdb_walk("vmem_postfix",
2612 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2613 		mdb_warn("couldn't find vmem_postfix walker");
2614 		return (DCMD_ERR);
2615 	}
2616 
2617 	if (w.w_found == 0)
2618 		mdb_printf("%p is unknown\n", addr);
2619 
2620 	return (DCMD_OK);
2621 }
2622 
/*
 * Print the help text for the whatis dcmd: a short description followed by
 * one entry per option.
 */
void
whatis_help(void)
{
	mdb_printf(
	    "Given a virtual address, attempt to determine where it came\n"
	    "from.\n"
	    "\n");

	mdb_printf(
	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
	    "\t\tsearched\n");

	mdb_printf(
	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
	    "\t\tthe first (most specific) source.\n");

	mdb_printf(
	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
	    "\t\tthese are ignored.\n");

	mdb_printf(
	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
	    "\t\thave a bufctl, it will not be reported.\n");
}
2640 
/*
 * Address range of one CPU's portion of the kmem transaction log, as
 * computed by kmem_log().  kmem_log_walk() attributes a log entry to the
 * CPU whose [kmc_low, kmc_high) range contains the entry's address.
 */
typedef struct kmem_log_cpu {
	uintptr_t kmc_low;	/* clh_chunk * lh_chunksize + lh_base */
	uintptr_t kmc_high;	/* the CPU log header's clh_current */
} kmem_log_cpu_t;

/*
 * Private data handed to kmem_log_walk().
 */
typedef struct kmem_log_data {
	uintptr_t kmd_addr;	/* if non-zero, only show entries whose */
				/* buffer contains this address */
	kmem_log_cpu_t *kmd_cpu;	/* NCPU per-CPU log ranges */
} kmem_log_data_t;
2650 
2651 int
2652 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2653     kmem_log_data_t *kmd)
2654 {
2655 	int i;
2656 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2657 	size_t bufsize;
2658 
2659 	for (i = 0; i < NCPU; i++) {
2660 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2661 			break;
2662 	}
2663 
2664 	if (kmd->kmd_addr) {
2665 		if (b->bc_cache == NULL)
2666 			return (WALK_NEXT);
2667 
2668 		if (mdb_vread(&bufsize, sizeof (bufsize),
2669 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2670 			mdb_warn(
2671 			    "failed to read cache_bufsize for cache at %p",
2672 			    b->bc_cache);
2673 			return (WALK_ERR);
2674 		}
2675 
2676 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2677 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2678 			return (WALK_NEXT);
2679 	}
2680 
2681 	if (i == NCPU)
2682 		mdb_printf("   ");
2683 	else
2684 		mdb_printf("%3d", i);
2685 
2686 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2687 	    b->bc_timestamp, b->bc_thread);
2688 
2689 	return (WALK_NEXT);
2690 }
2691 
2692 /*ARGSUSED*/
2693 int
2694 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2695 {
2696 	kmem_log_header_t lh;
2697 	kmem_cpu_log_header_t clh;
2698 	uintptr_t lhp, clhp;
2699 	int ncpus;
2700 	uintptr_t *cpu;
2701 	GElf_Sym sym;
2702 	kmem_log_cpu_t *kmc;
2703 	int i;
2704 	kmem_log_data_t kmd;
2705 	uint_t opt_b = FALSE;
2706 
2707 	if (mdb_getopts(argc, argv,
2708 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2709 		return (DCMD_USAGE);
2710 
2711 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2712 		mdb_warn("failed to read 'kmem_transaction_log'");
2713 		return (DCMD_ERR);
2714 	}
2715 
2716 	if (lhp == NULL) {
2717 		mdb_warn("no kmem transaction log\n");
2718 		return (DCMD_ERR);
2719 	}
2720 
2721 	mdb_readvar(&ncpus, "ncpus");
2722 
2723 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2724 		mdb_warn("failed to read log header at %p", lhp);
2725 		return (DCMD_ERR);
2726 	}
2727 
2728 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2729 
2730 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2731 
2732 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2733 		mdb_warn("couldn't find 'cpu' array");
2734 		return (DCMD_ERR);
2735 	}
2736 
2737 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2738 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2739 		    NCPU * sizeof (uintptr_t), sym.st_size);
2740 		return (DCMD_ERR);
2741 	}
2742 
2743 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2744 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2745 		return (DCMD_ERR);
2746 	}
2747 
2748 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2749 	kmd.kmd_addr = NULL;
2750 	kmd.kmd_cpu = kmc;
2751 
2752 	for (i = 0; i < NCPU; i++) {
2753 
2754 		if (cpu[i] == NULL)
2755 			continue;
2756 
2757 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2758 			mdb_warn("cannot read cpu %d's log header at %p",
2759 			    i, clhp);
2760 			return (DCMD_ERR);
2761 		}
2762 
2763 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2764 		    (uintptr_t)lh.lh_base;
2765 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2766 
2767 		clhp += sizeof (kmem_cpu_log_header_t);
2768 	}
2769 
2770 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2771 	    "TIMESTAMP", "THREAD");
2772 
2773 	/*
2774 	 * If we have been passed an address, print out only log entries
2775 	 * corresponding to that address.  If opt_b is specified, then interpret
2776 	 * the address as a bufctl.
2777 	 */
2778 	if (flags & DCMD_ADDRSPEC) {
2779 		kmem_bufctl_audit_t b;
2780 
2781 		if (opt_b) {
2782 			kmd.kmd_addr = addr;
2783 		} else {
2784 			if (mdb_vread(&b,
2785 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2786 				mdb_warn("failed to read bufctl at %p", addr);
2787 				return (DCMD_ERR);
2788 			}
2789 
2790 			(void) kmem_log_walk(addr, &b, &kmd);
2791 
2792 			return (DCMD_OK);
2793 		}
2794 	}
2795 
2796 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2797 		mdb_warn("can't find kmem log walker");
2798 		return (DCMD_ERR);
2799 	}
2800 
2801 	return (DCMD_OK);
2802 }
2803 
/*
 * State shared between bufctl() and bufctl_history_callback() while walking
 * a bufctl's transaction history.
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags passed to bufctl() */
	int		bhc_argc;	/* argument count passed to bufctl() */
	const mdb_arg_t	*bhc_argv;	/* argument vector passed to bufctl() */
	int		bhc_ret;	/* result of the latest bufctl() call */
} bufctl_history_cb_t;
2810 
2811 /*ARGSUSED*/
2812 static int
2813 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2814 {
2815 	bufctl_history_cb_t *bhc = arg;
2816 
2817 	bhc->bhc_ret =
2818 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2819 
2820 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2821 
2822 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2823 }
2824 
/*
 * Print the help text for the bufctl dcmd.  The OPTIONS heading is printed
 * with the indent temporarily reduced by two so that it stands out from the
 * option descriptions that follow it.
 */
void
bufctl_help(void)
{
	mdb_printf("%s",
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s",
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n");
	mdb_printf("%s",
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n");
	mdb_printf("%s",
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n");
	mdb_printf("%s",
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n");
	mdb_printf("%s",
"  -l latest\n"
"        filter out bufctls timestamped after latest\n");
	mdb_printf("%s",
"  -t thread\n"
"        filter out bufctls not involving thread\n");
}
2847 
2848 int
2849 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2850 {
2851 	kmem_bufctl_audit_t bc;
2852 	uint_t verbose = FALSE;
2853 	uint_t history = FALSE;
2854 	uint_t in_history = FALSE;
2855 	uintptr_t caller = NULL, thread = NULL;
2856 	uintptr_t laddr, haddr, baddr = NULL;
2857 	hrtime_t earliest = 0, latest = 0;
2858 	int i, depth;
2859 	char c[MDB_SYM_NAMLEN];
2860 	GElf_Sym sym;
2861 
2862 	if (mdb_getopts(argc, argv,
2863 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2864 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2865 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2866 	    'c', MDB_OPT_UINTPTR, &caller,
2867 	    't', MDB_OPT_UINTPTR, &thread,
2868 	    'e', MDB_OPT_UINT64, &earliest,
2869 	    'l', MDB_OPT_UINT64, &latest,
2870 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2871 		return (DCMD_USAGE);
2872 
2873 	if (!(flags & DCMD_ADDRSPEC))
2874 		return (DCMD_USAGE);
2875 
2876 	if (in_history && !history)
2877 		return (DCMD_USAGE);
2878 
2879 	if (history && !in_history) {
2880 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2881 		    UM_SLEEP | UM_GC);
2882 		bufctl_history_cb_t bhc;
2883 
2884 		nargv[0].a_type = MDB_TYPE_STRING;
2885 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2886 
2887 		for (i = 0; i < argc; i++)
2888 			nargv[i + 1] = argv[i];
2889 
2890 		/*
2891 		 * When in history mode, we treat each element as if it
2892 		 * were in a seperate loop, so that the headers group
2893 		 * bufctls with similar histories.
2894 		 */
2895 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2896 		bhc.bhc_argc = argc + 1;
2897 		bhc.bhc_argv = nargv;
2898 		bhc.bhc_ret = DCMD_OK;
2899 
2900 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2901 		    addr) == -1) {
2902 			mdb_warn("unable to walk bufctl_history");
2903 			return (DCMD_ERR);
2904 		}
2905 
2906 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2907 			mdb_printf("\n");
2908 
2909 		return (bhc.bhc_ret);
2910 	}
2911 
2912 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2913 		if (verbose) {
2914 			mdb_printf("%16s %16s %16s %16s\n"
2915 			    "%<u>%16s %16s %16s %16s%</u>\n",
2916 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2917 			    "", "CACHE", "LASTLOG", "CONTENTS");
2918 		} else {
2919 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2920 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2921 		}
2922 	}
2923 
2924 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2925 		mdb_warn("couldn't read bufctl at %p", addr);
2926 		return (DCMD_ERR);
2927 	}
2928 
2929 	/*
2930 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2931 	 * the address does not really refer to a bufctl.
2932 	 */
2933 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2934 
2935 	if (caller != NULL) {
2936 		laddr = caller;
2937 		haddr = caller + sizeof (caller);
2938 
2939 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2940 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2941 			/*
2942 			 * We were provided an exact symbol value; any
2943 			 * address in the function is valid.
2944 			 */
2945 			laddr = (uintptr_t)sym.st_value;
2946 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2947 		}
2948 
2949 		for (i = 0; i < depth; i++)
2950 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2951 				break;
2952 
2953 		if (i == depth)
2954 			return (DCMD_OK);
2955 	}
2956 
2957 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2958 		return (DCMD_OK);
2959 
2960 	if (earliest != 0 && bc.bc_timestamp < earliest)
2961 		return (DCMD_OK);
2962 
2963 	if (latest != 0 && bc.bc_timestamp > latest)
2964 		return (DCMD_OK);
2965 
2966 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2967 		return (DCMD_OK);
2968 
2969 	if (flags & DCMD_PIPE_OUT) {
2970 		mdb_printf("%#lr\n", addr);
2971 		return (DCMD_OK);
2972 	}
2973 
2974 	if (verbose) {
2975 		mdb_printf(
2976 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2977 		    "%16s %16p %16p %16p\n",
2978 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2979 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2980 
2981 		mdb_inc_indent(17);
2982 		for (i = 0; i < depth; i++)
2983 			mdb_printf("%a\n", bc.bc_stack[i]);
2984 		mdb_dec_indent(17);
2985 		mdb_printf("\n");
2986 	} else {
2987 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2988 		    bc.bc_timestamp, bc.bc_thread);
2989 
2990 		for (i = 0; i < depth; i++) {
2991 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2992 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2993 				continue;
2994 			if (strncmp(c, "kmem_", 5) == 0)
2995 				continue;
2996 			mdb_printf(" %a\n", bc.bc_stack[i]);
2997 			break;
2998 		}
2999 
3000 		if (i >= depth)
3001 			mdb_printf("\n");
3002 	}
3003 
3004 	return (DCMD_OK);
3005 }
3006 
/*
 * State shared between the kmem_verify dcmd and its per-buffer callbacks,
 * verify_alloc() and verify_free().
 */
typedef struct kmem_verify {
	uint64_t *kmv_buf;		/* buffer to read cache contents into */
	size_t kmv_size;		/* number of bytes in kmv_buf */
	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* nonzero: don't report each */
					/* corruption site, just count */
	struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;
3014 
/*
 * verify_pattern()
 * 	Check that the first 'size' bytes at buf_arg consist solely of the
 * 	64-bit pattern 'pat', examining one 64-bit word at a time.
 *
 * 	Returns the byte offset of the first word that differs from 'pat',
 * 	or -1 if every word matches (including when size is 0).
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;

	for (off = 0; off < size; off += sizeof (uint64_t)) {
		/*LINTED*/
		const uint64_t *wordp =
		    (const uint64_t *)((char *)buf_arg + off);

		if (*wordp != pat)
			return ((int64_t)off);
	}

	return (-1);
}
3031 
3032 /*
3033  * verify_buftag()
3034  *	verify that btp->bt_bxstat == (bcp ^ pat)
3035  */
3036 static int
3037 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3038 {
3039 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3040 }
3041 
3042 /*
3043  * verify_free()
3044  * 	verify the integrity of a free block of memory by checking
3045  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3046  */
3047 /*ARGSUSED1*/
3048 static int
3049 verify_free(uintptr_t addr, const void *data, void *private)
3050 {
3051 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3052 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3053 	int64_t corrupt;		/* corruption offset */
3054 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3055 	kmem_cache_t *cp = &kmv->kmv_cache;
3056 	int besilent = kmv->kmv_besilent;
3057 
3058 	/*LINTED*/
3059 	buftagp = KMEM_BUFTAG(cp, buf);
3060 
3061 	/*
3062 	 * Read the buffer to check.
3063 	 */
3064 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3065 		if (!besilent)
3066 			mdb_warn("couldn't read %p", addr);
3067 		return (WALK_NEXT);
3068 	}
3069 
3070 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3071 	    KMEM_FREE_PATTERN)) >= 0) {
3072 		if (!besilent)
3073 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3074 			    addr, (uintptr_t)addr + corrupt);
3075 		goto corrupt;
3076 	}
3077 	/*
3078 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3079 	 * the first bytes of the buffer, hence we cannot check for red
3080 	 * zone corruption.
3081 	 */
3082 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3083 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3084 		if (!besilent)
3085 			mdb_printf("buffer %p (free) seems to "
3086 			    "have a corrupt redzone pattern\n", addr);
3087 		goto corrupt;
3088 	}
3089 
3090 	/*
3091 	 * confirm bufctl pointer integrity.
3092 	 */
3093 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3094 		if (!besilent)
3095 			mdb_printf("buffer %p (free) has a corrupt "
3096 			    "buftag\n", addr);
3097 		goto corrupt;
3098 	}
3099 
3100 	return (WALK_NEXT);
3101 corrupt:
3102 	kmv->kmv_corruption++;
3103 	return (WALK_NEXT);
3104 }
3105 
3106 /*
3107  * verify_alloc()
3108  * 	Verify that the buftag of an allocated buffer makes sense with respect
3109  * 	to the buffer.
3110  */
3111 /*ARGSUSED1*/
3112 static int
3113 verify_alloc(uintptr_t addr, const void *data, void *private)
3114 {
3115 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3116 	kmem_cache_t *cp = &kmv->kmv_cache;
3117 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3118 	/*LINTED*/
3119 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3120 	uint32_t *ip = (uint32_t *)buftagp;
3121 	uint8_t *bp = (uint8_t *)buf;
3122 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3123 	int besilent = kmv->kmv_besilent;
3124 
3125 	/*
3126 	 * Read the buffer to check.
3127 	 */
3128 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3129 		if (!besilent)
3130 			mdb_warn("couldn't read %p", addr);
3131 		return (WALK_NEXT);
3132 	}
3133 
3134 	/*
3135 	 * There are two cases to handle:
3136 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3137 	 *    0xfeedfacefeedface at the end of it
3138 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3139 	 *    0xbb just past the end of the region in use.  At the buftag,
3140 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3141 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3142 	 *    endianness), followed by 32 bits containing the offset of the
3143 	 *    0xbb byte in the buffer.
3144 	 *
3145 	 * Finally, the two 32-bit words that comprise the second half of the
3146 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3147 	 */
3148 
3149 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3150 		looks_ok = 1;
3151 	else if (!KMEM_SIZE_VALID(ip[1]))
3152 		size_ok = 0;
3153 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3154 		looks_ok = 1;
3155 	else
3156 		size_ok = 0;
3157 
3158 	if (!size_ok) {
3159 		if (!besilent)
3160 			mdb_printf("buffer %p (allocated) has a corrupt "
3161 			    "redzone size encoding\n", addr);
3162 		goto corrupt;
3163 	}
3164 
3165 	if (!looks_ok) {
3166 		if (!besilent)
3167 			mdb_printf("buffer %p (allocated) has a corrupt "
3168 			    "redzone signature\n", addr);
3169 		goto corrupt;
3170 	}
3171 
3172 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3173 		if (!besilent)
3174 			mdb_printf("buffer %p (allocated) has a "
3175 			    "corrupt buftag\n", addr);
3176 		goto corrupt;
3177 	}
3178 
3179 	return (WALK_NEXT);
3180 corrupt:
3181 	kmv->kmv_corruption++;
3182 	return (WALK_NEXT);
3183 }
3184 
3185 /*ARGSUSED2*/
3186 int
3187 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3188 {
3189 	if (flags & DCMD_ADDRSPEC) {
3190 		int check_alloc = 0, check_free = 0;
3191 		kmem_verify_t kmv;
3192 
3193 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3194 		    addr) == -1) {
3195 			mdb_warn("couldn't read kmem_cache %p", addr);
3196 			return (DCMD_ERR);
3197 		}
3198 
3199 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3200 		    sizeof (kmem_buftag_t);
3201 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3202 		kmv.kmv_corruption = 0;
3203 
3204 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3205 			check_alloc = 1;
3206 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3207 				check_free = 1;
3208 		} else {
3209 			if (!(flags & DCMD_LOOP)) {
3210 				mdb_warn("cache %p (%s) does not have "
3211 				    "redzone checking enabled\n", addr,
3212 				    kmv.kmv_cache.cache_name);
3213 			}
3214 			return (DCMD_ERR);
3215 		}
3216 
3217 		if (flags & DCMD_LOOP) {
3218 			/*
3219 			 * table mode, don't print out every corrupt buffer
3220 			 */
3221 			kmv.kmv_besilent = 1;
3222 		} else {
3223 			mdb_printf("Summary for cache '%s'\n",
3224 			    kmv.kmv_cache.cache_name);
3225 			mdb_inc_indent(2);
3226 			kmv.kmv_besilent = 0;
3227 		}
3228 
3229 		if (check_alloc)
3230 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3231 		if (check_free)
3232 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3233 
3234 		if (flags & DCMD_LOOP) {
3235 			if (kmv.kmv_corruption == 0) {
3236 				mdb_printf("%-*s %?p clean\n",
3237 				    KMEM_CACHE_NAMELEN,
3238 				    kmv.kmv_cache.cache_name, addr);
3239 			} else {
3240 				char *s = "";	/* optional s in "buffer[s]" */
3241 				if (kmv.kmv_corruption > 1)
3242 					s = "s";
3243 
3244 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3245 				    KMEM_CACHE_NAMELEN,
3246 				    kmv.kmv_cache.cache_name, addr,
3247 				    kmv.kmv_corruption, s);
3248 			}
3249 		} else {
3250 			/*
3251 			 * This is the more verbose mode, when the user has
3252 			 * type addr::kmem_verify.  If the cache was clean,
3253 			 * nothing will have yet been printed. So say something.
3254 			 */
3255 			if (kmv.kmv_corruption == 0)
3256 				mdb_printf("clean\n");
3257 
3258 			mdb_dec_indent(2);
3259 		}
3260 	} else {
3261 		/*
3262 		 * If the user didn't specify a cache to verify, we'll walk all
3263 		 * kmem_cache's, specifying ourself as a callback for each...
3264 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3265 		 */
3266 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3267 		    "Cache Name", "Addr", "Cache Integrity");
3268 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3269 	}
3270 
3271 	return (DCMD_OK);
3272 }
3273 
3274 typedef struct vmem_node {
3275 	struct vmem_node *vn_next;
3276 	struct vmem_node *vn_parent;
3277 	struct vmem_node *vn_sibling;
3278 	struct vmem_node *vn_children;
3279 	uintptr_t vn_addr;
3280 	int vn_marked;
3281 	vmem_t vn_vmem;
3282 } vmem_node_t;
3283 
3284 typedef struct vmem_walk {
3285 	vmem_node_t *vw_root;
3286 	vmem_node_t *vw_current;
3287 } vmem_walk_t;
3288 
3289 int
3290 vmem_walk_init(mdb_walk_state_t *wsp)
3291 {
3292 	uintptr_t vaddr, paddr;
3293 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3294 	vmem_walk_t *vw;
3295 
3296 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3297 		mdb_warn("couldn't read 'vmem_list'");
3298 		return (WALK_ERR);
3299 	}
3300 
3301 	while (vaddr != NULL) {
3302 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3303 		vp->vn_addr = vaddr;
3304 		vp->vn_next = head;
3305 		head = vp;
3306 
3307 		if (vaddr == wsp->walk_addr)
3308 			current = vp;
3309 
3310 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3311 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3312 			goto err;
3313 		}
3314 
3315 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3316 	}
3317 
3318 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3319 
3320 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3321 			vp->vn_sibling = root;
3322 			root = vp;
3323 			continue;
3324 		}
3325 
3326 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3327 			if (parent->vn_addr != paddr)
3328 				continue;
3329 			vp->vn_sibling = parent->vn_children;
3330 			parent->vn_children = vp;
3331 			vp->vn_parent = parent;
3332 			break;
3333 		}
3334 
3335 		if (parent == NULL) {
3336 			mdb_warn("couldn't find %p's parent (%p)\n",
3337 			    vp->vn_addr, paddr);
3338 			goto err;
3339 		}
3340 	}
3341 
3342 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3343 	vw->vw_root = root;
3344 
3345 	if (current != NULL)
3346 		vw->vw_current = current;
3347 	else
3348 		vw->vw_current = root;
3349 
3350 	wsp->walk_data = vw;
3351 	return (WALK_NEXT);
3352 err:
3353 	for (vp = head; head != NULL; vp = head) {
3354 		head = vp->vn_next;
3355 		mdb_free(vp, sizeof (vmem_node_t));
3356 	}
3357 
3358 	return (WALK_ERR);
3359 }
3360 
3361 int
3362 vmem_walk_step(mdb_walk_state_t *wsp)
3363 {
3364 	vmem_walk_t *vw = wsp->walk_data;
3365 	vmem_node_t *vp;
3366 	int rval;
3367 
3368 	if ((vp = vw->vw_current) == NULL)
3369 		return (WALK_DONE);
3370 
3371 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3372 
3373 	if (vp->vn_children != NULL) {
3374 		vw->vw_current = vp->vn_children;
3375 		return (rval);
3376 	}
3377 
3378 	do {
3379 		vw->vw_current = vp->vn_sibling;
3380 		vp = vp->vn_parent;
3381 	} while (vw->vw_current == NULL && vp != NULL);
3382 
3383 	return (rval);
3384 }
3385 
3386 /*
3387  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3388  * children are visited before their parent.  We perform the postfix walk
3389  * iteratively (rather than recursively) to allow mdb to regain control
3390  * after each callback.
3391  */
3392 int
3393 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3394 {
3395 	vmem_walk_t *vw = wsp->walk_data;
3396 	vmem_node_t *vp = vw->vw_current;
3397 	int rval;
3398 
3399 	/*
3400 	 * If this node is marked, then we know that we have already visited
3401 	 * all of its children.  If the node has any siblings, they need to
3402 	 * be visited next; otherwise, we need to visit the parent.  Note
3403 	 * that vp->vn_marked will only be zero on the first invocation of
3404 	 * the step function.
3405 	 */
3406 	if (vp->vn_marked) {
3407 		if (vp->vn_sibling != NULL)
3408 			vp = vp->vn_sibling;
3409 		else if (vp->vn_parent != NULL)
3410 			vp = vp->vn_parent;
3411 		else {
3412 			/*
3413 			 * We have neither a parent, nor a sibling, and we
3414 			 * have already been visited; we're done.
3415 			 */
3416 			return (WALK_DONE);
3417 		}
3418 	}
3419 
3420 	/*
3421 	 * Before we visit this node, visit its children.
3422 	 */
3423 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3424 		vp = vp->vn_children;
3425 
3426 	vp->vn_marked = 1;
3427 	vw->vw_current = vp;
3428 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3429 
3430 	return (rval);
3431 }
3432 
3433 void
3434 vmem_walk_fini(mdb_walk_state_t *wsp)
3435 {
3436 	vmem_walk_t *vw = wsp->walk_data;
3437 	vmem_node_t *root = vw->vw_root;
3438 	int done;
3439 
3440 	if (root == NULL)
3441 		return;
3442 
3443 	if ((vw->vw_root = root->vn_children) != NULL)
3444 		vmem_walk_fini(wsp);
3445 
3446 	vw->vw_root = root->vn_sibling;
3447 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3448 	mdb_free(root, sizeof (vmem_node_t));
3449 
3450 	if (done) {
3451 		mdb_free(vw, sizeof (vmem_walk_t));
3452 	} else {
3453 		vmem_walk_fini(wsp);
3454 	}
3455 }
3456 
/*
 * State kept in walk_data by the vmem segment walkers.
 */
typedef struct vmem_seg_walk vmem_seg_walk_t;

struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report; 0 reports all */
	uintptr_t vsw_start;	/* target address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* target address of the next segment */
};
3462 
3463 /*ARGSUSED*/
3464 int
3465 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3466 {
3467 	vmem_seg_walk_t *vsw;
3468 
3469 	if (wsp->walk_addr == NULL) {
3470 		mdb_warn("vmem_%s does not support global walks\n", name);
3471 		return (WALK_ERR);
3472 	}
3473 
3474 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3475 
3476 	vsw->vsw_type = type;
3477 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3478 	vsw->vsw_current = vsw->vsw_start;
3479 
3480 	return (WALK_NEXT);
3481 }
3482 
/*
 * vmem segments can't have type 0, so VMEM_NONE is free to mean "no type
 * filter" in the segment walkers (this definition should be added to
 * vmem_impl.h).
 */
3486 #define	VMEM_NONE	0
3487 
3488 int
3489 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3490 {
3491 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3492 }
3493 
3494 int
3495 vmem_free_walk_init(mdb_walk_state_t *wsp)
3496 {
3497 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3498 }
3499 
3500 int
3501 vmem_span_walk_init(mdb_walk_state_t *wsp)
3502 {
3503 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3504 }
3505 
3506 int
3507 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3508 {
3509 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3510 }
3511 
3512 int
3513 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3514 {
3515 	vmem_seg_t seg;
3516 	vmem_seg_walk_t *vsw = wsp->walk_data;
3517 	uintptr_t addr = vsw->vsw_current;
3518 	static size_t seg_size = 0;
3519 	int rval;
3520 
3521 	if (!seg_size) {
3522 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3523 			mdb_warn("failed to read 'vmem_seg_size'");
3524 			seg_size = sizeof (vmem_seg_t);
3525 		}
3526 	}
3527 
3528 	if (seg_size < sizeof (seg))
3529 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3530 
3531 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3532 		mdb_warn("couldn't read vmem_seg at %p", addr);
3533 		return (WALK_ERR);
3534 	}
3535 
3536 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3537 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3538 		rval = WALK_NEXT;
3539 	} else {
3540 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3541 	}
3542 
3543 	if (vsw->vsw_current == vsw->vsw_start)
3544 		return (WALK_DONE);
3545 
3546 	return (rval);
3547 }
3548 
3549 void
3550 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3551 {
3552 	vmem_seg_walk_t *vsw = wsp->walk_data;
3553 
3554 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3555 }
3556 
3557 #define	VMEM_NAMEWIDTH	22
3558 
3559 int
3560 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3561 {
3562 	vmem_t v, parent;
3563 	vmem_kstat_t *vkp = &v.vm_kstat;
3564 	uintptr_t paddr;
3565 	int ident = 0;
3566 	char c[VMEM_NAMEWIDTH];
3567 
3568 	if (!(flags & DCMD_ADDRSPEC)) {
3569 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3570 			mdb_warn("can't walk vmem");
3571 			return (DCMD_ERR);
3572 		}
3573 		return (DCMD_OK);
3574 	}
3575 
3576 	if (DCMD_HDRSPEC(flags))
3577 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3578 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3579 		    "TOTAL", "SUCCEED", "FAIL");
3580 
3581 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3582 		mdb_warn("couldn't read vmem at %p", addr);
3583 		return (DCMD_ERR);
3584 	}
3585 
3586 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3587 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3588 			mdb_warn("couldn't trace %p's ancestry", addr);
3589 			ident = 0;
3590 			break;
3591 		}
3592 		paddr = (uintptr_t)parent.vm_source;
3593 	}
3594 
3595 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3596 
3597 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3598 	    addr, VMEM_NAMEWIDTH, c,
3599 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3600 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3601 
3602 	return (DCMD_OK);
3603 }
3604 
/*
 * Help text for ::vmem_seg.  The indentation is dropped around the OPTIONS
 * heading and restored for the option descriptions.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3636 
3637 /*ARGSUSED*/
3638 int
3639 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3640 {
3641 	vmem_seg_t vs;
3642 	pc_t *stk = vs.vs_stack;
3643 	uintptr_t sz;
3644 	uint8_t t;
3645 	const char *type = NULL;
3646 	GElf_Sym sym;
3647 	char c[MDB_SYM_NAMLEN];
3648 	int no_debug;
3649 	int i;
3650 	int depth;
3651 	uintptr_t laddr, haddr;
3652 
3653 	uintptr_t caller = NULL, thread = NULL;
3654 	uintptr_t minsize = 0, maxsize = 0;
3655 
3656 	hrtime_t earliest = 0, latest = 0;
3657 
3658 	uint_t size = 0;
3659 	uint_t verbose = 0;
3660 
3661 	if (!(flags & DCMD_ADDRSPEC))
3662 		return (DCMD_USAGE);
3663 
3664 	if (mdb_getopts(argc, argv,
3665 	    'c', MDB_OPT_UINTPTR, &caller,
3666 	    'e', MDB_OPT_UINT64, &earliest,
3667 	    'l', MDB_OPT_UINT64, &latest,
3668 	    's', MDB_OPT_SETBITS, TRUE, &size,
3669 	    'm', MDB_OPT_UINTPTR, &minsize,
3670 	    'M', MDB_OPT_UINTPTR, &maxsize,
3671 	    't', MDB_OPT_UINTPTR, &thread,
3672 	    'T', MDB_OPT_STR, &type,
3673 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3674 	    NULL) != argc)
3675 		return (DCMD_USAGE);
3676 
3677 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3678 		if (verbose) {
3679 			mdb_printf("%16s %4s %16s %16s %16s\n"
3680 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3681 			    "ADDR", "TYPE", "START", "END", "SIZE",
3682 			    "", "", "THREAD", "TIMESTAMP", "");
3683 		} else {
3684 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3685 			    "START", size? "SIZE" : "END", "WHO");
3686 		}
3687 	}
3688 
3689 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3690 		mdb_warn("couldn't read vmem_seg at %p", addr);
3691 		return (DCMD_ERR);
3692 	}
3693 
3694 	if (type != NULL) {
3695 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3696 			t = VMEM_ALLOC;
3697 		else if (strcmp(type, "FREE") == 0)
3698 			t = VMEM_FREE;
3699 		else if (strcmp(type, "SPAN") == 0)
3700 			t = VMEM_SPAN;
3701 		else if (strcmp(type, "ROTR") == 0 ||
3702 		    strcmp(type, "ROTOR") == 0)
3703 			t = VMEM_ROTOR;
3704 		else if (strcmp(type, "WLKR") == 0 ||
3705 		    strcmp(type, "WALKER") == 0)
3706 			t = VMEM_WALKER;
3707 		else {
3708 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3709 			    type);
3710 			return (DCMD_ERR);
3711 		}
3712 
3713 		if (vs.vs_type != t)
3714 			return (DCMD_OK);
3715 	}
3716 
3717 	sz = vs.vs_end - vs.vs_start;
3718 
3719 	if (minsize != 0 && sz < minsize)
3720 		return (DCMD_OK);
3721 
3722 	if (maxsize != 0 && sz > maxsize)
3723 		return (DCMD_OK);
3724 
3725 	t = vs.vs_type;
3726 	depth = vs.vs_depth;
3727 
3728 	/*
3729 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3730 	 */
3731 	no_debug = (t != VMEM_ALLOC) ||
3732 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3733 
3734 	if (no_debug) {
3735 		if (caller != NULL || thread != NULL || earliest != 0 ||
3736 		    latest != 0)
3737 			return (DCMD_OK);		/* not enough info */
3738 	} else {
3739 		if (caller != NULL) {
3740 			laddr = caller;
3741 			haddr = caller + sizeof (caller);
3742 
3743 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3744 			    sizeof (c), &sym) != -1 &&
3745 			    caller == (uintptr_t)sym.st_value) {
3746 				/*
3747 				 * We were provided an exact symbol value; any
3748 				 * address in the function is valid.
3749 				 */
3750 				laddr = (uintptr_t)sym.st_value;
3751 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3752 			}
3753 
3754 			for (i = 0; i < depth; i++)
3755 				if (vs.vs_stack[i] >= laddr &&
3756 				    vs.vs_stack[i] < haddr)
3757 					break;
3758 
3759 			if (i == depth)
3760 				return (DCMD_OK);
3761 		}
3762 
3763 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3764 			return (DCMD_OK);
3765 
3766 		if (earliest != 0 && vs.vs_timestamp < earliest)
3767 			return (DCMD_OK);
3768 
3769 		if (latest != 0 && vs.vs_timestamp > latest)
3770 			return (DCMD_OK);
3771 	}
3772 
3773 	type = (t == VMEM_ALLOC ? "ALLC" :
3774 	    t == VMEM_FREE ? "FREE" :
3775 	    t == VMEM_SPAN ? "SPAN" :
3776 	    t == VMEM_ROTOR ? "ROTR" :
3777 	    t == VMEM_WALKER ? "WLKR" :
3778 	    "????");
3779 
3780 	if (flags & DCMD_PIPE_OUT) {
3781 		mdb_printf("%#lr\n", addr);
3782 		return (DCMD_OK);
3783 	}
3784 
3785 	if (verbose) {
3786 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3787 		    addr, type, vs.vs_start, vs.vs_end, sz);
3788 
3789 		if (no_debug)
3790 			return (DCMD_OK);
3791 
3792 		mdb_printf("%16s %4s %16p %16llx\n",
3793 		    "", "", vs.vs_thread, vs.vs_timestamp);
3794 
3795 		mdb_inc_indent(17);
3796 		for (i = 0; i < depth; i++) {
3797 			mdb_printf("%a\n", stk[i]);
3798 		}
3799 		mdb_dec_indent(17);
3800 		mdb_printf("\n");
3801 	} else {
3802 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3803 		    vs.vs_start, size? sz : vs.vs_end);
3804 
3805 		if (no_debug) {
3806 			mdb_printf("\n");
3807 			return (DCMD_OK);
3808 		}
3809 
3810 		for (i = 0; i < depth; i++) {
3811 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3812 			    c, sizeof (c), &sym) == -1)
3813 				continue;
3814 			if (strncmp(c, "vmem_", 5) == 0)
3815 				continue;
3816 			break;
3817 		}
3818 		mdb_printf(" %a\n", stk[i]);
3819 	}
3820 	return (DCMD_OK);
3821 }
3822 
3823 typedef struct kmalog_data {
3824 	uintptr_t	kma_addr;
3825 	hrtime_t	kma_newest;
3826 } kmalog_data_t;
3827 
3828 /*ARGSUSED*/
3829 static int
3830 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3831 {
3832 	char name[KMEM_CACHE_NAMELEN + 1];
3833 	hrtime_t delta;
3834 	int i, depth;
3835 	size_t bufsize;
3836 
3837 	if (bcp->bc_timestamp == 0)
3838 		return (WALK_DONE);
3839 
3840 	if (kma->kma_newest == 0)
3841 		kma->kma_newest = bcp->bc_timestamp;
3842 
3843 	if (kma->kma_addr) {
3844 		if (mdb_vread(&bufsize, sizeof (bufsize),
3845 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3846 			mdb_warn(
3847 			    "failed to read cache_bufsize for cache at %p",
3848 			    bcp->bc_cache);
3849 			return (WALK_ERR);
3850 		}
3851 
3852 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3853 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3854 			return (WALK_NEXT);
3855 	}
3856 
3857 	delta = kma->kma_newest - bcp->bc_timestamp;
3858 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3859 
3860 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3861 	    &bcp->bc_cache->cache_name) <= 0)
3862 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3863 
3864 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3865 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3866 
3867 	for (i = 0; i < depth; i++)
3868 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3869 
3870 	return (WALK_NEXT);
3871 }
3872 
3873 int
3874 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3875 {
3876 	const char *logname = "kmem_transaction_log";
3877 	kmalog_data_t kma;
3878 
3879 	if (argc > 1)
3880 		return (DCMD_USAGE);
3881 
3882 	kma.kma_newest = 0;
3883 	if (flags & DCMD_ADDRSPEC)
3884 		kma.kma_addr = addr;
3885 	else
3886 		kma.kma_addr = NULL;
3887 
3888 	if (argc > 0) {
3889 		if (argv->a_type != MDB_TYPE_STRING)
3890 			return (DCMD_USAGE);
3891 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3892 			logname = "kmem_failure_log";
3893 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3894 			logname = "kmem_slab_log";
3895 		else
3896 			return (DCMD_USAGE);
3897 	}
3898 
3899 	if (mdb_readvar(&addr, logname) == -1) {
3900 		mdb_warn("failed to read %s log header pointer");
3901 		return (DCMD_ERR);
3902 	}
3903 
3904 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3905 		mdb_warn("failed to walk kmem log");
3906 		return (DCMD_ERR);
3907 	}
3908 
3909 	return (DCMD_OK);
3910 }
3911 
/*
 * ::kmausers is provided as the final lure for die-hard crash(1M) users.
 * Its first building block is the list below, in which we collect the
 * addresses of the kmem_cache_t's to examine.  kmc_add() fills it in as a
 * callback of the kmem_cache walker, adding either every cache or only the
 * cache whose name was given as an argument.
 */
typedef struct kmclist kmclist_t;

struct kmclist {
	const char *kmc_name;			/* Name to match (or NULL) */
	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
	int kmc_nelems;				/* Num entries in kmc_caches */
	int kmc_size;				/* Size of kmc_caches array */
};
3925 
3926 static int
3927 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3928 {
3929 	void *p;
3930 	int s;
3931 
3932 	if (kmc->kmc_name == NULL ||
3933 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3934 		/*
3935 		 * If we have a match, grow our array (if necessary), and then
3936 		 * add the virtual address of the matching cache to our list.
3937 		 */
3938 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3939 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3940 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3941 
3942 			bcopy(kmc->kmc_caches, p,
3943 			    sizeof (uintptr_t) * kmc->kmc_size);
3944 
3945 			kmc->kmc_caches = p;
3946 			kmc->kmc_size = s;
3947 		}
3948 
3949 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3950 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3951 	}
3952 
3953 	return (WALK_NEXT);
3954 }
3955 
3956 /*
3957  * The second piece of ::kmausers is a hash table of allocations.  Each
3958  * allocation owner is identified by its stack trace and data_size.  We then
3959  * track the total bytes of all such allocations, and the number of allocations
3960  * to report at the end.  Once we have a list of caches, we walk through the
3961  * allocated bufctls of each, and update our hash table accordingly.
3962  */
3963 
3964 typedef struct kmowner {
3965 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3966 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3967 	size_t kmo_signature;			/* Hash table signature */
3968 	uint_t kmo_num;				/* Number of allocations */
3969 	size_t kmo_data_size;			/* Size of each allocation */
3970 	size_t kmo_total_size;			/* Total bytes of allocation */
3971 	int kmo_depth;				/* Depth of stack trace */
3972 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
3973 } kmowner_t;
3974 
3975 typedef struct kmusers {
3976 	uintptr_t kmu_addr;			/* address of interest */
3977 	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
3978 	kmowner_t *kmu_hash;			/* Hash table of owners */
3979 	int kmu_nelems;				/* Number of entries in use */
3980 	int kmu_size;				/* Total number of entries */
3981 } kmusers_t;
3982 
3983 static void
3984 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3985     size_t size, size_t data_size)
3986 {
3987 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3988 	size_t bucket, signature = data_size;
3989 	kmowner_t *kmo, *kmoend;
3990 
3991 	/*
3992 	 * If the hash table is full, double its size and rehash everything.
3993 	 */
3994 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3995 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3996 
3997 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3998 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
3999 		kmu->kmu_hash = kmo;
4000 		kmu->kmu_size = s;
4001 
4002 		kmoend = kmu->kmu_hash + kmu->kmu_size;
4003 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4004 			kmo->kmo_head = NULL;
4005 
4006 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4007 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4008 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4009 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4010 			kmu->kmu_hash[bucket].kmo_head = kmo;
4011 		}
4012 	}
4013 
4014 	/*
4015 	 * Finish computing the hash signature from the stack trace, and then
4016 	 * see if the owner is in the hash table.  If so, update our stats.
4017 	 */
4018 	for (i = 0; i < depth; i++)
4019 		signature += bcp->bc_stack[i];
4020 
4021 	bucket = signature & (kmu->kmu_size - 1);
4022 
4023 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4024 		if (kmo->kmo_signature == signature) {
4025 			size_t difference = 0;
4026 
4027 			difference |= kmo->kmo_data_size - data_size;
4028 			difference |= kmo->kmo_depth - depth;
4029 
4030 			for (i = 0; i < depth; i++) {
4031 				difference |= kmo->kmo_stack[i] -
4032 				    bcp->bc_stack[i];
4033 			}
4034 
4035 			if (difference == 0) {
4036 				kmo->kmo_total_size += size;
4037 				kmo->kmo_num++;
4038 				return;
4039 			}
4040 		}
4041 	}
4042 
4043 	/*
4044 	 * If the owner is not yet hashed, grab the next element and fill it
4045 	 * in based on the allocation information.
4046 	 */
4047 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4048 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4049 	kmu->kmu_hash[bucket].kmo_head = kmo;
4050 
4051 	kmo->kmo_signature = signature;
4052 	kmo->kmo_num = 1;
4053 	kmo->kmo_data_size = data_size;
4054 	kmo->kmo_total_size = size;
4055 	kmo->kmo_depth = depth;
4056 
4057 	for (i = 0; i < depth; i++)
4058 		kmo->kmo_stack[i] = bcp->bc_stack[i];
4059 }
4060 
4061 /*
4062  * When ::kmausers is invoked without the -f flag, we simply update our hash
4063  * table with the information from each allocated bufctl.
4064  */
4065 /*ARGSUSED*/
4066 static int
4067 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4068 {
4069 	const kmem_cache_t *cp = kmu->kmu_cache;
4070 
4071 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4072 	return (WALK_NEXT);
4073 }
4074 
4075 /*
4076  * When ::kmausers is invoked with the -f flag, we print out the information
4077  * for each bufctl as well as updating the hash table.
4078  */
4079 static int
4080 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4081 {
4082 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4083 	const kmem_cache_t *cp = kmu->kmu_cache;
4084 	kmem_bufctl_t bufctl;
4085 
4086 	if (kmu->kmu_addr) {
4087 		if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
4088 			mdb_warn("couldn't read bufctl at %p", addr);
4089 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4090 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4091 		    cp->cache_bufsize)
4092 			return (WALK_NEXT);
4093 	}
4094 
4095 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4096 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4097 
4098 	for (i = 0; i < depth; i++)
4099 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4100 
4101 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4102 	return (WALK_NEXT);
4103 }
4104 
4105 /*
4106  * We sort our results by allocation size before printing them.
4107  */
4108 static int
4109 kmownercmp(const void *lp, const void *rp)
4110 {
4111 	const kmowner_t *lhs = lp;
4112 	const kmowner_t *rhs = rp;
4113 
4114 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4115 }
4116 
4117 /*
4118  * The main engine of ::kmausers is relatively straightforward: First we
4119  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4120  * iterate over the allocated bufctls of each cache in the list.  Finally,
4121  * we sort and print our results.
4122  */
4123 /*ARGSUSED*/
4124 int
4125 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4126 {
4127 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4128 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4129 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4130 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4131 	int opt_e = FALSE;		/* Include "small" users */
4132 	int opt_f = FALSE;		/* Print stack traces */
4133 
4134 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4135 	kmowner_t *kmo, *kmoend;
4136 	int i, oelems;
4137 
4138 	kmclist_t kmc;
4139 	kmusers_t kmu;
4140 
4141 	bzero(&kmc, sizeof (kmc));
4142 	bzero(&kmu, sizeof (kmu));
4143 
4144 	while ((i = mdb_getopts(argc, argv,
4145 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4146 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4147 
4148 		argv += i;	/* skip past options we just processed */
4149 		argc -= i;	/* adjust argc */
4150 
4151 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4152 			return (DCMD_USAGE);
4153 
4154 		oelems = kmc.kmc_nelems;
4155 		kmc.kmc_name = argv->a_un.a_str;
4156 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4157 
4158 		if (kmc.kmc_nelems == oelems) {
4159 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4160 			return (DCMD_ERR);
4161 		}
4162 
4163 		do_all_caches = 0;
4164 		argv++;
4165 		argc--;
4166 	}
4167 
4168 	if (flags & DCMD_ADDRSPEC) {
4169 		opt_f = TRUE;
4170 		kmu.kmu_addr = addr;
4171 	} else {
4172 		kmu.kmu_addr = NULL;
4173 	}
4174 
4175 	if (opt_e)
4176 		mem_threshold = cnt_threshold = 0;
4177 
4178 	if (opt_f)
4179 		callback = (mdb_walk_cb_t)kmause2;
4180 
4181 	if (do_all_caches) {
4182 		kmc.kmc_name = NULL; /* match all cache names */
4183 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4184 	}
4185 
4186 	for (i = 0; i < kmc.kmc_nelems; i++) {
4187 		uintptr_t cp = kmc.kmc_caches[i];
4188 		kmem_cache_t c;
4189 
4190 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4191 			mdb_warn("failed to read cache at %p", cp);
4192 			continue;
4193 		}
4194 
4195 		if (!(c.cache_flags & KMF_AUDIT)) {
4196 			if (!do_all_caches) {
4197 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4198 				    c.cache_name);
4199 			}
4200 			continue;
4201 		}
4202 
4203 		kmu.kmu_cache = &c;
4204 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4205 		audited_caches++;
4206 	}
4207 
4208 	if (audited_caches == 0 && do_all_caches) {
4209 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4210 		return (DCMD_ERR);
4211 	}
4212 
4213 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4214 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4215 
4216 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4217 		if (kmo->kmo_total_size < mem_threshold &&
4218 		    kmo->kmo_num < cnt_threshold)
4219 			continue;
4220 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4221 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4222 		for (i = 0; i < kmo->kmo_depth; i++)
4223 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4224 	}
4225 
4226 	return (DCMD_OK);
4227 }
4228 
/*
 * Help text for ::kmausers.
 */
void
kmausers_help(void)
{
	static const char description[] =
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n";
	static const char options[] =
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n";

	mdb_printf("%s%s", description, options);
}
4244 
/*
 * Return the target's 'kmem_ready' value, or -1 if it cannot be read (in
 * which case errno has been set for us).
 */
static int
kmem_ready_check(void)
{
	int ready;

	return (mdb_readvar(&ready, "kmem_ready") < 0 ? -1 : ready);
}
4255 
4256 void
4257 kmem_statechange(void)
4258 {
4259 	static int been_ready = 0;
4260 
4261 	if (been_ready)
4262 		return;
4263 
4264 	if (kmem_ready_check() <= 0)
4265 		return;
4266 
4267 	been_ready = 1;
4268 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4269 }
4270 
4271 void
4272 kmem_init(void)
4273 {
4274 	mdb_walker_t w = {
4275 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4276 		list_walk_step, list_walk_fini
4277 	};
4278 
4279 	/*
4280 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
4281 	 * immediately.  Walkers in the linkage structure won't be ready until
4282 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
4283 	 * is ready, we'll use the walker to initialize the caches.  If kmem
4284 	 * isn't ready, we'll register a callback that will allow us to defer
4285 	 * cache walking until it is.
4286 	 */
4287 	if (mdb_add_walker(&w) != 0) {
4288 		mdb_warn("failed to add kmem_cache walker");
4289 		return;
4290 	}
4291 
4292 	kmem_statechange();
4293 }
4294 
/*
 * Callback state for ::whatthread.
 */
typedef struct whatthread whatthread_t;

struct whatthread {
	uintptr_t	wt_target;	/* value to look for on the stacks */
	int		wt_verbose;	/* report every matching location */
};
4299 
4300 static int
4301 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4302 {
4303 	uintptr_t current, data;
4304 
4305 	if (t->t_stkbase == NULL)
4306 		return (WALK_NEXT);
4307 
4308 	/*
4309 	 * Warn about swapped out threads, but drive on anyway
4310 	 */
4311 	if (!(t->t_schedflag & TS_LOAD)) {
4312 		mdb_warn("thread %p's stack swapped out\n", addr);
4313 		return (WALK_NEXT);
4314 	}
4315 
4316 	/*
4317 	 * Search the thread's stack for the given pointer.  Note that it would
4318 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4319 	 * chunks, but this routine is already fast and simple.
4320 	 */
4321 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4322 	    current += sizeof (uintptr_t)) {
4323 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4324 			mdb_warn("couldn't read thread %p's stack at %p",
4325 			    addr, current);
4326 			return (WALK_ERR);
4327 		}
4328 
4329 		if (data == w->wt_target) {
4330 			if (w->wt_verbose) {
4331 				mdb_printf("%p in thread %p's stack%s\n",
4332 				    current, addr, stack_active(t, current));
4333 			} else {
4334 				mdb_printf("%#lr\n", addr);
4335 				return (WALK_NEXT);
4336 			}
4337 		}
4338 	}
4339 
4340 	return (WALK_NEXT);
4341 }
4342 
4343 int
4344 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4345 {
4346 	whatthread_t w;
4347 
4348 	if (!(flags & DCMD_ADDRSPEC))
4349 		return (DCMD_USAGE);
4350 
4351 	w.wt_verbose = FALSE;
4352 	w.wt_target = addr;
4353 
4354 	if (mdb_getopts(argc, argv,
4355 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4356 		return (DCMD_USAGE);
4357 
4358 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4359 	    == -1) {
4360 		mdb_warn("couldn't walk threads");
4361 		return (DCMD_ERR);
4362 	}
4363 
4364 	return (DCMD_OK);
4365 }
4366