xref: /titanic_50/usr/src/cmd/mdb/common/modules/genunix/kmem.c (revision 8600af92c96e63cdfbb9b8a9fc1d1aac9e5ae465)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <mdb/mdb_whatis.h>
30 #include <sys/cpuvar.h>
31 #include <sys/kmem_impl.h>
32 #include <sys/vmem_impl.h>
33 #include <sys/machelf.h>
34 #include <sys/modctl.h>
35 #include <sys/kobj.h>
36 #include <sys/panic.h>
37 #include <sys/stack.h>
38 #include <sys/sysmacros.h>
39 #include <vm/page.h>
40 
41 #include "avl.h"
42 #include "combined.h"
43 #include "dist.h"
44 #include "kmem.h"
45 #include "list.h"
46 
/*
 * Emit a "kmem debug: "-prefixed message when mdb_debug_level is non-zero.
 * The argument is a complete, parenthesized mdb_printf() argument list,
 * e.g. dprintf(("read %d rounds\n", n)).
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement: "if (x) dprintf((...)); else ..." then parses as written
 * instead of failing to compile or silently capturing the else branch.
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: ");  \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
52 
/*
 * Flag bits used within this module.  Each value is a distinct bit, so the
 * flags can be OR-ed together.
 * NOTE(review): the consumers of these flags are not visible in this part
 * of the file -- presumably they select what the buffer walkers report;
 * confirm against the walker code.
 */
#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

/*
 * Non-zero enables dprintf() output; toggled by the kmem_debug() dcmd.
 */
static int mdb_debug_level = 0;
60 
61 /*ARGSUSED*/
62 static int
63 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
64 {
65 	mdb_walker_t w;
66 	char descr[64];
67 
68 	(void) mdb_snprintf(descr, sizeof (descr),
69 	    "walk the %s cache", c->cache_name);
70 
71 	w.walk_name = c->cache_name;
72 	w.walk_descr = descr;
73 	w.walk_init = kmem_walk_init;
74 	w.walk_step = kmem_walk_step;
75 	w.walk_fini = kmem_walk_fini;
76 	w.walk_init_arg = (void *)addr;
77 
78 	if (mdb_add_walker(&w) == -1)
79 		mdb_warn("failed to add %s walker", c->cache_name);
80 
81 	return (WALK_NEXT);
82 }
83 
84 /*ARGSUSED*/
85 int
86 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
87 {
88 	mdb_debug_level ^= 1;
89 
90 	mdb_printf("kmem: debugging is now %s\n",
91 	    mdb_debug_level ? "on" : "off");
92 
93 	return (DCMD_OK);
94 }
95 
96 int
97 kmem_cache_walk_init(mdb_walk_state_t *wsp)
98 {
99 	GElf_Sym sym;
100 
101 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
102 		mdb_warn("couldn't find kmem_caches");
103 		return (WALK_ERR);
104 	}
105 
106 	wsp->walk_addr = (uintptr_t)sym.st_value;
107 
108 	return (list_walk_init_named(wsp, "cache list", "cache"));
109 }
110 
111 int
112 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
113 {
114 	if (wsp->walk_addr == NULL) {
115 		mdb_warn("kmem_cpu_cache doesn't support global walks");
116 		return (WALK_ERR);
117 	}
118 
119 	if (mdb_layered_walk("cpu", wsp) == -1) {
120 		mdb_warn("couldn't walk 'cpu'");
121 		return (WALK_ERR);
122 	}
123 
124 	wsp->walk_data = (void *)wsp->walk_addr;
125 
126 	return (WALK_NEXT);
127 }
128 
129 int
130 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
131 {
132 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
133 	const cpu_t *cpu = wsp->walk_layer;
134 	kmem_cpu_cache_t cc;
135 
136 	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
137 
138 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
139 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
140 		return (WALK_ERR);
141 	}
142 
143 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
144 }
145 
146 static int
147 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
148 {
149 	kmem_slab_t *sp = p;
150 	uintptr_t caddr = (uintptr_t)arg;
151 	if ((uintptr_t)sp->slab_cache != caddr) {
152 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
153 		    saddr, caddr, sp->slab_cache);
154 		return (-1);
155 	}
156 
157 	return (0);
158 }
159 
160 static int
161 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
162 {
163 	kmem_slab_t *sp = p;
164 
165 	int rc = kmem_slab_check(p, saddr, arg);
166 	if (rc != 0) {
167 		return (rc);
168 	}
169 
170 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
171 		mdb_warn("slab %p is not a partial slab\n", saddr);
172 		return (-1);
173 	}
174 
175 	return (0);
176 }
177 
178 static int
179 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
180 {
181 	kmem_slab_t *sp = p;
182 
183 	int rc = kmem_slab_check(p, saddr, arg);
184 	if (rc != 0) {
185 		return (rc);
186 	}
187 
188 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
189 		mdb_warn("slab %p is not completely allocated\n", saddr);
190 		return (-1);
191 	}
192 
193 	return (0);
194 }
195 
/*
 * Argument block for kmem_nth_slab_check().
 */
typedef struct {
	/* address of the cache every checked slab must belong to */
	uintptr_t kns_cache_addr;
	/*
	 * Decremented on each successful check; the check returns 1
	 * (instead of 0) on the call that finds this already at zero.
	 */
	int kns_nslabs;
} kmem_nth_slab_t;
200 
201 static int
202 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
203 {
204 	kmem_nth_slab_t *chkp = arg;
205 
206 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
207 	if (rc != 0) {
208 		return (rc);
209 	}
210 
211 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
212 }
213 
214 static int
215 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
216 {
217 	uintptr_t caddr = wsp->walk_addr;
218 
219 	wsp->walk_addr = (uintptr_t)(caddr +
220 	    offsetof(kmem_cache_t, cache_complete_slabs));
221 
222 	return (list_walk_init_checked(wsp, "slab list", "slab",
223 	    kmem_complete_slab_check, (void *)caddr));
224 }
225 
226 static int
227 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
228 {
229 	uintptr_t caddr = wsp->walk_addr;
230 
231 	wsp->walk_addr = (uintptr_t)(caddr +
232 	    offsetof(kmem_cache_t, cache_partial_slabs));
233 
234 	return (avl_walk_init_checked(wsp, "slab list", "slab",
235 	    kmem_partial_slab_check, (void *)caddr));
236 }
237 
238 int
239 kmem_slab_walk_init(mdb_walk_state_t *wsp)
240 {
241 	uintptr_t caddr = wsp->walk_addr;
242 
243 	if (caddr == NULL) {
244 		mdb_warn("kmem_slab doesn't support global walks\n");
245 		return (WALK_ERR);
246 	}
247 
248 	combined_walk_init(wsp);
249 	combined_walk_add(wsp,
250 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
251 	combined_walk_add(wsp,
252 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
253 
254 	return (WALK_NEXT);
255 }
256 
257 static int
258 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
259 {
260 	uintptr_t caddr = wsp->walk_addr;
261 	kmem_nth_slab_t *chk;
262 
263 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
264 	    UM_SLEEP | UM_GC);
265 	chk->kns_cache_addr = caddr;
266 	chk->kns_nslabs = 1;
267 	wsp->walk_addr = (uintptr_t)(caddr +
268 	    offsetof(kmem_cache_t, cache_complete_slabs));
269 
270 	return (list_walk_init_checked(wsp, "slab list", "slab",
271 	    kmem_nth_slab_check, chk));
272 }
273 
274 int
275 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
276 {
277 	uintptr_t caddr = wsp->walk_addr;
278 	kmem_cache_t c;
279 
280 	if (caddr == NULL) {
281 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
282 		return (WALK_ERR);
283 	}
284 
285 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
286 		mdb_warn("couldn't read kmem_cache at %p", caddr);
287 		return (WALK_ERR);
288 	}
289 
290 	combined_walk_init(wsp);
291 
292 	/*
293 	 * Some consumers (umem_walk_step(), in particular) require at
294 	 * least one callback if there are any buffers in the cache.  So
295 	 * if there are *no* partial slabs, report the first full slab, if
296 	 * any.
297 	 *
298 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
299 	 */
300 	if (c.cache_partial_slabs.avl_numnodes == 0) {
301 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
302 		    list_walk_step, list_walk_fini);
303 	} else {
304 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
305 		    avl_walk_step, avl_walk_fini);
306 	}
307 
308 	return (WALK_NEXT);
309 }
310 
311 int
312 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
313 {
314 	kmem_cache_t c;
315 	const char *filter = NULL;
316 
317 	if (mdb_getopts(ac, argv,
318 	    'n', MDB_OPT_STR, &filter,
319 	    NULL) != ac) {
320 		return (DCMD_USAGE);
321 	}
322 
323 	if (!(flags & DCMD_ADDRSPEC)) {
324 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
325 			mdb_warn("can't walk kmem_cache");
326 			return (DCMD_ERR);
327 		}
328 		return (DCMD_OK);
329 	}
330 
331 	if (DCMD_HDRSPEC(flags))
332 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
333 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
334 
335 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
336 		mdb_warn("couldn't read kmem_cache at %p", addr);
337 		return (DCMD_ERR);
338 	}
339 
340 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
341 		return (DCMD_OK);
342 
343 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
344 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
345 
346 	return (DCMD_OK);
347 }
348 
/*
 * Help text for the kmem_cache dcmd: a one-line summary, an OPTIONS heading
 * printed two columns to the left of the surrounding indent, then the option
 * and column descriptions.
 */
void
kmem_cache_help(void)
{
	static const char summary[] = "Print kernel memory caches.\n\n";
	static const char details[] =
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n";

	mdb_printf("%s", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", details);
}
369 
370 #define	LABEL_WIDTH	11
371 static void
372 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
373     size_t maxbuckets, size_t minbucketsize)
374 {
375 	uint64_t total;
376 	int buckets;
377 	int i;
378 	const int *distarray;
379 	int complete[2];
380 
381 	buckets = buffers_per_slab;
382 
383 	total = 0;
384 	for (i = 0; i <= buffers_per_slab; i++)
385 		total += ks_bucket[i];
386 
387 	if (maxbuckets > 1)
388 		buckets = MIN(buckets, maxbuckets);
389 
390 	if (minbucketsize > 1) {
391 		/*
392 		 * minbucketsize does not apply to the first bucket reserved
393 		 * for completely allocated slabs
394 		 */
395 		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
396 		    minbucketsize));
397 		if ((buckets < 2) && (buffers_per_slab > 1)) {
398 			buckets = 2;
399 			minbucketsize = (buffers_per_slab - 1);
400 		}
401 	}
402 
403 	/*
404 	 * The first printed bucket is reserved for completely allocated slabs.
405 	 * Passing (buckets - 1) excludes that bucket from the generated
406 	 * distribution, since we're handling it as a special case.
407 	 */
408 	complete[0] = buffers_per_slab;
409 	complete[1] = buffers_per_slab + 1;
410 	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
411 
412 	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
413 	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
414 
415 	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
416 	/*
417 	 * Print bucket ranges in descending order after the first bucket for
418 	 * completely allocated slabs, so a person can see immediately whether
419 	 * or not there is fragmentation without having to scan possibly
420 	 * multiple screens of output. Starting at (buckets - 2) excludes the
421 	 * extra terminating bucket.
422 	 */
423 	for (i = buckets - 2; i >= 0; i--) {
424 		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
425 	}
426 	mdb_printf("\n");
427 }
428 #undef LABEL_WIDTH
429 
/*
 * Walk callback used by kmem_slabs() to learn whether a cache has any slab
 * at all: the first invocation sets *is_slab to B_TRUE and ends the walk.
 * If the walk visits nothing, *is_slab keeps the caller's initial value.
 */
/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
	*is_slab = B_TRUE;
	return (WALK_DONE);
}
437 
/*
 * Walk callback used by kmem_slabs() with the "kmem_slab_partial" walker:
 * records in *is_slab whether the first slab reported is really partial,
 * then ends the walk.
 */
/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
	/*
	 * The "kmem_partial_slab" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at least
	 * one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
	return (WALK_DONE);
}
451 
/*
 * Per-slab record kept by kmem_slablist_stat() for each slab that has unused
 * buffers; printed by the -v output of kmem_slabs().
 */
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;
456 
/*
 * Statistics accumulated over one cache's slabs by kmem_slablist_stat() on
 * behalf of kmem_slabs().
 */
typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;	/* local copy of the cache examined */
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	/* indexed by allocated count; ks_max_buffers_per_slab + 1 entries */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;
467 
468 /*ARGSUSED*/
469 static int
470 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
471     kmem_slab_stats_t *ks)
472 {
473 	kmem_slab_usage_t *ksu;
474 	long unused;
475 
476 	ks->ks_slabs++;
477 	ks->ks_bucket[sp->slab_refcnt]++;
478 
479 	unused = (sp->slab_chunks - sp->slab_refcnt);
480 	if (unused == 0) {
481 		return (WALK_NEXT);
482 	}
483 
484 	ks->ks_partial_slabs++;
485 	ks->ks_unused_buffers += unused;
486 
487 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
488 		kmem_slab_usage_t *usage;
489 		int len = ks->ks_usage_len;
490 
491 		len = (len == 0 ? 16 : len * 2);
492 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
493 		if (ks->ks_usage != NULL) {
494 			bcopy(ks->ks_usage, usage,
495 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
496 			mdb_free(ks->ks_usage,
497 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
498 		}
499 		ks->ks_usage = usage;
500 		ks->ks_usage_len = len;
501 	}
502 
503 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
504 	ksu->ksu_refcnt = sp->slab_refcnt;
505 	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
506 	return (WALK_NEXT);
507 }
508 
/*
 * Print the three-line column header for the kmem_slabs() table: two lines
 * of column titles followed by a row of dashes.
 *
 * Declared with (void) so that this is a real prototype, like the other
 * parameterless functions in this file, rather than an old-style declarator
 * that accepts any arguments.
 */
static void
kmem_slabs_header(void)
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
520 
521 int
522 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
523 {
524 	kmem_cache_t c;
525 	kmem_slab_stats_t stats;
526 	mdb_walk_cb_t cb;
527 	int pct;
528 	int tenths_pct;
529 	size_t maxbuckets = 1;
530 	size_t minbucketsize = 0;
531 	const char *filter = NULL;
532 	const char *name = NULL;
533 	uint_t opt_v = FALSE;
534 	boolean_t buckets = B_FALSE;
535 	boolean_t skip = B_FALSE;
536 
537 	if (mdb_getopts(argc, argv,
538 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
539 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
540 	    'n', MDB_OPT_STR, &filter,
541 	    'N', MDB_OPT_STR, &name,
542 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
543 	    NULL) != argc) {
544 		return (DCMD_USAGE);
545 	}
546 
547 	if ((maxbuckets != 1) || (minbucketsize != 0)) {
548 		buckets = B_TRUE;
549 	}
550 
551 	if (!(flags & DCMD_ADDRSPEC)) {
552 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
553 		    argv) == -1) {
554 			mdb_warn("can't walk kmem_cache");
555 			return (DCMD_ERR);
556 		}
557 		return (DCMD_OK);
558 	}
559 
560 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
561 		mdb_warn("couldn't read kmem_cache at %p", addr);
562 		return (DCMD_ERR);
563 	}
564 
565 	if (name == NULL) {
566 		skip = ((filter != NULL) &&
567 		    (strstr(c.cache_name, filter) == NULL));
568 	} else if (filter == NULL) {
569 		skip = (strcmp(c.cache_name, name) != 0);
570 	} else {
571 		/* match either -n or -N */
572 		skip = ((strcmp(c.cache_name, name) != 0) &&
573 		    (strstr(c.cache_name, filter) == NULL));
574 	}
575 
576 	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
577 		kmem_slabs_header();
578 	} else if ((opt_v || buckets) && !skip) {
579 		if (DCMD_HDRSPEC(flags)) {
580 			kmem_slabs_header();
581 		} else {
582 			boolean_t is_slab = B_FALSE;
583 			const char *walker_name;
584 			if (opt_v) {
585 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
586 				walker_name = "kmem_slab_partial";
587 			} else {
588 				cb = (mdb_walk_cb_t)kmem_first_slab;
589 				walker_name = "kmem_slab";
590 			}
591 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
592 			if (is_slab) {
593 				kmem_slabs_header();
594 			}
595 		}
596 	}
597 
598 	if (skip) {
599 		return (DCMD_OK);
600 	}
601 
602 	bzero(&stats, sizeof (kmem_slab_stats_t));
603 	stats.ks_cp = &c;
604 	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
605 	/* +1 to include a zero bucket */
606 	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
607 	    sizeof (*stats.ks_bucket), UM_SLEEP);
608 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
609 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
610 
611 	if (c.cache_buftotal == 0) {
612 		pct = 0;
613 		tenths_pct = 0;
614 	} else {
615 		uint64_t n = stats.ks_unused_buffers * 10000;
616 		pct = (int)(n / c.cache_buftotal);
617 		tenths_pct = pct - ((pct / 100) * 100);
618 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
619 		if (tenths_pct == 10) {
620 			pct += 100;
621 			tenths_pct = 0;
622 		}
623 	}
624 
625 	pct /= 100;
626 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
627 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
628 	    stats.ks_unused_buffers, pct, tenths_pct);
629 
630 	if (maxbuckets == 0) {
631 		maxbuckets = stats.ks_max_buffers_per_slab;
632 	}
633 
634 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
635 	    (stats.ks_slabs > 0)) {
636 		mdb_printf("\n");
637 		kmem_slabs_print_dist(stats.ks_bucket,
638 		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
639 	}
640 
641 	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
642 	    sizeof (*stats.ks_bucket));
643 
644 	if (!opt_v) {
645 		return (DCMD_OK);
646 	}
647 
648 	if (opt_v && (stats.ks_partial_slabs > 0)) {
649 		int i;
650 		kmem_slab_usage_t *ksu;
651 
652 		mdb_printf("  %d complete (%d), %d partial:",
653 		    (stats.ks_slabs - stats.ks_partial_slabs),
654 		    stats.ks_max_buffers_per_slab,
655 		    stats.ks_partial_slabs);
656 
657 		for (i = 0; i < stats.ks_partial_slabs; i++) {
658 			ksu = &stats.ks_usage[i];
659 			mdb_printf(" %d%s", ksu->ksu_refcnt,
660 			    (ksu->ksu_nomove ? "*" : ""));
661 		}
662 		mdb_printf("\n\n");
663 	}
664 
665 	if (stats.ks_usage_len > 0) {
666 		mdb_free(stats.ks_usage,
667 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
668 	}
669 
670 	return (DCMD_OK);
671 }
672 
/*
 * Help text for the kmem_slabs dcmd: a one-line summary, an OPTIONS heading
 * printed two columns to the left of the surrounding indent, then the option
 * and column descriptions.
 *
 * The -v example is written in the format kmem_slabs() actually emits,
 * "<complete> complete (<buffers per slab>), <partial> partial: ...".
 */
void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete (8), 3 partial: 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}
740 
/*
 * qsort()-style comparator for an array of uintptr_t, ascending.  Each
 * argument points at one element.  Returns -1, 0 or 1; the values are
 * compared rather than subtracted, so the result cannot wrap.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t left = *(const uintptr_t *)lhs;
	const uintptr_t right = *(const uintptr_t *)rhs;

	return ((left > right) - (left < right));
}
753 
754 static int
755 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
756 {
757 	const kmem_bufctl_audit_t *bcp1 = *lhs;
758 	const kmem_bufctl_audit_t *bcp2 = *rhs;
759 
760 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
761 		return (-1);
762 
763 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
764 		return (1);
765 
766 	return (0);
767 }
768 
/*
 * State for the kmem_hash walker, allocated in kmem_hash_walk_init() and
 * released in kmem_hash_walk_fini().
 */
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;	/* local copy of the cache's hash table */
	size_t kmhw_nelems;	/* entries in kmhw_table (hash mask + 1) */
	size_t kmhw_pos;	/* index of the next table entry to examine */
	kmem_bufctl_t kmhw_cur;	/* last bufctl read; bc_next continues chain */
} kmem_hash_walk_t;
775 
776 int
777 kmem_hash_walk_init(mdb_walk_state_t *wsp)
778 {
779 	kmem_hash_walk_t *kmhw;
780 	uintptr_t *hash;
781 	kmem_cache_t c;
782 	uintptr_t haddr, addr = wsp->walk_addr;
783 	size_t nelems;
784 	size_t hsize;
785 
786 	if (addr == NULL) {
787 		mdb_warn("kmem_hash doesn't support global walks\n");
788 		return (WALK_ERR);
789 	}
790 
791 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
792 		mdb_warn("couldn't read cache at addr %p", addr);
793 		return (WALK_ERR);
794 	}
795 
796 	if (!(c.cache_flags & KMF_HASH)) {
797 		mdb_warn("cache %p doesn't have a hash table\n", addr);
798 		return (WALK_DONE);		/* nothing to do */
799 	}
800 
801 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
802 	kmhw->kmhw_cur.bc_next = NULL;
803 	kmhw->kmhw_pos = 0;
804 
805 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
806 	hsize = nelems * sizeof (uintptr_t);
807 	haddr = (uintptr_t)c.cache_hash_table;
808 
809 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
810 	if (mdb_vread(hash, hsize, haddr) == -1) {
811 		mdb_warn("failed to read hash table at %p", haddr);
812 		mdb_free(hash, hsize);
813 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
814 		return (WALK_ERR);
815 	}
816 
817 	wsp->walk_data = kmhw;
818 
819 	return (WALK_NEXT);
820 }
821 
822 int
823 kmem_hash_walk_step(mdb_walk_state_t *wsp)
824 {
825 	kmem_hash_walk_t *kmhw = wsp->walk_data;
826 	uintptr_t addr = NULL;
827 
828 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
829 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
830 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
831 				break;
832 		}
833 	}
834 	if (addr == NULL)
835 		return (WALK_DONE);
836 
837 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
838 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
839 		return (WALK_ERR);
840 	}
841 
842 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
843 }
844 
845 void
846 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
847 {
848 	kmem_hash_walk_t *kmhw = wsp->walk_data;
849 
850 	if (kmhw == NULL)
851 		return;
852 
853 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
854 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
855 }
856 
857 /*
858  * Find the address of the bufctl structure for the address 'buf' in cache
859  * 'cp', which is at address caddr, and place it in *out.
860  */
861 static int
862 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
863 {
864 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
865 	kmem_bufctl_t *bcp;
866 	kmem_bufctl_t bc;
867 
868 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
869 		mdb_warn("unable to read hash bucket for %p in cache %p",
870 		    buf, caddr);
871 		return (-1);
872 	}
873 
874 	while (bcp != NULL) {
875 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
876 		    (uintptr_t)bcp) == -1) {
877 			mdb_warn("unable to read bufctl at %p", bcp);
878 			return (-1);
879 		}
880 		if (bc.bc_addr == buf) {
881 			*out = (uintptr_t)bcp;
882 			return (0);
883 		}
884 		bcp = bc.bc_next;
885 	}
886 
887 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
888 	return (-1);
889 }
890 
891 int
892 kmem_get_magsize(const kmem_cache_t *cp)
893 {
894 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
895 	GElf_Sym mt_sym;
896 	kmem_magtype_t mt;
897 	int res;
898 
899 	/*
900 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
901 	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
902 	 * it is okay to return 0 for them.
903 	 */
904 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
905 	    (cp->cache_flags & KMF_NOMAGAZINE))
906 		return (res);
907 
908 	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
909 		mdb_warn("unable to read 'kmem_magtype'");
910 	} else if (addr < mt_sym.st_value ||
911 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
912 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
913 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
914 		    cp->cache_name, addr);
915 		return (0);
916 	}
917 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
918 		mdb_warn("unable to read magtype at %a", addr);
919 		return (0);
920 	}
921 	return (mt.mt_magsize);
922 }
923 
924 /*ARGSUSED*/
925 static int
926 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
927 {
928 	*est -= (sp->slab_chunks - sp->slab_refcnt);
929 
930 	return (WALK_NEXT);
931 }
932 
933 /*
934  * Returns an upper bound on the number of allocated buffers in a given
935  * cache.
936  */
937 size_t
938 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
939 {
940 	int magsize;
941 	size_t cache_est;
942 
943 	cache_est = cp->cache_buftotal;
944 
945 	(void) mdb_pwalk("kmem_slab_partial",
946 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
947 
948 	if ((magsize = kmem_get_magsize(cp)) != 0) {
949 		size_t mag_est = cp->cache_full.ml_total * magsize;
950 
951 		if (cache_est >= mag_est) {
952 			cache_est -= mag_est;
953 		} else {
954 			mdb_warn("cache %p's magazine layer holds more buffers "
955 			    "than the slab layer.\n", addr);
956 		}
957 	}
958 	return (cache_est);
959 }
960 
/*
 * Helper for kmem_read_magazines(): read the magazine at target address
 * 'kmp' into the local buffer 'mp' and append its first 'rounds' rounds to
 * maglist[].  Jumps to the enclosing function's "fail" label if the read
 * fails or if maglist[] reaches magmax entries.
 *
 * Uses these names from the expanding scope: mp, kmp, magbsize, maglist,
 * magcnt, magmax, i, and the label fail.
 *
 * Wrapped in do { } while (0) so that "READMAG_ROUNDS(n);" is exactly one
 * statement and is safe in an unbraced if/else; the argument is
 * parenthesized so that an expression can be passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
975 
976 int
977 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
978     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
979 {
980 	kmem_magazine_t *kmp, *mp;
981 	void **maglist = NULL;
982 	int i, cpu;
983 	size_t magsize, magmax, magbsize;
984 	size_t magcnt = 0;
985 
986 	/*
987 	 * Read the magtype out of the cache, after verifying the pointer's
988 	 * correctness.
989 	 */
990 	magsize = kmem_get_magsize(cp);
991 	if (magsize == 0) {
992 		*maglistp = NULL;
993 		*magcntp = 0;
994 		*magmaxp = 0;
995 		return (WALK_NEXT);
996 	}
997 
998 	/*
999 	 * There are several places where we need to go buffer hunting:
1000 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1001 	 * and the full magazine list in the depot.
1002 	 *
1003 	 * For an upper bound on the number of buffers in the magazine
1004 	 * layer, we have the number of magazines on the cache_full
1005 	 * list plus at most two magazines per CPU (the loaded and the
1006 	 * spare).  Toss in 100 magazines as a fudge factor in case this
1007 	 * is live (the number "100" comes from the same fudge factor in
1008 	 * crash(1M)).
1009 	 */
1010 	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1011 	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1012 
1013 	if (magbsize >= PAGESIZE / 2) {
1014 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1015 		    addr, magbsize);
1016 		return (WALK_ERR);
1017 	}
1018 
1019 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1020 	mp = mdb_alloc(magbsize, alloc_flags);
1021 	if (mp == NULL || maglist == NULL)
1022 		goto fail;
1023 
1024 	/*
1025 	 * First up: the magazines in the depot (i.e. on the cache_full list).
1026 	 */
1027 	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1028 		READMAG_ROUNDS(magsize);
1029 		kmp = mp->mag_next;
1030 
1031 		if (kmp == cp->cache_full.ml_list)
1032 			break; /* cache_full list loop detected */
1033 	}
1034 
1035 	dprintf(("cache_full list done\n"));
1036 
1037 	/*
1038 	 * Now whip through the CPUs, snagging the loaded magazines
1039 	 * and full spares.
1040 	 *
1041 	 * In order to prevent inconsistent dumps, rounds and prounds
1042 	 * are copied aside before dumping begins.
1043 	 */
1044 	for (cpu = 0; cpu < ncpus; cpu++) {
1045 		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1046 		short rounds, prounds;
1047 
1048 		if (KMEM_DUMPCC(ccp)) {
1049 			rounds = ccp->cc_dump_rounds;
1050 			prounds = ccp->cc_dump_prounds;
1051 		} else {
1052 			rounds = ccp->cc_rounds;
1053 			prounds = ccp->cc_prounds;
1054 		}
1055 
1056 		dprintf(("reading cpu cache %p\n",
1057 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
1058 
1059 		if (rounds > 0 &&
1060 		    (kmp = ccp->cc_loaded) != NULL) {
1061 			dprintf(("reading %d loaded rounds\n", rounds));
1062 			READMAG_ROUNDS(rounds);
1063 		}
1064 
1065 		if (prounds > 0 &&
1066 		    (kmp = ccp->cc_ploaded) != NULL) {
1067 			dprintf(("reading %d previously loaded rounds\n",
1068 			    prounds));
1069 			READMAG_ROUNDS(prounds);
1070 		}
1071 	}
1072 
1073 	dprintf(("magazine layer: %d buffers\n", magcnt));
1074 
1075 	if (!(alloc_flags & UM_GC))
1076 		mdb_free(mp, magbsize);
1077 
1078 	*maglistp = maglist;
1079 	*magcntp = magcnt;
1080 	*magmaxp = magmax;
1081 
1082 	return (WALK_NEXT);
1083 
1084 fail:
1085 	if (!(alloc_flags & UM_GC)) {
1086 		if (mp)
1087 			mdb_free(mp, magbsize);
1088 		if (maglist)
1089 			mdb_free(maglist, magmax * sizeof (void *));
1090 	}
1091 	return (WALK_ERR);
1092 }
1093 
1094 static int
1095 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1096 {
1097 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1098 }
1099 
1100 static int
1101 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1102 {
1103 	kmem_bufctl_audit_t b;
1104 
1105 	/*
1106 	 * if KMF_AUDIT is not set, we know that we're looking at a
1107 	 * kmem_bufctl_t.
1108 	 */
1109 	if (!(cp->cache_flags & KMF_AUDIT) ||
1110 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1111 		(void) memset(&b, 0, sizeof (b));
1112 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1113 			mdb_warn("unable to read bufctl at %p", buf);
1114 			return (WALK_ERR);
1115 		}
1116 	}
1117 
1118 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1119 }
1120 
1121 typedef struct kmem_walk {
1122 	int kmw_type;
1123 
1124 	int kmw_addr;			/* cache address */
1125 	kmem_cache_t *kmw_cp;
1126 	size_t kmw_csize;
1127 
1128 	/*
1129 	 * magazine layer
1130 	 */
1131 	void **kmw_maglist;
1132 	size_t kmw_max;
1133 	size_t kmw_count;
1134 	size_t kmw_pos;
1135 
1136 	/*
1137 	 * slab layer
1138 	 */
1139 	char *kmw_valid;	/* to keep track of freed buffers */
1140 	char *kmw_ubase;	/* buffer for slab data */
1141 } kmem_walk_t;
1142 
1143 static int
1144 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1145 {
1146 	kmem_walk_t *kmw;
1147 	int ncpus, csize;
1148 	kmem_cache_t *cp;
1149 	size_t vm_quantum;
1150 
1151 	size_t magmax, magcnt;
1152 	void **maglist = NULL;
1153 	uint_t chunksize, slabsize;
1154 	int status = WALK_ERR;
1155 	uintptr_t addr = wsp->walk_addr;
1156 	const char *layered;
1157 
1158 	type &= ~KM_HASH;
1159 
1160 	if (addr == NULL) {
1161 		mdb_warn("kmem walk doesn't support global walks\n");
1162 		return (WALK_ERR);
1163 	}
1164 
1165 	dprintf(("walking %p\n", addr));
1166 
1167 	/*
1168 	 * First we need to figure out how many CPUs are configured in the
1169 	 * system to know how much to slurp out.
1170 	 */
1171 	mdb_readvar(&ncpus, "max_ncpus");
1172 
1173 	csize = KMEM_CACHE_SIZE(ncpus);
1174 	cp = mdb_alloc(csize, UM_SLEEP);
1175 
1176 	if (mdb_vread(cp, csize, addr) == -1) {
1177 		mdb_warn("couldn't read cache at addr %p", addr);
1178 		goto out2;
1179 	}
1180 
1181 	/*
1182 	 * It's easy for someone to hand us an invalid cache address.
1183 	 * Unfortunately, it is hard for this walker to survive an
1184 	 * invalid cache cleanly.  So we make sure that:
1185 	 *
1186 	 *	1. the vmem arena for the cache is readable,
1187 	 *	2. the vmem arena's quantum is a power of 2,
1188 	 *	3. our slabsize is a multiple of the quantum, and
1189 	 *	4. our chunksize is >0 and less than our slabsize.
1190 	 */
1191 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1192 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1193 	    vm_quantum == 0 ||
1194 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1195 	    cp->cache_slabsize < vm_quantum ||
1196 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1197 	    cp->cache_chunksize == 0 ||
1198 	    cp->cache_chunksize > cp->cache_slabsize) {
1199 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1200 		goto out2;
1201 	}
1202 
1203 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1204 
1205 	if (cp->cache_buftotal == 0) {
1206 		mdb_free(cp, csize);
1207 		return (WALK_DONE);
1208 	}
1209 
1210 	/*
1211 	 * If they ask for bufctls, but it's a small-slab cache,
1212 	 * there is nothing to report.
1213 	 */
1214 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1215 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1216 		    cp->cache_flags));
1217 		mdb_free(cp, csize);
1218 		return (WALK_DONE);
1219 	}
1220 
1221 	/*
1222 	 * If they want constructed buffers, but there's no constructor or
1223 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1224 	 */
1225 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1226 	    cp->cache_constructor == NULL ||
1227 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1228 		mdb_free(cp, csize);
1229 		return (WALK_DONE);
1230 	}
1231 
1232 	/*
1233 	 * Read in the contents of the magazine layer
1234 	 */
1235 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1236 	    &magmax, UM_SLEEP) == WALK_ERR)
1237 		goto out2;
1238 
1239 	/*
1240 	 * We have all of the buffers from the magazines;  if we are walking
1241 	 * allocated buffers, sort them so we can bsearch them later.
1242 	 */
1243 	if (type & KM_ALLOCATED)
1244 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1245 
1246 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1247 
1248 	kmw->kmw_type = type;
1249 	kmw->kmw_addr = addr;
1250 	kmw->kmw_cp = cp;
1251 	kmw->kmw_csize = csize;
1252 	kmw->kmw_maglist = maglist;
1253 	kmw->kmw_max = magmax;
1254 	kmw->kmw_count = magcnt;
1255 	kmw->kmw_pos = 0;
1256 
1257 	/*
1258 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1259 	 * hash table instead of the slab layer.
1260 	 */
1261 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1262 		layered = "kmem_hash";
1263 
1264 		kmw->kmw_type |= KM_HASH;
1265 	} else {
1266 		/*
1267 		 * If we are walking freed buffers, we only need the
1268 		 * magazine layer plus the partially allocated slabs.
1269 		 * To walk allocated buffers, we need all of the slabs.
1270 		 */
1271 		if (type & KM_ALLOCATED)
1272 			layered = "kmem_slab";
1273 		else
1274 			layered = "kmem_slab_partial";
1275 
1276 		/*
1277 		 * for small-slab caches, we read in the entire slab.  For
1278 		 * freed buffers, we can just walk the freelist.  For
1279 		 * allocated buffers, we use a 'valid' array to track
1280 		 * the freed buffers.
1281 		 */
1282 		if (!(cp->cache_flags & KMF_HASH)) {
1283 			chunksize = cp->cache_chunksize;
1284 			slabsize = cp->cache_slabsize;
1285 
1286 			kmw->kmw_ubase = mdb_alloc(slabsize +
1287 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1288 
1289 			if (type & KM_ALLOCATED)
1290 				kmw->kmw_valid =
1291 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1292 		}
1293 	}
1294 
1295 	status = WALK_NEXT;
1296 
1297 	if (mdb_layered_walk(layered, wsp) == -1) {
1298 		mdb_warn("unable to start layered '%s' walk", layered);
1299 		status = WALK_ERR;
1300 	}
1301 
1302 out1:
1303 	if (status == WALK_ERR) {
1304 		if (kmw->kmw_valid)
1305 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1306 
1307 		if (kmw->kmw_ubase)
1308 			mdb_free(kmw->kmw_ubase, slabsize +
1309 			    sizeof (kmem_bufctl_t));
1310 
1311 		if (kmw->kmw_maglist)
1312 			mdb_free(kmw->kmw_maglist,
1313 			    kmw->kmw_max * sizeof (uintptr_t));
1314 
1315 		mdb_free(kmw, sizeof (kmem_walk_t));
1316 		wsp->walk_data = NULL;
1317 	}
1318 
1319 out2:
1320 	if (status == WALK_ERR)
1321 		mdb_free(cp, csize);
1322 
1323 	return (status);
1324 }
1325 
1326 int
1327 kmem_walk_step(mdb_walk_state_t *wsp)
1328 {
1329 	kmem_walk_t *kmw = wsp->walk_data;
1330 	int type = kmw->kmw_type;
1331 	kmem_cache_t *cp = kmw->kmw_cp;
1332 
1333 	void **maglist = kmw->kmw_maglist;
1334 	int magcnt = kmw->kmw_count;
1335 
1336 	uintptr_t chunksize, slabsize;
1337 	uintptr_t addr;
1338 	const kmem_slab_t *sp;
1339 	const kmem_bufctl_t *bcp;
1340 	kmem_bufctl_t bc;
1341 
1342 	int chunks;
1343 	char *kbase;
1344 	void *buf;
1345 	int i, ret;
1346 
1347 	char *valid, *ubase;
1348 
1349 	/*
1350 	 * first, handle the 'kmem_hash' layered walk case
1351 	 */
1352 	if (type & KM_HASH) {
1353 		/*
1354 		 * We have a buffer which has been allocated out of the
1355 		 * global layer. We need to make sure that it's not
1356 		 * actually sitting in a magazine before we report it as
1357 		 * an allocated buffer.
1358 		 */
1359 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1360 
1361 		if (magcnt > 0 &&
1362 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1363 		    addrcmp) != NULL)
1364 			return (WALK_NEXT);
1365 
1366 		if (type & KM_BUFCTL)
1367 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1368 
1369 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1370 	}
1371 
1372 	ret = WALK_NEXT;
1373 
1374 	addr = kmw->kmw_addr;
1375 
1376 	/*
1377 	 * If we're walking freed buffers, report everything in the
1378 	 * magazine layer before processing the first slab.
1379 	 */
1380 	if ((type & KM_FREE) && magcnt != 0) {
1381 		kmw->kmw_count = 0;		/* only do this once */
1382 		for (i = 0; i < magcnt; i++) {
1383 			buf = maglist[i];
1384 
1385 			if (type & KM_BUFCTL) {
1386 				uintptr_t out;
1387 
1388 				if (cp->cache_flags & KMF_BUFTAG) {
1389 					kmem_buftag_t *btp;
1390 					kmem_buftag_t tag;
1391 
1392 					/* LINTED - alignment */
1393 					btp = KMEM_BUFTAG(cp, buf);
1394 					if (mdb_vread(&tag, sizeof (tag),
1395 					    (uintptr_t)btp) == -1) {
1396 						mdb_warn("reading buftag for "
1397 						    "%p at %p", buf, btp);
1398 						continue;
1399 					}
1400 					out = (uintptr_t)tag.bt_bufctl;
1401 				} else {
1402 					if (kmem_hash_lookup(cp, addr, buf,
1403 					    &out) == -1)
1404 						continue;
1405 				}
1406 				ret = bufctl_walk_callback(cp, wsp, out);
1407 			} else {
1408 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1409 			}
1410 
1411 			if (ret != WALK_NEXT)
1412 				return (ret);
1413 		}
1414 	}
1415 
1416 	/*
1417 	 * If they want constructed buffers, we're finished, since the
1418 	 * magazine layer holds them all.
1419 	 */
1420 	if (type & KM_CONSTRUCTED)
1421 		return (WALK_DONE);
1422 
1423 	/*
1424 	 * Handle the buffers in the current slab
1425 	 */
1426 	chunksize = cp->cache_chunksize;
1427 	slabsize = cp->cache_slabsize;
1428 
1429 	sp = wsp->walk_layer;
1430 	chunks = sp->slab_chunks;
1431 	kbase = sp->slab_base;
1432 
1433 	dprintf(("kbase is %p\n", kbase));
1434 
1435 	if (!(cp->cache_flags & KMF_HASH)) {
1436 		valid = kmw->kmw_valid;
1437 		ubase = kmw->kmw_ubase;
1438 
1439 		if (mdb_vread(ubase, chunks * chunksize,
1440 		    (uintptr_t)kbase) == -1) {
1441 			mdb_warn("failed to read slab contents at %p", kbase);
1442 			return (WALK_ERR);
1443 		}
1444 
1445 		/*
1446 		 * Set up the valid map as fully allocated -- we'll punch
1447 		 * out the freelist.
1448 		 */
1449 		if (type & KM_ALLOCATED)
1450 			(void) memset(valid, 1, chunks);
1451 	} else {
1452 		valid = NULL;
1453 		ubase = NULL;
1454 	}
1455 
1456 	/*
1457 	 * walk the slab's freelist
1458 	 */
1459 	bcp = sp->slab_head;
1460 
1461 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1462 
1463 	/*
1464 	 * since we could be in the middle of allocating a buffer,
1465 	 * our refcnt could be one higher than it aught.  So we
1466 	 * check one further on the freelist than the count allows.
1467 	 */
1468 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1469 		uint_t ndx;
1470 
1471 		dprintf(("bcp is %p\n", bcp));
1472 
1473 		if (bcp == NULL) {
1474 			if (i == chunks)
1475 				break;
1476 			mdb_warn(
1477 			    "slab %p in cache %p freelist too short by %d\n",
1478 			    sp, addr, chunks - i);
1479 			break;
1480 		}
1481 
1482 		if (cp->cache_flags & KMF_HASH) {
1483 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1484 				mdb_warn("failed to read bufctl ptr at %p",
1485 				    bcp);
1486 				break;
1487 			}
1488 			buf = bc.bc_addr;
1489 		} else {
1490 			/*
1491 			 * Otherwise the buffer is in the slab which
1492 			 * we've read in;  we just need to determine
1493 			 * its offset in the slab to find the
1494 			 * kmem_bufctl_t.
1495 			 */
1496 			bc = *((kmem_bufctl_t *)
1497 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1498 			    (uintptr_t)ubase));
1499 
1500 			buf = KMEM_BUF(cp, bcp);
1501 		}
1502 
1503 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1504 
1505 		if (ndx > slabsize / cp->cache_bufsize) {
1506 			/*
1507 			 * This is very wrong; we have managed to find
1508 			 * a buffer in the slab which shouldn't
1509 			 * actually be here.  Emit a warning, and
1510 			 * try to continue.
1511 			 */
1512 			mdb_warn("buf %p is out of range for "
1513 			    "slab %p, cache %p\n", buf, sp, addr);
1514 		} else if (type & KM_ALLOCATED) {
1515 			/*
1516 			 * we have found a buffer on the slab's freelist;
1517 			 * clear its entry
1518 			 */
1519 			valid[ndx] = 0;
1520 		} else {
1521 			/*
1522 			 * Report this freed buffer
1523 			 */
1524 			if (type & KM_BUFCTL) {
1525 				ret = bufctl_walk_callback(cp, wsp,
1526 				    (uintptr_t)bcp);
1527 			} else {
1528 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1529 			}
1530 			if (ret != WALK_NEXT)
1531 				return (ret);
1532 		}
1533 
1534 		bcp = bc.bc_next;
1535 	}
1536 
1537 	if (bcp != NULL) {
1538 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1539 		    sp, addr, bcp));
1540 	}
1541 
1542 	/*
1543 	 * If we are walking freed buffers, the loop above handled reporting
1544 	 * them.
1545 	 */
1546 	if (type & KM_FREE)
1547 		return (WALK_NEXT);
1548 
1549 	if (type & KM_BUFCTL) {
1550 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1551 		    "cache %p\n", addr);
1552 		return (WALK_ERR);
1553 	}
1554 
1555 	/*
1556 	 * Report allocated buffers, skipping buffers in the magazine layer.
1557 	 * We only get this far for small-slab caches.
1558 	 */
1559 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1560 		buf = (char *)kbase + i * chunksize;
1561 
1562 		if (!valid[i])
1563 			continue;		/* on slab freelist */
1564 
1565 		if (magcnt > 0 &&
1566 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1567 		    addrcmp) != NULL)
1568 			continue;		/* in magazine layer */
1569 
1570 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1571 	}
1572 	return (ret);
1573 }
1574 
1575 void
1576 kmem_walk_fini(mdb_walk_state_t *wsp)
1577 {
1578 	kmem_walk_t *kmw = wsp->walk_data;
1579 	uintptr_t chunksize;
1580 	uintptr_t slabsize;
1581 
1582 	if (kmw == NULL)
1583 		return;
1584 
1585 	if (kmw->kmw_maglist != NULL)
1586 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1587 
1588 	chunksize = kmw->kmw_cp->cache_chunksize;
1589 	slabsize = kmw->kmw_cp->cache_slabsize;
1590 
1591 	if (kmw->kmw_valid != NULL)
1592 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1593 	if (kmw->kmw_ubase != NULL)
1594 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1595 
1596 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1597 	mdb_free(kmw, sizeof (kmem_walk_t));
1598 }
1599 
1600 /*ARGSUSED*/
1601 static int
1602 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1603 {
1604 	/*
1605 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1606 	 * memory in other caches.  This can be a little confusing, so we
1607 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1608 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1609 	 */
1610 	if (c->cache_cflags & KMC_NOTOUCH)
1611 		return (WALK_NEXT);
1612 
1613 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1614 	    wsp->walk_cbdata, addr) == -1)
1615 		return (WALK_DONE);
1616 
1617 	return (WALK_NEXT);
1618 }
1619 
/*
 * Perform a global walk: stash the walker name in walk_data and apply
 * kmem_walk_all() to every cache via the "kmem_cache" walker.  Note that this
 * macro always returns from the enclosing function (WALK_ERR if the cache
 * walk could not be run, WALK_DONE otherwise).  The wsp argument is
 * parenthesized so that any pointer expression may be passed safely.
 */
#define	KMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1626 
1627 int
1628 kmem_walk_init(mdb_walk_state_t *wsp)
1629 {
1630 	if (wsp->walk_arg != NULL)
1631 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1632 
1633 	if (wsp->walk_addr == NULL)
1634 		KMEM_WALK_ALL("kmem", wsp);
1635 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1636 }
1637 
1638 int
1639 bufctl_walk_init(mdb_walk_state_t *wsp)
1640 {
1641 	if (wsp->walk_addr == NULL)
1642 		KMEM_WALK_ALL("bufctl", wsp);
1643 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1644 }
1645 
1646 int
1647 freemem_walk_init(mdb_walk_state_t *wsp)
1648 {
1649 	if (wsp->walk_addr == NULL)
1650 		KMEM_WALK_ALL("freemem", wsp);
1651 	return (kmem_walk_init_common(wsp, KM_FREE));
1652 }
1653 
1654 int
1655 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1656 {
1657 	if (wsp->walk_addr == NULL)
1658 		KMEM_WALK_ALL("freemem_constructed", wsp);
1659 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1660 }
1661 
1662 int
1663 freectl_walk_init(mdb_walk_state_t *wsp)
1664 {
1665 	if (wsp->walk_addr == NULL)
1666 		KMEM_WALK_ALL("freectl", wsp);
1667 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1668 }
1669 
1670 int
1671 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1672 {
1673 	if (wsp->walk_addr == NULL)
1674 		KMEM_WALK_ALL("freectl_constructed", wsp);
1675 	return (kmem_walk_init_common(wsp,
1676 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1677 }
1678 
/*
 * State for the bufctl_history walker, which follows a bufctl's bc_lastlog
 * chain.  The cache and slab of the starting bufctl are remembered so that
 * each log entry can be validated against them.
 */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* address of next bufctl to read */
	kmem_cache_t	*bhw_cache;	/* bc_cache of the starting bufctl */
	kmem_slab_t	*bhw_slab;	/* bc_slab of the starting bufctl */
	hrtime_t	bhw_timestamp;	/* timestamp of last entry; 0 at start */
} bufctl_history_walk_t;
1685 
1686 int
1687 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1688 {
1689 	bufctl_history_walk_t *bhw;
1690 	kmem_bufctl_audit_t bc;
1691 	kmem_bufctl_audit_t bcn;
1692 
1693 	if (wsp->walk_addr == NULL) {
1694 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1695 		return (WALK_ERR);
1696 	}
1697 
1698 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1699 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1700 		return (WALK_ERR);
1701 	}
1702 
1703 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1704 	bhw->bhw_timestamp = 0;
1705 	bhw->bhw_cache = bc.bc_cache;
1706 	bhw->bhw_slab = bc.bc_slab;
1707 
1708 	/*
1709 	 * sometimes the first log entry matches the base bufctl;  in that
1710 	 * case, skip the base bufctl.
1711 	 */
1712 	if (bc.bc_lastlog != NULL &&
1713 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1714 	    bc.bc_addr == bcn.bc_addr &&
1715 	    bc.bc_cache == bcn.bc_cache &&
1716 	    bc.bc_slab == bcn.bc_slab &&
1717 	    bc.bc_timestamp == bcn.bc_timestamp &&
1718 	    bc.bc_thread == bcn.bc_thread)
1719 		bhw->bhw_next = bc.bc_lastlog;
1720 	else
1721 		bhw->bhw_next = (void *)wsp->walk_addr;
1722 
1723 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1724 	wsp->walk_data = bhw;
1725 
1726 	return (WALK_NEXT);
1727 }
1728 
1729 int
1730 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1731 {
1732 	bufctl_history_walk_t *bhw = wsp->walk_data;
1733 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1734 	uintptr_t baseaddr = wsp->walk_addr;
1735 	kmem_bufctl_audit_t bc;
1736 
1737 	if (addr == NULL)
1738 		return (WALK_DONE);
1739 
1740 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1741 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1742 		return (WALK_ERR);
1743 	}
1744 
1745 	/*
1746 	 * The bufctl is only valid if the address, cache, and slab are
1747 	 * correct.  We also check that the timestamp is decreasing, to
1748 	 * prevent infinite loops.
1749 	 */
1750 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1751 	    bc.bc_cache != bhw->bhw_cache ||
1752 	    bc.bc_slab != bhw->bhw_slab ||
1753 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1754 		return (WALK_DONE);
1755 
1756 	bhw->bhw_next = bc.bc_lastlog;
1757 	bhw->bhw_timestamp = bc.bc_timestamp;
1758 
1759 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1760 }
1761 
1762 void
1763 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1764 {
1765 	bufctl_history_walk_t *bhw = wsp->walk_data;
1766 
1767 	mdb_free(bhw, sizeof (*bhw));
1768 }
1769 
/*
 * State for the kmem log walker: a local snapshot of the whole log plus an
 * array of pointers into that snapshot, sorted with bufctlcmp().
 */
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;	/* local copy of the log contents */
	kmem_bufctl_audit_t **klw_sorted; /* sorted pointers into klw_base */
	kmem_log_header_t klw_lh;	/* local copy of the log header */
	size_t klw_size;		/* size of klw_base, in bytes */
	size_t klw_maxndx;		/* number of entries in klw_sorted */
	size_t klw_ndx;			/* next klw_sorted entry to report */
} kmem_log_walk_t;
1778 
1779 int
1780 kmem_log_walk_init(mdb_walk_state_t *wsp)
1781 {
1782 	uintptr_t lp = wsp->walk_addr;
1783 	kmem_log_walk_t *klw;
1784 	kmem_log_header_t *lhp;
1785 	int maxndx, i, j, k;
1786 
1787 	/*
1788 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1789 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1790 	 */
1791 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1792 		mdb_warn("failed to read 'kmem_transaction_log'");
1793 		return (WALK_ERR);
1794 	}
1795 
1796 	if (lp == NULL) {
1797 		mdb_warn("log is disabled\n");
1798 		return (WALK_ERR);
1799 	}
1800 
1801 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1802 	lhp = &klw->klw_lh;
1803 
1804 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1805 		mdb_warn("failed to read log header at %p", lp);
1806 		mdb_free(klw, sizeof (kmem_log_walk_t));
1807 		return (WALK_ERR);
1808 	}
1809 
1810 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1811 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1812 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1813 
1814 	if (mdb_vread(klw->klw_base, klw->klw_size,
1815 	    (uintptr_t)lhp->lh_base) == -1) {
1816 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1817 		mdb_free(klw->klw_base, klw->klw_size);
1818 		mdb_free(klw, sizeof (kmem_log_walk_t));
1819 		return (WALK_ERR);
1820 	}
1821 
1822 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1823 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1824 
1825 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1826 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1827 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1828 
1829 		for (j = 0; j < maxndx; j++)
1830 			klw->klw_sorted[k++] = &chunk[j];
1831 	}
1832 
1833 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1834 	    (int(*)(const void *, const void *))bufctlcmp);
1835 
1836 	klw->klw_maxndx = k;
1837 	wsp->walk_data = klw;
1838 
1839 	return (WALK_NEXT);
1840 }
1841 
1842 int
1843 kmem_log_walk_step(mdb_walk_state_t *wsp)
1844 {
1845 	kmem_log_walk_t *klw = wsp->walk_data;
1846 	kmem_bufctl_audit_t *bcp;
1847 
1848 	if (klw->klw_ndx == klw->klw_maxndx)
1849 		return (WALK_DONE);
1850 
1851 	bcp = klw->klw_sorted[klw->klw_ndx++];
1852 
1853 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1854 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1855 }
1856 
1857 void
1858 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1859 {
1860 	kmem_log_walk_t *klw = wsp->walk_data;
1861 
1862 	mdb_free(klw->klw_base, klw->klw_size);
1863 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1864 	    sizeof (kmem_bufctl_audit_t *));
1865 	mdb_free(klw, sizeof (kmem_log_walk_t));
1866 }
1867 
/*
 * One bufctl collected by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* address of the bufctl */
	hrtime_t abb_ts;	/* its bc_timestamp, used for sorting */
} allocdby_bufctl_t;
1872 
/*
 * State for the allocdby/freedby walkers: a growable array of the bufctls
 * whose bc_thread matches the thread of interest.
 */
typedef struct allocdby_walk {
	const char *abw_walk;	/* per-cache walker used to gather bufctls */
	uintptr_t abw_thread;	/* thread address to match */
	size_t abw_nbufs;	/* entries in use in abw_buf */
	size_t abw_size;	/* capacity of abw_buf, in entries */
	allocdby_bufctl_t *abw_buf;	/* collected bufctls */
	size_t abw_ndx;		/* next abw_buf entry to report */
} allocdby_walk_t;
1881 
1882 int
1883 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1884     allocdby_walk_t *abw)
1885 {
1886 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1887 		return (WALK_NEXT);
1888 
1889 	if (abw->abw_nbufs == abw->abw_size) {
1890 		allocdby_bufctl_t *buf;
1891 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1892 
1893 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1894 
1895 		bcopy(abw->abw_buf, buf, oldsize);
1896 		mdb_free(abw->abw_buf, oldsize);
1897 
1898 		abw->abw_size <<= 1;
1899 		abw->abw_buf = buf;
1900 	}
1901 
1902 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1903 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1904 	abw->abw_nbufs++;
1905 
1906 	return (WALK_NEXT);
1907 }
1908 
1909 /*ARGSUSED*/
1910 int
1911 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1912 {
1913 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1914 	    abw, addr) == -1) {
1915 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1916 		return (WALK_DONE);
1917 	}
1918 
1919 	return (WALK_NEXT);
1920 }
1921 
1922 static int
1923 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1924 {
1925 	if (lhs->abb_ts < rhs->abb_ts)
1926 		return (1);
1927 	if (lhs->abb_ts > rhs->abb_ts)
1928 		return (-1);
1929 	return (0);
1930 }
1931 
1932 static int
1933 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1934 {
1935 	allocdby_walk_t *abw;
1936 
1937 	if (wsp->walk_addr == NULL) {
1938 		mdb_warn("allocdby walk doesn't support global walks\n");
1939 		return (WALK_ERR);
1940 	}
1941 
1942 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1943 
1944 	abw->abw_thread = wsp->walk_addr;
1945 	abw->abw_walk = walk;
1946 	abw->abw_size = 128;	/* something reasonable */
1947 	abw->abw_buf =
1948 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1949 
1950 	wsp->walk_data = abw;
1951 
1952 	if (mdb_walk("kmem_cache",
1953 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1954 		mdb_warn("couldn't walk kmem_cache");
1955 		allocdby_walk_fini(wsp);
1956 		return (WALK_ERR);
1957 	}
1958 
1959 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1960 	    (int(*)(const void *, const void *))allocdby_cmp);
1961 
1962 	return (WALK_NEXT);
1963 }
1964 
1965 int
1966 allocdby_walk_init(mdb_walk_state_t *wsp)
1967 {
1968 	return (allocdby_walk_init_common(wsp, "bufctl"));
1969 }
1970 
1971 int
1972 freedby_walk_init(mdb_walk_state_t *wsp)
1973 {
1974 	return (allocdby_walk_init_common(wsp, "freectl"));
1975 }
1976 
1977 int
1978 allocdby_walk_step(mdb_walk_state_t *wsp)
1979 {
1980 	allocdby_walk_t *abw = wsp->walk_data;
1981 	kmem_bufctl_audit_t bc;
1982 	uintptr_t addr;
1983 
1984 	if (abw->abw_ndx == abw->abw_nbufs)
1985 		return (WALK_DONE);
1986 
1987 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1988 
1989 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1990 		mdb_warn("couldn't read bufctl at %p", addr);
1991 		return (WALK_DONE);
1992 	}
1993 
1994 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1995 }
1996 
1997 void
1998 allocdby_walk_fini(mdb_walk_state_t *wsp)
1999 {
2000 	allocdby_walk_t *abw = wsp->walk_data;
2001 
2002 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2003 	mdb_free(abw, sizeof (allocdby_walk_t));
2004 }
2005 
2006 /*ARGSUSED*/
2007 int
2008 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2009 {
2010 	char c[MDB_SYM_NAMLEN];
2011 	GElf_Sym sym;
2012 	int i;
2013 
2014 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2015 	for (i = 0; i < bcp->bc_depth; i++) {
2016 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2017 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2018 			continue;
2019 		if (strncmp(c, "kmem_", 5) == 0)
2020 			continue;
2021 		mdb_printf("%s+0x%lx",
2022 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2023 		break;
2024 	}
2025 	mdb_printf("\n");
2026 
2027 	return (WALK_NEXT);
2028 }
2029 
2030 static int
2031 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2032 {
2033 	if (!(flags & DCMD_ADDRSPEC))
2034 		return (DCMD_USAGE);
2035 
2036 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2037 
2038 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2039 		mdb_warn("can't walk '%s' for %p", w, addr);
2040 		return (DCMD_ERR);
2041 	}
2042 
2043 	return (DCMD_OK);
2044 }
2045 
2046 /*ARGSUSED*/
2047 int
2048 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2049 {
2050 	return (allocdby_common(addr, flags, "allocdby"));
2051 }
2052 
2053 /*ARGSUSED*/
2054 int
2055 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2056 {
2057 	return (allocdby_common(addr, flags, "freedby"));
2058 }
2059 
2060 /*
2061  * Return a string describing the address in relation to the given thread's
2062  * stack.
2063  *
2064  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2065  *
2066  * - If the address is above the stack pointer, return an empty string
2067  *   signifying that the address is active.
2068  *
2069  * - If the address is below the stack pointer, and the thread is not on proc,
2070  *   return " (below sp)".
2071  *
2072  * - If the address is below the stack pointer, and the thread is on proc,
2073  *   return " (possibly below sp)".  Depending on context, we may or may not
2074  *   have an accurate t_sp.
2075  */
2076 static const char *
2077 stack_active(const kthread_t *t, uintptr_t addr)
2078 {
2079 	uintptr_t panicstk;
2080 	GElf_Sym sym;
2081 
2082 	if (t->t_state == TS_FREE)
2083 		return (" (inactive interrupt thread)");
2084 
2085 	/*
2086 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2087 	 * no longer relates to the thread's real stack.
2088 	 */
2089 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2090 		panicstk = (uintptr_t)sym.st_value;
2091 
2092 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2093 			return ("");
2094 	}
2095 
2096 	if (addr >= t->t_sp + STACK_BIAS)
2097 		return ("");
2098 
2099 	if (t->t_state == TS_ONPROC)
2100 		return (" (possibly below sp)");
2101 
2102 	return (" (below sp)");
2103 }
2104 
/*
 * Additional state for the kmem and vmem ::whatis handlers.  It is passed
 * as the callback data to the whatis_walk_*() functions.
 */
typedef struct whatis_info {
	mdb_whatis_t *wi_w;		/* handle used for matching/reporting */
	const kmem_cache_t *wi_cache;	/* cache currently being searched */
	const vmem_t *wi_vmem;
	vmem_t *wi_msb_arena;
	size_t wi_slab_size;
	uint_t wi_slab_found;
	uint_t wi_kmem_lite_count;	/* caller count for KMF_LITE buftags */
	uint_t wi_freemem;		/* FALSE: "allocated"; else "freed" */
} whatis_info_t;
2118 
2119 /* call one of our dcmd functions with "-v" and the provided address */
2120 static void
2121 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2122 {
2123 	mdb_arg_t a;
2124 	a.a_type = MDB_TYPE_STRING;
2125 	a.a_un.a_str = "-v";
2126 
2127 	mdb_printf(":\n");
2128 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2129 }
2130 
2131 static void
2132 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2133 {
2134 #define	KMEM_LITE_MAX	16
2135 	pc_t callers[KMEM_LITE_MAX];
2136 	pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2137 
2138 	kmem_buftag_t bt;
2139 	intptr_t stat;
2140 	const char *plural = "";
2141 	int i;
2142 
2143 	/* validate our arguments and read in the buftag */
2144 	if (count == 0 || count > KMEM_LITE_MAX ||
2145 	    mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2146 		return;
2147 
2148 	/* validate the buffer state and read in the callers */
2149 	stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2150 
2151 	if (stat != KMEM_BUFTAG_ALLOC || stat != KMEM_BUFTAG_FREE ||
2152 	    mdb_vread(callers, count * sizeof (pc_t),
2153 	    btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2154 		return;
2155 
2156 	/* If there aren't any filled in callers, bail */
2157 	if (callers[0] == uninit)
2158 		return;
2159 
2160 	plural = (callers[1] == uninit) ? "" : "s";
2161 
2162 	/* Everything's done and checked; print them out */
2163 	mdb_printf(":\n");
2164 
2165 	mdb_inc_indent(8);
2166 	mdb_printf("recent caller%s: %a", plural, callers[0]);
2167 	for (i = 1; i < count; i++) {
2168 		if (callers[i] == uninit)
2169 			break;
2170 		mdb_printf(", %a", callers[i]);
2171 	}
2172 	mdb_dec_indent(8);
2173 }
2174 
2175 static void
2176 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2177     uintptr_t baddr)
2178 {
2179 	mdb_whatis_t *w = wi->wi_w;
2180 
2181 	const kmem_cache_t *cp = wi->wi_cache;
2182 	/* LINTED pointer cast may result in improper alignment */
2183 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2184 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2185 	int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2186 
2187 	mdb_whatis_report_object(w, maddr, addr, "");
2188 
2189 	if (baddr != 0 && !call_printer)
2190 		mdb_printf("bufctl %p ", baddr);
2191 
2192 	mdb_printf("%s from %s",
2193 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2194 
2195 	if (baddr != 0 && call_printer) {
2196 		whatis_call_printer(bufctl, baddr);
2197 		return;
2198 	}
2199 
2200 	/* for KMF_LITE caches, try to print out the previous callers */
2201 	if (!quiet && (cp->cache_flags & KMF_LITE))
2202 		whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2203 
2204 	mdb_printf("\n");
2205 }
2206 
2207 /*ARGSUSED*/
2208 static int
2209 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2210 {
2211 	mdb_whatis_t *w = wi->wi_w;
2212 
2213 	uintptr_t cur;
2214 	size_t size = wi->wi_cache->cache_bufsize;
2215 
2216 	while (mdb_whatis_match(w, addr, size, &cur))
2217 		whatis_print_kmem(wi, cur, addr, NULL);
2218 
2219 	return (WHATIS_WALKRET(w));
2220 }
2221 
2222 /*ARGSUSED*/
2223 static int
2224 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2225 {
2226 	mdb_whatis_t *w = wi->wi_w;
2227 
2228 	uintptr_t cur;
2229 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
2230 	size_t size = wi->wi_cache->cache_bufsize;
2231 
2232 	while (mdb_whatis_match(w, addr, size, &cur))
2233 		whatis_print_kmem(wi, cur, addr, baddr);
2234 
2235 	return (WHATIS_WALKRET(w));
2236 }
2237 
2238 static int
2239 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2240 {
2241 	mdb_whatis_t *w = wi->wi_w;
2242 
2243 	size_t size = vs->vs_end - vs->vs_start;
2244 	uintptr_t cur;
2245 
2246 	/* We're not interested in anything but alloc and free segments */
2247 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2248 		return (WALK_NEXT);
2249 
2250 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2251 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
2252 
2253 		/*
2254 		 * If we're not printing it seperately, provide the vmem_seg
2255 		 * pointer if it has a stack trace.
2256 		 */
2257 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2258 		    (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2259 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2260 			mdb_printf("vmem_seg %p ", addr);
2261 		}
2262 
2263 		mdb_printf("%s from the %s vmem arena",
2264 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2265 		    wi->wi_vmem->vm_name);
2266 
2267 		if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2268 			whatis_call_printer(vmem_seg, addr);
2269 		else
2270 			mdb_printf("\n");
2271 	}
2272 
2273 	return (WHATIS_WALKRET(w));
2274 }
2275 
2276 static int
2277 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2278 {
2279 	mdb_whatis_t *w = wi->wi_w;
2280 	const char *nm = vmem->vm_name;
2281 
2282 	int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2283 	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2284 
2285 	if (identifier != idspace)
2286 		return (WALK_NEXT);
2287 
2288 	wi->wi_vmem = vmem;
2289 
2290 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2291 		mdb_printf("Searching vmem arena %s...\n", nm);
2292 
2293 	if (mdb_pwalk("vmem_seg",
2294 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2295 		mdb_warn("can't walk vmem_seg for %p", addr);
2296 		return (WALK_NEXT);
2297 	}
2298 
2299 	return (WHATIS_WALKRET(w));
2300 }
2301 
2302 /*ARGSUSED*/
2303 static int
2304 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2305 {
2306 	mdb_whatis_t *w = wi->wi_w;
2307 
2308 	/* It must overlap with the slab data, or it's not interesting */
2309 	if (mdb_whatis_overlaps(w,
2310 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2311 		wi->wi_slab_found++;
2312 		return (WALK_DONE);
2313 	}
2314 	return (WALK_NEXT);
2315 }
2316 
2317 static int
2318 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2319 {
2320 	mdb_whatis_t *w = wi->wi_w;
2321 
2322 	char *walk, *freewalk;
2323 	mdb_walk_cb_t func;
2324 	int do_bufctl;
2325 
2326 	int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2327 	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2328 
2329 	if (identifier != idspace)
2330 		return (WALK_NEXT);
2331 
2332 	/* Override the '-b' flag as necessary */
2333 	if (!(c->cache_flags & KMF_HASH))
2334 		do_bufctl = FALSE;	/* no bufctls to walk */
2335 	else if (c->cache_flags & KMF_AUDIT)
2336 		do_bufctl = TRUE;	/* we always want debugging info */
2337 	else
2338 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2339 
2340 	if (do_bufctl) {
2341 		walk = "bufctl";
2342 		freewalk = "freectl";
2343 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2344 	} else {
2345 		walk = "kmem";
2346 		freewalk = "freemem";
2347 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2348 	}
2349 
2350 	wi->wi_cache = c;
2351 
2352 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2353 		mdb_printf("Searching %s...\n", c->cache_name);
2354 
2355 	/*
2356 	 * If more then two buffers live on each slab, figure out if we're
2357 	 * interested in anything in any slab before doing the more expensive
2358 	 * kmem/freemem (bufctl/freectl) walkers.
2359 	 */
2360 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2361 	if (!(c->cache_flags & KMF_HASH))
2362 		wi->wi_slab_size -= sizeof (kmem_slab_t);
2363 
2364 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2365 		wi->wi_slab_found = 0;
2366 		if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2367 		    addr) == -1) {
2368 			mdb_warn("can't find kmem_slab walker");
2369 			return (WALK_DONE);
2370 		}
2371 		if (wi->wi_slab_found == 0)
2372 			return (WALK_NEXT);
2373 	}
2374 
2375 	wi->wi_freemem = FALSE;
2376 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2377 		mdb_warn("can't find %s walker", walk);
2378 		return (WALK_DONE);
2379 	}
2380 
2381 	if (mdb_whatis_done(w))
2382 		return (WALK_DONE);
2383 
2384 	/*
2385 	 * We have searched for allocated memory; now search for freed memory.
2386 	 */
2387 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2388 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2389 
2390 	wi->wi_freemem = TRUE;
2391 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2392 		mdb_warn("can't find %s walker", freewalk);
2393 		return (WALK_DONE);
2394 	}
2395 
2396 	return (WHATIS_WALKRET(w));
2397 }
2398 
2399 static int
2400 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2401 {
2402 	if (c->cache_arena == wi->wi_msb_arena ||
2403 	    (c->cache_cflags & KMC_NOTOUCH))
2404 		return (WALK_NEXT);
2405 
2406 	return (whatis_walk_cache(addr, c, wi));
2407 }
2408 
2409 static int
2410 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2411 {
2412 	if (c->cache_arena != wi->wi_msb_arena)
2413 		return (WALK_NEXT);
2414 
2415 	return (whatis_walk_cache(addr, c, wi));
2416 }
2417 
2418 static int
2419 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2420 {
2421 	if (c->cache_arena == wi->wi_msb_arena ||
2422 	    !(c->cache_cflags & KMC_NOTOUCH))
2423 		return (WALK_NEXT);
2424 
2425 	return (whatis_walk_cache(addr, c, wi));
2426 }
2427 
2428 static int
2429 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2430 {
2431 	uintptr_t cur;
2432 	uintptr_t saddr;
2433 	size_t size;
2434 
2435 	/*
2436 	 * Often, one calls ::whatis on an address from a thread structure.
2437 	 * We use this opportunity to short circuit this case...
2438 	 */
2439 	while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2440 		mdb_whatis_report_object(w, cur, addr,
2441 		    "allocated as a thread structure\n");
2442 
2443 	/*
2444 	 * Now check the stack
2445 	 */
2446 	if (t->t_stkbase == NULL)
2447 		return (WALK_NEXT);
2448 
2449 	/*
2450 	 * This assumes that t_stk is the end of the stack, but it's really
2451 	 * only the initial stack pointer for the thread.  Arguments to the
2452 	 * initial procedure, SA(MINFRAME), etc. are all after t_stk.  So
2453 	 * that 't->t_stk::whatis' reports "part of t's stack", we include
2454 	 * t_stk in the range (the "+ 1", below), but the kernel should
2455 	 * really include the full stack bounds where we can find it.
2456 	 */
2457 	saddr = (uintptr_t)t->t_stkbase;
2458 	size = (uintptr_t)t->t_stk - saddr + 1;
2459 	while (mdb_whatis_match(w, saddr, size, &cur))
2460 		mdb_whatis_report_object(w, cur, cur,
2461 		    "in thread %p's stack%s\n", addr, stack_active(t, cur));
2462 
2463 	return (WHATIS_WALKRET(w));
2464 }
2465 
2466 static void
2467 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2468     uintptr_t base, size_t size, const char *where)
2469 {
2470 	uintptr_t cur;
2471 
2472 	/*
2473 	 * Since we're searching for addresses inside a module, we report
2474 	 * them as symbols.
2475 	 */
2476 	while (mdb_whatis_match(w, base, size, &cur))
2477 		mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2478 }
2479 
2480 static int
2481 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2482 {
2483 	char name[MODMAXNAMELEN];
2484 	struct module mod;
2485 	Shdr shdr;
2486 
2487 	if (m->mod_mp == NULL)
2488 		return (WALK_NEXT);
2489 
2490 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2491 		mdb_warn("couldn't read modctl %p's module", addr);
2492 		return (WALK_NEXT);
2493 	}
2494 
2495 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2496 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2497 
2498 	whatis_modctl_match(w, name,
2499 	    (uintptr_t)mod.text, mod.text_size, "text segment");
2500 	whatis_modctl_match(w, name,
2501 	    (uintptr_t)mod.data, mod.data_size, "data segment");
2502 	whatis_modctl_match(w, name,
2503 	    (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2504 
2505 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2506 		mdb_warn("couldn't read symbol header for %p's module", addr);
2507 		return (WALK_NEXT);
2508 	}
2509 
2510 	whatis_modctl_match(w, name,
2511 	    (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2512 	whatis_modctl_match(w, name,
2513 	    (uintptr_t)mod.symspace, mod.symsize, "symtab");
2514 
2515 	return (WHATIS_WALKRET(w));
2516 }
2517 
2518 /*ARGSUSED*/
2519 static int
2520 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2521 {
2522 	uintptr_t cur;
2523 
2524 	uintptr_t base = (uintptr_t)seg->pages;
2525 	size_t size = (uintptr_t)seg->epages - base;
2526 
2527 	while (mdb_whatis_match(w, base, size, &cur)) {
2528 		/* round our found pointer down to the page_t base. */
2529 		size_t offset = (cur - base) % sizeof (page_t);
2530 
2531 		mdb_whatis_report_object(w, cur, cur - offset,
2532 		    "allocated as a page structure\n");
2533 	}
2534 
2535 	return (WHATIS_WALKRET(w));
2536 }
2537 
2538 /*ARGSUSED*/
2539 static int
2540 whatis_run_modules(mdb_whatis_t *w, void *arg)
2541 {
2542 	if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2543 		mdb_warn("couldn't find modctl walker");
2544 		return (1);
2545 	}
2546 	return (0);
2547 }
2548 
2549 /*ARGSUSED*/
2550 static int
2551 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2552 {
2553 	/*
2554 	 * Now search all thread stacks.  Yes, this is a little weak; we
2555 	 * can save a lot of work by first checking to see if the
2556 	 * address is in segkp vs. segkmem.  But hey, computers are
2557 	 * fast.
2558 	 */
2559 	if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2560 		mdb_warn("couldn't find thread walker");
2561 		return (1);
2562 	}
2563 	return (0);
2564 }
2565 
2566 /*ARGSUSED*/
2567 static int
2568 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2569 {
2570 	if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2571 		mdb_warn("couldn't find memseg walker");
2572 		return (1);
2573 	}
2574 	return (0);
2575 }
2576 
2577 /*ARGSUSED*/
2578 static int
2579 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2580 {
2581 	whatis_info_t wi;
2582 
2583 	bzero(&wi, sizeof (wi));
2584 	wi.wi_w = w;
2585 
2586 	if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2587 		mdb_warn("unable to readvar \"kmem_msb_arena\"");
2588 
2589 	if (mdb_readvar(&wi.wi_kmem_lite_count,
2590 	    "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2591 		wi.wi_kmem_lite_count = 0;
2592 
2593 	/*
2594 	 * We process kmem caches in the following order:
2595 	 *
2596 	 *	non-KMC_NOTOUCH, non-metadata	(typically the most interesting)
2597 	 *	metadata			(can be huge with KMF_AUDIT)
2598 	 *	KMC_NOTOUCH, non-metadata	(see kmem_walk_all())
2599 	 */
2600 	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2601 	    &wi) == -1 ||
2602 	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2603 	    &wi) == -1 ||
2604 	    mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2605 	    &wi) == -1) {
2606 		mdb_warn("couldn't find kmem_cache walker");
2607 		return (1);
2608 	}
2609 	return (0);
2610 }
2611 
2612 /*ARGSUSED*/
2613 static int
2614 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2615 {
2616 	whatis_info_t wi;
2617 
2618 	bzero(&wi, sizeof (wi));
2619 	wi.wi_w = w;
2620 
2621 	if (mdb_walk("vmem_postfix",
2622 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2623 		mdb_warn("couldn't find vmem_postfix walker");
2624 		return (1);
2625 	}
2626 	return (0);
2627 }
2628 
/*
 * Address range associated with one CPU in the kmem transaction log, as
 * filled in by kmem_log().  kmem_log_walk() attributes a log entry to the
 * first CPU whose [kmc_low, kmc_high) range contains the entry's address.
 */
typedef struct kmem_log_cpu {
	uintptr_t kmc_low;	/* clh_chunk * lh_chunksize + lh_base */
	uintptr_t kmc_high;	/* the CPU's clh_current */
} kmem_log_cpu_t;
2633 
/*
 * Callback state handed to kmem_log_walk().
 */
typedef struct kmem_log_data {
	uintptr_t kmd_addr;	/* if nonzero, only show entries whose */
				/* buffer contains this address */
	kmem_log_cpu_t *kmd_cpu; /* per-CPU log ranges, NCPU entries */
} kmem_log_data_t;
2638 
2639 int
2640 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2641     kmem_log_data_t *kmd)
2642 {
2643 	int i;
2644 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2645 	size_t bufsize;
2646 
2647 	for (i = 0; i < NCPU; i++) {
2648 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2649 			break;
2650 	}
2651 
2652 	if (kmd->kmd_addr) {
2653 		if (b->bc_cache == NULL)
2654 			return (WALK_NEXT);
2655 
2656 		if (mdb_vread(&bufsize, sizeof (bufsize),
2657 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2658 			mdb_warn(
2659 			    "failed to read cache_bufsize for cache at %p",
2660 			    b->bc_cache);
2661 			return (WALK_ERR);
2662 		}
2663 
2664 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2665 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2666 			return (WALK_NEXT);
2667 	}
2668 
2669 	if (i == NCPU)
2670 		mdb_printf("   ");
2671 	else
2672 		mdb_printf("%3d", i);
2673 
2674 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2675 	    b->bc_timestamp, b->bc_thread);
2676 
2677 	return (WALK_NEXT);
2678 }
2679 
2680 /*ARGSUSED*/
2681 int
2682 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2683 {
2684 	kmem_log_header_t lh;
2685 	kmem_cpu_log_header_t clh;
2686 	uintptr_t lhp, clhp;
2687 	int ncpus;
2688 	uintptr_t *cpu;
2689 	GElf_Sym sym;
2690 	kmem_log_cpu_t *kmc;
2691 	int i;
2692 	kmem_log_data_t kmd;
2693 	uint_t opt_b = FALSE;
2694 
2695 	if (mdb_getopts(argc, argv,
2696 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2697 		return (DCMD_USAGE);
2698 
2699 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2700 		mdb_warn("failed to read 'kmem_transaction_log'");
2701 		return (DCMD_ERR);
2702 	}
2703 
2704 	if (lhp == NULL) {
2705 		mdb_warn("no kmem transaction log\n");
2706 		return (DCMD_ERR);
2707 	}
2708 
2709 	mdb_readvar(&ncpus, "ncpus");
2710 
2711 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2712 		mdb_warn("failed to read log header at %p", lhp);
2713 		return (DCMD_ERR);
2714 	}
2715 
2716 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2717 
2718 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2719 
2720 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2721 		mdb_warn("couldn't find 'cpu' array");
2722 		return (DCMD_ERR);
2723 	}
2724 
2725 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2726 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2727 		    NCPU * sizeof (uintptr_t), sym.st_size);
2728 		return (DCMD_ERR);
2729 	}
2730 
2731 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2732 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2733 		return (DCMD_ERR);
2734 	}
2735 
2736 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2737 	kmd.kmd_addr = NULL;
2738 	kmd.kmd_cpu = kmc;
2739 
2740 	for (i = 0; i < NCPU; i++) {
2741 
2742 		if (cpu[i] == NULL)
2743 			continue;
2744 
2745 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2746 			mdb_warn("cannot read cpu %d's log header at %p",
2747 			    i, clhp);
2748 			return (DCMD_ERR);
2749 		}
2750 
2751 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2752 		    (uintptr_t)lh.lh_base;
2753 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2754 
2755 		clhp += sizeof (kmem_cpu_log_header_t);
2756 	}
2757 
2758 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2759 	    "TIMESTAMP", "THREAD");
2760 
2761 	/*
2762 	 * If we have been passed an address, print out only log entries
2763 	 * corresponding to that address.  If opt_b is specified, then interpret
2764 	 * the address as a bufctl.
2765 	 */
2766 	if (flags & DCMD_ADDRSPEC) {
2767 		kmem_bufctl_audit_t b;
2768 
2769 		if (opt_b) {
2770 			kmd.kmd_addr = addr;
2771 		} else {
2772 			if (mdb_vread(&b,
2773 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2774 				mdb_warn("failed to read bufctl at %p", addr);
2775 				return (DCMD_ERR);
2776 			}
2777 
2778 			(void) kmem_log_walk(addr, &b, &kmd);
2779 
2780 			return (DCMD_OK);
2781 		}
2782 	}
2783 
2784 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2785 		mdb_warn("can't find kmem log walker");
2786 		return (DCMD_ERR);
2787 	}
2788 
2789 	return (DCMD_OK);
2790 }
2791 
/*
 * State passed to bufctl_history_callback() while bufctl() walks a
 * bufctl's history.
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags handed to bufctl(); */
					/* DCMD_LOOPFIRST is cleared after */
					/* each call */
	int		bhc_argc;	/* argument count for bufctl() */
	const mdb_arg_t	*bhc_argv;	/* argument vector for bufctl() */
	int		bhc_ret;	/* result of the latest bufctl() call */
} bufctl_history_cb_t;
2798 
2799 /*ARGSUSED*/
2800 static int
2801 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2802 {
2803 	bufctl_history_cb_t *bhc = arg;
2804 
2805 	bhc->bhc_ret =
2806 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2807 
2808 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2809 
2810 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2811 }
2812 
/*
 * Help text for the ::bufctl dcmd: a one-line summary followed by an
 * OPTIONS section describing each flag.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s", summary);

	/* The section heading is printed two columns to the left. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2835 
2836 int
2837 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2838 {
2839 	kmem_bufctl_audit_t bc;
2840 	uint_t verbose = FALSE;
2841 	uint_t history = FALSE;
2842 	uint_t in_history = FALSE;
2843 	uintptr_t caller = NULL, thread = NULL;
2844 	uintptr_t laddr, haddr, baddr = NULL;
2845 	hrtime_t earliest = 0, latest = 0;
2846 	int i, depth;
2847 	char c[MDB_SYM_NAMLEN];
2848 	GElf_Sym sym;
2849 
2850 	if (mdb_getopts(argc, argv,
2851 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2852 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2853 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2854 	    'c', MDB_OPT_UINTPTR, &caller,
2855 	    't', MDB_OPT_UINTPTR, &thread,
2856 	    'e', MDB_OPT_UINT64, &earliest,
2857 	    'l', MDB_OPT_UINT64, &latest,
2858 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2859 		return (DCMD_USAGE);
2860 
2861 	if (!(flags & DCMD_ADDRSPEC))
2862 		return (DCMD_USAGE);
2863 
2864 	if (in_history && !history)
2865 		return (DCMD_USAGE);
2866 
2867 	if (history && !in_history) {
2868 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2869 		    UM_SLEEP | UM_GC);
2870 		bufctl_history_cb_t bhc;
2871 
2872 		nargv[0].a_type = MDB_TYPE_STRING;
2873 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2874 
2875 		for (i = 0; i < argc; i++)
2876 			nargv[i + 1] = argv[i];
2877 
2878 		/*
2879 		 * When in history mode, we treat each element as if it
2880 		 * were in a seperate loop, so that the headers group
2881 		 * bufctls with similar histories.
2882 		 */
2883 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2884 		bhc.bhc_argc = argc + 1;
2885 		bhc.bhc_argv = nargv;
2886 		bhc.bhc_ret = DCMD_OK;
2887 
2888 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2889 		    addr) == -1) {
2890 			mdb_warn("unable to walk bufctl_history");
2891 			return (DCMD_ERR);
2892 		}
2893 
2894 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2895 			mdb_printf("\n");
2896 
2897 		return (bhc.bhc_ret);
2898 	}
2899 
2900 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2901 		if (verbose) {
2902 			mdb_printf("%16s %16s %16s %16s\n"
2903 			    "%<u>%16s %16s %16s %16s%</u>\n",
2904 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2905 			    "", "CACHE", "LASTLOG", "CONTENTS");
2906 		} else {
2907 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2908 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2909 		}
2910 	}
2911 
2912 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2913 		mdb_warn("couldn't read bufctl at %p", addr);
2914 		return (DCMD_ERR);
2915 	}
2916 
2917 	/*
2918 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2919 	 * the address does not really refer to a bufctl.
2920 	 */
2921 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2922 
2923 	if (caller != NULL) {
2924 		laddr = caller;
2925 		haddr = caller + sizeof (caller);
2926 
2927 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2928 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2929 			/*
2930 			 * We were provided an exact symbol value; any
2931 			 * address in the function is valid.
2932 			 */
2933 			laddr = (uintptr_t)sym.st_value;
2934 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2935 		}
2936 
2937 		for (i = 0; i < depth; i++)
2938 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2939 				break;
2940 
2941 		if (i == depth)
2942 			return (DCMD_OK);
2943 	}
2944 
2945 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2946 		return (DCMD_OK);
2947 
2948 	if (earliest != 0 && bc.bc_timestamp < earliest)
2949 		return (DCMD_OK);
2950 
2951 	if (latest != 0 && bc.bc_timestamp > latest)
2952 		return (DCMD_OK);
2953 
2954 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2955 		return (DCMD_OK);
2956 
2957 	if (flags & DCMD_PIPE_OUT) {
2958 		mdb_printf("%#lr\n", addr);
2959 		return (DCMD_OK);
2960 	}
2961 
2962 	if (verbose) {
2963 		mdb_printf(
2964 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2965 		    "%16s %16p %16p %16p\n",
2966 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2967 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2968 
2969 		mdb_inc_indent(17);
2970 		for (i = 0; i < depth; i++)
2971 			mdb_printf("%a\n", bc.bc_stack[i]);
2972 		mdb_dec_indent(17);
2973 		mdb_printf("\n");
2974 	} else {
2975 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2976 		    bc.bc_timestamp, bc.bc_thread);
2977 
2978 		for (i = 0; i < depth; i++) {
2979 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2980 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2981 				continue;
2982 			if (strncmp(c, "kmem_", 5) == 0)
2983 				continue;
2984 			mdb_printf(" %a\n", bc.bc_stack[i]);
2985 			break;
2986 		}
2987 
2988 		if (i >= depth)
2989 			mdb_printf("\n");
2990 	}
2991 
2992 	return (DCMD_OK);
2993 }
2994 
/*
 * State shared between kmem_verify() and its verify_alloc()/verify_free()
 * walker callbacks.
 */
typedef struct kmem_verify {
	uint64_t *kmv_buf;		/* buffer to read cache contents into */
	size_t kmv_size;		/* number of bytes in kmv_buf */
	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* nonzero: don't report each */
					/* corrupt buffer, just count them */
	struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;
3002 
/*
 * verify_pattern()
 *	Check that the first `size' bytes of buf_arg consist of repeated
 *	copies of the 64-bit pattern `pat'.  Comparison is done a whole
 *	64-bit word at a time, so a trailing partial word is checked as a
 *	full word.
 *
 *	Returns the byte offset of the first word that differs from pat,
 *	or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;

	for (off = 0; off < size; off += sizeof (uint64_t)) {
		if (buf_arg[off / sizeof (uint64_t)] != pat)
			return ((int64_t)off);
	}

	return (-1);
}
3019 
3020 /*
3021  * verify_buftag()
3022  *	verify that btp->bt_bxstat == (bcp ^ pat)
3023  */
3024 static int
3025 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3026 {
3027 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3028 }
3029 
3030 /*
3031  * verify_free()
3032  * 	verify the integrity of a free block of memory by checking
3033  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3034  */
3035 /*ARGSUSED1*/
3036 static int
3037 verify_free(uintptr_t addr, const void *data, void *private)
3038 {
3039 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3040 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3041 	int64_t corrupt;		/* corruption offset */
3042 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3043 	kmem_cache_t *cp = &kmv->kmv_cache;
3044 	int besilent = kmv->kmv_besilent;
3045 
3046 	/*LINTED*/
3047 	buftagp = KMEM_BUFTAG(cp, buf);
3048 
3049 	/*
3050 	 * Read the buffer to check.
3051 	 */
3052 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3053 		if (!besilent)
3054 			mdb_warn("couldn't read %p", addr);
3055 		return (WALK_NEXT);
3056 	}
3057 
3058 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3059 	    KMEM_FREE_PATTERN)) >= 0) {
3060 		if (!besilent)
3061 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3062 			    addr, (uintptr_t)addr + corrupt);
3063 		goto corrupt;
3064 	}
3065 	/*
3066 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3067 	 * the first bytes of the buffer, hence we cannot check for red
3068 	 * zone corruption.
3069 	 */
3070 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3071 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3072 		if (!besilent)
3073 			mdb_printf("buffer %p (free) seems to "
3074 			    "have a corrupt redzone pattern\n", addr);
3075 		goto corrupt;
3076 	}
3077 
3078 	/*
3079 	 * confirm bufctl pointer integrity.
3080 	 */
3081 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3082 		if (!besilent)
3083 			mdb_printf("buffer %p (free) has a corrupt "
3084 			    "buftag\n", addr);
3085 		goto corrupt;
3086 	}
3087 
3088 	return (WALK_NEXT);
3089 corrupt:
3090 	kmv->kmv_corruption++;
3091 	return (WALK_NEXT);
3092 }
3093 
3094 /*
3095  * verify_alloc()
3096  * 	Verify that the buftag of an allocated buffer makes sense with respect
3097  * 	to the buffer.
3098  */
3099 /*ARGSUSED1*/
3100 static int
3101 verify_alloc(uintptr_t addr, const void *data, void *private)
3102 {
3103 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3104 	kmem_cache_t *cp = &kmv->kmv_cache;
3105 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3106 	/*LINTED*/
3107 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3108 	uint32_t *ip = (uint32_t *)buftagp;
3109 	uint8_t *bp = (uint8_t *)buf;
3110 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3111 	int besilent = kmv->kmv_besilent;
3112 
3113 	/*
3114 	 * Read the buffer to check.
3115 	 */
3116 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3117 		if (!besilent)
3118 			mdb_warn("couldn't read %p", addr);
3119 		return (WALK_NEXT);
3120 	}
3121 
3122 	/*
3123 	 * There are two cases to handle:
3124 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3125 	 *    0xfeedfacefeedface at the end of it
3126 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3127 	 *    0xbb just past the end of the region in use.  At the buftag,
3128 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3129 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3130 	 *    endianness), followed by 32 bits containing the offset of the
3131 	 *    0xbb byte in the buffer.
3132 	 *
3133 	 * Finally, the two 32-bit words that comprise the second half of the
3134 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3135 	 */
3136 
3137 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3138 		looks_ok = 1;
3139 	else if (!KMEM_SIZE_VALID(ip[1]))
3140 		size_ok = 0;
3141 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3142 		looks_ok = 1;
3143 	else
3144 		size_ok = 0;
3145 
3146 	if (!size_ok) {
3147 		if (!besilent)
3148 			mdb_printf("buffer %p (allocated) has a corrupt "
3149 			    "redzone size encoding\n", addr);
3150 		goto corrupt;
3151 	}
3152 
3153 	if (!looks_ok) {
3154 		if (!besilent)
3155 			mdb_printf("buffer %p (allocated) has a corrupt "
3156 			    "redzone signature\n", addr);
3157 		goto corrupt;
3158 	}
3159 
3160 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3161 		if (!besilent)
3162 			mdb_printf("buffer %p (allocated) has a "
3163 			    "corrupt buftag\n", addr);
3164 		goto corrupt;
3165 	}
3166 
3167 	return (WALK_NEXT);
3168 corrupt:
3169 	kmv->kmv_corruption++;
3170 	return (WALK_NEXT);
3171 }
3172 
3173 /*ARGSUSED2*/
3174 int
3175 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3176 {
3177 	if (flags & DCMD_ADDRSPEC) {
3178 		int check_alloc = 0, check_free = 0;
3179 		kmem_verify_t kmv;
3180 
3181 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3182 		    addr) == -1) {
3183 			mdb_warn("couldn't read kmem_cache %p", addr);
3184 			return (DCMD_ERR);
3185 		}
3186 
3187 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3188 		    sizeof (kmem_buftag_t);
3189 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3190 		kmv.kmv_corruption = 0;
3191 
3192 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3193 			check_alloc = 1;
3194 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3195 				check_free = 1;
3196 		} else {
3197 			if (!(flags & DCMD_LOOP)) {
3198 				mdb_warn("cache %p (%s) does not have "
3199 				    "redzone checking enabled\n", addr,
3200 				    kmv.kmv_cache.cache_name);
3201 			}
3202 			return (DCMD_ERR);
3203 		}
3204 
3205 		if (flags & DCMD_LOOP) {
3206 			/*
3207 			 * table mode, don't print out every corrupt buffer
3208 			 */
3209 			kmv.kmv_besilent = 1;
3210 		} else {
3211 			mdb_printf("Summary for cache '%s'\n",
3212 			    kmv.kmv_cache.cache_name);
3213 			mdb_inc_indent(2);
3214 			kmv.kmv_besilent = 0;
3215 		}
3216 
3217 		if (check_alloc)
3218 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3219 		if (check_free)
3220 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3221 
3222 		if (flags & DCMD_LOOP) {
3223 			if (kmv.kmv_corruption == 0) {
3224 				mdb_printf("%-*s %?p clean\n",
3225 				    KMEM_CACHE_NAMELEN,
3226 				    kmv.kmv_cache.cache_name, addr);
3227 			} else {
3228 				char *s = "";	/* optional s in "buffer[s]" */
3229 				if (kmv.kmv_corruption > 1)
3230 					s = "s";
3231 
3232 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3233 				    KMEM_CACHE_NAMELEN,
3234 				    kmv.kmv_cache.cache_name, addr,
3235 				    kmv.kmv_corruption, s);
3236 			}
3237 		} else {
3238 			/*
3239 			 * This is the more verbose mode, when the user has
3240 			 * type addr::kmem_verify.  If the cache was clean,
3241 			 * nothing will have yet been printed. So say something.
3242 			 */
3243 			if (kmv.kmv_corruption == 0)
3244 				mdb_printf("clean\n");
3245 
3246 			mdb_dec_indent(2);
3247 		}
3248 	} else {
3249 		/*
3250 		 * If the user didn't specify a cache to verify, we'll walk all
3251 		 * kmem_cache's, specifying ourself as a callback for each...
3252 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3253 		 */
3254 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3255 		    "Cache Name", "Addr", "Cache Integrity");
3256 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3257 	}
3258 
3259 	return (DCMD_OK);
3260 }
3261 
/*
 * Debugger-side description of one vmem arena.  vmem_walk_init() allocates
 * one node per arena found on the target's vmem_list and links the nodes
 * into a forest that mirrors the arenas' vm_source relationships.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node on the flat list */
	struct vmem_node *vn_parent;	/* node of our vm_source, if any */
	struct vmem_node *vn_sibling;	/* next child of our parent/next root */
	struct vmem_node *vn_children;	/* first node sourced from us */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once the postfix walk visits */
	vmem_t vn_vmem;			/* local copy of the target vmem_t */
} vmem_node_t;
3271 
/*
 * Per-walk state shared by the "vmem" and "vmem_postfix" walkers.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* first root (source-less) node */
	vmem_node_t *vw_current;	/* node the next step operates on */
} vmem_walk_t;
3276 
3277 int
3278 vmem_walk_init(mdb_walk_state_t *wsp)
3279 {
3280 	uintptr_t vaddr, paddr;
3281 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3282 	vmem_walk_t *vw;
3283 
3284 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3285 		mdb_warn("couldn't read 'vmem_list'");
3286 		return (WALK_ERR);
3287 	}
3288 
3289 	while (vaddr != NULL) {
3290 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3291 		vp->vn_addr = vaddr;
3292 		vp->vn_next = head;
3293 		head = vp;
3294 
3295 		if (vaddr == wsp->walk_addr)
3296 			current = vp;
3297 
3298 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3299 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3300 			goto err;
3301 		}
3302 
3303 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3304 	}
3305 
3306 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3307 
3308 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3309 			vp->vn_sibling = root;
3310 			root = vp;
3311 			continue;
3312 		}
3313 
3314 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3315 			if (parent->vn_addr != paddr)
3316 				continue;
3317 			vp->vn_sibling = parent->vn_children;
3318 			parent->vn_children = vp;
3319 			vp->vn_parent = parent;
3320 			break;
3321 		}
3322 
3323 		if (parent == NULL) {
3324 			mdb_warn("couldn't find %p's parent (%p)\n",
3325 			    vp->vn_addr, paddr);
3326 			goto err;
3327 		}
3328 	}
3329 
3330 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3331 	vw->vw_root = root;
3332 
3333 	if (current != NULL)
3334 		vw->vw_current = current;
3335 	else
3336 		vw->vw_current = root;
3337 
3338 	wsp->walk_data = vw;
3339 	return (WALK_NEXT);
3340 err:
3341 	for (vp = head; head != NULL; vp = head) {
3342 		head = vp->vn_next;
3343 		mdb_free(vp, sizeof (vmem_node_t));
3344 	}
3345 
3346 	return (WALK_ERR);
3347 }
3348 
3349 int
3350 vmem_walk_step(mdb_walk_state_t *wsp)
3351 {
3352 	vmem_walk_t *vw = wsp->walk_data;
3353 	vmem_node_t *vp;
3354 	int rval;
3355 
3356 	if ((vp = vw->vw_current) == NULL)
3357 		return (WALK_DONE);
3358 
3359 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3360 
3361 	if (vp->vn_children != NULL) {
3362 		vw->vw_current = vp->vn_children;
3363 		return (rval);
3364 	}
3365 
3366 	do {
3367 		vw->vw_current = vp->vn_sibling;
3368 		vp = vp->vn_parent;
3369 	} while (vw->vw_current == NULL && vp != NULL);
3370 
3371 	return (rval);
3372 }
3373 
3374 /*
3375  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3376  * children are visited before their parent.  We perform the postfix walk
3377  * iteratively (rather than recursively) to allow mdb to regain control
3378  * after each callback.
3379  */
3380 int
3381 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3382 {
3383 	vmem_walk_t *vw = wsp->walk_data;
3384 	vmem_node_t *vp = vw->vw_current;
3385 	int rval;
3386 
3387 	/*
3388 	 * If this node is marked, then we know that we have already visited
3389 	 * all of its children.  If the node has any siblings, they need to
3390 	 * be visited next; otherwise, we need to visit the parent.  Note
3391 	 * that vp->vn_marked will only be zero on the first invocation of
3392 	 * the step function.
3393 	 */
3394 	if (vp->vn_marked) {
3395 		if (vp->vn_sibling != NULL)
3396 			vp = vp->vn_sibling;
3397 		else if (vp->vn_parent != NULL)
3398 			vp = vp->vn_parent;
3399 		else {
3400 			/*
3401 			 * We have neither a parent, nor a sibling, and we
3402 			 * have already been visited; we're done.
3403 			 */
3404 			return (WALK_DONE);
3405 		}
3406 	}
3407 
3408 	/*
3409 	 * Before we visit this node, visit its children.
3410 	 */
3411 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3412 		vp = vp->vn_children;
3413 
3414 	vp->vn_marked = 1;
3415 	vw->vw_current = vp;
3416 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3417 
3418 	return (rval);
3419 }
3420 
3421 void
3422 vmem_walk_fini(mdb_walk_state_t *wsp)
3423 {
3424 	vmem_walk_t *vw = wsp->walk_data;
3425 	vmem_node_t *root = vw->vw_root;
3426 	int done;
3427 
3428 	if (root == NULL)
3429 		return;
3430 
3431 	if ((vw->vw_root = root->vn_children) != NULL)
3432 		vmem_walk_fini(wsp);
3433 
3434 	vw->vw_root = root->vn_sibling;
3435 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3436 	mdb_free(root, sizeof (vmem_node_t));
3437 
3438 	if (done) {
3439 		mdb_free(vw, sizeof (vmem_walk_t));
3440 	} else {
3441 		vmem_walk_fini(wsp);
3442 	}
3443 }
3444 
/*
 * State for the vmem segment walkers (vmem_alloc, vmem_free, vmem_span and
 * vmem_seg).
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;		/* segment type to report */
	uintptr_t vsw_start;		/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;		/* address of the next segment */
} vmem_seg_walk_t;
3450 
3451 /*ARGSUSED*/
3452 int
3453 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3454 {
3455 	vmem_seg_walk_t *vsw;
3456 
3457 	if (wsp->walk_addr == NULL) {
3458 		mdb_warn("vmem_%s does not support global walks\n", name);
3459 		return (WALK_ERR);
3460 	}
3461 
3462 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3463 
3464 	vsw->vsw_type = type;
3465 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3466 	vsw->vsw_current = vsw->vsw_start;
3467 
3468 	return (WALK_NEXT);
3469 }
3470 
3471 /*
3472  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3473  */
3474 #define	VMEM_NONE	0
3475 
/*
 * Init for the "vmem_alloc" walker: report only VMEM_ALLOC segments.
 */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
3481 
/*
 * Init for the "vmem_free" walker: report only VMEM_FREE segments.
 */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
3487 
/*
 * Init for the "vmem_span" walker: report only VMEM_SPAN segments.
 */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
3493 
/*
 * Init for the "vmem_seg" walker.  VMEM_NONE disables the type filter in
 * vmem_seg_walk_step(), so segments of every type are reported.
 */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
3499 
3500 int
3501 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3502 {
3503 	vmem_seg_t seg;
3504 	vmem_seg_walk_t *vsw = wsp->walk_data;
3505 	uintptr_t addr = vsw->vsw_current;
3506 	static size_t seg_size = 0;
3507 	int rval;
3508 
3509 	if (!seg_size) {
3510 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3511 			mdb_warn("failed to read 'vmem_seg_size'");
3512 			seg_size = sizeof (vmem_seg_t);
3513 		}
3514 	}
3515 
3516 	if (seg_size < sizeof (seg))
3517 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3518 
3519 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3520 		mdb_warn("couldn't read vmem_seg at %p", addr);
3521 		return (WALK_ERR);
3522 	}
3523 
3524 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3525 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3526 		rval = WALK_NEXT;
3527 	} else {
3528 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3529 	}
3530 
3531 	if (vsw->vsw_current == vsw->vsw_start)
3532 		return (WALK_DONE);
3533 
3534 	return (rval);
3535 }
3536 
3537 void
3538 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3539 {
3540 	vmem_seg_walk_t *vsw = wsp->walk_data;
3541 
3542 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3543 }
3544 
3545 #define	VMEM_NAMEWIDTH	22
3546 
3547 int
3548 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3549 {
3550 	vmem_t v, parent;
3551 	vmem_kstat_t *vkp = &v.vm_kstat;
3552 	uintptr_t paddr;
3553 	int ident = 0;
3554 	char c[VMEM_NAMEWIDTH];
3555 
3556 	if (!(flags & DCMD_ADDRSPEC)) {
3557 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3558 			mdb_warn("can't walk vmem");
3559 			return (DCMD_ERR);
3560 		}
3561 		return (DCMD_OK);
3562 	}
3563 
3564 	if (DCMD_HDRSPEC(flags))
3565 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3566 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3567 		    "TOTAL", "SUCCEED", "FAIL");
3568 
3569 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3570 		mdb_warn("couldn't read vmem at %p", addr);
3571 		return (DCMD_ERR);
3572 	}
3573 
3574 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3575 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3576 			mdb_warn("couldn't trace %p's ancestry", addr);
3577 			ident = 0;
3578 			break;
3579 		}
3580 		paddr = (uintptr_t)parent.vm_source;
3581 	}
3582 
3583 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3584 
3585 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3586 	    addr, VMEM_NAMEWIDTH, c,
3587 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3588 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3589 
3590 	return (DCMD_OK);
3591 }
3592 
/*
 * Help text for the ::vmem_seg dcmd: a short description followed by an
 * OPTIONS section listing the flags accepted by vmem_seg().
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	/* print the section heading outdented relative to the help body */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3624 
3625 /*ARGSUSED*/
3626 int
3627 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3628 {
3629 	vmem_seg_t vs;
3630 	pc_t *stk = vs.vs_stack;
3631 	uintptr_t sz;
3632 	uint8_t t;
3633 	const char *type = NULL;
3634 	GElf_Sym sym;
3635 	char c[MDB_SYM_NAMLEN];
3636 	int no_debug;
3637 	int i;
3638 	int depth;
3639 	uintptr_t laddr, haddr;
3640 
3641 	uintptr_t caller = NULL, thread = NULL;
3642 	uintptr_t minsize = 0, maxsize = 0;
3643 
3644 	hrtime_t earliest = 0, latest = 0;
3645 
3646 	uint_t size = 0;
3647 	uint_t verbose = 0;
3648 
3649 	if (!(flags & DCMD_ADDRSPEC))
3650 		return (DCMD_USAGE);
3651 
3652 	if (mdb_getopts(argc, argv,
3653 	    'c', MDB_OPT_UINTPTR, &caller,
3654 	    'e', MDB_OPT_UINT64, &earliest,
3655 	    'l', MDB_OPT_UINT64, &latest,
3656 	    's', MDB_OPT_SETBITS, TRUE, &size,
3657 	    'm', MDB_OPT_UINTPTR, &minsize,
3658 	    'M', MDB_OPT_UINTPTR, &maxsize,
3659 	    't', MDB_OPT_UINTPTR, &thread,
3660 	    'T', MDB_OPT_STR, &type,
3661 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3662 	    NULL) != argc)
3663 		return (DCMD_USAGE);
3664 
3665 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3666 		if (verbose) {
3667 			mdb_printf("%16s %4s %16s %16s %16s\n"
3668 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3669 			    "ADDR", "TYPE", "START", "END", "SIZE",
3670 			    "", "", "THREAD", "TIMESTAMP", "");
3671 		} else {
3672 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3673 			    "START", size? "SIZE" : "END", "WHO");
3674 		}
3675 	}
3676 
3677 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3678 		mdb_warn("couldn't read vmem_seg at %p", addr);
3679 		return (DCMD_ERR);
3680 	}
3681 
3682 	if (type != NULL) {
3683 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3684 			t = VMEM_ALLOC;
3685 		else if (strcmp(type, "FREE") == 0)
3686 			t = VMEM_FREE;
3687 		else if (strcmp(type, "SPAN") == 0)
3688 			t = VMEM_SPAN;
3689 		else if (strcmp(type, "ROTR") == 0 ||
3690 		    strcmp(type, "ROTOR") == 0)
3691 			t = VMEM_ROTOR;
3692 		else if (strcmp(type, "WLKR") == 0 ||
3693 		    strcmp(type, "WALKER") == 0)
3694 			t = VMEM_WALKER;
3695 		else {
3696 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3697 			    type);
3698 			return (DCMD_ERR);
3699 		}
3700 
3701 		if (vs.vs_type != t)
3702 			return (DCMD_OK);
3703 	}
3704 
3705 	sz = vs.vs_end - vs.vs_start;
3706 
3707 	if (minsize != 0 && sz < minsize)
3708 		return (DCMD_OK);
3709 
3710 	if (maxsize != 0 && sz > maxsize)
3711 		return (DCMD_OK);
3712 
3713 	t = vs.vs_type;
3714 	depth = vs.vs_depth;
3715 
3716 	/*
3717 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3718 	 */
3719 	no_debug = (t != VMEM_ALLOC) ||
3720 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3721 
3722 	if (no_debug) {
3723 		if (caller != NULL || thread != NULL || earliest != 0 ||
3724 		    latest != 0)
3725 			return (DCMD_OK);		/* not enough info */
3726 	} else {
3727 		if (caller != NULL) {
3728 			laddr = caller;
3729 			haddr = caller + sizeof (caller);
3730 
3731 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3732 			    sizeof (c), &sym) != -1 &&
3733 			    caller == (uintptr_t)sym.st_value) {
3734 				/*
3735 				 * We were provided an exact symbol value; any
3736 				 * address in the function is valid.
3737 				 */
3738 				laddr = (uintptr_t)sym.st_value;
3739 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3740 			}
3741 
3742 			for (i = 0; i < depth; i++)
3743 				if (vs.vs_stack[i] >= laddr &&
3744 				    vs.vs_stack[i] < haddr)
3745 					break;
3746 
3747 			if (i == depth)
3748 				return (DCMD_OK);
3749 		}
3750 
3751 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3752 			return (DCMD_OK);
3753 
3754 		if (earliest != 0 && vs.vs_timestamp < earliest)
3755 			return (DCMD_OK);
3756 
3757 		if (latest != 0 && vs.vs_timestamp > latest)
3758 			return (DCMD_OK);
3759 	}
3760 
3761 	type = (t == VMEM_ALLOC ? "ALLC" :
3762 	    t == VMEM_FREE ? "FREE" :
3763 	    t == VMEM_SPAN ? "SPAN" :
3764 	    t == VMEM_ROTOR ? "ROTR" :
3765 	    t == VMEM_WALKER ? "WLKR" :
3766 	    "????");
3767 
3768 	if (flags & DCMD_PIPE_OUT) {
3769 		mdb_printf("%#lr\n", addr);
3770 		return (DCMD_OK);
3771 	}
3772 
3773 	if (verbose) {
3774 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3775 		    addr, type, vs.vs_start, vs.vs_end, sz);
3776 
3777 		if (no_debug)
3778 			return (DCMD_OK);
3779 
3780 		mdb_printf("%16s %4s %16p %16llx\n",
3781 		    "", "", vs.vs_thread, vs.vs_timestamp);
3782 
3783 		mdb_inc_indent(17);
3784 		for (i = 0; i < depth; i++) {
3785 			mdb_printf("%a\n", stk[i]);
3786 		}
3787 		mdb_dec_indent(17);
3788 		mdb_printf("\n");
3789 	} else {
3790 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3791 		    vs.vs_start, size? sz : vs.vs_end);
3792 
3793 		if (no_debug) {
3794 			mdb_printf("\n");
3795 			return (DCMD_OK);
3796 		}
3797 
3798 		for (i = 0; i < depth; i++) {
3799 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3800 			    c, sizeof (c), &sym) == -1)
3801 				continue;
3802 			if (strncmp(c, "vmem_", 5) == 0)
3803 				continue;
3804 			break;
3805 		}
3806 		mdb_printf(" %a\n", stk[i]);
3807 	}
3808 	return (DCMD_OK);
3809 }
3810 
/*
 * Callback state for ::kmalog, passed to showbc() for every log entry.
 */
typedef struct kmalog_data {
	uintptr_t	kma_addr;	/* buffer address filter, 0 for none */
	hrtime_t	kma_newest;	/* timestamp of first entry seen */
} kmalog_data_t;
3815 
3816 /*ARGSUSED*/
3817 static int
3818 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3819 {
3820 	char name[KMEM_CACHE_NAMELEN + 1];
3821 	hrtime_t delta;
3822 	int i, depth;
3823 	size_t bufsize;
3824 
3825 	if (bcp->bc_timestamp == 0)
3826 		return (WALK_DONE);
3827 
3828 	if (kma->kma_newest == 0)
3829 		kma->kma_newest = bcp->bc_timestamp;
3830 
3831 	if (kma->kma_addr) {
3832 		if (mdb_vread(&bufsize, sizeof (bufsize),
3833 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3834 			mdb_warn(
3835 			    "failed to read cache_bufsize for cache at %p",
3836 			    bcp->bc_cache);
3837 			return (WALK_ERR);
3838 		}
3839 
3840 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3841 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3842 			return (WALK_NEXT);
3843 	}
3844 
3845 	delta = kma->kma_newest - bcp->bc_timestamp;
3846 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3847 
3848 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3849 	    &bcp->bc_cache->cache_name) <= 0)
3850 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3851 
3852 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3853 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3854 
3855 	for (i = 0; i < depth; i++)
3856 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3857 
3858 	return (WALK_NEXT);
3859 }
3860 
3861 int
3862 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3863 {
3864 	const char *logname = "kmem_transaction_log";
3865 	kmalog_data_t kma;
3866 
3867 	if (argc > 1)
3868 		return (DCMD_USAGE);
3869 
3870 	kma.kma_newest = 0;
3871 	if (flags & DCMD_ADDRSPEC)
3872 		kma.kma_addr = addr;
3873 	else
3874 		kma.kma_addr = NULL;
3875 
3876 	if (argc > 0) {
3877 		if (argv->a_type != MDB_TYPE_STRING)
3878 			return (DCMD_USAGE);
3879 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3880 			logname = "kmem_failure_log";
3881 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3882 			logname = "kmem_slab_log";
3883 		else
3884 			return (DCMD_USAGE);
3885 	}
3886 
3887 	if (mdb_readvar(&addr, logname) == -1) {
3888 		mdb_warn("failed to read %s log header pointer");
3889 		return (DCMD_ERR);
3890 	}
3891 
3892 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3893 		mdb_warn("failed to walk kmem log");
3894 		return (DCMD_ERR);
3895 	}
3896 
3897 	return (DCMD_OK);
3898 }
3899 
3900 /*
3901  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3902  * The first piece is a structure which we use to accumulate kmem_cache_t
3903  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3904  * walker; we either add all caches, or ones named explicitly as arguments.
3905  */
3906 
/*
 * Growable array of kmem_cache_t addresses, filled in by kmc_add().
 */
typedef struct kmclist {
	const char *kmc_name;			/* Name to match (or NULL) */
	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
	int kmc_nelems;				/* Num entries in kmc_caches */
	int kmc_size;				/* Size of kmc_caches array */
} kmclist_t;
3913 
3914 static int
3915 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3916 {
3917 	void *p;
3918 	int s;
3919 
3920 	if (kmc->kmc_name == NULL ||
3921 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3922 		/*
3923 		 * If we have a match, grow our array (if necessary), and then
3924 		 * add the virtual address of the matching cache to our list.
3925 		 */
3926 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3927 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3928 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3929 
3930 			bcopy(kmc->kmc_caches, p,
3931 			    sizeof (uintptr_t) * kmc->kmc_size);
3932 
3933 			kmc->kmc_caches = p;
3934 			kmc->kmc_size = s;
3935 		}
3936 
3937 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3938 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3939 	}
3940 
3941 	return (WALK_NEXT);
3942 }
3943 
3944 /*
3945  * The second piece of ::kmausers is a hash table of allocations.  Each
3946  * allocation owner is identified by its stack trace and data_size.  We then
3947  * track the total bytes of all such allocations, and the number of allocations
3948  * to report at the end.  Once we have a list of caches, we walk through the
3949  * allocated bufctls of each, and update our hash table accordingly.
3950  */
3951 
/*
 * One entry of the ::kmausers hash table.  The table is a single array in
 * which every element does double duty: element i is the i'th owner
 * recorded, and its kmo_head field is also the head of hash bucket i.
 */
typedef struct kmowner {
	struct kmowner *kmo_head;		/* First hash elt in bucket */
	struct kmowner *kmo_next;		/* Next hash elt in chain */
	size_t kmo_signature;			/* Hash table signature */
	uint_t kmo_num;				/* Number of allocations */
	size_t kmo_data_size;			/* Size of each allocation */
	size_t kmo_total_size;			/* Total bytes of allocation */
	int kmo_depth;				/* Depth of stack trace */
	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
} kmowner_t;
3962 
/*
 * State carried through a ::kmausers run and handed to the bufctl walk
 * callbacks (kmause1/kmause2).
 */
typedef struct kmusers {
	uintptr_t kmu_addr;			/* address of interest */
	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
	kmowner_t *kmu_hash;			/* Hash table of owners */
	int kmu_nelems;				/* Number of entries in use */
	int kmu_size;				/* Total number of entries */
} kmusers_t;
3970 
3971 static void
3972 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3973     size_t size, size_t data_size)
3974 {
3975 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3976 	size_t bucket, signature = data_size;
3977 	kmowner_t *kmo, *kmoend;
3978 
3979 	/*
3980 	 * If the hash table is full, double its size and rehash everything.
3981 	 */
3982 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3983 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3984 
3985 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3986 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
3987 		kmu->kmu_hash = kmo;
3988 		kmu->kmu_size = s;
3989 
3990 		kmoend = kmu->kmu_hash + kmu->kmu_size;
3991 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
3992 			kmo->kmo_head = NULL;
3993 
3994 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
3995 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
3996 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
3997 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3998 			kmu->kmu_hash[bucket].kmo_head = kmo;
3999 		}
4000 	}
4001 
4002 	/*
4003 	 * Finish computing the hash signature from the stack trace, and then
4004 	 * see if the owner is in the hash table.  If so, update our stats.
4005 	 */
4006 	for (i = 0; i < depth; i++)
4007 		signature += bcp->bc_stack[i];
4008 
4009 	bucket = signature & (kmu->kmu_size - 1);
4010 
4011 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4012 		if (kmo->kmo_signature == signature) {
4013 			size_t difference = 0;
4014 
4015 			difference |= kmo->kmo_data_size - data_size;
4016 			difference |= kmo->kmo_depth - depth;
4017 
4018 			for (i = 0; i < depth; i++) {
4019 				difference |= kmo->kmo_stack[i] -
4020 				    bcp->bc_stack[i];
4021 			}
4022 
4023 			if (difference == 0) {
4024 				kmo->kmo_total_size += size;
4025 				kmo->kmo_num++;
4026 				return;
4027 			}
4028 		}
4029 	}
4030 
4031 	/*
4032 	 * If the owner is not yet hashed, grab the next element and fill it
4033 	 * in based on the allocation information.
4034 	 */
4035 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4036 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4037 	kmu->kmu_hash[bucket].kmo_head = kmo;
4038 
4039 	kmo->kmo_signature = signature;
4040 	kmo->kmo_num = 1;
4041 	kmo->kmo_data_size = data_size;
4042 	kmo->kmo_total_size = size;
4043 	kmo->kmo_depth = depth;
4044 
4045 	for (i = 0; i < depth; i++)
4046 		kmo->kmo_stack[i] = bcp->bc_stack[i];
4047 }
4048 
4049 /*
4050  * When ::kmausers is invoked without the -f flag, we simply update our hash
4051  * table with the information from each allocated bufctl.
4052  */
4053 /*ARGSUSED*/
4054 static int
4055 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4056 {
4057 	const kmem_cache_t *cp = kmu->kmu_cache;
4058 
4059 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4060 	return (WALK_NEXT);
4061 }
4062 
4063 /*
4064  * When ::kmausers is invoked with the -f flag, we print out the information
4065  * for each bufctl as well as updating the hash table.
4066  */
4067 static int
4068 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4069 {
4070 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4071 	const kmem_cache_t *cp = kmu->kmu_cache;
4072 	kmem_bufctl_t bufctl;
4073 
4074 	if (kmu->kmu_addr) {
4075 		if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
4076 			mdb_warn("couldn't read bufctl at %p", addr);
4077 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4078 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4079 		    cp->cache_bufsize)
4080 			return (WALK_NEXT);
4081 	}
4082 
4083 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4084 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4085 
4086 	for (i = 0; i < depth; i++)
4087 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4088 
4089 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4090 	return (WALK_NEXT);
4091 }
4092 
4093 /*
4094  * We sort our results by allocation size before printing them.
4095  */
4096 static int
4097 kmownercmp(const void *lp, const void *rp)
4098 {
4099 	const kmowner_t *lhs = lp;
4100 	const kmowner_t *rhs = rp;
4101 
4102 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4103 }
4104 
4105 /*
4106  * The main engine of ::kmausers is relatively straightforward: First we
4107  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4108  * iterate over the allocated bufctls of each cache in the list.  Finally,
4109  * we sort and print our results.
4110  */
4111 /*ARGSUSED*/
4112 int
4113 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4114 {
4115 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4116 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4117 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4118 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4119 	int opt_e = FALSE;		/* Include "small" users */
4120 	int opt_f = FALSE;		/* Print stack traces */
4121 
4122 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4123 	kmowner_t *kmo, *kmoend;
4124 	int i, oelems;
4125 
4126 	kmclist_t kmc;
4127 	kmusers_t kmu;
4128 
4129 	bzero(&kmc, sizeof (kmc));
4130 	bzero(&kmu, sizeof (kmu));
4131 
4132 	while ((i = mdb_getopts(argc, argv,
4133 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4134 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4135 
4136 		argv += i;	/* skip past options we just processed */
4137 		argc -= i;	/* adjust argc */
4138 
4139 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4140 			return (DCMD_USAGE);
4141 
4142 		oelems = kmc.kmc_nelems;
4143 		kmc.kmc_name = argv->a_un.a_str;
4144 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4145 
4146 		if (kmc.kmc_nelems == oelems) {
4147 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4148 			return (DCMD_ERR);
4149 		}
4150 
4151 		do_all_caches = 0;
4152 		argv++;
4153 		argc--;
4154 	}
4155 
4156 	if (flags & DCMD_ADDRSPEC) {
4157 		opt_f = TRUE;
4158 		kmu.kmu_addr = addr;
4159 	} else {
4160 		kmu.kmu_addr = NULL;
4161 	}
4162 
4163 	if (opt_e)
4164 		mem_threshold = cnt_threshold = 0;
4165 
4166 	if (opt_f)
4167 		callback = (mdb_walk_cb_t)kmause2;
4168 
4169 	if (do_all_caches) {
4170 		kmc.kmc_name = NULL; /* match all cache names */
4171 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4172 	}
4173 
4174 	for (i = 0; i < kmc.kmc_nelems; i++) {
4175 		uintptr_t cp = kmc.kmc_caches[i];
4176 		kmem_cache_t c;
4177 
4178 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4179 			mdb_warn("failed to read cache at %p", cp);
4180 			continue;
4181 		}
4182 
4183 		if (!(c.cache_flags & KMF_AUDIT)) {
4184 			if (!do_all_caches) {
4185 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4186 				    c.cache_name);
4187 			}
4188 			continue;
4189 		}
4190 
4191 		kmu.kmu_cache = &c;
4192 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4193 		audited_caches++;
4194 	}
4195 
4196 	if (audited_caches == 0 && do_all_caches) {
4197 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4198 		return (DCMD_ERR);
4199 	}
4200 
4201 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4202 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4203 
4204 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4205 		if (kmo->kmo_total_size < mem_threshold &&
4206 		    kmo->kmo_num < cnt_threshold)
4207 			continue;
4208 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4209 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4210 		for (i = 0; i < kmo->kmo_depth; i++)
4211 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4212 	}
4213 
4214 	return (DCMD_OK);
4215 }
4216 
/*
 * Help text for the ::kmausers dcmd.
 */
void
kmausers_help(void)
{
	mdb_printf(
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n");
}
4232 
/*
 * Fetch the target's 'kmem_ready' variable.  Returns its value, or -1 if
 * the variable could not be read (mdb_readvar() has set errno for us).
 */
static int
kmem_ready_check(void)
{
	int state;

	return ((mdb_readvar(&state, "kmem_ready") < 0) ? -1 : state);
}
4243 
4244 void
4245 kmem_statechange(void)
4246 {
4247 	static int been_ready = 0;
4248 
4249 	if (been_ready)
4250 		return;
4251 
4252 	if (kmem_ready_check() <= 0)
4253 		return;
4254 
4255 	been_ready = 1;
4256 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4257 }
4258 
/*
 * Module-initialization hook for the kmem support code: add the
 * "kmem_cache" walker, run the per-cache walker setup if kmem is already
 * ready, and register the ::whatis handlers.  If the walker cannot be
 * added, a warning is issued and nothing else is done.
 */
void
kmem_init(void)
{
	mdb_walker_t w = {
		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
		list_walk_step, list_walk_fini
	};

	/*
	 * If kmem is ready, we'll need to invoke the kmem_cache walker
	 * immediately.  Walkers in the linkage structure won't be ready until
	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
	 * is ready, we'll use the walker to initialize the caches.  If kmem
	 * isn't ready, we'll register a callback that will allow us to defer
	 * cache walking until it is.
	 */
	if (mdb_add_walker(&w) != 0) {
		mdb_warn("failed to add kmem_cache walker");
		return;
	}

	kmem_statechange();

	/* register our ::whatis handlers */
	mdb_whatis_register("modules", whatis_run_modules, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("threads", whatis_run_threads, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("pages", whatis_run_pages, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("kmem", whatis_run_kmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
}
4294 
/*
 * Search parameters for ::whatthread, passed to whatthread_walk_thread().
 */
typedef struct whatthread {
	uintptr_t	wt_target;	/* value to look for on the stacks */
	int		wt_verbose;	/* nonzero: report every match */
} whatthread_t;
4299 
4300 static int
4301 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4302 {
4303 	uintptr_t current, data;
4304 
4305 	if (t->t_stkbase == NULL)
4306 		return (WALK_NEXT);
4307 
4308 	/*
4309 	 * Warn about swapped out threads, but drive on anyway
4310 	 */
4311 	if (!(t->t_schedflag & TS_LOAD)) {
4312 		mdb_warn("thread %p's stack swapped out\n", addr);
4313 		return (WALK_NEXT);
4314 	}
4315 
4316 	/*
4317 	 * Search the thread's stack for the given pointer.  Note that it would
4318 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4319 	 * chunks, but this routine is already fast and simple.
4320 	 */
4321 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4322 	    current += sizeof (uintptr_t)) {
4323 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4324 			mdb_warn("couldn't read thread %p's stack at %p",
4325 			    addr, current);
4326 			return (WALK_ERR);
4327 		}
4328 
4329 		if (data == w->wt_target) {
4330 			if (w->wt_verbose) {
4331 				mdb_printf("%p in thread %p's stack%s\n",
4332 				    current, addr, stack_active(t, current));
4333 			} else {
4334 				mdb_printf("%#lr\n", addr);
4335 				return (WALK_NEXT);
4336 			}
4337 		}
4338 	}
4339 
4340 	return (WALK_NEXT);
4341 }
4342 
4343 int
4344 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4345 {
4346 	whatthread_t w;
4347 
4348 	if (!(flags & DCMD_ADDRSPEC))
4349 		return (DCMD_USAGE);
4350 
4351 	w.wt_verbose = FALSE;
4352 	w.wt_target = addr;
4353 
4354 	if (mdb_getopts(argc, argv,
4355 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4356 		return (DCMD_USAGE);
4357 
4358 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4359 	    == -1) {
4360 		mdb_warn("couldn't walk threads");
4361 		return (DCMD_ERR);
4362 	}
4363 
4364 	return (DCMD_OK);
4365 }
4366