xref: /titanic_52/usr/src/cmd/mdb/common/modules/genunix/kmem.c (revision 2ec7cc7fc084163eaed884efee9bbd322cc8951b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <sys/cpuvar.h>
30 #include <sys/kmem_impl.h>
31 #include <sys/vmem_impl.h>
32 #include <sys/machelf.h>
33 #include <sys/modctl.h>
34 #include <sys/kobj.h>
35 #include <sys/panic.h>
36 #include <sys/stack.h>
37 #include <sys/sysmacros.h>
38 #include <vm/page.h>
39 
40 #include "avl.h"
41 #include "combined.h"
42 #include "dist.h"
43 #include "kmem.h"
44 #include "list.h"
45 
/*
 * Emit a "kmem debug: "-prefixed diagnostic when mdb_debug_level is nonzero.
 * The argument is a complete, parenthesized mdb_printf() argument list, e.g.
 *
 *	dprintf(("read %d rounds\n", n));
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement, including when it is the body of an unbraced
 * if/else.
 */
#define	dprintf(x) do { \
	if (mdb_debug_level) { \
		mdb_printf("kmem debug: "); \
		/*CSTYLED*/\
		mdb_printf x ;\
	} \
} while (0)
51 
/*
 * Single-bit flags, so they can be combined with bitwise OR.
 * NOTE(review): their consumers lie outside this part of the file;
 * presumably they select what a kmem buffer walk reports -- confirm against
 * the walker code.
 */
#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10
57 
/* Nonzero enables dprintf() output; toggled by the kmem_debug() dcmd. */
static int mdb_debug_level = 0;
59 
60 /*ARGSUSED*/
61 static int
62 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
63 {
64 	mdb_walker_t w;
65 	char descr[64];
66 
67 	(void) mdb_snprintf(descr, sizeof (descr),
68 	    "walk the %s cache", c->cache_name);
69 
70 	w.walk_name = c->cache_name;
71 	w.walk_descr = descr;
72 	w.walk_init = kmem_walk_init;
73 	w.walk_step = kmem_walk_step;
74 	w.walk_fini = kmem_walk_fini;
75 	w.walk_init_arg = (void *)addr;
76 
77 	if (mdb_add_walker(&w) == -1)
78 		mdb_warn("failed to add %s walker", c->cache_name);
79 
80 	return (WALK_NEXT);
81 }
82 
83 /*ARGSUSED*/
84 int
85 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
86 {
87 	mdb_debug_level ^= 1;
88 
89 	mdb_printf("kmem: debugging is now %s\n",
90 	    mdb_debug_level ? "on" : "off");
91 
92 	return (DCMD_OK);
93 }
94 
95 int
96 kmem_cache_walk_init(mdb_walk_state_t *wsp)
97 {
98 	GElf_Sym sym;
99 
100 	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
101 		mdb_warn("couldn't find kmem_caches");
102 		return (WALK_ERR);
103 	}
104 
105 	wsp->walk_addr = (uintptr_t)sym.st_value;
106 
107 	return (list_walk_init_named(wsp, "cache list", "cache"));
108 }
109 
110 int
111 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
112 {
113 	if (wsp->walk_addr == NULL) {
114 		mdb_warn("kmem_cpu_cache doesn't support global walks");
115 		return (WALK_ERR);
116 	}
117 
118 	if (mdb_layered_walk("cpu", wsp) == -1) {
119 		mdb_warn("couldn't walk 'cpu'");
120 		return (WALK_ERR);
121 	}
122 
123 	wsp->walk_data = (void *)wsp->walk_addr;
124 
125 	return (WALK_NEXT);
126 }
127 
128 int
129 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
130 {
131 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
132 	const cpu_t *cpu = wsp->walk_layer;
133 	kmem_cpu_cache_t cc;
134 
135 	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
136 
137 	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
138 		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
139 		return (WALK_ERR);
140 	}
141 
142 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
143 }
144 
145 static int
146 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
147 {
148 	kmem_slab_t *sp = p;
149 	uintptr_t caddr = (uintptr_t)arg;
150 	if ((uintptr_t)sp->slab_cache != caddr) {
151 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
152 		    saddr, caddr, sp->slab_cache);
153 		return (-1);
154 	}
155 
156 	return (0);
157 }
158 
159 static int
160 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
161 {
162 	kmem_slab_t *sp = p;
163 
164 	int rc = kmem_slab_check(p, saddr, arg);
165 	if (rc != 0) {
166 		return (rc);
167 	}
168 
169 	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
170 		mdb_warn("slab %p is not a partial slab\n", saddr);
171 		return (-1);
172 	}
173 
174 	return (0);
175 }
176 
177 static int
178 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
179 {
180 	kmem_slab_t *sp = p;
181 
182 	int rc = kmem_slab_check(p, saddr, arg);
183 	if (rc != 0) {
184 		return (rc);
185 	}
186 
187 	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
188 		mdb_warn("slab %p is not completely allocated\n", saddr);
189 		return (-1);
190 	}
191 
192 	return (0);
193 }
194 
/*
 * Argument block for kmem_nth_slab_check(): the owning cache's address and a
 * countdown of slabs still to be accepted.
 */
typedef struct {
	uintptr_t kns_cache_addr;	/* cache every slab must belong to */
	int kns_nslabs;			/* slabs left before the check says 1 */
} kmem_nth_slab_t;
199 
200 static int
201 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
202 {
203 	kmem_nth_slab_t *chkp = arg;
204 
205 	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
206 	if (rc != 0) {
207 		return (rc);
208 	}
209 
210 	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
211 }
212 
213 static int
214 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
215 {
216 	uintptr_t caddr = wsp->walk_addr;
217 
218 	wsp->walk_addr = (uintptr_t)(caddr +
219 	    offsetof(kmem_cache_t, cache_complete_slabs));
220 
221 	return (list_walk_init_checked(wsp, "slab list", "slab",
222 	    kmem_complete_slab_check, (void *)caddr));
223 }
224 
225 static int
226 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
227 {
228 	uintptr_t caddr = wsp->walk_addr;
229 
230 	wsp->walk_addr = (uintptr_t)(caddr +
231 	    offsetof(kmem_cache_t, cache_partial_slabs));
232 
233 	return (avl_walk_init_checked(wsp, "slab list", "slab",
234 	    kmem_partial_slab_check, (void *)caddr));
235 }
236 
237 int
238 kmem_slab_walk_init(mdb_walk_state_t *wsp)
239 {
240 	uintptr_t caddr = wsp->walk_addr;
241 
242 	if (caddr == NULL) {
243 		mdb_warn("kmem_slab doesn't support global walks\n");
244 		return (WALK_ERR);
245 	}
246 
247 	combined_walk_init(wsp);
248 	combined_walk_add(wsp,
249 	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
250 	combined_walk_add(wsp,
251 	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
252 
253 	return (WALK_NEXT);
254 }
255 
256 static int
257 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
258 {
259 	uintptr_t caddr = wsp->walk_addr;
260 	kmem_nth_slab_t *chk;
261 
262 	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
263 	    UM_SLEEP | UM_GC);
264 	chk->kns_cache_addr = caddr;
265 	chk->kns_nslabs = 1;
266 	wsp->walk_addr = (uintptr_t)(caddr +
267 	    offsetof(kmem_cache_t, cache_complete_slabs));
268 
269 	return (list_walk_init_checked(wsp, "slab list", "slab",
270 	    kmem_nth_slab_check, chk));
271 }
272 
273 int
274 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
275 {
276 	uintptr_t caddr = wsp->walk_addr;
277 	kmem_cache_t c;
278 
279 	if (caddr == NULL) {
280 		mdb_warn("kmem_slab_partial doesn't support global walks\n");
281 		return (WALK_ERR);
282 	}
283 
284 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
285 		mdb_warn("couldn't read kmem_cache at %p", caddr);
286 		return (WALK_ERR);
287 	}
288 
289 	combined_walk_init(wsp);
290 
291 	/*
292 	 * Some consumers (umem_walk_step(), in particular) require at
293 	 * least one callback if there are any buffers in the cache.  So
294 	 * if there are *no* partial slabs, report the first full slab, if
295 	 * any.
296 	 *
297 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
298 	 */
299 	if (c.cache_partial_slabs.avl_numnodes == 0) {
300 		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
301 		    list_walk_step, list_walk_fini);
302 	} else {
303 		combined_walk_add(wsp, kmem_partial_slab_walk_init,
304 		    avl_walk_step, avl_walk_fini);
305 	}
306 
307 	return (WALK_NEXT);
308 }
309 
310 int
311 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
312 {
313 	kmem_cache_t c;
314 	const char *filter = NULL;
315 
316 	if (mdb_getopts(ac, argv,
317 	    'n', MDB_OPT_STR, &filter,
318 	    NULL) != ac) {
319 		return (DCMD_USAGE);
320 	}
321 
322 	if (!(flags & DCMD_ADDRSPEC)) {
323 		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
324 			mdb_warn("can't walk kmem_cache");
325 			return (DCMD_ERR);
326 		}
327 		return (DCMD_OK);
328 	}
329 
330 	if (DCMD_HDRSPEC(flags))
331 		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
332 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
333 
334 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
335 		mdb_warn("couldn't read kmem_cache at %p", addr);
336 		return (DCMD_ERR);
337 	}
338 
339 	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
340 		return (DCMD_OK);
341 
342 	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
343 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
344 
345 	return (DCMD_OK);
346 }
347 
/*
 * Print the help text for the kmem_cache dcmd: a one-line summary, a bold
 * OPTIONS heading printed two columns to the left of the surrounding text,
 * and the option and output column descriptions.
 */
void
kmem_cache_help(void)
{
	static const char summary[] = "Print kernel memory caches.\n\n";
	static const char details[] =
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n";

	mdb_printf("%s", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", details);
}
368 
/*
 * Print the distribution of slabs by number of allocated buffers.
 *
 *	ks_bucket		slab counts indexed by allocated-buffer count;
 *				entries 0..buffers_per_slab are read
 *	buffers_per_slab	largest allocated-buffer count (bucket index)
 *	maxbuckets		when > 1, upper limit on the number of bins
 *	minbucketsize		when > 1, minimum width of every bin except
 *				the first
 *
 * The first printed bin always covers exactly buffers_per_slab (completely
 * allocated slabs); the remaining bins come from dist_linear() and are
 * printed in descending order.
 */
#define	LABEL_WIDTH	11
static void
kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
	uint64_t total;
	int buckets;
	int i;
	const int *distarray;
	int complete[2];

	/* start with one bin per possible count, then narrow it down */
	buckets = buffers_per_slab;

	/* total slab count, used by dist_print_bucket() for each line */
	total = 0;
	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	if (maxbuckets > 1)
		buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		    minbucketsize));
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			buckets = 2;
			minbucketsize = (buffers_per_slab - 1);
		}
	}

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output. Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
	}
	mdb_printf("\n");
}
#undef LABEL_WIDTH
428 
429 /*ARGSUSED*/
430 static int
431 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
432 {
433 	*is_slab = B_TRUE;
434 	return (WALK_DONE);
435 }
436 
437 /*ARGSUSED*/
438 static int
439 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
440     boolean_t *is_slab)
441 {
442 	/*
443 	 * The "kmem_partial_slab" walker reports the first full slab if there
444 	 * are no partial slabs (for the sake of consumers that require at least
445 	 * one callback if there are any buffers in the cache).
446 	 */
447 	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
448 	return (WALK_DONE);
449 }
450 
/*
 * Per-slab record kept by kmem_slablist_stat() for every slab that has at
 * least one unused buffer; printed by the verbose (-v) mode of kmem_slabs().
 */
typedef struct kmem_slab_usage {
	int ksu_refcnt;			/* count of allocated buffers on slab */
	boolean_t ksu_nomove;		/* slab marked non-reclaimable */
} kmem_slab_usage_t;
455 
/*
 * Accumulator filled in by kmem_slablist_stat() while kmem_slabs() walks the
 * slabs of one cache.  ks_usage is grown on demand by the callback;
 * ks_bucket is allocated by kmem_slabs() with ks_max_buffers_per_slab + 1
 * entries and indexed by a slab's allocated-buffer count.
 */
typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;
466 
467 /*ARGSUSED*/
468 static int
469 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
470     kmem_slab_stats_t *ks)
471 {
472 	kmem_slab_usage_t *ksu;
473 	long unused;
474 
475 	ks->ks_slabs++;
476 	ks->ks_bucket[sp->slab_refcnt]++;
477 
478 	unused = (sp->slab_chunks - sp->slab_refcnt);
479 	if (unused == 0) {
480 		return (WALK_NEXT);
481 	}
482 
483 	ks->ks_partial_slabs++;
484 	ks->ks_unused_buffers += unused;
485 
486 	if (ks->ks_partial_slabs > ks->ks_usage_len) {
487 		kmem_slab_usage_t *usage;
488 		int len = ks->ks_usage_len;
489 
490 		len = (len == 0 ? 16 : len * 2);
491 		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
492 		if (ks->ks_usage != NULL) {
493 			bcopy(ks->ks_usage, usage,
494 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
495 			mdb_free(ks->ks_usage,
496 			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
497 		}
498 		ks->ks_usage = usage;
499 		ks->ks_usage_len = len;
500 	}
501 
502 	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
503 	ksu->ksu_refcnt = sp->slab_refcnt;
504 	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
505 	return (WALK_NEXT);
506 }
507 
/*
 * Print the three-line column header used by kmem_slabs(): two lines of
 * column titles and a line of dashes, all with the same column widths.
 * Declared with a (void) parameter list, like the other argument-less
 * functions in this file, so that it is a proper prototype.
 */
static void
kmem_slabs_header(void)
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}
519 
520 int
521 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
522 {
523 	kmem_cache_t c;
524 	kmem_slab_stats_t stats;
525 	mdb_walk_cb_t cb;
526 	int pct;
527 	int tenths_pct;
528 	size_t maxbuckets = 1;
529 	size_t minbucketsize = 0;
530 	const char *filter = NULL;
531 	const char *name = NULL;
532 	uint_t opt_v = FALSE;
533 	boolean_t buckets = B_FALSE;
534 	boolean_t skip = B_FALSE;
535 
536 	if (mdb_getopts(argc, argv,
537 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
538 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
539 	    'n', MDB_OPT_STR, &filter,
540 	    'N', MDB_OPT_STR, &name,
541 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
542 	    NULL) != argc) {
543 		return (DCMD_USAGE);
544 	}
545 
546 	if ((maxbuckets != 1) || (minbucketsize != 0)) {
547 		buckets = B_TRUE;
548 	}
549 
550 	if (!(flags & DCMD_ADDRSPEC)) {
551 		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
552 		    argv) == -1) {
553 			mdb_warn("can't walk kmem_cache");
554 			return (DCMD_ERR);
555 		}
556 		return (DCMD_OK);
557 	}
558 
559 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
560 		mdb_warn("couldn't read kmem_cache at %p", addr);
561 		return (DCMD_ERR);
562 	}
563 
564 	if (name == NULL) {
565 		skip = ((filter != NULL) &&
566 		    (strstr(c.cache_name, filter) == NULL));
567 	} else if (filter == NULL) {
568 		skip = (strcmp(c.cache_name, name) != 0);
569 	} else {
570 		/* match either -n or -N */
571 		skip = ((strcmp(c.cache_name, name) != 0) &&
572 		    (strstr(c.cache_name, filter) == NULL));
573 	}
574 
575 	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
576 		kmem_slabs_header();
577 	} else if ((opt_v || buckets) && !skip) {
578 		if (DCMD_HDRSPEC(flags)) {
579 			kmem_slabs_header();
580 		} else {
581 			boolean_t is_slab = B_FALSE;
582 			const char *walker_name;
583 			if (opt_v) {
584 				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
585 				walker_name = "kmem_slab_partial";
586 			} else {
587 				cb = (mdb_walk_cb_t)kmem_first_slab;
588 				walker_name = "kmem_slab";
589 			}
590 			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
591 			if (is_slab) {
592 				kmem_slabs_header();
593 			}
594 		}
595 	}
596 
597 	if (skip) {
598 		return (DCMD_OK);
599 	}
600 
601 	bzero(&stats, sizeof (kmem_slab_stats_t));
602 	stats.ks_cp = &c;
603 	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
604 	/* +1 to include a zero bucket */
605 	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
606 	    sizeof (*stats.ks_bucket), UM_SLEEP);
607 	cb = (mdb_walk_cb_t)kmem_slablist_stat;
608 	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);
609 
610 	if (c.cache_buftotal == 0) {
611 		pct = 0;
612 		tenths_pct = 0;
613 	} else {
614 		uint64_t n = stats.ks_unused_buffers * 10000;
615 		pct = (int)(n / c.cache_buftotal);
616 		tenths_pct = pct - ((pct / 100) * 100);
617 		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
618 		if (tenths_pct == 10) {
619 			pct += 100;
620 			tenths_pct = 0;
621 		}
622 	}
623 
624 	pct /= 100;
625 	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
626 	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
627 	    stats.ks_unused_buffers, pct, tenths_pct);
628 
629 	if (maxbuckets == 0) {
630 		maxbuckets = stats.ks_max_buffers_per_slab;
631 	}
632 
633 	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
634 	    (stats.ks_slabs > 0)) {
635 		mdb_printf("\n");
636 		kmem_slabs_print_dist(stats.ks_bucket,
637 		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
638 	}
639 
640 	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
641 	    sizeof (*stats.ks_bucket));
642 
643 	if (!opt_v) {
644 		return (DCMD_OK);
645 	}
646 
647 	if (opt_v && (stats.ks_partial_slabs > 0)) {
648 		int i;
649 		kmem_slab_usage_t *ksu;
650 
651 		mdb_printf("  %d complete (%d), %d partial:",
652 		    (stats.ks_slabs - stats.ks_partial_slabs),
653 		    stats.ks_max_buffers_per_slab,
654 		    stats.ks_partial_slabs);
655 
656 		for (i = 0; i < stats.ks_partial_slabs; i++) {
657 			ksu = &stats.ks_usage[i];
658 			mdb_printf(" %d%s", ksu->ksu_refcnt,
659 			    (ksu->ksu_nomove ? "*" : ""));
660 		}
661 		mdb_printf("\n\n");
662 	}
663 
664 	if (stats.ks_usage_len > 0) {
665 		mdb_free(stats.ks_usage,
666 		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
667 	}
668 
669 	return (DCMD_OK);
670 }
671 
/*
 * Print the help text for the kmem_slabs dcmd: a one-line summary, a bold
 * OPTIONS heading printed two columns to the left of the surrounding text,
 * and the option and output column descriptions.
 *
 * The -v example line follows the format kmem_slabs() actually prints,
 * "<n> complete (<buffers per slab>), <n> partial: ...".
 */
void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete (8), 3 partial: 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}
739 
/*
 * Three-way comparison of two uintptr_t values reached through lhs and rhs,
 * in the form qsort()-style sorting expects: -1 if *lhs is smaller, 1 if it
 * is larger, 0 if both are equal.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t left = *(const uintptr_t *)lhs;
	const uintptr_t right = *(const uintptr_t *)rhs;

	if (left == right)
		return (0);

	return (left < right ? -1 : 1);
}
752 
753 static int
754 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
755 {
756 	const kmem_bufctl_audit_t *bcp1 = *lhs;
757 	const kmem_bufctl_audit_t *bcp2 = *rhs;
758 
759 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
760 		return (-1);
761 
762 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
763 		return (1);
764 
765 	return (0);
766 }
767 
/*
 * State for the kmem_hash walker, which visits every bufctl reachable from a
 * cache's hash table.
 */
typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;		/* local copy of the hash table */
	size_t kmhw_nelems;		/* number of buckets in kmhw_table */
	size_t kmhw_pos;		/* next bucket index to examine */
	kmem_bufctl_t kmhw_cur;		/* most recently read bufctl */
} kmem_hash_walk_t;
774 
775 int
776 kmem_hash_walk_init(mdb_walk_state_t *wsp)
777 {
778 	kmem_hash_walk_t *kmhw;
779 	uintptr_t *hash;
780 	kmem_cache_t c;
781 	uintptr_t haddr, addr = wsp->walk_addr;
782 	size_t nelems;
783 	size_t hsize;
784 
785 	if (addr == NULL) {
786 		mdb_warn("kmem_hash doesn't support global walks\n");
787 		return (WALK_ERR);
788 	}
789 
790 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
791 		mdb_warn("couldn't read cache at addr %p", addr);
792 		return (WALK_ERR);
793 	}
794 
795 	if (!(c.cache_flags & KMF_HASH)) {
796 		mdb_warn("cache %p doesn't have a hash table\n", addr);
797 		return (WALK_DONE);		/* nothing to do */
798 	}
799 
800 	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
801 	kmhw->kmhw_cur.bc_next = NULL;
802 	kmhw->kmhw_pos = 0;
803 
804 	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
805 	hsize = nelems * sizeof (uintptr_t);
806 	haddr = (uintptr_t)c.cache_hash_table;
807 
808 	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
809 	if (mdb_vread(hash, hsize, haddr) == -1) {
810 		mdb_warn("failed to read hash table at %p", haddr);
811 		mdb_free(hash, hsize);
812 		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
813 		return (WALK_ERR);
814 	}
815 
816 	wsp->walk_data = kmhw;
817 
818 	return (WALK_NEXT);
819 }
820 
821 int
822 kmem_hash_walk_step(mdb_walk_state_t *wsp)
823 {
824 	kmem_hash_walk_t *kmhw = wsp->walk_data;
825 	uintptr_t addr = NULL;
826 
827 	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
828 		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
829 			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
830 				break;
831 		}
832 	}
833 	if (addr == NULL)
834 		return (WALK_DONE);
835 
836 	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
837 		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
838 		return (WALK_ERR);
839 	}
840 
841 	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
842 }
843 
844 void
845 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
846 {
847 	kmem_hash_walk_t *kmhw = wsp->walk_data;
848 
849 	if (kmhw == NULL)
850 		return;
851 
852 	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
853 	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
854 }
855 
856 /*
857  * Find the address of the bufctl structure for the address 'buf' in cache
858  * 'cp', which is at address caddr, and place it in *out.
859  */
860 static int
861 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
862 {
863 	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
864 	kmem_bufctl_t *bcp;
865 	kmem_bufctl_t bc;
866 
867 	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
868 		mdb_warn("unable to read hash bucket for %p in cache %p",
869 		    buf, caddr);
870 		return (-1);
871 	}
872 
873 	while (bcp != NULL) {
874 		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
875 		    (uintptr_t)bcp) == -1) {
876 			mdb_warn("unable to read bufctl at %p", bcp);
877 			return (-1);
878 		}
879 		if (bc.bc_addr == buf) {
880 			*out = (uintptr_t)bcp;
881 			return (0);
882 		}
883 		bcp = bc.bc_next;
884 	}
885 
886 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
887 	return (-1);
888 }
889 
/*
 * Return the magazine size of the cache described by cp (a local copy of the
 * kmem_cache_t).
 *
 * CPU 0's cc_magsize is used when it is nonzero.  When it is zero and the
 * cache has KMF_NOMAGAZINE set, 0 is returned.  Otherwise the size is read
 * from the kmem_magtype_t that cache_magtype points to, after checking that
 * the pointer lies inside the "kmem_magtype" symbol on an element boundary.
 *
 * Returns 0 (after a warning) if that pointer is invalid or the magtype
 * cannot be read.
 */
int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	/*
	 * A failed symbol lookup only produces a warning: control still
	 * falls through to the read below, with the pointer unvalidated.
	 */
	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		/* outside the symbol, or not on a kmem_magtype_t boundary */
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}
922 
923 /*ARGSUSED*/
924 static int
925 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
926 {
927 	*est -= (sp->slab_chunks - sp->slab_refcnt);
928 
929 	return (WALK_NEXT);
930 }
931 
932 /*
933  * Returns an upper bound on the number of allocated buffers in a given
934  * cache.
935  */
936 size_t
937 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
938 {
939 	int magsize;
940 	size_t cache_est;
941 
942 	cache_est = cp->cache_buftotal;
943 
944 	(void) mdb_pwalk("kmem_slab_partial",
945 	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
946 
947 	if ((magsize = kmem_get_magsize(cp)) != 0) {
948 		size_t mag_est = cp->cache_full.ml_total * magsize;
949 
950 		if (cache_est >= mag_est) {
951 			cache_est -= mag_est;
952 		} else {
953 			mdb_warn("cache %p's magazine layer holds more buffers "
954 			    "than the slab layer.\n", addr);
955 		}
956 	}
957 	return (cache_est);
958 }
959 
/*
 * Helper for kmem_read_magazines(): read the magazine at target address kmp
 * into mp and append its first 'rounds' entries to maglist.
 *
 * The macro relies on the caller's locals (kmp, mp, magbsize, maglist,
 * magcnt, magmax, i) and jumps to the caller's 'fail' label if the magazine
 * cannot be read or maglist becomes full.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as exactly
 * one statement, and the argument is parenthesized so that any expression
 * may be passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
974 
975 int
976 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
977     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
978 {
979 	kmem_magazine_t *kmp, *mp;
980 	void **maglist = NULL;
981 	int i, cpu;
982 	size_t magsize, magmax, magbsize;
983 	size_t magcnt = 0;
984 
985 	/*
986 	 * Read the magtype out of the cache, after verifying the pointer's
987 	 * correctness.
988 	 */
989 	magsize = kmem_get_magsize(cp);
990 	if (magsize == 0) {
991 		*maglistp = NULL;
992 		*magcntp = 0;
993 		*magmaxp = 0;
994 		return (WALK_NEXT);
995 	}
996 
997 	/*
998 	 * There are several places where we need to go buffer hunting:
999 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1000 	 * and the full magazine list in the depot.
1001 	 *
1002 	 * For an upper bound on the number of buffers in the magazine
1003 	 * layer, we have the number of magazines on the cache_full
1004 	 * list plus at most two magazines per CPU (the loaded and the
1005 	 * spare).  Toss in 100 magazines as a fudge factor in case this
1006 	 * is live (the number "100" comes from the same fudge factor in
1007 	 * crash(1M)).
1008 	 */
1009 	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1010 	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1011 
1012 	if (magbsize >= PAGESIZE / 2) {
1013 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1014 		    addr, magbsize);
1015 		return (WALK_ERR);
1016 	}
1017 
1018 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1019 	mp = mdb_alloc(magbsize, alloc_flags);
1020 	if (mp == NULL || maglist == NULL)
1021 		goto fail;
1022 
1023 	/*
1024 	 * First up: the magazines in the depot (i.e. on the cache_full list).
1025 	 */
1026 	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1027 		READMAG_ROUNDS(magsize);
1028 		kmp = mp->mag_next;
1029 
1030 		if (kmp == cp->cache_full.ml_list)
1031 			break; /* cache_full list loop detected */
1032 	}
1033 
1034 	dprintf(("cache_full list done\n"));
1035 
1036 	/*
1037 	 * Now whip through the CPUs, snagging the loaded magazines
1038 	 * and full spares.
1039 	 */
1040 	for (cpu = 0; cpu < ncpus; cpu++) {
1041 		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1042 
1043 		dprintf(("reading cpu cache %p\n",
1044 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
1045 
1046 		if (ccp->cc_rounds > 0 &&
1047 		    (kmp = ccp->cc_loaded) != NULL) {
1048 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
1049 			READMAG_ROUNDS(ccp->cc_rounds);
1050 		}
1051 
1052 		if (ccp->cc_prounds > 0 &&
1053 		    (kmp = ccp->cc_ploaded) != NULL) {
1054 			dprintf(("reading %d previously loaded rounds\n",
1055 			    ccp->cc_prounds));
1056 			READMAG_ROUNDS(ccp->cc_prounds);
1057 		}
1058 	}
1059 
1060 	dprintf(("magazine layer: %d buffers\n", magcnt));
1061 
1062 	if (!(alloc_flags & UM_GC))
1063 		mdb_free(mp, magbsize);
1064 
1065 	*maglistp = maglist;
1066 	*magcntp = magcnt;
1067 	*magmaxp = magmax;
1068 
1069 	return (WALK_NEXT);
1070 
1071 fail:
1072 	if (!(alloc_flags & UM_GC)) {
1073 		if (mp)
1074 			mdb_free(mp, magbsize);
1075 		if (maglist)
1076 			mdb_free(maglist, magmax * sizeof (void *));
1077 	}
1078 	return (WALK_ERR);
1079 }
1080 
1081 static int
1082 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1083 {
1084 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1085 }
1086 
1087 static int
1088 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1089 {
1090 	kmem_bufctl_audit_t b;
1091 
1092 	/*
1093 	 * if KMF_AUDIT is not set, we know that we're looking at a
1094 	 * kmem_bufctl_t.
1095 	 */
1096 	if (!(cp->cache_flags & KMF_AUDIT) ||
1097 	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1098 		(void) memset(&b, 0, sizeof (b));
1099 		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1100 			mdb_warn("unable to read bufctl at %p", buf);
1101 			return (WALK_ERR);
1102 		}
1103 	}
1104 
1105 	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1106 }
1107 
1108 typedef struct kmem_walk {
1109 	int kmw_type;
1110 
1111 	int kmw_addr;			/* cache address */
1112 	kmem_cache_t *kmw_cp;
1113 	size_t kmw_csize;
1114 
1115 	/*
1116 	 * magazine layer
1117 	 */
1118 	void **kmw_maglist;
1119 	size_t kmw_max;
1120 	size_t kmw_count;
1121 	size_t kmw_pos;
1122 
1123 	/*
1124 	 * slab layer
1125 	 */
1126 	char *kmw_valid;	/* to keep track of freed buffers */
1127 	char *kmw_ubase;	/* buffer for slab data */
1128 } kmem_walk_t;
1129 
1130 static int
1131 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1132 {
1133 	kmem_walk_t *kmw;
1134 	int ncpus, csize;
1135 	kmem_cache_t *cp;
1136 	size_t vm_quantum;
1137 
1138 	size_t magmax, magcnt;
1139 	void **maglist = NULL;
1140 	uint_t chunksize, slabsize;
1141 	int status = WALK_ERR;
1142 	uintptr_t addr = wsp->walk_addr;
1143 	const char *layered;
1144 
1145 	type &= ~KM_HASH;
1146 
1147 	if (addr == NULL) {
1148 		mdb_warn("kmem walk doesn't support global walks\n");
1149 		return (WALK_ERR);
1150 	}
1151 
1152 	dprintf(("walking %p\n", addr));
1153 
1154 	/*
1155 	 * First we need to figure out how many CPUs are configured in the
1156 	 * system to know how much to slurp out.
1157 	 */
1158 	mdb_readvar(&ncpus, "max_ncpus");
1159 
1160 	csize = KMEM_CACHE_SIZE(ncpus);
1161 	cp = mdb_alloc(csize, UM_SLEEP);
1162 
1163 	if (mdb_vread(cp, csize, addr) == -1) {
1164 		mdb_warn("couldn't read cache at addr %p", addr);
1165 		goto out2;
1166 	}
1167 
1168 	/*
1169 	 * It's easy for someone to hand us an invalid cache address.
1170 	 * Unfortunately, it is hard for this walker to survive an
1171 	 * invalid cache cleanly.  So we make sure that:
1172 	 *
1173 	 *	1. the vmem arena for the cache is readable,
1174 	 *	2. the vmem arena's quantum is a power of 2,
1175 	 *	3. our slabsize is a multiple of the quantum, and
1176 	 *	4. our chunksize is >0 and less than our slabsize.
1177 	 */
1178 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1179 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1180 	    vm_quantum == 0 ||
1181 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1182 	    cp->cache_slabsize < vm_quantum ||
1183 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1184 	    cp->cache_chunksize == 0 ||
1185 	    cp->cache_chunksize > cp->cache_slabsize) {
1186 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1187 		goto out2;
1188 	}
1189 
1190 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1191 
1192 	if (cp->cache_buftotal == 0) {
1193 		mdb_free(cp, csize);
1194 		return (WALK_DONE);
1195 	}
1196 
1197 	/*
1198 	 * If they ask for bufctls, but it's a small-slab cache,
1199 	 * there is nothing to report.
1200 	 */
1201 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1202 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1203 		    cp->cache_flags));
1204 		mdb_free(cp, csize);
1205 		return (WALK_DONE);
1206 	}
1207 
1208 	/*
1209 	 * If they want constructed buffers, but there's no constructor or
1210 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
1211 	 */
1212 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1213 	    cp->cache_constructor == NULL ||
1214 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1215 		mdb_free(cp, csize);
1216 		return (WALK_DONE);
1217 	}
1218 
1219 	/*
1220 	 * Read in the contents of the magazine layer
1221 	 */
1222 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1223 	    &magmax, UM_SLEEP) == WALK_ERR)
1224 		goto out2;
1225 
1226 	/*
1227 	 * We have all of the buffers from the magazines;  if we are walking
1228 	 * allocated buffers, sort them so we can bsearch them later.
1229 	 */
1230 	if (type & KM_ALLOCATED)
1231 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1232 
1233 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1234 
1235 	kmw->kmw_type = type;
1236 	kmw->kmw_addr = addr;
1237 	kmw->kmw_cp = cp;
1238 	kmw->kmw_csize = csize;
1239 	kmw->kmw_maglist = maglist;
1240 	kmw->kmw_max = magmax;
1241 	kmw->kmw_count = magcnt;
1242 	kmw->kmw_pos = 0;
1243 
1244 	/*
1245 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
1246 	 * hash table instead of the slab layer.
1247 	 */
1248 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1249 		layered = "kmem_hash";
1250 
1251 		kmw->kmw_type |= KM_HASH;
1252 	} else {
1253 		/*
1254 		 * If we are walking freed buffers, we only need the
1255 		 * magazine layer plus the partially allocated slabs.
1256 		 * To walk allocated buffers, we need all of the slabs.
1257 		 */
1258 		if (type & KM_ALLOCATED)
1259 			layered = "kmem_slab";
1260 		else
1261 			layered = "kmem_slab_partial";
1262 
1263 		/*
1264 		 * for small-slab caches, we read in the entire slab.  For
1265 		 * freed buffers, we can just walk the freelist.  For
1266 		 * allocated buffers, we use a 'valid' array to track
1267 		 * the freed buffers.
1268 		 */
1269 		if (!(cp->cache_flags & KMF_HASH)) {
1270 			chunksize = cp->cache_chunksize;
1271 			slabsize = cp->cache_slabsize;
1272 
1273 			kmw->kmw_ubase = mdb_alloc(slabsize +
1274 			    sizeof (kmem_bufctl_t), UM_SLEEP);
1275 
1276 			if (type & KM_ALLOCATED)
1277 				kmw->kmw_valid =
1278 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1279 		}
1280 	}
1281 
1282 	status = WALK_NEXT;
1283 
1284 	if (mdb_layered_walk(layered, wsp) == -1) {
1285 		mdb_warn("unable to start layered '%s' walk", layered);
1286 		status = WALK_ERR;
1287 	}
1288 
1289 out1:
1290 	if (status == WALK_ERR) {
1291 		if (kmw->kmw_valid)
1292 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
1293 
1294 		if (kmw->kmw_ubase)
1295 			mdb_free(kmw->kmw_ubase, slabsize +
1296 			    sizeof (kmem_bufctl_t));
1297 
1298 		if (kmw->kmw_maglist)
1299 			mdb_free(kmw->kmw_maglist,
1300 			    kmw->kmw_max * sizeof (uintptr_t));
1301 
1302 		mdb_free(kmw, sizeof (kmem_walk_t));
1303 		wsp->walk_data = NULL;
1304 	}
1305 
1306 out2:
1307 	if (status == WALK_ERR)
1308 		mdb_free(cp, csize);
1309 
1310 	return (status);
1311 }
1312 
1313 int
1314 kmem_walk_step(mdb_walk_state_t *wsp)
1315 {
1316 	kmem_walk_t *kmw = wsp->walk_data;
1317 	int type = kmw->kmw_type;
1318 	kmem_cache_t *cp = kmw->kmw_cp;
1319 
1320 	void **maglist = kmw->kmw_maglist;
1321 	int magcnt = kmw->kmw_count;
1322 
1323 	uintptr_t chunksize, slabsize;
1324 	uintptr_t addr;
1325 	const kmem_slab_t *sp;
1326 	const kmem_bufctl_t *bcp;
1327 	kmem_bufctl_t bc;
1328 
1329 	int chunks;
1330 	char *kbase;
1331 	void *buf;
1332 	int i, ret;
1333 
1334 	char *valid, *ubase;
1335 
1336 	/*
1337 	 * first, handle the 'kmem_hash' layered walk case
1338 	 */
1339 	if (type & KM_HASH) {
1340 		/*
1341 		 * We have a buffer which has been allocated out of the
1342 		 * global layer. We need to make sure that it's not
1343 		 * actually sitting in a magazine before we report it as
1344 		 * an allocated buffer.
1345 		 */
1346 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1347 
1348 		if (magcnt > 0 &&
1349 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1350 		    addrcmp) != NULL)
1351 			return (WALK_NEXT);
1352 
1353 		if (type & KM_BUFCTL)
1354 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1355 
1356 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
1357 	}
1358 
1359 	ret = WALK_NEXT;
1360 
1361 	addr = kmw->kmw_addr;
1362 
1363 	/*
1364 	 * If we're walking freed buffers, report everything in the
1365 	 * magazine layer before processing the first slab.
1366 	 */
1367 	if ((type & KM_FREE) && magcnt != 0) {
1368 		kmw->kmw_count = 0;		/* only do this once */
1369 		for (i = 0; i < magcnt; i++) {
1370 			buf = maglist[i];
1371 
1372 			if (type & KM_BUFCTL) {
1373 				uintptr_t out;
1374 
1375 				if (cp->cache_flags & KMF_BUFTAG) {
1376 					kmem_buftag_t *btp;
1377 					kmem_buftag_t tag;
1378 
1379 					/* LINTED - alignment */
1380 					btp = KMEM_BUFTAG(cp, buf);
1381 					if (mdb_vread(&tag, sizeof (tag),
1382 					    (uintptr_t)btp) == -1) {
1383 						mdb_warn("reading buftag for "
1384 						    "%p at %p", buf, btp);
1385 						continue;
1386 					}
1387 					out = (uintptr_t)tag.bt_bufctl;
1388 				} else {
1389 					if (kmem_hash_lookup(cp, addr, buf,
1390 					    &out) == -1)
1391 						continue;
1392 				}
1393 				ret = bufctl_walk_callback(cp, wsp, out);
1394 			} else {
1395 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1396 			}
1397 
1398 			if (ret != WALK_NEXT)
1399 				return (ret);
1400 		}
1401 	}
1402 
1403 	/*
1404 	 * If they want constructed buffers, we're finished, since the
1405 	 * magazine layer holds them all.
1406 	 */
1407 	if (type & KM_CONSTRUCTED)
1408 		return (WALK_DONE);
1409 
1410 	/*
1411 	 * Handle the buffers in the current slab
1412 	 */
1413 	chunksize = cp->cache_chunksize;
1414 	slabsize = cp->cache_slabsize;
1415 
1416 	sp = wsp->walk_layer;
1417 	chunks = sp->slab_chunks;
1418 	kbase = sp->slab_base;
1419 
1420 	dprintf(("kbase is %p\n", kbase));
1421 
1422 	if (!(cp->cache_flags & KMF_HASH)) {
1423 		valid = kmw->kmw_valid;
1424 		ubase = kmw->kmw_ubase;
1425 
1426 		if (mdb_vread(ubase, chunks * chunksize,
1427 		    (uintptr_t)kbase) == -1) {
1428 			mdb_warn("failed to read slab contents at %p", kbase);
1429 			return (WALK_ERR);
1430 		}
1431 
1432 		/*
1433 		 * Set up the valid map as fully allocated -- we'll punch
1434 		 * out the freelist.
1435 		 */
1436 		if (type & KM_ALLOCATED)
1437 			(void) memset(valid, 1, chunks);
1438 	} else {
1439 		valid = NULL;
1440 		ubase = NULL;
1441 	}
1442 
1443 	/*
1444 	 * walk the slab's freelist
1445 	 */
1446 	bcp = sp->slab_head;
1447 
1448 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1449 
1450 	/*
1451 	 * since we could be in the middle of allocating a buffer,
1452 	 * our refcnt could be one higher than it aught.  So we
1453 	 * check one further on the freelist than the count allows.
1454 	 */
1455 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1456 		uint_t ndx;
1457 
1458 		dprintf(("bcp is %p\n", bcp));
1459 
1460 		if (bcp == NULL) {
1461 			if (i == chunks)
1462 				break;
1463 			mdb_warn(
1464 			    "slab %p in cache %p freelist too short by %d\n",
1465 			    sp, addr, chunks - i);
1466 			break;
1467 		}
1468 
1469 		if (cp->cache_flags & KMF_HASH) {
1470 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1471 				mdb_warn("failed to read bufctl ptr at %p",
1472 				    bcp);
1473 				break;
1474 			}
1475 			buf = bc.bc_addr;
1476 		} else {
1477 			/*
1478 			 * Otherwise the buffer is in the slab which
1479 			 * we've read in;  we just need to determine
1480 			 * its offset in the slab to find the
1481 			 * kmem_bufctl_t.
1482 			 */
1483 			bc = *((kmem_bufctl_t *)
1484 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1485 			    (uintptr_t)ubase));
1486 
1487 			buf = KMEM_BUF(cp, bcp);
1488 		}
1489 
1490 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1491 
1492 		if (ndx > slabsize / cp->cache_bufsize) {
1493 			/*
1494 			 * This is very wrong; we have managed to find
1495 			 * a buffer in the slab which shouldn't
1496 			 * actually be here.  Emit a warning, and
1497 			 * try to continue.
1498 			 */
1499 			mdb_warn("buf %p is out of range for "
1500 			    "slab %p, cache %p\n", buf, sp, addr);
1501 		} else if (type & KM_ALLOCATED) {
1502 			/*
1503 			 * we have found a buffer on the slab's freelist;
1504 			 * clear its entry
1505 			 */
1506 			valid[ndx] = 0;
1507 		} else {
1508 			/*
1509 			 * Report this freed buffer
1510 			 */
1511 			if (type & KM_BUFCTL) {
1512 				ret = bufctl_walk_callback(cp, wsp,
1513 				    (uintptr_t)bcp);
1514 			} else {
1515 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1516 			}
1517 			if (ret != WALK_NEXT)
1518 				return (ret);
1519 		}
1520 
1521 		bcp = bc.bc_next;
1522 	}
1523 
1524 	if (bcp != NULL) {
1525 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1526 		    sp, addr, bcp));
1527 	}
1528 
1529 	/*
1530 	 * If we are walking freed buffers, the loop above handled reporting
1531 	 * them.
1532 	 */
1533 	if (type & KM_FREE)
1534 		return (WALK_NEXT);
1535 
1536 	if (type & KM_BUFCTL) {
1537 		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1538 		    "cache %p\n", addr);
1539 		return (WALK_ERR);
1540 	}
1541 
1542 	/*
1543 	 * Report allocated buffers, skipping buffers in the magazine layer.
1544 	 * We only get this far for small-slab caches.
1545 	 */
1546 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1547 		buf = (char *)kbase + i * chunksize;
1548 
1549 		if (!valid[i])
1550 			continue;		/* on slab freelist */
1551 
1552 		if (magcnt > 0 &&
1553 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1554 		    addrcmp) != NULL)
1555 			continue;		/* in magazine layer */
1556 
1557 		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1558 	}
1559 	return (ret);
1560 }
1561 
1562 void
1563 kmem_walk_fini(mdb_walk_state_t *wsp)
1564 {
1565 	kmem_walk_t *kmw = wsp->walk_data;
1566 	uintptr_t chunksize;
1567 	uintptr_t slabsize;
1568 
1569 	if (kmw == NULL)
1570 		return;
1571 
1572 	if (kmw->kmw_maglist != NULL)
1573 		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1574 
1575 	chunksize = kmw->kmw_cp->cache_chunksize;
1576 	slabsize = kmw->kmw_cp->cache_slabsize;
1577 
1578 	if (kmw->kmw_valid != NULL)
1579 		mdb_free(kmw->kmw_valid, slabsize / chunksize);
1580 	if (kmw->kmw_ubase != NULL)
1581 		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1582 
1583 	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1584 	mdb_free(kmw, sizeof (kmem_walk_t));
1585 }
1586 
1587 /*ARGSUSED*/
1588 static int
1589 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1590 {
1591 	/*
1592 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1593 	 * memory in other caches.  This can be a little confusing, so we
1594 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1595 	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1596 	 */
1597 	if (c->cache_cflags & KMC_NOTOUCH)
1598 		return (WALK_NEXT);
1599 
1600 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1601 	    wsp->walk_cbdata, addr) == -1)
1602 		return (WALK_DONE);
1603 
1604 	return (WALK_NEXT);
1605 }
1606 
/*
 * Turn a global walk into a walk of every kmem cache: stash the walker name
 * in the walk state and run kmem_walk_all() over the "kmem_cache" walk.
 *
 * This macro returns from the enclosing function on every path: WALK_ERR if
 * the kmem_cache walk could not be run, WALK_DONE otherwise.  The wsp
 * argument is parenthesized so that any pointer-valued expression may be
 * passed safely.
 */
#define	KMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1613 
1614 int
1615 kmem_walk_init(mdb_walk_state_t *wsp)
1616 {
1617 	if (wsp->walk_arg != NULL)
1618 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1619 
1620 	if (wsp->walk_addr == NULL)
1621 		KMEM_WALK_ALL("kmem", wsp);
1622 	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1623 }
1624 
1625 int
1626 bufctl_walk_init(mdb_walk_state_t *wsp)
1627 {
1628 	if (wsp->walk_addr == NULL)
1629 		KMEM_WALK_ALL("bufctl", wsp);
1630 	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1631 }
1632 
1633 int
1634 freemem_walk_init(mdb_walk_state_t *wsp)
1635 {
1636 	if (wsp->walk_addr == NULL)
1637 		KMEM_WALK_ALL("freemem", wsp);
1638 	return (kmem_walk_init_common(wsp, KM_FREE));
1639 }
1640 
1641 int
1642 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1643 {
1644 	if (wsp->walk_addr == NULL)
1645 		KMEM_WALK_ALL("freemem_constructed", wsp);
1646 	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1647 }
1648 
1649 int
1650 freectl_walk_init(mdb_walk_state_t *wsp)
1651 {
1652 	if (wsp->walk_addr == NULL)
1653 		KMEM_WALK_ALL("freectl", wsp);
1654 	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1655 }
1656 
1657 int
1658 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1659 {
1660 	if (wsp->walk_addr == NULL)
1661 		KMEM_WALK_ALL("freectl_constructed", wsp);
1662 	return (kmem_walk_init_common(wsp,
1663 	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1664 }
1665 
1666 typedef struct bufctl_history_walk {
1667 	void		*bhw_next;
1668 	kmem_cache_t	*bhw_cache;
1669 	kmem_slab_t	*bhw_slab;
1670 	hrtime_t	bhw_timestamp;
1671 } bufctl_history_walk_t;
1672 
1673 int
1674 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1675 {
1676 	bufctl_history_walk_t *bhw;
1677 	kmem_bufctl_audit_t bc;
1678 	kmem_bufctl_audit_t bcn;
1679 
1680 	if (wsp->walk_addr == NULL) {
1681 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1682 		return (WALK_ERR);
1683 	}
1684 
1685 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1686 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1687 		return (WALK_ERR);
1688 	}
1689 
1690 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1691 	bhw->bhw_timestamp = 0;
1692 	bhw->bhw_cache = bc.bc_cache;
1693 	bhw->bhw_slab = bc.bc_slab;
1694 
1695 	/*
1696 	 * sometimes the first log entry matches the base bufctl;  in that
1697 	 * case, skip the base bufctl.
1698 	 */
1699 	if (bc.bc_lastlog != NULL &&
1700 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1701 	    bc.bc_addr == bcn.bc_addr &&
1702 	    bc.bc_cache == bcn.bc_cache &&
1703 	    bc.bc_slab == bcn.bc_slab &&
1704 	    bc.bc_timestamp == bcn.bc_timestamp &&
1705 	    bc.bc_thread == bcn.bc_thread)
1706 		bhw->bhw_next = bc.bc_lastlog;
1707 	else
1708 		bhw->bhw_next = (void *)wsp->walk_addr;
1709 
1710 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1711 	wsp->walk_data = bhw;
1712 
1713 	return (WALK_NEXT);
1714 }
1715 
1716 int
1717 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1718 {
1719 	bufctl_history_walk_t *bhw = wsp->walk_data;
1720 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1721 	uintptr_t baseaddr = wsp->walk_addr;
1722 	kmem_bufctl_audit_t bc;
1723 
1724 	if (addr == NULL)
1725 		return (WALK_DONE);
1726 
1727 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1728 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1729 		return (WALK_ERR);
1730 	}
1731 
1732 	/*
1733 	 * The bufctl is only valid if the address, cache, and slab are
1734 	 * correct.  We also check that the timestamp is decreasing, to
1735 	 * prevent infinite loops.
1736 	 */
1737 	if ((uintptr_t)bc.bc_addr != baseaddr ||
1738 	    bc.bc_cache != bhw->bhw_cache ||
1739 	    bc.bc_slab != bhw->bhw_slab ||
1740 	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1741 		return (WALK_DONE);
1742 
1743 	bhw->bhw_next = bc.bc_lastlog;
1744 	bhw->bhw_timestamp = bc.bc_timestamp;
1745 
1746 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1747 }
1748 
1749 void
1750 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1751 {
1752 	bufctl_history_walk_t *bhw = wsp->walk_data;
1753 
1754 	mdb_free(bhw, sizeof (*bhw));
1755 }
1756 
1757 typedef struct kmem_log_walk {
1758 	kmem_bufctl_audit_t *klw_base;
1759 	kmem_bufctl_audit_t **klw_sorted;
1760 	kmem_log_header_t klw_lh;
1761 	size_t klw_size;
1762 	size_t klw_maxndx;
1763 	size_t klw_ndx;
1764 } kmem_log_walk_t;
1765 
1766 int
1767 kmem_log_walk_init(mdb_walk_state_t *wsp)
1768 {
1769 	uintptr_t lp = wsp->walk_addr;
1770 	kmem_log_walk_t *klw;
1771 	kmem_log_header_t *lhp;
1772 	int maxndx, i, j, k;
1773 
1774 	/*
1775 	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
1776 	 * read the log whose kmem_log_header_t is stored at walk_addr.
1777 	 */
1778 	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1779 		mdb_warn("failed to read 'kmem_transaction_log'");
1780 		return (WALK_ERR);
1781 	}
1782 
1783 	if (lp == NULL) {
1784 		mdb_warn("log is disabled\n");
1785 		return (WALK_ERR);
1786 	}
1787 
1788 	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1789 	lhp = &klw->klw_lh;
1790 
1791 	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1792 		mdb_warn("failed to read log header at %p", lp);
1793 		mdb_free(klw, sizeof (kmem_log_walk_t));
1794 		return (WALK_ERR);
1795 	}
1796 
1797 	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1798 	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1799 	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1800 
1801 	if (mdb_vread(klw->klw_base, klw->klw_size,
1802 	    (uintptr_t)lhp->lh_base) == -1) {
1803 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1804 		mdb_free(klw->klw_base, klw->klw_size);
1805 		mdb_free(klw, sizeof (kmem_log_walk_t));
1806 		return (WALK_ERR);
1807 	}
1808 
1809 	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1810 	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1811 
1812 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1813 		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1814 		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1815 
1816 		for (j = 0; j < maxndx; j++)
1817 			klw->klw_sorted[k++] = &chunk[j];
1818 	}
1819 
1820 	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1821 	    (int(*)(const void *, const void *))bufctlcmp);
1822 
1823 	klw->klw_maxndx = k;
1824 	wsp->walk_data = klw;
1825 
1826 	return (WALK_NEXT);
1827 }
1828 
1829 int
1830 kmem_log_walk_step(mdb_walk_state_t *wsp)
1831 {
1832 	kmem_log_walk_t *klw = wsp->walk_data;
1833 	kmem_bufctl_audit_t *bcp;
1834 
1835 	if (klw->klw_ndx == klw->klw_maxndx)
1836 		return (WALK_DONE);
1837 
1838 	bcp = klw->klw_sorted[klw->klw_ndx++];
1839 
1840 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1841 	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1842 }
1843 
1844 void
1845 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1846 {
1847 	kmem_log_walk_t *klw = wsp->walk_data;
1848 
1849 	mdb_free(klw->klw_base, klw->klw_size);
1850 	mdb_free(klw->klw_sorted, klw->klw_maxndx *
1851 	    sizeof (kmem_bufctl_audit_t *));
1852 	mdb_free(klw, sizeof (kmem_log_walk_t));
1853 }
1854 
1855 typedef struct allocdby_bufctl {
1856 	uintptr_t abb_addr;
1857 	hrtime_t abb_ts;
1858 } allocdby_bufctl_t;
1859 
1860 typedef struct allocdby_walk {
1861 	const char *abw_walk;
1862 	uintptr_t abw_thread;
1863 	size_t abw_nbufs;
1864 	size_t abw_size;
1865 	allocdby_bufctl_t *abw_buf;
1866 	size_t abw_ndx;
1867 } allocdby_walk_t;
1868 
1869 int
1870 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1871     allocdby_walk_t *abw)
1872 {
1873 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1874 		return (WALK_NEXT);
1875 
1876 	if (abw->abw_nbufs == abw->abw_size) {
1877 		allocdby_bufctl_t *buf;
1878 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1879 
1880 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1881 
1882 		bcopy(abw->abw_buf, buf, oldsize);
1883 		mdb_free(abw->abw_buf, oldsize);
1884 
1885 		abw->abw_size <<= 1;
1886 		abw->abw_buf = buf;
1887 	}
1888 
1889 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1890 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1891 	abw->abw_nbufs++;
1892 
1893 	return (WALK_NEXT);
1894 }
1895 
1896 /*ARGSUSED*/
1897 int
1898 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1899 {
1900 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1901 	    abw, addr) == -1) {
1902 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1903 		return (WALK_DONE);
1904 	}
1905 
1906 	return (WALK_NEXT);
1907 }
1908 
1909 static int
1910 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1911 {
1912 	if (lhs->abb_ts < rhs->abb_ts)
1913 		return (1);
1914 	if (lhs->abb_ts > rhs->abb_ts)
1915 		return (-1);
1916 	return (0);
1917 }
1918 
1919 static int
1920 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1921 {
1922 	allocdby_walk_t *abw;
1923 
1924 	if (wsp->walk_addr == NULL) {
1925 		mdb_warn("allocdby walk doesn't support global walks\n");
1926 		return (WALK_ERR);
1927 	}
1928 
1929 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1930 
1931 	abw->abw_thread = wsp->walk_addr;
1932 	abw->abw_walk = walk;
1933 	abw->abw_size = 128;	/* something reasonable */
1934 	abw->abw_buf =
1935 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1936 
1937 	wsp->walk_data = abw;
1938 
1939 	if (mdb_walk("kmem_cache",
1940 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1941 		mdb_warn("couldn't walk kmem_cache");
1942 		allocdby_walk_fini(wsp);
1943 		return (WALK_ERR);
1944 	}
1945 
1946 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1947 	    (int(*)(const void *, const void *))allocdby_cmp);
1948 
1949 	return (WALK_NEXT);
1950 }
1951 
1952 int
1953 allocdby_walk_init(mdb_walk_state_t *wsp)
1954 {
1955 	return (allocdby_walk_init_common(wsp, "bufctl"));
1956 }
1957 
1958 int
1959 freedby_walk_init(mdb_walk_state_t *wsp)
1960 {
1961 	return (allocdby_walk_init_common(wsp, "freectl"));
1962 }
1963 
1964 int
1965 allocdby_walk_step(mdb_walk_state_t *wsp)
1966 {
1967 	allocdby_walk_t *abw = wsp->walk_data;
1968 	kmem_bufctl_audit_t bc;
1969 	uintptr_t addr;
1970 
1971 	if (abw->abw_ndx == abw->abw_nbufs)
1972 		return (WALK_DONE);
1973 
1974 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1975 
1976 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1977 		mdb_warn("couldn't read bufctl at %p", addr);
1978 		return (WALK_DONE);
1979 	}
1980 
1981 	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1982 }
1983 
1984 void
1985 allocdby_walk_fini(mdb_walk_state_t *wsp)
1986 {
1987 	allocdby_walk_t *abw = wsp->walk_data;
1988 
1989 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1990 	mdb_free(abw, sizeof (allocdby_walk_t));
1991 }
1992 
1993 /*ARGSUSED*/
1994 int
1995 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
1996 {
1997 	char c[MDB_SYM_NAMLEN];
1998 	GElf_Sym sym;
1999 	int i;
2000 
2001 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2002 	for (i = 0; i < bcp->bc_depth; i++) {
2003 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
2004 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2005 			continue;
2006 		if (strncmp(c, "kmem_", 5) == 0)
2007 			continue;
2008 		mdb_printf("%s+0x%lx",
2009 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2010 		break;
2011 	}
2012 	mdb_printf("\n");
2013 
2014 	return (WALK_NEXT);
2015 }
2016 
2017 static int
2018 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2019 {
2020 	if (!(flags & DCMD_ADDRSPEC))
2021 		return (DCMD_USAGE);
2022 
2023 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2024 
2025 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2026 		mdb_warn("can't walk '%s' for %p", w, addr);
2027 		return (DCMD_ERR);
2028 	}
2029 
2030 	return (DCMD_OK);
2031 }
2032 
2033 /*ARGSUSED*/
2034 int
2035 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2036 {
2037 	return (allocdby_common(addr, flags, "allocdby"));
2038 }
2039 
2040 /*ARGSUSED*/
2041 int
2042 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2043 {
2044 	return (allocdby_common(addr, flags, "freedby"));
2045 }
2046 
2047 /*
2048  * Return a string describing the address in relation to the given thread's
2049  * stack.
2050  *
2051  * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2052  *
2053  * - If the address is above the stack pointer, return an empty string
2054  *   signifying that the address is active.
2055  *
2056  * - If the address is below the stack pointer, and the thread is not on proc,
2057  *   return " (below sp)".
2058  *
2059  * - If the address is below the stack pointer, and the thread is on proc,
2060  *   return " (possibly below sp)".  Depending on context, we may or may not
2061  *   have an accurate t_sp.
2062  */
2063 static const char *
2064 stack_active(const kthread_t *t, uintptr_t addr)
2065 {
2066 	uintptr_t panicstk;
2067 	GElf_Sym sym;
2068 
2069 	if (t->t_state == TS_FREE)
2070 		return (" (inactive interrupt thread)");
2071 
2072 	/*
2073 	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
2074 	 * no longer relates to the thread's real stack.
2075 	 */
2076 	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2077 		panicstk = (uintptr_t)sym.st_value;
2078 
2079 		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2080 			return ("");
2081 	}
2082 
2083 	if (addr >= t->t_sp + STACK_BIAS)
2084 		return ("");
2085 
2086 	if (t->t_state == TS_ONPROC)
2087 		return (" (possibly below sp)");
2088 
2089 	return (" (below sp)");
2090 }
2091 
2092 typedef struct whatis {
2093 	uintptr_t w_addr;
2094 	const kmem_cache_t *w_cache;
2095 	const vmem_t *w_vmem;
2096 	size_t w_slab_align;
2097 	int w_slab_found;
2098 	int w_found;
2099 	int w_kmem_lite_count;
2100 	uint_t w_all;
2101 	uint_t w_bufctl;
2102 	uint_t w_freemem;
2103 	uint_t w_idspace;
2104 	uint_t w_quiet;
2105 	uint_t w_verbose;
2106 } whatis_t;
2107 
/*
 * Print the start of a whatis line: either "<addr> is <description>" when
 * addr is the base itself, or "<addr> is <base>+<offset>, <description>"
 * when it points into the object.
 */
static void
whatis_report_pointer(uintptr_t addr, uintptr_t base, const char *description)
{
	if (addr != base) {
		mdb_printf("%p is %p+%p, %s",
		    addr, base, addr - base, description);
		return;
	}

	mdb_printf("%p is %s",
	    addr, description);
}
2119 
2120 /* call one of our dcmd functions with "-v" and the provided address */
2121 static void
2122 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2123 {
2124 	mdb_arg_t a;
2125 	a.a_type = MDB_TYPE_STRING;
2126 	a.a_un.a_str = "-v";
2127 
2128 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2129 }
2130 
2131 static void
2132 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
2133 {
2134 	const kmem_cache_t *cp = w->w_cache;
2135 	/* LINTED pointer cast may result in improper alignment */
2136 	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2137 	intptr_t stat;
2138 	int call_printer;
2139 	int count = 0;
2140 	int i;
2141 	pc_t callers[16];
2142 
2143 	if (cp->cache_flags & KMF_REDZONE) {
2144 		kmem_buftag_t bt;
2145 
2146 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2147 			goto done;
2148 
2149 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2150 
2151 		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2152 			goto done;
2153 
2154 		/*
2155 		 * provide the bufctl ptr if it has useful information
2156 		 */
2157 		if (baddr == 0 && (cp->cache_flags & KMF_AUDIT))
2158 			baddr = (uintptr_t)bt.bt_bufctl;
2159 
2160 		if (cp->cache_flags & KMF_LITE) {
2161 			count = w->w_kmem_lite_count;
2162 
2163 			if (count * sizeof (pc_t) > sizeof (callers))
2164 				count = 0;
2165 
2166 			if (count > 0 &&
2167 			    mdb_vread(callers, count * sizeof (pc_t),
2168 			    btaddr +
2169 			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170 				count = 0;
2171 
2172 			/*
2173 			 * skip unused callers
2174 			 */
2175 			while (count > 0 && callers[count - 1] ==
2176 			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
2177 				count--;
2178 		}
2179 	}
2180 
2181 done:
2182 	call_printer =
2183 	    (!w->w_quiet && baddr != 0 && (cp->cache_flags & KMF_AUDIT));
2184 
2185 	whatis_report_pointer(w->w_addr, addr, "");
2186 
2187 	if (baddr != 0 && !call_printer)
2188 		mdb_printf("bufctl %p ", baddr);
2189 
2190 	mdb_printf("%s from %s%s\n",
2191 	    (w->w_freemem == FALSE) ? "allocated" : "freed", cp->cache_name,
2192 	    (call_printer || (!w->w_quiet && count > 0)) ? ":" : "");
2193 
2194 	if (call_printer)
2195 		whatis_call_printer(bufctl, baddr);
2196 
2197 	if (!w->w_quiet && count > 0) {
2198 		mdb_inc_indent(8);
2199 		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
2200 		    callers[0], (count != 1)? ", ":"\n");
2201 		for (i = 1; i < count; i++)
2202 			mdb_printf("%a%s", callers[i],
2203 			    (i + 1 < count)? ", ":"\n");
2204 		mdb_dec_indent(8);
2205 	}
2206 }
2207 
2208 /*ARGSUSED*/
2209 static int
2210 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
2211 {
2212 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2213 		return (WALK_NEXT);
2214 
2215 	whatis_print_kmem(addr, 0, w);
2216 	w->w_found++;
2217 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2218 }
2219 
2220 static int
2221 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
2222 {
2223 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
2224 		return (WALK_NEXT);
2225 
2226 	whatis_report_pointer(w->w_addr, vs->vs_start, "");
2227 
2228 	/*
2229 	 * If we're not printing it seperately, provide the vmem_seg
2230 	 * pointer if it has a stack trace.
2231 	 */
2232 	if (w->w_quiet && (w->w_bufctl == TRUE ||
2233 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2234 		mdb_printf("vmem_seg %p ", addr);
2235 	}
2236 
2237 	mdb_printf("%s from %s vmem arena%s\n",
2238 	    (w->w_freemem == FALSE) ? "allocated" : "freed", w->w_vmem->vm_name,
2239 	    !w->w_quiet ? ":" : "");
2240 
2241 	if (!w->w_quiet)
2242 		whatis_call_printer(vmem_seg, addr);
2243 
2244 	w->w_found++;
2245 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2246 }
2247 
2248 static int
2249 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
2250 {
2251 	const char *nm = vmem->vm_name;
2252 	w->w_vmem = vmem;
2253 	w->w_freemem = FALSE;
2254 
2255 	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
2256 		return (WALK_NEXT);
2257 
2258 	if (w->w_verbose)
2259 		mdb_printf("Searching vmem arena %s...\n", nm);
2260 
2261 	if (mdb_pwalk("vmem_alloc",
2262 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2263 		mdb_warn("can't walk vmem seg for %p", addr);
2264 		return (WALK_NEXT);
2265 	}
2266 
2267 	if (w->w_found && w->w_all == FALSE)
2268 		return (WALK_DONE);
2269 
2270 	if (w->w_verbose)
2271 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
2272 
2273 	w->w_freemem = TRUE;
2274 
2275 	if (mdb_pwalk("vmem_free",
2276 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
2277 		mdb_warn("can't walk vmem seg for %p", addr);
2278 		return (WALK_NEXT);
2279 	}
2280 
2281 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2282 }
2283 
2284 /*ARGSUSED*/
2285 static int
2286 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
2287 {
2288 	uintptr_t addr;
2289 
2290 	if (bcp == NULL)
2291 		return (WALK_NEXT);
2292 
2293 	addr = (uintptr_t)bcp->bc_addr;
2294 
2295 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2296 		return (WALK_NEXT);
2297 
2298 	whatis_print_kmem(addr, baddr, w);
2299 	w->w_found++;
2300 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2301 }
2302 
2303 /*ARGSUSED*/
2304 static int
2305 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
2306 {
2307 	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);
2308 
2309 	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
2310 		return (WALK_NEXT);
2311 
2312 	w->w_slab_found++;
2313 	return (WALK_DONE);
2314 }
2315 
2316 static int
2317 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2318 {
2319 	char *walk, *freewalk;
2320 	mdb_walk_cb_t func;
2321 	vmem_t *vmp = c->cache_arena;
2322 
2323 	if (((c->cache_flags & KMC_IDENTIFIER) != 0) ^ w->w_idspace)
2324 		return (WALK_NEXT);
2325 
2326 	/* For caches with auditing info, we always walk the bufctls */
2327 	if (w->w_bufctl || (c->cache_flags & KMF_AUDIT)) {
2328 		walk = "bufctl";
2329 		freewalk = "freectl";
2330 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2331 	} else {
2332 		walk = "kmem";
2333 		freewalk = "freemem";
2334 		func = (mdb_walk_cb_t)whatis_walk_kmem;
2335 	}
2336 
2337 	w->w_cache = c;
2338 
2339 	if (w->w_verbose)
2340 		mdb_printf("Searching %s's slabs...\n", c->cache_name);
2341 
2342 	/*
2343 	 * Verify that the address is in one of the cache's slabs.  If not,
2344 	 * we can skip the more expensive walkers.  (this is purely a
2345 	 * heuristic -- as long as there are no false-negatives, we'll be fine)
2346 	 *
2347 	 * We try to get the cache's arena's quantum, since to accurately
2348 	 * get the base of a slab, you have to align it to the quantum.  If
2349 	 * it doesn't look sensible, we fall back to not aligning.
2350 	 */
2351 	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
2352 	    (uintptr_t)&vmp->vm_quantum) == -1) {
2353 		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
2354 		w->w_slab_align = 1;
2355 	}
2356 
2357 	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
2358 	    (w->w_slab_align & (w->w_slab_align - 1))) {
2359 		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
2360 		    w->w_slab_align);
2361 		w->w_slab_align = 1;
2362 	}
2363 
2364 	w->w_slab_found = 0;
2365 	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
2366 	    addr) == -1) {
2367 		mdb_warn("can't find kmem_slab walker");
2368 		return (WALK_DONE);
2369 	}
2370 	if (w->w_slab_found == 0)
2371 		return (WALK_NEXT);
2372 
2373 	if (c->cache_flags & KMF_LITE) {
2374 		if (mdb_readvar(&w->w_kmem_lite_count,
2375 		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
2376 			w->w_kmem_lite_count = 0;
2377 	}
2378 
2379 	if (w->w_verbose)
2380 		mdb_printf("Searching %s...\n", c->cache_name);
2381 
2382 	w->w_freemem = FALSE;
2383 
2384 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2385 		mdb_warn("can't find %s walker", walk);
2386 		return (WALK_DONE);
2387 	}
2388 
2389 	if (w->w_found && w->w_all == FALSE)
2390 		return (WALK_DONE);
2391 
2392 	/*
2393 	 * We have searched for allocated memory; now search for freed memory.
2394 	 */
2395 	if (w->w_verbose)
2396 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2397 
2398 	w->w_freemem = TRUE;
2399 
2400 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2401 		mdb_warn("can't find %s walker", freewalk);
2402 		return (WALK_DONE);
2403 	}
2404 
2405 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2406 }
2407 
2408 static int
2409 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2410 {
2411 	if (c->cache_cflags & KMC_NOTOUCH)
2412 		return (WALK_NEXT);
2413 
2414 	return (whatis_walk_cache(addr, c, w));
2415 }
2416 
2417 static int
2418 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
2419 {
2420 	if (!(c->cache_cflags & KMC_NOTOUCH))
2421 		return (WALK_NEXT);
2422 
2423 	return (whatis_walk_cache(addr, c, w));
2424 }
2425 
2426 static int
2427 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
2428 {
2429 	/*
2430 	 * Often, one calls ::whatis on an address from a thread structure.
2431 	 * We use this opportunity to short circuit this case...
2432 	 */
2433 	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
2434 		whatis_report_pointer(w->w_addr, addr,
2435 		    "allocated as a thread structure\n");
2436 		w->w_found++;
2437 		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2438 	}
2439 
2440 	if (w->w_addr < (uintptr_t)t->t_stkbase ||
2441 	    w->w_addr > (uintptr_t)t->t_stk)
2442 		return (WALK_NEXT);
2443 
2444 	if (t->t_stkbase == NULL)
2445 		return (WALK_NEXT);
2446 
2447 	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
2448 	    stack_active(t, w->w_addr));
2449 
2450 	w->w_found++;
2451 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2452 }
2453 
2454 static int
2455 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
2456 {
2457 	struct module mod;
2458 	char name[MODMAXNAMELEN], *where;
2459 	Shdr shdr;
2460 	GElf_Sym sym;
2461 
2462 	if (m->mod_mp == NULL)
2463 		return (WALK_NEXT);
2464 
2465 	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2466 		mdb_warn("couldn't read modctl %p's module", addr);
2467 		return (WALK_NEXT);
2468 	}
2469 
2470 	if (w->w_addr >= (uintptr_t)mod.text &&
2471 	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
2472 		where = "text segment";
2473 		goto found;
2474 	}
2475 
2476 	if (w->w_addr >= (uintptr_t)mod.data &&
2477 	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
2478 		where = "data segment";
2479 		goto found;
2480 	}
2481 
2482 	if (w->w_addr >= (uintptr_t)mod.bss &&
2483 	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
2484 		where = "bss";
2485 		goto found;
2486 	}
2487 
2488 	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2489 		mdb_warn("couldn't read symbol header for %p's module", addr);
2490 		return (WALK_NEXT);
2491 	}
2492 
2493 	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
2494 	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
2495 		where = "symtab";
2496 		goto found;
2497 	}
2498 
2499 	if (w->w_addr >= (uintptr_t)mod.symspace &&
2500 	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
2501 		where = "symspace";
2502 		goto found;
2503 	}
2504 
2505 	return (WALK_NEXT);
2506 
2507 found:
2508 	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2509 		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2510 
2511 	mdb_printf("%p is ", w->w_addr);
2512 
2513 	/*
2514 	 * If we found this address in a module, then there's a chance that
2515 	 * it's actually a named symbol.  Try the symbol lookup.
2516 	 */
2517 	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, NULL, 0, &sym) != -1 &&
2518 	    (w->w_addr - (uintptr_t)sym.st_value) < sym.st_size) {
2519 		mdb_printf("%a, ", w->w_addr);
2520 	}
2521 
2522 	mdb_printf("in %s's %s\n", name, where);
2523 
2524 	w->w_found++;
2525 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2526 }
2527 
2528 /*ARGSUSED*/
2529 static int
2530 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
2531 {
2532 	static int machsize = 0;
2533 	mdb_ctf_id_t id;
2534 
2535 	if (machsize == 0) {
2536 		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
2537 			machsize = mdb_ctf_type_size(id);
2538 		else {
2539 			mdb_warn("could not get size of page_t");
2540 			machsize = sizeof (page_t);
2541 		}
2542 	}
2543 
2544 	if (w->w_addr < addr || w->w_addr >= addr + machsize)
2545 		return (WALK_NEXT);
2546 
2547 	whatis_report_pointer(w->w_addr, addr,
2548 	    "allocated as a page structure\n");
2549 
2550 	w->w_found++;
2551 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2552 }
2553 
2554 int
2555 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2556 {
2557 	whatis_t w;
2558 
2559 	if (!(flags & DCMD_ADDRSPEC))
2560 		return (DCMD_USAGE);
2561 
2562 	w.w_all = FALSE;
2563 	w.w_bufctl = FALSE;
2564 	w.w_idspace = FALSE;
2565 	w.w_quiet = FALSE;
2566 	w.w_verbose = FALSE;
2567 
2568 	if (mdb_getopts(argc, argv,
2569 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2570 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl,
2571 	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
2572 	    'q', MDB_OPT_SETBITS, TRUE, &w.w_quiet,
2573 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2574 	    NULL) != argc)
2575 		return (DCMD_USAGE);
2576 
2577 	w.w_addr = addr;
2578 	w.w_found = 0;
2579 
2580 	if (w.w_verbose)
2581 		mdb_printf("Searching modules...\n");
2582 
2583 	if (!w.w_idspace) {
2584 		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
2585 		    == -1) {
2586 			mdb_warn("couldn't find modctl walker");
2587 			return (DCMD_ERR);
2588 		}
2589 
2590 		if (w.w_found && w.w_all == FALSE)
2591 			return (DCMD_OK);
2592 
2593 		/*
2594 		 * Now search all thread stacks.  Yes, this is a little weak; we
2595 		 * can save a lot of work by first checking to see if the
2596 		 * address is in segkp vs. segkmem.  But hey, computers are
2597 		 * fast.
2598 		 */
2599 		if (w.w_verbose)
2600 			mdb_printf("Searching threads...\n");
2601 
2602 		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
2603 		    == -1) {
2604 			mdb_warn("couldn't find thread walker");
2605 			return (DCMD_ERR);
2606 		}
2607 
2608 		if (w.w_found && w.w_all == FALSE)
2609 			return (DCMD_OK);
2610 
2611 		if (w.w_verbose)
2612 			mdb_printf("Searching page structures...\n");
2613 
2614 		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
2615 		    == -1) {
2616 			mdb_warn("couldn't find page walker");
2617 			return (DCMD_ERR);
2618 		}
2619 
2620 		if (w.w_found && w.w_all == FALSE)
2621 			return (DCMD_OK);
2622 	}
2623 
2624 	if (mdb_walk("kmem_cache",
2625 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2626 		mdb_warn("couldn't find kmem_cache walker");
2627 		return (DCMD_ERR);
2628 	}
2629 
2630 	if (w.w_found && w.w_all == FALSE)
2631 		return (DCMD_OK);
2632 
2633 	if (mdb_walk("kmem_cache",
2634 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2635 		mdb_warn("couldn't find kmem_cache walker");
2636 		return (DCMD_ERR);
2637 	}
2638 
2639 	if (w.w_found && w.w_all == FALSE)
2640 		return (DCMD_OK);
2641 
2642 	if (mdb_walk("vmem_postfix",
2643 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2644 		mdb_warn("couldn't find vmem_postfix walker");
2645 		return (DCMD_ERR);
2646 	}
2647 
2648 	if (w.w_found == 0)
2649 		mdb_printf("%p is unknown\n", addr);
2650 
2651 	return (DCMD_OK);
2652 }
2653 
/*
 * Print the help text for ::whatis: a short description followed by one
 * entry per supported option.
 */
void
whatis_help(void)
{
	mdb_printf(
	    "Given a virtual address, attempt to determine where it came\n"
	    "from.\n"
	    "\n");

	mdb_printf(
	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
	    "\t\tthe first (most specific) source.\n"
	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
	    "\t\thave a bufctl, it will not be reported.\n");

	mdb_printf(
	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
	    "\t\tthese are ignored.\n"
	    "\t-q\tDon't print multi-line reports (stack traces, etc.)\n"
	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
	    "\t\tsearched\n");
}
2672 
/*
 * Address range of one CPU's portion of the kmem transaction log, as
 * computed by kmem_log().  kmem_log_walk() attributes a log entry to the
 * CPU whose range satisfies kmc_low <= entry address < kmc_high.
 */
typedef struct kmem_log_cpu {
	uintptr_t kmc_low;	/* first address in the range */
	uintptr_t kmc_high;	/* end of the range (exclusive) */
} kmem_log_cpu_t;
2677 
/*
 * Callback argument passed from kmem_log() to kmem_log_walk().
 */
typedef struct kmem_log_data {
	uintptr_t kmd_addr;	/* if nonzero, only show entries for the */
				/* buffer containing this address */
	kmem_log_cpu_t *kmd_cpu; /* array of NCPU per-CPU log ranges */
} kmem_log_data_t;
2682 
2683 int
2684 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2685     kmem_log_data_t *kmd)
2686 {
2687 	int i;
2688 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2689 	size_t bufsize;
2690 
2691 	for (i = 0; i < NCPU; i++) {
2692 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2693 			break;
2694 	}
2695 
2696 	if (kmd->kmd_addr) {
2697 		if (b->bc_cache == NULL)
2698 			return (WALK_NEXT);
2699 
2700 		if (mdb_vread(&bufsize, sizeof (bufsize),
2701 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2702 			mdb_warn(
2703 			    "failed to read cache_bufsize for cache at %p",
2704 			    b->bc_cache);
2705 			return (WALK_ERR);
2706 		}
2707 
2708 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2709 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2710 			return (WALK_NEXT);
2711 	}
2712 
2713 	if (i == NCPU)
2714 		mdb_printf("   ");
2715 	else
2716 		mdb_printf("%3d", i);
2717 
2718 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2719 	    b->bc_timestamp, b->bc_thread);
2720 
2721 	return (WALK_NEXT);
2722 }
2723 
2724 /*ARGSUSED*/
2725 int
2726 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2727 {
2728 	kmem_log_header_t lh;
2729 	kmem_cpu_log_header_t clh;
2730 	uintptr_t lhp, clhp;
2731 	int ncpus;
2732 	uintptr_t *cpu;
2733 	GElf_Sym sym;
2734 	kmem_log_cpu_t *kmc;
2735 	int i;
2736 	kmem_log_data_t kmd;
2737 	uint_t opt_b = FALSE;
2738 
2739 	if (mdb_getopts(argc, argv,
2740 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2741 		return (DCMD_USAGE);
2742 
2743 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2744 		mdb_warn("failed to read 'kmem_transaction_log'");
2745 		return (DCMD_ERR);
2746 	}
2747 
2748 	if (lhp == NULL) {
2749 		mdb_warn("no kmem transaction log\n");
2750 		return (DCMD_ERR);
2751 	}
2752 
2753 	mdb_readvar(&ncpus, "ncpus");
2754 
2755 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2756 		mdb_warn("failed to read log header at %p", lhp);
2757 		return (DCMD_ERR);
2758 	}
2759 
2760 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2761 
2762 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2763 
2764 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2765 		mdb_warn("couldn't find 'cpu' array");
2766 		return (DCMD_ERR);
2767 	}
2768 
2769 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2770 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2771 		    NCPU * sizeof (uintptr_t), sym.st_size);
2772 		return (DCMD_ERR);
2773 	}
2774 
2775 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2776 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2777 		return (DCMD_ERR);
2778 	}
2779 
2780 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2781 	kmd.kmd_addr = NULL;
2782 	kmd.kmd_cpu = kmc;
2783 
2784 	for (i = 0; i < NCPU; i++) {
2785 
2786 		if (cpu[i] == NULL)
2787 			continue;
2788 
2789 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2790 			mdb_warn("cannot read cpu %d's log header at %p",
2791 			    i, clhp);
2792 			return (DCMD_ERR);
2793 		}
2794 
2795 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2796 		    (uintptr_t)lh.lh_base;
2797 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2798 
2799 		clhp += sizeof (kmem_cpu_log_header_t);
2800 	}
2801 
2802 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2803 	    "TIMESTAMP", "THREAD");
2804 
2805 	/*
2806 	 * If we have been passed an address, print out only log entries
2807 	 * corresponding to that address.  If opt_b is specified, then interpret
2808 	 * the address as a bufctl.
2809 	 */
2810 	if (flags & DCMD_ADDRSPEC) {
2811 		kmem_bufctl_audit_t b;
2812 
2813 		if (opt_b) {
2814 			kmd.kmd_addr = addr;
2815 		} else {
2816 			if (mdb_vread(&b,
2817 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2818 				mdb_warn("failed to read bufctl at %p", addr);
2819 				return (DCMD_ERR);
2820 			}
2821 
2822 			(void) kmem_log_walk(addr, &b, &kmd);
2823 
2824 			return (DCMD_OK);
2825 		}
2826 	}
2827 
2828 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2829 		mdb_warn("can't find kmem log walker");
2830 		return (DCMD_ERR);
2831 	}
2832 
2833 	return (DCMD_OK);
2834 }
2835 
/*
 * Callback argument for bufctl_history_callback(): the flags and arguments
 * with which bufctl() is re-invoked on each element of a bufctl's history,
 * and the status of the most recent invocation.
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags for the next call */
	int		bhc_argc;	/* number of entries in bhc_argv */
	const mdb_arg_t	*bhc_argv;	/* arguments passed to bufctl() */
	int		bhc_ret;	/* status of the last bufctl() call */
} bufctl_history_cb_t;
2842 
2843 /*ARGSUSED*/
2844 static int
2845 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2846 {
2847 	bufctl_history_cb_t *bhc = arg;
2848 
2849 	bhc->bhc_ret =
2850 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2851 
2852 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2853 
2854 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2855 }
2856 
/*
 * Print the help text for ::bufctl: a one-line summary, an "OPTIONS"
 * heading printed two columns to the left of the surrounding text, and the
 * list of options.
 */
void
bufctl_help(void)
{
	const char *summary =
"Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n";
	const char *options =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2879 
2880 int
2881 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2882 {
2883 	kmem_bufctl_audit_t bc;
2884 	uint_t verbose = FALSE;
2885 	uint_t history = FALSE;
2886 	uint_t in_history = FALSE;
2887 	uintptr_t caller = NULL, thread = NULL;
2888 	uintptr_t laddr, haddr, baddr = NULL;
2889 	hrtime_t earliest = 0, latest = 0;
2890 	int i, depth;
2891 	char c[MDB_SYM_NAMLEN];
2892 	GElf_Sym sym;
2893 
2894 	if (mdb_getopts(argc, argv,
2895 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2896 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2897 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2898 	    'c', MDB_OPT_UINTPTR, &caller,
2899 	    't', MDB_OPT_UINTPTR, &thread,
2900 	    'e', MDB_OPT_UINT64, &earliest,
2901 	    'l', MDB_OPT_UINT64, &latest,
2902 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2903 		return (DCMD_USAGE);
2904 
2905 	if (!(flags & DCMD_ADDRSPEC))
2906 		return (DCMD_USAGE);
2907 
2908 	if (in_history && !history)
2909 		return (DCMD_USAGE);
2910 
2911 	if (history && !in_history) {
2912 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2913 		    UM_SLEEP | UM_GC);
2914 		bufctl_history_cb_t bhc;
2915 
2916 		nargv[0].a_type = MDB_TYPE_STRING;
2917 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2918 
2919 		for (i = 0; i < argc; i++)
2920 			nargv[i + 1] = argv[i];
2921 
2922 		/*
2923 		 * When in history mode, we treat each element as if it
2924 		 * were in a seperate loop, so that the headers group
2925 		 * bufctls with similar histories.
2926 		 */
2927 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2928 		bhc.bhc_argc = argc + 1;
2929 		bhc.bhc_argv = nargv;
2930 		bhc.bhc_ret = DCMD_OK;
2931 
2932 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2933 		    addr) == -1) {
2934 			mdb_warn("unable to walk bufctl_history");
2935 			return (DCMD_ERR);
2936 		}
2937 
2938 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2939 			mdb_printf("\n");
2940 
2941 		return (bhc.bhc_ret);
2942 	}
2943 
2944 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2945 		if (verbose) {
2946 			mdb_printf("%16s %16s %16s %16s\n"
2947 			    "%<u>%16s %16s %16s %16s%</u>\n",
2948 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2949 			    "", "CACHE", "LASTLOG", "CONTENTS");
2950 		} else {
2951 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2952 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2953 		}
2954 	}
2955 
2956 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2957 		mdb_warn("couldn't read bufctl at %p", addr);
2958 		return (DCMD_ERR);
2959 	}
2960 
2961 	/*
2962 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2963 	 * the address does not really refer to a bufctl.
2964 	 */
2965 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2966 
2967 	if (caller != NULL) {
2968 		laddr = caller;
2969 		haddr = caller + sizeof (caller);
2970 
2971 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2972 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2973 			/*
2974 			 * We were provided an exact symbol value; any
2975 			 * address in the function is valid.
2976 			 */
2977 			laddr = (uintptr_t)sym.st_value;
2978 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2979 		}
2980 
2981 		for (i = 0; i < depth; i++)
2982 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2983 				break;
2984 
2985 		if (i == depth)
2986 			return (DCMD_OK);
2987 	}
2988 
2989 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2990 		return (DCMD_OK);
2991 
2992 	if (earliest != 0 && bc.bc_timestamp < earliest)
2993 		return (DCMD_OK);
2994 
2995 	if (latest != 0 && bc.bc_timestamp > latest)
2996 		return (DCMD_OK);
2997 
2998 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2999 		return (DCMD_OK);
3000 
3001 	if (flags & DCMD_PIPE_OUT) {
3002 		mdb_printf("%#lr\n", addr);
3003 		return (DCMD_OK);
3004 	}
3005 
3006 	if (verbose) {
3007 		mdb_printf(
3008 		    "%<b>%16p%</b> %16p %16llx %16p\n"
3009 		    "%16s %16p %16p %16p\n",
3010 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
3011 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
3012 
3013 		mdb_inc_indent(17);
3014 		for (i = 0; i < depth; i++)
3015 			mdb_printf("%a\n", bc.bc_stack[i]);
3016 		mdb_dec_indent(17);
3017 		mdb_printf("\n");
3018 	} else {
3019 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
3020 		    bc.bc_timestamp, bc.bc_thread);
3021 
3022 		for (i = 0; i < depth; i++) {
3023 			if (mdb_lookup_by_addr(bc.bc_stack[i],
3024 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
3025 				continue;
3026 			if (strncmp(c, "kmem_", 5) == 0)
3027 				continue;
3028 			mdb_printf(" %a\n", bc.bc_stack[i]);
3029 			break;
3030 		}
3031 
3032 		if (i >= depth)
3033 			mdb_printf("\n");
3034 	}
3035 
3036 	return (DCMD_OK);
3037 }
3038 
/*
 * State shared between kmem_verify() and its per-buffer walker callbacks,
 * verify_alloc() and verify_free().
 */
typedef struct kmem_verify {
	uint64_t *kmv_buf;		/* buffer to read cache contents into */
	size_t kmv_size;		/* number of bytes in kmv_buf */
	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* nonzero: count corrupt buffers */
					/* without printing each one */
	struct kmem_cache kmv_cache;	/* the cache we're operating on */
} kmem_verify_t;
3046 
/*
 * verify_pattern()
 *	Check that the size bytes starting at buf_arg consist entirely of
 *	the 64-bit pattern pat.  Returns the byte offset of the first
 *	64-bit word that differs, or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	/*LINTED*/
	const uint64_t *limit = (const uint64_t *)((char *)buf_arg + size);
	const uint64_t *cur = buf_arg;

	while (cur < limit) {
		if (*cur != pat)
			return ((uintptr_t)cur - (uintptr_t)buf_arg);
		cur++;
	}

	return (-1);
}
3063 
3064 /*
3065  * verify_buftag()
3066  *	verify that btp->bt_bxstat == (bcp ^ pat)
3067  */
3068 static int
3069 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3070 {
3071 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3072 }
3073 
3074 /*
3075  * verify_free()
3076  * 	verify the integrity of a free block of memory by checking
3077  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
3078  */
3079 /*ARGSUSED1*/
3080 static int
3081 verify_free(uintptr_t addr, const void *data, void *private)
3082 {
3083 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3084 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3085 	int64_t corrupt;		/* corruption offset */
3086 	kmem_buftag_t *buftagp;		/* ptr to buftag */
3087 	kmem_cache_t *cp = &kmv->kmv_cache;
3088 	int besilent = kmv->kmv_besilent;
3089 
3090 	/*LINTED*/
3091 	buftagp = KMEM_BUFTAG(cp, buf);
3092 
3093 	/*
3094 	 * Read the buffer to check.
3095 	 */
3096 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3097 		if (!besilent)
3098 			mdb_warn("couldn't read %p", addr);
3099 		return (WALK_NEXT);
3100 	}
3101 
3102 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
3103 	    KMEM_FREE_PATTERN)) >= 0) {
3104 		if (!besilent)
3105 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3106 			    addr, (uintptr_t)addr + corrupt);
3107 		goto corrupt;
3108 	}
3109 	/*
3110 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3111 	 * the first bytes of the buffer, hence we cannot check for red
3112 	 * zone corruption.
3113 	 */
3114 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3115 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3116 		if (!besilent)
3117 			mdb_printf("buffer %p (free) seems to "
3118 			    "have a corrupt redzone pattern\n", addr);
3119 		goto corrupt;
3120 	}
3121 
3122 	/*
3123 	 * confirm bufctl pointer integrity.
3124 	 */
3125 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3126 		if (!besilent)
3127 			mdb_printf("buffer %p (free) has a corrupt "
3128 			    "buftag\n", addr);
3129 		goto corrupt;
3130 	}
3131 
3132 	return (WALK_NEXT);
3133 corrupt:
3134 	kmv->kmv_corruption++;
3135 	return (WALK_NEXT);
3136 }
3137 
3138 /*
3139  * verify_alloc()
3140  * 	Verify that the buftag of an allocated buffer makes sense with respect
3141  * 	to the buffer.
3142  */
3143 /*ARGSUSED1*/
3144 static int
3145 verify_alloc(uintptr_t addr, const void *data, void *private)
3146 {
3147 	kmem_verify_t *kmv = (kmem_verify_t *)private;
3148 	kmem_cache_t *cp = &kmv->kmv_cache;
3149 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
3150 	/*LINTED*/
3151 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3152 	uint32_t *ip = (uint32_t *)buftagp;
3153 	uint8_t *bp = (uint8_t *)buf;
3154 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
3155 	int besilent = kmv->kmv_besilent;
3156 
3157 	/*
3158 	 * Read the buffer to check.
3159 	 */
3160 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3161 		if (!besilent)
3162 			mdb_warn("couldn't read %p", addr);
3163 		return (WALK_NEXT);
3164 	}
3165 
3166 	/*
3167 	 * There are two cases to handle:
3168 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3169 	 *    0xfeedfacefeedface at the end of it
3170 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
3171 	 *    0xbb just past the end of the region in use.  At the buftag,
3172 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
3173 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3174 	 *    endianness), followed by 32 bits containing the offset of the
3175 	 *    0xbb byte in the buffer.
3176 	 *
3177 	 * Finally, the two 32-bit words that comprise the second half of the
3178 	 * buftag should xor to KMEM_BUFTAG_ALLOC
3179 	 */
3180 
3181 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3182 		looks_ok = 1;
3183 	else if (!KMEM_SIZE_VALID(ip[1]))
3184 		size_ok = 0;
3185 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3186 		looks_ok = 1;
3187 	else
3188 		size_ok = 0;
3189 
3190 	if (!size_ok) {
3191 		if (!besilent)
3192 			mdb_printf("buffer %p (allocated) has a corrupt "
3193 			    "redzone size encoding\n", addr);
3194 		goto corrupt;
3195 	}
3196 
3197 	if (!looks_ok) {
3198 		if (!besilent)
3199 			mdb_printf("buffer %p (allocated) has a corrupt "
3200 			    "redzone signature\n", addr);
3201 		goto corrupt;
3202 	}
3203 
3204 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3205 		if (!besilent)
3206 			mdb_printf("buffer %p (allocated) has a "
3207 			    "corrupt buftag\n", addr);
3208 		goto corrupt;
3209 	}
3210 
3211 	return (WALK_NEXT);
3212 corrupt:
3213 	kmv->kmv_corruption++;
3214 	return (WALK_NEXT);
3215 }
3216 
3217 /*ARGSUSED2*/
3218 int
3219 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3220 {
3221 	if (flags & DCMD_ADDRSPEC) {
3222 		int check_alloc = 0, check_free = 0;
3223 		kmem_verify_t kmv;
3224 
3225 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3226 		    addr) == -1) {
3227 			mdb_warn("couldn't read kmem_cache %p", addr);
3228 			return (DCMD_ERR);
3229 		}
3230 
3231 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3232 		    sizeof (kmem_buftag_t);
3233 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3234 		kmv.kmv_corruption = 0;
3235 
3236 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3237 			check_alloc = 1;
3238 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3239 				check_free = 1;
3240 		} else {
3241 			if (!(flags & DCMD_LOOP)) {
3242 				mdb_warn("cache %p (%s) does not have "
3243 				    "redzone checking enabled\n", addr,
3244 				    kmv.kmv_cache.cache_name);
3245 			}
3246 			return (DCMD_ERR);
3247 		}
3248 
3249 		if (flags & DCMD_LOOP) {
3250 			/*
3251 			 * table mode, don't print out every corrupt buffer
3252 			 */
3253 			kmv.kmv_besilent = 1;
3254 		} else {
3255 			mdb_printf("Summary for cache '%s'\n",
3256 			    kmv.kmv_cache.cache_name);
3257 			mdb_inc_indent(2);
3258 			kmv.kmv_besilent = 0;
3259 		}
3260 
3261 		if (check_alloc)
3262 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3263 		if (check_free)
3264 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3265 
3266 		if (flags & DCMD_LOOP) {
3267 			if (kmv.kmv_corruption == 0) {
3268 				mdb_printf("%-*s %?p clean\n",
3269 				    KMEM_CACHE_NAMELEN,
3270 				    kmv.kmv_cache.cache_name, addr);
3271 			} else {
3272 				char *s = "";	/* optional s in "buffer[s]" */
3273 				if (kmv.kmv_corruption > 1)
3274 					s = "s";
3275 
3276 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3277 				    KMEM_CACHE_NAMELEN,
3278 				    kmv.kmv_cache.cache_name, addr,
3279 				    kmv.kmv_corruption, s);
3280 			}
3281 		} else {
3282 			/*
3283 			 * This is the more verbose mode, when the user has
3284 			 * type addr::kmem_verify.  If the cache was clean,
3285 			 * nothing will have yet been printed. So say something.
3286 			 */
3287 			if (kmv.kmv_corruption == 0)
3288 				mdb_printf("clean\n");
3289 
3290 			mdb_dec_indent(2);
3291 		}
3292 	} else {
3293 		/*
3294 		 * If the user didn't specify a cache to verify, we'll walk all
3295 		 * kmem_cache's, specifying ourself as a callback for each...
3296 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3297 		 */
3298 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3299 		    "Cache Name", "Addr", "Cache Integrity");
3300 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3301 	}
3302 
3303 	return (DCMD_OK);
3304 }
3305 
3306 typedef struct vmem_node {
3307 	struct vmem_node *vn_next;
3308 	struct vmem_node *vn_parent;
3309 	struct vmem_node *vn_sibling;
3310 	struct vmem_node *vn_children;
3311 	uintptr_t vn_addr;
3312 	int vn_marked;
3313 	vmem_t vn_vmem;
3314 } vmem_node_t;
3315 
3316 typedef struct vmem_walk {
3317 	vmem_node_t *vw_root;
3318 	vmem_node_t *vw_current;
3319 } vmem_walk_t;
3320 
3321 int
3322 vmem_walk_init(mdb_walk_state_t *wsp)
3323 {
3324 	uintptr_t vaddr, paddr;
3325 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3326 	vmem_walk_t *vw;
3327 
3328 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3329 		mdb_warn("couldn't read 'vmem_list'");
3330 		return (WALK_ERR);
3331 	}
3332 
3333 	while (vaddr != NULL) {
3334 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3335 		vp->vn_addr = vaddr;
3336 		vp->vn_next = head;
3337 		head = vp;
3338 
3339 		if (vaddr == wsp->walk_addr)
3340 			current = vp;
3341 
3342 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3343 			mdb_warn("couldn't read vmem_t at %p", vaddr);
3344 			goto err;
3345 		}
3346 
3347 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3348 	}
3349 
3350 	for (vp = head; vp != NULL; vp = vp->vn_next) {
3351 
3352 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3353 			vp->vn_sibling = root;
3354 			root = vp;
3355 			continue;
3356 		}
3357 
3358 		for (parent = head; parent != NULL; parent = parent->vn_next) {
3359 			if (parent->vn_addr != paddr)
3360 				continue;
3361 			vp->vn_sibling = parent->vn_children;
3362 			parent->vn_children = vp;
3363 			vp->vn_parent = parent;
3364 			break;
3365 		}
3366 
3367 		if (parent == NULL) {
3368 			mdb_warn("couldn't find %p's parent (%p)\n",
3369 			    vp->vn_addr, paddr);
3370 			goto err;
3371 		}
3372 	}
3373 
3374 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3375 	vw->vw_root = root;
3376 
3377 	if (current != NULL)
3378 		vw->vw_current = current;
3379 	else
3380 		vw->vw_current = root;
3381 
3382 	wsp->walk_data = vw;
3383 	return (WALK_NEXT);
3384 err:
3385 	for (vp = head; head != NULL; vp = head) {
3386 		head = vp->vn_next;
3387 		mdb_free(vp, sizeof (vmem_node_t));
3388 	}
3389 
3390 	return (WALK_ERR);
3391 }
3392 
3393 int
3394 vmem_walk_step(mdb_walk_state_t *wsp)
3395 {
3396 	vmem_walk_t *vw = wsp->walk_data;
3397 	vmem_node_t *vp;
3398 	int rval;
3399 
3400 	if ((vp = vw->vw_current) == NULL)
3401 		return (WALK_DONE);
3402 
3403 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3404 
3405 	if (vp->vn_children != NULL) {
3406 		vw->vw_current = vp->vn_children;
3407 		return (rval);
3408 	}
3409 
3410 	do {
3411 		vw->vw_current = vp->vn_sibling;
3412 		vp = vp->vn_parent;
3413 	} while (vw->vw_current == NULL && vp != NULL);
3414 
3415 	return (rval);
3416 }
3417 
3418 /*
3419  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3420  * children are visited before their parent.  We perform the postfix walk
3421  * iteratively (rather than recursively) to allow mdb to regain control
3422  * after each callback.
3423  */
3424 int
3425 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3426 {
3427 	vmem_walk_t *vw = wsp->walk_data;
3428 	vmem_node_t *vp = vw->vw_current;
3429 	int rval;
3430 
3431 	/*
3432 	 * If this node is marked, then we know that we have already visited
3433 	 * all of its children.  If the node has any siblings, they need to
3434 	 * be visited next; otherwise, we need to visit the parent.  Note
3435 	 * that vp->vn_marked will only be zero on the first invocation of
3436 	 * the step function.
3437 	 */
3438 	if (vp->vn_marked) {
3439 		if (vp->vn_sibling != NULL)
3440 			vp = vp->vn_sibling;
3441 		else if (vp->vn_parent != NULL)
3442 			vp = vp->vn_parent;
3443 		else {
3444 			/*
3445 			 * We have neither a parent, nor a sibling, and we
3446 			 * have already been visited; we're done.
3447 			 */
3448 			return (WALK_DONE);
3449 		}
3450 	}
3451 
3452 	/*
3453 	 * Before we visit this node, visit its children.
3454 	 */
3455 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3456 		vp = vp->vn_children;
3457 
3458 	vp->vn_marked = 1;
3459 	vw->vw_current = vp;
3460 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3461 
3462 	return (rval);
3463 }
3464 
3465 void
3466 vmem_walk_fini(mdb_walk_state_t *wsp)
3467 {
3468 	vmem_walk_t *vw = wsp->walk_data;
3469 	vmem_node_t *root = vw->vw_root;
3470 	int done;
3471 
3472 	if (root == NULL)
3473 		return;
3474 
3475 	if ((vw->vw_root = root->vn_children) != NULL)
3476 		vmem_walk_fini(wsp);
3477 
3478 	vw->vw_root = root->vn_sibling;
3479 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3480 	mdb_free(root, sizeof (vmem_node_t));
3481 
3482 	if (done) {
3483 		mdb_free(vw, sizeof (vmem_walk_t));
3484 	} else {
3485 		vmem_walk_fini(wsp);
3486 	}
3487 }
3488 
/*
 * State for the vmem segment walkers.
 */
typedef struct vmem_seg_walk vmem_seg_walk_t;

struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report, or 0 for all */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
};
3494 
3495 /*ARGSUSED*/
3496 int
3497 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3498 {
3499 	vmem_seg_walk_t *vsw;
3500 
3501 	if (wsp->walk_addr == NULL) {
3502 		mdb_warn("vmem_%s does not support global walks\n", name);
3503 		return (WALK_ERR);
3504 	}
3505 
3506 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3507 
3508 	vsw->vsw_type = type;
3509 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3510 	vsw->vsw_current = vsw->vsw_start;
3511 
3512 	return (WALK_NEXT);
3513 }
3514 
/*
 * No vmem segment ever has type 0, so the walkers use it to mean "do not
 * filter by type".  (This definition arguably belongs in vmem_impl.h.)
 */
#define	VMEM_NONE	0
3519 
3520 int
3521 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3522 {
3523 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3524 }
3525 
3526 int
3527 vmem_free_walk_init(mdb_walk_state_t *wsp)
3528 {
3529 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3530 }
3531 
3532 int
3533 vmem_span_walk_init(mdb_walk_state_t *wsp)
3534 {
3535 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3536 }
3537 
3538 int
3539 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3540 {
3541 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3542 }
3543 
3544 int
3545 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3546 {
3547 	vmem_seg_t seg;
3548 	vmem_seg_walk_t *vsw = wsp->walk_data;
3549 	uintptr_t addr = vsw->vsw_current;
3550 	static size_t seg_size = 0;
3551 	int rval;
3552 
3553 	if (!seg_size) {
3554 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3555 			mdb_warn("failed to read 'vmem_seg_size'");
3556 			seg_size = sizeof (vmem_seg_t);
3557 		}
3558 	}
3559 
3560 	if (seg_size < sizeof (seg))
3561 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3562 
3563 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3564 		mdb_warn("couldn't read vmem_seg at %p", addr);
3565 		return (WALK_ERR);
3566 	}
3567 
3568 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3569 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3570 		rval = WALK_NEXT;
3571 	} else {
3572 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3573 	}
3574 
3575 	if (vsw->vsw_current == vsw->vsw_start)
3576 		return (WALK_DONE);
3577 
3578 	return (rval);
3579 }
3580 
3581 void
3582 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3583 {
3584 	vmem_seg_walk_t *vsw = wsp->walk_data;
3585 
3586 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3587 }
3588 
3589 #define	VMEM_NAMEWIDTH	22
3590 
3591 int
3592 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3593 {
3594 	vmem_t v, parent;
3595 	vmem_kstat_t *vkp = &v.vm_kstat;
3596 	uintptr_t paddr;
3597 	int ident = 0;
3598 	char c[VMEM_NAMEWIDTH];
3599 
3600 	if (!(flags & DCMD_ADDRSPEC)) {
3601 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3602 			mdb_warn("can't walk vmem");
3603 			return (DCMD_ERR);
3604 		}
3605 		return (DCMD_OK);
3606 	}
3607 
3608 	if (DCMD_HDRSPEC(flags))
3609 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3610 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3611 		    "TOTAL", "SUCCEED", "FAIL");
3612 
3613 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3614 		mdb_warn("couldn't read vmem at %p", addr);
3615 		return (DCMD_ERR);
3616 	}
3617 
3618 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3619 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3620 			mdb_warn("couldn't trace %p's ancestry", addr);
3621 			ident = 0;
3622 			break;
3623 		}
3624 		paddr = (uintptr_t)parent.vm_source;
3625 	}
3626 
3627 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3628 
3629 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3630 	    addr, VMEM_NAMEWIDTH, c,
3631 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3632 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3633 
3634 	return (DCMD_OK);
3635 }
3636 
/*
 * Help text for the vmem_seg dcmd: a short description followed by an
 * OPTIONS section listing each flag accepted by vmem_seg().
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s",
"Display the contents of vmem_seg_ts, with optional filtering.\n\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	/* pull the section heading back out to the left margin */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3668 
3669 /*ARGSUSED*/
3670 int
3671 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3672 {
3673 	vmem_seg_t vs;
3674 	pc_t *stk = vs.vs_stack;
3675 	uintptr_t sz;
3676 	uint8_t t;
3677 	const char *type = NULL;
3678 	GElf_Sym sym;
3679 	char c[MDB_SYM_NAMLEN];
3680 	int no_debug;
3681 	int i;
3682 	int depth;
3683 	uintptr_t laddr, haddr;
3684 
3685 	uintptr_t caller = NULL, thread = NULL;
3686 	uintptr_t minsize = 0, maxsize = 0;
3687 
3688 	hrtime_t earliest = 0, latest = 0;
3689 
3690 	uint_t size = 0;
3691 	uint_t verbose = 0;
3692 
3693 	if (!(flags & DCMD_ADDRSPEC))
3694 		return (DCMD_USAGE);
3695 
3696 	if (mdb_getopts(argc, argv,
3697 	    'c', MDB_OPT_UINTPTR, &caller,
3698 	    'e', MDB_OPT_UINT64, &earliest,
3699 	    'l', MDB_OPT_UINT64, &latest,
3700 	    's', MDB_OPT_SETBITS, TRUE, &size,
3701 	    'm', MDB_OPT_UINTPTR, &minsize,
3702 	    'M', MDB_OPT_UINTPTR, &maxsize,
3703 	    't', MDB_OPT_UINTPTR, &thread,
3704 	    'T', MDB_OPT_STR, &type,
3705 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3706 	    NULL) != argc)
3707 		return (DCMD_USAGE);
3708 
3709 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3710 		if (verbose) {
3711 			mdb_printf("%16s %4s %16s %16s %16s\n"
3712 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3713 			    "ADDR", "TYPE", "START", "END", "SIZE",
3714 			    "", "", "THREAD", "TIMESTAMP", "");
3715 		} else {
3716 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3717 			    "START", size? "SIZE" : "END", "WHO");
3718 		}
3719 	}
3720 
3721 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3722 		mdb_warn("couldn't read vmem_seg at %p", addr);
3723 		return (DCMD_ERR);
3724 	}
3725 
3726 	if (type != NULL) {
3727 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3728 			t = VMEM_ALLOC;
3729 		else if (strcmp(type, "FREE") == 0)
3730 			t = VMEM_FREE;
3731 		else if (strcmp(type, "SPAN") == 0)
3732 			t = VMEM_SPAN;
3733 		else if (strcmp(type, "ROTR") == 0 ||
3734 		    strcmp(type, "ROTOR") == 0)
3735 			t = VMEM_ROTOR;
3736 		else if (strcmp(type, "WLKR") == 0 ||
3737 		    strcmp(type, "WALKER") == 0)
3738 			t = VMEM_WALKER;
3739 		else {
3740 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3741 			    type);
3742 			return (DCMD_ERR);
3743 		}
3744 
3745 		if (vs.vs_type != t)
3746 			return (DCMD_OK);
3747 	}
3748 
3749 	sz = vs.vs_end - vs.vs_start;
3750 
3751 	if (minsize != 0 && sz < minsize)
3752 		return (DCMD_OK);
3753 
3754 	if (maxsize != 0 && sz > maxsize)
3755 		return (DCMD_OK);
3756 
3757 	t = vs.vs_type;
3758 	depth = vs.vs_depth;
3759 
3760 	/*
3761 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3762 	 */
3763 	no_debug = (t != VMEM_ALLOC) ||
3764 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3765 
3766 	if (no_debug) {
3767 		if (caller != NULL || thread != NULL || earliest != 0 ||
3768 		    latest != 0)
3769 			return (DCMD_OK);		/* not enough info */
3770 	} else {
3771 		if (caller != NULL) {
3772 			laddr = caller;
3773 			haddr = caller + sizeof (caller);
3774 
3775 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3776 			    sizeof (c), &sym) != -1 &&
3777 			    caller == (uintptr_t)sym.st_value) {
3778 				/*
3779 				 * We were provided an exact symbol value; any
3780 				 * address in the function is valid.
3781 				 */
3782 				laddr = (uintptr_t)sym.st_value;
3783 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3784 			}
3785 
3786 			for (i = 0; i < depth; i++)
3787 				if (vs.vs_stack[i] >= laddr &&
3788 				    vs.vs_stack[i] < haddr)
3789 					break;
3790 
3791 			if (i == depth)
3792 				return (DCMD_OK);
3793 		}
3794 
3795 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3796 			return (DCMD_OK);
3797 
3798 		if (earliest != 0 && vs.vs_timestamp < earliest)
3799 			return (DCMD_OK);
3800 
3801 		if (latest != 0 && vs.vs_timestamp > latest)
3802 			return (DCMD_OK);
3803 	}
3804 
3805 	type = (t == VMEM_ALLOC ? "ALLC" :
3806 	    t == VMEM_FREE ? "FREE" :
3807 	    t == VMEM_SPAN ? "SPAN" :
3808 	    t == VMEM_ROTOR ? "ROTR" :
3809 	    t == VMEM_WALKER ? "WLKR" :
3810 	    "????");
3811 
3812 	if (flags & DCMD_PIPE_OUT) {
3813 		mdb_printf("%#lr\n", addr);
3814 		return (DCMD_OK);
3815 	}
3816 
3817 	if (verbose) {
3818 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3819 		    addr, type, vs.vs_start, vs.vs_end, sz);
3820 
3821 		if (no_debug)
3822 			return (DCMD_OK);
3823 
3824 		mdb_printf("%16s %4s %16p %16llx\n",
3825 		    "", "", vs.vs_thread, vs.vs_timestamp);
3826 
3827 		mdb_inc_indent(17);
3828 		for (i = 0; i < depth; i++) {
3829 			mdb_printf("%a\n", stk[i]);
3830 		}
3831 		mdb_dec_indent(17);
3832 		mdb_printf("\n");
3833 	} else {
3834 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3835 		    vs.vs_start, size? sz : vs.vs_end);
3836 
3837 		if (no_debug) {
3838 			mdb_printf("\n");
3839 			return (DCMD_OK);
3840 		}
3841 
3842 		for (i = 0; i < depth; i++) {
3843 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3844 			    c, sizeof (c), &sym) == -1)
3845 				continue;
3846 			if (strncmp(c, "vmem_", 5) == 0)
3847 				continue;
3848 			break;
3849 		}
3850 		mdb_printf(" %a\n", stk[i]);
3851 	}
3852 	return (DCMD_OK);
3853 }
3854 
3855 typedef struct kmalog_data {
3856 	uintptr_t	kma_addr;
3857 	hrtime_t	kma_newest;
3858 } kmalog_data_t;
3859 
3860 /*ARGSUSED*/
3861 static int
3862 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3863 {
3864 	char name[KMEM_CACHE_NAMELEN + 1];
3865 	hrtime_t delta;
3866 	int i, depth;
3867 	size_t bufsize;
3868 
3869 	if (bcp->bc_timestamp == 0)
3870 		return (WALK_DONE);
3871 
3872 	if (kma->kma_newest == 0)
3873 		kma->kma_newest = bcp->bc_timestamp;
3874 
3875 	if (kma->kma_addr) {
3876 		if (mdb_vread(&bufsize, sizeof (bufsize),
3877 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3878 			mdb_warn(
3879 			    "failed to read cache_bufsize for cache at %p",
3880 			    bcp->bc_cache);
3881 			return (WALK_ERR);
3882 		}
3883 
3884 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3885 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3886 			return (WALK_NEXT);
3887 	}
3888 
3889 	delta = kma->kma_newest - bcp->bc_timestamp;
3890 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3891 
3892 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3893 	    &bcp->bc_cache->cache_name) <= 0)
3894 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3895 
3896 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3897 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3898 
3899 	for (i = 0; i < depth; i++)
3900 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3901 
3902 	return (WALK_NEXT);
3903 }
3904 
3905 int
3906 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3907 {
3908 	const char *logname = "kmem_transaction_log";
3909 	kmalog_data_t kma;
3910 
3911 	if (argc > 1)
3912 		return (DCMD_USAGE);
3913 
3914 	kma.kma_newest = 0;
3915 	if (flags & DCMD_ADDRSPEC)
3916 		kma.kma_addr = addr;
3917 	else
3918 		kma.kma_addr = NULL;
3919 
3920 	if (argc > 0) {
3921 		if (argv->a_type != MDB_TYPE_STRING)
3922 			return (DCMD_USAGE);
3923 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3924 			logname = "kmem_failure_log";
3925 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3926 			logname = "kmem_slab_log";
3927 		else
3928 			return (DCMD_USAGE);
3929 	}
3930 
3931 	if (mdb_readvar(&addr, logname) == -1) {
3932 		mdb_warn("failed to read %s log header pointer");
3933 		return (DCMD_ERR);
3934 	}
3935 
3936 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3937 		mdb_warn("failed to walk kmem log");
3938 		return (DCMD_ERR);
3939 	}
3940 
3941 	return (DCMD_OK);
3942 }
3943 
3944 /*
3945  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3946  * The first piece is a structure which we use to accumulate kmem_cache_t
3947  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3948  * walker; we either add all caches, or ones named explicitly as arguments.
3949  */
3950 
/*
 * Growable list of kmem_cache_t addresses collected by kmc_add().
 */
typedef struct kmclist kmclist_t;

struct kmclist {
	const char *kmc_name;		/* Name to match (or NULL) */
	uintptr_t *kmc_caches;		/* List of kmem_cache_t addrs */
	int kmc_nelems;			/* Num entries in kmc_caches */
	int kmc_size;			/* Size of kmc_caches array */
};
3957 
3958 static int
3959 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3960 {
3961 	void *p;
3962 	int s;
3963 
3964 	if (kmc->kmc_name == NULL ||
3965 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3966 		/*
3967 		 * If we have a match, grow our array (if necessary), and then
3968 		 * add the virtual address of the matching cache to our list.
3969 		 */
3970 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3971 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3972 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3973 
3974 			bcopy(kmc->kmc_caches, p,
3975 			    sizeof (uintptr_t) * kmc->kmc_size);
3976 
3977 			kmc->kmc_caches = p;
3978 			kmc->kmc_size = s;
3979 		}
3980 
3981 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3982 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3983 	}
3984 
3985 	return (WALK_NEXT);
3986 }
3987 
3988 /*
3989  * The second piece of ::kmausers is a hash table of allocations.  Each
3990  * allocation owner is identified by its stack trace and data_size.  We then
3991  * track the total bytes of all such allocations, and the number of allocations
3992  * to report at the end.  Once we have a list of caches, we walk through the
3993  * allocated bufctls of each, and update our hash table accordingly.
3994  */
3995 
3996 typedef struct kmowner {
3997 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3998 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3999 	size_t kmo_signature;			/* Hash table signature */
4000 	uint_t kmo_num;				/* Number of allocations */
4001 	size_t kmo_data_size;			/* Size of each allocation */
4002 	size_t kmo_total_size;			/* Total bytes of allocation */
4003 	int kmo_depth;				/* Depth of stack trace */
4004 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
4005 } kmowner_t;
4006 
4007 typedef struct kmusers {
4008 	uintptr_t kmu_addr;			/* address of interest */
4009 	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
4010 	kmowner_t *kmu_hash;			/* Hash table of owners */
4011 	int kmu_nelems;				/* Number of entries in use */
4012 	int kmu_size;				/* Total number of entries */
4013 } kmusers_t;
4014 
4015 static void
4016 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
4017     size_t size, size_t data_size)
4018 {
4019 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4020 	size_t bucket, signature = data_size;
4021 	kmowner_t *kmo, *kmoend;
4022 
4023 	/*
4024 	 * If the hash table is full, double its size and rehash everything.
4025 	 */
4026 	if (kmu->kmu_nelems >= kmu->kmu_size) {
4027 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4028 
4029 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4030 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4031 		kmu->kmu_hash = kmo;
4032 		kmu->kmu_size = s;
4033 
4034 		kmoend = kmu->kmu_hash + kmu->kmu_size;
4035 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4036 			kmo->kmo_head = NULL;
4037 
4038 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4039 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4040 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4041 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4042 			kmu->kmu_hash[bucket].kmo_head = kmo;
4043 		}
4044 	}
4045 
4046 	/*
4047 	 * Finish computing the hash signature from the stack trace, and then
4048 	 * see if the owner is in the hash table.  If so, update our stats.
4049 	 */
4050 	for (i = 0; i < depth; i++)
4051 		signature += bcp->bc_stack[i];
4052 
4053 	bucket = signature & (kmu->kmu_size - 1);
4054 
4055 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4056 		if (kmo->kmo_signature == signature) {
4057 			size_t difference = 0;
4058 
4059 			difference |= kmo->kmo_data_size - data_size;
4060 			difference |= kmo->kmo_depth - depth;
4061 
4062 			for (i = 0; i < depth; i++) {
4063 				difference |= kmo->kmo_stack[i] -
4064 				    bcp->bc_stack[i];
4065 			}
4066 
4067 			if (difference == 0) {
4068 				kmo->kmo_total_size += size;
4069 				kmo->kmo_num++;
4070 				return;
4071 			}
4072 		}
4073 	}
4074 
4075 	/*
4076 	 * If the owner is not yet hashed, grab the next element and fill it
4077 	 * in based on the allocation information.
4078 	 */
4079 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4080 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4081 	kmu->kmu_hash[bucket].kmo_head = kmo;
4082 
4083 	kmo->kmo_signature = signature;
4084 	kmo->kmo_num = 1;
4085 	kmo->kmo_data_size = data_size;
4086 	kmo->kmo_total_size = size;
4087 	kmo->kmo_depth = depth;
4088 
4089 	for (i = 0; i < depth; i++)
4090 		kmo->kmo_stack[i] = bcp->bc_stack[i];
4091 }
4092 
4093 /*
4094  * When ::kmausers is invoked without the -f flag, we simply update our hash
4095  * table with the information from each allocated bufctl.
4096  */
4097 /*ARGSUSED*/
4098 static int
4099 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4100 {
4101 	const kmem_cache_t *cp = kmu->kmu_cache;
4102 
4103 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4104 	return (WALK_NEXT);
4105 }
4106 
4107 /*
4108  * When ::kmausers is invoked with the -f flag, we print out the information
4109  * for each bufctl as well as updating the hash table.
4110  */
4111 static int
4112 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4113 {
4114 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4115 	const kmem_cache_t *cp = kmu->kmu_cache;
4116 	kmem_bufctl_t bufctl;
4117 
4118 	if (kmu->kmu_addr) {
4119 		if (mdb_vread(&bufctl, sizeof (bufctl),  addr) == -1)
4120 			mdb_warn("couldn't read bufctl at %p", addr);
4121 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4122 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4123 		    cp->cache_bufsize)
4124 			return (WALK_NEXT);
4125 	}
4126 
4127 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4128 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4129 
4130 	for (i = 0; i < depth; i++)
4131 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
4132 
4133 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4134 	return (WALK_NEXT);
4135 }
4136 
4137 /*
4138  * We sort our results by allocation size before printing them.
4139  */
4140 static int
4141 kmownercmp(const void *lp, const void *rp)
4142 {
4143 	const kmowner_t *lhs = lp;
4144 	const kmowner_t *rhs = rp;
4145 
4146 	return (rhs->kmo_total_size - lhs->kmo_total_size);
4147 }
4148 
4149 /*
4150  * The main engine of ::kmausers is relatively straightforward: First we
4151  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4152  * iterate over the allocated bufctls of each cache in the list.  Finally,
4153  * we sort and print our results.
4154  */
4155 /*ARGSUSED*/
4156 int
4157 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4158 {
4159 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
4160 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
4161 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
4162 	int do_all_caches = 1;		/* Do all caches (no arguments) */
4163 	int opt_e = FALSE;		/* Include "small" users */
4164 	int opt_f = FALSE;		/* Print stack traces */
4165 
4166 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4167 	kmowner_t *kmo, *kmoend;
4168 	int i, oelems;
4169 
4170 	kmclist_t kmc;
4171 	kmusers_t kmu;
4172 
4173 	bzero(&kmc, sizeof (kmc));
4174 	bzero(&kmu, sizeof (kmu));
4175 
4176 	while ((i = mdb_getopts(argc, argv,
4177 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4178 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4179 
4180 		argv += i;	/* skip past options we just processed */
4181 		argc -= i;	/* adjust argc */
4182 
4183 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4184 			return (DCMD_USAGE);
4185 
4186 		oelems = kmc.kmc_nelems;
4187 		kmc.kmc_name = argv->a_un.a_str;
4188 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4189 
4190 		if (kmc.kmc_nelems == oelems) {
4191 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4192 			return (DCMD_ERR);
4193 		}
4194 
4195 		do_all_caches = 0;
4196 		argv++;
4197 		argc--;
4198 	}
4199 
4200 	if (flags & DCMD_ADDRSPEC) {
4201 		opt_f = TRUE;
4202 		kmu.kmu_addr = addr;
4203 	} else {
4204 		kmu.kmu_addr = NULL;
4205 	}
4206 
4207 	if (opt_e)
4208 		mem_threshold = cnt_threshold = 0;
4209 
4210 	if (opt_f)
4211 		callback = (mdb_walk_cb_t)kmause2;
4212 
4213 	if (do_all_caches) {
4214 		kmc.kmc_name = NULL; /* match all cache names */
4215 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4216 	}
4217 
4218 	for (i = 0; i < kmc.kmc_nelems; i++) {
4219 		uintptr_t cp = kmc.kmc_caches[i];
4220 		kmem_cache_t c;
4221 
4222 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
4223 			mdb_warn("failed to read cache at %p", cp);
4224 			continue;
4225 		}
4226 
4227 		if (!(c.cache_flags & KMF_AUDIT)) {
4228 			if (!do_all_caches) {
4229 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
4230 				    c.cache_name);
4231 			}
4232 			continue;
4233 		}
4234 
4235 		kmu.kmu_cache = &c;
4236 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
4237 		audited_caches++;
4238 	}
4239 
4240 	if (audited_caches == 0 && do_all_caches) {
4241 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4242 		return (DCMD_ERR);
4243 	}
4244 
4245 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4246 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4247 
4248 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4249 		if (kmo->kmo_total_size < mem_threshold &&
4250 		    kmo->kmo_num < cnt_threshold)
4251 			continue;
4252 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4253 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4254 		for (i = 0; i < kmo->kmo_depth; i++)
4255 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4256 	}
4257 
4258 	return (DCMD_OK);
4259 }
4260 
/*
 * Help text for the kmausers dcmd.
 */
void
kmausers_help(void)
{
	static const char text[] =
	    "Displays the largest users of the kmem allocator, sorted by \n"
	    "trace.  If one or more caches is specified, only those caches\n"
	    "will be searched.  By default, all caches are searched.  If an\n"
	    "address is specified, then only those allocations which include\n"
	    "the given address are displayed.  Specifying an address implies\n"
	    "-f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n";

	mdb_printf("%s", text);
}
4276 
/*
 * Return the target's 'kmem_ready' value, or -1 if it cannot be read (in
 * which case errno has been set by mdb_readvar()).
 */
static int
kmem_ready_check(void)
{
	int ready;

	return ((mdb_readvar(&ready, "kmem_ready") < 0) ? -1 : ready);
}
4287 
4288 void
4289 kmem_statechange(void)
4290 {
4291 	static int been_ready = 0;
4292 
4293 	if (been_ready)
4294 		return;
4295 
4296 	if (kmem_ready_check() <= 0)
4297 		return;
4298 
4299 	been_ready = 1;
4300 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4301 }
4302 
4303 void
4304 kmem_init(void)
4305 {
4306 	mdb_walker_t w = {
4307 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4308 		list_walk_step, list_walk_fini
4309 	};
4310 
4311 	/*
4312 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
4313 	 * immediately.  Walkers in the linkage structure won't be ready until
4314 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
4315 	 * is ready, we'll use the walker to initialize the caches.  If kmem
4316 	 * isn't ready, we'll register a callback that will allow us to defer
4317 	 * cache walking until it is.
4318 	 */
4319 	if (mdb_add_walker(&w) != 0) {
4320 		mdb_warn("failed to add kmem_cache walker");
4321 		return;
4322 	}
4323 
4324 	kmem_statechange();
4325 }
4326 
/*
 * Search parameters handed to the per-thread callback of ::whatthread.
 */
typedef struct whatthread {
	uintptr_t	wt_target;	/* value to look for on each stack */
	int		wt_verbose;	/* nonzero: report every matching slot */
} whatthread_t;
4331 
4332 static int
4333 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4334 {
4335 	uintptr_t current, data;
4336 
4337 	if (t->t_stkbase == NULL)
4338 		return (WALK_NEXT);
4339 
4340 	/*
4341 	 * Warn about swapped out threads, but drive on anyway
4342 	 */
4343 	if (!(t->t_schedflag & TS_LOAD)) {
4344 		mdb_warn("thread %p's stack swapped out\n", addr);
4345 		return (WALK_NEXT);
4346 	}
4347 
4348 	/*
4349 	 * Search the thread's stack for the given pointer.  Note that it would
4350 	 * be more efficient to follow ::kgrep's lead and read in page-sized
4351 	 * chunks, but this routine is already fast and simple.
4352 	 */
4353 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4354 	    current += sizeof (uintptr_t)) {
4355 		if (mdb_vread(&data, sizeof (data), current) == -1) {
4356 			mdb_warn("couldn't read thread %p's stack at %p",
4357 			    addr, current);
4358 			return (WALK_ERR);
4359 		}
4360 
4361 		if (data == w->wt_target) {
4362 			if (w->wt_verbose) {
4363 				mdb_printf("%p in thread %p's stack%s\n",
4364 				    current, addr, stack_active(t, current));
4365 			} else {
4366 				mdb_printf("%#lr\n", addr);
4367 				return (WALK_NEXT);
4368 			}
4369 		}
4370 	}
4371 
4372 	return (WALK_NEXT);
4373 }
4374 
4375 int
4376 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4377 {
4378 	whatthread_t w;
4379 
4380 	if (!(flags & DCMD_ADDRSPEC))
4381 		return (DCMD_USAGE);
4382 
4383 	w.wt_verbose = FALSE;
4384 	w.wt_target = addr;
4385 
4386 	if (mdb_getopts(argc, argv,
4387 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4388 		return (DCMD_USAGE);
4389 
4390 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4391 	    == -1) {
4392 		mdb_warn("couldn't walk threads");
4393 		return (DCMD_ERR);
4394 	}
4395 
4396 	return (DCMD_OK);
4397 }
4398