/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "kmem.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

static int mdb_debug_level = 0;

static void *kmem_ready_cbhdl;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}
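
/*
 * Usage sketch for the per-cache walkers this callback registers (cache
 * name and addresses illustrative; any cache name reported by ::kmem_cache
 * works):
 *
 *	> ::walk thread_cache
 *	30000082d98
 *	30000082f20
 *
 * Each such walker reuses kmem_walk_init()/_step()/_fini() below, passing
 * the cache's address as walk_init_arg.
 */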

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");

	return (DCMD_OK);
}
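
/*
 * Usage sketch (assuming this dcmd is registered as "kmem_debug" in the
 * module's dcmd table, which lives outside this file):
 *
 *	> ::kmem_debug
 *	kmem: debugging is now on
 *
 * While enabled, the dprintf() calls throughout this module emit their
 * diagnostics prefixed with "kmem debug: ".
 */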

typedef struct {
	uintptr_t kcw_first;
	uintptr_t kcw_current;
} kmem_cache_walk_t;

int
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
	kmem_cache_walk_t *kcw;
	kmem_cache_t c;
	uintptr_t cp;
	GElf_Sym sym;

	if (mdb_lookup_by_name("kmem_null_cache", &sym) == -1) {
		mdb_warn("couldn't find kmem_null_cache");
		return (WALK_ERR);
	}

	cp = (uintptr_t)sym.st_value;

	if (mdb_vread(&c, sizeof (kmem_cache_t), cp) == -1) {
		mdb_warn("couldn't read cache at %p", cp);
		return (WALK_ERR);
	}

	kcw = mdb_alloc(sizeof (kmem_cache_walk_t), UM_SLEEP);

	kcw->kcw_first = cp;
	kcw->kcw_current = (uintptr_t)c.cache_next;
	wsp->walk_data = kcw;

	return (WALK_NEXT);
}

int
kmem_cache_walk_step(mdb_walk_state_t *wsp)
{
	kmem_cache_walk_t *kcw = wsp->walk_data;
	kmem_cache_t c;
	int status;

	if (mdb_vread(&c, sizeof (kmem_cache_t), kcw->kcw_current) == -1) {
		mdb_warn("couldn't read cache at %p", kcw->kcw_current);
		return (WALK_DONE);
	}

	status = wsp->walk_callback(kcw->kcw_current, &c, wsp->walk_cbdata);

	if ((kcw->kcw_current = (uintptr_t)c.cache_next) == kcw->kcw_first)
		return (WALK_DONE);

	return (status);
}

void
kmem_cache_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_cache_walk_t *kcw = wsp->walk_data;
	mdb_free(kcw, sizeof (kmem_cache_walk_t));
}
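
/*
 * The kmem_cache walker starts at kmem_null_cache and follows cache_next
 * until the list wraps back around to its starting point.  Usage sketch
 * (addresses illustrative):
 *
 *	> ::walk kmem_cache
 *	30000041008
 *	30000041288
 */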

int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");
		return (WALK_ERR);
	}

	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}

int
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;
	kmem_cpu_cache_t cc;

	caddr += cpu->cpu_cache_offset;

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}
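
/*
 * Usage sketch (walker name assumed registered as "kmem_cpu_cache", per
 * the warning above; the address must be a kmem_cache_t):
 *
 *	> 30000041008::walk kmem_cpu_cache
 *
 * The layered 'cpu' walk supplies each cpu_t, and cpu_cache_offset locates
 * that CPU's kmem_cpu_cache_t relative to the cache address.
 */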

int
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("kmem_slab doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	wsp->walk_data =
	    (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;

	return (WALK_NEXT);
}

int
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("kmem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	wsp->walk_data =
	    (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_freelist;

	/*
	 * Some consumers (kmem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache.  So
	 * if there are *no* partial slabs, report the last full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;

	return (WALK_NEXT);
}

int
kmem_slab_walk_step(mdb_walk_state_t *wsp)
{
	kmem_slab_t s;
	uintptr_t addr = wsp->walk_addr;
	uintptr_t saddr = (uintptr_t)wsp->walk_data;
	uintptr_t caddr = saddr - offsetof(kmem_cache_t, cache_nullslab);

	if (addr == saddr)
		return (WALK_DONE);

	if (mdb_vread(&s, sizeof (s), addr) == -1) {
		mdb_warn("failed to read slab at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	if ((uintptr_t)s.slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    addr, caddr, s.slab_cache);
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)s.slab_next;

	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
}
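
/*
 * Both slab walkers take a kmem_cache_t address and stop when the walk
 * returns to the cache's embedded cache_nullslab sentinel.  Usage sketch
 * (addresses illustrative):
 *
 *	> 30000041008::walk kmem_slab
 *	300001cd000
 */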

int
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	kmem_cache_t c;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}
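
/*
 * Example ::kmem_cache output (all values illustrative):
 *
 *	> ::kmem_cache
 *	ADDR             NAME                      FLAG  CFLAG  BUFSIZE  BUFTOTL
 *	30000041008      kmem_alloc_8              0020 080000        8     3546
 *
 * With no address, the dcmd is applied to every cache via the kmem_cache
 * walker above.
 */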

static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}

static int
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
		return (-1);

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
		return (1);

	return (0);
}

typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;
	size_t kmhw_nelems;
	size_t kmhw_pos;
	kmem_bufctl_t kmhw_cur;
} kmem_hash_walk_t;

int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw;
	uintptr_t *hash;
	kmem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == NULL) {
		mdb_warn("kmem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;
	kmhw->kmhw_pos = 0;

	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = kmhw;

	return (WALK_NEXT);
}

int
kmem_hash_walk_step(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;
	uintptr_t addr = NULL;

	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
				break;
		}
	}
	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
}

void
kmem_hash_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;

	if (kmhw == NULL)
		return;

	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}
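
/*
 * The kmem_hash walker snapshots the cache's hash table and then visits
 * every chained kmem_bufctl_t; only KMF_HASH caches (those with external
 * bufctls) have anything to report.  Usage sketch (address illustrative):
 *
 *	> 30000041008::walk kmem_hash
 */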

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
	kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",
		    buf, caddr);
		return (-1);
	}

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
			return (-1);
		}
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;
			return (0);
		}
		bcp = bc.bc_next;
	}

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
	return (-1);
}

int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}
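
/*
 * Worked example for kmem_estimate_allocated() (numbers illustrative):
 * with cache_buftotal == 1000, partial slabs holding a combined 40 free
 * chunks (the slab_chunks - slab_refcnt sums subtracted above), and 12
 * full 15-round magazines in the depot (cache_full.ml_total * magsize),
 * the estimate is 1000 - 40 - 180 = 780.  It remains an upper bound
 * because loaded per-CPU magazines are not subtracted.
 */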

#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
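
/*
 * READMAG_ROUNDS is only usable inside kmem_read_magazines() below: it
 * relies on that function's locals (mp, kmp, magbsize, maglist, magcnt,
 * magmax, i) and jumps to its 'fail' label on error.  It copies 'rounds'
 * buffer pointers from the magazine image at kmp into maglist.
 */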

int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0)
		magsize = 1;

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		goto fail;
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	kmem_bufctl_audit_t b;

	/*
	 * if KMF_AUDIT is not set, we know that we're looking at a
	 * kmem_bufctl_t.
	 */
	if (!(cp->cache_flags & KMF_AUDIT) ||
	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
		(void) memset(&b, 0, sizeof (b));
		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
}

typedef struct kmem_walk {
	int kmw_type;

	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;
	size_t kmw_csize;

	/*
	 * magazine layer
	 */
	void **kmw_maglist;
	size_t kmw_max;
	size_t kmw_count;
	size_t kmw_pos;

	/*
	 * slab layer
	 */
	char *kmw_valid;	/* to keep track of freed buffers */
	char *kmw_ubase;	/* buffer for slab data */
} kmem_walk_t;

static int
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	kmem_walk_t *kmw;
	int ncpus, csize;
	kmem_cache_t *cp;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	type &= ~KM_HASH;

	if (addr == NULL) {
		mdb_warn("kmem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * First we need to figure out how many CPUs are configured in the
	 * system to know how much to slurp out.
	 */
	mdb_readvar(&ncpus, "max_ncpus");

	csize = KMEM_CACHE_SIZE(ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they want constructed buffers, but there's no constructor or
	 * the cache has DEADBEEF checking enabled, there is nothing to report.
	 */
	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
	    cp->cache_constructor == NULL ||
	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
	    &magmax, UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines;  if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & KM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

	kmw->kmw_type = type;
	kmw->kmw_addr = addr;
	kmw->kmw_cp = cp;
	kmw->kmw_csize = csize;
	kmw->kmw_maglist = maglist;
	kmw->kmw_max = magmax;
	kmw->kmw_count = magcnt;
	kmw->kmw_pos = 0;

	/*
	 * When walking allocated buffers in a KMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
		layered = "kmem_hash";

		kmw->kmw_type |= KM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & KM_ALLOCATED)
			layered = "kmem_slab";
		else
			layered = "kmem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & KMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			kmw->kmw_ubase = mdb_alloc(slabsize +
			    sizeof (kmem_bufctl_t), UM_SLEEP);

			if (type & KM_ALLOCATED)
				kmw->kmw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

out1:
	if (status == WALK_ERR) {
		if (kmw->kmw_valid)
			mdb_free(kmw->kmw_valid, slabsize / chunksize);

		if (kmw->kmw_ubase)
			mdb_free(kmw->kmw_ubase, slabsize +
			    sizeof (kmem_bufctl_t));

		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (uintptr_t));
		mdb_free(kmw, sizeof (kmem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}

int
kmem_walk_step(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	int type = kmw->kmw_type;
	kmem_cache_t *cp = kmw->kmw_cp;

	void **maglist = kmw->kmw_maglist;
	int magcnt = kmw->kmw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const kmem_slab_t *sp;
	const kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'kmem_hash' layered walk case
	 */
	if (type & KM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & KM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (kmem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = kmw->kmw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & KM_FREE) && magcnt != 0) {
		kmw->kmw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & KM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & KMF_BUFTAG) {
					kmem_buftag_t *btp;
					kmem_buftag_t tag;

					/* LINTED - alignment */
					btp = KMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (kmem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * If they want constructed buffers, we're finished, since the
	 * magazine layer holds them all.
	 */
	if (type & KM_CONSTRUCTED)
		return (WALK_DONE);

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * Since we could be in the middle of allocating a buffer, our
	 * refcnt could be one higher than it ought to be.  So we check
	 * one entry further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in;  we just need to determine
			 * its offset in the slab to find the
			 * kmem_bufctl_t.
			 */
			bc = *((kmem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
kmem_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (kmw == NULL)
		return;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;

	if (kmw->kmw_valid != NULL)
		mdb_free(kmw->kmw_valid, slabsize / chunksize);
	if (kmw->kmw_ubase != NULL)
		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));

	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
	mdb_free(kmw, sizeof (kmem_walk_t));
}

/*ARGSUSED*/
static int
kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & KMC_NOTOUCH)
		return (WALK_NEXT);

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
		return (WALK_DONE);

	return (WALK_NEXT);
}

#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}

int
kmem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("kmem", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
}

int
bufctl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("bufctl", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
}

int
freemem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE));
}

int
freemem_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem_constructed", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
}

int
freectl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
}

int
freectl_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl_constructed", wsp);
	return (kmem_walk_init_common(wsp,
	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
}
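
/*
 * Summary of the walker init functions above, in terms of the KM_* type
 * bits defined at the top of this file:
 *
 *	kmem			KM_ALLOCATED
 *	bufctl			KM_ALLOCATED | KM_BUFCTL
 *	freemem			KM_FREE
 *	freemem_constructed	KM_FREE | KM_CONSTRUCTED
 *	freectl			KM_FREE | KM_BUFCTL
 *	freectl_constructed	KM_FREE | KM_BUFCTL | KM_CONSTRUCTED
 *
 * With no address, each fans out over all non-KMC_NOTOUCH caches via
 * KMEM_WALK_ALL; with a kmem_cache_t address, it walks just that cache,
 * e.g. (address illustrative):
 *
 *	> 30000041008::walk kmem
 */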

typedef struct bufctl_history_walk {
	void		*bhw_next;
	kmem_cache_t	*bhw_cache;
	kmem_slab_t	*bhw_slab;
	hrtime_t	bhw_timestamp;
} bufctl_history_walk_t;

int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	kmem_bufctl_audit_t bc;
	kmem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl;  in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}

int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	kmem_bufctl_audit_t bc;

	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)bc.bc_addr != baseaddr ||
	    bc.bc_cache != bhw->bhw_cache ||
	    bc.bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = bc.bc_lastlog;
	bhw->bhw_timestamp = bc.bc_timestamp;

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
bufctl_history_walk_fini(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;

	mdb_free(bhw, sizeof (*bhw));
}
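
/*
 * Usage sketch: given a bufctl address from a KMF_AUDIT cache, walk that
 * buffer's transaction history back through the log (address illustrative):
 *
 *	> 300001d2e40::walk bufctl_history
 *
 * The step function stops as soon as an entry's address, cache, slab, or
 * timestamp ordering no longer matches, so a recycled log chunk cannot
 * drag the walk into some other buffer's history.
 */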

typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;
	kmem_bufctl_audit_t **klw_sorted;
	kmem_log_header_t klw_lh;
	size_t klw_size;
	size_t klw_maxndx;
	size_t klw_ndx;
} kmem_log_walk_t;

int
kmem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	kmem_log_walk_t *klw;
	kmem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
	 * read the log whose kmem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == NULL) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
	lhp = &klw->klw_lh;

	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;

	if (mdb_vread(klw->klw_base, klw->klw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(klw->klw_base, klw->klw_size);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++)
			klw->klw_sorted[k++] = &chunk[j];
	}

	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	klw->klw_maxndx = k;
	wsp->walk_data = klw;

	return (WALK_NEXT);
}

int
kmem_log_walk_step(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;
	kmem_bufctl_audit_t *bcp;

	if (klw->klw_ndx == klw->klw_maxndx)
		return (WALK_DONE);

	bcp = klw->klw_sorted[klw->klw_ndx++];

	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
}

void
kmem_log_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;

	mdb_free(klw->klw_base, klw->klw_size);
	mdb_free(klw->klw_sorted, klw->klw_maxndx *
	    sizeof (kmem_bufctl_audit_t *));
	mdb_free(klw, sizeof (kmem_log_walk_t));
}
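
/*
 * Usage sketch: visit every audit record in the transaction log, most
 * recent first (requires kmem_flags audit logging, so that
 * kmem_transaction_log is non-NULL):
 *
 *	> ::walk kmem_log
 */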

typedef struct allocdby_bufctl {
	uintptr_t abb_addr;
	hrtime_t abb_ts;
} allocdby_bufctl_t;

typedef struct allocdby_walk {
	const char *abw_walk;
	uintptr_t abw_thread;
	size_t abw_nbufs;
	size_t abw_size;
	allocdby_bufctl_t *abw_buf;
	size_t abw_ndx;
} allocdby_walk_t;

int
allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
    allocdby_walk_t *abw)
{
	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
		return (WALK_NEXT);

	if (abw->abw_nbufs == abw->abw_size) {
		allocdby_bufctl_t *buf;
		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;

		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);

		bcopy(abw->abw_buf, buf, oldsize);
		mdb_free(abw->abw_buf, oldsize);

		abw->abw_size <<= 1;
		abw->abw_buf = buf;
	}

	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
	abw->abw_nbufs++;

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
{
	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
	    abw, addr) == -1) {
		mdb_warn("couldn't walk bufctl for cache %p", addr);
		return (WALK_DONE);
	}

	return (WALK_NEXT);
}

static int
allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
{
	if (lhs->abb_ts < rhs->abb_ts)
		return (1);
	if (lhs->abb_ts > rhs->abb_ts)
		return (-1);
	return (0);
}

static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}

int
allocdby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "bufctl"));
}

int
freedby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "freectl"));
}

int
allocdby_walk_step(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;
	kmem_bufctl_audit_t bc;
	uintptr_t addr;

	if (abw->abw_ndx == abw->abw_nbufs)
		return (WALK_DONE);

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (WALK_DONE);
	}

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
allocdby_walk_fini(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;

	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
	mdb_free(abw, sizeof (allocdby_walk_t));
}

/*ARGSUSED*/
int
allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
{
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	int i;

	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
	for (i = 0; i < bcp->bc_depth; i++) {
		if (mdb_lookup_by_addr(bcp->bc_stack[i],
		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
			continue;
		if (strncmp(c, "kmem_", 5) == 0)
			continue;
		mdb_printf("%s+0x%lx",
		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
		break;
	}
	mdb_printf("\n");

	return (WALK_NEXT);
}

static int
allocdby_common(uintptr_t addr, uint_t flags, const char *w)
{
	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");

	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
		mdb_warn("can't walk '%s' for %p", w, addr);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

/*ARGSUSED*/
int
allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "allocdby"));
}

/*ARGSUSED*/
int
freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "freedby"));
}
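
/*
 * Usage sketch (thread address and output illustrative; the columns match
 * the header printed by allocdby_common(), and kmem_* frames are skipped
 * when choosing the caller to print):
 *
 *	> 30000fb2140::allocdby
 *	BUFCTL           TIMESTAMP CALLER
 *	300001d2e40   ee559e3a2c08 some_caller+0x90
 *
 * ::freedby is identical except that it uses the "freedby" walker, which
 * draws from the free log.
 */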

/*
 * Return a string describing the address in relation to the given thread's
 * stack.
 *
 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
 *
 * - If the address is above the stack pointer, return an empty string
 *   signifying that the address is active.
 *
 * - If the address is below the stack pointer, and the thread is not on proc,
 *   return " (below sp)".
 *
 * - If the address is below the stack pointer, and the thread is on proc,
 *   return " (possibly below sp)".  Depending on context, we may or may not
 *   have an accurate t_sp.
 */
static const char *
stack_active(const kthread_t *t, uintptr_t addr)
{
	uintptr_t panicstk;
	GElf_Sym sym;

	if (t->t_state == TS_FREE)
		return (" (inactive interrupt thread)");

	/*
	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
	 * no longer relates to the thread's real stack.
	 */
	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
		panicstk = (uintptr_t)sym.st_value;

		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
			return ("");
	}

	if (addr >= t->t_sp + STACK_BIAS)
		return ("");

	if (t->t_state == TS_ONPROC)
		return (" (possibly below sp)");

	return (" (below sp)");
}

typedef struct whatis {
	uintptr_t w_addr;
	const kmem_cache_t *w_cache;
	const vmem_t *w_vmem;
	size_t w_slab_align;
	int w_slab_found;
	int w_found;
	int w_kmem_lite_count;
	uint_t w_verbose;
	uint_t w_freemem;
	uint_t w_all;
	uint_t w_bufctl;
	uint_t w_idspace;
} whatis_t;

static void
whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
{
	/* LINTED pointer cast may result in improper alignment */
	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr);
	intptr_t stat;
	int count = 0;
	int i;
	pc_t callers[16];

	if (w->w_cache->cache_flags & KMF_REDZONE) {
		kmem_buftag_t bt;

		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
			goto done;

		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;

		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
			goto done;

		/*
		 * provide the bufctl ptr if it has useful information
		 */
		if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT))
			baddr = (uintptr_t)bt.bt_bufctl;

		if (w->w_cache->cache_flags & KMF_LITE) {
			count = w->w_kmem_lite_count;

			if (count * sizeof (pc_t) > sizeof (callers))
				count = 0;

			if (count > 0 &&
			    mdb_vread(callers, count * sizeof (pc_t),
			    btaddr +
			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
				count = 0;

			/*
			 * skip unused callers
			 */
			while (count > 0 && callers[count - 1] ==
			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
				count--;
		}
	}

done:
	if (baddr == 0)
		mdb_printf("%p is %p+%p, %s from %s\n",
		    w->w_addr, addr, w->w_addr - addr,
		    w->w_freemem == FALSE ? "allocated" : "freed",
		    w->w_cache->cache_name);
	else
		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
		    w->w_addr, addr, w->w_addr - addr, baddr,
		    w->w_freemem == FALSE ? "allocated" : "freed",
		    w->w_cache->cache_name);

	if (count > 0) {
		mdb_inc_indent(8);
		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
		    callers[0], (count != 1)? ", ":"\n");
		for (i = 1; i < count; i++)
			mdb_printf("%a%s", callers[i],
			    (i + 1 < count)? ", ":"\n");
		mdb_dec_indent(8);
	}
}

/*ARGSUSED*/
static int
whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
{
	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
		return (WALK_NEXT);

	whatis_print_kmem(addr, 0, w);
	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

static int
whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
{
	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
		return (WALK_NEXT);

	mdb_printf("%p is %p+%p ", w->w_addr,
	    vs->vs_start, w->w_addr - vs->vs_start);

	/*
	 * Always provide the vmem_seg pointer if it has a stack trace.
	 */
	if (w->w_bufctl == TRUE ||
	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
		mdb_printf("(vmem_seg %p) ", addr);
	}

	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
	    "freed " : "", w->w_vmem->vm_name);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

static int
whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
{
	const char *nm = vmem->vm_name;
	w->w_vmem = vmem;
	w->w_freemem = FALSE;

	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
		return (WALK_NEXT);

	if (w->w_verbose)
		mdb_printf("Searching vmem arena %s...\n", nm);

	if (mdb_pwalk("vmem_alloc",
	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
		mdb_warn("can't walk vmem seg for %p", addr);
		return (WALK_NEXT);
	}

	if (w->w_found && w->w_all == FALSE)
		return (WALK_DONE);

	if (w->w_verbose)
		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);

	w->w_freemem = TRUE;

	if (mdb_pwalk("vmem_free",
	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
		mdb_warn("can't walk vmem seg for %p", addr);
		return (WALK_NEXT);
	}

	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
}

/*ARGSUSED*/
static int
whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
{
	uintptr_t addr;

	if (bcp == NULL)
		return (WALK_NEXT);

	addr = (uintptr_t)bcp->bc_addr;

	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
		return (WALK_NEXT);

	whatis_print_kmem(addr, baddr, w);
	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

/*ARGSUSED*/
static int
whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
{
	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);

	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
		return (WALK_NEXT);

	w->w_slab_found++;
	return (WALK_DONE);
}

static int
whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	char *walk, *freewalk;
	mdb_walk_cb_t func;
	vmem_t *vmp = c->cache_arena;

	if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
		return (WALK_NEXT);

	if (w->w_bufctl == FALSE) {
		walk = "kmem";
		freewalk = "freemem";
		func = (mdb_walk_cb_t)whatis_walk_kmem;
	} else {
		walk = "bufctl";
		freewalk = "freectl";
		func = (mdb_walk_cb_t)whatis_walk_bufctl;
	}

	w->w_cache = c;

	if (w->w_verbose)
		mdb_printf("Searching %s's slabs...\n", c->cache_name);

	/*
	 * Verify that the address is in one of the cache's slabs.  If not,
	 * we can skip the more expensive walkers.  (this is purely a
	 * heuristic -- as long as there are no false-negatives, we'll be fine)
	 *
	 * We try to get the cache's arena's quantum, since to accurately
	 * get the base of a slab, you have to align it to the quantum.  If
	 * it doesn't look sensible, we fall back to not aligning.
	 */
	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
	    (uintptr_t)&vmp->vm_quantum) == -1) {
		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
		w->w_slab_align = 1;
	}

	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
	    (w->w_slab_align & (w->w_slab_align - 1))) {
		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
		    w->w_slab_align);
		w->w_slab_align = 1;
	}

	w->w_slab_found = 0;
	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
	    addr) == -1) {
		mdb_warn("can't find kmem_slab walker");
		return (WALK_DONE);
	}
	if (w->w_slab_found == 0)
		return (WALK_NEXT);

	if (c->cache_flags & KMF_LITE) {
		if (mdb_readvar(&w->w_kmem_lite_count,
		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
			w->w_kmem_lite_count = 0;
	}

	if (w->w_verbose)
		mdb_printf("Searching %s...\n", c->cache_name);

	w->w_freemem = FALSE;

	if (mdb_pwalk(walk, func, w, addr) == -1) {
		mdb_warn("can't find %s walker", walk);
		return (WALK_DONE);
	}

	if (w->w_found && w->w_all == FALSE)
		return (WALK_DONE);

	/*
	 * We have searched for allocated memory; now search for freed memory.
	 */
	if (w->w_verbose)
		mdb_printf("Searching %s for free memory...\n", c->cache_name);

	w->w_freemem = TRUE;

	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
		mdb_warn("can't find %s walker", freewalk);
		return (WALK_DONE);
	}

	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
}

static int
whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	if (c->cache_cflags & KMC_NOTOUCH)
		return (WALK_NEXT);

	return (whatis_walk_cache(addr, c, w));
}

static int
whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	if (!(c->cache_cflags & KMC_NOTOUCH))
		return (WALK_NEXT);

	return (whatis_walk_cache(addr, c, w));
}

static int
whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
{
	/*
	 * Often, one calls ::whatis on an address from a thread structure.
	 * We use this opportunity to short circuit this case...
	 */
	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
		mdb_printf("%p is %p+%p, allocated as a thread structure\n",
		    w->w_addr, addr, w->w_addr - addr);
		w->w_found++;
		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
	}

	if (w->w_addr < (uintptr_t)t->t_stkbase ||
	    w->w_addr > (uintptr_t)t->t_stk)
		return (WALK_NEXT);

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
	    stack_active(t, w->w_addr));

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

static int
whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
{
	struct module mod;
	char name[MODMAXNAMELEN], *where;
	char c[MDB_SYM_NAMLEN];
	Shdr shdr;
	GElf_Sym sym;

	if (m->mod_mp == NULL)
		return (WALK_NEXT);

	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
		mdb_warn("couldn't read modctl %p's module", addr);
		return (WALK_NEXT);
	}

	if (w->w_addr >= (uintptr_t)mod.text &&
	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
		where = "text segment";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.data &&
	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
		where = "data segment";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.bss &&
	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
		where = "bss";
		goto found;
	}

	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
		mdb_warn("couldn't read symbol header for %p's module", addr);
		return (WALK_NEXT);
	}

	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
		where = "symtab";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.symspace &&
	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
		where = "symspace";
		goto found;
	}

	return (WALK_NEXT);

found:
	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);

	mdb_printf("%p is ", w->w_addr);

	/*
	 * If we found this address in a module, then there's a chance that
	 * it's actually a named symbol.  Try the symbol lookup.
	 */
	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c),
	    &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value &&
	    w->w_addr < (uintptr_t)sym.st_value + sym.st_size) {
		mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value);
	}

	mdb_printf("in %s's %s\n", name, where);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

/*ARGSUSED*/
static int
whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
{
	static int machsize = 0;
	mdb_ctf_id_t id;

	if (machsize == 0) {
		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
			machsize = mdb_ctf_type_size(id);
		else {
			mdb_warn("could not get size of page_t");
			machsize = sizeof (page_t);
		}
	}

	if (w->w_addr < addr || w->w_addr >= addr + machsize)
		return (WALK_NEXT);

	mdb_printf("%p is %p+%p, allocated as a page structure\n",
	    w->w_addr, addr, w->w_addr - addr);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}
2054 
2055 int
2056 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2057 {
2058 	whatis_t w;
2059 
2060 	if (!(flags & DCMD_ADDRSPEC))
2061 		return (DCMD_USAGE);
2062 
2063 	w.w_verbose = FALSE;
2064 	w.w_bufctl = FALSE;
2065 	w.w_all = FALSE;
2066 	w.w_idspace = FALSE;
2067 
2068 	if (mdb_getopts(argc, argv,
2069 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2070 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2071 	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
2072 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
2073 		return (DCMD_USAGE);
2074 
2075 	w.w_addr = addr;
2076 	w.w_found = 0;
2077 
2078 	if (w.w_verbose)
2079 		mdb_printf("Searching modules...\n");
2080 
2081 	if (!w.w_idspace) {
2082 		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
2083 		    == -1) {
2084 			mdb_warn("couldn't find modctl walker");
2085 			return (DCMD_ERR);
2086 		}
2087 
2088 		if (w.w_found && w.w_all == FALSE)
2089 			return (DCMD_OK);
2090 
2091 		/*
2092 		 * Now search all thread stacks.  Yes, this is a little weak; we
2093 		 * can save a lot of work by first checking to see if the
2094 		 * address is in segkp vs. segkmem.  But hey, computers are
2095 		 * fast.
2096 		 */
2097 		if (w.w_verbose)
2098 			mdb_printf("Searching threads...\n");
2099 
2100 		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
2101 		    == -1) {
2102 			mdb_warn("couldn't find thread walker");
2103 			return (DCMD_ERR);
2104 		}
2105 
2106 		if (w.w_found && w.w_all == FALSE)
2107 			return (DCMD_OK);
2108 
2109 		if (w.w_verbose)
2110 			mdb_printf("Searching page structures...\n");
2111 
2112 		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
2113 		    == -1) {
2114 			mdb_warn("couldn't find page walker");
2115 			return (DCMD_ERR);
2116 		}
2117 
2118 		if (w.w_found && w.w_all == FALSE)
2119 			return (DCMD_OK);
2120 	}
2121 
2122 	if (mdb_walk("kmem_cache",
2123 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2124 		mdb_warn("couldn't find kmem_cache walker");
2125 		return (DCMD_ERR);
2126 	}
2127 
2128 	if (w.w_found && w.w_all == FALSE)
2129 		return (DCMD_OK);
2130 
2131 	if (mdb_walk("kmem_cache",
2132 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2133 		mdb_warn("couldn't find kmem_cache walker");
2134 		return (DCMD_ERR);
2135 	}
2136 
2137 	if (w.w_found && w.w_all == FALSE)
2138 		return (DCMD_OK);
2139 
2140 	if (mdb_walk("vmem_postfix",
2141 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2142 		mdb_warn("couldn't find vmem_postfix walker");
2143 		return (DCMD_ERR);
2144 	}
2145 
2146 	if (w.w_found == 0)
2147 		mdb_printf("%p is unknown\n", addr);
2148 
2149 	return (DCMD_OK);
2150 }
2151 
2152 void
2153 whatis_help(void)
2154 {
2155 	mdb_printf(
2156 	    "Given a virtual address, attempt to determine where it came\n"
2157 	    "from.\n"
2158 	    "\n"
2159 	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
2160 	    "\t\tsearched\n"
2161 	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
2162 	    "\t\tthe first (most specific) source.\n"
2163 	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
2164 	    "\t\tthese are ignored.\n"
2165 	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
2166 	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
2167 	    "\t\thave a bufctl, it will not be reported.\n");
2168 }
2169 
2170 typedef struct kmem_log_cpu {
2171 	uintptr_t kmc_low;
2172 	uintptr_t kmc_high;
2173 } kmem_log_cpu_t;
2174 
2175 typedef struct kmem_log_data {
2176 	uintptr_t kmd_addr;
2177 	kmem_log_cpu_t *kmd_cpu;
2178 } kmem_log_data_t;
2179 
2180 int
2181 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2182     kmem_log_data_t *kmd)
2183 {
2184 	int i;
2185 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2186 	size_t bufsize;
2187 
2188 	for (i = 0; i < NCPU; i++) {
2189 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2190 			break;
2191 	}
2192 
2193 	if (kmd->kmd_addr) {
2194 		if (b->bc_cache == NULL)
2195 			return (WALK_NEXT);
2196 
2197 		if (mdb_vread(&bufsize, sizeof (bufsize),
2198 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2199 			mdb_warn(
2200 			    "failed to read cache_bufsize for cache at %p",
2201 			    b->bc_cache);
2202 			return (WALK_ERR);
2203 		}
2204 
2205 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2206 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2207 			return (WALK_NEXT);
2208 	}
2209 
2210 	if (i == NCPU)
2211 		mdb_printf("   ");
2212 	else
2213 		mdb_printf("%3d", i);
2214 
2215 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2216 	    b->bc_timestamp, b->bc_thread);
2217 
2218 	return (WALK_NEXT);
2219 }
2220 
2221 /*ARGSUSED*/
2222 int
2223 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2224 {
2225 	kmem_log_header_t lh;
2226 	kmem_cpu_log_header_t clh;
2227 	uintptr_t lhp, clhp;
2229 	uintptr_t *cpu;
2230 	GElf_Sym sym;
2231 	kmem_log_cpu_t *kmc;
2232 	int i;
2233 	kmem_log_data_t kmd;
2234 	uint_t opt_b = FALSE;
2235 
2236 	if (mdb_getopts(argc, argv,
2237 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2238 		return (DCMD_USAGE);
2239 
2240 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2241 		mdb_warn("failed to read 'kmem_transaction_log'");
2242 		return (DCMD_ERR);
2243 	}
2244 
2245 	if (lhp == NULL) {
2246 		mdb_warn("no kmem transaction log\n");
2247 		return (DCMD_ERR);
2248 	}
2249 
2252 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2253 		mdb_warn("failed to read log header at %p", lhp);
2254 		return (DCMD_ERR);
2255 	}
2256 
2257 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2258 
2259 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2260 
2261 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2262 		mdb_warn("couldn't find 'cpu' array");
2263 		return (DCMD_ERR);
2264 	}
2265 
2266 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2267 		mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2268 		    NCPU * sizeof (uintptr_t), sym.st_size);
2269 		return (DCMD_ERR);
2270 	}
2271 
2272 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2273 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2274 		return (DCMD_ERR);
2275 	}
2276 
2277 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2278 	kmd.kmd_addr = NULL;
2279 	kmd.kmd_cpu = kmc;
2280 
2281 	for (i = 0; i < NCPU; i++) {
2282 
2283 		if (cpu[i] == NULL)
2284 			continue;
2285 
2286 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2287 			mdb_warn("cannot read cpu %d's log header at %p",
2288 			    i, clhp);
2289 			return (DCMD_ERR);
2290 		}
2291 
2292 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2293 		    (uintptr_t)lh.lh_base;
2294 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2295 
2296 		clhp += sizeof (kmem_cpu_log_header_t);
2297 	}
2298 
2299 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2300 	    "TIMESTAMP", "THREAD");
2301 
2302 	/*
2303 	 * If we have been passed an address, print out only log entries
2304 	 * corresponding to that address.  If opt_b is specified, then interpret
2305 	 * the address as a bufctl.
2306 	 */
2307 	if (flags & DCMD_ADDRSPEC) {
2308 		kmem_bufctl_audit_t b;
2309 
2310 		if (opt_b) {
2311 			kmd.kmd_addr = addr;
2312 		} else {
2313 			if (mdb_vread(&b,
2314 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2315 				mdb_warn("failed to read bufctl at %p", addr);
2316 				return (DCMD_ERR);
2317 			}
2318 
2319 			(void) kmem_log_walk(addr, &b, &kmd);
2320 
2321 			return (DCMD_OK);
2322 		}
2323 	}
2324 
2325 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2326 		mdb_warn("can't find kmem log walker");
2327 		return (DCMD_ERR);
2328 	}
2329 
2330 	return (DCMD_OK);
2331 }
2332 
2333 typedef struct bufctl_history_cb {
2334 	int		bhc_flags;
2335 	int		bhc_argc;
2336 	const mdb_arg_t	*bhc_argv;
2337 	int		bhc_ret;
2338 } bufctl_history_cb_t;
2339 
2340 /*ARGSUSED*/
2341 static int
2342 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2343 {
2344 	bufctl_history_cb_t *bhc = arg;
2345 
2346 	bhc->bhc_ret =
2347 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2348 
2349 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2350 
2351 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2352 }
2353 
2354 void
2355 bufctl_help(void)
2356 {
2357 	mdb_printf("%s\n",
2358 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n");
2359 	mdb_dec_indent(2);
2360 	mdb_printf("%<b>OPTIONS%</b>\n");
2361 	mdb_inc_indent(2);
2362 	mdb_printf("%s",
2363 "  -v    Display the full content of the bufctl, including its stack trace\n"
2364 "  -h    retrieve the bufctl's transaction history, if available\n"
2365 "  -a addr\n"
2366 "        filter out bufctls not involving the buffer at addr\n"
2367 "  -c caller\n"
2368 "        filter out bufctls without the function/PC in their stack trace\n"
2369 "  -e earliest\n"
2370 "        filter out bufctls timestamped before earliest\n"
2371 "  -l latest\n"
2372 "        filter out bufctls timestamped after latest\n"
2373 "  -t thread\n"
2374 "        filter out bufctls not involving thread\n");
2375 }
2376 
2377 int
2378 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2379 {
2380 	kmem_bufctl_audit_t bc;
2381 	uint_t verbose = FALSE;
2382 	uint_t history = FALSE;
2383 	uint_t in_history = FALSE;
2384 	uintptr_t caller = NULL, thread = NULL;
2385 	uintptr_t laddr, haddr, baddr = NULL;
2386 	hrtime_t earliest = 0, latest = 0;
2387 	int i, depth;
2388 	char c[MDB_SYM_NAMLEN];
2389 	GElf_Sym sym;
2390 
2391 	if (mdb_getopts(argc, argv,
2392 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2393 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2394 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2395 	    'c', MDB_OPT_UINTPTR, &caller,
2396 	    't', MDB_OPT_UINTPTR, &thread,
2397 	    'e', MDB_OPT_UINT64, &earliest,
2398 	    'l', MDB_OPT_UINT64, &latest,
2399 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2400 		return (DCMD_USAGE);
2401 
2402 	if (!(flags & DCMD_ADDRSPEC))
2403 		return (DCMD_USAGE);
2404 
2405 	if (in_history && !history)
2406 		return (DCMD_USAGE);
2407 
2408 	if (history && !in_history) {
2409 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2410 		    UM_SLEEP | UM_GC);
2411 		bufctl_history_cb_t bhc;
2412 
2413 		nargv[0].a_type = MDB_TYPE_STRING;
2414 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2415 
2416 		for (i = 0; i < argc; i++)
2417 			nargv[i + 1] = argv[i];
2418 
2419 		/*
2420 		 * When in history mode, we treat each element as if it
		 * were in a separate loop, so that the headers group
2422 		 * bufctls with similar histories.
2423 		 */
2424 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2425 		bhc.bhc_argc = argc + 1;
2426 		bhc.bhc_argv = nargv;
2427 		bhc.bhc_ret = DCMD_OK;
2428 
2429 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2430 		    addr) == -1) {
2431 			mdb_warn("unable to walk bufctl_history");
2432 			return (DCMD_ERR);
2433 		}
2434 
2435 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2436 			mdb_printf("\n");
2437 
2438 		return (bhc.bhc_ret);
2439 	}
2440 
2441 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2442 		if (verbose) {
2443 			mdb_printf("%16s %16s %16s %16s\n"
2444 			    "%<u>%16s %16s %16s %16s%</u>\n",
2445 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2446 			    "", "CACHE", "LASTLOG", "CONTENTS");
2447 		} else {
2448 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2449 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2450 		}
2451 	}
2452 
2453 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2454 		mdb_warn("couldn't read bufctl at %p", addr);
2455 		return (DCMD_ERR);
2456 	}
2457 
2458 	/*
2459 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2460 	 * the address does not really refer to a bufctl.
2461 	 */
2462 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2463 
2464 	if (caller != NULL) {
2465 		laddr = caller;
2466 		haddr = caller + sizeof (caller);
2467 
2468 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2469 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2470 			/*
2471 			 * We were provided an exact symbol value; any
2472 			 * address in the function is valid.
2473 			 */
2474 			laddr = (uintptr_t)sym.st_value;
2475 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2476 		}
2477 
2478 		for (i = 0; i < depth; i++)
2479 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2480 				break;
2481 
2482 		if (i == depth)
2483 			return (DCMD_OK);
2484 	}
2485 
2486 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2487 		return (DCMD_OK);
2488 
2489 	if (earliest != 0 && bc.bc_timestamp < earliest)
2490 		return (DCMD_OK);
2491 
2492 	if (latest != 0 && bc.bc_timestamp > latest)
2493 		return (DCMD_OK);
2494 
2495 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2496 		return (DCMD_OK);
2497 
2498 	if (flags & DCMD_PIPE_OUT) {
2499 		mdb_printf("%#lr\n", addr);
2500 		return (DCMD_OK);
2501 	}
2502 
2503 	if (verbose) {
2504 		mdb_printf(
2505 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2506 		    "%16s %16p %16p %16p\n",
2507 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2508 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2509 
2510 		mdb_inc_indent(17);
2511 		for (i = 0; i < depth; i++)
2512 			mdb_printf("%a\n", bc.bc_stack[i]);
2513 		mdb_dec_indent(17);
2514 		mdb_printf("\n");
2515 	} else {
2516 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2517 		    bc.bc_timestamp, bc.bc_thread);
2518 
2519 		for (i = 0; i < depth; i++) {
2520 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2521 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2522 				continue;
2523 			if (strncmp(c, "kmem_", 5) == 0)
2524 				continue;
2525 			mdb_printf(" %a\n", bc.bc_stack[i]);
2526 			break;
2527 		}
2528 
2529 		if (i >= depth)
2530 			mdb_printf("\n");
2531 	}
2532 
2533 	return (DCMD_OK);
2534 }
2535 
2536 typedef struct kmem_verify {
2537 	uint64_t *kmv_buf;		/* buffer to read cache contents into */
2538 	size_t kmv_size;		/* number of bytes in kmv_buf */
2539 	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* if set, don't print corruption sites */
2541 	struct kmem_cache kmv_cache;	/* the cache we're operating on */
2542 } kmem_verify_t;
2543 
2544 /*
2545  * verify_pattern()
2546  * 	verify that buf is filled with the pattern pat.
2547  */
2548 static int64_t
2549 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2550 {
2551 	/*LINTED*/
2552 	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2553 	uint64_t *buf;
2554 
2555 	for (buf = buf_arg; buf < bufend; buf++)
2556 		if (*buf != pat)
2557 			return ((uintptr_t)buf - (uintptr_t)buf_arg);
2558 	return (-1);
2559 }
2560 
2561 /*
2562  * verify_buftag()
2563  *	verify that btp->bt_bxstat == (bcp ^ pat)
2564  */
2565 static int
2566 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
2567 {
2568 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2569 }
2570 
2571 /*
2572  * verify_free()
2573  * 	verify the integrity of a free block of memory by checking
2574  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
2575  */
2576 /*ARGSUSED1*/
2577 static int
2578 verify_free(uintptr_t addr, const void *data, void *private)
2579 {
2580 	kmem_verify_t *kmv = (kmem_verify_t *)private;
2581 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
2582 	int64_t corrupt;		/* corruption offset */
2583 	kmem_buftag_t *buftagp;		/* ptr to buftag */
2584 	kmem_cache_t *cp = &kmv->kmv_cache;
2585 	int besilent = kmv->kmv_besilent;
2586 
2587 	/*LINTED*/
2588 	buftagp = KMEM_BUFTAG(cp, buf);
2589 
2590 	/*
2591 	 * Read the buffer to check.
2592 	 */
2593 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
2594 		if (!besilent)
2595 			mdb_warn("couldn't read %p", addr);
2596 		return (WALK_NEXT);
2597 	}
2598 
2599 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2600 	    KMEM_FREE_PATTERN)) >= 0) {
2601 		if (!besilent)
2602 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2603 			    addr, (uintptr_t)addr + corrupt);
2604 		goto corrupt;
2605 	}
2606 	/*
2607 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
2608 	 * the first bytes of the buffer, hence we cannot check for red
2609 	 * zone corruption.
2610 	 */
2611 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
2612 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
2613 		if (!besilent)
2614 			mdb_printf("buffer %p (free) seems to "
2615 			    "have a corrupt redzone pattern\n", addr);
2616 		goto corrupt;
2617 	}
2618 
2619 	/*
2620 	 * confirm bufctl pointer integrity.
2621 	 */
2622 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
2623 		if (!besilent)
2624 			mdb_printf("buffer %p (free) has a corrupt "
2625 			    "buftag\n", addr);
2626 		goto corrupt;
2627 	}
2628 
2629 	return (WALK_NEXT);
2630 corrupt:
2631 	kmv->kmv_corruption++;
2632 	return (WALK_NEXT);
2633 }
2634 
2635 /*
2636  * verify_alloc()
2637  * 	Verify that the buftag of an allocated buffer makes sense with respect
2638  * 	to the buffer.
2639  */
2640 /*ARGSUSED1*/
2641 static int
2642 verify_alloc(uintptr_t addr, const void *data, void *private)
2643 {
2644 	kmem_verify_t *kmv = (kmem_verify_t *)private;
2645 	kmem_cache_t *cp = &kmv->kmv_cache;
2646 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
2647 	/*LINTED*/
2648 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
2649 	uint32_t *ip = (uint32_t *)buftagp;
2650 	uint8_t *bp = (uint8_t *)buf;
2651 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2652 	int besilent = kmv->kmv_besilent;
2653 
2654 	/*
2655 	 * Read the buffer to check.
2656 	 */
2657 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
2658 		if (!besilent)
2659 			mdb_warn("couldn't read %p", addr);
2660 		return (WALK_NEXT);
2661 	}
2662 
2663 	/*
2664 	 * There are two cases to handle:
2665 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
2666 	 *    0xfeedfacefeedface at the end of it
2667 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
2668 	 *    0xbb just past the end of the region in use.  At the buftag,
2669 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2670 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2671 	 *    endianness), followed by 32 bits containing the offset of the
2672 	 *    0xbb byte in the buffer.
2673 	 *
2674 	 * Finally, the two 32-bit words that comprise the second half of the
2675 	 * buftag should xor to KMEM_BUFTAG_ALLOC
2676 	 */
2677 
2678 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
2679 		looks_ok = 1;
2680 	else if (!KMEM_SIZE_VALID(ip[1]))
2681 		size_ok = 0;
2682 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
2683 		looks_ok = 1;
2684 	else
2685 		size_ok = 0;
2686 
2687 	if (!size_ok) {
2688 		if (!besilent)
2689 			mdb_printf("buffer %p (allocated) has a corrupt "
2690 			    "redzone size encoding\n", addr);
2691 		goto corrupt;
2692 	}
2693 
2694 	if (!looks_ok) {
2695 		if (!besilent)
2696 			mdb_printf("buffer %p (allocated) has a corrupt "
2697 			    "redzone signature\n", addr);
2698 		goto corrupt;
2699 	}
2700 
2701 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
2702 		if (!besilent)
2703 			mdb_printf("buffer %p (allocated) has a "
2704 			    "corrupt buftag\n", addr);
2705 		goto corrupt;
2706 	}
2707 
2708 	return (WALK_NEXT);
2709 corrupt:
2710 	kmv->kmv_corruption++;
2711 	return (WALK_NEXT);
2712 }
2713 
2714 /*ARGSUSED2*/
2715 int
2716 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2717 {
2718 	if (flags & DCMD_ADDRSPEC) {
2719 		int check_alloc = 0, check_free = 0;
2720 		kmem_verify_t kmv;
2721 
2722 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
2723 		    addr) == -1) {
2724 			mdb_warn("couldn't read kmem_cache %p", addr);
2725 			return (DCMD_ERR);
2726 		}
2727 
2728 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
2729 		    sizeof (kmem_buftag_t);
2730 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
2731 		kmv.kmv_corruption = 0;
2732 
2733 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
2734 			check_alloc = 1;
2735 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
2736 				check_free = 1;
2737 		} else {
2738 			if (!(flags & DCMD_LOOP)) {
2739 				mdb_warn("cache %p (%s) does not have "
2740 				    "redzone checking enabled\n", addr,
2741 				    kmv.kmv_cache.cache_name);
2742 			}
2743 			return (DCMD_ERR);
2744 		}
2745 
2746 		if (flags & DCMD_LOOP) {
2747 			/*
2748 			 * table mode, don't print out every corrupt buffer
2749 			 */
2750 			kmv.kmv_besilent = 1;
2751 		} else {
2752 			mdb_printf("Summary for cache '%s'\n",
2753 			    kmv.kmv_cache.cache_name);
2754 			mdb_inc_indent(2);
2755 			kmv.kmv_besilent = 0;
2756 		}
2757 
2758 		if (check_alloc)
2759 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
2760 		if (check_free)
2761 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
2762 
2763 		if (flags & DCMD_LOOP) {
2764 			if (kmv.kmv_corruption == 0) {
2765 				mdb_printf("%-*s %?p clean\n",
2766 				    KMEM_CACHE_NAMELEN,
2767 				    kmv.kmv_cache.cache_name, addr);
2768 			} else {
2769 				char *s = "";	/* optional s in "buffer[s]" */
2770 				if (kmv.kmv_corruption > 1)
2771 					s = "s";
2772 
2773 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2774 				    KMEM_CACHE_NAMELEN,
2775 				    kmv.kmv_cache.cache_name, addr,
2776 				    kmv.kmv_corruption, s);
2777 			}
2778 		} else {
2779 			/*
2780 			 * This is the more verbose mode, when the user has
			 * typed addr::kmem_verify.  If the cache was clean,
2782 			 * nothing will have yet been printed. So say something.
2783 			 */
2784 			if (kmv.kmv_corruption == 0)
2785 				mdb_printf("clean\n");
2786 
2787 			mdb_dec_indent(2);
2788 		}
2789 	} else {
2790 		/*
2791 		 * If the user didn't specify a cache to verify, we'll walk all
2792 		 * kmem_cache's, specifying ourself as a callback for each...
2793 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
2794 		 */
2795 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
2796 		    "Cache Name", "Addr", "Cache Integrity");
2797 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
2798 	}
2799 
2800 	return (DCMD_OK);
2801 }
2802 
2803 typedef struct vmem_node {
2804 	struct vmem_node *vn_next;
2805 	struct vmem_node *vn_parent;
2806 	struct vmem_node *vn_sibling;
2807 	struct vmem_node *vn_children;
2808 	uintptr_t vn_addr;
2809 	int vn_marked;
2810 	vmem_t vn_vmem;
2811 } vmem_node_t;
2812 
2813 typedef struct vmem_walk {
2814 	vmem_node_t *vw_root;
2815 	vmem_node_t *vw_current;
2816 } vmem_walk_t;
2817 
2818 int
2819 vmem_walk_init(mdb_walk_state_t *wsp)
2820 {
2821 	uintptr_t vaddr, paddr;
2822 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2823 	vmem_walk_t *vw;
2824 
2825 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
2826 		mdb_warn("couldn't read 'vmem_list'");
2827 		return (WALK_ERR);
2828 	}
2829 
2830 	while (vaddr != NULL) {
2831 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2832 		vp->vn_addr = vaddr;
2833 		vp->vn_next = head;
2834 		head = vp;
2835 
2836 		if (vaddr == wsp->walk_addr)
2837 			current = vp;
2838 
2839 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2840 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2841 			goto err;
2842 		}
2843 
2844 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2845 	}
2846 
2847 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2848 
2849 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2850 			vp->vn_sibling = root;
2851 			root = vp;
2852 			continue;
2853 		}
2854 
2855 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2856 			if (parent->vn_addr != paddr)
2857 				continue;
2858 			vp->vn_sibling = parent->vn_children;
2859 			parent->vn_children = vp;
2860 			vp->vn_parent = parent;
2861 			break;
2862 		}
2863 
2864 		if (parent == NULL) {
2865 			mdb_warn("couldn't find %p's parent (%p)\n",
2866 			    vp->vn_addr, paddr);
2867 			goto err;
2868 		}
2869 	}
2870 
2871 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2872 	vw->vw_root = root;
2873 
2874 	if (current != NULL)
2875 		vw->vw_current = current;
2876 	else
2877 		vw->vw_current = root;
2878 
2879 	wsp->walk_data = vw;
2880 	return (WALK_NEXT);
2881 err:
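	/* free the partially constructed list of vmem nodes */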
2882 	for (vp = head; head != NULL; vp = head) {
2883 		head = vp->vn_next;
2884 		mdb_free(vp, sizeof (vmem_node_t));
2885 	}
2886 
2887 	return (WALK_ERR);
2888 }
2889 
2890 int
2891 vmem_walk_step(mdb_walk_state_t *wsp)
2892 {
2893 	vmem_walk_t *vw = wsp->walk_data;
2894 	vmem_node_t *vp;
2895 	int rval;
2896 
2897 	if ((vp = vw->vw_current) == NULL)
2898 		return (WALK_DONE);
2899 
2900 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2901 
2902 	if (vp->vn_children != NULL) {
2903 		vw->vw_current = vp->vn_children;
2904 		return (rval);
2905 	}
2906 
2907 	do {
2908 		vw->vw_current = vp->vn_sibling;
2909 		vp = vp->vn_parent;
2910 	} while (vw->vw_current == NULL && vp != NULL);
2911 
2912 	return (rval);
2913 }
2914 
2915 /*
2916  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
2917  * children are visited before their parent.  We perform the postfix walk
2918  * iteratively (rather than recursively) to allow mdb to regain control
2919  * after each callback.
2920  */
2921 int
2922 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2923 {
2924 	vmem_walk_t *vw = wsp->walk_data;
2925 	vmem_node_t *vp = vw->vw_current;
2926 	int rval;
2927 
2928 	/*
2929 	 * If this node is marked, then we know that we have already visited
2930 	 * all of its children.  If the node has any siblings, they need to
2931 	 * be visited next; otherwise, we need to visit the parent.  Note
2932 	 * that vp->vn_marked will only be zero on the first invocation of
2933 	 * the step function.
2934 	 */
2935 	if (vp->vn_marked) {
2936 		if (vp->vn_sibling != NULL)
2937 			vp = vp->vn_sibling;
2938 		else if (vp->vn_parent != NULL)
2939 			vp = vp->vn_parent;
2940 		else {
2941 			/*
2942 			 * We have neither a parent, nor a sibling, and we
2943 			 * have already been visited; we're done.
2944 			 */
2945 			return (WALK_DONE);
2946 		}
2947 	}
2948 
2949 	/*
2950 	 * Before we visit this node, visit its children.
2951 	 */
2952 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2953 		vp = vp->vn_children;
2954 
2955 	vp->vn_marked = 1;
2956 	vw->vw_current = vp;
2957 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2958 
2959 	return (rval);
2960 }
2961 
2962 void
2963 vmem_walk_fini(mdb_walk_state_t *wsp)
2964 {
2965 	vmem_walk_t *vw = wsp->walk_data;
2966 	vmem_node_t *root = vw->vw_root;
2967 	int done;
2968 
2969 	if (root == NULL)
2970 		return;
2971 
2972 	if ((vw->vw_root = root->vn_children) != NULL)
2973 		vmem_walk_fini(wsp);
2974 
2975 	vw->vw_root = root->vn_sibling;
2976 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2977 	mdb_free(root, sizeof (vmem_node_t));
2978 
2979 	if (done) {
2980 		mdb_free(vw, sizeof (vmem_walk_t));
2981 	} else {
2982 		vmem_walk_fini(wsp);
2983 	}
2984 }
2985 
2986 typedef struct vmem_seg_walk {
2987 	uint8_t vsw_type;
2988 	uintptr_t vsw_start;
2989 	uintptr_t vsw_current;
2990 } vmem_seg_walk_t;
2991 
2992 /*ARGSUSED*/
2993 int
2994 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2995 {
2996 	vmem_seg_walk_t *vsw;
2997 
2998 	if (wsp->walk_addr == NULL) {
2999 		mdb_warn("vmem_%s does not support global walks\n", name);
3000 		return (WALK_ERR);
3001 	}
3002 
3003 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3004 
3005 	vsw->vsw_type = type;
3006 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3007 	vsw->vsw_current = vsw->vsw_start;
3008 
3009 	return (WALK_NEXT);
3010 }
3011 
3012 /*
3013  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3014  */
3015 #define	VMEM_NONE	0
3016 
3017 int
3018 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3019 {
3020 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3021 }
3022 
3023 int
3024 vmem_free_walk_init(mdb_walk_state_t *wsp)
3025 {
3026 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3027 }
3028 
3029 int
3030 vmem_span_walk_init(mdb_walk_state_t *wsp)
3031 {
3032 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3033 }
3034 
3035 int
3036 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3037 {
3038 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3039 }
3040 
3041 int
3042 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3043 {
3044 	vmem_seg_t seg;
3045 	vmem_seg_walk_t *vsw = wsp->walk_data;
3046 	uintptr_t addr = vsw->vsw_current;
3047 	static size_t seg_size = 0;
3048 	int rval;
3049 
3050 	if (!seg_size) {
3051 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3052 			mdb_warn("failed to read 'vmem_seg_size'");
3053 			seg_size = sizeof (vmem_seg_t);
3054 		}
3055 	}
3056 
3057 	if (seg_size < sizeof (seg))
3058 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3059 
3060 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3061 		mdb_warn("couldn't read vmem_seg at %p", addr);
3062 		return (WALK_ERR);
3063 	}
3064 
3065 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3066 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3067 		rval = WALK_NEXT;
3068 	} else {
3069 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3070 	}
3071 
3072 	if (vsw->vsw_current == vsw->vsw_start)
3073 		return (WALK_DONE);
3074 
3075 	return (rval);
3076 }
3077 
3078 void
3079 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3080 {
3081 	vmem_seg_walk_t *vsw = wsp->walk_data;
3082 
3083 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3084 }
3085 
3086 #define	VMEM_NAMEWIDTH	22
3087 
3088 int
3089 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3090 {
3091 	vmem_t v, parent;
3092 	vmem_kstat_t *vkp = &v.vm_kstat;
3093 	uintptr_t paddr;
3094 	int ident = 0;
3095 	char c[VMEM_NAMEWIDTH];
3096 
3097 	if (!(flags & DCMD_ADDRSPEC)) {
3098 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3099 			mdb_warn("can't walk vmem");
3100 			return (DCMD_ERR);
3101 		}
3102 		return (DCMD_OK);
3103 	}
3104 
3105 	if (DCMD_HDRSPEC(flags))
3106 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3107 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3108 		    "TOTAL", "SUCCEED", "FAIL");
3109 
3110 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3111 		mdb_warn("couldn't read vmem at %p", addr);
3112 		return (DCMD_ERR);
3113 	}
3114 
3115 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3116 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3117 			mdb_warn("couldn't trace %p's ancestry", addr);
3118 			ident = 0;
3119 			break;
3120 		}
3121 		paddr = (uintptr_t)parent.vm_source;
3122 	}
3123 
3124 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3125 
3126 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3127 	    addr, VMEM_NAMEWIDTH, c,
3128 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3129 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3130 
3131 	return (DCMD_OK);
3132 }
3133 
3134 void
3135 vmem_seg_help(void)
3136 {
3137 	mdb_printf("%s\n",
3138 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3139 "\n"
3140 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3141 "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3142 "information.\n");
3143 	mdb_dec_indent(2);
3144 	mdb_printf("%<b>OPTIONS%</b>\n");
3145 	mdb_inc_indent(2);
3146 	mdb_printf("%s",
3147 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3148 "  -s    report the size of the segment, instead of the end address\n"
3149 "  -c caller\n"
3150 "        filter out segments without the function/PC in their stack trace\n"
3151 "  -e earliest\n"
3152 "        filter out segments timestamped before earliest\n"
3153 "  -l latest\n"
3154 "        filter out segments timestamped after latest\n"
3155 "  -m minsize\n"
3156 "        filer out segments smaller than minsize\n"
3157 "  -M maxsize\n"
3158 "        filer out segments larger than maxsize\n"
3159 "  -t thread\n"
3160 "        filter out segments not involving thread\n"
3161 "  -T type\n"
3162 "        filter out segments not of type 'type'\n"
3163 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3164 }
3165 
3166 /*ARGSUSED*/
3167 int
3168 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3169 {
3170 	vmem_seg_t vs;
3171 	pc_t *stk = vs.vs_stack;
3172 	uintptr_t sz;
3173 	uint8_t t;
3174 	const char *type = NULL;
3175 	GElf_Sym sym;
3176 	char c[MDB_SYM_NAMLEN];
3177 	int no_debug;
3178 	int i;
3179 	int depth;
3180 	uintptr_t laddr, haddr;
3181 
3182 	uintptr_t caller = NULL, thread = NULL;
3183 	uintptr_t minsize = 0, maxsize = 0;
3184 
3185 	hrtime_t earliest = 0, latest = 0;
3186 
3187 	uint_t size = 0;
3188 	uint_t verbose = 0;
3189 
3190 	if (!(flags & DCMD_ADDRSPEC))
3191 		return (DCMD_USAGE);
3192 
3193 	if (mdb_getopts(argc, argv,
3194 	    'c', MDB_OPT_UINTPTR, &caller,
3195 	    'e', MDB_OPT_UINT64, &earliest,
3196 	    'l', MDB_OPT_UINT64, &latest,
3197 	    's', MDB_OPT_SETBITS, TRUE, &size,
3198 	    'm', MDB_OPT_UINTPTR, &minsize,
3199 	    'M', MDB_OPT_UINTPTR, &maxsize,
3200 	    't', MDB_OPT_UINTPTR, &thread,
3201 	    'T', MDB_OPT_STR, &type,
3202 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3203 	    NULL) != argc)
3204 		return (DCMD_USAGE);
3205 
3206 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3207 		if (verbose) {
3208 			mdb_printf("%16s %4s %16s %16s %16s\n"
3209 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3210 			    "ADDR", "TYPE", "START", "END", "SIZE",
3211 			    "", "", "THREAD", "TIMESTAMP", "");
3212 		} else {
3213 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3214 			    "START", size? "SIZE" : "END", "WHO");
3215 		}
3216 	}
3217 
3218 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3219 		mdb_warn("couldn't read vmem_seg at %p", addr);
3220 		return (DCMD_ERR);
3221 	}
3222 
3223 	if (type != NULL) {
3224 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3225 			t = VMEM_ALLOC;
3226 		else if (strcmp(type, "FREE") == 0)
3227 			t = VMEM_FREE;
3228 		else if (strcmp(type, "SPAN") == 0)
3229 			t = VMEM_SPAN;
3230 		else if (strcmp(type, "ROTR") == 0 ||
3231 		    strcmp(type, "ROTOR") == 0)
3232 			t = VMEM_ROTOR;
3233 		else if (strcmp(type, "WLKR") == 0 ||
3234 		    strcmp(type, "WALKER") == 0)
3235 			t = VMEM_WALKER;
3236 		else {
3237 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3238 			    type);
3239 			return (DCMD_ERR);
3240 		}
3241 
3242 		if (vs.vs_type != t)
3243 			return (DCMD_OK);
3244 	}
3245 
3246 	sz = vs.vs_end - vs.vs_start;
3247 
3248 	if (minsize != 0 && sz < minsize)
3249 		return (DCMD_OK);
3250 
3251 	if (maxsize != 0 && sz > maxsize)
3252 		return (DCMD_OK);
3253 
3254 	t = vs.vs_type;
3255 	depth = vs.vs_depth;
3256 
3257 	/*
3258 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3259 	 */
3260 	no_debug = (t != VMEM_ALLOC) ||
3261 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3262 
3263 	if (no_debug) {
3264 		if (caller != NULL || thread != NULL || earliest != 0 ||
3265 		    latest != 0)
3266 			return (DCMD_OK);		/* not enough info */
3267 	} else {
3268 		if (caller != NULL) {
3269 			laddr = caller;
3270 			haddr = caller + sizeof (caller);
3271 
3272 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3273 			    sizeof (c), &sym) != -1 &&
3274 			    caller == (uintptr_t)sym.st_value) {
3275 				/*
3276 				 * We were provided an exact symbol value; any
3277 				 * address in the function is valid.
3278 				 */
3279 				laddr = (uintptr_t)sym.st_value;
3280 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3281 			}
3282 
3283 			for (i = 0; i < depth; i++)
3284 				if (vs.vs_stack[i] >= laddr &&
3285 				    vs.vs_stack[i] < haddr)
3286 					break;
3287 
3288 			if (i == depth)
3289 				return (DCMD_OK);
3290 		}
3291 
3292 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3293 			return (DCMD_OK);
3294 
3295 		if (earliest != 0 && vs.vs_timestamp < earliest)
3296 			return (DCMD_OK);
3297 
3298 		if (latest != 0 && vs.vs_timestamp > latest)
3299 			return (DCMD_OK);
3300 	}
3301 
3302 	type = (t == VMEM_ALLOC ? "ALLC" :
3303 	    t == VMEM_FREE ? "FREE" :
3304 	    t == VMEM_SPAN ? "SPAN" :
3305 	    t == VMEM_ROTOR ? "ROTR" :
3306 	    t == VMEM_WALKER ? "WLKR" :
3307 	    "????");
3308 
3309 	if (flags & DCMD_PIPE_OUT) {
3310 		mdb_printf("%#lr\n", addr);
3311 		return (DCMD_OK);
3312 	}
3313 
3314 	if (verbose) {
3315 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3316 		    addr, type, vs.vs_start, vs.vs_end, sz);
3317 
3318 		if (no_debug)
3319 			return (DCMD_OK);
3320 
3321 		mdb_printf("%16s %4s %16p %16llx\n",
3322 		    "", "", vs.vs_thread, vs.vs_timestamp);
3323 
3324 		mdb_inc_indent(17);
3325 		for (i = 0; i < depth; i++) {
3326 			mdb_printf("%a\n", stk[i]);
3327 		}
3328 		mdb_dec_indent(17);
3329 		mdb_printf("\n");
3330 	} else {
3331 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3332 		    vs.vs_start, size? sz : vs.vs_end);
3333 
3334 		if (no_debug) {
3335 			mdb_printf("\n");
3336 			return (DCMD_OK);
3337 		}
3338 
		for (i = 0; i < depth; i++) {
			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
			    c, sizeof (c), &sym) == -1)
				continue;
			if (strncmp(c, "vmem_", 5) == 0)
				continue;
			break;
		}

		/*
		 * If every frame was either unresolvable or in the vmem
		 * code itself, i equals depth; don't read past the end
		 * of stk[].
		 */
		if (i >= depth)
			mdb_printf("\n");
		else
			mdb_printf(" %a\n", stk[i]);
3348 	}
3349 	return (DCMD_OK);
3350 }
3351 
3352 typedef struct kmalog_data {
3353 	uintptr_t	kma_addr;
3354 	hrtime_t	kma_newest;
3355 } kmalog_data_t;
3356 
3357 /*ARGSUSED*/
3358 static int
3359 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3360 {
3361 	char name[KMEM_CACHE_NAMELEN + 1];
3362 	hrtime_t delta;
3363 	int i, depth;
3364 	size_t bufsize;
3365 
3366 	if (bcp->bc_timestamp == 0)
3367 		return (WALK_DONE);
3368 
3369 	if (kma->kma_newest == 0)
3370 		kma->kma_newest = bcp->bc_timestamp;
3371 
3372 	if (kma->kma_addr) {
3373 		if (mdb_vread(&bufsize, sizeof (bufsize),
3374 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3375 			mdb_warn(
3376 			    "failed to read cache_bufsize for cache at %p",
3377 			    bcp->bc_cache);
3378 			return (WALK_ERR);
3379 		}
3380 
3381 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3382 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3383 			return (WALK_NEXT);
3384 	}
3385 
3386 	delta = kma->kma_newest - bcp->bc_timestamp;
3387 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3388 
3389 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3390 	    &bcp->bc_cache->cache_name) <= 0)
3391 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3392 
3393 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3394 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3395 
3396 	for (i = 0; i < depth; i++)
3397 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3398 
3399 	return (WALK_NEXT);
3400 }
3401 
3402 int
3403 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3404 {
3405 	const char *logname = "kmem_transaction_log";
3406 	kmalog_data_t kma;
3407 
3408 	if (argc > 1)
3409 		return (DCMD_USAGE);
3410 
3411 	kma.kma_newest = 0;
3412 	if (flags & DCMD_ADDRSPEC)
3413 		kma.kma_addr = addr;
3414 	else
3415 		kma.kma_addr = NULL;
3416 
3417 	if (argc > 0) {
3418 		if (argv->a_type != MDB_TYPE_STRING)
3419 			return (DCMD_USAGE);
3420 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3421 			logname = "kmem_failure_log";
3422 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3423 			logname = "kmem_slab_log";
3424 		else
3425 			return (DCMD_USAGE);
3426 	}
3427 
3428 	if (mdb_readvar(&addr, logname) == -1) {
3429 		mdb_warn("failed to read %s log header pointer");
3430 		return (DCMD_ERR);
3431 	}
3432 
3433 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3434 		mdb_warn("failed to walk kmem log");
3435 		return (DCMD_ERR);
3436 	}
3437 
3438 	return (DCMD_OK);
3439 }
3440 
3441 /*
3442  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3443  * The first piece is a structure which we use to accumulate kmem_cache_t
3444  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3445  * walker; we either add all caches, or ones named explicitly as arguments.
3446  */
3447 
3448 typedef struct kmclist {
3449 	const char *kmc_name;			/* Name to match (or NULL) */
3450 	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
3451 	int kmc_nelems;				/* Num entries in kmc_caches */
3452 	int kmc_size;				/* Size of kmc_caches array */
3453 } kmclist_t;
3454 
3455 static int
3456 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3457 {
3458 	void *p;
3459 	int s;
3460 
3461 	if (kmc->kmc_name == NULL ||
3462 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3463 		/*
3464 		 * If we have a match, grow our array (if necessary), and then
3465 		 * add the virtual address of the matching cache to our list.
3466 		 */
3467 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3468 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3469 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3470 
3471 			bcopy(kmc->kmc_caches, p,
3472 			    sizeof (uintptr_t) * kmc->kmc_size);
3473 
3474 			kmc->kmc_caches = p;
3475 			kmc->kmc_size = s;
3476 		}
3477 
3478 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3479 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3480 	}
3481 
3482 	return (WALK_NEXT);
3483 }
3484 
3485 /*
3486  * The second piece of ::kmausers is a hash table of allocations.  Each
3487  * allocation owner is identified by its stack trace and data_size.  We then
3488  * track the total bytes of all such allocations, and the number of allocations
3489  * to report at the end.  Once we have a list of caches, we walk through the
3490  * allocated bufctls of each, and update our hash table accordingly.
3491  */
3492 
3493 typedef struct kmowner {
3494 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3495 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3496 	size_t kmo_signature;			/* Hash table signature */
3497 	uint_t kmo_num;				/* Number of allocations */
3498 	size_t kmo_data_size;			/* Size of each allocation */
3499 	size_t kmo_total_size;			/* Total bytes of allocation */
3500 	int kmo_depth;				/* Depth of stack trace */
3501 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
3502 } kmowner_t;
3503 
3504 typedef struct kmusers {
3505 	uintptr_t kmu_addr;			/* address of interest */
3506 	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
3507 	kmowner_t *kmu_hash;			/* Hash table of owners */
3508 	int kmu_nelems;				/* Number of entries in use */
3509 	int kmu_size;				/* Total number of entries */
3510 } kmusers_t;
3511 
3512 static void
3513 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3514     size_t size, size_t data_size)
3515 {
3516 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3517 	size_t bucket, signature = data_size;
3518 	kmowner_t *kmo, *kmoend;
3519 
3520 	/*
3521 	 * If the hash table is full, double its size and rehash everything.
3522 	 */
3523 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3524 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3525 
3526 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3527 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
3528 		kmu->kmu_hash = kmo;
3529 		kmu->kmu_size = s;
3530 
3531 		kmoend = kmu->kmu_hash + kmu->kmu_size;
3532 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
3533 			kmo->kmo_head = NULL;
3534 
3535 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
3536 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
3537 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
3538 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3539 			kmu->kmu_hash[bucket].kmo_head = kmo;
3540 		}
3541 	}
3542 
3543 	/*
3544 	 * Finish computing the hash signature from the stack trace, and then
3545 	 * see if the owner is in the hash table.  If so, update our stats.
3546 	 */
3547 	for (i = 0; i < depth; i++)
3548 		signature += bcp->bc_stack[i];
3549 
3550 	bucket = signature & (kmu->kmu_size - 1);
3551 
3552 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
3553 		if (kmo->kmo_signature == signature) {
3554 			size_t difference = 0;
3555 
3556 			difference |= kmo->kmo_data_size - data_size;
3557 			difference |= kmo->kmo_depth - depth;
3558 
3559 			for (i = 0; i < depth; i++) {
3560 				difference |= kmo->kmo_stack[i] -
3561 				    bcp->bc_stack[i];
3562 			}
3563 
3564 			if (difference == 0) {
3565 				kmo->kmo_total_size += size;
3566 				kmo->kmo_num++;
3567 				return;
3568 			}
3569 		}
3570 	}
3571 
3572 	/*
3573 	 * If the owner is not yet hashed, grab the next element and fill it
3574 	 * in based on the allocation information.
3575 	 */
3576 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
3577 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3578 	kmu->kmu_hash[bucket].kmo_head = kmo;
3579 
3580 	kmo->kmo_signature = signature;
3581 	kmo->kmo_num = 1;
3582 	kmo->kmo_data_size = data_size;
3583 	kmo->kmo_total_size = size;
3584 	kmo->kmo_depth = depth;
3585 
3586 	for (i = 0; i < depth; i++)
3587 		kmo->kmo_stack[i] = bcp->bc_stack[i];
3588 }
3589 
3590 /*
3591  * When ::kmausers is invoked without the -f flag, we simply update our hash
3592  * table with the information from each allocated bufctl.
3593  */
3594 /*ARGSUSED*/
3595 static int
3596 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
3597 {
3598 	const kmem_cache_t *cp = kmu->kmu_cache;
3599 
3600 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3601 	return (WALK_NEXT);
3602 }
3603 
3604 /*
3605  * When ::kmausers is invoked with the -f flag, we print out the information
3606  * for each bufctl as well as updating the hash table.
3607  */
3608 static int
3609 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
3610 {
3611 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3612 	const kmem_cache_t *cp = kmu->kmu_cache;
3613 	kmem_bufctl_t bufctl;
3614 
3615 	if (kmu->kmu_addr) {
		if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
3617 			mdb_warn("couldn't read bufctl at %p", addr);
3618 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
3619 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
3620 		    cp->cache_bufsize)
3621 			return (WALK_NEXT);
3622 	}
3623 
3624 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3625 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3626 
3627 	for (i = 0; i < depth; i++)
3628 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3629 
3630 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3631 	return (WALK_NEXT);
3632 }
3633 
3634 /*
 * We sort our results by total allocation size, largest first; the
 * three-way comparison avoids truncating a size_t difference to int.
3636  */
3637 static int
3638 kmownercmp(const void *lp, const void *rp)
3639 {
3640 	const kmowner_t *lhs = lp;
3641 	const kmowner_t *rhs = rp;
3642 
3643 	return (rhs->kmo_total_size - lhs->kmo_total_size);
3644 }
3645 
3646 /*
3647  * The main engine of ::kmausers is relatively straightforward: First we
3648  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
3649  * iterate over the allocated bufctls of each cache in the list.  Finally,
3650  * we sort and print our results.
3651  */
3652 /*ARGSUSED*/
3653 int
3654 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3655 {
3656 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3657 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3658 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
3659 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3660 	int opt_e = FALSE;		/* Include "small" users */
3661 	int opt_f = FALSE;		/* Print stack traces */
3662 
3663 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
3664 	kmowner_t *kmo, *kmoend;
3665 	int i, oelems;
3666 
3667 	kmclist_t kmc;
3668 	kmusers_t kmu;
3669 
3670 	bzero(&kmc, sizeof (kmc));
3671 	bzero(&kmu, sizeof (kmu));
3672 
3673 	while ((i = mdb_getopts(argc, argv,
3674 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3675 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3676 
3677 		argv += i;	/* skip past options we just processed */
3678 		argc -= i;	/* adjust argc */
3679 
3680 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3681 			return (DCMD_USAGE);
3682 
3683 		oelems = kmc.kmc_nelems;
3684 		kmc.kmc_name = argv->a_un.a_str;
3685 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
3686 
3687 		if (kmc.kmc_nelems == oelems) {
3688 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
3689 			return (DCMD_ERR);
3690 		}
3691 
3692 		do_all_caches = 0;
3693 		argv++;
3694 		argc--;
3695 	}
3696 
3697 	if (flags & DCMD_ADDRSPEC) {
3698 		opt_f = TRUE;
3699 		kmu.kmu_addr = addr;
3700 	} else {
3701 		kmu.kmu_addr = NULL;
3702 	}
3703 
3704 	if (opt_e)
3705 		mem_threshold = cnt_threshold = 0;
3706 
3707 	if (opt_f)
3708 		callback = (mdb_walk_cb_t)kmause2;
3709 
3710 	if (do_all_caches) {
3711 		kmc.kmc_name = NULL; /* match all cache names */
3712 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
3713 	}
3714 
3715 	for (i = 0; i < kmc.kmc_nelems; i++) {
3716 		uintptr_t cp = kmc.kmc_caches[i];
3717 		kmem_cache_t c;
3718 
3719 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3720 			mdb_warn("failed to read cache at %p", cp);
3721 			continue;
3722 		}
3723 
3724 		if (!(c.cache_flags & KMF_AUDIT)) {
3725 			if (!do_all_caches) {
3726 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
3727 				    c.cache_name);
3728 			}
3729 			continue;
3730 		}
3731 
3732 		kmu.kmu_cache = &c;
3733 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
3734 		audited_caches++;
3735 	}
3736 
3737 	if (audited_caches == 0 && do_all_caches) {
3738 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
3739 		return (DCMD_ERR);
3740 	}
3741 
3742 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
3743 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
3744 
3745 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
3746 		if (kmo->kmo_total_size < mem_threshold &&
3747 		    kmo->kmo_num < cnt_threshold)
3748 			continue;
3749 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3750 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
3751 		for (i = 0; i < kmo->kmo_depth; i++)
3752 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
3753 	}
3754 
3755 	return (DCMD_OK);
3756 }
3757 
3758 void
3759 kmausers_help(void)
3760 {
3761 	mdb_printf(
3762 	    "Displays the largest users of the kmem allocator, sorted by \n"
3763 	    "trace.  If one or more caches is specified, only those caches\n"
3764 	    "will be searched.  By default, all caches are searched.  If an\n"
3765 	    "address is specified, then only those allocations which include\n"
3766 	    "the given address are displayed.  Specifying an address implies\n"
3767 	    "-f.\n"
3768 	    "\n"
3769 	    "\t-e\tInclude all users, not just the largest\n"
3770 	    "\t-f\tDisplay individual allocations.  By default, users are\n"
3771 	    "\t\tgrouped by stack\n");
3772 }
3773 
3774 static int
3775 kmem_ready_check(void)
3776 {
3777 	int ready;
3778 
3779 	if (mdb_readvar(&ready, "kmem_ready") < 0)
3780 		return (-1); /* errno is set for us */
3781 
3782 	return (ready);
3783 }
3784 
3785 /*ARGSUSED*/
3786 static void
3787 kmem_ready_cb(void *arg)
3788 {
3789 	if (kmem_ready_check() <= 0)
3790 		return;
3791 
3792 	if (kmem_ready_cbhdl != NULL) {
3793 		mdb_callback_remove(kmem_ready_cbhdl);
3794 		kmem_ready_cbhdl = NULL;
3795 	}
3796 
3797 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
3798 }
3799 
3800 void
3801 kmem_init(void)
3802 {
3803 	mdb_walker_t w = {
3804 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
3805 		kmem_cache_walk_step, kmem_cache_walk_fini
3806 	};
3807 
3808 	/*
3809 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
3810 	 * immediately.  Walkers in the linkage structure won't be ready until
3811 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
3812 	 * is ready, we'll use the walker to initialize the caches.  If kmem
3813 	 * isn't ready, we'll register a callback that will allow us to defer
3814 	 * cache walking until it is.
3815 	 */
3816 	if (mdb_add_walker(&w) != 0) {
3817 		mdb_warn("failed to add kmem_cache walker");
3818 		return;
3819 	}
3820 
3821 	if (kmem_ready_check() > 0) {
3822 		kmem_ready_cb(NULL);
3823 	} else {
3824 		kmem_ready_cbhdl = mdb_callback_add(MDB_CALLBACK_STCHG,
3825 		    kmem_ready_cb, NULL);
3826 	}
3827 }
3828 
3829 typedef struct whatthread {
3830 	uintptr_t	wt_target;
3831 	int		wt_verbose;
3832 } whatthread_t;
3833 
3834 static int
3835 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
3836 {
3837 	uintptr_t current, data;
3838 
3839 	if (t->t_stkbase == NULL)
3840 		return (WALK_NEXT);
3841 
3842 	/*
3843 	 * Warn about swapped out threads, but drive on anyway
3844 	 */
3845 	if (!(t->t_schedflag & TS_LOAD)) {
3846 		mdb_warn("thread %p's stack swapped out\n", addr);
3847 		return (WALK_NEXT);
3848 	}
3849 
3850 	/*
3851 	 * Search the thread's stack for the given pointer.  Note that it would
3852 	 * be more efficient to follow ::kgrep's lead and read in page-sized
3853 	 * chunks, but this routine is already fast and simple.
3854 	 */
3855 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
3856 	    current += sizeof (uintptr_t)) {
3857 		if (mdb_vread(&data, sizeof (data), current) == -1) {
3858 			mdb_warn("couldn't read thread %p's stack at %p",
3859 			    addr, current);
3860 			return (WALK_ERR);
3861 		}
3862 
3863 		if (data == w->wt_target) {
3864 			if (w->wt_verbose) {
3865 				mdb_printf("%p in thread %p's stack%s\n",
3866 				    current, addr, stack_active(t, current));
3867 			} else {
3868 				mdb_printf("%#lr\n", addr);
3869 				return (WALK_NEXT);
3870 			}
3871 		}
3872 	}
3873 
3874 	return (WALK_NEXT);
3875 }
3876 
3877 int
3878 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3879 {
3880 	whatthread_t w;
3881 
3882 	if (!(flags & DCMD_ADDRSPEC))
3883 		return (DCMD_USAGE);
3884 
3885 	w.wt_verbose = FALSE;
3886 	w.wt_target = addr;
3887 
3888 	if (mdb_getopts(argc, argv,
3889 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
3890 		return (DCMD_USAGE);
3891 
3892 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
3893 	    == -1) {
3894 		mdb_warn("couldn't walk threads");
3895 		return (DCMD_ERR);
3896 	}
3897 
3898 	return (DCMD_OK);
3899 }
3900