/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "kmem.h"
#include "leaky.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}
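
/*
 * dprintf() takes a parenthesized argument list, e.g.
 * dprintf(("walking %p\n", addr)), so that a single macro argument can
 * carry an entire mdb_printf() argument list.  Debug output is toggled
 * at runtime with the ::kmem_debug dcmd below.
 */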

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");

	return (DCMD_OK);
}

typedef struct {
	uintptr_t kcw_first;
	uintptr_t kcw_current;
} kmem_cache_walk_t;
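
/*
 * The kmem_cache walker starts at the kmem_null_cache sentinel and
 * follows cache_next around the circular list of all caches, stopping
 * once it arrives back where it started.
 */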

int
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
	kmem_cache_walk_t *kcw;
	kmem_cache_t c;
	uintptr_t cp;
	GElf_Sym sym;

	if (mdb_lookup_by_name("kmem_null_cache", &sym) == -1) {
		mdb_warn("couldn't find kmem_null_cache");
		return (WALK_ERR);
	}

	cp = (uintptr_t)sym.st_value;

	if (mdb_vread(&c, sizeof (kmem_cache_t), cp) == -1) {
		mdb_warn("couldn't read cache at %p", cp);
		return (WALK_ERR);
	}

	kcw = mdb_alloc(sizeof (kmem_cache_walk_t), UM_SLEEP);

	kcw->kcw_first = cp;
	kcw->kcw_current = (uintptr_t)c.cache_next;
	wsp->walk_data = kcw;

	return (WALK_NEXT);
}

int
kmem_cache_walk_step(mdb_walk_state_t *wsp)
{
	kmem_cache_walk_t *kcw = wsp->walk_data;
	kmem_cache_t c;
	int status;

	if (mdb_vread(&c, sizeof (kmem_cache_t), kcw->kcw_current) == -1) {
		mdb_warn("couldn't read cache at %p", kcw->kcw_current);
		return (WALK_DONE);
	}

	status = wsp->walk_callback(kcw->kcw_current, &c, wsp->walk_cbdata);

	if ((kcw->kcw_current = (uintptr_t)c.cache_next) == kcw->kcw_first)
		return (WALK_DONE);

	return (status);
}

void
kmem_cache_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_cache_walk_t *kcw = wsp->walk_data;
	mdb_free(kcw, sizeof (kmem_cache_walk_t));
}
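
/*
 * The kmem_cpu_cache walker layers on top of the "cpu" walker: for each
 * cpu_t visited, the corresponding per-CPU cache lives at the cache's
 * address plus that CPU's cpu_cache_offset.  A typical invocation (the
 * address is assumed to be a kmem_cache_t) would be:
 *
 *	> addr::walk kmem_cpu_cache
 */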

int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");
		return (WALK_ERR);
	}

	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}

int
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;
	kmem_cpu_cache_t cc;

	caddr += cpu->cpu_cache_offset;

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}

int
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("kmem_slab doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	wsp->walk_data =
	    (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;

	return (WALK_NEXT);
}

int
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("kmem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	wsp->walk_data =
	    (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_freelist;

	/*
	 * Some consumers (kmem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache.  So
	 * if there are *no* partial slabs, report the last full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;

	return (WALK_NEXT);
}
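
/*
 * For both slab walkers above, walk_data holds the address of the
 * cache's cache_nullslab sentinel; the step function below stops when
 * the slab list wraps back around to it, and sanity-checks each slab's
 * slab_cache back-pointer along the way.
 */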

int
kmem_slab_walk_step(mdb_walk_state_t *wsp)
{
	kmem_slab_t s;
	uintptr_t addr = wsp->walk_addr;
	uintptr_t saddr = (uintptr_t)wsp->walk_data;
	uintptr_t caddr = saddr - offsetof(kmem_cache_t, cache_nullslab);

	if (addr == saddr)
		return (WALK_DONE);

	if (mdb_vread(&s, sizeof (s), addr) == -1) {
		mdb_warn("failed to read slab at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	if ((uintptr_t)s.slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    addr, caddr, s.slab_cache);
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)s.slab_next;

	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
}

int
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	kmem_cache_t c;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}

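/*
 * addrcmp() orders buffer addresses for the qsort()/bsearch() calls the
 * kmem walker uses to test whether a buffer is in the magazine layer;
 * bufctlcmp() orders audit bufctls newest-first for the kmem_log walker
 * below.
 */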
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}

static int
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
		return (-1);

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
		return (1);

	return (0);
}

typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;
	size_t kmhw_nelems;
	size_t kmhw_pos;
	kmem_bufctl_t kmhw_cur;
} kmem_hash_walk_t;
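
/*
 * The kmem_hash walker snapshots the cache's entire hash table into
 * debugger memory up front, then follows each bucket's bc_next chain,
 * reading one kmem_bufctl_t from the target at a time.
 */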

int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw;
	uintptr_t *hash;
	kmem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == NULL) {
		mdb_warn("kmem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;
	kmhw->kmhw_pos = 0;

	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = kmhw;

	return (WALK_NEXT);
}

int
kmem_hash_walk_step(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;
	uintptr_t addr = NULL;

	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
				break;
		}
	}
	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
}

void
kmem_hash_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;

	if (kmhw == NULL)
		return;

	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
	kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",
		    buf, caddr);
		return (-1);
	}

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
			return (-1);
		}
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;
			return (0);
		}
		bcp = bc.bc_next;
	}

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
	return (-1);
}

int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}

#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
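
/*
 * READMAG_ROUNDS() is only meant to be expanded inside
 * kmem_read_magazines(): it relies on that function's locals (kmp, mp,
 * magbsize, maglist, magcnt, magmax, i) and on its 'fail' label for
 * error recovery.
 */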

int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	kmem_bufctl_audit_t b;

	/*
	 * if KMF_AUDIT is not set, we know that we're looking at a
	 * kmem_bufctl_t.
	 */
	if (!(cp->cache_flags & KMF_AUDIT) ||
	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
		(void) memset(&b, 0, sizeof (b));
		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
}

typedef struct kmem_walk {
	int kmw_type;

	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;
	size_t kmw_csize;

	/*
	 * magazine layer
	 */
	void **kmw_maglist;
	size_t kmw_max;
	size_t kmw_count;
	size_t kmw_pos;

	/*
	 * slab layer
	 */
	char *kmw_valid;	/* to keep track of freed buffers */
	char *kmw_ubase;	/* buffer for slab data */
} kmem_walk_t;
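
/*
 * A kmem walk proceeds in up to three phases (see kmem_walk_step()):
 * the magazine layer snapshot taken at init time; then either the
 * cache's hash table (for allocated walks of KMF_HASH caches) or its
 * slabs; for small-slab caches the kmw_valid map is used to subtract
 * freelist and magazine buffers when reporting allocated buffers.
 */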
691 
692 static int
693 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
694 {
695 	kmem_walk_t *kmw;
696 	int ncpus, csize;
697 	kmem_cache_t *cp;
698 	size_t vm_quantum;
699 
700 	size_t magmax, magcnt;
701 	void **maglist = NULL;
702 	uint_t chunksize, slabsize;
703 	int status = WALK_ERR;
704 	uintptr_t addr = wsp->walk_addr;
705 	const char *layered;
706 
707 	type &= ~KM_HASH;
708 
709 	if (addr == NULL) {
710 		mdb_warn("kmem walk doesn't support global walks\n");
711 		return (WALK_ERR);
712 	}
713 
714 	dprintf(("walking %p\n", addr));
715 
716 	/*
717 	 * First we need to figure out how many CPUs are configured in the
718 	 * system to know how much to slurp out.
719 	 */
720 	mdb_readvar(&ncpus, "max_ncpus");
721 
722 	csize = KMEM_CACHE_SIZE(ncpus);
723 	cp = mdb_alloc(csize, UM_SLEEP);
724 
725 	if (mdb_vread(cp, csize, addr) == -1) {
726 		mdb_warn("couldn't read cache at addr %p", addr);
727 		goto out2;
728 	}
729 
730 	/*
731 	 * It's easy for someone to hand us an invalid cache address.
732 	 * Unfortunately, it is hard for this walker to survive an
733 	 * invalid cache cleanly.  So we make sure that:
734 	 *
735 	 *	1. the vmem arena for the cache is readable,
736 	 *	2. the vmem arena's quantum is a power of 2,
737 	 *	3. our slabsize is a multiple of the quantum, and
738 	 *	4. our chunksize is >0 and less than our slabsize.
739 	 */
740 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
741 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
742 	    vm_quantum == 0 ||
743 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
744 	    cp->cache_slabsize < vm_quantum ||
745 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
746 	    cp->cache_chunksize == 0 ||
747 	    cp->cache_chunksize > cp->cache_slabsize) {
748 		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
749 		goto out2;
750 	}
751 
752 	dprintf(("buf total is %d\n", cp->cache_buftotal));
753 
754 	if (cp->cache_buftotal == 0) {
755 		mdb_free(cp, csize);
756 		return (WALK_DONE);
757 	}
758 
759 	/*
760 	 * If they ask for bufctls, but it's a small-slab cache,
761 	 * there is nothing to report.
762 	 */
763 	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
764 		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
765 		    cp->cache_flags));
766 		mdb_free(cp, csize);
767 		return (WALK_DONE);
768 	}
769 
770 	/*
771 	 * If they want constructed buffers, but there's no constructor or
772 	 * the cache has DEADBEEF checking enabled, there is nothing to report.
773 	 */
774 	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
775 	    cp->cache_constructor == NULL ||
776 	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
777 		mdb_free(cp, csize);
778 		return (WALK_DONE);
779 	}
780 
781 	/*
782 	 * Read in the contents of the magazine layer
783 	 */
784 	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
785 	    &magmax, UM_SLEEP) == WALK_ERR)
786 		goto out2;
787 
788 	/*
789 	 * We have all of the buffers from the magazines;  if we are walking
790 	 * allocated buffers, sort them so we can bsearch them later.
791 	 */
792 	if (type & KM_ALLOCATED)
793 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
794 
795 	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
796 
797 	kmw->kmw_type = type;
798 	kmw->kmw_addr = addr;
799 	kmw->kmw_cp = cp;
800 	kmw->kmw_csize = csize;
801 	kmw->kmw_maglist = maglist;
802 	kmw->kmw_max = magmax;
803 	kmw->kmw_count = magcnt;
804 	kmw->kmw_pos = 0;
805 
806 	/*
807 	 * When walking allocated buffers in a KMF_HASH cache, we walk the
808 	 * hash table instead of the slab layer.
809 	 */
810 	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
811 		layered = "kmem_hash";
812 
813 		kmw->kmw_type |= KM_HASH;
814 	} else {
815 		/*
816 		 * If we are walking freed buffers, we only need the
817 		 * magazine layer plus the partially allocated slabs.
818 		 * To walk allocated buffers, we need all of the slabs.
819 		 */
820 		if (type & KM_ALLOCATED)
821 			layered = "kmem_slab";
822 		else
823 			layered = "kmem_slab_partial";
824 
825 		/*
826 		 * for small-slab caches, we read in the entire slab.  For
827 		 * freed buffers, we can just walk the freelist.  For
828 		 * allocated buffers, we use a 'valid' array to track
829 		 * the freed buffers.
830 		 */
831 		if (!(cp->cache_flags & KMF_HASH)) {
832 			chunksize = cp->cache_chunksize;
833 			slabsize = cp->cache_slabsize;
834 
835 			kmw->kmw_ubase = mdb_alloc(slabsize +
836 			    sizeof (kmem_bufctl_t), UM_SLEEP);
837 
838 			if (type & KM_ALLOCATED)
839 				kmw->kmw_valid =
840 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
841 		}
842 	}
843 
844 	status = WALK_NEXT;
845 
846 	if (mdb_layered_walk(layered, wsp) == -1) {
847 		mdb_warn("unable to start layered '%s' walk", layered);
848 		status = WALK_ERR;
849 	}
850 
851 out1:
852 	if (status == WALK_ERR) {
853 		if (kmw->kmw_valid)
854 			mdb_free(kmw->kmw_valid, slabsize / chunksize);
855 
856 		if (kmw->kmw_ubase)
857 			mdb_free(kmw->kmw_ubase, slabsize +
858 			    sizeof (kmem_bufctl_t));
859 
860 		if (kmw->kmw_maglist)
861 			mdb_free(kmw->kmw_maglist,
862 			    kmw->kmw_max * sizeof (uintptr_t));
863 
864 		mdb_free(kmw, sizeof (kmem_walk_t));
865 		wsp->walk_data = NULL;
866 	}
867 
868 out2:
869 	if (status == WALK_ERR)
870 		mdb_free(cp, csize);
871 
872 	return (status);
873 }
874 
875 int
876 kmem_walk_step(mdb_walk_state_t *wsp)
877 {
878 	kmem_walk_t *kmw = wsp->walk_data;
879 	int type = kmw->kmw_type;
880 	kmem_cache_t *cp = kmw->kmw_cp;
881 
882 	void **maglist = kmw->kmw_maglist;
883 	int magcnt = kmw->kmw_count;
884 
885 	uintptr_t chunksize, slabsize;
886 	uintptr_t addr;
887 	const kmem_slab_t *sp;
888 	const kmem_bufctl_t *bcp;
889 	kmem_bufctl_t bc;
890 
891 	int chunks;
892 	char *kbase;
893 	void *buf;
894 	int i, ret;
895 
896 	char *valid, *ubase;
897 
898 	/*
899 	 * first, handle the 'kmem_hash' layered walk case
900 	 */
901 	if (type & KM_HASH) {
902 		/*
903 		 * We have a buffer which has been allocated out of the
904 		 * global layer. We need to make sure that it's not
905 		 * actually sitting in a magazine before we report it as
906 		 * an allocated buffer.
907 		 */
908 		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
909 
910 		if (magcnt > 0 &&
911 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
912 		    addrcmp) != NULL)
913 			return (WALK_NEXT);
914 
915 		if (type & KM_BUFCTL)
916 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
917 
918 		return (kmem_walk_callback(wsp, (uintptr_t)buf));
919 	}
920 
921 	ret = WALK_NEXT;
922 
923 	addr = kmw->kmw_addr;
924 
925 	/*
926 	 * If we're walking freed buffers, report everything in the
927 	 * magazine layer before processing the first slab.
928 	 */
929 	if ((type & KM_FREE) && magcnt != 0) {
930 		kmw->kmw_count = 0;		/* only do this once */
931 		for (i = 0; i < magcnt; i++) {
932 			buf = maglist[i];
933 
934 			if (type & KM_BUFCTL) {
935 				uintptr_t out;
936 
937 				if (cp->cache_flags & KMF_BUFTAG) {
938 					kmem_buftag_t *btp;
939 					kmem_buftag_t tag;
940 
941 					/* LINTED - alignment */
942 					btp = KMEM_BUFTAG(cp, buf);
943 					if (mdb_vread(&tag, sizeof (tag),
944 					    (uintptr_t)btp) == -1) {
945 						mdb_warn("reading buftag for "
946 						    "%p at %p", buf, btp);
947 						continue;
948 					}
949 					out = (uintptr_t)tag.bt_bufctl;
950 				} else {
951 					if (kmem_hash_lookup(cp, addr, buf,
952 					    &out) == -1)
953 						continue;
954 				}
955 				ret = bufctl_walk_callback(cp, wsp, out);
956 			} else {
957 				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
958 			}
959 
960 			if (ret != WALK_NEXT)
961 				return (ret);
962 		}
963 	}
964 
965 	/*
966 	 * If they want constructed buffers, we're finished, since the
967 	 * magazine layer holds them all.
968 	 */
969 	if (type & KM_CONSTRUCTED)
970 		return (WALK_DONE);
971 
972 	/*
973 	 * Handle the buffers in the current slab
974 	 */
975 	chunksize = cp->cache_chunksize;
976 	slabsize = cp->cache_slabsize;
977 
978 	sp = wsp->walk_layer;
979 	chunks = sp->slab_chunks;
980 	kbase = sp->slab_base;
981 
982 	dprintf(("kbase is %p\n", kbase));
983 
984 	if (!(cp->cache_flags & KMF_HASH)) {
985 		valid = kmw->kmw_valid;
986 		ubase = kmw->kmw_ubase;
987 
988 		if (mdb_vread(ubase, chunks * chunksize,
989 		    (uintptr_t)kbase) == -1) {
990 			mdb_warn("failed to read slab contents at %p", kbase);
991 			return (WALK_ERR);
992 		}
993 
994 		/*
995 		 * Set up the valid map as fully allocated -- we'll punch
996 		 * out the freelist.
997 		 */
998 		if (type & KM_ALLOCATED)
999 			(void) memset(valid, 1, chunks);
1000 	} else {
1001 		valid = NULL;
1002 		ubase = NULL;
1003 	}
1004 
1005 	/*
1006 	 * walk the slab's freelist
1007 	 */
1008 	bcp = sp->slab_head;
1009 
1010 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1011 
	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one entry further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in;  we just need to determine
			 * its offset in the slab to find the
			 * kmem_bufctl_t.
			 */
			bc = *((kmem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
kmem_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (kmw == NULL)
		return;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;

	if (kmw->kmw_valid != NULL)
		mdb_free(kmw->kmw_valid, slabsize / chunksize);
	if (kmw->kmw_ubase != NULL)
		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));

	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
	mdb_free(kmw, sizeof (kmem_walk_t));
}

/*ARGSUSED*/
static int
kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & KMC_NOTOUCH)
		return (WALK_NEXT);

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
		return (WALK_DONE);

	return (WALK_NEXT);
}

#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
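
/*
 * Each walker below handles both forms: given an address, it walks that
 * single cache; given no address, KMEM_WALK_ALL() fans the walk out
 * across every cache (skipping KMC_NOTOUCH caches, per kmem_walk_all()
 * above).  For example, "::walk freemem" visits every freed buffer in
 * the system, while "addr::walk freemem" is restricted to one cache.
 */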

int
kmem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("kmem", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
}

int
bufctl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("bufctl", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
}

int
freemem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE));
}

int
freemem_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem_constructed", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
}

int
freectl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
}

int
freectl_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl_constructed", wsp);
	return (kmem_walk_init_common(wsp,
	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
}

typedef struct bufctl_history_walk {
	void		*bhw_next;
	kmem_cache_t	*bhw_cache;
	kmem_slab_t	*bhw_slab;
	hrtime_t	bhw_timestamp;
} bufctl_history_walk_t;
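
/*
 * The bufctl_history walker follows a bufctl's bc_lastlog chain back
 * through the transaction log, reporting earlier transactions on the
 * same buffer.  Requiring strictly decreasing timestamps (tracked in
 * bhw_timestamp) guards against looping if the log has wrapped.
 */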

int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	kmem_bufctl_audit_t bc;
	kmem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl;  in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}

int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	kmem_bufctl_audit_t bc;

	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)bc.bc_addr != baseaddr ||
	    bc.bc_cache != bhw->bhw_cache ||
	    bc.bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = bc.bc_lastlog;
	bhw->bhw_timestamp = bc.bc_timestamp;

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
bufctl_history_walk_fini(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;

	mdb_free(bhw, sizeof (*bhw));
}

typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;
	kmem_bufctl_audit_t **klw_sorted;
	kmem_log_header_t klw_lh;
	size_t klw_size;
	size_t klw_maxndx;
	size_t klw_ndx;
} kmem_log_walk_t;
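
/*
 * The kmem_log walker copies the entire transaction log into debugger
 * memory, builds an array of pointers to the audit records in each
 * chunk, and sorts it newest-first with bufctlcmp().  Callbacks are
 * handed the *target* address of each record, computed from its offset
 * in the local copy plus the log's lh_base.
 */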

int
kmem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	kmem_log_walk_t *klw;
	kmem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
	 * read the log whose kmem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == NULL) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
	lhp = &klw->klw_lh;

	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;

	if (mdb_vread(klw->klw_base, klw->klw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(klw->klw_base, klw->klw_size);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++)
			klw->klw_sorted[k++] = &chunk[j];
	}

	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	klw->klw_maxndx = k;
	wsp->walk_data = klw;

	return (WALK_NEXT);
}

int
kmem_log_walk_step(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;
	kmem_bufctl_audit_t *bcp;

	if (klw->klw_ndx == klw->klw_maxndx)
		return (WALK_DONE);

	bcp = klw->klw_sorted[klw->klw_ndx++];

	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
}

void
kmem_log_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;

	mdb_free(klw->klw_base, klw->klw_size);
	mdb_free(klw->klw_sorted, klw->klw_maxndx *
	    sizeof (kmem_bufctl_audit_t *));
	mdb_free(klw, sizeof (kmem_log_walk_t));
}

typedef struct allocdby_bufctl {
	uintptr_t abb_addr;
	hrtime_t abb_ts;
} allocdby_bufctl_t;

typedef struct allocdby_walk {
	const char *abw_walk;
	uintptr_t abw_thread;
	size_t abw_nbufs;
	size_t abw_size;
	allocdby_bufctl_t *abw_buf;
	size_t abw_ndx;
} allocdby_walk_t;
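
/*
 * The allocdby and freedby walkers take a thread pointer as their
 * starting address and gather, across every cache, the audit bufctls
 * whose bc_thread matches.  The match array grows by doubling (see
 * allocdby_walk_bufctl()) and is sorted newest-first before the walk
 * begins, so "thread::allocdby" reports the thread's most recent
 * allocations first.
 */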

int
allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
    allocdby_walk_t *abw)
{
	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
		return (WALK_NEXT);

	if (abw->abw_nbufs == abw->abw_size) {
		allocdby_bufctl_t *buf;
		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;

		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);

		bcopy(abw->abw_buf, buf, oldsize);
		mdb_free(abw->abw_buf, oldsize);

		abw->abw_size <<= 1;
		abw->abw_buf = buf;
	}

	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
	abw->abw_nbufs++;

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
{
	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
	    abw, addr) == -1) {
		mdb_warn("couldn't walk bufctl for cache %p", addr);
		return (WALK_DONE);
	}

	return (WALK_NEXT);
}

static int
allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
{
	if (lhs->abb_ts < rhs->abb_ts)
		return (1);
	if (lhs->abb_ts > rhs->abb_ts)
		return (-1);
	return (0);
}

static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}

int
allocdby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "bufctl"));
}

int
freedby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "freectl"));
}

int
allocdby_walk_step(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;
	kmem_bufctl_audit_t bc;
	uintptr_t addr;

	if (abw->abw_ndx == abw->abw_nbufs)
		return (WALK_DONE);

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (WALK_DONE);
	}

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
allocdby_walk_fini(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;

	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
	mdb_free(abw, sizeof (allocdby_walk_t));
}

/*ARGSUSED*/
int
allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
{
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	int i;

	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
	for (i = 0; i < bcp->bc_depth; i++) {
		if (mdb_lookup_by_addr(bcp->bc_stack[i],
		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
			continue;
		if (strncmp(c, "kmem_", 5) == 0)
			continue;
		mdb_printf("%s+0x%lx",
		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
		break;
	}
	mdb_printf("\n");

	return (WALK_NEXT);
}

static int
allocdby_common(uintptr_t addr, uint_t flags, const char *w)
{
	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");

	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
		mdb_warn("can't walk '%s' for %p", w, addr);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

/*ARGSUSED*/
int
allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "allocdby"));
}

/*ARGSUSED*/
int
freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "freedby"));
}

/*
 * Return a string describing the address in relation to the given thread's
 * stack.
 *
 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
 *
 * - If the address is above the stack pointer, return an empty string
 *   signifying that the address is active.
 *
 * - If the address is below the stack pointer, and the thread is not on proc,
 *   return " (below sp)".
 *
 * - If the address is below the stack pointer, and the thread is on proc,
 *   return " (possibly below sp)".  Depending on context, we may or may not
 *   have an accurate t_sp.
 */
static const char *
stack_active(const kthread_t *t, uintptr_t addr)
{
	uintptr_t panicstk;
	GElf_Sym sym;

	if (t->t_state == TS_FREE)
		return (" (inactive interrupt thread)");

	/*
	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
	 * no longer relates to the thread's real stack.
	 */
	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
		panicstk = (uintptr_t)sym.st_value;

		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
			return ("");
	}

	if (addr >= t->t_sp + STACK_BIAS)
		return ("");

	if (t->t_state == TS_ONPROC)
		return (" (possibly below sp)");

	return (" (below sp)");
}

typedef struct whatis {
	uintptr_t w_addr;
	const kmem_cache_t *w_cache;
	const vmem_t *w_vmem;
	size_t w_slab_align;
	int w_slab_found;
	int w_found;
	int w_kmem_lite_count;
	uint_t w_verbose;
	uint_t w_freemem;
	uint_t w_all;
	uint_t w_bufctl;
	uint_t w_idspace;
} whatis_t;
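
/*
 * ::whatis drives the whatis_walk_* callbacks below, searching kernel
 * modules, thread stacks, and the allocator layers for the given
 * address (see whatis() for the exact order).  The -a option reports
 * all matches instead of stopping at the first; -b reports bufctl
 * addresses; -i restricts the search to identifier (VMC_IDENTIFIER)
 * arenas and caches; -v narrates the search.
 */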

static void
whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
{
	/* LINTED pointer cast may result in improper alignment */
	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr);
	intptr_t stat;
	int count = 0;
	int i;
	pc_t callers[16];

	if (w->w_cache->cache_flags & KMF_REDZONE) {
		kmem_buftag_t bt;

		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
			goto done;

		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;

		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
			goto done;

		/*
		 * provide the bufctl ptr if it has useful information
		 */
		if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT))
			baddr = (uintptr_t)bt.bt_bufctl;

		if (w->w_cache->cache_flags & KMF_LITE) {
			count = w->w_kmem_lite_count;

			if (count * sizeof (pc_t) > sizeof (callers))
				count = 0;

			if (count > 0 &&
			    mdb_vread(callers, count * sizeof (pc_t),
			    btaddr +
			    offsetof(kmem_buftag_lite_t, bt_history)) == -1)
				count = 0;

			/*
			 * skip unused callers
			 */
			while (count > 0 && callers[count - 1] ==
			    (pc_t)KMEM_UNINITIALIZED_PATTERN)
				count--;
		}
	}

done:
	if (baddr == 0)
		mdb_printf("%p is %p+%p, %s from %s\n",
		    w->w_addr, addr, w->w_addr - addr,
		    w->w_freemem == FALSE ? "allocated" : "freed",
		    w->w_cache->cache_name);
	else
		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
		    w->w_addr, addr, w->w_addr - addr, baddr,
		    w->w_freemem == FALSE ? "allocated" : "freed",
		    w->w_cache->cache_name);

	if (count > 0) {
		mdb_inc_indent(8);
		mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"",
		    callers[0], (count != 1)? ", ":"\n");
		for (i = 1; i < count; i++)
			mdb_printf("%a%s", callers[i],
			    (i + 1 < count)? ", ":"\n");
		mdb_dec_indent(8);
	}
}

/*ARGSUSED*/
static int
whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w)
{
	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
		return (WALK_NEXT);

	whatis_print_kmem(addr, 0, w);
	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

static int
whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
{
	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
		return (WALK_NEXT);

	mdb_printf("%p is %p+%p ", w->w_addr,
	    vs->vs_start, w->w_addr - vs->vs_start);

	/*
	 * Always provide the vmem_seg pointer if it has a stack trace.
	 */
	if (w->w_bufctl == TRUE ||
	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
		mdb_printf("(vmem_seg %p) ", addr);
	}

	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
	    "freed " : "", w->w_vmem->vm_name);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

static int
whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
{
	const char *nm = vmem->vm_name;
	w->w_vmem = vmem;
	w->w_freemem = FALSE;

	if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
		return (WALK_NEXT);

	if (w->w_verbose)
		mdb_printf("Searching vmem arena %s...\n", nm);

	if (mdb_pwalk("vmem_alloc",
	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
		mdb_warn("can't walk vmem seg for %p", addr);
		return (WALK_NEXT);
	}

	if (w->w_found && w->w_all == FALSE)
		return (WALK_DONE);

	if (w->w_verbose)
		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);

	w->w_freemem = TRUE;

	if (mdb_pwalk("vmem_free",
	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
		mdb_warn("can't walk vmem seg for %p", addr);
		return (WALK_NEXT);
	}

	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
}

/*ARGSUSED*/
static int
whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w)
{
	uintptr_t addr;

	if (bcp == NULL)
		return (WALK_NEXT);

	addr = (uintptr_t)bcp->bc_addr;

	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
		return (WALK_NEXT);

	whatis_print_kmem(addr, baddr, w);
	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

/*ARGSUSED*/
static int
whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w)
{
	uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align);

	if ((w->w_addr - base) >= w->w_cache->cache_slabsize)
		return (WALK_NEXT);

	w->w_slab_found++;
	return (WALK_DONE);
}

static int
whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	char *walk, *freewalk;
	mdb_walk_cb_t func;
	vmem_t *vmp = c->cache_arena;

	if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace)
		return (WALK_NEXT);

	if (w->w_bufctl == FALSE) {
		walk = "kmem";
		freewalk = "freemem";
		func = (mdb_walk_cb_t)whatis_walk_kmem;
	} else {
		walk = "bufctl";
		freewalk = "freectl";
		func = (mdb_walk_cb_t)whatis_walk_bufctl;
	}

	w->w_cache = c;

	if (w->w_verbose)
		mdb_printf("Searching %s's slabs...\n", c->cache_name);

	/*
	 * Verify that the address is in one of the cache's slabs.  If not,
	 * we can skip the more expensive walkers.  (this is purely a
	 * heuristic -- as long as there are no false-negatives, we'll be fine)
	 *
	 * We try to get the cache's arena's quantum, since to accurately
	 * get the base of a slab, you have to align it to the quantum.  If
	 * it doesn't look sensible, we fall back to not aligning.
	 */
	if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align),
	    (uintptr_t)&vmp->vm_quantum) == -1) {
		mdb_warn("unable to read %p->cache_arena->vm_quantum", c);
		w->w_slab_align = 1;
	}

	if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 ||
	    (w->w_slab_align & (w->w_slab_align - 1))) {
		mdb_warn("%p's arena has invalid quantum (0x%p)\n", c,
		    w->w_slab_align);
		w->w_slab_align = 1;
	}

	w->w_slab_found = 0;
	if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w,
	    addr) == -1) {
		mdb_warn("can't find kmem_slab walker");
		return (WALK_DONE);
	}
	if (w->w_slab_found == 0)
		return (WALK_NEXT);

	if (c->cache_flags & KMF_LITE) {
		if (mdb_readvar(&w->w_kmem_lite_count,
		    "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16)
			w->w_kmem_lite_count = 0;
	}

	if (w->w_verbose)
		mdb_printf("Searching %s...\n", c->cache_name);

	w->w_freemem = FALSE;

	if (mdb_pwalk(walk, func, w, addr) == -1) {
		mdb_warn("can't find %s walker", walk);
		return (WALK_DONE);
	}

	if (w->w_found && w->w_all == FALSE)
		return (WALK_DONE);

	/*
	 * We have searched for allocated memory; now search for freed memory.
	 */
	if (w->w_verbose)
		mdb_printf("Searching %s for free memory...\n", c->cache_name);

	w->w_freemem = TRUE;

	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
		mdb_warn("can't find %s walker", freewalk);
		return (WALK_DONE);
	}

	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
}

static int
whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	if (c->cache_cflags & KMC_NOTOUCH)
		return (WALK_NEXT);

	return (whatis_walk_cache(addr, c, w));
}

static int
whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w)
{
	if (!(c->cache_cflags & KMC_NOTOUCH))
		return (WALK_NEXT);

	return (whatis_walk_cache(addr, c, w));
}

static int
whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w)
{
	/*
	 * Often, one calls ::whatis on an address from a thread structure.
	 * We use this opportunity to short circuit this case...
	 */
	if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) {
		mdb_printf("%p is %p+%p, allocated as a thread structure\n",
		    w->w_addr, addr, w->w_addr - addr);
		w->w_found++;
		return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
	}

	if (w->w_addr < (uintptr_t)t->t_stkbase ||
	    w->w_addr > (uintptr_t)t->t_stk)
		return (WALK_NEXT);

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr,
	    stack_active(t, w->w_addr));

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

static int
whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w)
{
	struct module mod;
	char name[MODMAXNAMELEN], *where;
	char c[MDB_SYM_NAMLEN];
	Shdr shdr;
	GElf_Sym sym;

	if (m->mod_mp == NULL)
		return (WALK_NEXT);

	if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
		mdb_warn("couldn't read modctl %p's module", addr);
		return (WALK_NEXT);
	}

	if (w->w_addr >= (uintptr_t)mod.text &&
	    w->w_addr < (uintptr_t)mod.text + mod.text_size) {
		where = "text segment";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.data &&
	    w->w_addr < (uintptr_t)mod.data + mod.data_size) {
		where = "data segment";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.bss &&
	    w->w_addr < (uintptr_t)mod.bss + mod.bss_size) {
		where = "bss";
		goto found;
	}

	if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
		mdb_warn("couldn't read symbol header for %p's module", addr);
		return (WALK_NEXT);
	}

	if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr <
	    (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) {
		where = "symtab";
		goto found;
	}

	if (w->w_addr >= (uintptr_t)mod.symspace &&
	    w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) {
		where = "symspace";
		goto found;
	}

	return (WALK_NEXT);

found:
	if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
		(void) mdb_snprintf(name, sizeof (name), "0x%p", addr);

	mdb_printf("%p is ", w->w_addr);

	/*
	 * If we found this address in a module, then there's a chance that
	 * it's actually a named symbol.  Try the symbol lookup.
	 */
	if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c),
	    &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value &&
	    w->w_addr < (uintptr_t)sym.st_value + sym.st_size) {
		mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value);
	}

	mdb_printf("in %s's %s\n", name, where);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

/*ARGSUSED*/
static int
whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w)
{
	static int machsize = 0;
	mdb_ctf_id_t id;

	if (machsize == 0) {
		if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0)
			machsize = mdb_ctf_type_size(id);
		else {
			mdb_warn("could not get size of page_t");
			machsize = sizeof (page_t);
		}
	}

	if (w->w_addr < addr || w->w_addr >= addr + machsize)
		return (WALK_NEXT);

	mdb_printf("%p is %p+%p, allocated as a page structure\n",
	    w->w_addr, addr, w->w_addr - addr);

	w->w_found++;
	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
}

int
whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	whatis_t w;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	w.w_verbose = FALSE;
	w.w_bufctl = FALSE;
	w.w_all = FALSE;
	w.w_idspace = FALSE;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
	    'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace,
	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
		return (DCMD_USAGE);

	w.w_addr = addr;
	w.w_found = 0;

	if (w.w_verbose)
		mdb_printf("Searching modules...\n");

	if (!w.w_idspace) {
		if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w)
		    == -1) {
			mdb_warn("couldn't find modctl walker");
			return (DCMD_ERR);
		}

		if (w.w_found && w.w_all == FALSE)
			return (DCMD_OK);

		/*
		 * Now search all thread stacks.  Yes, this is a little weak; we
2121 		 * can save a lot of work by first checking to see if the
2122 		 * address is in segkp vs. segkmem.  But hey, computers are
2123 		 * fast.
2124 		 */
2125 		if (w.w_verbose)
2126 			mdb_printf("Searching threads...\n");
2127 
2128 		if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w)
2129 		    == -1) {
2130 			mdb_warn("couldn't find thread walker");
2131 			return (DCMD_ERR);
2132 		}
2133 
2134 		if (w.w_found && w.w_all == FALSE)
2135 			return (DCMD_OK);
2136 
2137 		if (w.w_verbose)
2138 			mdb_printf("Searching page structures...\n");
2139 
2140 		if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w)
2141 		    == -1) {
2142 			mdb_warn("couldn't find page walker");
2143 			return (DCMD_ERR);
2144 		}
2145 
2146 		if (w.w_found && w.w_all == FALSE)
2147 			return (DCMD_OK);
2148 	}
2149 
2150 	if (mdb_walk("kmem_cache",
2151 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2152 		mdb_warn("couldn't find kmem_cache walker");
2153 		return (DCMD_ERR);
2154 	}
2155 
2156 	if (w.w_found && w.w_all == FALSE)
2157 		return (DCMD_OK);
2158 
2159 	if (mdb_walk("kmem_cache",
2160 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2161 		mdb_warn("couldn't find kmem_cache walker");
2162 		return (DCMD_ERR);
2163 	}
2164 
2165 	if (w.w_found && w.w_all == FALSE)
2166 		return (DCMD_OK);
2167 
2168 	if (mdb_walk("vmem_postfix",
2169 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2170 		mdb_warn("couldn't find vmem_postfix walker");
2171 		return (DCMD_ERR);
2172 	}
2173 
2174 	if (w.w_found == 0)
2175 		mdb_printf("%p is unknown\n", addr);
2176 
2177 	return (DCMD_OK);
2178 }
2179 
2180 void
2181 whatis_help(void)
2182 {
2183 	mdb_printf(
2184 	    "Given a virtual address, attempt to determine where it came\n"
2185 	    "from.\n"
2186 	    "\n"
2187 	    "\t-v\tVerbose output; display caches/arenas/etc as they are\n"
2188 	    "\t\tsearched\n"
2189 	    "\t-a\tFind all possible sources.  Default behavior is to stop at\n"
2190 	    "\t\tthe first (most specific) source.\n"
2191 	    "\t-i\tSearch only identifier arenas and caches.  By default\n"
2192 	    "\t\tthese are ignored.\n"
2193 	    "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n"
2194 	    "\t\trespectively.  Warning: if the buffer exists, but does not\n"
2195 	    "\t\thave a bufctl, it will not be reported.\n");
2196 }
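
/*
 * Example usage (illustrative only; addresses and offsets will vary):
 *
 *	> ffffff01c9876540::whatis
 *	ffffff01c9876540 is ffffff01c9876000+540, allocated as a thread structure
 *
 * With -a, the search continues past the first (most specific) match;
 * with -v, each set of sources is announced as it is searched.
 */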
2197 
2198 typedef struct kmem_log_cpu {
2199 	uintptr_t kmc_low;
2200 	uintptr_t kmc_high;
2201 } kmem_log_cpu_t;
2202 
2203 typedef struct kmem_log_data {
2204 	uintptr_t kmd_addr;
2205 	kmem_log_cpu_t *kmd_cpu;
2206 } kmem_log_data_t;
2207 
2208 int
2209 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2210     kmem_log_data_t *kmd)
2211 {
2212 	int i;
2213 	kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2214 	size_t bufsize;
2215 
2216 	for (i = 0; i < NCPU; i++) {
2217 		if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2218 			break;
2219 	}
2220 
2221 	if (kmd->kmd_addr) {
2222 		if (b->bc_cache == NULL)
2223 			return (WALK_NEXT);
2224 
2225 		if (mdb_vread(&bufsize, sizeof (bufsize),
2226 		    (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2227 			mdb_warn(
2228 			    "failed to read cache_bufsize for cache at %p",
2229 			    b->bc_cache);
2230 			return (WALK_ERR);
2231 		}
2232 
2233 		if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2234 		    kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2235 			return (WALK_NEXT);
2236 	}
2237 
2238 	if (i == NCPU)
2239 		mdb_printf("   ");
2240 	else
2241 		mdb_printf("%3d", i);
2242 
2243 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2244 	    b->bc_timestamp, b->bc_thread);
2245 
2246 	return (WALK_NEXT);
2247 }
2248 
2249 /*ARGSUSED*/
2250 int
2251 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2252 {
2253 	kmem_log_header_t lh;
2254 	kmem_cpu_log_header_t clh;
2255 	uintptr_t lhp, clhp;
2256 	int ncpus;
2257 	uintptr_t *cpu;
2258 	GElf_Sym sym;
2259 	kmem_log_cpu_t *kmc;
2260 	int i;
2261 	kmem_log_data_t kmd;
2262 	uint_t opt_b = FALSE;
2263 
2264 	if (mdb_getopts(argc, argv,
2265 	    'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2266 		return (DCMD_USAGE);
2267 
2268 	if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2269 		mdb_warn("failed to read 'kmem_transaction_log'");
2270 		return (DCMD_ERR);
2271 	}
2272 
2273 	if (lhp == NULL) {
2274 		mdb_warn("no kmem transaction log\n");
2275 		return (DCMD_ERR);
2276 	}
2277 
2278 	mdb_readvar(&ncpus, "ncpus");
2279 
2280 	if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2281 		mdb_warn("failed to read log header at %p", lhp);
2282 		return (DCMD_ERR);
2283 	}
2284 
2285 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2286 
2287 	cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2288 
2289 	if (mdb_lookup_by_name("cpu", &sym) == -1) {
2290 		mdb_warn("couldn't find 'cpu' array");
2291 		return (DCMD_ERR);
2292 	}
2293 
2294 	if (sym.st_size != NCPU * sizeof (uintptr_t)) {
		mdb_warn("expected 'cpu' to be of size %u; found %llu\n",
		    (uint_t)(NCPU * sizeof (uintptr_t)),
		    (u_longlong_t)sym.st_size);
2297 		return (DCMD_ERR);
2298 	}
2299 
2300 	if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2301 		mdb_warn("failed to read cpu array at %p", sym.st_value);
2302 		return (DCMD_ERR);
2303 	}
2304 
2305 	kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2306 	kmd.kmd_addr = NULL;
2307 	kmd.kmd_cpu = kmc;
2308 
2309 	for (i = 0; i < NCPU; i++) {
2310 
2311 		if (cpu[i] == NULL)
2312 			continue;
2313 
2314 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2315 			mdb_warn("cannot read cpu %d's log header at %p",
2316 			    i, clhp);
2317 			return (DCMD_ERR);
2318 		}
2319 
2320 		kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2321 		    (uintptr_t)lh.lh_base;
2322 		kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2323 
2324 		clhp += sizeof (kmem_cpu_log_header_t);
2325 	}
2326 
2327 	mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2328 	    "TIMESTAMP", "THREAD");
2329 
	/*
	 * If we have been passed an address, print out only log entries
	 * corresponding to that address.  If opt_b is specified, the address
	 * is interpreted as a buffer address, and every log entry whose
	 * buffer contains it is displayed; otherwise, the address is
	 * interpreted as a bufctl, and only that entry is displayed.
	 */
2335 	if (flags & DCMD_ADDRSPEC) {
2336 		kmem_bufctl_audit_t b;
2337 
2338 		if (opt_b) {
2339 			kmd.kmd_addr = addr;
2340 		} else {
2341 			if (mdb_vread(&b,
2342 			    sizeof (kmem_bufctl_audit_t), addr) == -1) {
2343 				mdb_warn("failed to read bufctl at %p", addr);
2344 				return (DCMD_ERR);
2345 			}
2346 
2347 			(void) kmem_log_walk(addr, &b, &kmd);
2348 
2349 			return (DCMD_OK);
2350 		}
2351 	}
2352 
2353 	if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2354 		mdb_warn("can't find kmem log walker");
2355 		return (DCMD_ERR);
2356 	}
2357 
2358 	return (DCMD_OK);
2359 }
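
/*
 * Example (illustrative; addresses and timestamps will vary):
 *
 *	> ::kmem_log
 *	CPU ADDR             BUFADDR          TIMESTAMP        THREAD
 *	  0 ffffff01c8a3b040 ffffff01c0123400  1c8e0f2d9a4b3   ffffff01c9876540
 *	...
 *
 *	> ffffff01c0123428::kmem_log -b
 *
 * The latter form displays only the entries whose buffer contains the
 * given address.
 */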
2360 
2361 typedef struct bufctl_history_cb {
2362 	int		bhc_flags;
2363 	int		bhc_argc;
2364 	const mdb_arg_t	*bhc_argv;
2365 	int		bhc_ret;
2366 } bufctl_history_cb_t;
2367 
2368 /*ARGSUSED*/
2369 static int
2370 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2371 {
2372 	bufctl_history_cb_t *bhc = arg;
2373 
2374 	bhc->bhc_ret =
2375 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2376 
2377 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2378 
2379 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2380 }
2381 
2382 void
2383 bufctl_help(void)
2384 {
2385 	mdb_printf("%s\n",
2386 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n");
2387 	mdb_dec_indent(2);
2388 	mdb_printf("%<b>OPTIONS%</b>\n");
2389 	mdb_inc_indent(2);
2390 	mdb_printf("%s",
2391 "  -v    Display the full content of the bufctl, including its stack trace\n"
2392 "  -h    retrieve the bufctl's transaction history, if available\n"
2393 "  -a addr\n"
2394 "        filter out bufctls not involving the buffer at addr\n"
2395 "  -c caller\n"
2396 "        filter out bufctls without the function/PC in their stack trace\n"
2397 "  -e earliest\n"
2398 "        filter out bufctls timestamped before earliest\n"
2399 "  -l latest\n"
2400 "        filter out bufctls timestamped after latest\n"
2401 "  -t thread\n"
2402 "        filter out bufctls not involving thread\n");
2403 }
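
/*
 * Example (illustrative; the cache and thread addresses are placeholders):
 * display, with full stack traces, the audit records of a cache that
 * involve a particular thread:
 *
 *	> ffffff01c0002008::walk bufctl | ::bufctl -v -t ffffff01c9876540
 */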
2404 
2405 int
2406 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2407 {
2408 	kmem_bufctl_audit_t bc;
2409 	uint_t verbose = FALSE;
2410 	uint_t history = FALSE;
2411 	uint_t in_history = FALSE;
2412 	uintptr_t caller = NULL, thread = NULL;
2413 	uintptr_t laddr, haddr, baddr = NULL;
2414 	hrtime_t earliest = 0, latest = 0;
2415 	int i, depth;
2416 	char c[MDB_SYM_NAMLEN];
2417 	GElf_Sym sym;
2418 
2419 	if (mdb_getopts(argc, argv,
2420 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2421 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2422 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2423 	    'c', MDB_OPT_UINTPTR, &caller,
2424 	    't', MDB_OPT_UINTPTR, &thread,
2425 	    'e', MDB_OPT_UINT64, &earliest,
2426 	    'l', MDB_OPT_UINT64, &latest,
2427 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2428 		return (DCMD_USAGE);
2429 
2430 	if (!(flags & DCMD_ADDRSPEC))
2431 		return (DCMD_USAGE);
2432 
2433 	if (in_history && !history)
2434 		return (DCMD_USAGE);
2435 
2436 	if (history && !in_history) {
2437 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2438 		    UM_SLEEP | UM_GC);
2439 		bufctl_history_cb_t bhc;
2440 
2441 		nargv[0].a_type = MDB_TYPE_STRING;
2442 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2443 
2444 		for (i = 0; i < argc; i++)
2445 			nargv[i + 1] = argv[i];
2446 
2447 		/*
2448 		 * When in history mode, we treat each element as if it
		 * were in a separate loop, so that the headers group
2450 		 * bufctls with similar histories.
2451 		 */
2452 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2453 		bhc.bhc_argc = argc + 1;
2454 		bhc.bhc_argv = nargv;
2455 		bhc.bhc_ret = DCMD_OK;
2456 
2457 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2458 		    addr) == -1) {
2459 			mdb_warn("unable to walk bufctl_history");
2460 			return (DCMD_ERR);
2461 		}
2462 
2463 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2464 			mdb_printf("\n");
2465 
2466 		return (bhc.bhc_ret);
2467 	}
2468 
2469 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2470 		if (verbose) {
2471 			mdb_printf("%16s %16s %16s %16s\n"
2472 			    "%<u>%16s %16s %16s %16s%</u>\n",
2473 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2474 			    "", "CACHE", "LASTLOG", "CONTENTS");
2475 		} else {
2476 			mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2477 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2478 		}
2479 	}
2480 
2481 	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2482 		mdb_warn("couldn't read bufctl at %p", addr);
2483 		return (DCMD_ERR);
2484 	}
2485 
2486 	/*
2487 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2488 	 * the address does not really refer to a bufctl.
2489 	 */
2490 	depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2491 
2492 	if (caller != NULL) {
2493 		laddr = caller;
2494 		haddr = caller + sizeof (caller);
2495 
2496 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2497 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2498 			/*
2499 			 * We were provided an exact symbol value; any
2500 			 * address in the function is valid.
2501 			 */
2502 			laddr = (uintptr_t)sym.st_value;
2503 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2504 		}
2505 
2506 		for (i = 0; i < depth; i++)
2507 			if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2508 				break;
2509 
2510 		if (i == depth)
2511 			return (DCMD_OK);
2512 	}
2513 
2514 	if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2515 		return (DCMD_OK);
2516 
2517 	if (earliest != 0 && bc.bc_timestamp < earliest)
2518 		return (DCMD_OK);
2519 
2520 	if (latest != 0 && bc.bc_timestamp > latest)
2521 		return (DCMD_OK);
2522 
2523 	if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2524 		return (DCMD_OK);
2525 
2526 	if (flags & DCMD_PIPE_OUT) {
2527 		mdb_printf("%#lr\n", addr);
2528 		return (DCMD_OK);
2529 	}
2530 
2531 	if (verbose) {
2532 		mdb_printf(
2533 		    "%<b>%16p%</b> %16p %16llx %16p\n"
2534 		    "%16s %16p %16p %16p\n",
2535 		    addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2536 		    "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2537 
2538 		mdb_inc_indent(17);
2539 		for (i = 0; i < depth; i++)
2540 			mdb_printf("%a\n", bc.bc_stack[i]);
2541 		mdb_dec_indent(17);
2542 		mdb_printf("\n");
2543 	} else {
2544 		mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2545 		    bc.bc_timestamp, bc.bc_thread);
2546 
2547 		for (i = 0; i < depth; i++) {
2548 			if (mdb_lookup_by_addr(bc.bc_stack[i],
2549 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2550 				continue;
2551 			if (strncmp(c, "kmem_", 5) == 0)
2552 				continue;
2553 			mdb_printf(" %a\n", bc.bc_stack[i]);
2554 			break;
2555 		}
2556 
2557 		if (i >= depth)
2558 			mdb_printf("\n");
2559 	}
2560 
2561 	return (DCMD_OK);
2562 }
2563 
2564 typedef struct kmem_verify {
2565 	uint64_t *kmv_buf;		/* buffer to read cache contents into */
2566 	size_t kmv_size;		/* number of bytes in kmv_buf */
2567 	int kmv_corruption;		/* > 0 if corruption found. */
	int kmv_besilent;		/* if set, suppress corruption reports */
2569 	struct kmem_cache kmv_cache;	/* the cache we're operating on */
2570 } kmem_verify_t;
2571 
2572 /*
2573  * verify_pattern()
2574  * 	verify that buf is filled with the pattern pat.
2575  */
2576 static int64_t
2577 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2578 {
2579 	/*LINTED*/
2580 	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2581 	uint64_t *buf;
2582 
2583 	for (buf = buf_arg; buf < bufend; buf++)
2584 		if (*buf != pat)
2585 			return ((uintptr_t)buf - (uintptr_t)buf_arg);
2586 	return (-1);
2587 }
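
/*
 * For example, if the third 64-bit word of a 64-byte free buffer has been
 * overwritten, verify_pattern(buf, 64, KMEM_FREE_PATTERN) returns 16, the
 * byte offset of the first word that no longer matches; it returns -1 if
 * every word matches.
 */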
2588 
2589 /*
2590  * verify_buftag()
2591  *	verify that btp->bt_bxstat == (bcp ^ pat)
2592  */
2593 static int
2594 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
2595 {
2596 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2597 }
2598 
2599 /*
2600  * verify_free()
2601  * 	verify the integrity of a free block of memory by checking
2602  * 	that it is filled with 0xdeadbeef and that its buftag is sane.
2603  */
2604 /*ARGSUSED1*/
2605 static int
2606 verify_free(uintptr_t addr, const void *data, void *private)
2607 {
2608 	kmem_verify_t *kmv = (kmem_verify_t *)private;
2609 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
2610 	int64_t corrupt;		/* corruption offset */
2611 	kmem_buftag_t *buftagp;		/* ptr to buftag */
2612 	kmem_cache_t *cp = &kmv->kmv_cache;
2613 	int besilent = kmv->kmv_besilent;
2614 
2615 	/*LINTED*/
2616 	buftagp = KMEM_BUFTAG(cp, buf);
2617 
2618 	/*
2619 	 * Read the buffer to check.
2620 	 */
2621 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
2622 		if (!besilent)
2623 			mdb_warn("couldn't read %p", addr);
2624 		return (WALK_NEXT);
2625 	}
2626 
2627 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2628 	    KMEM_FREE_PATTERN)) >= 0) {
2629 		if (!besilent)
2630 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2631 			    addr, (uintptr_t)addr + corrupt);
2632 		goto corrupt;
2633 	}
2634 	/*
2635 	 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
2636 	 * the first bytes of the buffer, hence we cannot check for red
2637 	 * zone corruption.
2638 	 */
2639 	if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
2640 	    buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
2641 		if (!besilent)
2642 			mdb_printf("buffer %p (free) seems to "
2643 			    "have a corrupt redzone pattern\n", addr);
2644 		goto corrupt;
2645 	}
2646 
2647 	/*
2648 	 * confirm bufctl pointer integrity.
2649 	 */
2650 	if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
2651 		if (!besilent)
2652 			mdb_printf("buffer %p (free) has a corrupt "
2653 			    "buftag\n", addr);
2654 		goto corrupt;
2655 	}
2656 
2657 	return (WALK_NEXT);
2658 corrupt:
2659 	kmv->kmv_corruption++;
2660 	return (WALK_NEXT);
2661 }
2662 
2663 /*
2664  * verify_alloc()
2665  * 	Verify that the buftag of an allocated buffer makes sense with respect
2666  * 	to the buffer.
2667  */
2668 /*ARGSUSED1*/
2669 static int
2670 verify_alloc(uintptr_t addr, const void *data, void *private)
2671 {
2672 	kmem_verify_t *kmv = (kmem_verify_t *)private;
2673 	kmem_cache_t *cp = &kmv->kmv_cache;
2674 	uint64_t *buf = kmv->kmv_buf;	/* buf to validate */
2675 	/*LINTED*/
2676 	kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
2677 	uint32_t *ip = (uint32_t *)buftagp;
2678 	uint8_t *bp = (uint8_t *)buf;
2679 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2680 	int besilent = kmv->kmv_besilent;
2681 
2682 	/*
2683 	 * Read the buffer to check.
2684 	 */
2685 	if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
2686 		if (!besilent)
2687 			mdb_warn("couldn't read %p", addr);
2688 		return (WALK_NEXT);
2689 	}
2690 
2691 	/*
2692 	 * There are two cases to handle:
2693 	 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
2694 	 *    0xfeedfacefeedface at the end of it
2695 	 * 2. If the buf was alloc'd using kmem_alloc, it will have
2696 	 *    0xbb just past the end of the region in use.  At the buftag,
2697 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2698 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2699 	 *    endianness), followed by 32 bits containing the offset of the
2700 	 *    0xbb byte in the buffer.
2701 	 *
2702 	 * Finally, the two 32-bit words that comprise the second half of the
2703 	 * buftag should xor to KMEM_BUFTAG_ALLOC
2704 	 */
2705 
2706 	if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
2707 		looks_ok = 1;
2708 	else if (!KMEM_SIZE_VALID(ip[1]))
2709 		size_ok = 0;
2710 	else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
2711 		looks_ok = 1;
2712 	else
2713 		size_ok = 0;
2714 
2715 	if (!size_ok) {
2716 		if (!besilent)
2717 			mdb_printf("buffer %p (allocated) has a corrupt "
2718 			    "redzone size encoding\n", addr);
2719 		goto corrupt;
2720 	}
2721 
2722 	if (!looks_ok) {
2723 		if (!besilent)
2724 			mdb_printf("buffer %p (allocated) has a corrupt "
2725 			    "redzone signature\n", addr);
2726 		goto corrupt;
2727 	}
2728 
2729 	if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
2730 		if (!besilent)
2731 			mdb_printf("buffer %p (allocated) has a "
2732 			    "corrupt buftag\n", addr);
2733 		goto corrupt;
2734 	}
2735 
2736 	return (WALK_NEXT);
2737 corrupt:
2738 	kmv->kmv_corruption++;
2739 	return (WALK_NEXT);
2740 }
2741 
2742 /*ARGSUSED2*/
2743 int
2744 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2745 {
2746 	if (flags & DCMD_ADDRSPEC) {
2747 		int check_alloc = 0, check_free = 0;
2748 		kmem_verify_t kmv;
2749 
2750 		if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
2751 		    addr) == -1) {
2752 			mdb_warn("couldn't read kmem_cache %p", addr);
2753 			return (DCMD_ERR);
2754 		}
2755 
2756 		kmv.kmv_size = kmv.kmv_cache.cache_buftag +
2757 		    sizeof (kmem_buftag_t);
2758 		kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
2759 		kmv.kmv_corruption = 0;
2760 
2761 		if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
2762 			check_alloc = 1;
2763 			if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
2764 				check_free = 1;
2765 		} else {
2766 			if (!(flags & DCMD_LOOP)) {
2767 				mdb_warn("cache %p (%s) does not have "
2768 				    "redzone checking enabled\n", addr,
2769 				    kmv.kmv_cache.cache_name);
2770 			}
2771 			return (DCMD_ERR);
2772 		}
2773 
2774 		if (flags & DCMD_LOOP) {
2775 			/*
2776 			 * table mode, don't print out every corrupt buffer
2777 			 */
2778 			kmv.kmv_besilent = 1;
2779 		} else {
2780 			mdb_printf("Summary for cache '%s'\n",
2781 			    kmv.kmv_cache.cache_name);
2782 			mdb_inc_indent(2);
2783 			kmv.kmv_besilent = 0;
2784 		}
2785 
2786 		if (check_alloc)
2787 			(void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
2788 		if (check_free)
2789 			(void) mdb_pwalk("freemem", verify_free, &kmv, addr);
2790 
2791 		if (flags & DCMD_LOOP) {
2792 			if (kmv.kmv_corruption == 0) {
2793 				mdb_printf("%-*s %?p clean\n",
2794 				    KMEM_CACHE_NAMELEN,
2795 				    kmv.kmv_cache.cache_name, addr);
2796 			} else {
2797 				char *s = "";	/* optional s in "buffer[s]" */
2798 				if (kmv.kmv_corruption > 1)
2799 					s = "s";
2800 
2801 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2802 				    KMEM_CACHE_NAMELEN,
2803 				    kmv.kmv_cache.cache_name, addr,
2804 				    kmv.kmv_corruption, s);
2805 			}
2806 		} else {
2807 			/*
2808 			 * This is the more verbose mode, when the user has
			 * typed addr::kmem_verify.  If the cache was clean,
2810 			 * nothing will have yet been printed. So say something.
2811 			 */
2812 			if (kmv.kmv_corruption == 0)
2813 				mdb_printf("clean\n");
2814 
2815 			mdb_dec_indent(2);
2816 		}
2817 	} else {
2818 		/*
2819 		 * If the user didn't specify a cache to verify, we'll walk all
		 * kmem_cache's, specifying ourselves as a callback for each...
2821 		 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
2822 		 */
		mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN,
2824 		    "Cache Name", "Addr", "Cache Integrity");
2825 		(void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
2826 	}
2827 
2828 	return (DCMD_OK);
2829 }
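
/*
 * Example (illustrative; cache names, addresses, and results will vary):
 *
 *	> ::kmem_verify
 *	Cache Name                     Addr             Cache Integrity
 *	kmem_alloc_8                   ffffff01c0002008 clean
 *	kmem_alloc_16                  ffffff01c0003008 1 corrupt buffer
 *	...
 *
 *	> ffffff01c0003008::kmem_verify
 *	Summary for cache 'kmem_alloc_16'
 *	  buffer ffffff01c0123400 (free) seems corrupted, at ffffff01c0123410
 */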
2830 
2831 typedef struct vmem_node {
2832 	struct vmem_node *vn_next;
2833 	struct vmem_node *vn_parent;
2834 	struct vmem_node *vn_sibling;
2835 	struct vmem_node *vn_children;
2836 	uintptr_t vn_addr;
2837 	int vn_marked;
2838 	vmem_t vn_vmem;
2839 } vmem_node_t;
2840 
2841 typedef struct vmem_walk {
2842 	vmem_node_t *vw_root;
2843 	vmem_node_t *vw_current;
2844 } vmem_walk_t;
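
/*
 * For example (the arena names here are merely illustrative), the tree
 * built below might look like:
 *
 *	heap
 *	  kmem_metadata
 *	    kmem_cache
 *	  kmem_va
 *	    kmem_default
 *
 * The "vmem" walk visits each arena before its children; the
 * "vmem_postfix" walk (below) visits all children before their parent.
 */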
2845 
2846 int
2847 vmem_walk_init(mdb_walk_state_t *wsp)
2848 {
2849 	uintptr_t vaddr, paddr;
2850 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2851 	vmem_walk_t *vw;
2852 
2853 	if (mdb_readvar(&vaddr, "vmem_list") == -1) {
2854 		mdb_warn("couldn't read 'vmem_list'");
2855 		return (WALK_ERR);
2856 	}
2857 
2858 	while (vaddr != NULL) {
2859 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2860 		vp->vn_addr = vaddr;
2861 		vp->vn_next = head;
2862 		head = vp;
2863 
2864 		if (vaddr == wsp->walk_addr)
2865 			current = vp;
2866 
2867 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2868 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2869 			goto err;
2870 		}
2871 
2872 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2873 	}
2874 
2875 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2876 
2877 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2878 			vp->vn_sibling = root;
2879 			root = vp;
2880 			continue;
2881 		}
2882 
2883 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2884 			if (parent->vn_addr != paddr)
2885 				continue;
2886 			vp->vn_sibling = parent->vn_children;
2887 			parent->vn_children = vp;
2888 			vp->vn_parent = parent;
2889 			break;
2890 		}
2891 
2892 		if (parent == NULL) {
2893 			mdb_warn("couldn't find %p's parent (%p)\n",
2894 			    vp->vn_addr, paddr);
2895 			goto err;
2896 		}
2897 	}
2898 
2899 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2900 	vw->vw_root = root;
2901 
2902 	if (current != NULL)
2903 		vw->vw_current = current;
2904 	else
2905 		vw->vw_current = root;
2906 
2907 	wsp->walk_data = vw;
2908 	return (WALK_NEXT);
2909 err:
2910 	for (vp = head; head != NULL; vp = head) {
2911 		head = vp->vn_next;
2912 		mdb_free(vp, sizeof (vmem_node_t));
2913 	}
2914 
2915 	return (WALK_ERR);
2916 }
2917 
2918 int
2919 vmem_walk_step(mdb_walk_state_t *wsp)
2920 {
2921 	vmem_walk_t *vw = wsp->walk_data;
2922 	vmem_node_t *vp;
2923 	int rval;
2924 
2925 	if ((vp = vw->vw_current) == NULL)
2926 		return (WALK_DONE);
2927 
2928 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2929 
2930 	if (vp->vn_children != NULL) {
2931 		vw->vw_current = vp->vn_children;
2932 		return (rval);
2933 	}
2934 
2935 	do {
2936 		vw->vw_current = vp->vn_sibling;
2937 		vp = vp->vn_parent;
2938 	} while (vw->vw_current == NULL && vp != NULL);
2939 
2940 	return (rval);
2941 }
2942 
2943 /*
 * The "vmem_postfix" walk walks the vmem arenas in postfix order; all
2945  * children are visited before their parent.  We perform the postfix walk
2946  * iteratively (rather than recursively) to allow mdb to regain control
2947  * after each callback.
2948  */
2949 int
2950 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2951 {
2952 	vmem_walk_t *vw = wsp->walk_data;
2953 	vmem_node_t *vp = vw->vw_current;
2954 	int rval;
2955 
2956 	/*
2957 	 * If this node is marked, then we know that we have already visited
2958 	 * all of its children.  If the node has any siblings, they need to
2959 	 * be visited next; otherwise, we need to visit the parent.  Note
2960 	 * that vp->vn_marked will only be zero on the first invocation of
2961 	 * the step function.
2962 	 */
2963 	if (vp->vn_marked) {
2964 		if (vp->vn_sibling != NULL)
2965 			vp = vp->vn_sibling;
2966 		else if (vp->vn_parent != NULL)
2967 			vp = vp->vn_parent;
2968 		else {
2969 			/*
2970 			 * We have neither a parent, nor a sibling, and we
2971 			 * have already been visited; we're done.
2972 			 */
2973 			return (WALK_DONE);
2974 		}
2975 	}
2976 
2977 	/*
2978 	 * Before we visit this node, visit its children.
2979 	 */
2980 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2981 		vp = vp->vn_children;
2982 
2983 	vp->vn_marked = 1;
2984 	vw->vw_current = vp;
2985 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2986 
2987 	return (rval);
2988 }
2989 
2990 void
2991 vmem_walk_fini(mdb_walk_state_t *wsp)
2992 {
2993 	vmem_walk_t *vw = wsp->walk_data;
2994 	vmem_node_t *root = vw->vw_root;
2995 	int done;
2996 
2997 	if (root == NULL)
2998 		return;
2999 
3000 	if ((vw->vw_root = root->vn_children) != NULL)
3001 		vmem_walk_fini(wsp);
3002 
3003 	vw->vw_root = root->vn_sibling;
3004 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3005 	mdb_free(root, sizeof (vmem_node_t));
3006 
3007 	if (done) {
3008 		mdb_free(vw, sizeof (vmem_walk_t));
3009 	} else {
3010 		vmem_walk_fini(wsp);
3011 	}
3012 }
3013 
3014 typedef struct vmem_seg_walk {
3015 	uint8_t vsw_type;
3016 	uintptr_t vsw_start;
3017 	uintptr_t vsw_current;
3018 } vmem_seg_walk_t;
3019 
3020 /*ARGSUSED*/
3021 int
3022 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3023 {
3024 	vmem_seg_walk_t *vsw;
3025 
3026 	if (wsp->walk_addr == NULL) {
3027 		mdb_warn("vmem_%s does not support global walks\n", name);
3028 		return (WALK_ERR);
3029 	}
3030 
3031 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3032 
3033 	vsw->vsw_type = type;
3034 	vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3035 	vsw->vsw_current = vsw->vsw_start;
3036 
3037 	return (WALK_NEXT);
3038 }
3039 
3040 /*
3041  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3042  */
3043 #define	VMEM_NONE	0
3044 
3045 int
3046 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3047 {
3048 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3049 }
3050 
3051 int
3052 vmem_free_walk_init(mdb_walk_state_t *wsp)
3053 {
3054 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3055 }
3056 
3057 int
3058 vmem_span_walk_init(mdb_walk_state_t *wsp)
3059 {
3060 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3061 }
3062 
3063 int
3064 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3065 {
3066 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3067 }
3068 
3069 int
3070 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3071 {
3072 	vmem_seg_t seg;
3073 	vmem_seg_walk_t *vsw = wsp->walk_data;
3074 	uintptr_t addr = vsw->vsw_current;
3075 	static size_t seg_size = 0;
3076 	int rval;
3077 
3078 	if (!seg_size) {
3079 		if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3080 			mdb_warn("failed to read 'vmem_seg_size'");
3081 			seg_size = sizeof (vmem_seg_t);
3082 		}
3083 	}
3084 
3085 	if (seg_size < sizeof (seg))
3086 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3087 
3088 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3089 		mdb_warn("couldn't read vmem_seg at %p", addr);
3090 		return (WALK_ERR);
3091 	}
3092 
3093 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3094 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3095 		rval = WALK_NEXT;
3096 	} else {
3097 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3098 	}
3099 
3100 	if (vsw->vsw_current == vsw->vsw_start)
3101 		return (WALK_DONE);
3102 
3103 	return (rval);
3104 }
3105 
3106 void
3107 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3108 {
3109 	vmem_seg_walk_t *vsw = wsp->walk_data;
3110 
3111 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3112 }
3113 
3114 #define	VMEM_NAMEWIDTH	22
3115 
3116 int
3117 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3118 {
3119 	vmem_t v, parent;
3120 	vmem_kstat_t *vkp = &v.vm_kstat;
3121 	uintptr_t paddr;
3122 	int ident = 0;
3123 	char c[VMEM_NAMEWIDTH];
3124 
3125 	if (!(flags & DCMD_ADDRSPEC)) {
3126 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3127 			mdb_warn("can't walk vmem");
3128 			return (DCMD_ERR);
3129 		}
3130 		return (DCMD_OK);
3131 	}
3132 
3133 	if (DCMD_HDRSPEC(flags))
3134 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3135 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3136 		    "TOTAL", "SUCCEED", "FAIL");
3137 
3138 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3139 		mdb_warn("couldn't read vmem at %p", addr);
3140 		return (DCMD_ERR);
3141 	}
3142 
3143 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3144 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3145 			mdb_warn("couldn't trace %p's ancestry", addr);
3146 			ident = 0;
3147 			break;
3148 		}
3149 		paddr = (uintptr_t)parent.vm_source;
3150 	}
3151 
3152 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3153 
3154 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3155 	    addr, VMEM_NAMEWIDTH, c,
3156 	    vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3157 	    vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3158 
3159 	return (DCMD_OK);
3160 }
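
/*
 * Example output (illustrative; child arenas are indented beneath their
 * source arena):
 *
 *	> ::vmem
 *	ADDR             NAME                        INUSE        TOTAL   SUCCEED  FAIL
 *	ffffff01c0000000 heap                   3221225472 274877906944    123456     0
 *	ffffff01c0001000   kmem_metadata          44040192    134217728      9876     0
 */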
3161 
3162 void
3163 vmem_seg_help(void)
3164 {
3165 	mdb_printf("%s\n",
3166 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3167 "\n"
3168 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3169 "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3170 "information.\n");
3171 	mdb_dec_indent(2);
3172 	mdb_printf("%<b>OPTIONS%</b>\n");
3173 	mdb_inc_indent(2);
3174 	mdb_printf("%s",
3175 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3176 "  -s    report the size of the segment, instead of the end address\n"
3177 "  -c caller\n"
3178 "        filter out segments without the function/PC in their stack trace\n"
3179 "  -e earliest\n"
3180 "        filter out segments timestamped before earliest\n"
3181 "  -l latest\n"
3182 "        filter out segments timestamped after latest\n"
3183 "  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
3187 "  -t thread\n"
3188 "        filter out segments not involving thread\n"
3189 "  -T type\n"
3190 "        filter out segments not of type 'type'\n"
3191 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3192 }
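
/*
 * Example (illustrative): show the ALLOC segments of an arena that are at
 * least 8K in size, along with their allocation sites:
 *
 *	> ffffff01c0000000::walk vmem_seg | ::vmem_seg -T ALLOC -m 0x2000
 */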
3193 
3194 /*ARGSUSED*/
3195 int
3196 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3197 {
3198 	vmem_seg_t vs;
3199 	pc_t *stk = vs.vs_stack;
3200 	uintptr_t sz;
3201 	uint8_t t;
3202 	const char *type = NULL;
3203 	GElf_Sym sym;
3204 	char c[MDB_SYM_NAMLEN];
3205 	int no_debug;
3206 	int i;
3207 	int depth;
3208 	uintptr_t laddr, haddr;
3209 
3210 	uintptr_t caller = NULL, thread = NULL;
3211 	uintptr_t minsize = 0, maxsize = 0;
3212 
3213 	hrtime_t earliest = 0, latest = 0;
3214 
3215 	uint_t size = 0;
3216 	uint_t verbose = 0;
3217 
3218 	if (!(flags & DCMD_ADDRSPEC))
3219 		return (DCMD_USAGE);
3220 
3221 	if (mdb_getopts(argc, argv,
3222 	    'c', MDB_OPT_UINTPTR, &caller,
3223 	    'e', MDB_OPT_UINT64, &earliest,
3224 	    'l', MDB_OPT_UINT64, &latest,
3225 	    's', MDB_OPT_SETBITS, TRUE, &size,
3226 	    'm', MDB_OPT_UINTPTR, &minsize,
3227 	    'M', MDB_OPT_UINTPTR, &maxsize,
3228 	    't', MDB_OPT_UINTPTR, &thread,
3229 	    'T', MDB_OPT_STR, &type,
3230 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3231 	    NULL) != argc)
3232 		return (DCMD_USAGE);
3233 
3234 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3235 		if (verbose) {
3236 			mdb_printf("%16s %4s %16s %16s %16s\n"
3237 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3238 			    "ADDR", "TYPE", "START", "END", "SIZE",
3239 			    "", "", "THREAD", "TIMESTAMP", "");
3240 		} else {
3241 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3242 			    "START", size? "SIZE" : "END", "WHO");
3243 		}
3244 	}
3245 
3246 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3247 		mdb_warn("couldn't read vmem_seg at %p", addr);
3248 		return (DCMD_ERR);
3249 	}
3250 
3251 	if (type != NULL) {
3252 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3253 			t = VMEM_ALLOC;
3254 		else if (strcmp(type, "FREE") == 0)
3255 			t = VMEM_FREE;
3256 		else if (strcmp(type, "SPAN") == 0)
3257 			t = VMEM_SPAN;
3258 		else if (strcmp(type, "ROTR") == 0 ||
3259 		    strcmp(type, "ROTOR") == 0)
3260 			t = VMEM_ROTOR;
3261 		else if (strcmp(type, "WLKR") == 0 ||
3262 		    strcmp(type, "WALKER") == 0)
3263 			t = VMEM_WALKER;
3264 		else {
3265 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3266 			    type);
3267 			return (DCMD_ERR);
3268 		}
3269 
3270 		if (vs.vs_type != t)
3271 			return (DCMD_OK);
3272 	}
3273 
3274 	sz = vs.vs_end - vs.vs_start;
3275 
3276 	if (minsize != 0 && sz < minsize)
3277 		return (DCMD_OK);
3278 
3279 	if (maxsize != 0 && sz > maxsize)
3280 		return (DCMD_OK);
3281 
3282 	t = vs.vs_type;
3283 	depth = vs.vs_depth;
3284 
3285 	/*
3286 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3287 	 */
3288 	no_debug = (t != VMEM_ALLOC) ||
3289 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3290 
3291 	if (no_debug) {
3292 		if (caller != NULL || thread != NULL || earliest != 0 ||
3293 		    latest != 0)
3294 			return (DCMD_OK);		/* not enough info */
3295 	} else {
3296 		if (caller != NULL) {
3297 			laddr = caller;
3298 			haddr = caller + sizeof (caller);
3299 
3300 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3301 			    sizeof (c), &sym) != -1 &&
3302 			    caller == (uintptr_t)sym.st_value) {
3303 				/*
3304 				 * We were provided an exact symbol value; any
3305 				 * address in the function is valid.
3306 				 */
3307 				laddr = (uintptr_t)sym.st_value;
3308 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3309 			}
3310 
3311 			for (i = 0; i < depth; i++)
3312 				if (vs.vs_stack[i] >= laddr &&
3313 				    vs.vs_stack[i] < haddr)
3314 					break;
3315 
3316 			if (i == depth)
3317 				return (DCMD_OK);
3318 		}
3319 
3320 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3321 			return (DCMD_OK);
3322 
3323 		if (earliest != 0 && vs.vs_timestamp < earliest)
3324 			return (DCMD_OK);
3325 
3326 		if (latest != 0 && vs.vs_timestamp > latest)
3327 			return (DCMD_OK);
3328 	}
3329 
3330 	type = (t == VMEM_ALLOC ? "ALLC" :
3331 	    t == VMEM_FREE ? "FREE" :
3332 	    t == VMEM_SPAN ? "SPAN" :
3333 	    t == VMEM_ROTOR ? "ROTR" :
3334 	    t == VMEM_WALKER ? "WLKR" :
3335 	    "????");
3336 
3337 	if (flags & DCMD_PIPE_OUT) {
3338 		mdb_printf("%#lr\n", addr);
3339 		return (DCMD_OK);
3340 	}
3341 
3342 	if (verbose) {
3343 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3344 		    addr, type, vs.vs_start, vs.vs_end, sz);
3345 
3346 		if (no_debug)
3347 			return (DCMD_OK);
3348 
3349 		mdb_printf("%16s %4s %16p %16llx\n",
3350 		    "", "", vs.vs_thread, vs.vs_timestamp);
3351 
3352 		mdb_inc_indent(17);
3353 		for (i = 0; i < depth; i++) {
3354 			mdb_printf("%a\n", stk[i]);
3355 		}
3356 		mdb_dec_indent(17);
3357 		mdb_printf("\n");
3358 	} else {
3359 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3360 		    vs.vs_start, size? sz : vs.vs_end);
3361 
3362 		if (no_debug) {
3363 			mdb_printf("\n");
3364 			return (DCMD_OK);
3365 		}
3366 
3367 		for (i = 0; i < depth; i++) {
3368 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3369 			    c, sizeof (c), &sym) == -1)
3370 				continue;
3371 			if (strncmp(c, "vmem_", 5) == 0)
3372 				continue;
3373 			break;
3374 		}
		if (i < depth)
			mdb_printf(" %a\n", stk[i]);
		else
			mdb_printf("\n");
3376 	}
3377 	return (DCMD_OK);
3378 }
3379 
3380 typedef struct kmalog_data {
3381 	uintptr_t	kma_addr;
3382 	hrtime_t	kma_newest;
3383 } kmalog_data_t;
3384 
3385 /*ARGSUSED*/
3386 static int
3387 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3388 {
3389 	char name[KMEM_CACHE_NAMELEN + 1];
3390 	hrtime_t delta;
3391 	int i, depth;
3392 	size_t bufsize;
3393 
3394 	if (bcp->bc_timestamp == 0)
3395 		return (WALK_DONE);
3396 
3397 	if (kma->kma_newest == 0)
3398 		kma->kma_newest = bcp->bc_timestamp;
3399 
3400 	if (kma->kma_addr) {
3401 		if (mdb_vread(&bufsize, sizeof (bufsize),
3402 		    (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3403 			mdb_warn(
3404 			    "failed to read cache_bufsize for cache at %p",
3405 			    bcp->bc_cache);
3406 			return (WALK_ERR);
3407 		}
3408 
3409 		if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3410 		    kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3411 			return (WALK_NEXT);
3412 	}
3413 
3414 	delta = kma->kma_newest - bcp->bc_timestamp;
3415 	depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3416 
3417 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3418 	    &bcp->bc_cache->cache_name) <= 0)
3419 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3420 
3421 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3422 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3423 
3424 	for (i = 0; i < depth; i++)
3425 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3426 
3427 	return (WALK_NEXT);
3428 }
3429 
3430 int
3431 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3432 {
3433 	const char *logname = "kmem_transaction_log";
3434 	kmalog_data_t kma;
3435 
3436 	if (argc > 1)
3437 		return (DCMD_USAGE);
3438 
3439 	kma.kma_newest = 0;
3440 	if (flags & DCMD_ADDRSPEC)
3441 		kma.kma_addr = addr;
3442 	else
3443 		kma.kma_addr = NULL;
3444 
3445 	if (argc > 0) {
3446 		if (argv->a_type != MDB_TYPE_STRING)
3447 			return (DCMD_USAGE);
3448 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3449 			logname = "kmem_failure_log";
3450 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3451 			logname = "kmem_slab_log";
3452 		else
3453 			return (DCMD_USAGE);
3454 	}
3455 
3456 	if (mdb_readvar(&addr, logname) == -1) {
		mdb_warn("failed to read %s log header pointer", logname);
3458 		return (DCMD_ERR);
3459 	}
3460 
3461 	if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3462 		mdb_warn("failed to walk kmem log");
3463 		return (DCMD_ERR);
3464 	}
3465 
3466 	return (DCMD_OK);
3467 }
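
/*
 * Example (illustrative; the stack and timings are placeholders):
 *
 *	> ::kmalog fail
 *	T-0.000151219  addr=ffffff01c0123400  kmem_alloc_4096
 *		 kmem_cache_alloc_debug+0x2d8
 *		 kmem_cache_alloc+0x14c
 *		 kmem_alloc+0x50
 *
 * Each event is timestamped with its age relative to the most recent
 * event in the log.
 */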
3468 
3469 /*
3470  * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3471  * The first piece is a structure which we use to accumulate kmem_cache_t
3472  * addresses of interest.  The kmc_add is used as a callback for the kmem_cache
3473  * walker; we either add all caches, or ones named explicitly as arguments.
3474  */
3475 
3476 typedef struct kmclist {
3477 	const char *kmc_name;			/* Name to match (or NULL) */
3478 	uintptr_t *kmc_caches;			/* List of kmem_cache_t addrs */
3479 	int kmc_nelems;				/* Num entries in kmc_caches */
3480 	int kmc_size;				/* Size of kmc_caches array */
3481 } kmclist_t;
3482 
3483 static int
3484 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3485 {
3486 	void *p;
3487 	int s;
3488 
3489 	if (kmc->kmc_name == NULL ||
3490 	    strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3491 		/*
3492 		 * If we have a match, grow our array (if necessary), and then
3493 		 * add the virtual address of the matching cache to our list.
3494 		 */
3495 		if (kmc->kmc_nelems >= kmc->kmc_size) {
3496 			s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3497 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3498 
3499 			bcopy(kmc->kmc_caches, p,
3500 			    sizeof (uintptr_t) * kmc->kmc_size);
3501 
3502 			kmc->kmc_caches = p;
3503 			kmc->kmc_size = s;
3504 		}
3505 
3506 		kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3507 		return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3508 	}
3509 
3510 	return (WALK_NEXT);
3511 }
3512 
3513 /*
3514  * The second piece of ::kmausers is a hash table of allocations.  Each
3515  * allocation owner is identified by its stack trace and data_size.  We then
3516  * track the total bytes of all such allocations, and the number of allocations
3517  * to report at the end.  Once we have a list of caches, we walk through the
3518  * allocated bufctls of each, and update our hash table accordingly.
3519  */
3520 
3521 typedef struct kmowner {
3522 	struct kmowner *kmo_head;		/* First hash elt in bucket */
3523 	struct kmowner *kmo_next;		/* Next hash elt in chain */
3524 	size_t kmo_signature;			/* Hash table signature */
3525 	uint_t kmo_num;				/* Number of allocations */
3526 	size_t kmo_data_size;			/* Size of each allocation */
3527 	size_t kmo_total_size;			/* Total bytes of allocation */
3528 	int kmo_depth;				/* Depth of stack trace */
3529 	uintptr_t kmo_stack[KMEM_STACK_DEPTH];	/* Stack trace */
3530 } kmowner_t;
3531 
3532 typedef struct kmusers {
3533 	uintptr_t kmu_addr;			/* address of interest */
3534 	const kmem_cache_t *kmu_cache;		/* Current kmem cache */
3535 	kmowner_t *kmu_hash;			/* Hash table of owners */
3536 	int kmu_nelems;				/* Number of entries in use */
3537 	int kmu_size;				/* Total number of entries */
3538 } kmusers_t;
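
/*
 * A sketch of the hashing scheme used below: an owner's signature is its
 * data_size plus the sum of its stack PCs, and its bucket is
 * (signature & (kmu_size - 1)); kmu_size is always a power of two.  Two
 * allocations of the same size taken from an identical call path thus
 * land on the same kmowner_t, which accumulates kmo_num and
 * kmo_total_size.
 */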
3539 
3540 static void
3541 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3542     size_t size, size_t data_size)
3543 {
3544 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3545 	size_t bucket, signature = data_size;
3546 	kmowner_t *kmo, *kmoend;
3547 
3548 	/*
3549 	 * If the hash table is full, double its size and rehash everything.
3550 	 */
3551 	if (kmu->kmu_nelems >= kmu->kmu_size) {
3552 		int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3553 
3554 		kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3555 		bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
3556 		kmu->kmu_hash = kmo;
3557 		kmu->kmu_size = s;
3558 
3559 		kmoend = kmu->kmu_hash + kmu->kmu_size;
3560 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
3561 			kmo->kmo_head = NULL;
3562 
3563 		kmoend = kmu->kmu_hash + kmu->kmu_nelems;
3564 		for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
3565 			bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
3566 			kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3567 			kmu->kmu_hash[bucket].kmo_head = kmo;
3568 		}
3569 	}
3570 
3571 	/*
3572 	 * Finish computing the hash signature from the stack trace, and then
3573 	 * see if the owner is in the hash table.  If so, update our stats.
3574 	 */
3575 	for (i = 0; i < depth; i++)
3576 		signature += bcp->bc_stack[i];
3577 
3578 	bucket = signature & (kmu->kmu_size - 1);
3579 
3580 	for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
3581 		if (kmo->kmo_signature == signature) {
3582 			size_t difference = 0;
3583 
3584 			difference |= kmo->kmo_data_size - data_size;
3585 			difference |= kmo->kmo_depth - depth;
3586 
3587 			for (i = 0; i < depth; i++) {
3588 				difference |= kmo->kmo_stack[i] -
3589 				    bcp->bc_stack[i];
3590 			}
3591 
3592 			if (difference == 0) {
3593 				kmo->kmo_total_size += size;
3594 				kmo->kmo_num++;
3595 				return;
3596 			}
3597 		}
3598 	}
3599 
3600 	/*
3601 	 * If the owner is not yet hashed, grab the next element and fill it
3602 	 * in based on the allocation information.
3603 	 */
3604 	kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
3605 	kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3606 	kmu->kmu_hash[bucket].kmo_head = kmo;
3607 
3608 	kmo->kmo_signature = signature;
3609 	kmo->kmo_num = 1;
3610 	kmo->kmo_data_size = data_size;
3611 	kmo->kmo_total_size = size;
3612 	kmo->kmo_depth = depth;
3613 
3614 	for (i = 0; i < depth; i++)
3615 		kmo->kmo_stack[i] = bcp->bc_stack[i];
3616 }
3617 
3618 /*
3619  * When ::kmausers is invoked without the -f flag, we simply update our hash
3620  * table with the information from each allocated bufctl.
3621  */
3622 /*ARGSUSED*/
3623 static int
3624 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
3625 {
3626 	const kmem_cache_t *cp = kmu->kmu_cache;
3627 
3628 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3629 	return (WALK_NEXT);
3630 }
3631 
3632 /*
3633  * When ::kmausers is invoked with the -f flag, we print out the information
3634  * for each bufctl as well as updating the hash table.
3635  */
3636 static int
3637 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
3638 {
3639 	int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3640 	const kmem_cache_t *cp = kmu->kmu_cache;
3641 	kmem_bufctl_t bufctl;
3642 
3643 	if (kmu->kmu_addr) {
		if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
3645 			mdb_warn("couldn't read bufctl at %p", addr);
3646 		else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
3647 		    kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
3648 		    cp->cache_bufsize)
3649 			return (WALK_NEXT);
3650 	}
3651 
3652 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3653 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3654 
3655 	for (i = 0; i < depth; i++)
3656 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3657 
3658 	kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3659 	return (WALK_NEXT);
3660 }
3661 
3662 /*
3663  * We sort our results by allocation size before printing them.
3664  */
3665 static int
3666 kmownercmp(const void *lp, const void *rp)
3667 {
3668 	const kmowner_t *lhs = lp;
3669 	const kmowner_t *rhs = rp;
3670 
3671 	return (rhs->kmo_total_size - lhs->kmo_total_size);
3672 }
3673 
3674 /*
3675  * The main engine of ::kmausers is relatively straightforward: First we
3676  * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
3677  * iterate over the allocated bufctls of each cache in the list.  Finally,
3678  * we sort and print our results.
3679  */
3680 /*ARGSUSED*/
3681 int
3682 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3683 {
3684 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3685 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3686 	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
3687 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3688 	int opt_e = FALSE;		/* Include "small" users */
3689 	int opt_f = FALSE;		/* Print stack traces */
3690 
3691 	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
3692 	kmowner_t *kmo, *kmoend;
3693 	int i, oelems;
3694 
3695 	kmclist_t kmc;
3696 	kmusers_t kmu;
3697 
3698 	bzero(&kmc, sizeof (kmc));
3699 	bzero(&kmu, sizeof (kmu));
3700 
3701 	while ((i = mdb_getopts(argc, argv,
3702 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3703 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3704 
3705 		argv += i;	/* skip past options we just processed */
3706 		argc -= i;	/* adjust argc */
3707 
3708 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3709 			return (DCMD_USAGE);
3710 
3711 		oelems = kmc.kmc_nelems;
3712 		kmc.kmc_name = argv->a_un.a_str;
3713 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
3714 
3715 		if (kmc.kmc_nelems == oelems) {
3716 			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
3717 			return (DCMD_ERR);
3718 		}
3719 
3720 		do_all_caches = 0;
3721 		argv++;
3722 		argc--;
3723 	}
3724 
3725 	if (flags & DCMD_ADDRSPEC) {
3726 		opt_f = TRUE;
3727 		kmu.kmu_addr = addr;
3728 	} else {
3729 		kmu.kmu_addr = NULL;
3730 	}
3731 
3732 	if (opt_e)
3733 		mem_threshold = cnt_threshold = 0;
3734 
3735 	if (opt_f)
3736 		callback = (mdb_walk_cb_t)kmause2;
3737 
3738 	if (do_all_caches) {
3739 		kmc.kmc_name = NULL; /* match all cache names */
3740 		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
3741 	}
3742 
3743 	for (i = 0; i < kmc.kmc_nelems; i++) {
3744 		uintptr_t cp = kmc.kmc_caches[i];
3745 		kmem_cache_t c;
3746 
3747 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3748 			mdb_warn("failed to read cache at %p", cp);
3749 			continue;
3750 		}
3751 
3752 		if (!(c.cache_flags & KMF_AUDIT)) {
3753 			if (!do_all_caches) {
3754 				mdb_warn("KMF_AUDIT is not enabled for %s\n",
3755 				    c.cache_name);
3756 			}
3757 			continue;
3758 		}
3759 
3760 		kmu.kmu_cache = &c;
3761 		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
3762 		audited_caches++;
3763 	}
3764 
3765 	if (audited_caches == 0 && do_all_caches) {
3766 		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
3767 		return (DCMD_ERR);
3768 	}
3769 
3770 	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
3771 	kmoend = kmu.kmu_hash + kmu.kmu_nelems;
3772 
3773 	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
3774 		if (kmo->kmo_total_size < mem_threshold &&
3775 		    kmo->kmo_num < cnt_threshold)
3776 			continue;
3777 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3778 		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
3779 		for (i = 0; i < kmo->kmo_depth; i++)
3780 			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
3781 	}
3782 
3783 	return (DCMD_OK);
3784 }
3785 
3786 void
3787 kmausers_help(void)
3788 {
3789 	mdb_printf(
	    "Displays the largest users of the kmem allocator, sorted by\n"
	    "total size and grouped by stack trace.  If one or more caches\n"
	    "are specified, only those caches\n"
3792 	    "will be searched.  By default, all caches are searched.  If an\n"
3793 	    "address is specified, then only those allocations which include\n"
3794 	    "the given address are displayed.  Specifying an address implies\n"
3795 	    "-f.\n"
3796 	    "\n"
3797 	    "\t-e\tInclude all users, not just the largest\n"
3798 	    "\t-f\tDisplay individual allocations.  By default, users are\n"
3799 	    "\t\tgrouped by stack\n");
3800 }
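
/*
 * Example (illustrative; sizes, counts, and stacks will vary):
 *
 *	> ::kmausers kmem_alloc_1152
 *	1842176 bytes for 1599 allocations with data size 1152:
 *		 kmem_alloc+0x4b
 *		 segvn_create+0x88
 *		 ...
 */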
3801 
3802 static int
3803 kmem_ready_check(void)
3804 {
3805 	int ready;
3806 
3807 	if (mdb_readvar(&ready, "kmem_ready") < 0)
3808 		return (-1); /* errno is set for us */
3809 
3810 	return (ready);
3811 }
3812 
3813 /*ARGSUSED*/
3814 static void
3815 kmem_statechange_cb(void *arg)
3816 {
3817 	static int been_ready = 0;
3818 
3819 	leaky_cleanup(1);	/* state changes invalidate leaky state */
3820 
3821 	if (been_ready)
3822 		return;
3823 
3824 	if (kmem_ready_check() <= 0)
3825 		return;
3826 
3827 	been_ready = 1;
3828 	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
3829 }
3830 
3831 void
3832 kmem_init(void)
3833 {
3834 	mdb_walker_t w = {
3835 		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
3836 		kmem_cache_walk_step, kmem_cache_walk_fini
3837 	};
3838 
3839 	/*
3840 	 * If kmem is ready, we'll need to invoke the kmem_cache walker
3841 	 * immediately.  Walkers in the linkage structure won't be ready until
3842 	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
3843 	 * is ready, we'll use the walker to initialize the caches.  If kmem
3844 	 * isn't ready, we'll register a callback that will allow us to defer
3845 	 * cache walking until it is.
3846 	 */
3847 	if (mdb_add_walker(&w) != 0) {
3848 		mdb_warn("failed to add kmem_cache walker");
3849 		return;
3850 	}
3851 
3852 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, kmem_statechange_cb, NULL);
3853 	kmem_statechange_cb(NULL);
3854 }
3855 
3856 typedef struct whatthread {
3857 	uintptr_t	wt_target;
3858 	int		wt_verbose;
3859 } whatthread_t;
3860 
3861 static int
3862 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
3863 {
3864 	uintptr_t current, data;
3865 
3866 	if (t->t_stkbase == NULL)
3867 		return (WALK_NEXT);
3868 
3869 	/*
3870 	 * Warn about swapped out threads, but drive on anyway
3871 	 */
3872 	if (!(t->t_schedflag & TS_LOAD)) {
3873 		mdb_warn("thread %p's stack swapped out\n", addr);
3874 		return (WALK_NEXT);
3875 	}
3876 
3877 	/*
3878 	 * Search the thread's stack for the given pointer.  Note that it would
3879 	 * be more efficient to follow ::kgrep's lead and read in page-sized
3880 	 * chunks, but this routine is already fast and simple.
3881 	 */
3882 	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
3883 	    current += sizeof (uintptr_t)) {
3884 		if (mdb_vread(&data, sizeof (data), current) == -1) {
3885 			mdb_warn("couldn't read thread %p's stack at %p",
3886 			    addr, current);
3887 			return (WALK_ERR);
3888 		}
3889 
3890 		if (data == w->wt_target) {
3891 			if (w->wt_verbose) {
3892 				mdb_printf("%p in thread %p's stack%s\n",
3893 				    current, addr, stack_active(t, current));
3894 			} else {
3895 				mdb_printf("%#lr\n", addr);
3896 				return (WALK_NEXT);
3897 			}
3898 		}
3899 	}
3900 
3901 	return (WALK_NEXT);
3902 }
3903 
3904 int
3905 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3906 {
3907 	whatthread_t w;
3908 
3909 	if (!(flags & DCMD_ADDRSPEC))
3910 		return (DCMD_USAGE);
3911 
3912 	w.wt_verbose = FALSE;
3913 	w.wt_target = addr;
3914 
3915 	if (mdb_getopts(argc, argv,
3916 	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
3917 		return (DCMD_USAGE);
3918 
3919 	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
3920 	    == -1) {
3921 		mdb_warn("couldn't walk threads");
3922 		return (DCMD_ERR);
3923 	}
3924 
3925 	return (DCMD_OK);
3926 }
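
/*
 * Example (illustrative): find the threads whose stacks contain a pointer
 * to a given buffer:
 *
 *	> ffffff01c0123400::whatthread
 *	0xffffff01c9876540
 *
 *	> ffffff01c0123400::whatthread -v
 *	ffffff01d00ffe08 in thread ffffff01c9876540's stack
 */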
3927