xref: /titanic_41/usr/src/cmd/mdb/common/modules/libumem/umem.c (revision 5aefb6555731130ca4fd295960123d71f2d21fe8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "umem.h"
29 
30 #include <sys/vmem_impl_user.h>
31 #include <umem_impl.h>
32 
33 #include <alloca.h>
34 #include <limits.h>
35 
36 #include "misc.h"
37 #include "leaky.h"
38 
39 #include "umem_pagesize.h"
40 
/*
 * Walk-type bits.  umem_walk_init_common() is handed a combination of
 * UM_ALLOCATED, UM_FREE and UM_BUFCTL; it masks UM_HASH off its input and
 * sets it itself when the walk is layered on top of "umem_hash".
 */
#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

/* local copy of the target's umem_ready; forced to 0 if umem is not found */
int umem_ready;

/* non-zero once the "umem_stack_depth corrupted" warning has been printed */
static int umem_stack_depth_warned;
/* read from the target's umem_max_ncpus (or max_ncpus) variable */
static uint32_t umem_max_ncpus;
/* local copy of the target's umem_stack_depth; zeroed when out of range */
uint32_t umem_stack_depth;

/* local copy of the target's pagesize variable */
size_t umem_pagesize;

/*
 * Read the target variable called 'var' into the local object of the same
 * name.  The expression evaluates to 1 (after printing a warning) when
 * umem_readvar() returns -1, and to 0 otherwise.  The argument is
 * stringified to form the symbol name, so the local identifier must be
 * spelled exactly like the target's symbol.
 */
#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    (mdb_warn("failed to read "#var), 1))
57 
58 int
59 umem_update_variables(void)
60 {
61 	size_t pagesize;
62 
63 	/*
64 	 * Figure out which type of umem is being used; if it's not there
65 	 * yet, succeed quietly.
66 	 */
67 	if (umem_set_standalone() == -1) {
68 		umem_ready = 0;
69 		return (0);		/* umem not there yet */
70 	}
71 
72 	/*
73 	 * Solaris 9 used a different name for umem_max_ncpus.  It's
74 	 * cheap backwards compatibility to check for both names.
75 	 */
76 	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
77 	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
78 		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
79 		return (-1);
80 	}
81 	if (UMEM_READVAR(umem_ready))
82 		return (-1);
83 	if (UMEM_READVAR(umem_stack_depth))
84 		return (-1);
85 	if (UMEM_READVAR(pagesize))
86 		return (-1);
87 
88 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
89 		if (umem_stack_depth_warned == 0) {
90 			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
91 			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
92 			umem_stack_depth_warned = 1;
93 		}
94 		umem_stack_depth = 0;
95 	}
96 
97 	umem_pagesize = pagesize;
98 
99 	return (0);
100 }
101 
102 /*ARGSUSED*/
103 static int
104 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
105 {
106 	mdb_walker_t w;
107 	char descr[64];
108 
109 	(void) mdb_snprintf(descr, sizeof (descr),
110 	    "walk the %s cache", c->cache_name);
111 
112 	w.walk_name = c->cache_name;
113 	w.walk_descr = descr;
114 	w.walk_init = umem_walk_init;
115 	w.walk_step = umem_walk_step;
116 	w.walk_fini = umem_walk_fini;
117 	w.walk_init_arg = (void *)addr;
118 
119 	if (mdb_add_walker(&w) == -1)
120 		mdb_warn("failed to add %s walker", c->cache_name);
121 
122 	return (WALK_NEXT);
123 }
124 
125 /*ARGSUSED*/
126 static void
127 umem_statechange_cb(void *arg)
128 {
129 	static int been_ready = 0;
130 
131 #ifndef _KMDB
132 	leaky_cleanup(1);	/* state changes invalidate leaky state */
133 #endif
134 
135 	if (umem_update_variables() == -1)
136 		return;
137 
138 	if (been_ready)
139 		return;
140 
141 	if (umem_ready != UMEM_READY)
142 		return;
143 
144 	been_ready = 1;
145 	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL);
146 }
147 
148 int
149 umem_init(void)
150 {
151 	mdb_walker_t w = {
152 		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
153 		umem_cache_walk_step, umem_cache_walk_fini
154 	};
155 
156 	if (mdb_add_walker(&w) == -1) {
157 		mdb_warn("failed to add umem_cache walker");
158 		return (-1);
159 	}
160 
161 	if (umem_update_variables() == -1)
162 		return (-1);
163 
164 	/* install a callback so that our variables are always up-to-date */
165 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
166 	umem_statechange_cb(NULL);
167 
168 	return (0);
169 }
170 
171 int
172 umem_abort_messages(void)
173 {
174 	char *umem_error_buffer;
175 	uint_t umem_error_begin;
176 	GElf_Sym sym;
177 	size_t bufsize;
178 
179 	if (UMEM_READVAR(umem_error_begin))
180 		return (DCMD_ERR);
181 
182 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
183 		mdb_warn("unable to look up umem_error_buffer");
184 		return (DCMD_ERR);
185 	}
186 
187 	bufsize = (size_t)sym.st_size;
188 
189 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
190 
191 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
192 	    != bufsize) {
193 		mdb_warn("unable to read umem_error_buffer");
194 		return (DCMD_ERR);
195 	}
196 	/* put a zero after the end of the buffer to simplify printing */
197 	umem_error_buffer[bufsize] = 0;
198 
199 	if ((umem_error_begin % bufsize) == 0)
200 		mdb_printf("%s\n", umem_error_buffer);
201 	else {
202 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
203 		mdb_printf("%s%s\n",
204 		    &umem_error_buffer[umem_error_begin % bufsize],
205 		    umem_error_buffer);
206 	}
207 
208 	return (DCMD_OK);
209 }
210 
211 static void
212 umem_log_status(const char *name, umem_log_header_t *val)
213 {
214 	umem_log_header_t my_lh;
215 	uintptr_t pos = (uintptr_t)val;
216 	size_t size;
217 
218 	if (pos == NULL)
219 		return;
220 
221 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
222 		mdb_warn("\nunable to read umem_%s_log pointer %p",
223 		    name, pos);
224 		return;
225 	}
226 
227 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
228 
229 	if (size % (1024 * 1024) == 0)
230 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
231 	else if (size % 1024 == 0)
232 		mdb_printf("%s=%dk ", name, size / 1024);
233 	else
234 		mdb_printf("%s=%d ", name, size);
235 }
236 
/*
 * One entry in a table that maps a debugging-option name to UMF_* flag
 * bits.
 */
typedef struct umem_debug_flags {
	const char	*udf_name;	/* option name */
	uint_t		udf_flags;	/* UMF_* bits associated with name */
	uint_t		udf_clear;	/* if 0, uses udf_flags */
} umem_debug_flags_t;

/*
 * Table of option names and their flag masks, terminated by an entry whose
 * udf_name is NULL.  Entries that give only two initializers leave
 * udf_clear at 0.
 * NOTE(review): no consumer of this table is visible in this part of the
 * file; confirm how udf_clear is applied before relying on the entry order.
 */
umem_debug_flags_t umem_status_flags[] = {
	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
	{ "audit",	UMF_AUDIT },
	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
	{ "nosignal",	UMF_CHECKSIGNAL },
	{ "firewall",	UMF_FIREWALL },
	{ "lite",	UMF_LITE },
	{ NULL }
};
253 
254 /*ARGSUSED*/
255 int
256 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
257 {
258 	int umem_logging;
259 
260 	umem_log_header_t *umem_transaction_log;
261 	umem_log_header_t *umem_content_log;
262 	umem_log_header_t *umem_failure_log;
263 	umem_log_header_t *umem_slab_log;
264 
265 	mdb_printf("Status:\t\t%s\n",
266 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
267 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
268 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
269 	    umem_ready == UMEM_READY ? "ready and active" :
270 	    umem_ready == 0 ? "not loaded into address space" :
271 	    "unknown (umem_ready invalid)");
272 
273 	if (umem_ready == 0)
274 		return (DCMD_OK);
275 
276 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
277 
278 	if (UMEM_READVAR(umem_logging))
279 		goto err;
280 	if (UMEM_READVAR(umem_transaction_log))
281 		goto err;
282 	if (UMEM_READVAR(umem_content_log))
283 		goto err;
284 	if (UMEM_READVAR(umem_failure_log))
285 		goto err;
286 	if (UMEM_READVAR(umem_slab_log))
287 		goto err;
288 
289 	mdb_printf("Logs:\t\t");
290 	umem_log_status("transaction", umem_transaction_log);
291 	umem_log_status("content", umem_content_log);
292 	umem_log_status("fail", umem_failure_log);
293 	umem_log_status("slab", umem_slab_log);
294 	if (!umem_logging)
295 		mdb_printf("(inactive)");
296 	mdb_printf("\n");
297 
298 	mdb_printf("Message buffer:\n");
299 	return (umem_abort_messages());
300 
301 err:
302 	mdb_printf("Message buffer:\n");
303 	(void) umem_abort_messages();
304 	return (DCMD_ERR);
305 }
306 
/*
 * Private state of the "umem_cache" walker.
 */
typedef struct {
	/* address of umem_null_cache; reaching it again ends the walk */
	uintptr_t ucw_first;
	/* address of the cache the next step will read */
	uintptr_t ucw_current;
} umem_cache_walk_t;
311 
312 int
313 umem_cache_walk_init(mdb_walk_state_t *wsp)
314 {
315 	umem_cache_walk_t *ucw;
316 	umem_cache_t c;
317 	uintptr_t cp;
318 	GElf_Sym sym;
319 
320 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
321 		mdb_warn("couldn't find umem_null_cache");
322 		return (WALK_ERR);
323 	}
324 
325 	cp = (uintptr_t)sym.st_value;
326 
327 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
328 		mdb_warn("couldn't read cache at %p", cp);
329 		return (WALK_ERR);
330 	}
331 
332 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
333 
334 	ucw->ucw_first = cp;
335 	ucw->ucw_current = (uintptr_t)c.cache_next;
336 	wsp->walk_data = ucw;
337 
338 	return (WALK_NEXT);
339 }
340 
341 int
342 umem_cache_walk_step(mdb_walk_state_t *wsp)
343 {
344 	umem_cache_walk_t *ucw = wsp->walk_data;
345 	umem_cache_t c;
346 	int status;
347 
348 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
349 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
350 		return (WALK_DONE);
351 	}
352 
353 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
354 
355 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
356 		return (WALK_DONE);
357 
358 	return (status);
359 }
360 
361 void
362 umem_cache_walk_fini(mdb_walk_state_t *wsp)
363 {
364 	umem_cache_walk_t *ucw = wsp->walk_data;
365 	mdb_free(ucw, sizeof (umem_cache_walk_t));
366 }
367 
/*
 * Private state of the "umem_cpu" walker.
 */
typedef struct {
	/* value of the target's umem_cpus pointer (a target address) */
	umem_cpu_t *ucw_cpus;
	/* index of the next umem_cpu_t to read */
	uint32_t ucw_current;
	/* number of entries to visit; taken from umem_max_ncpus */
	uint32_t ucw_max;
} umem_cpu_walk_state_t;
373 
374 int
375 umem_cpu_walk_init(mdb_walk_state_t *wsp)
376 {
377 	umem_cpu_t *umem_cpus;
378 
379 	umem_cpu_walk_state_t *ucw;
380 
381 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
382 		mdb_warn("failed to read 'umem_cpus'");
383 		return (WALK_ERR);
384 	}
385 
386 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
387 
388 	ucw->ucw_cpus = umem_cpus;
389 	ucw->ucw_current = 0;
390 	ucw->ucw_max = umem_max_ncpus;
391 
392 	wsp->walk_data = ucw;
393 	return (WALK_NEXT);
394 }
395 
396 int
397 umem_cpu_walk_step(mdb_walk_state_t *wsp)
398 {
399 	umem_cpu_t cpu;
400 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
401 
402 	uintptr_t caddr;
403 
404 	if (ucw->ucw_current >= ucw->ucw_max)
405 		return (WALK_DONE);
406 
407 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
408 
409 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
410 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
411 		return (WALK_ERR);
412 	}
413 
414 	ucw->ucw_current++;
415 
416 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
417 }
418 
419 void
420 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
421 {
422 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
423 
424 	mdb_free(ucw, sizeof (*ucw));
425 }
426 
427 int
428 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
429 {
430 	if (wsp->walk_addr == NULL) {
431 		mdb_warn("umem_cpu_cache doesn't support global walks");
432 		return (WALK_ERR);
433 	}
434 
435 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
436 		mdb_warn("couldn't walk 'umem_cpu'");
437 		return (WALK_ERR);
438 	}
439 
440 	wsp->walk_data = (void *)wsp->walk_addr;
441 
442 	return (WALK_NEXT);
443 }
444 
445 int
446 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
447 {
448 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
449 	const umem_cpu_t *cpu = wsp->walk_layer;
450 	umem_cpu_cache_t cc;
451 
452 	caddr += cpu->cpu_cache_offset;
453 
454 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
455 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
456 		return (WALK_ERR);
457 	}
458 
459 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
460 }
461 
462 int
463 umem_slab_walk_init(mdb_walk_state_t *wsp)
464 {
465 	uintptr_t caddr = wsp->walk_addr;
466 	umem_cache_t c;
467 
468 	if (caddr == NULL) {
469 		mdb_warn("umem_slab doesn't support global walks\n");
470 		return (WALK_ERR);
471 	}
472 
473 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
474 		mdb_warn("couldn't read umem_cache at %p", caddr);
475 		return (WALK_ERR);
476 	}
477 
478 	wsp->walk_data =
479 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
480 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
481 
482 	return (WALK_NEXT);
483 }
484 
485 int
486 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
487 {
488 	uintptr_t caddr = wsp->walk_addr;
489 	umem_cache_t c;
490 
491 	if (caddr == NULL) {
492 		mdb_warn("umem_slab_partial doesn't support global walks\n");
493 		return (WALK_ERR);
494 	}
495 
496 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
497 		mdb_warn("couldn't read umem_cache at %p", caddr);
498 		return (WALK_ERR);
499 	}
500 
501 	wsp->walk_data =
502 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
503 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
504 
505 	/*
506 	 * Some consumers (umem_walk_step(), in particular) require at
507 	 * least one callback if there are any buffers in the cache.  So
508 	 * if there are *no* partial slabs, report the last full slab, if
509 	 * any.
510 	 *
511 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
512 	 */
513 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
514 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
515 
516 	return (WALK_NEXT);
517 }
518 
519 int
520 umem_slab_walk_step(mdb_walk_state_t *wsp)
521 {
522 	umem_slab_t s;
523 	uintptr_t addr = wsp->walk_addr;
524 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
525 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
526 
527 	if (addr == saddr)
528 		return (WALK_DONE);
529 
530 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
531 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
532 		return (WALK_ERR);
533 	}
534 
535 	if ((uintptr_t)s.slab_cache != caddr) {
536 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
537 		    addr, caddr, s.slab_cache);
538 		return (WALK_ERR);
539 	}
540 
541 	wsp->walk_addr = (uintptr_t)s.slab_next;
542 
543 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
544 }
545 
546 int
547 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
548 {
549 	umem_cache_t c;
550 
551 	if (!(flags & DCMD_ADDRSPEC)) {
552 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
553 			mdb_warn("can't walk umem_cache");
554 			return (DCMD_ERR);
555 		}
556 		return (DCMD_OK);
557 	}
558 
559 	if (DCMD_HDRSPEC(flags))
560 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
561 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
562 
563 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
564 		mdb_warn("couldn't read umem_cache at %p", addr);
565 		return (DCMD_ERR);
566 	}
567 
568 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
569 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
570 
571 	return (DCMD_OK);
572 }
573 
/*
 * qsort()/bsearch() comparator for arrays of addresses: orders the
 * uintptr_t values that lhs and rhs point at in ascending order, returning
 * -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t left = *(const uintptr_t *)lhs;
	const uintptr_t right = *(const uintptr_t *)rhs;

	/* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return ((left > right) - (left < right));
}
586 
587 static int
588 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
589 {
590 	const umem_bufctl_audit_t *bcp1 = *lhs;
591 	const umem_bufctl_audit_t *bcp2 = *rhs;
592 
593 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
594 		return (-1);
595 
596 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
597 		return (1);
598 
599 	return (0);
600 }
601 
/*
 * Private state of the "umem_hash" walker.
 */
typedef struct umem_hash_walk {
	/* local copy of the cache's hash table (one target address each) */
	uintptr_t *umhw_table;
	/* number of buckets in umhw_table (cache_hash_mask + 1) */
	size_t umhw_nelems;
	/* index of the next bucket to examine */
	size_t umhw_pos;
	/* most recently read bufctl; its bc_next continues the chain */
	umem_bufctl_t umhw_cur;
} umem_hash_walk_t;
608 
609 int
610 umem_hash_walk_init(mdb_walk_state_t *wsp)
611 {
612 	umem_hash_walk_t *umhw;
613 	uintptr_t *hash;
614 	umem_cache_t c;
615 	uintptr_t haddr, addr = wsp->walk_addr;
616 	size_t nelems;
617 	size_t hsize;
618 
619 	if (addr == NULL) {
620 		mdb_warn("umem_hash doesn't support global walks\n");
621 		return (WALK_ERR);
622 	}
623 
624 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
625 		mdb_warn("couldn't read cache at addr %p", addr);
626 		return (WALK_ERR);
627 	}
628 
629 	if (!(c.cache_flags & UMF_HASH)) {
630 		mdb_warn("cache %p doesn't have a hash table\n", addr);
631 		return (WALK_DONE);		/* nothing to do */
632 	}
633 
634 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
635 	umhw->umhw_cur.bc_next = NULL;
636 	umhw->umhw_pos = 0;
637 
638 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
639 	hsize = nelems * sizeof (uintptr_t);
640 	haddr = (uintptr_t)c.cache_hash_table;
641 
642 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
643 	if (mdb_vread(hash, hsize, haddr) == -1) {
644 		mdb_warn("failed to read hash table at %p", haddr);
645 		mdb_free(hash, hsize);
646 		mdb_free(umhw, sizeof (umem_hash_walk_t));
647 		return (WALK_ERR);
648 	}
649 
650 	wsp->walk_data = umhw;
651 
652 	return (WALK_NEXT);
653 }
654 
655 int
656 umem_hash_walk_step(mdb_walk_state_t *wsp)
657 {
658 	umem_hash_walk_t *umhw = wsp->walk_data;
659 	uintptr_t addr = NULL;
660 
661 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
662 		while (umhw->umhw_pos < umhw->umhw_nelems) {
663 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
664 				break;
665 		}
666 	}
667 	if (addr == NULL)
668 		return (WALK_DONE);
669 
670 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
671 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
672 		return (WALK_ERR);
673 	}
674 
675 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
676 }
677 
678 void
679 umem_hash_walk_fini(mdb_walk_state_t *wsp)
680 {
681 	umem_hash_walk_t *umhw = wsp->walk_data;
682 
683 	if (umhw == NULL)
684 		return;
685 
686 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
687 	mdb_free(umhw, sizeof (umem_hash_walk_t));
688 }
689 
690 /*
691  * Find the address of the bufctl structure for the address 'buf' in cache
692  * 'cp', which is at address caddr, and place it in *out.
693  */
694 static int
695 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
696 {
697 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
698 	umem_bufctl_t *bcp;
699 	umem_bufctl_t bc;
700 
701 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
702 		mdb_warn("unable to read hash bucket for %p in cache %p",
703 		    buf, caddr);
704 		return (-1);
705 	}
706 
707 	while (bcp != NULL) {
708 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
709 		    (uintptr_t)bcp) == -1) {
710 			mdb_warn("unable to read bufctl at %p", bcp);
711 			return (-1);
712 		}
713 		if (bc.bc_addr == buf) {
714 			*out = (uintptr_t)bcp;
715 			return (0);
716 		}
717 		bcp = bc.bc_next;
718 	}
719 
720 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
721 	return (-1);
722 }
723 
724 int
725 umem_get_magsize(const umem_cache_t *cp)
726 {
727 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
728 	GElf_Sym mt_sym;
729 	umem_magtype_t mt;
730 	int res;
731 
732 	/*
733 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
734 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
735 	 * it is okay to return 0 for them.
736 	 */
737 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
738 	    (cp->cache_flags & UMF_NOMAGAZINE))
739 		return (res);
740 
741 	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
742 		mdb_warn("unable to read 'umem_magtype'");
743 	} else if (addr < mt_sym.st_value ||
744 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
745 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
746 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
747 		    cp->cache_name, addr);
748 		return (0);
749 	}
750 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
751 		mdb_warn("unable to read magtype at %a", addr);
752 		return (0);
753 	}
754 	return (mt.mt_magsize);
755 }
756 
757 /*ARGSUSED*/
758 static int
759 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
760 {
761 	*est -= (sp->slab_chunks - sp->slab_refcnt);
762 
763 	return (WALK_NEXT);
764 }
765 
766 /*
767  * Returns an upper bound on the number of allocated buffers in a given
768  * cache.
769  */
770 size_t
771 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
772 {
773 	int magsize;
774 	size_t cache_est;
775 
776 	cache_est = cp->cache_buftotal;
777 
778 	(void) mdb_pwalk("umem_slab_partial",
779 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
780 
781 	if ((magsize = umem_get_magsize(cp)) != 0) {
782 		size_t mag_est = cp->cache_full.ml_total * magsize;
783 
784 		if (cache_est >= mag_est) {
785 			cache_est -= mag_est;
786 		} else {
787 			mdb_warn("cache %p's magazine layer holds more buffers "
788 			    "than the slab layer.\n", addr);
789 		}
790 	}
791 	return (cache_est);
792 }
793 
/*
 * Helper for umem_read_magazines(): read the magazine at target address
 * 'ump' into the local buffer 'mp' and append its first 'rounds' entries to
 * maglist[].
 *
 * The macro relies on the caller's locals mp, ump, magbsize, maglist,
 * magcnt, magmax and i, and on a 'fail' label that it jumps to when the
 * read fails or when maglist[] becomes full (magcnt reaches magmax).  The
 * capacity check runs after each store, so maglist[] is never written past
 * its last element.  The expansion is a bare brace block, not a
 * do/while (0) statement.
 */
#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
808 
/*
 * Collect the addresses of all buffers held in the magazine layer of a
 * cache: the full magazines in the depot plus each CPU's loaded and
 * previously loaded magazines.
 *
 *	cp		local copy of the cache, including its per-CPU data
 *	addr		target address of the cache (used in messages only)
 *	maglistp	out: array of buffer addresses, or NULL
 *	magcntp		out: number of valid entries in *maglistp
 *	magmaxp		out: number of entries *maglistp was allocated for
 *	alloc_flags	flags passed to mdb_alloc() for both allocations
 *
 * Returns WALK_NEXT on success; a cache whose magazine size is 0 succeeds
 * with an empty (NULL) list.  Returns WALK_ERR on failure, in which case
 * the out parameters are not written.  Unless alloc_flags includes UM_GC,
 * the scratch magazine buffer is freed here and, on failure, so is the
 * list; on success the caller owns *maglistp and frees it with a size of
 * *magmaxp * sizeof (void *).
 */
int
umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	umem_magazine_t *ump, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = umem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
	/* bytes occupied by a magazine that holds magsize rounds */
	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);

	/* sanity check before allocating: reject absurd magazine sizes */
	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
		READMAG_ROUNDS(magsize);
		ump = mp->mag_next;

		if (ump == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		/* only the first cc_rounds entries of the magazine are taken */
		if (ccp->cc_rounds > 0 &&
		    (ump = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (ump = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	/* the scratch magazine is no longer needed; maglist goes to caller */
	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	/* UM_GC allocations are not freed explicitly */
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}
914 
915 static int
916 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
917 {
918 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
919 }
920 
921 static int
922 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
923 {
924 	umem_bufctl_audit_t *b;
925 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
926 
927 	/*
928 	 * if UMF_AUDIT is not set, we know that we're looking at a
929 	 * umem_bufctl_t.
930 	 */
931 	if (!(cp->cache_flags & UMF_AUDIT) ||
932 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
933 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
934 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
935 			mdb_warn("unable to read bufctl at %p", buf);
936 			return (WALK_ERR);
937 		}
938 	}
939 
940 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
941 }
942 
943 typedef struct umem_walk {
944 	int umw_type;
945 
946 	int umw_addr;			/* cache address */
947 	umem_cache_t *umw_cp;
948 	size_t umw_csize;
949 
950 	/*
951 	 * magazine layer
952 	 */
953 	void **umw_maglist;
954 	size_t umw_max;
955 	size_t umw_count;
956 	size_t umw_pos;
957 
958 	/*
959 	 * slab layer
960 	 */
961 	char *umw_valid;	/* to keep track of freed buffers */
962 	char *umw_ubase;	/* buffer for slab data */
963 } umem_walk_t;
964 
965 static int
966 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
967 {
968 	umem_walk_t *umw;
969 	int csize;
970 	umem_cache_t *cp;
971 	size_t vm_quantum;
972 
973 	size_t magmax, magcnt;
974 	void **maglist = NULL;
975 	uint_t chunksize, slabsize;
976 	int status = WALK_ERR;
977 	uintptr_t addr = wsp->walk_addr;
978 	const char *layered;
979 
980 	type &= ~UM_HASH;
981 
982 	if (addr == NULL) {
983 		mdb_warn("umem walk doesn't support global walks\n");
984 		return (WALK_ERR);
985 	}
986 
987 	dprintf(("walking %p\n", addr));
988 
989 	/*
990 	 * The number of "cpus" determines how large the cache is.
991 	 */
992 	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
993 	cp = mdb_alloc(csize, UM_SLEEP);
994 
995 	if (mdb_vread(cp, csize, addr) == -1) {
996 		mdb_warn("couldn't read cache at addr %p", addr);
997 		goto out2;
998 	}
999 
1000 	/*
1001 	 * It's easy for someone to hand us an invalid cache address.
1002 	 * Unfortunately, it is hard for this walker to survive an
1003 	 * invalid cache cleanly.  So we make sure that:
1004 	 *
1005 	 *	1. the vmem arena for the cache is readable,
1006 	 *	2. the vmem arena's quantum is a power of 2,
1007 	 *	3. our slabsize is a multiple of the quantum, and
1008 	 *	4. our chunksize is >0 and less than our slabsize.
1009 	 */
1010 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1011 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1012 	    vm_quantum == 0 ||
1013 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1014 	    cp->cache_slabsize < vm_quantum ||
1015 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1016 	    cp->cache_chunksize == 0 ||
1017 	    cp->cache_chunksize > cp->cache_slabsize) {
1018 		mdb_warn("%p is not a valid umem_cache_t\n", addr);
1019 		goto out2;
1020 	}
1021 
1022 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1023 
1024 	if (cp->cache_buftotal == 0) {
1025 		mdb_free(cp, csize);
1026 		return (WALK_DONE);
1027 	}
1028 
1029 	/*
1030 	 * If they ask for bufctls, but it's a small-slab cache,
1031 	 * there is nothing to report.
1032 	 */
1033 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1034 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1035 		    cp->cache_flags));
1036 		mdb_free(cp, csize);
1037 		return (WALK_DONE);
1038 	}
1039 
1040 	/*
1041 	 * Read in the contents of the magazine layer
1042 	 */
1043 	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax,
1044 	    UM_SLEEP) == WALK_ERR)
1045 		goto out2;
1046 
1047 	/*
1048 	 * We have all of the buffers from the magazines;  if we are walking
1049 	 * allocated buffers, sort them so we can bsearch them later.
1050 	 */
1051 	if (type & UM_ALLOCATED)
1052 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1053 
1054 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1055 
1056 	umw->umw_type = type;
1057 	umw->umw_addr = addr;
1058 	umw->umw_cp = cp;
1059 	umw->umw_csize = csize;
1060 	umw->umw_maglist = maglist;
1061 	umw->umw_max = magmax;
1062 	umw->umw_count = magcnt;
1063 	umw->umw_pos = 0;
1064 
1065 	/*
1066 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
1067 	 * hash table instead of the slab layer.
1068 	 */
1069 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1070 		layered = "umem_hash";
1071 
1072 		umw->umw_type |= UM_HASH;
1073 	} else {
1074 		/*
1075 		 * If we are walking freed buffers, we only need the
1076 		 * magazine layer plus the partially allocated slabs.
1077 		 * To walk allocated buffers, we need all of the slabs.
1078 		 */
1079 		if (type & UM_ALLOCATED)
1080 			layered = "umem_slab";
1081 		else
1082 			layered = "umem_slab_partial";
1083 
1084 		/*
1085 		 * for small-slab caches, we read in the entire slab.  For
1086 		 * freed buffers, we can just walk the freelist.  For
1087 		 * allocated buffers, we use a 'valid' array to track
1088 		 * the freed buffers.
1089 		 */
1090 		if (!(cp->cache_flags & UMF_HASH)) {
1091 			chunksize = cp->cache_chunksize;
1092 			slabsize = cp->cache_slabsize;
1093 
1094 			umw->umw_ubase = mdb_alloc(slabsize +
1095 			    sizeof (umem_bufctl_t), UM_SLEEP);
1096 
1097 			if (type & UM_ALLOCATED)
1098 				umw->umw_valid =
1099 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1100 		}
1101 	}
1102 
1103 	status = WALK_NEXT;
1104 
1105 	if (mdb_layered_walk(layered, wsp) == -1) {
1106 		mdb_warn("unable to start layered '%s' walk", layered);
1107 		status = WALK_ERR;
1108 	}
1109 
1110 out1:
1111 	if (status == WALK_ERR) {
1112 		if (umw->umw_valid)
1113 			mdb_free(umw->umw_valid, slabsize / chunksize);
1114 
1115 		if (umw->umw_ubase)
1116 			mdb_free(umw->umw_ubase, slabsize +
1117 			    sizeof (umem_bufctl_t));
1118 
1119 		if (umw->umw_maglist)
1120 			mdb_free(umw->umw_maglist, umw->umw_max *
1121 			    sizeof (uintptr_t));
1122 
1123 		mdb_free(umw, sizeof (umem_walk_t));
1124 		wsp->walk_data = NULL;
1125 	}
1126 
1127 out2:
1128 	if (status == WALK_ERR)
1129 		mdb_free(cp, csize);
1130 
1131 	return (status);
1132 }
1133 
1134 int
1135 umem_walk_step(mdb_walk_state_t *wsp)
1136 {
1137 	umem_walk_t *umw = wsp->walk_data;
1138 	int type = umw->umw_type;
1139 	umem_cache_t *cp = umw->umw_cp;
1140 
1141 	void **maglist = umw->umw_maglist;
1142 	int magcnt = umw->umw_count;
1143 
1144 	uintptr_t chunksize, slabsize;
1145 	uintptr_t addr;
1146 	const umem_slab_t *sp;
1147 	const umem_bufctl_t *bcp;
1148 	umem_bufctl_t bc;
1149 
1150 	int chunks;
1151 	char *kbase;
1152 	void *buf;
1153 	int i, ret;
1154 
1155 	char *valid, *ubase;
1156 
1157 	/*
1158 	 * first, handle the 'umem_hash' layered walk case
1159 	 */
1160 	if (type & UM_HASH) {
1161 		/*
1162 		 * We have a buffer which has been allocated out of the
1163 		 * global layer. We need to make sure that it's not
1164 		 * actually sitting in a magazine before we report it as
1165 		 * an allocated buffer.
1166 		 */
1167 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1168 
1169 		if (magcnt > 0 &&
1170 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1171 		    addrcmp) != NULL)
1172 			return (WALK_NEXT);
1173 
1174 		if (type & UM_BUFCTL)
1175 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1176 
1177 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1178 	}
1179 
1180 	ret = WALK_NEXT;
1181 
1182 	addr = umw->umw_addr;
1183 
1184 	/*
1185 	 * If we're walking freed buffers, report everything in the
1186 	 * magazine layer before processing the first slab.
1187 	 */
1188 	if ((type & UM_FREE) && magcnt != 0) {
1189 		umw->umw_count = 0;		/* only do this once */
1190 		for (i = 0; i < magcnt; i++) {
1191 			buf = maglist[i];
1192 
1193 			if (type & UM_BUFCTL) {
1194 				uintptr_t out;
1195 
1196 				if (cp->cache_flags & UMF_BUFTAG) {
1197 					umem_buftag_t *btp;
1198 					umem_buftag_t tag;
1199 
1200 					/* LINTED - alignment */
1201 					btp = UMEM_BUFTAG(cp, buf);
1202 					if (mdb_vread(&tag, sizeof (tag),
1203 					    (uintptr_t)btp) == -1) {
1204 						mdb_warn("reading buftag for "
1205 						    "%p at %p", buf, btp);
1206 						continue;
1207 					}
1208 					out = (uintptr_t)tag.bt_bufctl;
1209 				} else {
1210 					if (umem_hash_lookup(cp, addr, buf,
1211 					    &out) == -1)
1212 						continue;
1213 				}
1214 				ret = bufctl_walk_callback(cp, wsp, out);
1215 			} else {
1216 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1217 			}
1218 
1219 			if (ret != WALK_NEXT)
1220 				return (ret);
1221 		}
1222 	}
1223 
1224 	/*
1225 	 * Handle the buffers in the current slab
1226 	 */
1227 	chunksize = cp->cache_chunksize;
1228 	slabsize = cp->cache_slabsize;
1229 
1230 	sp = wsp->walk_layer;
1231 	chunks = sp->slab_chunks;
1232 	kbase = sp->slab_base;
1233 
1234 	dprintf(("kbase is %p\n", kbase));
1235 
1236 	if (!(cp->cache_flags & UMF_HASH)) {
1237 		valid = umw->umw_valid;
1238 		ubase = umw->umw_ubase;
1239 
1240 		if (mdb_vread(ubase, chunks * chunksize,
1241 		    (uintptr_t)kbase) == -1) {
1242 			mdb_warn("failed to read slab contents at %p", kbase);
1243 			return (WALK_ERR);
1244 		}
1245 
1246 		/*
1247 		 * Set up the valid map as fully allocated -- we'll punch
1248 		 * out the freelist.
1249 		 */
1250 		if (type & UM_ALLOCATED)
1251 			(void) memset(valid, 1, chunks);
1252 	} else {
1253 		valid = NULL;
1254 		ubase = NULL;
1255 	}
1256 
1257 	/*
1258 	 * walk the slab's freelist
1259 	 */
1260 	bcp = sp->slab_head;
1261 
1262 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1263 
1264 	/*
1265 	 * since we could be in the middle of allocating a buffer,
1266 	 * our refcnt could be one higher than it aught.  So we
1267 	 * check one further on the freelist than the count allows.
1268 	 */
1269 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1270 		uint_t ndx;
1271 
1272 		dprintf(("bcp is %p\n", bcp));
1273 
1274 		if (bcp == NULL) {
1275 			if (i == chunks)
1276 				break;
1277 			mdb_warn(
1278 			    "slab %p in cache %p freelist too short by %d\n",
1279 			    sp, addr, chunks - i);
1280 			break;
1281 		}
1282 
1283 		if (cp->cache_flags & UMF_HASH) {
1284 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1285 				mdb_warn("failed to read bufctl ptr at %p",
1286 				    bcp);
1287 				break;
1288 			}
1289 			buf = bc.bc_addr;
1290 		} else {
1291 			/*
1292 			 * Otherwise the buffer is in the slab which
1293 			 * we've read in;  we just need to determine
1294 			 * its offset in the slab to find the
1295 			 * umem_bufctl_t.
1296 			 */
1297 			bc = *((umem_bufctl_t *)
1298 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1299 			    (uintptr_t)ubase));
1300 
1301 			buf = UMEM_BUF(cp, bcp);
1302 		}
1303 
1304 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1305 
1306 		if (ndx > slabsize / cp->cache_bufsize) {
1307 			/*
1308 			 * This is very wrong; we have managed to find
1309 			 * a buffer in the slab which shouldn't
1310 			 * actually be here.  Emit a warning, and
1311 			 * try to continue.
1312 			 */
1313 			mdb_warn("buf %p is out of range for "
1314 			    "slab %p, cache %p\n", buf, sp, addr);
1315 		} else if (type & UM_ALLOCATED) {
1316 			/*
1317 			 * we have found a buffer on the slab's freelist;
1318 			 * clear its entry
1319 			 */
1320 			valid[ndx] = 0;
1321 		} else {
1322 			/*
1323 			 * Report this freed buffer
1324 			 */
1325 			if (type & UM_BUFCTL) {
1326 				ret = bufctl_walk_callback(cp, wsp,
1327 				    (uintptr_t)bcp);
1328 			} else {
1329 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1330 			}
1331 			if (ret != WALK_NEXT)
1332 				return (ret);
1333 		}
1334 
1335 		bcp = bc.bc_next;
1336 	}
1337 
1338 	if (bcp != NULL) {
1339 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1340 		    sp, addr, bcp));
1341 	}
1342 
1343 	/*
1344 	 * If we are walking freed buffers, the loop above handled reporting
1345 	 * them.
1346 	 */
1347 	if (type & UM_FREE)
1348 		return (WALK_NEXT);
1349 
1350 	if (type & UM_BUFCTL) {
1351 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1352 		    "cache %p\n", addr);
1353 		return (WALK_ERR);
1354 	}
1355 
1356 	/*
1357 	 * Report allocated buffers, skipping buffers in the magazine layer.
1358 	 * We only get this far for small-slab caches.
1359 	 */
1360 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1361 		buf = (char *)kbase + i * chunksize;
1362 
1363 		if (!valid[i])
1364 			continue;		/* on slab freelist */
1365 
1366 		if (magcnt > 0 &&
1367 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1368 		    addrcmp) != NULL)
1369 			continue;		/* in magazine layer */
1370 
1371 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1372 	}
1373 	return (ret);
1374 }
1375 
1376 void
1377 umem_walk_fini(mdb_walk_state_t *wsp)
1378 {
1379 	umem_walk_t *umw = wsp->walk_data;
1380 	uintptr_t chunksize;
1381 	uintptr_t slabsize;
1382 
1383 	if (umw == NULL)
1384 		return;
1385 
1386 	if (umw->umw_maglist != NULL)
1387 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1388 
1389 	chunksize = umw->umw_cp->cache_chunksize;
1390 	slabsize = umw->umw_cp->cache_slabsize;
1391 
1392 	if (umw->umw_valid != NULL)
1393 		mdb_free(umw->umw_valid, slabsize / chunksize);
1394 	if (umw->umw_ubase != NULL)
1395 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1396 
1397 	mdb_free(umw->umw_cp, umw->umw_csize);
1398 	mdb_free(umw, sizeof (umem_walk_t));
1399 }
1400 
1401 /*ARGSUSED*/
1402 static int
1403 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1404 {
1405 	/*
1406 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1407 	 * memory in other caches.  This can be a little confusing, so we
1408 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1409 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1410 	 */
1411 	if (c->cache_cflags & UMC_NOTOUCH)
1412 		return (WALK_NEXT);
1413 
1414 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1415 	    wsp->walk_cbdata, addr) == -1)
1416 		return (WALK_DONE);
1417 
1418 	return (WALK_NEXT);
1419 }
1420 
/*
 * Turn a global walk into one walk per cache: stash the walker name in
 * walk_data and run umem_walk_all() over every umem_cache.  This macro
 * RETURNS from the enclosing function (WALK_ERR if the umem_cache walk
 * cannot be started, WALK_DONE otherwise).  The wsp argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define	UMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1427 
1428 int
1429 umem_walk_init(mdb_walk_state_t *wsp)
1430 {
1431 	if (wsp->walk_arg != NULL)
1432 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1433 
1434 	if (wsp->walk_addr == NULL)
1435 		UMEM_WALK_ALL("umem", wsp);
1436 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1437 }
1438 
1439 int
1440 bufctl_walk_init(mdb_walk_state_t *wsp)
1441 {
1442 	if (wsp->walk_addr == NULL)
1443 		UMEM_WALK_ALL("bufctl", wsp);
1444 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1445 }
1446 
1447 int
1448 freemem_walk_init(mdb_walk_state_t *wsp)
1449 {
1450 	if (wsp->walk_addr == NULL)
1451 		UMEM_WALK_ALL("freemem", wsp);
1452 	return (umem_walk_init_common(wsp, UM_FREE));
1453 }
1454 
1455 int
1456 freectl_walk_init(mdb_walk_state_t *wsp)
1457 {
1458 	if (wsp->walk_addr == NULL)
1459 		UMEM_WALK_ALL("freectl", wsp);
1460 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1461 }
1462 
/*
 * Private state for the bufctl_history walker.
 */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* address of next bufctl to visit */
	umem_cache_t	*bhw_cache;	/* bc_cache of the starting bufctl */
	umem_slab_t	*bhw_slab;	/* bc_slab of the starting bufctl */
	hrtime_t	bhw_timestamp;	/* last reported timestamp; 0 = none */
} bufctl_history_walk_t;
1469 
1470 int
1471 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1472 {
1473 	bufctl_history_walk_t *bhw;
1474 	umem_bufctl_audit_t bc;
1475 	umem_bufctl_audit_t bcn;
1476 
1477 	if (wsp->walk_addr == NULL) {
1478 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1479 		return (WALK_ERR);
1480 	}
1481 
1482 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1483 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1484 		return (WALK_ERR);
1485 	}
1486 
1487 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1488 	bhw->bhw_timestamp = 0;
1489 	bhw->bhw_cache = bc.bc_cache;
1490 	bhw->bhw_slab = bc.bc_slab;
1491 
1492 	/*
1493 	 * sometimes the first log entry matches the base bufctl;  in that
1494 	 * case, skip the base bufctl.
1495 	 */
1496 	if (bc.bc_lastlog != NULL &&
1497 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1498 	    bc.bc_addr == bcn.bc_addr &&
1499 	    bc.bc_cache == bcn.bc_cache &&
1500 	    bc.bc_slab == bcn.bc_slab &&
1501 	    bc.bc_timestamp == bcn.bc_timestamp &&
1502 	    bc.bc_thread == bcn.bc_thread)
1503 		bhw->bhw_next = bc.bc_lastlog;
1504 	else
1505 		bhw->bhw_next = (void *)wsp->walk_addr;
1506 
1507 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1508 	wsp->walk_data = bhw;
1509 
1510 	return (WALK_NEXT);
1511 }
1512 
1513 int
1514 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1515 {
1516 	bufctl_history_walk_t *bhw = wsp->walk_data;
1517 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1518 	uintptr_t baseaddr = wsp->walk_addr;
1519 	umem_bufctl_audit_t *b;
1520 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1521 
1522 	if (addr == NULL)
1523 		return (WALK_DONE);
1524 
1525 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1526 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1527 		return (WALK_ERR);
1528 	}
1529 
1530 	/*
1531 	 * The bufctl is only valid if the address, cache, and slab are
1532 	 * correct.  We also check that the timestamp is decreasing, to
1533 	 * prevent infinite loops.
1534 	 */
1535 	if ((uintptr_t)b->bc_addr != baseaddr ||
1536 	    b->bc_cache != bhw->bhw_cache ||
1537 	    b->bc_slab != bhw->bhw_slab ||
1538 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1539 		return (WALK_DONE);
1540 
1541 	bhw->bhw_next = b->bc_lastlog;
1542 	bhw->bhw_timestamp = b->bc_timestamp;
1543 
1544 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1545 }
1546 
1547 void
1548 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1549 {
1550 	bufctl_history_walk_t *bhw = wsp->walk_data;
1551 
1552 	mdb_free(bhw, sizeof (*bhw));
1553 }
1554 
/*
 * Private state for the umem_log walker.
 */
typedef struct umem_log_walk {
	umem_bufctl_audit_t *ulw_base;		/* local copy of the log */
	umem_bufctl_audit_t **ulw_sorted;	/* sorted ptrs into ulw_base */
	umem_log_header_t ulw_lh;		/* local copy of log header */
	size_t ulw_size;			/* bytes allocated at ulw_base */
	size_t ulw_maxndx;			/* entries in ulw_sorted */
	size_t ulw_ndx;				/* next ulw_sorted index */
} umem_log_walk_t;
1563 
1564 int
1565 umem_log_walk_init(mdb_walk_state_t *wsp)
1566 {
1567 	uintptr_t lp = wsp->walk_addr;
1568 	umem_log_walk_t *ulw;
1569 	umem_log_header_t *lhp;
1570 	int maxndx, i, j, k;
1571 
1572 	/*
1573 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1574 	 * read the log whose umem_log_header_t is stored at walk_addr.
1575 	 */
1576 	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1577 		mdb_warn("failed to read 'umem_transaction_log'");
1578 		return (WALK_ERR);
1579 	}
1580 
1581 	if (lp == NULL) {
1582 		mdb_warn("log is disabled\n");
1583 		return (WALK_ERR);
1584 	}
1585 
1586 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1587 	lhp = &ulw->ulw_lh;
1588 
1589 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1590 		mdb_warn("failed to read log header at %p", lp);
1591 		mdb_free(ulw, sizeof (umem_log_walk_t));
1592 		return (WALK_ERR);
1593 	}
1594 
1595 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1596 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1597 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1598 
1599 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1600 	    (uintptr_t)lhp->lh_base) == -1) {
1601 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1602 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1603 		mdb_free(ulw, sizeof (umem_log_walk_t));
1604 		return (WALK_ERR);
1605 	}
1606 
1607 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1608 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1609 
1610 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1611 		caddr_t chunk = (caddr_t)
1612 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1613 
1614 		for (j = 0; j < maxndx; j++) {
1615 			/* LINTED align */
1616 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1617 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1618 		}
1619 	}
1620 
1621 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1622 	    (int(*)(const void *, const void *))bufctlcmp);
1623 
1624 	ulw->ulw_maxndx = k;
1625 	wsp->walk_data = ulw;
1626 
1627 	return (WALK_NEXT);
1628 }
1629 
1630 int
1631 umem_log_walk_step(mdb_walk_state_t *wsp)
1632 {
1633 	umem_log_walk_t *ulw = wsp->walk_data;
1634 	umem_bufctl_audit_t *bcp;
1635 
1636 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1637 		return (WALK_DONE);
1638 
1639 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1640 
1641 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1642 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1643 }
1644 
1645 void
1646 umem_log_walk_fini(mdb_walk_state_t *wsp)
1647 {
1648 	umem_log_walk_t *ulw = wsp->walk_data;
1649 
1650 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1651 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1652 	    sizeof (umem_bufctl_audit_t *));
1653 	mdb_free(ulw, sizeof (umem_log_walk_t));
1654 }
1655 
/*
 * One bufctl collected by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* address of the bufctl */
	hrtime_t abb_ts;	/* its bc_timestamp; used as the sort key */
} allocdby_bufctl_t;
1660 
/*
 * Private state for the allocdby/freedby walkers.
 */
typedef struct allocdby_walk {
	const char *abw_walk;		/* per-cache walker to run */
	uintptr_t abw_thread;		/* thread whose bufctls are kept */
	size_t abw_nbufs;		/* entries in use in abw_buf */
	size_t abw_size;		/* capacity of abw_buf, in entries */
	allocdby_bufctl_t *abw_buf;	/* collected bufctls */
	size_t abw_ndx;			/* next abw_buf entry to report */
} allocdby_walk_t;
1669 
1670 int
1671 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1672     allocdby_walk_t *abw)
1673 {
1674 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1675 		return (WALK_NEXT);
1676 
1677 	if (abw->abw_nbufs == abw->abw_size) {
1678 		allocdby_bufctl_t *buf;
1679 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1680 
1681 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1682 
1683 		bcopy(abw->abw_buf, buf, oldsize);
1684 		mdb_free(abw->abw_buf, oldsize);
1685 
1686 		abw->abw_size <<= 1;
1687 		abw->abw_buf = buf;
1688 	}
1689 
1690 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1691 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1692 	abw->abw_nbufs++;
1693 
1694 	return (WALK_NEXT);
1695 }
1696 
1697 /*ARGSUSED*/
1698 int
1699 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1700 {
1701 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1702 	    abw, addr) == -1) {
1703 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1704 		return (WALK_DONE);
1705 	}
1706 
1707 	return (WALK_NEXT);
1708 }
1709 
1710 static int
1711 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1712 {
1713 	if (lhs->abb_ts < rhs->abb_ts)
1714 		return (1);
1715 	if (lhs->abb_ts > rhs->abb_ts)
1716 		return (-1);
1717 	return (0);
1718 }
1719 
1720 static int
1721 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1722 {
1723 	allocdby_walk_t *abw;
1724 
1725 	if (wsp->walk_addr == NULL) {
1726 		mdb_warn("allocdby walk doesn't support global walks\n");
1727 		return (WALK_ERR);
1728 	}
1729 
1730 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1731 
1732 	abw->abw_thread = wsp->walk_addr;
1733 	abw->abw_walk = walk;
1734 	abw->abw_size = 128;	/* something reasonable */
1735 	abw->abw_buf =
1736 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1737 
1738 	wsp->walk_data = abw;
1739 
1740 	if (mdb_walk("umem_cache",
1741 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1742 		mdb_warn("couldn't walk umem_cache");
1743 		allocdby_walk_fini(wsp);
1744 		return (WALK_ERR);
1745 	}
1746 
1747 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1748 	    (int(*)(const void *, const void *))allocdby_cmp);
1749 
1750 	return (WALK_NEXT);
1751 }
1752 
1753 int
1754 allocdby_walk_init(mdb_walk_state_t *wsp)
1755 {
1756 	return (allocdby_walk_init_common(wsp, "bufctl"));
1757 }
1758 
1759 int
1760 freedby_walk_init(mdb_walk_state_t *wsp)
1761 {
1762 	return (allocdby_walk_init_common(wsp, "freectl"));
1763 }
1764 
1765 int
1766 allocdby_walk_step(mdb_walk_state_t *wsp)
1767 {
1768 	allocdby_walk_t *abw = wsp->walk_data;
1769 	uintptr_t addr;
1770 	umem_bufctl_audit_t *bcp;
1771 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1772 
1773 	if (abw->abw_ndx == abw->abw_nbufs)
1774 		return (WALK_DONE);
1775 
1776 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1777 
1778 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1779 		mdb_warn("couldn't read bufctl at %p", addr);
1780 		return (WALK_DONE);
1781 	}
1782 
1783 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1784 }
1785 
1786 void
1787 allocdby_walk_fini(mdb_walk_state_t *wsp)
1788 {
1789 	allocdby_walk_t *abw = wsp->walk_data;
1790 
1791 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1792 	mdb_free(abw, sizeof (allocdby_walk_t));
1793 }
1794 
1795 /*ARGSUSED*/
1796 int
1797 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1798 {
1799 	char c[MDB_SYM_NAMLEN];
1800 	GElf_Sym sym;
1801 	int i;
1802 
1803 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1804 	for (i = 0; i < bcp->bc_depth; i++) {
1805 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1806 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1807 			continue;
1808 		if (is_umem_sym(c, "umem_"))
1809 			continue;
1810 		mdb_printf("%s+0x%lx",
1811 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1812 		break;
1813 	}
1814 	mdb_printf("\n");
1815 
1816 	return (WALK_NEXT);
1817 }
1818 
1819 static int
1820 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1821 {
1822 	if (!(flags & DCMD_ADDRSPEC))
1823 		return (DCMD_USAGE);
1824 
1825 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1826 
1827 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1828 		mdb_warn("can't walk '%s' for %p", w, addr);
1829 		return (DCMD_ERR);
1830 	}
1831 
1832 	return (DCMD_OK);
1833 }
1834 
1835 /*ARGSUSED*/
1836 int
1837 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1838 {
1839 	return (allocdby_common(addr, flags, "allocdby"));
1840 }
1841 
1842 /*ARGSUSED*/
1843 int
1844 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1845 {
1846 	return (allocdby_common(addr, flags, "freedby"));
1847 }
1848 
/*
 * State shared by the ::whatis dcmd and its walk callbacks.
 */
typedef struct whatis {
	uintptr_t w_addr;		/* address being identified */
	const umem_cache_t *w_cache;	/* cache currently being searched */
	const vmem_t *w_vmem;		/* arena currently being searched */
	int w_found;			/* number of matches so far */
	uint_t w_verbose;		/* -v: announce each search */
	uint_t w_freemem;		/* TRUE while searching free memory */
	uint_t w_all;			/* -a: keep going after a match */
	uint_t w_bufctl;		/* -b: search via bufctl walkers */
} whatis_t;
1859 
1860 static void
1861 whatis_print_umem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
1862 {
1863 	/* LINTED pointer cast may result in improper alignment */
1864 	uintptr_t btaddr = (uintptr_t)UMEM_BUFTAG(w->w_cache, addr);
1865 	intptr_t stat;
1866 
1867 	if (w->w_cache->cache_flags & UMF_REDZONE) {
1868 		umem_buftag_t bt;
1869 
1870 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
1871 			goto done;
1872 
1873 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
1874 
1875 		if (stat != UMEM_BUFTAG_ALLOC && stat != UMEM_BUFTAG_FREE)
1876 			goto done;
1877 
1878 		/*
1879 		 * provide the bufctl ptr if it has useful information
1880 		 */
1881 		if (baddr == 0 && (w->w_cache->cache_flags & UMF_AUDIT))
1882 			baddr = (uintptr_t)bt.bt_bufctl;
1883 	}
1884 
1885 done:
1886 	if (baddr == 0)
1887 		mdb_printf("%p is %p+%p, %s from %s\n",
1888 		    w->w_addr, addr, w->w_addr - addr,
1889 		    w->w_freemem == FALSE ? "allocated" : "freed",
1890 		    w->w_cache->cache_name);
1891 	else
1892 		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
1893 		    w->w_addr, addr, w->w_addr - addr, baddr,
1894 		    w->w_freemem == FALSE ? "allocated" : "freed",
1895 		    w->w_cache->cache_name);
1896 }
1897 
1898 /*ARGSUSED*/
1899 static int
1900 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_t *w)
1901 {
1902 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1903 		return (WALK_NEXT);
1904 
1905 	whatis_print_umem(addr, 0, w);
1906 	w->w_found++;
1907 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1908 }
1909 
1910 static int
1911 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
1912 {
1913 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
1914 		return (WALK_NEXT);
1915 
1916 	mdb_printf("%p is %p+%p ", w->w_addr,
1917 	    vs->vs_start, w->w_addr - vs->vs_start);
1918 
1919 	/*
1920 	 * Always provide the vmem_seg pointer if it has a stack trace.
1921 	 */
1922 	if (w->w_bufctl == TRUE ||
1923 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
1924 		mdb_printf("(vmem_seg %p) ", addr);
1925 	}
1926 
1927 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
1928 	    "freed " : "", w->w_vmem->vm_name);
1929 
1930 	w->w_found++;
1931 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1932 }
1933 
1934 static int
1935 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
1936 {
1937 	const char *nm = vmem->vm_name;
1938 	w->w_vmem = vmem;
1939 	w->w_freemem = FALSE;
1940 
1941 	if (w->w_verbose)
1942 		mdb_printf("Searching vmem arena %s...\n", nm);
1943 
1944 	if (mdb_pwalk("vmem_alloc",
1945 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1946 		mdb_warn("can't walk vmem seg for %p", addr);
1947 		return (WALK_NEXT);
1948 	}
1949 
1950 	if (w->w_found && w->w_all == FALSE)
1951 		return (WALK_DONE);
1952 
1953 	if (w->w_verbose)
1954 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
1955 
1956 	w->w_freemem = TRUE;
1957 
1958 	if (mdb_pwalk("vmem_free",
1959 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1960 		mdb_warn("can't walk vmem seg for %p", addr);
1961 		return (WALK_NEXT);
1962 	}
1963 
1964 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
1965 }
1966 
1967 /*ARGSUSED*/
1968 static int
1969 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_t *w)
1970 {
1971 	uintptr_t addr;
1972 
1973 	if (bcp == NULL)
1974 		return (WALK_NEXT);
1975 
1976 	addr = (uintptr_t)bcp->bc_addr;
1977 
1978 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1979 		return (WALK_NEXT);
1980 
1981 	whatis_print_umem(addr, baddr, w);
1982 	w->w_found++;
1983 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1984 }
1985 
1986 static int
1987 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
1988 {
1989 	char *walk, *freewalk;
1990 	mdb_walk_cb_t func;
1991 
1992 	if (w->w_bufctl == FALSE) {
1993 		walk = "umem";
1994 		freewalk = "freemem";
1995 		func = (mdb_walk_cb_t)whatis_walk_umem;
1996 	} else {
1997 		walk = "bufctl";
1998 		freewalk = "freectl";
1999 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2000 	}
2001 
2002 	if (w->w_verbose)
2003 		mdb_printf("Searching %s...\n", c->cache_name);
2004 
2005 	w->w_cache = c;
2006 	w->w_freemem = FALSE;
2007 
2008 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2009 		mdb_warn("can't find %s walker", walk);
2010 		return (WALK_DONE);
2011 	}
2012 
2013 	if (w->w_found && w->w_all == FALSE)
2014 		return (WALK_DONE);
2015 
2016 	/*
2017 	 * We have searched for allocated memory; now search for freed memory.
2018 	 */
2019 	if (w->w_verbose)
2020 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2021 
2022 	w->w_freemem = TRUE;
2023 
2024 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2025 		mdb_warn("can't find %s walker", freewalk);
2026 		return (WALK_DONE);
2027 	}
2028 
2029 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2030 }
2031 
2032 static int
2033 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2034 {
2035 	if (c->cache_cflags & UMC_NOTOUCH)
2036 		return (WALK_NEXT);
2037 
2038 	return (whatis_walk_cache(addr, c, w));
2039 }
2040 
2041 static int
2042 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2043 {
2044 	if (!(c->cache_cflags & UMC_NOTOUCH))
2045 		return (WALK_NEXT);
2046 
2047 	return (whatis_walk_cache(addr, c, w));
2048 }
2049 
2050 int
2051 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2052 {
2053 	whatis_t w;
2054 
2055 	if (!(flags & DCMD_ADDRSPEC))
2056 		return (DCMD_USAGE);
2057 
2058 	w.w_verbose = FALSE;
2059 	w.w_bufctl = FALSE;
2060 	w.w_all = FALSE;
2061 
2062 	if (mdb_getopts(argc, argv,
2063 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2064 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2065 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
2066 		return (DCMD_USAGE);
2067 
2068 	w.w_addr = addr;
2069 	w.w_found = 0;
2070 
2071 	/*
2072 	 * Mappings and threads should eventually be added here.
2073 	 */
2074 	if (mdb_walk("umem_cache",
2075 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2076 		mdb_warn("couldn't find umem_cache walker");
2077 		return (DCMD_ERR);
2078 	}
2079 
2080 	if (w.w_found && w.w_all == FALSE)
2081 		return (DCMD_OK);
2082 
2083 	if (mdb_walk("umem_cache",
2084 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2085 		mdb_warn("couldn't find umem_cache walker");
2086 		return (DCMD_ERR);
2087 	}
2088 
2089 	if (w.w_found && w.w_all == FALSE)
2090 		return (DCMD_OK);
2091 
2092 	if (mdb_walk("vmem_postfix",
2093 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2094 		mdb_warn("couldn't find vmem_postfix walker");
2095 		return (DCMD_ERR);
2096 	}
2097 
2098 	if (w.w_found == 0)
2099 		mdb_printf("%p is unknown\n", addr);
2100 
2101 	return (DCMD_OK);
2102 }
2103 
/*
 * Address range [umc_low, umc_high) of log entries attributed to one CPU
 * by ::umem_log.
 */
typedef struct umem_log_cpu {
	uintptr_t umc_low;	/* lh_base + clh_chunk * lh_chunksize */
	uintptr_t umc_high;	/* the CPU's clh_current */
} umem_log_cpu_t;
2108 
2109 int
2110 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2111 {
2112 	int i;
2113 
2114 	for (i = 0; i < umem_max_ncpus; i++) {
2115 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2116 			break;
2117 	}
2118 
2119 	if (i == umem_max_ncpus)
2120 		mdb_printf("   ");
2121 	else
2122 		mdb_printf("%3d", i);
2123 
2124 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2125 	    b->bc_timestamp, b->bc_thread);
2126 
2127 	return (WALK_NEXT);
2128 }
2129 
2130 /*ARGSUSED*/
2131 int
2132 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2133 {
2134 	umem_log_header_t lh;
2135 	umem_cpu_log_header_t clh;
2136 	uintptr_t lhp, clhp;
2137 	umem_log_cpu_t *umc;
2138 	int i;
2139 
2140 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2141 		mdb_warn("failed to read 'umem_transaction_log'");
2142 		return (DCMD_ERR);
2143 	}
2144 
2145 	if (lhp == NULL) {
2146 		mdb_warn("no umem transaction log\n");
2147 		return (DCMD_ERR);
2148 	}
2149 
2150 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2151 		mdb_warn("failed to read log header at %p", lhp);
2152 		return (DCMD_ERR);
2153 	}
2154 
2155 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2156 
2157 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2158 	    UM_SLEEP | UM_GC);
2159 
2160 	for (i = 0; i < umem_max_ncpus; i++) {
2161 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2162 			mdb_warn("cannot read cpu %d's log header at %p",
2163 			    i, clhp);
2164 			return (DCMD_ERR);
2165 		}
2166 
2167 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2168 		    (uintptr_t)lh.lh_base;
2169 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2170 
2171 		clhp += sizeof (umem_cpu_log_header_t);
2172 	}
2173 
2174 	if (DCMD_HDRSPEC(flags)) {
2175 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2176 		    "BUFADDR", "TIMESTAMP", "THREAD");
2177 	}
2178 
2179 	/*
2180 	 * If we have been passed an address, we'll just print out that
2181 	 * log entry.
2182 	 */
2183 	if (flags & DCMD_ADDRSPEC) {
2184 		umem_bufctl_audit_t *bp;
2185 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2186 
2187 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2188 			mdb_warn("failed to read bufctl at %p", addr);
2189 			return (DCMD_ERR);
2190 		}
2191 
2192 		(void) umem_log_walk(addr, bp, umc);
2193 
2194 		return (DCMD_OK);
2195 	}
2196 
2197 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2198 		mdb_warn("can't find umem log walker");
2199 		return (DCMD_ERR);
2200 	}
2201 
2202 	return (DCMD_OK);
2203 }
2204 
/*
 * Arguments carried from ::bufctl -h to bufctl_history_callback().
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags passed to bufctl() */
	int		bhc_argc;	/* argument count for bufctl() */
	const mdb_arg_t	*bhc_argv;	/* argument vector for bufctl() */
	int		bhc_ret;	/* result of the last bufctl() call */
} bufctl_history_cb_t;
2211 
2212 /*ARGSUSED*/
2213 static int
2214 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2215 {
2216 	bufctl_history_cb_t *bhc = arg;
2217 
2218 	bhc->bhc_ret =
2219 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2220 
2221 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2222 
2223 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2224 }
2225 
/*
 * Print the help text for the ::bufctl dcmd: a one-line summary followed
 * by the option list.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	/* The OPTIONS heading is printed two columns to the left. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2248 
2249 int
2250 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2251 {
2252 	uint_t verbose = FALSE;
2253 	uint_t history = FALSE;
2254 	uint_t in_history = FALSE;
2255 	uintptr_t caller = NULL, thread = NULL;
2256 	uintptr_t laddr, haddr, baddr = NULL;
2257 	hrtime_t earliest = 0, latest = 0;
2258 	int i, depth;
2259 	char c[MDB_SYM_NAMLEN];
2260 	GElf_Sym sym;
2261 	umem_bufctl_audit_t *bcp;
2262 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2263 
2264 	if (mdb_getopts(argc, argv,
2265 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2266 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2267 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2268 	    'c', MDB_OPT_UINTPTR, &caller,
2269 	    't', MDB_OPT_UINTPTR, &thread,
2270 	    'e', MDB_OPT_UINT64, &earliest,
2271 	    'l', MDB_OPT_UINT64, &latest,
2272 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2273 		return (DCMD_USAGE);
2274 
2275 	if (!(flags & DCMD_ADDRSPEC))
2276 		return (DCMD_USAGE);
2277 
2278 	if (in_history && !history)
2279 		return (DCMD_USAGE);
2280 
2281 	if (history && !in_history) {
2282 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2283 		    UM_SLEEP | UM_GC);
2284 		bufctl_history_cb_t bhc;
2285 
2286 		nargv[0].a_type = MDB_TYPE_STRING;
2287 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2288 
2289 		for (i = 0; i < argc; i++)
2290 			nargv[i + 1] = argv[i];
2291 
2292 		/*
2293 		 * When in history mode, we treat each element as if it
2294 		 * were in a seperate loop, so that the headers group
2295 		 * bufctls with similar histories.
2296 		 */
2297 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2298 		bhc.bhc_argc = argc + 1;
2299 		bhc.bhc_argv = nargv;
2300 		bhc.bhc_ret = DCMD_OK;
2301 
2302 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2303 		    addr) == -1) {
2304 			mdb_warn("unable to walk bufctl_history");
2305 			return (DCMD_ERR);
2306 		}
2307 
2308 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2309 			mdb_printf("\n");
2310 
2311 		return (bhc.bhc_ret);
2312 	}
2313 
2314 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2315 		if (verbose) {
2316 			mdb_printf("%16s %16s %16s %16s\n"
2317 			    "%<u>%16s %16s %16s %16s%</u>\n",
2318 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2319 			    "", "CACHE", "LASTLOG", "CONTENTS");
2320 		} else {
2321 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2322 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2323 		}
2324 	}
2325 
2326 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2327 		mdb_warn("couldn't read bufctl at %p", addr);
2328 		return (DCMD_ERR);
2329 	}
2330 
2331 	/*
2332 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2333 	 * the address does not really refer to a bufctl.
2334 	 */
2335 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2336 
2337 	if (caller != NULL) {
2338 		laddr = caller;
2339 		haddr = caller + sizeof (caller);
2340 
2341 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2342 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2343 			/*
2344 			 * We were provided an exact symbol value; any
2345 			 * address in the function is valid.
2346 			 */
2347 			laddr = (uintptr_t)sym.st_value;
2348 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2349 		}
2350 
2351 		for (i = 0; i < depth; i++)
2352 			if (bcp->bc_stack[i] >= laddr &&
2353 			    bcp->bc_stack[i] < haddr)
2354 				break;
2355 
2356 		if (i == depth)
2357 			return (DCMD_OK);
2358 	}
2359 
2360 	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2361 		return (DCMD_OK);
2362 
2363 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2364 		return (DCMD_OK);
2365 
2366 	if (latest != 0 && bcp->bc_timestamp > latest)
2367 		return (DCMD_OK);
2368 
2369 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2370 		return (DCMD_OK);
2371 
2372 	if (flags & DCMD_PIPE_OUT) {
2373 		mdb_printf("%#r\n", addr);
2374 		return (DCMD_OK);
2375 	}
2376 
2377 	if (verbose) {
2378 		mdb_printf(
2379 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2380 		    "%16s %16p %16p %16p\n",
2381 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2382 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2383 
2384 		mdb_inc_indent(17);
2385 		for (i = 0; i < depth; i++)
2386 			mdb_printf("%a\n", bcp->bc_stack[i]);
2387 		mdb_dec_indent(17);
2388 		mdb_printf("\n");
2389 	} else {
2390 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2391 		    bcp->bc_timestamp, bcp->bc_thread);
2392 
2393 		for (i = 0; i < depth; i++) {
2394 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2395 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2396 				continue;
2397 			if (is_umem_sym(c, "umem_"))
2398 				continue;
2399 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2400 			break;
2401 		}
2402 
2403 		if (i >= depth)
2404 			mdb_printf("\n");
2405 	}
2406 
2407 	return (DCMD_OK);
2408 }
2409 
2410 /*ARGSUSED*/
2411 int
2412 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2413 {
2414 	mdb_arg_t a;
2415 
2416 	if (!(flags & DCMD_ADDRSPEC))
2417 		return (DCMD_USAGE);
2418 
2419 	if (argc != 0)
2420 		return (DCMD_USAGE);
2421 
2422 	a.a_type = MDB_TYPE_STRING;
2423 	a.a_un.a_str = "-v";
2424 
2425 	return (bufctl(addr, flags, 1, &a));
2426 }
2427 
/*
 * State shared between ::umem_verify and its verify_alloc()/verify_free()
 * walk callbacks.
 */
typedef struct umem_verify {
	uint64_t *umv_buf;		/* buffer to read cache contents into */
	size_t umv_size;		/* number of bytes in umv_buf */
	int umv_corruption;		/* > 0 if corruption found. */
	int umv_besilent;		/* if set, suppress per-buffer reports */
	struct umem_cache umv_cache;	/* the cache we're operating on */
} umem_verify_t;
2435 
/*
 * verify_pattern()
 *	Compare buf_arg against pat one 64-bit word at a time, covering the
 *	first size bytes.  Returns the byte offset of the first word that
 *	differs from pat, or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;

	for (off = 0; off < size; off += sizeof (uint64_t)) {
		if (buf_arg[off / sizeof (uint64_t)] != pat)
			return ((int64_t)off);
	}

	return (-1);
}
2452 
2453 /*
2454  * verify_buftag()
2455  *	verify that btp->bt_bxstat == (bcp ^ pat)
2456  */
2457 static int
2458 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2459 {
2460 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2461 }
2462 
2463 /*
2464  * verify_free()
2465  *	verify the integrity of a free block of memory by checking
2466  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2467  */
2468 /*ARGSUSED1*/
2469 static int
2470 verify_free(uintptr_t addr, const void *data, void *private)
2471 {
2472 	umem_verify_t *umv = (umem_verify_t *)private;
2473 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2474 	int64_t corrupt;		/* corruption offset */
2475 	umem_buftag_t *buftagp;		/* ptr to buftag */
2476 	umem_cache_t *cp = &umv->umv_cache;
2477 	int besilent = umv->umv_besilent;
2478 
2479 	/*LINTED*/
2480 	buftagp = UMEM_BUFTAG(cp, buf);
2481 
2482 	/*
2483 	 * Read the buffer to check.
2484 	 */
2485 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2486 		if (!besilent)
2487 			mdb_warn("couldn't read %p", addr);
2488 		return (WALK_NEXT);
2489 	}
2490 
2491 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2492 	    UMEM_FREE_PATTERN)) >= 0) {
2493 		if (!besilent)
2494 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2495 			    addr, (uintptr_t)addr + corrupt);
2496 		goto corrupt;
2497 	}
2498 
2499 	if ((cp->cache_flags & UMF_HASH) &&
2500 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2501 		if (!besilent)
2502 			mdb_printf("buffer %p (free) seems to "
2503 			    "have a corrupt redzone pattern\n", addr);
2504 		goto corrupt;
2505 	}
2506 
2507 	/*
2508 	 * confirm bufctl pointer integrity.
2509 	 */
2510 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2511 		if (!besilent)
2512 			mdb_printf("buffer %p (free) has a corrupt "
2513 			    "buftag\n", addr);
2514 		goto corrupt;
2515 	}
2516 
2517 	return (WALK_NEXT);
2518 corrupt:
2519 	umv->umv_corruption++;
2520 	return (WALK_NEXT);
2521 }
2522 
2523 /*
2524  * verify_alloc()
2525  *	Verify that the buftag of an allocated buffer makes sense with respect
2526  *	to the buffer.
2527  */
2528 /*ARGSUSED1*/
2529 static int
2530 verify_alloc(uintptr_t addr, const void *data, void *private)
2531 {
2532 	umem_verify_t *umv = (umem_verify_t *)private;
2533 	umem_cache_t *cp = &umv->umv_cache;
2534 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2535 	/*LINTED*/
2536 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2537 	uint32_t *ip = (uint32_t *)buftagp;
2538 	uint8_t *bp = (uint8_t *)buf;
2539 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2540 	int besilent = umv->umv_besilent;
2541 
2542 	/*
2543 	 * Read the buffer to check.
2544 	 */
2545 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2546 		if (!besilent)
2547 			mdb_warn("couldn't read %p", addr);
2548 		return (WALK_NEXT);
2549 	}
2550 
2551 	/*
2552 	 * There are two cases to handle:
2553 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2554 	 *    0xfeedfacefeedface at the end of it
2555 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2556 	 *    0xbb just past the end of the region in use.  At the buftag,
2557 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2558 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2559 	 *    endianness), followed by 32 bits containing the offset of the
2560 	 *    0xbb byte in the buffer.
2561 	 *
2562 	 * Finally, the two 32-bit words that comprise the second half of the
2563 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2564 	 */
2565 
2566 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2567 		looks_ok = 1;
2568 	else if (!UMEM_SIZE_VALID(ip[1]))
2569 		size_ok = 0;
2570 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2571 		looks_ok = 1;
2572 	else
2573 		size_ok = 0;
2574 
2575 	if (!size_ok) {
2576 		if (!besilent)
2577 			mdb_printf("buffer %p (allocated) has a corrupt "
2578 			    "redzone size encoding\n", addr);
2579 		goto corrupt;
2580 	}
2581 
2582 	if (!looks_ok) {
2583 		if (!besilent)
2584 			mdb_printf("buffer %p (allocated) has a corrupt "
2585 			    "redzone signature\n", addr);
2586 		goto corrupt;
2587 	}
2588 
2589 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2590 		if (!besilent)
2591 			mdb_printf("buffer %p (allocated) has a "
2592 			    "corrupt buftag\n", addr);
2593 		goto corrupt;
2594 	}
2595 
2596 	return (WALK_NEXT);
2597 corrupt:
2598 	umv->umv_corruption++;
2599 	return (WALK_NEXT);
2600 }
2601 
2602 /*ARGSUSED2*/
2603 int
2604 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2605 {
2606 	if (flags & DCMD_ADDRSPEC) {
2607 		int check_alloc = 0, check_free = 0;
2608 		umem_verify_t umv;
2609 
2610 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2611 		    addr) == -1) {
2612 			mdb_warn("couldn't read umem_cache %p", addr);
2613 			return (DCMD_ERR);
2614 		}
2615 
2616 		umv.umv_size = umv.umv_cache.cache_buftag +
2617 		    sizeof (umem_buftag_t);
2618 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2619 		umv.umv_corruption = 0;
2620 
2621 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2622 			check_alloc = 1;
2623 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2624 				check_free = 1;
2625 		} else {
2626 			if (!(flags & DCMD_LOOP)) {
2627 				mdb_warn("cache %p (%s) does not have "
2628 				    "redzone checking enabled\n", addr,
2629 				    umv.umv_cache.cache_name);
2630 			}
2631 			return (DCMD_ERR);
2632 		}
2633 
2634 		if (flags & DCMD_LOOP) {
2635 			/*
2636 			 * table mode, don't print out every corrupt buffer
2637 			 */
2638 			umv.umv_besilent = 1;
2639 		} else {
2640 			mdb_printf("Summary for cache '%s'\n",
2641 			    umv.umv_cache.cache_name);
2642 			mdb_inc_indent(2);
2643 			umv.umv_besilent = 0;
2644 		}
2645 
2646 		if (check_alloc)
2647 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2648 		if (check_free)
2649 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2650 
2651 		if (flags & DCMD_LOOP) {
2652 			if (umv.umv_corruption == 0) {
2653 				mdb_printf("%-*s %?p clean\n",
2654 				    UMEM_CACHE_NAMELEN,
2655 				    umv.umv_cache.cache_name, addr);
2656 			} else {
2657 				char *s = "";	/* optional s in "buffer[s]" */
2658 				if (umv.umv_corruption > 1)
2659 					s = "s";
2660 
2661 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2662 				    UMEM_CACHE_NAMELEN,
2663 				    umv.umv_cache.cache_name, addr,
2664 				    umv.umv_corruption, s);
2665 			}
2666 		} else {
2667 			/*
2668 			 * This is the more verbose mode, when the user has
2669 			 * type addr::umem_verify.  If the cache was clean,
2670 			 * nothing will have yet been printed. So say something.
2671 			 */
2672 			if (umv.umv_corruption == 0)
2673 				mdb_printf("clean\n");
2674 
2675 			mdb_dec_indent(2);
2676 		}
2677 	} else {
2678 		/*
2679 		 * If the user didn't specify a cache to verify, we'll walk all
2680 		 * umem_cache's, specifying ourself as a callback for each...
2681 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2682 		 */
2683 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2684 		    "Cache Name", "Addr", "Cache Integrity");
2685 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2686 	}
2687 
2688 	return (DCMD_OK);
2689 }
2690 
/*
 * One node per vmem arena, built by vmem_walk_init().  The nodes are first
 * chained on a flat list and then linked into a tree that follows each
 * arena's vm_source pointer.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* flat list of all nodes */
	struct vmem_node *vn_parent;	/* node of the vm_source arena */
	struct vmem_node *vn_sibling;	/* next node with the same parent */
	struct vmem_node *vn_children;	/* first child of this node */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once the postfix walk visits */
	vmem_t vn_vmem;			/* local copy of the arena */
} vmem_node_t;
2700 
/*
 * Walk state stored in walk_data by vmem_walk_init().
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* first arena that has no source */
	vmem_node_t *vw_current;	/* walk position; NULL if none */
} vmem_walk_t;
2705 
2706 int
2707 vmem_walk_init(mdb_walk_state_t *wsp)
2708 {
2709 	uintptr_t vaddr, paddr;
2710 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2711 	vmem_walk_t *vw;
2712 
2713 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2714 		mdb_warn("couldn't read 'vmem_list'");
2715 		return (WALK_ERR);
2716 	}
2717 
2718 	while (vaddr != NULL) {
2719 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2720 		vp->vn_addr = vaddr;
2721 		vp->vn_next = head;
2722 		head = vp;
2723 
2724 		if (vaddr == wsp->walk_addr)
2725 			current = vp;
2726 
2727 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2728 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2729 			goto err;
2730 		}
2731 
2732 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2733 	}
2734 
2735 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2736 
2737 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2738 			vp->vn_sibling = root;
2739 			root = vp;
2740 			continue;
2741 		}
2742 
2743 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2744 			if (parent->vn_addr != paddr)
2745 				continue;
2746 			vp->vn_sibling = parent->vn_children;
2747 			parent->vn_children = vp;
2748 			vp->vn_parent = parent;
2749 			break;
2750 		}
2751 
2752 		if (parent == NULL) {
2753 			mdb_warn("couldn't find %p's parent (%p)\n",
2754 			    vp->vn_addr, paddr);
2755 			goto err;
2756 		}
2757 	}
2758 
2759 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2760 	vw->vw_root = root;
2761 
2762 	if (current != NULL)
2763 		vw->vw_current = current;
2764 	else
2765 		vw->vw_current = root;
2766 
2767 	wsp->walk_data = vw;
2768 	return (WALK_NEXT);
2769 err:
2770 	for (vp = head; head != NULL; vp = head) {
2771 		head = vp->vn_next;
2772 		mdb_free(vp, sizeof (vmem_node_t));
2773 	}
2774 
2775 	return (WALK_ERR);
2776 }
2777 
2778 int
2779 vmem_walk_step(mdb_walk_state_t *wsp)
2780 {
2781 	vmem_walk_t *vw = wsp->walk_data;
2782 	vmem_node_t *vp;
2783 	int rval;
2784 
2785 	if ((vp = vw->vw_current) == NULL)
2786 		return (WALK_DONE);
2787 
2788 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2789 
2790 	if (vp->vn_children != NULL) {
2791 		vw->vw_current = vp->vn_children;
2792 		return (rval);
2793 	}
2794 
2795 	do {
2796 		vw->vw_current = vp->vn_sibling;
2797 		vp = vp->vn_parent;
2798 	} while (vw->vw_current == NULL && vp != NULL);
2799 
2800 	return (rval);
2801 }
2802 
2803 /*
2804  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
2805  * children are visited before their parent.  We perform the postfix walk
2806  * iteratively (rather than recursively) to allow mdb to regain control
2807  * after each callback.
2808  */
2809 int
2810 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2811 {
2812 	vmem_walk_t *vw = wsp->walk_data;
2813 	vmem_node_t *vp = vw->vw_current;
2814 	int rval;
2815 
2816 	/*
2817 	 * If this node is marked, then we know that we have already visited
2818 	 * all of its children.  If the node has any siblings, they need to
2819 	 * be visited next; otherwise, we need to visit the parent.  Note
2820 	 * that vp->vn_marked will only be zero on the first invocation of
2821 	 * the step function.
2822 	 */
2823 	if (vp->vn_marked) {
2824 		if (vp->vn_sibling != NULL)
2825 			vp = vp->vn_sibling;
2826 		else if (vp->vn_parent != NULL)
2827 			vp = vp->vn_parent;
2828 		else {
2829 			/*
2830 			 * We have neither a parent, nor a sibling, and we
2831 			 * have already been visited; we're done.
2832 			 */
2833 			return (WALK_DONE);
2834 		}
2835 	}
2836 
2837 	/*
2838 	 * Before we visit this node, visit its children.
2839 	 */
2840 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2841 		vp = vp->vn_children;
2842 
2843 	vp->vn_marked = 1;
2844 	vw->vw_current = vp;
2845 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2846 
2847 	return (rval);
2848 }
2849 
2850 void
2851 vmem_walk_fini(mdb_walk_state_t *wsp)
2852 {
2853 	vmem_walk_t *vw = wsp->walk_data;
2854 	vmem_node_t *root = vw->vw_root;
2855 	int done;
2856 
2857 	if (root == NULL)
2858 		return;
2859 
2860 	if ((vw->vw_root = root->vn_children) != NULL)
2861 		vmem_walk_fini(wsp);
2862 
2863 	vw->vw_root = root->vn_sibling;
2864 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2865 	mdb_free(root, sizeof (vmem_node_t));
2866 
2867 	if (done) {
2868 		mdb_free(vw, sizeof (vmem_walk_t));
2869 	} else {
2870 		vmem_walk_fini(wsp);
2871 	}
2872 }
2873 
/*
 * State for the vmem segment walkers.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report; VMEM_NONE = all */
	uintptr_t vsw_start;	/* target address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* target address of the next segment */
} vmem_seg_walk_t;
2879 
2880 /*ARGSUSED*/
2881 int
2882 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2883 {
2884 	vmem_seg_walk_t *vsw;
2885 
2886 	if (wsp->walk_addr == NULL) {
2887 		mdb_warn("vmem_%s does not support global walks\n", name);
2888 		return (WALK_ERR);
2889 	}
2890 
2891 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2892 
2893 	vsw->vsw_type = type;
2894 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2895 	vsw->vsw_current = vsw->vsw_start;
2896 
2897 	return (WALK_NEXT);
2898 }
2899 
2900 /*
2901  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
2902  */
2903 #define	VMEM_NONE	0
2904 
/*
 * Begin a segment walk that reports only VMEM_ALLOC segments.
 */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
2910 
/*
 * Begin a segment walk that reports only VMEM_FREE segments.
 */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
2916 
/*
 * Begin a segment walk that reports only VMEM_SPAN segments.
 */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
2922 
/*
 * Begin a segment walk that reports every segment; VMEM_NONE disables the
 * type filter in vmem_seg_walk_step().
 */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
2928 
2929 int
2930 vmem_seg_walk_step(mdb_walk_state_t *wsp)
2931 {
2932 	vmem_seg_t seg;
2933 	vmem_seg_walk_t *vsw = wsp->walk_data;
2934 	uintptr_t addr = vsw->vsw_current;
2935 	static size_t seg_size = 0;
2936 	int rval;
2937 
2938 	if (!seg_size) {
2939 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
2940 			mdb_warn("failed to read 'vmem_seg_size'");
2941 			seg_size = sizeof (vmem_seg_t);
2942 		}
2943 	}
2944 
2945 	if (seg_size < sizeof (seg))
2946 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
2947 
2948 	if (mdb_vread(&seg, seg_size, addr) == -1) {
2949 		mdb_warn("couldn't read vmem_seg at %p", addr);
2950 		return (WALK_ERR);
2951 	}
2952 
2953 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
2954 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
2955 		rval = WALK_NEXT;
2956 	} else {
2957 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
2958 	}
2959 
2960 	if (vsw->vsw_current == vsw->vsw_start)
2961 		return (WALK_DONE);
2962 
2963 	return (rval);
2964 }
2965 
2966 void
2967 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
2968 {
2969 	vmem_seg_walk_t *vsw = wsp->walk_data;
2970 
2971 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
2972 }
2973 
2974 #define	VMEM_NAMEWIDTH	22
2975 
2976 int
2977 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2978 {
2979 	vmem_t v, parent;
2980 	uintptr_t paddr;
2981 	int ident = 0;
2982 	char c[VMEM_NAMEWIDTH];
2983 
2984 	if (!(flags & DCMD_ADDRSPEC)) {
2985 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
2986 			mdb_warn("can't walk vmem");
2987 			return (DCMD_ERR);
2988 		}
2989 		return (DCMD_OK);
2990 	}
2991 
2992 	if (DCMD_HDRSPEC(flags))
2993 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
2994 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
2995 		    "TOTAL", "SUCCEED", "FAIL");
2996 
2997 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
2998 		mdb_warn("couldn't read vmem at %p", addr);
2999 		return (DCMD_ERR);
3000 	}
3001 
3002 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3003 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3004 			mdb_warn("couldn't trace %p's ancestry", addr);
3005 			ident = 0;
3006 			break;
3007 		}
3008 		paddr = (uintptr_t)parent.vm_source;
3009 	}
3010 
3011 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3012 
3013 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3014 	    addr, VMEM_NAMEWIDTH, c,
3015 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3016 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3017 
3018 	return (DCMD_OK);
3019 }
3020 
/*
 * Help text for the ::vmem_seg dcmd.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3052 
3053 
3054 /*ARGSUSED*/
3055 int
3056 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3057 {
3058 	vmem_seg_t vs;
3059 	uintptr_t *stk = vs.vs_stack;
3060 	uintptr_t sz;
3061 	uint8_t t;
3062 	const char *type = NULL;
3063 	GElf_Sym sym;
3064 	char c[MDB_SYM_NAMLEN];
3065 	int no_debug;
3066 	int i;
3067 	int depth;
3068 	uintptr_t laddr, haddr;
3069 
3070 	uintptr_t caller = NULL, thread = NULL;
3071 	uintptr_t minsize = 0, maxsize = 0;
3072 
3073 	hrtime_t earliest = 0, latest = 0;
3074 
3075 	uint_t size = 0;
3076 	uint_t verbose = 0;
3077 
3078 	if (!(flags & DCMD_ADDRSPEC))
3079 		return (DCMD_USAGE);
3080 
3081 	if (mdb_getopts(argc, argv,
3082 	    'c', MDB_OPT_UINTPTR, &caller,
3083 	    'e', MDB_OPT_UINT64, &earliest,
3084 	    'l', MDB_OPT_UINT64, &latest,
3085 	    's', MDB_OPT_SETBITS, TRUE, &size,
3086 	    'm', MDB_OPT_UINTPTR, &minsize,
3087 	    'M', MDB_OPT_UINTPTR, &maxsize,
3088 	    't', MDB_OPT_UINTPTR, &thread,
3089 	    'T', MDB_OPT_STR, &type,
3090 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3091 	    NULL) != argc)
3092 		return (DCMD_USAGE);
3093 
3094 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3095 		if (verbose) {
3096 			mdb_printf("%16s %4s %16s %16s %16s\n"
3097 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3098 			    "ADDR", "TYPE", "START", "END", "SIZE",
3099 			    "", "", "THREAD", "TIMESTAMP", "");
3100 		} else {
3101 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3102 			    "START", size? "SIZE" : "END", "WHO");
3103 		}
3104 	}
3105 
3106 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3107 		mdb_warn("couldn't read vmem_seg at %p", addr);
3108 		return (DCMD_ERR);
3109 	}
3110 
3111 	if (type != NULL) {
3112 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3113 			t = VMEM_ALLOC;
3114 		else if (strcmp(type, "FREE") == 0)
3115 			t = VMEM_FREE;
3116 		else if (strcmp(type, "SPAN") == 0)
3117 			t = VMEM_SPAN;
3118 		else if (strcmp(type, "ROTR") == 0 ||
3119 		    strcmp(type, "ROTOR") == 0)
3120 			t = VMEM_ROTOR;
3121 		else if (strcmp(type, "WLKR") == 0 ||
3122 		    strcmp(type, "WALKER") == 0)
3123 			t = VMEM_WALKER;
3124 		else {
3125 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3126 			    type);
3127 			return (DCMD_ERR);
3128 		}
3129 
3130 		if (vs.vs_type != t)
3131 			return (DCMD_OK);
3132 	}
3133 
3134 	sz = vs.vs_end - vs.vs_start;
3135 
3136 	if (minsize != 0 && sz < minsize)
3137 		return (DCMD_OK);
3138 
3139 	if (maxsize != 0 && sz > maxsize)
3140 		return (DCMD_OK);
3141 
3142 	t = vs.vs_type;
3143 	depth = vs.vs_depth;
3144 
3145 	/*
3146 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3147 	 */
3148 	no_debug = (t != VMEM_ALLOC) ||
3149 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3150 
3151 	if (no_debug) {
3152 		if (caller != NULL || thread != NULL || earliest != 0 ||
3153 		    latest != 0)
3154 			return (DCMD_OK);		/* not enough info */
3155 	} else {
3156 		if (caller != NULL) {
3157 			laddr = caller;
3158 			haddr = caller + sizeof (caller);
3159 
3160 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3161 			    sizeof (c), &sym) != -1 &&
3162 			    caller == (uintptr_t)sym.st_value) {
3163 				/*
3164 				 * We were provided an exact symbol value; any
3165 				 * address in the function is valid.
3166 				 */
3167 				laddr = (uintptr_t)sym.st_value;
3168 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3169 			}
3170 
3171 			for (i = 0; i < depth; i++)
3172 				if (vs.vs_stack[i] >= laddr &&
3173 				    vs.vs_stack[i] < haddr)
3174 					break;
3175 
3176 			if (i == depth)
3177 				return (DCMD_OK);
3178 		}
3179 
3180 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3181 			return (DCMD_OK);
3182 
3183 		if (earliest != 0 && vs.vs_timestamp < earliest)
3184 			return (DCMD_OK);
3185 
3186 		if (latest != 0 && vs.vs_timestamp > latest)
3187 			return (DCMD_OK);
3188 	}
3189 
3190 	type = (t == VMEM_ALLOC ? "ALLC" :
3191 	    t == VMEM_FREE ? "FREE" :
3192 	    t == VMEM_SPAN ? "SPAN" :
3193 	    t == VMEM_ROTOR ? "ROTR" :
3194 	    t == VMEM_WALKER ? "WLKR" :
3195 	    "????");
3196 
3197 	if (flags & DCMD_PIPE_OUT) {
3198 		mdb_printf("%#r\n", addr);
3199 		return (DCMD_OK);
3200 	}
3201 
3202 	if (verbose) {
3203 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3204 		    addr, type, vs.vs_start, vs.vs_end, sz);
3205 
3206 		if (no_debug)
3207 			return (DCMD_OK);
3208 
3209 		mdb_printf("%16s %4s %16d %16llx\n",
3210 		    "", "", vs.vs_thread, vs.vs_timestamp);
3211 
3212 		mdb_inc_indent(17);
3213 		for (i = 0; i < depth; i++) {
3214 			mdb_printf("%a\n", stk[i]);
3215 		}
3216 		mdb_dec_indent(17);
3217 		mdb_printf("\n");
3218 	} else {
3219 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3220 		    vs.vs_start, size? sz : vs.vs_end);
3221 
3222 		if (no_debug) {
3223 			mdb_printf("\n");
3224 			return (DCMD_OK);
3225 		}
3226 
3227 		for (i = 0; i < depth; i++) {
3228 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3229 			    c, sizeof (c), &sym) == -1)
3230 				continue;
3231 			if (is_umem_sym(c, "vmem_"))
3232 				continue;
3233 			break;
3234 		}
3235 		mdb_printf(" %a\n", stk[i]);
3236 	}
3237 	return (DCMD_OK);
3238 }
3239 
3240 /*ARGSUSED*/
3241 static int
3242 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3243 {
3244 	char name[UMEM_CACHE_NAMELEN + 1];
3245 	hrtime_t delta;
3246 	int i, depth;
3247 
3248 	if (bcp->bc_timestamp == 0)
3249 		return (WALK_DONE);
3250 
3251 	if (*newest == 0)
3252 		*newest = bcp->bc_timestamp;
3253 
3254 	delta = *newest - bcp->bc_timestamp;
3255 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3256 
3257 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3258 	    &bcp->bc_cache->cache_name) <= 0)
3259 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3260 
3261 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3262 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3263 
3264 	for (i = 0; i < depth; i++)
3265 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3266 
3267 	return (WALK_NEXT);
3268 }
3269 
3270 int
3271 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3272 {
3273 	const char *logname = "umem_transaction_log";
3274 	hrtime_t newest = 0;
3275 
3276 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3277 		return (DCMD_USAGE);
3278 
3279 	if (argc > 0) {
3280 		if (argv->a_type != MDB_TYPE_STRING)
3281 			return (DCMD_USAGE);
3282 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3283 			logname = "umem_failure_log";
3284 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3285 			logname = "umem_slab_log";
3286 		else
3287 			return (DCMD_USAGE);
3288 	}
3289 
3290 	if (umem_readvar(&addr, logname) == -1) {
3291 		mdb_warn("failed to read %s log header pointer");
3292 		return (DCMD_ERR);
3293 	}
3294 
3295 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3296 		mdb_warn("failed to walk umem log");
3297 		return (DCMD_ERR);
3298 	}
3299 
3300 	return (DCMD_OK);
3301 }
3302 
3303 /*
3304  * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3305  * The first piece is a structure which we use to accumulate umem_cache_t
3306  * addresses of interest.  The umc_add is used as a callback for the umem_cache
3307  * walker; we either add all caches, or ones named explicitly as arguments.
3308  */
3309 
/*
 * Growable list of umem_cache_t addresses, filled in by umc_add().
 */
typedef struct umclist {
	const char *umc_name;			/* Name to match (or NULL) */
	uintptr_t *umc_caches;			/* List of umem_cache_t addrs */
	int umc_nelems;				/* Num entries in umc_caches */
	int umc_size;				/* Size of umc_caches array */
} umclist_t;
3316 
3317 static int
3318 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3319 {
3320 	void *p;
3321 	int s;
3322 
3323 	if (umc->umc_name == NULL ||
3324 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3325 		/*
3326 		 * If we have a match, grow our array (if necessary), and then
3327 		 * add the virtual address of the matching cache to our list.
3328 		 */
3329 		if (umc->umc_nelems >= umc->umc_size) {
3330 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3331 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3332 
3333 			bcopy(umc->umc_caches, p,
3334 			    sizeof (uintptr_t) * umc->umc_size);
3335 
3336 			umc->umc_caches = p;
3337 			umc->umc_size = s;
3338 		}
3339 
3340 		umc->umc_caches[umc->umc_nelems++] = addr;
3341 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3342 	}
3343 
3344 	return (WALK_NEXT);
3345 }
3346 
3347 /*
3348  * The second piece of ::umausers is a hash table of allocations.  Each
3349  * allocation owner is identified by its stack trace and data_size.  We then
3350  * track the total bytes of all such allocations, and the number of allocations
3351  * to report at the end.  Once we have a list of caches, we walk through the
3352  * allocated bufctls of each, and update our hash table accordingly.
3353  */
3354 
/*
 * One entry in the ::umausers hash table.  Each array slot serves both as
 * a bucket header (umo_head) and as storage for one owner record.
 */
typedef struct umowner {
	struct umowner *umo_head;		/* First hash elt in bucket */
	struct umowner *umo_next;		/* Next hash elt in chain */
	size_t umo_signature;			/* Hash table signature */
	uint_t umo_num;				/* Number of allocations */
	size_t umo_data_size;			/* Size of each allocation */
	size_t umo_total_size;			/* Total bytes of allocation */
	int umo_depth;				/* Depth of stack trace */
	uintptr_t *umo_stack;			/* Stack trace */
} umowner_t;
3365 
/*
 * Accumulated ::umausers state: the owner hash table and the array that
 * backs the owners' stack traces.
 */
typedef struct umusers {
	const umem_cache_t *umu_cache;		/* Current umem cache */
	umowner_t *umu_hash;			/* Hash table of owners */
	uintptr_t *umu_stacks;			/* stacks for owners */
	int umu_nelems;				/* Number of entries in use */
	int umu_size;				/* Total number of entries */
} umusers_t;
3373 
3374 static void
3375 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3376     size_t size, size_t data_size)
3377 {
3378 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3379 	size_t bucket, signature = data_size;
3380 	umowner_t *umo, *umoend;
3381 
3382 	/*
3383 	 * If the hash table is full, double its size and rehash everything.
3384 	 */
3385 	if (umu->umu_nelems >= umu->umu_size) {
3386 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3387 		size_t umowner_size = sizeof (umowner_t);
3388 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3389 		uintptr_t *new_stacks;
3390 
3391 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3392 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3393 
3394 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3395 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3396 		umu->umu_hash = umo;
3397 		umu->umu_stacks = new_stacks;
3398 		umu->umu_size = s;
3399 
3400 		umoend = umu->umu_hash + umu->umu_size;
3401 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3402 			umo->umo_head = NULL;
3403 			umo->umo_stack = &umu->umu_stacks[
3404 			    umem_stack_depth * (umo - umu->umu_hash)];
3405 		}
3406 
3407 		umoend = umu->umu_hash + umu->umu_nelems;
3408 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3409 			bucket = umo->umo_signature & (umu->umu_size - 1);
3410 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3411 			umu->umu_hash[bucket].umo_head = umo;
3412 		}
3413 	}
3414 
3415 	/*
3416 	 * Finish computing the hash signature from the stack trace, and then
3417 	 * see if the owner is in the hash table.  If so, update our stats.
3418 	 */
3419 	for (i = 0; i < depth; i++)
3420 		signature += bcp->bc_stack[i];
3421 
3422 	bucket = signature & (umu->umu_size - 1);
3423 
3424 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3425 		if (umo->umo_signature == signature) {
3426 			size_t difference = 0;
3427 
3428 			difference |= umo->umo_data_size - data_size;
3429 			difference |= umo->umo_depth - depth;
3430 
3431 			for (i = 0; i < depth; i++) {
3432 				difference |= umo->umo_stack[i] -
3433 				    bcp->bc_stack[i];
3434 			}
3435 
3436 			if (difference == 0) {
3437 				umo->umo_total_size += size;
3438 				umo->umo_num++;
3439 				return;
3440 			}
3441 		}
3442 	}
3443 
3444 	/*
3445 	 * If the owner is not yet hashed, grab the next element and fill it
3446 	 * in based on the allocation information.
3447 	 */
3448 	umo = &umu->umu_hash[umu->umu_nelems++];
3449 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3450 	umu->umu_hash[bucket].umo_head = umo;
3451 
3452 	umo->umo_signature = signature;
3453 	umo->umo_num = 1;
3454 	umo->umo_data_size = data_size;
3455 	umo->umo_total_size = size;
3456 	umo->umo_depth = depth;
3457 
3458 	for (i = 0; i < depth; i++)
3459 		umo->umo_stack[i] = bcp->bc_stack[i];
3460 }
3461 
3462 /*
3463  * When ::umausers is invoked without the -f flag, we simply update our hash
3464  * table with the information from each allocated bufctl.
3465  */
3466 /*ARGSUSED*/
3467 static int
3468 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3469 {
3470 	const umem_cache_t *cp = umu->umu_cache;
3471 
3472 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3473 	return (WALK_NEXT);
3474 }
3475 
3476 /*
3477  * When ::umausers is invoked with the -f flag, we print out the information
3478  * for each bufctl as well as updating the hash table.
3479  */
3480 static int
3481 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3482 {
3483 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3484 	const umem_cache_t *cp = umu->umu_cache;
3485 
3486 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3487 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3488 
3489 	for (i = 0; i < depth; i++)
3490 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3491 
3492 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3493 	return (WALK_NEXT);
3494 }
3495 
3496 /*
3497  * We sort our results by allocation size before printing them.
3498  */
3499 static int
3500 umownercmp(const void *lp, const void *rp)
3501 {
3502 	const umowner_t *lhs = lp;
3503 	const umowner_t *rhs = rp;
3504 
3505 	return (rhs->umo_total_size - lhs->umo_total_size);
3506 }
3507 
/*
 * The main engine of ::umausers is relatively straightforward: First we
 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
 * iterate over the allocated bufctls of each cache in the list.  Finally,
 * we sort and print our results.
 *
 * The dcmd takes no address.  -e lowers both reporting thresholds to zero,
 * -f prints every bufctl as it is visited, and any remaining string
 * arguments name the caches to examine (all caches when none are given).
 * Returns DCMD_USAGE for bad arguments, DCMD_ERR for an unknown cache name or
 * when no cache at all has UMF_AUDIT set, and DCMD_OK otherwise.
 */
/*ARGSUSED*/
int
umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	int mem_threshold = 8192;	/* Minimum # bytes for printing */
	int cnt_threshold = 100;	/* Minimum # blocks for printing */
	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
	int do_all_caches = 1;		/* Do all caches (no arguments) */
	int opt_e = FALSE;		/* Include "small" users */
	int opt_f = FALSE;		/* Print stack traces */

	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
	umowner_t *umo, *umoend;
	int i, oelems;

	umclist_t umc;
	umusers_t umu;

	if (flags & DCMD_ADDRSPEC)
		return (DCMD_USAGE);

	bzero(&umc, sizeof (umc));
	bzero(&umu, sizeof (umu));

	/*
	 * Options and cache names may be interleaved: each pass consumes a
	 * run of options, then treats the argument it stopped at as a cache
	 * name, and repeats until every argument has been used.
	 */
	while ((i = mdb_getopts(argc, argv,
	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {

		argv += i;	/* skip past options we just processed */
		argc -= i;	/* adjust argc */

		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
			return (DCMD_USAGE);

		/* Remember the count so we can tell whether a cache matched. */
		oelems = umc.umc_nelems;
		umc.umc_name = argv->a_un.a_str;
		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);

		if (umc.umc_nelems == oelems) {
			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
			return (DCMD_ERR);
		}

		do_all_caches = 0;
		argv++;
		argc--;
	}

	if (opt_e)
		mem_threshold = cnt_threshold = 0;

	if (opt_f)
		callback = (mdb_walk_cb_t)umause2;

	if (do_all_caches) {
		umc.umc_name = NULL; /* match all cache names */
		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
	}

	for (i = 0; i < umc.umc_nelems; i++) {
		uintptr_t cp = umc.umc_caches[i];
		umem_cache_t c;

		if (mdb_vread(&c, sizeof (c), cp) == -1) {
			mdb_warn("failed to read cache at %p", cp);
			continue;
		}

		/*
		 * Caches without UMF_AUDIT are skipped; we only complain
		 * about them when the user asked for the cache by name.
		 */
		if (!(c.cache_flags & UMF_AUDIT)) {
			if (!do_all_caches) {
				mdb_warn("UMF_AUDIT is not enabled for %s\n",
				    c.cache_name);
			}
			continue;
		}

		/* The callbacks read the cache's size and name through umu. */
		umu.umu_cache = &c;
		(void) mdb_pwalk("bufctl", callback, &umu, cp);
		audited_caches++;
	}

	if (audited_caches == 0 && do_all_caches) {
		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
		return (DCMD_ERR);
	}

	/*
	 * Sorting moves the owner entries around, so the hash chains are no
	 * longer meaningful afterwards; only the per-owner fields are read
	 * from here on.
	 */
	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
	umoend = umu.umu_hash + umu.umu_nelems;

	for (umo = umu.umu_hash; umo < umoend; umo++) {
		/* An owner is reported if it reaches either threshold. */
		if (umo->umo_total_size < mem_threshold &&
		    umo->umo_num < cnt_threshold)
			continue;
		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
		for (i = 0; i < umo->umo_depth; i++)
			mdb_printf("\t %a\n", umo->umo_stack[i]);
	}

	return (DCMD_OK);
}
3615 
/*
 * Local picture of the tag that precedes a malloc()ed buffer's usable space.
 * It is read out of the target by um_umem_buffer_cb(), which uses the pair of
 * fields to decide whether a umem buffer was handed out by malloc().
 */
struct malloc_data {
	uint32_t malloc_size;
	uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
};
3620 
/*
 * Largest malloc size tracked in the per-size histogram; the um_bucket
 * arrays are allocated with UMI_MAX_BUCKET + 1 entries.  It is UMEM_MAXBUF
 * less the tag overhead: two malloc_data-sized units under _LP64, one
 * otherwise.
 */
#ifdef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#endif
3626 
/*
 * Accumulator filled in by um_umem_buffer_cb() while walking the buffers of
 * one or more umem_alloc_* caches.
 */
typedef struct umem_malloc_info {
	size_t um_total;	/* total allocated buffers */
	size_t um_malloc;	/* malloc buffers */
	size_t um_malloc_size;	/* sum of malloc buffer sizes */
	size_t um_malloc_overhead; /* sum of in-chunk overheads */

	umem_cache_t *um_cp;	/* local copy of the cache being walked */

	uint_t *um_bucket;	/* count per malloc size, or NULL if unused */
} umem_malloc_info_t;
3637 
3638 static const int *
3639 dist_linear(int buckets, int beg, int end)
3640 {
3641 	int *out = mdb_alloc((buckets + 1) * sizeof (*out), UM_SLEEP | UM_GC);
3642 	int pos;
3643 	int dist = end - beg + 1;
3644 
3645 	for (pos = 0; pos < buckets; pos++)
3646 		out[pos] = beg + (pos * dist)/buckets;
3647 	out[buckets] = end + 1;
3648 
3649 	return (out);
3650 }
3651 
/*
 * Build the bounds of "buckets" bins covering [beg, end] whose widths grow
 * geometrically, with no bin narrower than minbucketsize (0 is treated as 1).
 * The result has the same layout as dist_linear()'s: buckets + 1 entries, the
 * last of which is end + 1.  Under _KMDB this simply falls back to
 * dist_linear().
 *
 * We want the bins to be a constant ratio:
 *
 *	b_0	  = beg;
 *	b_idx	  = b_{idx-1} * r;
 *	b_buckets = end + 1;
 *
 * That is:
 *
 *	       buckets
 *	beg * r        = end + 1
 *
 * Which reduces to:
 *
 *		  buckets ___________________
 *	      r = -------/ ((end + 1) / beg)
 *
 *		  log ((end + 1) / beg)
 *	  log r = ---------------------
 *		         buckets
 *
 *		   (log ((end + 1) / beg)) / buckets
 *	      r = e
 */
static const int *
dist_geometric(int buckets, int beg, int end, int minbucketsize)
{
#ifdef	_KMDB
	return (dist_linear(buckets, beg, end));
#else
	int *out = mdb_alloc((buckets + 1) * sizeof (*out), UM_SLEEP | UM_GC);

	extern double log(double);
	extern double exp(double);

	double r;
	double b;
	int idx = 0;
	int last;
	int begzero;

	if (minbucketsize == 0)
		minbucketsize = 1;

	/* A single bin needs no ratio: it simply spans the whole range. */
	if (buckets == 1) {
		out[0] = beg;
		out[1] = end + 1;
		return (out);
	}

	/* beg is a divisor below, so a zero start is computed as if it were 1 */
	begzero = (beg == 0);
	if (begzero)
		beg = 1;

	r = exp(log((double)(end + 1) / beg) / buckets);

	/*
	 * We've now computed r, using the previously derived formula.  We
	 * now need to generate the array of bucket bounds.  There are
	 * two major variables:
	 *
	 *	b	holds b_idx, the current index, as a double.
	 *	last	holds the integer which goes into out[idx]
	 *
	 * Our job is to transform the smooth function b_idx, defined
	 * above, into integer-sized buckets, with a specified minimum
	 * bucket size.  Since b_idx is an exponentially growing function,
	 * any inadequate buckets must be at the beginning.  To deal
	 * with this, we make buckets of minimum size until b catches up
	 * with last.
	 *
	 * A final wrinkle is that beg *can* be zero.  We compute r and b
	 * as if beg was 1, then start last as 0.  This can lead to a bit
	 * of oddness around the 0 bucket, but it's mostly reasonable.
	 */

	b = last = beg;
	if (begzero)
		last = 0;

	for (idx = 0; idx < buckets; idx++) {
		int next;

		out[idx] = last;

		b *= r;
		next = (int)b;

		/* Take the ideal bound only if it leaves a wide enough bin. */
		if (next > last + minbucketsize - 1)
			last = next;
		else
			last += minbucketsize;
	}
	out[buckets] = end + 1;

	return (out);
#endif
}
3750 
3751 #define	NCHARS	50
3752 static void
3753 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3754     size_t maxbuckets, size_t minbucketsize, int geometric)
3755 {
3756 	size_t um_malloc;
3757 	int minb = -1;
3758 	int maxb = -1;
3759 	int buckets;
3760 	int nbucks;
3761 	int i;
3762 	int n;
3763 	int b;
3764 	const char *dist = " Distribution ";
3765 	char dashes[NCHARS + 1];
3766 	const int *distarray;
3767 
3768 	minb = (int)minmalloc;
3769 	maxb = (int)maxmalloc;
3770 
3771 	nbucks = buckets = maxb - minb + 1;
3772 
3773 	um_malloc = 0;
3774 	for (b = minb; b <= maxb; b++)
3775 		um_malloc += um_bucket[b];
3776 	if (um_malloc == 0)
3777 		um_malloc = 1;			/* avoid divide-by-zero */
3778 
3779 	if (maxbuckets != 0)
3780 		buckets = MIN(buckets, maxbuckets);
3781 
3782 	if (minbucketsize > 1) {
3783 		buckets = MIN(buckets, nbucks/minbucketsize);
3784 		if (buckets == 0) {
3785 			buckets = 1;
3786 			minbucketsize = nbucks;
3787 		}
3788 	}
3789 
3790 
3791 	if (geometric)
3792 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3793 	else
3794 		distarray = dist_linear(buckets, minb, maxb);
3795 
3796 	n = (NCHARS - strlen(dist)) / 2;
3797 	(void) memset(dashes, '-', n);
3798 	dashes[n] = 0;
3799 
3800 	mdb_printf("%11s  %s%s%s %s\n",
3801 	    "malloc size", dashes, dist, dashes, "count");
3802 
3803 	for (i = 0; i < buckets; i++) {
3804 		int bb = distarray[i];
3805 		int be = distarray[i+1] - 1;
3806 		uint64_t amount = 0;
3807 
3808 		int nats;
3809 		char ats[NCHARS + 1], spaces[NCHARS + 1];
3810 		char range[40];
3811 
3812 		for (b = bb; b <= be; b++)
3813 			amount += um_bucket[b];
3814 
3815 		nats = (NCHARS * amount)/um_malloc;
3816 		(void) memset(ats, '@', nats);
3817 		ats[nats] = 0;
3818 		(void) memset(spaces, ' ', NCHARS - nats);
3819 		spaces[NCHARS - nats] = 0;
3820 
3821 		if (bb == be)
3822 			mdb_snprintf(range, sizeof (range), "%d", bb);
3823 		else
3824 			mdb_snprintf(range, sizeof (range), "%d-%d", bb, be);
3825 		mdb_printf("%11s |%s%s %lld\n", range, ats, spaces, amount);
3826 	}
3827 	mdb_printf("\n");
3828 }
3829 #undef NCHARS
3830 
3831 /*
3832  * A malloc()ed buffer looks like:
3833  *
3834  *	<----------- mi.malloc_size --->
3835  *	<----------- cp.cache_bufsize ------------------>
3836  *	<----------- cp.cache_chunksize -------------------------------->
3837  *	+-------+-----------------------+---------------+---------------+
3838  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3839  *	+-------+---------------------------------------+---------------+
3840  *		<-- usable space ------>
3841  *
3842  * mallocsz is the argument to malloc(3C).
3843  * mi.malloc_size is the actual size passed to umem_alloc(), which
3844  * is rounded up to the smallest available cache size, which is
3845  * cache_bufsize.  If there is debugging or alignment overhead in
3846  * the cache, that is reflected in a larger cache_chunksize.
3847  *
3848  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3849  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3850  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3851  * the tag has 8 bytes of padding before it.
3852  *
3853  * 32-byte, 64-byte buffers <= 8 bytes:
3854  *	+-------+-------+--------- ...
3855  *	|/size//|/stat//| mallocsz ...
3856  *	+-------+-------+--------- ...
3857  *			^
3858  *			pointer returned from malloc(3C)
3859  *
3860  * 64-byte buffers > 8 bytes:
3861  *	+---------------+-------+-------+--------- ...
3862  *	|/padding///////|/size//|/stat//| mallocsz ...
3863  *	+---------------+-------+-------+--------- ...
3864  *					^
3865  *					pointer returned from malloc(3C)
3866  *
3867  * The "size" field is "malloc_size", which is mallocsz + the padding.
3868  * The "stat" field is derived from malloc_size, and functions as a
3869  * validation that this buffer is actually from malloc(3C).
3870  */
3871 /*ARGSUSED*/
3872 static int
3873 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3874 {
3875 	struct malloc_data md;
3876 	size_t m_addr = addr;
3877 	size_t overhead = sizeof (md);
3878 	size_t mallocsz;
3879 
3880 	ump->um_total++;
3881 
3882 #ifdef _LP64
3883 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3884 		m_addr += overhead;
3885 		overhead += sizeof (md);
3886 	}
3887 #endif
3888 
3889 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3890 		mdb_warn("unable to read malloc header at %p", m_addr);
3891 		return (WALK_NEXT);
3892 	}
3893 
3894 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3895 	case MALLOC_MAGIC:
3896 #ifdef _LP64
3897 	case MALLOC_SECOND_MAGIC:
3898 #endif
3899 		mallocsz = md.malloc_size - overhead;
3900 
3901 		ump->um_malloc++;
3902 		ump->um_malloc_size += mallocsz;
3903 		ump->um_malloc_overhead += overhead;
3904 
3905 		/* include round-off and debug overhead */
3906 		ump->um_malloc_overhead +=
3907 		    ump->um_cp->cache_chunksize - md.malloc_size;
3908 
3909 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
3910 			ump->um_bucket[mallocsz]++;
3911 
3912 		break;
3913 	default:
3914 		break;
3915 	}
3916 
3917 	return (WALK_NEXT);
3918 }
3919 
3920 int
3921 get_umem_alloc_sizes(int **out, size_t *out_num)
3922 {
3923 	GElf_Sym sym;
3924 
3925 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
3926 		mdb_warn("unable to look up umem_alloc_sizes");
3927 		return (-1);
3928 	}
3929 
3930 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
3931 	*out_num = sym.st_size / sizeof (int);
3932 
3933 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
3934 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
3935 		*out = NULL;
3936 		return (-1);
3937 	}
3938 
3939 	return (0);
3940 }
3941 
3942 
3943 static int
3944 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
3945 {
3946 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
3947 		return (WALK_NEXT);
3948 
3949 	ump->um_cp = cp;
3950 
3951 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
3952 	    -1) {
3953 		mdb_warn("can't walk 'umem' for cache %p", addr);
3954 		return (WALK_ERR);
3955 	}
3956 
3957 	return (WALK_NEXT);
3958 }
3959 
/*
 * Help text for ::umem_malloc_dist: a one-line summary followed by an
 * OPTIONS section.
 */
void
umem_malloc_dist_help(void)
{
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw data out, without binning\n"
"  -g    use geometric binning instead of linear binning\n";

	mdb_printf("%s\n",
	    "report distribution of outstanding malloc()s");

	/* The section heading is printed one indent level out. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3976 
3977 /*ARGSUSED*/
3978 int
3979 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3980 {
3981 	umem_malloc_info_t mi;
3982 	uint_t geometric = 0;
3983 	uint_t dump = 0;
3984 	size_t maxbuckets = 0;
3985 	size_t minbucketsize = 0;
3986 
3987 	size_t minalloc = 0;
3988 	size_t maxalloc = UMI_MAX_BUCKET;
3989 
3990 	if (flags & DCMD_ADDRSPEC)
3991 		return (DCMD_USAGE);
3992 
3993 	if (mdb_getopts(argc, argv,
3994 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3995 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3996 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3997 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3998 	    0) != argc)
3999 		return (DCMD_USAGE);
4000 
4001 	bzero(&mi, sizeof (mi));
4002 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4003 	    UM_SLEEP | UM_GC);
4004 
4005 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
4006 	    &mi) == -1) {
4007 		mdb_warn("unable to walk 'umem_cache'");
4008 		return (DCMD_ERR);
4009 	}
4010 
4011 	if (dump) {
4012 		int i;
4013 		for (i = minalloc; i <= maxalloc; i++)
4014 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
4015 
4016 		return (DCMD_OK);
4017 	}
4018 
4019 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
4020 	    maxbuckets, minbucketsize, geometric);
4021 
4022 	return (DCMD_OK);
4023 }
4024 
/*
 * Help text for ::umem_malloc_info: a one-line summary followed by an
 * OPTIONS section.  The -g line is left out when built with _KMDB defined.
 */
void
umem_malloc_info_help(void)
{
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
"  -g    use geometric binning instead of linear binning\n"
#endif
	    "";

	mdb_printf("%s\n",
	    "report information about malloc()s by cache.  ");

	/* The section heading is printed one indent level out. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
4044 int
4045 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4046 {
4047 	umem_cache_t c;
4048 	umem_malloc_info_t mi;
4049 
4050 	int skip = 0;
4051 
4052 	size_t maxmalloc;
4053 	size_t overhead;
4054 	size_t allocated;
4055 	size_t avg_malloc;
4056 	size_t overhead_pct;	/* 1000 * overhead_percent */
4057 
4058 	uint_t verbose = 0;
4059 	uint_t dump = 0;
4060 	uint_t geometric = 0;
4061 	size_t maxbuckets = 0;
4062 	size_t minbucketsize = 0;
4063 
4064 	int *alloc_sizes;
4065 	int idx;
4066 	size_t num;
4067 	size_t minmalloc;
4068 
4069 	if (mdb_getopts(argc, argv,
4070 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
4071 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
4072 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
4073 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
4074 	    0) != argc)
4075 		return (DCMD_USAGE);
4076 
4077 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4078 		verbose = 1;
4079 
4080 	if (!(flags & DCMD_ADDRSPEC)) {
4081 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4082 		    argc, argv) == -1) {
4083 			mdb_warn("can't walk umem_cache");
4084 			return (DCMD_ERR);
4085 		}
4086 		return (DCMD_OK);
4087 	}
4088 
4089 	if (!mdb_vread(&c, sizeof (c), addr)) {
4090 		mdb_warn("unable to read cache at %p", addr);
4091 		return (DCMD_ERR);
4092 	}
4093 
4094 	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4095 		if (!(flags & DCMD_LOOP))
4096 			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4097 			    "by malloc()\n", c.cache_name);
4098 		skip = 1;
4099 	}
4100 
4101 	/*
4102 	 * normally, print the header only the first time.  In verbose mode,
4103 	 * print the header on every non-skipped buffer
4104 	 */
4105 	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4106 		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4107 		    "CACHE", "BUFSZ", "MAXMAL",
4108 		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4109 
4110 	if (skip)
4111 		return (DCMD_OK);
4112 
4113 	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4114 #ifdef _LP64
4115 	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4116 		maxmalloc -= sizeof (struct malloc_data);
4117 #endif
4118 
4119 	bzero(&mi, sizeof (mi));
4120 	mi.um_cp = &c;
4121 	if (verbose)
4122 		mi.um_bucket =
4123 		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4124 		    UM_SLEEP | UM_GC);
4125 
4126 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4127 	    -1) {
4128 		mdb_warn("can't walk 'umem'");
4129 		return (DCMD_ERR);
4130 	}
4131 
4132 	overhead = mi.um_malloc_overhead;
4133 	allocated = mi.um_malloc_size;
4134 
4135 	/* do integer round off for the average */
4136 	if (mi.um_malloc != 0)
4137 		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4138 	else
4139 		avg_malloc = 0;
4140 
4141 	/*
4142 	 * include per-slab overhead
4143 	 *
4144 	 * Each slab in a given cache is the same size, and has the same
4145 	 * number of chunks in it;  we read in the first slab on the
4146 	 * slab list to get the number of chunks for all slabs.  To
4147 	 * compute the per-slab overhead, we just subtract the chunk usage
4148 	 * from the slabsize:
4149 	 *
4150 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4151 	 * |////////////|	|	| ...	|	|///////|///////|
4152 	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
4153 	 * |////////////|	|	| ...	|	|///////|///////|
4154 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4155 	 * |		\_______chunksize * chunks_____/		|
4156 	 * \__________________________slabsize__________________________/
4157 	 *
4158 	 * For UMF_HASH caches, there is an additional source of overhead;
4159 	 * the external umem_slab_t and per-chunk bufctl structures.  We
4160 	 * include those in our per-slab overhead.
4161 	 *
4162 	 * Once we have a number for the per-slab overhead, we estimate
4163 	 * the actual overhead by treating the malloc()ed buffers as if
4164 	 * they were densely packed:
4165 	 *
4166 	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
4167 	 *
4168 	 * carefully ordering the multiply before the divide, to avoid
4169 	 * round-off error.
4170 	 */
4171 	if (mi.um_malloc != 0) {
4172 		umem_slab_t slab;
4173 		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4174 
4175 		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4176 			mdb_warn("unable to read slab at %p\n", saddr);
4177 		} else {
4178 			long chunks = slab.slab_chunks;
4179 			if (chunks != 0 && c.cache_chunksize != 0 &&
4180 			    chunks <= c.cache_slabsize / c.cache_chunksize) {
4181 				uintmax_t perslab =
4182 				    c.cache_slabsize -
4183 				    (c.cache_chunksize * chunks);
4184 
4185 				if (c.cache_flags & UMF_HASH) {
4186 					perslab += sizeof (umem_slab_t) +
4187 					    chunks *
4188 					    ((c.cache_flags & UMF_AUDIT) ?
4189 					    sizeof (umem_bufctl_audit_t) :
4190 					    sizeof (umem_bufctl_t));
4191 				}
4192 				overhead +=
4193 				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
4194 			} else {
4195 				mdb_warn("invalid #chunks (%d) in slab %p\n",
4196 				    chunks, saddr);
4197 			}
4198 		}
4199 	}
4200 
4201 	if (allocated != 0)
4202 		overhead_pct = (1000ULL * overhead) / allocated;
4203 	else
4204 		overhead_pct = 0;
4205 
4206 	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4207 	    addr, c.cache_bufsize, maxmalloc,
4208 	    mi.um_malloc, avg_malloc, allocated, overhead,
4209 	    overhead_pct / 10, overhead_pct % 10);
4210 
4211 	if (!verbose)
4212 		return (DCMD_OK);
4213 
4214 	if (!dump)
4215 		mdb_printf("\n");
4216 
4217 	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4218 		return (DCMD_ERR);
4219 
4220 	for (idx = 0; idx < num; idx++) {
4221 		if (alloc_sizes[idx] == c.cache_bufsize)
4222 			break;
4223 		if (alloc_sizes[idx] == 0) {
4224 			idx = num;	/* 0-terminated array */
4225 			break;
4226 		}
4227 	}
4228 	if (idx == num) {
4229 		mdb_warn(
4230 		    "cache %p's size (%d) not in umem_alloc_sizes\n",
4231 		    addr, c.cache_bufsize);
4232 		return (DCMD_ERR);
4233 	}
4234 
4235 	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4236 	if (minmalloc > 0) {
4237 #ifdef _LP64
4238 		if (minmalloc > UMEM_SECOND_ALIGN)
4239 			minmalloc -= sizeof (struct malloc_data);
4240 #endif
4241 		minmalloc -= sizeof (struct malloc_data);
4242 		minmalloc += 1;
4243 	}
4244 
4245 	if (dump) {
4246 		for (idx = minmalloc; idx <= maxmalloc; idx++)
4247 			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4248 		mdb_printf("\n");
4249 	} else {
4250 		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4251 		    maxbuckets, minbucketsize, geometric);
4252 	}
4253 
4254 	return (DCMD_OK);
4255 }
4256