xref: /titanic_51/usr/src/cmd/mdb/common/modules/libumem/umem.c (revision 9853d9e82e7a067a2b88dae2fd257207e6be5f94)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "umem.h"
29 
30 #include <sys/vmem_impl_user.h>
31 #include <umem_impl.h>
32 
33 #include <alloca.h>
34 #include <limits.h>
35 
36 #include "misc.h"
37 #include "leaky.h"
38 #include "dist.h"
39 
40 #include "umem_pagesize.h"
41 
/*
 * Walk-type flags used by the umem buffer walkers in this file.  UM_HASH is
 * set internally when a walk is layered on top of the "umem_hash" walker.
 */
#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

/*
 * Debugger-side copies of target variables; they are (re)loaded by
 * umem_update_variables().
 */
int umem_ready;
uint32_t umem_stack_depth;
size_t umem_pagesize;

static uint32_t umem_max_ncpus;

/* non-zero once the "umem_stack_depth corrupted" warning has been printed */
static int umem_stack_depth_warned;

/*
 * Read the target variable named 'var' into the local object of the same
 * name.  Evaluates to 1 (after printing a warning) on failure, 0 on success.
 */
#define	UMEM_READVAR(var)					\
	((umem_readvar(&(var), #var) == -1) ?			\
	    (mdb_warn("failed to read "#var), 1) : 0)
58 
59 int
60 umem_update_variables(void)
61 {
62 	size_t pagesize;
63 
64 	/*
65 	 * Figure out which type of umem is being used; if it's not there
66 	 * yet, succeed quietly.
67 	 */
68 	if (umem_set_standalone() == -1) {
69 		umem_ready = 0;
70 		return (0);		/* umem not there yet */
71 	}
72 
73 	/*
74 	 * Solaris 9 used a different name for umem_max_ncpus.  It's
75 	 * cheap backwards compatibility to check for both names.
76 	 */
77 	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
78 	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
79 		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
80 		return (-1);
81 	}
82 	if (UMEM_READVAR(umem_ready))
83 		return (-1);
84 	if (UMEM_READVAR(umem_stack_depth))
85 		return (-1);
86 	if (UMEM_READVAR(pagesize))
87 		return (-1);
88 
89 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
90 		if (umem_stack_depth_warned == 0) {
91 			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
92 			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
93 			umem_stack_depth_warned = 1;
94 		}
95 		umem_stack_depth = 0;
96 	}
97 
98 	umem_pagesize = pagesize;
99 
100 	return (0);
101 }
102 
103 /*ARGSUSED*/
104 static int
105 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
106 {
107 	mdb_walker_t w;
108 	char descr[64];
109 
110 	(void) mdb_snprintf(descr, sizeof (descr),
111 	    "walk the %s cache", c->cache_name);
112 
113 	w.walk_name = c->cache_name;
114 	w.walk_descr = descr;
115 	w.walk_init = umem_walk_init;
116 	w.walk_step = umem_walk_step;
117 	w.walk_fini = umem_walk_fini;
118 	w.walk_init_arg = (void *)addr;
119 
120 	if (mdb_add_walker(&w) == -1)
121 		mdb_warn("failed to add %s walker", c->cache_name);
122 
123 	return (WALK_NEXT);
124 }
125 
126 /*ARGSUSED*/
127 static void
128 umem_statechange_cb(void *arg)
129 {
130 	static int been_ready = 0;
131 
132 #ifndef _KMDB
133 	leaky_cleanup(1);	/* state changes invalidate leaky state */
134 #endif
135 
136 	if (umem_update_variables() == -1)
137 		return;
138 
139 	if (been_ready)
140 		return;
141 
142 	if (umem_ready != UMEM_READY)
143 		return;
144 
145 	been_ready = 1;
146 	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL);
147 }
148 
149 int
150 umem_init(void)
151 {
152 	mdb_walker_t w = {
153 		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
154 		umem_cache_walk_step, umem_cache_walk_fini
155 	};
156 
157 	if (mdb_add_walker(&w) == -1) {
158 		mdb_warn("failed to add umem_cache walker");
159 		return (-1);
160 	}
161 
162 	if (umem_update_variables() == -1)
163 		return (-1);
164 
165 	/* install a callback so that our variables are always up-to-date */
166 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
167 	umem_statechange_cb(NULL);
168 
169 	return (0);
170 }
171 
172 int
173 umem_abort_messages(void)
174 {
175 	char *umem_error_buffer;
176 	uint_t umem_error_begin;
177 	GElf_Sym sym;
178 	size_t bufsize;
179 
180 	if (UMEM_READVAR(umem_error_begin))
181 		return (DCMD_ERR);
182 
183 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
184 		mdb_warn("unable to look up umem_error_buffer");
185 		return (DCMD_ERR);
186 	}
187 
188 	bufsize = (size_t)sym.st_size;
189 
190 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
191 
192 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
193 	    != bufsize) {
194 		mdb_warn("unable to read umem_error_buffer");
195 		return (DCMD_ERR);
196 	}
197 	/* put a zero after the end of the buffer to simplify printing */
198 	umem_error_buffer[bufsize] = 0;
199 
200 	if ((umem_error_begin % bufsize) == 0)
201 		mdb_printf("%s\n", umem_error_buffer);
202 	else {
203 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
204 		mdb_printf("%s%s\n",
205 		    &umem_error_buffer[umem_error_begin % bufsize],
206 		    umem_error_buffer);
207 	}
208 
209 	return (DCMD_OK);
210 }
211 
212 static void
213 umem_log_status(const char *name, umem_log_header_t *val)
214 {
215 	umem_log_header_t my_lh;
216 	uintptr_t pos = (uintptr_t)val;
217 	size_t size;
218 
219 	if (pos == NULL)
220 		return;
221 
222 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
223 		mdb_warn("\nunable to read umem_%s_log pointer %p",
224 		    name, pos);
225 		return;
226 	}
227 
228 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
229 
230 	if (size % (1024 * 1024) == 0)
231 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
232 	else if (size % 1024 == 0)
233 		mdb_printf("%s=%dk ", name, size / 1024);
234 	else
235 		mdb_printf("%s=%d ", name, size);
236 }
237 
238 typedef struct umem_debug_flags {
239 	const char	*udf_name;
240 	uint_t		udf_flags;
241 	uint_t		udf_clear;	/* if 0, uses udf_flags */
242 } umem_debug_flags_t;
243 
244 umem_debug_flags_t umem_status_flags[] = {
245 	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
246 	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
247 	{ "audit",	UMF_AUDIT },
248 	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
249 	{ "nosignal",	UMF_CHECKSIGNAL },
250 	{ "firewall",	UMF_FIREWALL },
251 	{ "lite",	UMF_LITE },
252 	{ NULL }
253 };
254 
255 /*ARGSUSED*/
256 int
257 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
258 {
259 	int umem_logging;
260 
261 	umem_log_header_t *umem_transaction_log;
262 	umem_log_header_t *umem_content_log;
263 	umem_log_header_t *umem_failure_log;
264 	umem_log_header_t *umem_slab_log;
265 
266 	mdb_printf("Status:\t\t%s\n",
267 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
268 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
269 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
270 	    umem_ready == UMEM_READY ? "ready and active" :
271 	    umem_ready == 0 ? "not loaded into address space" :
272 	    "unknown (umem_ready invalid)");
273 
274 	if (umem_ready == 0)
275 		return (DCMD_OK);
276 
277 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
278 
279 	if (UMEM_READVAR(umem_logging))
280 		goto err;
281 	if (UMEM_READVAR(umem_transaction_log))
282 		goto err;
283 	if (UMEM_READVAR(umem_content_log))
284 		goto err;
285 	if (UMEM_READVAR(umem_failure_log))
286 		goto err;
287 	if (UMEM_READVAR(umem_slab_log))
288 		goto err;
289 
290 	mdb_printf("Logs:\t\t");
291 	umem_log_status("transaction", umem_transaction_log);
292 	umem_log_status("content", umem_content_log);
293 	umem_log_status("fail", umem_failure_log);
294 	umem_log_status("slab", umem_slab_log);
295 	if (!umem_logging)
296 		mdb_printf("(inactive)");
297 	mdb_printf("\n");
298 
299 	mdb_printf("Message buffer:\n");
300 	return (umem_abort_messages());
301 
302 err:
303 	mdb_printf("Message buffer:\n");
304 	(void) umem_abort_messages();
305 	return (DCMD_ERR);
306 }
307 
/*
 * State for the "umem_cache" walker.
 */
typedef struct umem_cache_walk {
	uintptr_t ucw_first;	/* address of umem_null_cache (walk sentinel) */
	uintptr_t ucw_current;	/* address of the next cache to visit */
} umem_cache_walk_t;
312 
313 int
314 umem_cache_walk_init(mdb_walk_state_t *wsp)
315 {
316 	umem_cache_walk_t *ucw;
317 	umem_cache_t c;
318 	uintptr_t cp;
319 	GElf_Sym sym;
320 
321 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
322 		mdb_warn("couldn't find umem_null_cache");
323 		return (WALK_ERR);
324 	}
325 
326 	cp = (uintptr_t)sym.st_value;
327 
328 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
329 		mdb_warn("couldn't read cache at %p", cp);
330 		return (WALK_ERR);
331 	}
332 
333 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
334 
335 	ucw->ucw_first = cp;
336 	ucw->ucw_current = (uintptr_t)c.cache_next;
337 	wsp->walk_data = ucw;
338 
339 	return (WALK_NEXT);
340 }
341 
342 int
343 umem_cache_walk_step(mdb_walk_state_t *wsp)
344 {
345 	umem_cache_walk_t *ucw = wsp->walk_data;
346 	umem_cache_t c;
347 	int status;
348 
349 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
350 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
351 		return (WALK_DONE);
352 	}
353 
354 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
355 
356 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
357 		return (WALK_DONE);
358 
359 	return (status);
360 }
361 
362 void
363 umem_cache_walk_fini(mdb_walk_state_t *wsp)
364 {
365 	umem_cache_walk_t *ucw = wsp->walk_data;
366 	mdb_free(ucw, sizeof (umem_cache_walk_t));
367 }
368 
369 typedef struct {
370 	umem_cpu_t *ucw_cpus;
371 	uint32_t ucw_current;
372 	uint32_t ucw_max;
373 } umem_cpu_walk_state_t;
374 
375 int
376 umem_cpu_walk_init(mdb_walk_state_t *wsp)
377 {
378 	umem_cpu_t *umem_cpus;
379 
380 	umem_cpu_walk_state_t *ucw;
381 
382 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
383 		mdb_warn("failed to read 'umem_cpus'");
384 		return (WALK_ERR);
385 	}
386 
387 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
388 
389 	ucw->ucw_cpus = umem_cpus;
390 	ucw->ucw_current = 0;
391 	ucw->ucw_max = umem_max_ncpus;
392 
393 	wsp->walk_data = ucw;
394 	return (WALK_NEXT);
395 }
396 
397 int
398 umem_cpu_walk_step(mdb_walk_state_t *wsp)
399 {
400 	umem_cpu_t cpu;
401 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
402 
403 	uintptr_t caddr;
404 
405 	if (ucw->ucw_current >= ucw->ucw_max)
406 		return (WALK_DONE);
407 
408 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
409 
410 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
411 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
412 		return (WALK_ERR);
413 	}
414 
415 	ucw->ucw_current++;
416 
417 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
418 }
419 
420 void
421 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
422 {
423 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
424 
425 	mdb_free(ucw, sizeof (*ucw));
426 }
427 
428 int
429 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
430 {
431 	if (wsp->walk_addr == NULL) {
432 		mdb_warn("umem_cpu_cache doesn't support global walks");
433 		return (WALK_ERR);
434 	}
435 
436 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
437 		mdb_warn("couldn't walk 'umem_cpu'");
438 		return (WALK_ERR);
439 	}
440 
441 	wsp->walk_data = (void *)wsp->walk_addr;
442 
443 	return (WALK_NEXT);
444 }
445 
446 int
447 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
448 {
449 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
450 	const umem_cpu_t *cpu = wsp->walk_layer;
451 	umem_cpu_cache_t cc;
452 
453 	caddr += cpu->cpu_cache_offset;
454 
455 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
456 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
457 		return (WALK_ERR);
458 	}
459 
460 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
461 }
462 
463 int
464 umem_slab_walk_init(mdb_walk_state_t *wsp)
465 {
466 	uintptr_t caddr = wsp->walk_addr;
467 	umem_cache_t c;
468 
469 	if (caddr == NULL) {
470 		mdb_warn("umem_slab doesn't support global walks\n");
471 		return (WALK_ERR);
472 	}
473 
474 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
475 		mdb_warn("couldn't read umem_cache at %p", caddr);
476 		return (WALK_ERR);
477 	}
478 
479 	wsp->walk_data =
480 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
481 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
482 
483 	return (WALK_NEXT);
484 }
485 
486 int
487 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
488 {
489 	uintptr_t caddr = wsp->walk_addr;
490 	umem_cache_t c;
491 
492 	if (caddr == NULL) {
493 		mdb_warn("umem_slab_partial doesn't support global walks\n");
494 		return (WALK_ERR);
495 	}
496 
497 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
498 		mdb_warn("couldn't read umem_cache at %p", caddr);
499 		return (WALK_ERR);
500 	}
501 
502 	wsp->walk_data =
503 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
504 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
505 
506 	/*
507 	 * Some consumers (umem_walk_step(), in particular) require at
508 	 * least one callback if there are any buffers in the cache.  So
509 	 * if there are *no* partial slabs, report the last full slab, if
510 	 * any.
511 	 *
512 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
513 	 */
514 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
515 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
516 
517 	return (WALK_NEXT);
518 }
519 
520 int
521 umem_slab_walk_step(mdb_walk_state_t *wsp)
522 {
523 	umem_slab_t s;
524 	uintptr_t addr = wsp->walk_addr;
525 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
526 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
527 
528 	if (addr == saddr)
529 		return (WALK_DONE);
530 
531 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
532 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
533 		return (WALK_ERR);
534 	}
535 
536 	if ((uintptr_t)s.slab_cache != caddr) {
537 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
538 		    addr, caddr, s.slab_cache);
539 		return (WALK_ERR);
540 	}
541 
542 	wsp->walk_addr = (uintptr_t)s.slab_next;
543 
544 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
545 }
546 
547 int
548 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
549 {
550 	umem_cache_t c;
551 
552 	if (!(flags & DCMD_ADDRSPEC)) {
553 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
554 			mdb_warn("can't walk umem_cache");
555 			return (DCMD_ERR);
556 		}
557 		return (DCMD_OK);
558 	}
559 
560 	if (DCMD_HDRSPEC(flags))
561 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
562 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
563 
564 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
565 		mdb_warn("couldn't read umem_cache at %p", addr);
566 		return (DCMD_ERR);
567 	}
568 
569 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
570 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
571 
572 	return (DCMD_OK);
573 }
574 
/*
 * qsort()/bsearch() comparator ordering uintptr_t values ascending.
 * Returns -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t left = *((uintptr_t *)lhs);
	uintptr_t right = *((uintptr_t *)rhs);

	/* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return ((left > right) - (left < right));
}
587 
588 static int
589 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
590 {
591 	const umem_bufctl_audit_t *bcp1 = *lhs;
592 	const umem_bufctl_audit_t *bcp2 = *rhs;
593 
594 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
595 		return (-1);
596 
597 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
598 		return (1);
599 
600 	return (0);
601 }
602 
603 typedef struct umem_hash_walk {
604 	uintptr_t *umhw_table;
605 	size_t umhw_nelems;
606 	size_t umhw_pos;
607 	umem_bufctl_t umhw_cur;
608 } umem_hash_walk_t;
609 
610 int
611 umem_hash_walk_init(mdb_walk_state_t *wsp)
612 {
613 	umem_hash_walk_t *umhw;
614 	uintptr_t *hash;
615 	umem_cache_t c;
616 	uintptr_t haddr, addr = wsp->walk_addr;
617 	size_t nelems;
618 	size_t hsize;
619 
620 	if (addr == NULL) {
621 		mdb_warn("umem_hash doesn't support global walks\n");
622 		return (WALK_ERR);
623 	}
624 
625 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
626 		mdb_warn("couldn't read cache at addr %p", addr);
627 		return (WALK_ERR);
628 	}
629 
630 	if (!(c.cache_flags & UMF_HASH)) {
631 		mdb_warn("cache %p doesn't have a hash table\n", addr);
632 		return (WALK_DONE);		/* nothing to do */
633 	}
634 
635 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
636 	umhw->umhw_cur.bc_next = NULL;
637 	umhw->umhw_pos = 0;
638 
639 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
640 	hsize = nelems * sizeof (uintptr_t);
641 	haddr = (uintptr_t)c.cache_hash_table;
642 
643 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
644 	if (mdb_vread(hash, hsize, haddr) == -1) {
645 		mdb_warn("failed to read hash table at %p", haddr);
646 		mdb_free(hash, hsize);
647 		mdb_free(umhw, sizeof (umem_hash_walk_t));
648 		return (WALK_ERR);
649 	}
650 
651 	wsp->walk_data = umhw;
652 
653 	return (WALK_NEXT);
654 }
655 
656 int
657 umem_hash_walk_step(mdb_walk_state_t *wsp)
658 {
659 	umem_hash_walk_t *umhw = wsp->walk_data;
660 	uintptr_t addr = NULL;
661 
662 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
663 		while (umhw->umhw_pos < umhw->umhw_nelems) {
664 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
665 				break;
666 		}
667 	}
668 	if (addr == NULL)
669 		return (WALK_DONE);
670 
671 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
672 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
673 		return (WALK_ERR);
674 	}
675 
676 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
677 }
678 
679 void
680 umem_hash_walk_fini(mdb_walk_state_t *wsp)
681 {
682 	umem_hash_walk_t *umhw = wsp->walk_data;
683 
684 	if (umhw == NULL)
685 		return;
686 
687 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
688 	mdb_free(umhw, sizeof (umem_hash_walk_t));
689 }
690 
691 /*
692  * Find the address of the bufctl structure for the address 'buf' in cache
693  * 'cp', which is at address caddr, and place it in *out.
694  */
695 static int
696 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
697 {
698 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
699 	umem_bufctl_t *bcp;
700 	umem_bufctl_t bc;
701 
702 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
703 		mdb_warn("unable to read hash bucket for %p in cache %p",
704 		    buf, caddr);
705 		return (-1);
706 	}
707 
708 	while (bcp != NULL) {
709 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
710 		    (uintptr_t)bcp) == -1) {
711 			mdb_warn("unable to read bufctl at %p", bcp);
712 			return (-1);
713 		}
714 		if (bc.bc_addr == buf) {
715 			*out = (uintptr_t)bcp;
716 			return (0);
717 		}
718 		bcp = bc.bc_next;
719 	}
720 
721 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
722 	return (-1);
723 }
724 
725 int
726 umem_get_magsize(const umem_cache_t *cp)
727 {
728 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
729 	GElf_Sym mt_sym;
730 	umem_magtype_t mt;
731 	int res;
732 
733 	/*
734 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
735 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
736 	 * it is okay to return 0 for them.
737 	 */
738 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
739 	    (cp->cache_flags & UMF_NOMAGAZINE))
740 		return (res);
741 
742 	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
743 		mdb_warn("unable to read 'umem_magtype'");
744 	} else if (addr < mt_sym.st_value ||
745 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
746 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
747 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
748 		    cp->cache_name, addr);
749 		return (0);
750 	}
751 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
752 		mdb_warn("unable to read magtype at %a", addr);
753 		return (0);
754 	}
755 	return (mt.mt_magsize);
756 }
757 
758 /*ARGSUSED*/
759 static int
760 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
761 {
762 	*est -= (sp->slab_chunks - sp->slab_refcnt);
763 
764 	return (WALK_NEXT);
765 }
766 
767 /*
768  * Returns an upper bound on the number of allocated buffers in a given
769  * cache.
770  */
771 size_t
772 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
773 {
774 	int magsize;
775 	size_t cache_est;
776 
777 	cache_est = cp->cache_buftotal;
778 
779 	(void) mdb_pwalk("umem_slab_partial",
780 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
781 
782 	if ((magsize = umem_get_magsize(cp)) != 0) {
783 		size_t mag_est = cp->cache_full.ml_total * magsize;
784 
785 		if (cache_est >= mag_est) {
786 			cache_est -= mag_est;
787 		} else {
788 			mdb_warn("cache %p's magazine layer holds more buffers "
789 			    "than the slab layer.\n", addr);
790 		}
791 	}
792 	return (cache_est);
793 }
794 
/*
 * Read the magazine at target address 'ump' into the local buffer 'mp'
 * and append its first 'rounds' rounds to maglist[].  Jumps to the
 * enclosing function's 'fail' label if the read fails or if maglist[]
 * becomes full.
 *
 * Relies on these names being in scope at the point of use: mp, ump,
 * magbsize, maglist, magcnt, magmax, i, and the label 'fail'.
 *
 * Wrapped in do/while(0) so that "READMAG_ROUNDS(x);" is a single
 * statement; the argument is parenthesized so any expression may be passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%lu magazines exceeds fudge factor\n", \
			    (ulong_t)magcnt); \
			goto fail; \
		} \
	} \
} while (0)
809 
810 int
811 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
812     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
813 {
814 	umem_magazine_t *ump, *mp;
815 	void **maglist = NULL;
816 	int i, cpu;
817 	size_t magsize, magmax, magbsize;
818 	size_t magcnt = 0;
819 
820 	/*
821 	 * Read the magtype out of the cache, after verifying the pointer's
822 	 * correctness.
823 	 */
824 	magsize = umem_get_magsize(cp);
825 	if (magsize == 0) {
826 		*maglistp = NULL;
827 		*magcntp = 0;
828 		*magmaxp = 0;
829 		return (WALK_NEXT);
830 	}
831 
832 	/*
833 	 * There are several places where we need to go buffer hunting:
834 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
835 	 * and the full magazine list in the depot.
836 	 *
837 	 * For an upper bound on the number of buffers in the magazine
838 	 * layer, we have the number of magazines on the cache_full
839 	 * list plus at most two magazines per CPU (the loaded and the
840 	 * spare).  Toss in 100 magazines as a fudge factor in case this
841 	 * is live (the number "100" comes from the same fudge factor in
842 	 * crash(1M)).
843 	 */
844 	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
845 	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
846 
847 	if (magbsize >= PAGESIZE / 2) {
848 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
849 		    addr, magbsize);
850 		return (WALK_ERR);
851 	}
852 
853 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
854 	mp = mdb_alloc(magbsize, alloc_flags);
855 	if (mp == NULL || maglist == NULL)
856 		goto fail;
857 
858 	/*
859 	 * First up: the magazines in the depot (i.e. on the cache_full list).
860 	 */
861 	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
862 		READMAG_ROUNDS(magsize);
863 		ump = mp->mag_next;
864 
865 		if (ump == cp->cache_full.ml_list)
866 			break; /* cache_full list loop detected */
867 	}
868 
869 	dprintf(("cache_full list done\n"));
870 
871 	/*
872 	 * Now whip through the CPUs, snagging the loaded magazines
873 	 * and full spares.
874 	 */
875 	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
876 		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
877 
878 		dprintf(("reading cpu cache %p\n",
879 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
880 
881 		if (ccp->cc_rounds > 0 &&
882 		    (ump = ccp->cc_loaded) != NULL) {
883 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
884 			READMAG_ROUNDS(ccp->cc_rounds);
885 		}
886 
887 		if (ccp->cc_prounds > 0 &&
888 		    (ump = ccp->cc_ploaded) != NULL) {
889 			dprintf(("reading %d previously loaded rounds\n",
890 			    ccp->cc_prounds));
891 			READMAG_ROUNDS(ccp->cc_prounds);
892 		}
893 	}
894 
895 	dprintf(("magazine layer: %d buffers\n", magcnt));
896 
897 	if (!(alloc_flags & UM_GC))
898 		mdb_free(mp, magbsize);
899 
900 	*maglistp = maglist;
901 	*magcntp = magcnt;
902 	*magmaxp = magmax;
903 
904 	return (WALK_NEXT);
905 
906 fail:
907 	if (!(alloc_flags & UM_GC)) {
908 		if (mp)
909 			mdb_free(mp, magbsize);
910 		if (maglist)
911 			mdb_free(maglist, magmax * sizeof (void *));
912 	}
913 	return (WALK_ERR);
914 }
915 
916 static int
917 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
918 {
919 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
920 }
921 
922 static int
923 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
924 {
925 	umem_bufctl_audit_t *b;
926 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
927 
928 	/*
929 	 * if UMF_AUDIT is not set, we know that we're looking at a
930 	 * umem_bufctl_t.
931 	 */
932 	if (!(cp->cache_flags & UMF_AUDIT) ||
933 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
934 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
935 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
936 			mdb_warn("unable to read bufctl at %p", buf);
937 			return (WALK_ERR);
938 		}
939 	}
940 
941 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
942 }
943 
944 typedef struct umem_walk {
945 	int umw_type;
946 
947 	int umw_addr;			/* cache address */
948 	umem_cache_t *umw_cp;
949 	size_t umw_csize;
950 
951 	/*
952 	 * magazine layer
953 	 */
954 	void **umw_maglist;
955 	size_t umw_max;
956 	size_t umw_count;
957 	size_t umw_pos;
958 
959 	/*
960 	 * slab layer
961 	 */
962 	char *umw_valid;	/* to keep track of freed buffers */
963 	char *umw_ubase;	/* buffer for slab data */
964 } umem_walk_t;
965 
966 static int
967 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
968 {
969 	umem_walk_t *umw;
970 	int csize;
971 	umem_cache_t *cp;
972 	size_t vm_quantum;
973 
974 	size_t magmax, magcnt;
975 	void **maglist = NULL;
976 	uint_t chunksize, slabsize;
977 	int status = WALK_ERR;
978 	uintptr_t addr = wsp->walk_addr;
979 	const char *layered;
980 
981 	type &= ~UM_HASH;
982 
983 	if (addr == NULL) {
984 		mdb_warn("umem walk doesn't support global walks\n");
985 		return (WALK_ERR);
986 	}
987 
988 	dprintf(("walking %p\n", addr));
989 
990 	/*
991 	 * The number of "cpus" determines how large the cache is.
992 	 */
993 	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
994 	cp = mdb_alloc(csize, UM_SLEEP);
995 
996 	if (mdb_vread(cp, csize, addr) == -1) {
997 		mdb_warn("couldn't read cache at addr %p", addr);
998 		goto out2;
999 	}
1000 
1001 	/*
1002 	 * It's easy for someone to hand us an invalid cache address.
1003 	 * Unfortunately, it is hard for this walker to survive an
1004 	 * invalid cache cleanly.  So we make sure that:
1005 	 *
1006 	 *	1. the vmem arena for the cache is readable,
1007 	 *	2. the vmem arena's quantum is a power of 2,
1008 	 *	3. our slabsize is a multiple of the quantum, and
1009 	 *	4. our chunksize is >0 and less than our slabsize.
1010 	 */
1011 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1012 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1013 	    vm_quantum == 0 ||
1014 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1015 	    cp->cache_slabsize < vm_quantum ||
1016 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1017 	    cp->cache_chunksize == 0 ||
1018 	    cp->cache_chunksize > cp->cache_slabsize) {
1019 		mdb_warn("%p is not a valid umem_cache_t\n", addr);
1020 		goto out2;
1021 	}
1022 
1023 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1024 
1025 	if (cp->cache_buftotal == 0) {
1026 		mdb_free(cp, csize);
1027 		return (WALK_DONE);
1028 	}
1029 
1030 	/*
1031 	 * If they ask for bufctls, but it's a small-slab cache,
1032 	 * there is nothing to report.
1033 	 */
1034 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1035 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1036 		    cp->cache_flags));
1037 		mdb_free(cp, csize);
1038 		return (WALK_DONE);
1039 	}
1040 
1041 	/*
1042 	 * Read in the contents of the magazine layer
1043 	 */
1044 	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax,
1045 	    UM_SLEEP) == WALK_ERR)
1046 		goto out2;
1047 
1048 	/*
1049 	 * We have all of the buffers from the magazines;  if we are walking
1050 	 * allocated buffers, sort them so we can bsearch them later.
1051 	 */
1052 	if (type & UM_ALLOCATED)
1053 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1054 
1055 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1056 
1057 	umw->umw_type = type;
1058 	umw->umw_addr = addr;
1059 	umw->umw_cp = cp;
1060 	umw->umw_csize = csize;
1061 	umw->umw_maglist = maglist;
1062 	umw->umw_max = magmax;
1063 	umw->umw_count = magcnt;
1064 	umw->umw_pos = 0;
1065 
1066 	/*
1067 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
1068 	 * hash table instead of the slab layer.
1069 	 */
1070 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1071 		layered = "umem_hash";
1072 
1073 		umw->umw_type |= UM_HASH;
1074 	} else {
1075 		/*
1076 		 * If we are walking freed buffers, we only need the
1077 		 * magazine layer plus the partially allocated slabs.
1078 		 * To walk allocated buffers, we need all of the slabs.
1079 		 */
1080 		if (type & UM_ALLOCATED)
1081 			layered = "umem_slab";
1082 		else
1083 			layered = "umem_slab_partial";
1084 
1085 		/*
1086 		 * for small-slab caches, we read in the entire slab.  For
1087 		 * freed buffers, we can just walk the freelist.  For
1088 		 * allocated buffers, we use a 'valid' array to track
1089 		 * the freed buffers.
1090 		 */
1091 		if (!(cp->cache_flags & UMF_HASH)) {
1092 			chunksize = cp->cache_chunksize;
1093 			slabsize = cp->cache_slabsize;
1094 
1095 			umw->umw_ubase = mdb_alloc(slabsize +
1096 			    sizeof (umem_bufctl_t), UM_SLEEP);
1097 
1098 			if (type & UM_ALLOCATED)
1099 				umw->umw_valid =
1100 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1101 		}
1102 	}
1103 
1104 	status = WALK_NEXT;
1105 
1106 	if (mdb_layered_walk(layered, wsp) == -1) {
1107 		mdb_warn("unable to start layered '%s' walk", layered);
1108 		status = WALK_ERR;
1109 	}
1110 
1111 out1:
1112 	if (status == WALK_ERR) {
1113 		if (umw->umw_valid)
1114 			mdb_free(umw->umw_valid, slabsize / chunksize);
1115 
1116 		if (umw->umw_ubase)
1117 			mdb_free(umw->umw_ubase, slabsize +
1118 			    sizeof (umem_bufctl_t));
1119 
1120 		if (umw->umw_maglist)
1121 			mdb_free(umw->umw_maglist, umw->umw_max *
1122 			    sizeof (uintptr_t));
1123 
1124 		mdb_free(umw, sizeof (umem_walk_t));
1125 		wsp->walk_data = NULL;
1126 	}
1127 
1128 out2:
1129 	if (status == WALK_ERR)
1130 		mdb_free(cp, csize);
1131 
1132 	return (status);
1133 }
1134 
1135 int
1136 umem_walk_step(mdb_walk_state_t *wsp)
1137 {
1138 	umem_walk_t *umw = wsp->walk_data;
1139 	int type = umw->umw_type;
1140 	umem_cache_t *cp = umw->umw_cp;
1141 
1142 	void **maglist = umw->umw_maglist;
1143 	int magcnt = umw->umw_count;
1144 
1145 	uintptr_t chunksize, slabsize;
1146 	uintptr_t addr;
1147 	const umem_slab_t *sp;
1148 	const umem_bufctl_t *bcp;
1149 	umem_bufctl_t bc;
1150 
1151 	int chunks;
1152 	char *kbase;
1153 	void *buf;
1154 	int i, ret;
1155 
1156 	char *valid, *ubase;
1157 
1158 	/*
1159 	 * first, handle the 'umem_hash' layered walk case
1160 	 */
1161 	if (type & UM_HASH) {
1162 		/*
1163 		 * We have a buffer which has been allocated out of the
1164 		 * global layer. We need to make sure that it's not
1165 		 * actually sitting in a magazine before we report it as
1166 		 * an allocated buffer.
1167 		 */
1168 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1169 
1170 		if (magcnt > 0 &&
1171 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1172 		    addrcmp) != NULL)
1173 			return (WALK_NEXT);
1174 
1175 		if (type & UM_BUFCTL)
1176 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1177 
1178 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1179 	}
1180 
1181 	ret = WALK_NEXT;
1182 
1183 	addr = umw->umw_addr;
1184 
1185 	/*
1186 	 * If we're walking freed buffers, report everything in the
1187 	 * magazine layer before processing the first slab.
1188 	 */
1189 	if ((type & UM_FREE) && magcnt != 0) {
1190 		umw->umw_count = 0;		/* only do this once */
1191 		for (i = 0; i < magcnt; i++) {
1192 			buf = maglist[i];
1193 
1194 			if (type & UM_BUFCTL) {
1195 				uintptr_t out;
1196 
1197 				if (cp->cache_flags & UMF_BUFTAG) {
1198 					umem_buftag_t *btp;
1199 					umem_buftag_t tag;
1200 
1201 					/* LINTED - alignment */
1202 					btp = UMEM_BUFTAG(cp, buf);
1203 					if (mdb_vread(&tag, sizeof (tag),
1204 					    (uintptr_t)btp) == -1) {
1205 						mdb_warn("reading buftag for "
1206 						    "%p at %p", buf, btp);
1207 						continue;
1208 					}
1209 					out = (uintptr_t)tag.bt_bufctl;
1210 				} else {
1211 					if (umem_hash_lookup(cp, addr, buf,
1212 					    &out) == -1)
1213 						continue;
1214 				}
1215 				ret = bufctl_walk_callback(cp, wsp, out);
1216 			} else {
1217 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1218 			}
1219 
1220 			if (ret != WALK_NEXT)
1221 				return (ret);
1222 		}
1223 	}
1224 
1225 	/*
1226 	 * Handle the buffers in the current slab
1227 	 */
1228 	chunksize = cp->cache_chunksize;
1229 	slabsize = cp->cache_slabsize;
1230 
1231 	sp = wsp->walk_layer;
1232 	chunks = sp->slab_chunks;
1233 	kbase = sp->slab_base;
1234 
1235 	dprintf(("kbase is %p\n", kbase));
1236 
1237 	if (!(cp->cache_flags & UMF_HASH)) {
1238 		valid = umw->umw_valid;
1239 		ubase = umw->umw_ubase;
1240 
1241 		if (mdb_vread(ubase, chunks * chunksize,
1242 		    (uintptr_t)kbase) == -1) {
1243 			mdb_warn("failed to read slab contents at %p", kbase);
1244 			return (WALK_ERR);
1245 		}
1246 
1247 		/*
1248 		 * Set up the valid map as fully allocated -- we'll punch
1249 		 * out the freelist.
1250 		 */
1251 		if (type & UM_ALLOCATED)
1252 			(void) memset(valid, 1, chunks);
1253 	} else {
1254 		valid = NULL;
1255 		ubase = NULL;
1256 	}
1257 
1258 	/*
1259 	 * walk the slab's freelist
1260 	 */
1261 	bcp = sp->slab_head;
1262 
1263 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1264 
1265 	/*
1266 	 * since we could be in the middle of allocating a buffer,
1267 	 * our refcnt could be one higher than it aught.  So we
1268 	 * check one further on the freelist than the count allows.
1269 	 */
1270 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1271 		uint_t ndx;
1272 
1273 		dprintf(("bcp is %p\n", bcp));
1274 
1275 		if (bcp == NULL) {
1276 			if (i == chunks)
1277 				break;
1278 			mdb_warn(
1279 			    "slab %p in cache %p freelist too short by %d\n",
1280 			    sp, addr, chunks - i);
1281 			break;
1282 		}
1283 
1284 		if (cp->cache_flags & UMF_HASH) {
1285 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1286 				mdb_warn("failed to read bufctl ptr at %p",
1287 				    bcp);
1288 				break;
1289 			}
1290 			buf = bc.bc_addr;
1291 		} else {
1292 			/*
1293 			 * Otherwise the buffer is in the slab which
1294 			 * we've read in;  we just need to determine
1295 			 * its offset in the slab to find the
1296 			 * umem_bufctl_t.
1297 			 */
1298 			bc = *((umem_bufctl_t *)
1299 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1300 			    (uintptr_t)ubase));
1301 
1302 			buf = UMEM_BUF(cp, bcp);
1303 		}
1304 
1305 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1306 
1307 		if (ndx > slabsize / cp->cache_bufsize) {
1308 			/*
1309 			 * This is very wrong; we have managed to find
1310 			 * a buffer in the slab which shouldn't
1311 			 * actually be here.  Emit a warning, and
1312 			 * try to continue.
1313 			 */
1314 			mdb_warn("buf %p is out of range for "
1315 			    "slab %p, cache %p\n", buf, sp, addr);
1316 		} else if (type & UM_ALLOCATED) {
1317 			/*
1318 			 * we have found a buffer on the slab's freelist;
1319 			 * clear its entry
1320 			 */
1321 			valid[ndx] = 0;
1322 		} else {
1323 			/*
1324 			 * Report this freed buffer
1325 			 */
1326 			if (type & UM_BUFCTL) {
1327 				ret = bufctl_walk_callback(cp, wsp,
1328 				    (uintptr_t)bcp);
1329 			} else {
1330 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1331 			}
1332 			if (ret != WALK_NEXT)
1333 				return (ret);
1334 		}
1335 
1336 		bcp = bc.bc_next;
1337 	}
1338 
1339 	if (bcp != NULL) {
1340 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1341 		    sp, addr, bcp));
1342 	}
1343 
1344 	/*
1345 	 * If we are walking freed buffers, the loop above handled reporting
1346 	 * them.
1347 	 */
1348 	if (type & UM_FREE)
1349 		return (WALK_NEXT);
1350 
1351 	if (type & UM_BUFCTL) {
1352 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1353 		    "cache %p\n", addr);
1354 		return (WALK_ERR);
1355 	}
1356 
1357 	/*
1358 	 * Report allocated buffers, skipping buffers in the magazine layer.
1359 	 * We only get this far for small-slab caches.
1360 	 */
1361 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1362 		buf = (char *)kbase + i * chunksize;
1363 
1364 		if (!valid[i])
1365 			continue;		/* on slab freelist */
1366 
1367 		if (magcnt > 0 &&
1368 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1369 		    addrcmp) != NULL)
1370 			continue;		/* in magazine layer */
1371 
1372 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1373 	}
1374 	return (ret);
1375 }
1376 
1377 void
1378 umem_walk_fini(mdb_walk_state_t *wsp)
1379 {
1380 	umem_walk_t *umw = wsp->walk_data;
1381 	uintptr_t chunksize;
1382 	uintptr_t slabsize;
1383 
1384 	if (umw == NULL)
1385 		return;
1386 
1387 	if (umw->umw_maglist != NULL)
1388 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1389 
1390 	chunksize = umw->umw_cp->cache_chunksize;
1391 	slabsize = umw->umw_cp->cache_slabsize;
1392 
1393 	if (umw->umw_valid != NULL)
1394 		mdb_free(umw->umw_valid, slabsize / chunksize);
1395 	if (umw->umw_ubase != NULL)
1396 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1397 
1398 	mdb_free(umw->umw_cp, umw->umw_csize);
1399 	mdb_free(umw, sizeof (umem_walk_t));
1400 }
1401 
1402 /*ARGSUSED*/
1403 static int
1404 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1405 {
1406 	/*
1407 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1408 	 * memory in other caches.  This can be a little confusing, so we
1409 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1410 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1411 	 */
1412 	if (c->cache_cflags & UMC_NOTOUCH)
1413 		return (WALK_NEXT);
1414 
1415 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1416 	    wsp->walk_cbdata, addr) == -1)
1417 		return (WALK_DONE);
1418 
1419 	return (WALK_NEXT);
1420 }
1421 
/*
 * Implement a global (address-less) walk by running the per-cache walker
 * 'name' over every cache via umem_walk_all().
 *
 * NOTE: this expands to a braced block that unconditionally returns from
 * the enclosing function (WALK_ERR if the umem_cache walk could not be
 * started, WALK_DONE otherwise).  Because of the braces, it must not be
 * used as the body of an 'if' that has an 'else'.
 */
#define	UMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1428 
1429 int
1430 umem_walk_init(mdb_walk_state_t *wsp)
1431 {
1432 	if (wsp->walk_arg != NULL)
1433 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1434 
1435 	if (wsp->walk_addr == NULL)
1436 		UMEM_WALK_ALL("umem", wsp);
1437 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1438 }
1439 
1440 int
1441 bufctl_walk_init(mdb_walk_state_t *wsp)
1442 {
1443 	if (wsp->walk_addr == NULL)
1444 		UMEM_WALK_ALL("bufctl", wsp);
1445 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1446 }
1447 
1448 int
1449 freemem_walk_init(mdb_walk_state_t *wsp)
1450 {
1451 	if (wsp->walk_addr == NULL)
1452 		UMEM_WALK_ALL("freemem", wsp);
1453 	return (umem_walk_init_common(wsp, UM_FREE));
1454 }
1455 
1456 int
1457 freectl_walk_init(mdb_walk_state_t *wsp)
1458 {
1459 	if (wsp->walk_addr == NULL)
1460 		UMEM_WALK_ALL("freectl", wsp);
1461 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1462 }
1463 
1464 typedef struct bufctl_history_walk {
1465 	void		*bhw_next;
1466 	umem_cache_t	*bhw_cache;
1467 	umem_slab_t	*bhw_slab;
1468 	hrtime_t	bhw_timestamp;
1469 } bufctl_history_walk_t;
1470 
1471 int
1472 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1473 {
1474 	bufctl_history_walk_t *bhw;
1475 	umem_bufctl_audit_t bc;
1476 	umem_bufctl_audit_t bcn;
1477 
1478 	if (wsp->walk_addr == NULL) {
1479 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1480 		return (WALK_ERR);
1481 	}
1482 
1483 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1484 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1485 		return (WALK_ERR);
1486 	}
1487 
1488 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1489 	bhw->bhw_timestamp = 0;
1490 	bhw->bhw_cache = bc.bc_cache;
1491 	bhw->bhw_slab = bc.bc_slab;
1492 
1493 	/*
1494 	 * sometimes the first log entry matches the base bufctl;  in that
1495 	 * case, skip the base bufctl.
1496 	 */
1497 	if (bc.bc_lastlog != NULL &&
1498 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1499 	    bc.bc_addr == bcn.bc_addr &&
1500 	    bc.bc_cache == bcn.bc_cache &&
1501 	    bc.bc_slab == bcn.bc_slab &&
1502 	    bc.bc_timestamp == bcn.bc_timestamp &&
1503 	    bc.bc_thread == bcn.bc_thread)
1504 		bhw->bhw_next = bc.bc_lastlog;
1505 	else
1506 		bhw->bhw_next = (void *)wsp->walk_addr;
1507 
1508 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1509 	wsp->walk_data = bhw;
1510 
1511 	return (WALK_NEXT);
1512 }
1513 
1514 int
1515 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1516 {
1517 	bufctl_history_walk_t *bhw = wsp->walk_data;
1518 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1519 	uintptr_t baseaddr = wsp->walk_addr;
1520 	umem_bufctl_audit_t *b;
1521 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1522 
1523 	if (addr == NULL)
1524 		return (WALK_DONE);
1525 
1526 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1527 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1528 		return (WALK_ERR);
1529 	}
1530 
1531 	/*
1532 	 * The bufctl is only valid if the address, cache, and slab are
1533 	 * correct.  We also check that the timestamp is decreasing, to
1534 	 * prevent infinite loops.
1535 	 */
1536 	if ((uintptr_t)b->bc_addr != baseaddr ||
1537 	    b->bc_cache != bhw->bhw_cache ||
1538 	    b->bc_slab != bhw->bhw_slab ||
1539 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1540 		return (WALK_DONE);
1541 
1542 	bhw->bhw_next = b->bc_lastlog;
1543 	bhw->bhw_timestamp = b->bc_timestamp;
1544 
1545 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1546 }
1547 
1548 void
1549 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1550 {
1551 	bufctl_history_walk_t *bhw = wsp->walk_data;
1552 
1553 	mdb_free(bhw, sizeof (*bhw));
1554 }
1555 
1556 typedef struct umem_log_walk {
1557 	umem_bufctl_audit_t *ulw_base;
1558 	umem_bufctl_audit_t **ulw_sorted;
1559 	umem_log_header_t ulw_lh;
1560 	size_t ulw_size;
1561 	size_t ulw_maxndx;
1562 	size_t ulw_ndx;
1563 } umem_log_walk_t;
1564 
1565 int
1566 umem_log_walk_init(mdb_walk_state_t *wsp)
1567 {
1568 	uintptr_t lp = wsp->walk_addr;
1569 	umem_log_walk_t *ulw;
1570 	umem_log_header_t *lhp;
1571 	int maxndx, i, j, k;
1572 
1573 	/*
1574 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1575 	 * read the log whose umem_log_header_t is stored at walk_addr.
1576 	 */
1577 	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1578 		mdb_warn("failed to read 'umem_transaction_log'");
1579 		return (WALK_ERR);
1580 	}
1581 
1582 	if (lp == NULL) {
1583 		mdb_warn("log is disabled\n");
1584 		return (WALK_ERR);
1585 	}
1586 
1587 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1588 	lhp = &ulw->ulw_lh;
1589 
1590 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1591 		mdb_warn("failed to read log header at %p", lp);
1592 		mdb_free(ulw, sizeof (umem_log_walk_t));
1593 		return (WALK_ERR);
1594 	}
1595 
1596 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1597 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1598 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1599 
1600 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1601 	    (uintptr_t)lhp->lh_base) == -1) {
1602 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1603 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1604 		mdb_free(ulw, sizeof (umem_log_walk_t));
1605 		return (WALK_ERR);
1606 	}
1607 
1608 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1609 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1610 
1611 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1612 		caddr_t chunk = (caddr_t)
1613 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1614 
1615 		for (j = 0; j < maxndx; j++) {
1616 			/* LINTED align */
1617 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1618 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1619 		}
1620 	}
1621 
1622 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1623 	    (int(*)(const void *, const void *))bufctlcmp);
1624 
1625 	ulw->ulw_maxndx = k;
1626 	wsp->walk_data = ulw;
1627 
1628 	return (WALK_NEXT);
1629 }
1630 
1631 int
1632 umem_log_walk_step(mdb_walk_state_t *wsp)
1633 {
1634 	umem_log_walk_t *ulw = wsp->walk_data;
1635 	umem_bufctl_audit_t *bcp;
1636 
1637 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1638 		return (WALK_DONE);
1639 
1640 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1641 
1642 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1643 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1644 }
1645 
1646 void
1647 umem_log_walk_fini(mdb_walk_state_t *wsp)
1648 {
1649 	umem_log_walk_t *ulw = wsp->walk_data;
1650 
1651 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1652 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1653 	    sizeof (umem_bufctl_audit_t *));
1654 	mdb_free(ulw, sizeof (umem_log_walk_t));
1655 }
1656 
1657 typedef struct allocdby_bufctl {
1658 	uintptr_t abb_addr;
1659 	hrtime_t abb_ts;
1660 } allocdby_bufctl_t;
1661 
1662 typedef struct allocdby_walk {
1663 	const char *abw_walk;
1664 	uintptr_t abw_thread;
1665 	size_t abw_nbufs;
1666 	size_t abw_size;
1667 	allocdby_bufctl_t *abw_buf;
1668 	size_t abw_ndx;
1669 } allocdby_walk_t;
1670 
1671 int
1672 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1673     allocdby_walk_t *abw)
1674 {
1675 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1676 		return (WALK_NEXT);
1677 
1678 	if (abw->abw_nbufs == abw->abw_size) {
1679 		allocdby_bufctl_t *buf;
1680 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1681 
1682 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1683 
1684 		bcopy(abw->abw_buf, buf, oldsize);
1685 		mdb_free(abw->abw_buf, oldsize);
1686 
1687 		abw->abw_size <<= 1;
1688 		abw->abw_buf = buf;
1689 	}
1690 
1691 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1692 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1693 	abw->abw_nbufs++;
1694 
1695 	return (WALK_NEXT);
1696 }
1697 
1698 /*ARGSUSED*/
1699 int
1700 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1701 {
1702 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1703 	    abw, addr) == -1) {
1704 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1705 		return (WALK_DONE);
1706 	}
1707 
1708 	return (WALK_NEXT);
1709 }
1710 
1711 static int
1712 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1713 {
1714 	if (lhs->abb_ts < rhs->abb_ts)
1715 		return (1);
1716 	if (lhs->abb_ts > rhs->abb_ts)
1717 		return (-1);
1718 	return (0);
1719 }
1720 
1721 static int
1722 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1723 {
1724 	allocdby_walk_t *abw;
1725 
1726 	if (wsp->walk_addr == NULL) {
1727 		mdb_warn("allocdby walk doesn't support global walks\n");
1728 		return (WALK_ERR);
1729 	}
1730 
1731 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1732 
1733 	abw->abw_thread = wsp->walk_addr;
1734 	abw->abw_walk = walk;
1735 	abw->abw_size = 128;	/* something reasonable */
1736 	abw->abw_buf =
1737 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1738 
1739 	wsp->walk_data = abw;
1740 
1741 	if (mdb_walk("umem_cache",
1742 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1743 		mdb_warn("couldn't walk umem_cache");
1744 		allocdby_walk_fini(wsp);
1745 		return (WALK_ERR);
1746 	}
1747 
1748 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1749 	    (int(*)(const void *, const void *))allocdby_cmp);
1750 
1751 	return (WALK_NEXT);
1752 }
1753 
1754 int
1755 allocdby_walk_init(mdb_walk_state_t *wsp)
1756 {
1757 	return (allocdby_walk_init_common(wsp, "bufctl"));
1758 }
1759 
1760 int
1761 freedby_walk_init(mdb_walk_state_t *wsp)
1762 {
1763 	return (allocdby_walk_init_common(wsp, "freectl"));
1764 }
1765 
1766 int
1767 allocdby_walk_step(mdb_walk_state_t *wsp)
1768 {
1769 	allocdby_walk_t *abw = wsp->walk_data;
1770 	uintptr_t addr;
1771 	umem_bufctl_audit_t *bcp;
1772 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1773 
1774 	if (abw->abw_ndx == abw->abw_nbufs)
1775 		return (WALK_DONE);
1776 
1777 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1778 
1779 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1780 		mdb_warn("couldn't read bufctl at %p", addr);
1781 		return (WALK_DONE);
1782 	}
1783 
1784 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1785 }
1786 
1787 void
1788 allocdby_walk_fini(mdb_walk_state_t *wsp)
1789 {
1790 	allocdby_walk_t *abw = wsp->walk_data;
1791 
1792 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1793 	mdb_free(abw, sizeof (allocdby_walk_t));
1794 }
1795 
1796 /*ARGSUSED*/
1797 int
1798 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1799 {
1800 	char c[MDB_SYM_NAMLEN];
1801 	GElf_Sym sym;
1802 	int i;
1803 
1804 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1805 	for (i = 0; i < bcp->bc_depth; i++) {
1806 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1807 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1808 			continue;
1809 		if (is_umem_sym(c, "umem_"))
1810 			continue;
1811 		mdb_printf("%s+0x%lx",
1812 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1813 		break;
1814 	}
1815 	mdb_printf("\n");
1816 
1817 	return (WALK_NEXT);
1818 }
1819 
1820 static int
1821 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1822 {
1823 	if (!(flags & DCMD_ADDRSPEC))
1824 		return (DCMD_USAGE);
1825 
1826 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1827 
1828 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1829 		mdb_warn("can't walk '%s' for %p", w, addr);
1830 		return (DCMD_ERR);
1831 	}
1832 
1833 	return (DCMD_OK);
1834 }
1835 
1836 /*ARGSUSED*/
1837 int
1838 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1839 {
1840 	return (allocdby_common(addr, flags, "allocdby"));
1841 }
1842 
1843 /*ARGSUSED*/
1844 int
1845 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1846 {
1847 	return (allocdby_common(addr, flags, "freedby"));
1848 }
1849 
1850 typedef struct whatis {
1851 	uintptr_t w_addr;
1852 	const umem_cache_t *w_cache;
1853 	const vmem_t *w_vmem;
1854 	int w_found;
1855 	uint_t w_verbose;
1856 	uint_t w_freemem;
1857 	uint_t w_all;
1858 	uint_t w_bufctl;
1859 } whatis_t;
1860 
1861 static void
1862 whatis_print_umem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
1863 {
1864 	/* LINTED pointer cast may result in improper alignment */
1865 	uintptr_t btaddr = (uintptr_t)UMEM_BUFTAG(w->w_cache, addr);
1866 	intptr_t stat;
1867 
1868 	if (w->w_cache->cache_flags & UMF_REDZONE) {
1869 		umem_buftag_t bt;
1870 
1871 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
1872 			goto done;
1873 
1874 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
1875 
1876 		if (stat != UMEM_BUFTAG_ALLOC && stat != UMEM_BUFTAG_FREE)
1877 			goto done;
1878 
1879 		/*
1880 		 * provide the bufctl ptr if it has useful information
1881 		 */
1882 		if (baddr == 0 && (w->w_cache->cache_flags & UMF_AUDIT))
1883 			baddr = (uintptr_t)bt.bt_bufctl;
1884 	}
1885 
1886 done:
1887 	if (baddr == 0)
1888 		mdb_printf("%p is %p+%p, %s from %s\n",
1889 		    w->w_addr, addr, w->w_addr - addr,
1890 		    w->w_freemem == FALSE ? "allocated" : "freed",
1891 		    w->w_cache->cache_name);
1892 	else
1893 		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
1894 		    w->w_addr, addr, w->w_addr - addr, baddr,
1895 		    w->w_freemem == FALSE ? "allocated" : "freed",
1896 		    w->w_cache->cache_name);
1897 }
1898 
1899 /*ARGSUSED*/
1900 static int
1901 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_t *w)
1902 {
1903 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1904 		return (WALK_NEXT);
1905 
1906 	whatis_print_umem(addr, 0, w);
1907 	w->w_found++;
1908 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1909 }
1910 
1911 static int
1912 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
1913 {
1914 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
1915 		return (WALK_NEXT);
1916 
1917 	mdb_printf("%p is %p+%p ", w->w_addr,
1918 	    vs->vs_start, w->w_addr - vs->vs_start);
1919 
1920 	/*
1921 	 * Always provide the vmem_seg pointer if it has a stack trace.
1922 	 */
1923 	if (w->w_bufctl == TRUE ||
1924 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
1925 		mdb_printf("(vmem_seg %p) ", addr);
1926 	}
1927 
1928 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
1929 	    "freed " : "", w->w_vmem->vm_name);
1930 
1931 	w->w_found++;
1932 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1933 }
1934 
1935 static int
1936 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
1937 {
1938 	const char *nm = vmem->vm_name;
1939 	w->w_vmem = vmem;
1940 	w->w_freemem = FALSE;
1941 
1942 	if (w->w_verbose)
1943 		mdb_printf("Searching vmem arena %s...\n", nm);
1944 
1945 	if (mdb_pwalk("vmem_alloc",
1946 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1947 		mdb_warn("can't walk vmem seg for %p", addr);
1948 		return (WALK_NEXT);
1949 	}
1950 
1951 	if (w->w_found && w->w_all == FALSE)
1952 		return (WALK_DONE);
1953 
1954 	if (w->w_verbose)
1955 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
1956 
1957 	w->w_freemem = TRUE;
1958 
1959 	if (mdb_pwalk("vmem_free",
1960 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1961 		mdb_warn("can't walk vmem seg for %p", addr);
1962 		return (WALK_NEXT);
1963 	}
1964 
1965 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
1966 }
1967 
1968 /*ARGSUSED*/
1969 static int
1970 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_t *w)
1971 {
1972 	uintptr_t addr;
1973 
1974 	if (bcp == NULL)
1975 		return (WALK_NEXT);
1976 
1977 	addr = (uintptr_t)bcp->bc_addr;
1978 
1979 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1980 		return (WALK_NEXT);
1981 
1982 	whatis_print_umem(addr, baddr, w);
1983 	w->w_found++;
1984 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1985 }
1986 
1987 static int
1988 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
1989 {
1990 	char *walk, *freewalk;
1991 	mdb_walk_cb_t func;
1992 
1993 	if (w->w_bufctl == FALSE) {
1994 		walk = "umem";
1995 		freewalk = "freemem";
1996 		func = (mdb_walk_cb_t)whatis_walk_umem;
1997 	} else {
1998 		walk = "bufctl";
1999 		freewalk = "freectl";
2000 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2001 	}
2002 
2003 	if (w->w_verbose)
2004 		mdb_printf("Searching %s...\n", c->cache_name);
2005 
2006 	w->w_cache = c;
2007 	w->w_freemem = FALSE;
2008 
2009 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2010 		mdb_warn("can't find %s walker", walk);
2011 		return (WALK_DONE);
2012 	}
2013 
2014 	if (w->w_found && w->w_all == FALSE)
2015 		return (WALK_DONE);
2016 
2017 	/*
2018 	 * We have searched for allocated memory; now search for freed memory.
2019 	 */
2020 	if (w->w_verbose)
2021 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2022 
2023 	w->w_freemem = TRUE;
2024 
2025 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2026 		mdb_warn("can't find %s walker", freewalk);
2027 		return (WALK_DONE);
2028 	}
2029 
2030 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2031 }
2032 
2033 static int
2034 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2035 {
2036 	if (c->cache_cflags & UMC_NOTOUCH)
2037 		return (WALK_NEXT);
2038 
2039 	return (whatis_walk_cache(addr, c, w));
2040 }
2041 
2042 static int
2043 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2044 {
2045 	if (!(c->cache_cflags & UMC_NOTOUCH))
2046 		return (WALK_NEXT);
2047 
2048 	return (whatis_walk_cache(addr, c, w));
2049 }
2050 
2051 int
2052 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2053 {
2054 	whatis_t w;
2055 
2056 	if (!(flags & DCMD_ADDRSPEC))
2057 		return (DCMD_USAGE);
2058 
2059 	w.w_verbose = FALSE;
2060 	w.w_bufctl = FALSE;
2061 	w.w_all = FALSE;
2062 
2063 	if (mdb_getopts(argc, argv,
2064 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2065 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2066 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
2067 		return (DCMD_USAGE);
2068 
2069 	w.w_addr = addr;
2070 	w.w_found = 0;
2071 
2072 	/*
2073 	 * Mappings and threads should eventually be added here.
2074 	 */
2075 	if (mdb_walk("umem_cache",
2076 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2077 		mdb_warn("couldn't find umem_cache walker");
2078 		return (DCMD_ERR);
2079 	}
2080 
2081 	if (w.w_found && w.w_all == FALSE)
2082 		return (DCMD_OK);
2083 
2084 	if (mdb_walk("umem_cache",
2085 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2086 		mdb_warn("couldn't find umem_cache walker");
2087 		return (DCMD_ERR);
2088 	}
2089 
2090 	if (w.w_found && w.w_all == FALSE)
2091 		return (DCMD_OK);
2092 
2093 	if (mdb_walk("vmem_postfix",
2094 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2095 		mdb_warn("couldn't find vmem_postfix walker");
2096 		return (DCMD_ERR);
2097 	}
2098 
2099 	if (w.w_found == 0)
2100 		mdb_printf("%p is unknown\n", addr);
2101 
2102 	return (DCMD_OK);
2103 }
2104 
2105 typedef struct umem_log_cpu {
2106 	uintptr_t umc_low;
2107 	uintptr_t umc_high;
2108 } umem_log_cpu_t;
2109 
2110 int
2111 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2112 {
2113 	int i;
2114 
2115 	for (i = 0; i < umem_max_ncpus; i++) {
2116 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2117 			break;
2118 	}
2119 
2120 	if (i == umem_max_ncpus)
2121 		mdb_printf("   ");
2122 	else
2123 		mdb_printf("%3d", i);
2124 
2125 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2126 	    b->bc_timestamp, b->bc_thread);
2127 
2128 	return (WALK_NEXT);
2129 }
2130 
2131 /*ARGSUSED*/
2132 int
2133 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2134 {
2135 	umem_log_header_t lh;
2136 	umem_cpu_log_header_t clh;
2137 	uintptr_t lhp, clhp;
2138 	umem_log_cpu_t *umc;
2139 	int i;
2140 
2141 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2142 		mdb_warn("failed to read 'umem_transaction_log'");
2143 		return (DCMD_ERR);
2144 	}
2145 
2146 	if (lhp == NULL) {
2147 		mdb_warn("no umem transaction log\n");
2148 		return (DCMD_ERR);
2149 	}
2150 
2151 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2152 		mdb_warn("failed to read log header at %p", lhp);
2153 		return (DCMD_ERR);
2154 	}
2155 
2156 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2157 
2158 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2159 	    UM_SLEEP | UM_GC);
2160 
2161 	for (i = 0; i < umem_max_ncpus; i++) {
2162 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2163 			mdb_warn("cannot read cpu %d's log header at %p",
2164 			    i, clhp);
2165 			return (DCMD_ERR);
2166 		}
2167 
2168 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2169 		    (uintptr_t)lh.lh_base;
2170 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2171 
2172 		clhp += sizeof (umem_cpu_log_header_t);
2173 	}
2174 
2175 	if (DCMD_HDRSPEC(flags)) {
2176 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2177 		    "BUFADDR", "TIMESTAMP", "THREAD");
2178 	}
2179 
2180 	/*
2181 	 * If we have been passed an address, we'll just print out that
2182 	 * log entry.
2183 	 */
2184 	if (flags & DCMD_ADDRSPEC) {
2185 		umem_bufctl_audit_t *bp;
2186 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2187 
2188 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2189 			mdb_warn("failed to read bufctl at %p", addr);
2190 			return (DCMD_ERR);
2191 		}
2192 
2193 		(void) umem_log_walk(addr, bp, umc);
2194 
2195 		return (DCMD_OK);
2196 	}
2197 
2198 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2199 		mdb_warn("can't find umem log walker");
2200 		return (DCMD_ERR);
2201 	}
2202 
2203 	return (DCMD_OK);
2204 }
2205 
2206 typedef struct bufctl_history_cb {
2207 	int		bhc_flags;
2208 	int		bhc_argc;
2209 	const mdb_arg_t	*bhc_argv;
2210 	int		bhc_ret;
2211 } bufctl_history_cb_t;
2212 
2213 /*ARGSUSED*/
2214 static int
2215 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2216 {
2217 	bufctl_history_cb_t *bhc = arg;
2218 
2219 	bhc->bhc_ret =
2220 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2221 
2222 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2223 
2224 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2225 }
2226 
/*
 * Help text for the ::bufctl dcmd: a one-line summary followed by the
 * option list under an OPTIONS heading.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	/* The heading is outdented relative to the option text. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2249 
/*
 * ::bufctl dcmd: display the umem_bufctl_audit_t at addr, optionally
 * filtered.
 *
 * Options:
 *	-v		verbose: full bufctl contents plus the stack trace
 *	-h		display the bufctl's transaction history
 *	-H		internal flag, prepended by the -h handling below so
 *			the per-entry invocations do not start another
 *			history walk
 *	-c caller	only show bufctls with caller in the stack trace
 *	-t thread	only show bufctls recorded for thread
 *	-e earliest	only show bufctls timestamped at or after earliest
 *	-l latest	only show bufctls timestamped at or before latest
 *	-a addr		only show bufctls whose bc_addr is addr
 *
 * An explicit address is required.  A bufctl rejected by a filter is
 * skipped silently and DCMD_OK is returned.  When output is piped, only
 * the address of each matching bufctl is printed.
 */
int
bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uint_t verbose = FALSE;
	uint_t history = FALSE;
	uint_t in_history = FALSE;
	uintptr_t caller = NULL, thread = NULL;
	uintptr_t laddr, haddr, baddr = NULL;
	hrtime_t earliest = 0, latest = 0;
	int i, depth;
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	umem_bufctl_audit_t *bcp;
	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
	    'h', MDB_OPT_SETBITS, TRUE, &history,
	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
	    'c', MDB_OPT_UINTPTR, &caller,
	    't', MDB_OPT_UINTPTR, &thread,
	    'e', MDB_OPT_UINT64, &earliest,
	    'l', MDB_OPT_UINT64, &latest,
	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
		return (DCMD_USAGE);

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	/* -H is only meaningful together with -h. */
	if (in_history && !history)
		return (DCMD_USAGE);

	if (history && !in_history) {
		/*
		 * Top-level -h request: build a new argument vector with
		 * "-H" in front of the user's arguments and hand it to the
		 * bufctl_history walk callback.
		 */
		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
		    UM_SLEEP | UM_GC);
		bufctl_history_cb_t bhc;

		nargv[0].a_type = MDB_TYPE_STRING;
		nargv[0].a_un.a_str = "-H";		/* prevent recursion */

		for (i = 0; i < argc; i++)
			nargv[i + 1] = argv[i];

		/*
		 * When in history mode, we treat each element as if it
		 * were in a separate loop, so that the headers group
		 * bufctls with similar histories.
		 */
		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
		bhc.bhc_argc = argc + 1;
		bhc.bhc_argv = nargv;
		bhc.bhc_ret = DCMD_OK;

		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
		    addr) == -1) {
			mdb_warn("unable to walk bufctl_history");
			return (DCMD_ERR);
		}

		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
			mdb_printf("\n");

		return (bhc.bhc_ret);
	}

	/* Column headers; suppressed when the output feeds a pipeline. */
	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
		if (verbose) {
			mdb_printf("%16s %16s %16s %16s\n"
			    "%<u>%16s %16s %16s %16s%</u>\n",
			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
			    "", "CACHE", "LASTLOG", "CONTENTS");
		} else {
			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
		}
	}

	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (DCMD_ERR);
	}

	/*
	 * Guard against bogus bc_depth in case the bufctl is corrupt or
	 * the address does not really refer to a bufctl.
	 */
	depth = MIN(bcp->bc_depth, umem_stack_depth);

	if (caller != NULL) {
		/*
		 * Default match window: [caller, caller + sizeof (caller)).
		 * It is widened to the whole function below when caller is
		 * exactly a symbol's value.
		 */
		laddr = caller;
		haddr = caller + sizeof (caller);

		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
			/*
			 * We were provided an exact symbol value; any
			 * address in the function is valid.
			 */
			laddr = (uintptr_t)sym.st_value;
			haddr = (uintptr_t)sym.st_value + sym.st_size;
		}

		for (i = 0; i < depth; i++)
			if (bcp->bc_stack[i] >= laddr &&
			    bcp->bc_stack[i] < haddr)
				break;

		/* No frame fell inside the window: filtered out. */
		if (i == depth)
			return (DCMD_OK);
	}

	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
		return (DCMD_OK);

	if (earliest != 0 && bcp->bc_timestamp < earliest)
		return (DCMD_OK);

	if (latest != 0 && bcp->bc_timestamp > latest)
		return (DCMD_OK);

	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
		return (DCMD_OK);

	/* In a pipeline, emit just the address of each matching bufctl. */
	if (flags & DCMD_PIPE_OUT) {
		mdb_printf("%#r\n", addr);
		return (DCMD_OK);
	}

	if (verbose) {
		mdb_printf(
		    "%<b>%16p%</b> %16p %16llx %16d\n"
		    "%16s %16p %16p %16p\n",
		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);

		mdb_inc_indent(17);
		for (i = 0; i < depth; i++)
			mdb_printf("%a\n", bcp->bc_stack[i]);
		mdb_dec_indent(17);
		mdb_printf("\n");
	} else {
		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
		    bcp->bc_timestamp, bcp->bc_thread);

		/*
		 * CALLER column: the first frame that resolves to a symbol
		 * and is not rejected by is_umem_sym(c, "umem_").
		 */
		for (i = 0; i < depth; i++) {
			if (mdb_lookup_by_addr(bcp->bc_stack[i],
			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
				continue;
			if (is_umem_sym(c, "umem_"))
				continue;
			mdb_printf(" %a\n", bcp->bc_stack[i]);
			break;
		}

		/* No frame was printed; just terminate the line. */
		if (i >= depth)
			mdb_printf("\n");
	}

	return (DCMD_OK);
}
2410 
2411 /*ARGSUSED*/
2412 int
2413 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2414 {
2415 	mdb_arg_t a;
2416 
2417 	if (!(flags & DCMD_ADDRSPEC))
2418 		return (DCMD_USAGE);
2419 
2420 	if (argc != 0)
2421 		return (DCMD_USAGE);
2422 
2423 	a.a_type = MDB_TYPE_STRING;
2424 	a.a_un.a_str = "-v";
2425 
2426 	return (bufctl(addr, flags, 1, &a));
2427 }
2428 
/*
 * State shared between the ::umem_verify dcmd and its verify_alloc() /
 * verify_free() walk callbacks.
 */
typedef struct umem_verify {
	uint64_t *umv_buf;		/* buffer to read cache contents into */
	size_t umv_size;		/* number of bytes in umv_buf */
	int umv_corruption;		/* > 0 if corruption found. */
	int umv_besilent;		/* nonzero: don't report each site */
	struct umem_cache umv_cache;	/* the cache we're operating on */
} umem_verify_t;
2436 
/*
 * verify_pattern()
 *	Check that the first 'size' bytes at buf_arg consist solely of the
 *	64-bit pattern 'pat', comparing one 64-bit word at a time.
 *
 *	Returns the byte offset of the first word that differs from pat,
 *	or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	size_t off;

	for (off = 0; off < size; off += sizeof (uint64_t)) {
		if (buf_arg[off / sizeof (uint64_t)] != pat)
			return ((int64_t)off);
	}

	return (-1);
}
2453 
2454 /*
2455  * verify_buftag()
2456  *	verify that btp->bt_bxstat == (bcp ^ pat)
2457  */
2458 static int
2459 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2460 {
2461 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2462 }
2463 
2464 /*
2465  * verify_free()
2466  *	verify the integrity of a free block of memory by checking
2467  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2468  */
2469 /*ARGSUSED1*/
2470 static int
2471 verify_free(uintptr_t addr, const void *data, void *private)
2472 {
2473 	umem_verify_t *umv = (umem_verify_t *)private;
2474 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2475 	int64_t corrupt;		/* corruption offset */
2476 	umem_buftag_t *buftagp;		/* ptr to buftag */
2477 	umem_cache_t *cp = &umv->umv_cache;
2478 	int besilent = umv->umv_besilent;
2479 
2480 	/*LINTED*/
2481 	buftagp = UMEM_BUFTAG(cp, buf);
2482 
2483 	/*
2484 	 * Read the buffer to check.
2485 	 */
2486 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2487 		if (!besilent)
2488 			mdb_warn("couldn't read %p", addr);
2489 		return (WALK_NEXT);
2490 	}
2491 
2492 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2493 	    UMEM_FREE_PATTERN)) >= 0) {
2494 		if (!besilent)
2495 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2496 			    addr, (uintptr_t)addr + corrupt);
2497 		goto corrupt;
2498 	}
2499 
2500 	if ((cp->cache_flags & UMF_HASH) &&
2501 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2502 		if (!besilent)
2503 			mdb_printf("buffer %p (free) seems to "
2504 			    "have a corrupt redzone pattern\n", addr);
2505 		goto corrupt;
2506 	}
2507 
2508 	/*
2509 	 * confirm bufctl pointer integrity.
2510 	 */
2511 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2512 		if (!besilent)
2513 			mdb_printf("buffer %p (free) has a corrupt "
2514 			    "buftag\n", addr);
2515 		goto corrupt;
2516 	}
2517 
2518 	return (WALK_NEXT);
2519 corrupt:
2520 	umv->umv_corruption++;
2521 	return (WALK_NEXT);
2522 }
2523 
2524 /*
2525  * verify_alloc()
2526  *	Verify that the buftag of an allocated buffer makes sense with respect
2527  *	to the buffer.
2528  */
2529 /*ARGSUSED1*/
2530 static int
2531 verify_alloc(uintptr_t addr, const void *data, void *private)
2532 {
2533 	umem_verify_t *umv = (umem_verify_t *)private;
2534 	umem_cache_t *cp = &umv->umv_cache;
2535 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2536 	/*LINTED*/
2537 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2538 	uint32_t *ip = (uint32_t *)buftagp;
2539 	uint8_t *bp = (uint8_t *)buf;
2540 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2541 	int besilent = umv->umv_besilent;
2542 
2543 	/*
2544 	 * Read the buffer to check.
2545 	 */
2546 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2547 		if (!besilent)
2548 			mdb_warn("couldn't read %p", addr);
2549 		return (WALK_NEXT);
2550 	}
2551 
2552 	/*
2553 	 * There are two cases to handle:
2554 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2555 	 *    0xfeedfacefeedface at the end of it
2556 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2557 	 *    0xbb just past the end of the region in use.  At the buftag,
2558 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2559 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2560 	 *    endianness), followed by 32 bits containing the offset of the
2561 	 *    0xbb byte in the buffer.
2562 	 *
2563 	 * Finally, the two 32-bit words that comprise the second half of the
2564 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2565 	 */
2566 
2567 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2568 		looks_ok = 1;
2569 	else if (!UMEM_SIZE_VALID(ip[1]))
2570 		size_ok = 0;
2571 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2572 		looks_ok = 1;
2573 	else
2574 		size_ok = 0;
2575 
2576 	if (!size_ok) {
2577 		if (!besilent)
2578 			mdb_printf("buffer %p (allocated) has a corrupt "
2579 			    "redzone size encoding\n", addr);
2580 		goto corrupt;
2581 	}
2582 
2583 	if (!looks_ok) {
2584 		if (!besilent)
2585 			mdb_printf("buffer %p (allocated) has a corrupt "
2586 			    "redzone signature\n", addr);
2587 		goto corrupt;
2588 	}
2589 
2590 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2591 		if (!besilent)
2592 			mdb_printf("buffer %p (allocated) has a "
2593 			    "corrupt buftag\n", addr);
2594 		goto corrupt;
2595 	}
2596 
2597 	return (WALK_NEXT);
2598 corrupt:
2599 	umv->umv_corruption++;
2600 	return (WALK_NEXT);
2601 }
2602 
2603 /*ARGSUSED2*/
2604 int
2605 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2606 {
2607 	if (flags & DCMD_ADDRSPEC) {
2608 		int check_alloc = 0, check_free = 0;
2609 		umem_verify_t umv;
2610 
2611 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2612 		    addr) == -1) {
2613 			mdb_warn("couldn't read umem_cache %p", addr);
2614 			return (DCMD_ERR);
2615 		}
2616 
2617 		umv.umv_size = umv.umv_cache.cache_buftag +
2618 		    sizeof (umem_buftag_t);
2619 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2620 		umv.umv_corruption = 0;
2621 
2622 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2623 			check_alloc = 1;
2624 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2625 				check_free = 1;
2626 		} else {
2627 			if (!(flags & DCMD_LOOP)) {
2628 				mdb_warn("cache %p (%s) does not have "
2629 				    "redzone checking enabled\n", addr,
2630 				    umv.umv_cache.cache_name);
2631 			}
2632 			return (DCMD_ERR);
2633 		}
2634 
2635 		if (flags & DCMD_LOOP) {
2636 			/*
2637 			 * table mode, don't print out every corrupt buffer
2638 			 */
2639 			umv.umv_besilent = 1;
2640 		} else {
2641 			mdb_printf("Summary for cache '%s'\n",
2642 			    umv.umv_cache.cache_name);
2643 			mdb_inc_indent(2);
2644 			umv.umv_besilent = 0;
2645 		}
2646 
2647 		if (check_alloc)
2648 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2649 		if (check_free)
2650 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2651 
2652 		if (flags & DCMD_LOOP) {
2653 			if (umv.umv_corruption == 0) {
2654 				mdb_printf("%-*s %?p clean\n",
2655 				    UMEM_CACHE_NAMELEN,
2656 				    umv.umv_cache.cache_name, addr);
2657 			} else {
2658 				char *s = "";	/* optional s in "buffer[s]" */
2659 				if (umv.umv_corruption > 1)
2660 					s = "s";
2661 
2662 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2663 				    UMEM_CACHE_NAMELEN,
2664 				    umv.umv_cache.cache_name, addr,
2665 				    umv.umv_corruption, s);
2666 			}
2667 		} else {
2668 			/*
2669 			 * This is the more verbose mode, when the user has
2670 			 * type addr::umem_verify.  If the cache was clean,
2671 			 * nothing will have yet been printed. So say something.
2672 			 */
2673 			if (umv.umv_corruption == 0)
2674 				mdb_printf("clean\n");
2675 
2676 			mdb_dec_indent(2);
2677 		}
2678 	} else {
2679 		/*
2680 		 * If the user didn't specify a cache to verify, we'll walk all
2681 		 * umem_cache's, specifying ourself as a callback for each...
2682 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2683 		 */
2684 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2685 		    "Cache Name", "Addr", "Cache Integrity");
2686 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2687 	}
2688 
2689 	return (DCMD_OK);
2690 }
2691 
/*
 * One node per vmem arena.  vmem_walk_init() creates a node for every
 * arena on the target's vmem_list and links the nodes into a tree that
 * follows each arena's vm_source.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node in flat list of all */
	struct vmem_node *vn_parent;	/* node of vm_source; NULL for roots */
	struct vmem_node *vn_sibling;	/* next node with the same parent */
	struct vmem_node *vn_children;	/* head of this node's child list */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once visited by postfix walk */
	vmem_t vn_vmem;			/* local copy of the vmem_t */
} vmem_node_t;
2701 
/*
 * Per-walk state for the vmem arena walkers, stored in wsp->walk_data.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* head of the list of root nodes */
	vmem_node_t *vw_current;	/* node the step function is at */
} vmem_walk_t;
2706 
2707 int
2708 vmem_walk_init(mdb_walk_state_t *wsp)
2709 {
2710 	uintptr_t vaddr, paddr;
2711 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2712 	vmem_walk_t *vw;
2713 
2714 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2715 		mdb_warn("couldn't read 'vmem_list'");
2716 		return (WALK_ERR);
2717 	}
2718 
2719 	while (vaddr != NULL) {
2720 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2721 		vp->vn_addr = vaddr;
2722 		vp->vn_next = head;
2723 		head = vp;
2724 
2725 		if (vaddr == wsp->walk_addr)
2726 			current = vp;
2727 
2728 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2729 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2730 			goto err;
2731 		}
2732 
2733 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2734 	}
2735 
2736 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2737 
2738 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2739 			vp->vn_sibling = root;
2740 			root = vp;
2741 			continue;
2742 		}
2743 
2744 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2745 			if (parent->vn_addr != paddr)
2746 				continue;
2747 			vp->vn_sibling = parent->vn_children;
2748 			parent->vn_children = vp;
2749 			vp->vn_parent = parent;
2750 			break;
2751 		}
2752 
2753 		if (parent == NULL) {
2754 			mdb_warn("couldn't find %p's parent (%p)\n",
2755 			    vp->vn_addr, paddr);
2756 			goto err;
2757 		}
2758 	}
2759 
2760 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2761 	vw->vw_root = root;
2762 
2763 	if (current != NULL)
2764 		vw->vw_current = current;
2765 	else
2766 		vw->vw_current = root;
2767 
2768 	wsp->walk_data = vw;
2769 	return (WALK_NEXT);
2770 err:
2771 	for (vp = head; head != NULL; vp = head) {
2772 		head = vp->vn_next;
2773 		mdb_free(vp, sizeof (vmem_node_t));
2774 	}
2775 
2776 	return (WALK_ERR);
2777 }
2778 
2779 int
2780 vmem_walk_step(mdb_walk_state_t *wsp)
2781 {
2782 	vmem_walk_t *vw = wsp->walk_data;
2783 	vmem_node_t *vp;
2784 	int rval;
2785 
2786 	if ((vp = vw->vw_current) == NULL)
2787 		return (WALK_DONE);
2788 
2789 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2790 
2791 	if (vp->vn_children != NULL) {
2792 		vw->vw_current = vp->vn_children;
2793 		return (rval);
2794 	}
2795 
2796 	do {
2797 		vw->vw_current = vp->vn_sibling;
2798 		vp = vp->vn_parent;
2799 	} while (vw->vw_current == NULL && vp != NULL);
2800 
2801 	return (rval);
2802 }
2803 
2804 /*
2805  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
2806  * children are visited before their parent.  We perform the postfix walk
2807  * iteratively (rather than recursively) to allow mdb to regain control
2808  * after each callback.
2809  */
2810 int
2811 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2812 {
2813 	vmem_walk_t *vw = wsp->walk_data;
2814 	vmem_node_t *vp = vw->vw_current;
2815 	int rval;
2816 
2817 	/*
2818 	 * If this node is marked, then we know that we have already visited
2819 	 * all of its children.  If the node has any siblings, they need to
2820 	 * be visited next; otherwise, we need to visit the parent.  Note
2821 	 * that vp->vn_marked will only be zero on the first invocation of
2822 	 * the step function.
2823 	 */
2824 	if (vp->vn_marked) {
2825 		if (vp->vn_sibling != NULL)
2826 			vp = vp->vn_sibling;
2827 		else if (vp->vn_parent != NULL)
2828 			vp = vp->vn_parent;
2829 		else {
2830 			/*
2831 			 * We have neither a parent, nor a sibling, and we
2832 			 * have already been visited; we're done.
2833 			 */
2834 			return (WALK_DONE);
2835 		}
2836 	}
2837 
2838 	/*
2839 	 * Before we visit this node, visit its children.
2840 	 */
2841 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2842 		vp = vp->vn_children;
2843 
2844 	vp->vn_marked = 1;
2845 	vw->vw_current = vp;
2846 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2847 
2848 	return (rval);
2849 }
2850 
2851 void
2852 vmem_walk_fini(mdb_walk_state_t *wsp)
2853 {
2854 	vmem_walk_t *vw = wsp->walk_data;
2855 	vmem_node_t *root = vw->vw_root;
2856 	int done;
2857 
2858 	if (root == NULL)
2859 		return;
2860 
2861 	if ((vw->vw_root = root->vn_children) != NULL)
2862 		vmem_walk_fini(wsp);
2863 
2864 	vw->vw_root = root->vn_sibling;
2865 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2866 	mdb_free(root, sizeof (vmem_node_t));
2867 
2868 	if (done) {
2869 		mdb_free(vw, sizeof (vmem_walk_t));
2870 	} else {
2871 		vmem_walk_fini(wsp);
2872 	}
2873 }
2874 
/*
 * Per-walk state for the vmem segment walkers, stored in wsp->walk_data.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report; VMEM_NONE for all */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
} vmem_seg_walk_t;
2880 
2881 /*ARGSUSED*/
2882 int
2883 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2884 {
2885 	vmem_seg_walk_t *vsw;
2886 
2887 	if (wsp->walk_addr == NULL) {
2888 		mdb_warn("vmem_%s does not support global walks\n", name);
2889 		return (WALK_ERR);
2890 	}
2891 
2892 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2893 
2894 	vsw->vsw_type = type;
2895 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2896 	vsw->vsw_current = vsw->vsw_start;
2897 
2898 	return (WALK_NEXT);
2899 }
2900 
/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
 * The segment walkers therefore use VMEM_NONE as the "no type filter"
 * value: vmem_seg_walk_step() reports every segment when the walk's
 * type is VMEM_NONE.
 */
#define	VMEM_NONE	0
2905 
/*
 * Walker init that restricts the segment walk to VMEM_ALLOC segments.
 */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
2911 
/*
 * Walker init that restricts the segment walk to VMEM_FREE segments.
 */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
2917 
/*
 * Walker init that restricts the segment walk to VMEM_SPAN segments.
 */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
2923 
/*
 * Walker init for an unfiltered segment walk: VMEM_NONE makes
 * vmem_seg_walk_step() report segments of every type.
 */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
2929 
2930 int
2931 vmem_seg_walk_step(mdb_walk_state_t *wsp)
2932 {
2933 	vmem_seg_t seg;
2934 	vmem_seg_walk_t *vsw = wsp->walk_data;
2935 	uintptr_t addr = vsw->vsw_current;
2936 	static size_t seg_size = 0;
2937 	int rval;
2938 
2939 	if (!seg_size) {
2940 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
2941 			mdb_warn("failed to read 'vmem_seg_size'");
2942 			seg_size = sizeof (vmem_seg_t);
2943 		}
2944 	}
2945 
2946 	if (seg_size < sizeof (seg))
2947 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
2948 
2949 	if (mdb_vread(&seg, seg_size, addr) == -1) {
2950 		mdb_warn("couldn't read vmem_seg at %p", addr);
2951 		return (WALK_ERR);
2952 	}
2953 
2954 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
2955 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
2956 		rval = WALK_NEXT;
2957 	} else {
2958 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
2959 	}
2960 
2961 	if (vsw->vsw_current == vsw->vsw_start)
2962 		return (WALK_DONE);
2963 
2964 	return (rval);
2965 }
2966 
2967 void
2968 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
2969 {
2970 	vmem_seg_walk_t *vsw = wsp->walk_data;
2971 
2972 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
2973 }
2974 
2975 #define	VMEM_NAMEWIDTH	22
2976 
2977 int
2978 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2979 {
2980 	vmem_t v, parent;
2981 	uintptr_t paddr;
2982 	int ident = 0;
2983 	char c[VMEM_NAMEWIDTH];
2984 
2985 	if (!(flags & DCMD_ADDRSPEC)) {
2986 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
2987 			mdb_warn("can't walk vmem");
2988 			return (DCMD_ERR);
2989 		}
2990 		return (DCMD_OK);
2991 	}
2992 
2993 	if (DCMD_HDRSPEC(flags))
2994 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
2995 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
2996 		    "TOTAL", "SUCCEED", "FAIL");
2997 
2998 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
2999 		mdb_warn("couldn't read vmem at %p", addr);
3000 		return (DCMD_ERR);
3001 	}
3002 
3003 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3004 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3005 			mdb_warn("couldn't trace %p's ancestry", addr);
3006 			ident = 0;
3007 			break;
3008 		}
3009 		paddr = (uintptr_t)parent.vm_source;
3010 	}
3011 
3012 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3013 
3014 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3015 	    addr, VMEM_NAMEWIDTH, c,
3016 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3017 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3018 
3019 	return (DCMD_OK);
3020 }
3021 
/*
 * Help text for the ::vmem_seg dcmd.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3053 
3054 
3055 /*ARGSUSED*/
3056 int
3057 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3058 {
3059 	vmem_seg_t vs;
3060 	uintptr_t *stk = vs.vs_stack;
3061 	uintptr_t sz;
3062 	uint8_t t;
3063 	const char *type = NULL;
3064 	GElf_Sym sym;
3065 	char c[MDB_SYM_NAMLEN];
3066 	int no_debug;
3067 	int i;
3068 	int depth;
3069 	uintptr_t laddr, haddr;
3070 
3071 	uintptr_t caller = NULL, thread = NULL;
3072 	uintptr_t minsize = 0, maxsize = 0;
3073 
3074 	hrtime_t earliest = 0, latest = 0;
3075 
3076 	uint_t size = 0;
3077 	uint_t verbose = 0;
3078 
3079 	if (!(flags & DCMD_ADDRSPEC))
3080 		return (DCMD_USAGE);
3081 
3082 	if (mdb_getopts(argc, argv,
3083 	    'c', MDB_OPT_UINTPTR, &caller,
3084 	    'e', MDB_OPT_UINT64, &earliest,
3085 	    'l', MDB_OPT_UINT64, &latest,
3086 	    's', MDB_OPT_SETBITS, TRUE, &size,
3087 	    'm', MDB_OPT_UINTPTR, &minsize,
3088 	    'M', MDB_OPT_UINTPTR, &maxsize,
3089 	    't', MDB_OPT_UINTPTR, &thread,
3090 	    'T', MDB_OPT_STR, &type,
3091 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3092 	    NULL) != argc)
3093 		return (DCMD_USAGE);
3094 
3095 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3096 		if (verbose) {
3097 			mdb_printf("%16s %4s %16s %16s %16s\n"
3098 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3099 			    "ADDR", "TYPE", "START", "END", "SIZE",
3100 			    "", "", "THREAD", "TIMESTAMP", "");
3101 		} else {
3102 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3103 			    "START", size? "SIZE" : "END", "WHO");
3104 		}
3105 	}
3106 
3107 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3108 		mdb_warn("couldn't read vmem_seg at %p", addr);
3109 		return (DCMD_ERR);
3110 	}
3111 
3112 	if (type != NULL) {
3113 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3114 			t = VMEM_ALLOC;
3115 		else if (strcmp(type, "FREE") == 0)
3116 			t = VMEM_FREE;
3117 		else if (strcmp(type, "SPAN") == 0)
3118 			t = VMEM_SPAN;
3119 		else if (strcmp(type, "ROTR") == 0 ||
3120 		    strcmp(type, "ROTOR") == 0)
3121 			t = VMEM_ROTOR;
3122 		else if (strcmp(type, "WLKR") == 0 ||
3123 		    strcmp(type, "WALKER") == 0)
3124 			t = VMEM_WALKER;
3125 		else {
3126 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3127 			    type);
3128 			return (DCMD_ERR);
3129 		}
3130 
3131 		if (vs.vs_type != t)
3132 			return (DCMD_OK);
3133 	}
3134 
3135 	sz = vs.vs_end - vs.vs_start;
3136 
3137 	if (minsize != 0 && sz < minsize)
3138 		return (DCMD_OK);
3139 
3140 	if (maxsize != 0 && sz > maxsize)
3141 		return (DCMD_OK);
3142 
3143 	t = vs.vs_type;
3144 	depth = vs.vs_depth;
3145 
3146 	/*
3147 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3148 	 */
3149 	no_debug = (t != VMEM_ALLOC) ||
3150 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3151 
3152 	if (no_debug) {
3153 		if (caller != NULL || thread != NULL || earliest != 0 ||
3154 		    latest != 0)
3155 			return (DCMD_OK);		/* not enough info */
3156 	} else {
3157 		if (caller != NULL) {
3158 			laddr = caller;
3159 			haddr = caller + sizeof (caller);
3160 
3161 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3162 			    sizeof (c), &sym) != -1 &&
3163 			    caller == (uintptr_t)sym.st_value) {
3164 				/*
3165 				 * We were provided an exact symbol value; any
3166 				 * address in the function is valid.
3167 				 */
3168 				laddr = (uintptr_t)sym.st_value;
3169 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3170 			}
3171 
3172 			for (i = 0; i < depth; i++)
3173 				if (vs.vs_stack[i] >= laddr &&
3174 				    vs.vs_stack[i] < haddr)
3175 					break;
3176 
3177 			if (i == depth)
3178 				return (DCMD_OK);
3179 		}
3180 
3181 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3182 			return (DCMD_OK);
3183 
3184 		if (earliest != 0 && vs.vs_timestamp < earliest)
3185 			return (DCMD_OK);
3186 
3187 		if (latest != 0 && vs.vs_timestamp > latest)
3188 			return (DCMD_OK);
3189 	}
3190 
3191 	type = (t == VMEM_ALLOC ? "ALLC" :
3192 	    t == VMEM_FREE ? "FREE" :
3193 	    t == VMEM_SPAN ? "SPAN" :
3194 	    t == VMEM_ROTOR ? "ROTR" :
3195 	    t == VMEM_WALKER ? "WLKR" :
3196 	    "????");
3197 
3198 	if (flags & DCMD_PIPE_OUT) {
3199 		mdb_printf("%#r\n", addr);
3200 		return (DCMD_OK);
3201 	}
3202 
3203 	if (verbose) {
3204 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3205 		    addr, type, vs.vs_start, vs.vs_end, sz);
3206 
3207 		if (no_debug)
3208 			return (DCMD_OK);
3209 
3210 		mdb_printf("%16s %4s %16d %16llx\n",
3211 		    "", "", vs.vs_thread, vs.vs_timestamp);
3212 
3213 		mdb_inc_indent(17);
3214 		for (i = 0; i < depth; i++) {
3215 			mdb_printf("%a\n", stk[i]);
3216 		}
3217 		mdb_dec_indent(17);
3218 		mdb_printf("\n");
3219 	} else {
3220 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3221 		    vs.vs_start, size? sz : vs.vs_end);
3222 
3223 		if (no_debug) {
3224 			mdb_printf("\n");
3225 			return (DCMD_OK);
3226 		}
3227 
3228 		for (i = 0; i < depth; i++) {
3229 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3230 			    c, sizeof (c), &sym) == -1)
3231 				continue;
3232 			if (is_umem_sym(c, "vmem_"))
3233 				continue;
3234 			break;
3235 		}
3236 		mdb_printf(" %a\n", stk[i]);
3237 	}
3238 	return (DCMD_OK);
3239 }
3240 
3241 /*ARGSUSED*/
3242 static int
3243 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3244 {
3245 	char name[UMEM_CACHE_NAMELEN + 1];
3246 	hrtime_t delta;
3247 	int i, depth;
3248 
3249 	if (bcp->bc_timestamp == 0)
3250 		return (WALK_DONE);
3251 
3252 	if (*newest == 0)
3253 		*newest = bcp->bc_timestamp;
3254 
3255 	delta = *newest - bcp->bc_timestamp;
3256 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3257 
3258 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3259 	    &bcp->bc_cache->cache_name) <= 0)
3260 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3261 
3262 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3263 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3264 
3265 	for (i = 0; i < depth; i++)
3266 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3267 
3268 	return (WALK_NEXT);
3269 }
3270 
3271 int
3272 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3273 {
3274 	const char *logname = "umem_transaction_log";
3275 	hrtime_t newest = 0;
3276 
3277 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3278 		return (DCMD_USAGE);
3279 
3280 	if (argc > 0) {
3281 		if (argv->a_type != MDB_TYPE_STRING)
3282 			return (DCMD_USAGE);
3283 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3284 			logname = "umem_failure_log";
3285 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3286 			logname = "umem_slab_log";
3287 		else
3288 			return (DCMD_USAGE);
3289 	}
3290 
3291 	if (umem_readvar(&addr, logname) == -1) {
3292 		mdb_warn("failed to read %s log header pointer");
3293 		return (DCMD_ERR);
3294 	}
3295 
3296 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3297 		mdb_warn("failed to walk umem log");
3298 		return (DCMD_ERR);
3299 	}
3300 
3301 	return (DCMD_OK);
3302 }
3303 
3304 /*
3305  * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3306  * The first piece is a structure which we use to accumulate umem_cache_t
3307  * addresses of interest.  The umc_add is used as a callback for the umem_cache
3308  * walker; we either add all caches, or ones named explicitly as arguments.
3309  */
3310 
/*
 * Growable list of umem_cache_t addresses, filled in by umc_add().
 */
typedef struct umclist {
	const char *umc_name;			/* Name to match (or NULL) */
	uintptr_t *umc_caches;			/* List of umem_cache_t addrs */
	int umc_nelems;				/* Num entries in umc_caches */
	int umc_size;				/* Size of umc_caches array */
} umclist_t;
3317 
3318 static int
3319 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3320 {
3321 	void *p;
3322 	int s;
3323 
3324 	if (umc->umc_name == NULL ||
3325 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3326 		/*
3327 		 * If we have a match, grow our array (if necessary), and then
3328 		 * add the virtual address of the matching cache to our list.
3329 		 */
3330 		if (umc->umc_nelems >= umc->umc_size) {
3331 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3332 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3333 
3334 			bcopy(umc->umc_caches, p,
3335 			    sizeof (uintptr_t) * umc->umc_size);
3336 
3337 			umc->umc_caches = p;
3338 			umc->umc_size = s;
3339 		}
3340 
3341 		umc->umc_caches[umc->umc_nelems++] = addr;
3342 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3343 	}
3344 
3345 	return (WALK_NEXT);
3346 }
3347 
3348 /*
3349  * The second piece of ::umausers is a hash table of allocations.  Each
3350  * allocation owner is identified by its stack trace and data_size.  We then
3351  * track the total bytes of all such allocations, and the number of allocations
3352  * to report at the end.  Once we have a list of caches, we walk through the
3353  * allocated bufctls of each, and update our hash table accordingly.
3354  */
3355 
/*
 * One allocation owner.  The owners live in a single array that doubles
 * as the hash table: slot N's umo_head is the head of bucket N's chain,
 * independently of the owner stored in slot N.
 */
typedef struct umowner {
	struct umowner *umo_head;		/* First hash elt in bucket */
	struct umowner *umo_next;		/* Next hash elt in chain */
	size_t umo_signature;			/* Hash table signature */
	uint_t umo_num;				/* Number of allocations */
	size_t umo_data_size;			/* Size of each allocation */
	size_t umo_total_size;			/* Total bytes of allocation */
	int umo_depth;				/* Depth of stack trace */
	uintptr_t *umo_stack;			/* Stack trace (in umu_stacks) */
} umowner_t;
3366 
/*
 * State for ::umausers: the owner hash table and the storage backing
 * each owner's stack trace.
 */
typedef struct umusers {
	const umem_cache_t *umu_cache;		/* Current umem cache */
	umowner_t *umu_hash;			/* Hash table of owners */
	uintptr_t *umu_stacks;			/* stacks for owners */
	int umu_nelems;				/* Number of entries in use */
	int umu_size;				/* Total number of entries */
} umusers_t;
3374 
3375 static void
3376 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3377     size_t size, size_t data_size)
3378 {
3379 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3380 	size_t bucket, signature = data_size;
3381 	umowner_t *umo, *umoend;
3382 
3383 	/*
3384 	 * If the hash table is full, double its size and rehash everything.
3385 	 */
3386 	if (umu->umu_nelems >= umu->umu_size) {
3387 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3388 		size_t umowner_size = sizeof (umowner_t);
3389 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3390 		uintptr_t *new_stacks;
3391 
3392 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3393 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3394 
3395 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3396 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3397 		umu->umu_hash = umo;
3398 		umu->umu_stacks = new_stacks;
3399 		umu->umu_size = s;
3400 
3401 		umoend = umu->umu_hash + umu->umu_size;
3402 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3403 			umo->umo_head = NULL;
3404 			umo->umo_stack = &umu->umu_stacks[
3405 			    umem_stack_depth * (umo - umu->umu_hash)];
3406 		}
3407 
3408 		umoend = umu->umu_hash + umu->umu_nelems;
3409 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3410 			bucket = umo->umo_signature & (umu->umu_size - 1);
3411 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3412 			umu->umu_hash[bucket].umo_head = umo;
3413 		}
3414 	}
3415 
3416 	/*
3417 	 * Finish computing the hash signature from the stack trace, and then
3418 	 * see if the owner is in the hash table.  If so, update our stats.
3419 	 */
3420 	for (i = 0; i < depth; i++)
3421 		signature += bcp->bc_stack[i];
3422 
3423 	bucket = signature & (umu->umu_size - 1);
3424 
3425 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3426 		if (umo->umo_signature == signature) {
3427 			size_t difference = 0;
3428 
3429 			difference |= umo->umo_data_size - data_size;
3430 			difference |= umo->umo_depth - depth;
3431 
3432 			for (i = 0; i < depth; i++) {
3433 				difference |= umo->umo_stack[i] -
3434 				    bcp->bc_stack[i];
3435 			}
3436 
3437 			if (difference == 0) {
3438 				umo->umo_total_size += size;
3439 				umo->umo_num++;
3440 				return;
3441 			}
3442 		}
3443 	}
3444 
3445 	/*
3446 	 * If the owner is not yet hashed, grab the next element and fill it
3447 	 * in based on the allocation information.
3448 	 */
3449 	umo = &umu->umu_hash[umu->umu_nelems++];
3450 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3451 	umu->umu_hash[bucket].umo_head = umo;
3452 
3453 	umo->umo_signature = signature;
3454 	umo->umo_num = 1;
3455 	umo->umo_data_size = data_size;
3456 	umo->umo_total_size = size;
3457 	umo->umo_depth = depth;
3458 
3459 	for (i = 0; i < depth; i++)
3460 		umo->umo_stack[i] = bcp->bc_stack[i];
3461 }
3462 
3463 /*
3464  * When ::umausers is invoked without the -f flag, we simply update our hash
3465  * table with the information from each allocated bufctl.
3466  */
3467 /*ARGSUSED*/
3468 static int
3469 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3470 {
3471 	const umem_cache_t *cp = umu->umu_cache;
3472 
3473 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3474 	return (WALK_NEXT);
3475 }
3476 
3477 /*
3478  * When ::umausers is invoked with the -f flag, we print out the information
3479  * for each bufctl as well as updating the hash table.
3480  */
3481 static int
3482 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3483 {
3484 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3485 	const umem_cache_t *cp = umu->umu_cache;
3486 
3487 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3488 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3489 
3490 	for (i = 0; i < depth; i++)
3491 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3492 
3493 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3494 	return (WALK_NEXT);
3495 }
3496 
3497 /*
3498  * We sort our results by allocation size before printing them.
3499  */
3500 static int
3501 umownercmp(const void *lp, const void *rp)
3502 {
3503 	const umowner_t *lhs = lp;
3504 	const umowner_t *rhs = rp;
3505 
3506 	return (rhs->umo_total_size - lhs->umo_total_size);
3507 }
3508 
3509 /*
3510  * The main engine of ::umausers is relatively straightforward: First we
3511  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3512  * iterate over the allocated bufctls of each cache in the list.  Finally,
3513  * we sort and print our results.
3514  */
3515 /*ARGSUSED*/
3516 int
3517 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3518 {
3519 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3520 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3521 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3522 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3523 	int opt_e = FALSE;		/* Include "small" users */
3524 	int opt_f = FALSE;		/* Print stack traces */
3525 
3526 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3527 	umowner_t *umo, *umoend;
3528 	int i, oelems;
3529 
3530 	umclist_t umc;
3531 	umusers_t umu;
3532 
3533 	if (flags & DCMD_ADDRSPEC)
3534 		return (DCMD_USAGE);
3535 
3536 	bzero(&umc, sizeof (umc));
3537 	bzero(&umu, sizeof (umu));
3538 
3539 	while ((i = mdb_getopts(argc, argv,
3540 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3541 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3542 
3543 		argv += i;	/* skip past options we just processed */
3544 		argc -= i;	/* adjust argc */
3545 
3546 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3547 			return (DCMD_USAGE);
3548 
3549 		oelems = umc.umc_nelems;
3550 		umc.umc_name = argv->a_un.a_str;
3551 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3552 
3553 		if (umc.umc_nelems == oelems) {
3554 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3555 			return (DCMD_ERR);
3556 		}
3557 
3558 		do_all_caches = 0;
3559 		argv++;
3560 		argc--;
3561 	}
3562 
3563 	if (opt_e)
3564 		mem_threshold = cnt_threshold = 0;
3565 
3566 	if (opt_f)
3567 		callback = (mdb_walk_cb_t)umause2;
3568 
3569 	if (do_all_caches) {
3570 		umc.umc_name = NULL; /* match all cache names */
3571 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3572 	}
3573 
3574 	for (i = 0; i < umc.umc_nelems; i++) {
3575 		uintptr_t cp = umc.umc_caches[i];
3576 		umem_cache_t c;
3577 
3578 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3579 			mdb_warn("failed to read cache at %p", cp);
3580 			continue;
3581 		}
3582 
3583 		if (!(c.cache_flags & UMF_AUDIT)) {
3584 			if (!do_all_caches) {
3585 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3586 				    c.cache_name);
3587 			}
3588 			continue;
3589 		}
3590 
3591 		umu.umu_cache = &c;
3592 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3593 		audited_caches++;
3594 	}
3595 
3596 	if (audited_caches == 0 && do_all_caches) {
3597 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3598 		return (DCMD_ERR);
3599 	}
3600 
3601 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3602 	umoend = umu.umu_hash + umu.umu_nelems;
3603 
3604 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3605 		if (umo->umo_total_size < mem_threshold &&
3606 		    umo->umo_num < cnt_threshold)
3607 			continue;
3608 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3609 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3610 		for (i = 0; i < umo->umo_depth; i++)
3611 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3612 	}
3613 
3614 	return (DCMD_OK);
3615 }
3616 
/*
 * Local copy of the tag that sits in front of the usable space of a
 * malloc()ed buffer.  um_umem_buffer_cb() reads one of these from the
 * target and decodes it to decide whether a umem buffer came from malloc.
 */
struct malloc_data {
	uint32_t malloc_size;
	uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
};
3621 
/*
 * Largest malloc size tracked in the per-size histogram: UMEM_MAXBUF less
 * the tag overhead (two malloc_data's worth on LP64, one otherwise).
 * Histogram arrays are allocated with UMI_MAX_BUCKET + 1 entries.
 */
#ifdef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#endif
3627 
/*
 * Accumulator filled in by um_umem_buffer_cb() as it visits the buffers of
 * one or more caches.
 */
typedef struct umem_malloc_info {
	size_t um_total;	/* total allocated buffers */
	size_t um_malloc;	/* malloc buffers */
	size_t um_malloc_size;	/* sum of malloc buffer sizes */
	size_t um_malloc_overhead; /* sum of in-chunk overheads */

	umem_cache_t *um_cp;	/* cache whose buffers are being visited */

	/* per-size counts, indexed by malloc size; NULL if not wanted */
	uint_t *um_bucket;
} umem_malloc_info_t;
3638 
3639 static void
3640 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3641     size_t maxbuckets, size_t minbucketsize, int geometric)
3642 {
3643 	uint64_t um_malloc;
3644 	int minb = -1;
3645 	int maxb = -1;
3646 	int buckets;
3647 	int nbucks;
3648 	int i;
3649 	int b;
3650 	const int *distarray;
3651 
3652 	minb = (int)minmalloc;
3653 	maxb = (int)maxmalloc;
3654 
3655 	nbucks = buckets = maxb - minb + 1;
3656 
3657 	um_malloc = 0;
3658 	for (b = minb; b <= maxb; b++)
3659 		um_malloc += um_bucket[b];
3660 
3661 	if (maxbuckets != 0)
3662 		buckets = MIN(buckets, maxbuckets);
3663 
3664 	if (minbucketsize > 1) {
3665 		buckets = MIN(buckets, nbucks/minbucketsize);
3666 		if (buckets == 0) {
3667 			buckets = 1;
3668 			minbucketsize = nbucks;
3669 		}
3670 	}
3671 
3672 	if (geometric)
3673 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3674 	else
3675 		distarray = dist_linear(buckets, minb, maxb);
3676 
3677 	dist_print_header("malloc size", 11, "count");
3678 	for (i = 0; i < buckets; i++) {
3679 		dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3680 	}
3681 	mdb_printf("\n");
3682 }
3683 
3684 /*
3685  * A malloc()ed buffer looks like:
3686  *
3687  *	<----------- mi.malloc_size --->
3688  *	<----------- cp.cache_bufsize ------------------>
3689  *	<----------- cp.cache_chunksize -------------------------------->
3690  *	+-------+-----------------------+---------------+---------------+
3691  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3692  *	+-------+---------------------------------------+---------------+
3693  *		<-- usable space ------>
3694  *
3695  * mallocsz is the argument to malloc(3C).
3696  * mi.malloc_size is the actual size passed to umem_alloc(), which
3697  * is rounded up to the smallest available cache size, which is
3698  * cache_bufsize.  If there is debugging or alignment overhead in
3699  * the cache, that is reflected in a larger cache_chunksize.
3700  *
3701  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3702  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3703  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3704  * the tag has 8 bytes of padding before it.
3705  *
3706  * 32-byte, 64-byte buffers <= 8 bytes:
3707  *	+-------+-------+--------- ...
3708  *	|/size//|/stat//| mallocsz ...
3709  *	+-------+-------+--------- ...
3710  *			^
3711  *			pointer returned from malloc(3C)
3712  *
3713  * 64-byte buffers > 8 bytes:
3714  *	+---------------+-------+-------+--------- ...
3715  *	|/padding///////|/size//|/stat//| mallocsz ...
3716  *	+---------------+-------+-------+--------- ...
3717  *					^
3718  *					pointer returned from malloc(3C)
3719  *
3720  * The "size" field is "malloc_size", which is mallocsz + the padding.
3721  * The "stat" field is derived from malloc_size, and functions as a
3722  * validation that this buffer is actually from malloc(3C).
3723  */
3724 /*ARGSUSED*/
3725 static int
3726 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3727 {
3728 	struct malloc_data md;
3729 	size_t m_addr = addr;
3730 	size_t overhead = sizeof (md);
3731 	size_t mallocsz;
3732 
3733 	ump->um_total++;
3734 
3735 #ifdef _LP64
3736 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3737 		m_addr += overhead;
3738 		overhead += sizeof (md);
3739 	}
3740 #endif
3741 
3742 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3743 		mdb_warn("unable to read malloc header at %p", m_addr);
3744 		return (WALK_NEXT);
3745 	}
3746 
3747 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3748 	case MALLOC_MAGIC:
3749 #ifdef _LP64
3750 	case MALLOC_SECOND_MAGIC:
3751 #endif
3752 		mallocsz = md.malloc_size - overhead;
3753 
3754 		ump->um_malloc++;
3755 		ump->um_malloc_size += mallocsz;
3756 		ump->um_malloc_overhead += overhead;
3757 
3758 		/* include round-off and debug overhead */
3759 		ump->um_malloc_overhead +=
3760 		    ump->um_cp->cache_chunksize - md.malloc_size;
3761 
3762 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
3763 			ump->um_bucket[mallocsz]++;
3764 
3765 		break;
3766 	default:
3767 		break;
3768 	}
3769 
3770 	return (WALK_NEXT);
3771 }
3772 
3773 int
3774 get_umem_alloc_sizes(int **out, size_t *out_num)
3775 {
3776 	GElf_Sym sym;
3777 
3778 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
3779 		mdb_warn("unable to look up umem_alloc_sizes");
3780 		return (-1);
3781 	}
3782 
3783 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
3784 	*out_num = sym.st_size / sizeof (int);
3785 
3786 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
3787 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
3788 		*out = NULL;
3789 		return (-1);
3790 	}
3791 
3792 	return (0);
3793 }
3794 
3795 
3796 static int
3797 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
3798 {
3799 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
3800 		return (WALK_NEXT);
3801 
3802 	ump->um_cp = cp;
3803 
3804 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
3805 	    -1) {
3806 		mdb_warn("can't walk 'umem' for cache %p", addr);
3807 		return (WALK_ERR);
3808 	}
3809 
3810 	return (WALK_NEXT);
3811 }
3812 
/*
 * Help text for the malloc distribution dcmd: a one-line summary followed
 * by an OPTIONS section describing -b, -B, -d and -g.
 */
void
umem_malloc_dist_help(void)
{
	static const char summary[] =
	    "report distribution of outstanding malloc()s";
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw data out, without binning\n"
"  -g    use geometric binning instead of linear binning\n";

	mdb_printf("%s\n", summary);

	/* Pull the section heading out to the left of the option text. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3829 
3830 /*ARGSUSED*/
3831 int
3832 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3833 {
3834 	umem_malloc_info_t mi;
3835 	uint_t geometric = 0;
3836 	uint_t dump = 0;
3837 	size_t maxbuckets = 0;
3838 	size_t minbucketsize = 0;
3839 
3840 	size_t minalloc = 0;
3841 	size_t maxalloc = UMI_MAX_BUCKET;
3842 
3843 	if (flags & DCMD_ADDRSPEC)
3844 		return (DCMD_USAGE);
3845 
3846 	if (mdb_getopts(argc, argv,
3847 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3848 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3849 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3850 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3851 	    0) != argc)
3852 		return (DCMD_USAGE);
3853 
3854 	bzero(&mi, sizeof (mi));
3855 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
3856 	    UM_SLEEP | UM_GC);
3857 
3858 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
3859 	    &mi) == -1) {
3860 		mdb_warn("unable to walk 'umem_cache'");
3861 		return (DCMD_ERR);
3862 	}
3863 
3864 	if (dump) {
3865 		int i;
3866 		for (i = minalloc; i <= maxalloc; i++)
3867 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
3868 
3869 		return (DCMD_OK);
3870 	}
3871 
3872 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
3873 	    maxbuckets, minbucketsize, geometric);
3874 
3875 	return (DCMD_OK);
3876 }
3877 
/*
 * Help text for the per-cache malloc information dcmd: a one-line summary
 * followed by an OPTIONS section.  The -g option is only described when
 * _KMDB is not defined.
 */
void
umem_malloc_info_help(void)
{
	static const char summary[] =
	    "report information about malloc()s by cache.  ";
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
"  -g    use geometric binning instead of linear binning\n"
#endif
	    "";

	mdb_printf("%s\n", summary);

	/* Pull the section heading out to the left of the option text. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3897 int
3898 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3899 {
3900 	umem_cache_t c;
3901 	umem_malloc_info_t mi;
3902 
3903 	int skip = 0;
3904 
3905 	size_t maxmalloc;
3906 	size_t overhead;
3907 	size_t allocated;
3908 	size_t avg_malloc;
3909 	size_t overhead_pct;	/* 1000 * overhead_percent */
3910 
3911 	uint_t verbose = 0;
3912 	uint_t dump = 0;
3913 	uint_t geometric = 0;
3914 	size_t maxbuckets = 0;
3915 	size_t minbucketsize = 0;
3916 
3917 	int *alloc_sizes;
3918 	int idx;
3919 	size_t num;
3920 	size_t minmalloc;
3921 
3922 	if (mdb_getopts(argc, argv,
3923 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3924 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3925 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3926 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3927 	    0) != argc)
3928 		return (DCMD_USAGE);
3929 
3930 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
3931 		verbose = 1;
3932 
3933 	if (!(flags & DCMD_ADDRSPEC)) {
3934 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
3935 		    argc, argv) == -1) {
3936 			mdb_warn("can't walk umem_cache");
3937 			return (DCMD_ERR);
3938 		}
3939 		return (DCMD_OK);
3940 	}
3941 
3942 	if (!mdb_vread(&c, sizeof (c), addr)) {
3943 		mdb_warn("unable to read cache at %p", addr);
3944 		return (DCMD_ERR);
3945 	}
3946 
3947 	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
3948 		if (!(flags & DCMD_LOOP))
3949 			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
3950 			    "by malloc()\n", c.cache_name);
3951 		skip = 1;
3952 	}
3953 
3954 	/*
3955 	 * normally, print the header only the first time.  In verbose mode,
3956 	 * print the header on every non-skipped buffer
3957 	 */
3958 	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
3959 		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
3960 		    "CACHE", "BUFSZ", "MAXMAL",
3961 		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
3962 
3963 	if (skip)
3964 		return (DCMD_OK);
3965 
3966 	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
3967 #ifdef _LP64
3968 	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
3969 		maxmalloc -= sizeof (struct malloc_data);
3970 #endif
3971 
3972 	bzero(&mi, sizeof (mi));
3973 	mi.um_cp = &c;
3974 	if (verbose)
3975 		mi.um_bucket =
3976 		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
3977 		    UM_SLEEP | UM_GC);
3978 
3979 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
3980 	    -1) {
3981 		mdb_warn("can't walk 'umem'");
3982 		return (DCMD_ERR);
3983 	}
3984 
3985 	overhead = mi.um_malloc_overhead;
3986 	allocated = mi.um_malloc_size;
3987 
3988 	/* do integer round off for the average */
3989 	if (mi.um_malloc != 0)
3990 		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
3991 	else
3992 		avg_malloc = 0;
3993 
3994 	/*
3995 	 * include per-slab overhead
3996 	 *
3997 	 * Each slab in a given cache is the same size, and has the same
3998 	 * number of chunks in it;  we read in the first slab on the
3999 	 * slab list to get the number of chunks for all slabs.  To
4000 	 * compute the per-slab overhead, we just subtract the chunk usage
4001 	 * from the slabsize:
4002 	 *
4003 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4004 	 * |////////////|	|	| ...	|	|///////|///////|
4005 	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
4006 	 * |////////////|	|	| ...	|	|///////|///////|
4007 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4008 	 * |		\_______chunksize * chunks_____/		|
4009 	 * \__________________________slabsize__________________________/
4010 	 *
4011 	 * For UMF_HASH caches, there is an additional source of overhead;
4012 	 * the external umem_slab_t and per-chunk bufctl structures.  We
4013 	 * include those in our per-slab overhead.
4014 	 *
4015 	 * Once we have a number for the per-slab overhead, we estimate
4016 	 * the actual overhead by treating the malloc()ed buffers as if
4017 	 * they were densely packed:
4018 	 *
4019 	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
4020 	 *
4021 	 * carefully ordering the multiply before the divide, to avoid
4022 	 * round-off error.
4023 	 */
4024 	if (mi.um_malloc != 0) {
4025 		umem_slab_t slab;
4026 		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4027 
4028 		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4029 			mdb_warn("unable to read slab at %p\n", saddr);
4030 		} else {
4031 			long chunks = slab.slab_chunks;
4032 			if (chunks != 0 && c.cache_chunksize != 0 &&
4033 			    chunks <= c.cache_slabsize / c.cache_chunksize) {
4034 				uintmax_t perslab =
4035 				    c.cache_slabsize -
4036 				    (c.cache_chunksize * chunks);
4037 
4038 				if (c.cache_flags & UMF_HASH) {
4039 					perslab += sizeof (umem_slab_t) +
4040 					    chunks *
4041 					    ((c.cache_flags & UMF_AUDIT) ?
4042 					    sizeof (umem_bufctl_audit_t) :
4043 					    sizeof (umem_bufctl_t));
4044 				}
4045 				overhead +=
4046 				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
4047 			} else {
4048 				mdb_warn("invalid #chunks (%d) in slab %p\n",
4049 				    chunks, saddr);
4050 			}
4051 		}
4052 	}
4053 
4054 	if (allocated != 0)
4055 		overhead_pct = (1000ULL * overhead) / allocated;
4056 	else
4057 		overhead_pct = 0;
4058 
4059 	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4060 	    addr, c.cache_bufsize, maxmalloc,
4061 	    mi.um_malloc, avg_malloc, allocated, overhead,
4062 	    overhead_pct / 10, overhead_pct % 10);
4063 
4064 	if (!verbose)
4065 		return (DCMD_OK);
4066 
4067 	if (!dump)
4068 		mdb_printf("\n");
4069 
4070 	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4071 		return (DCMD_ERR);
4072 
4073 	for (idx = 0; idx < num; idx++) {
4074 		if (alloc_sizes[idx] == c.cache_bufsize)
4075 			break;
4076 		if (alloc_sizes[idx] == 0) {
4077 			idx = num;	/* 0-terminated array */
4078 			break;
4079 		}
4080 	}
4081 	if (idx == num) {
4082 		mdb_warn(
4083 		    "cache %p's size (%d) not in umem_alloc_sizes\n",
4084 		    addr, c.cache_bufsize);
4085 		return (DCMD_ERR);
4086 	}
4087 
4088 	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4089 	if (minmalloc > 0) {
4090 #ifdef _LP64
4091 		if (minmalloc > UMEM_SECOND_ALIGN)
4092 			minmalloc -= sizeof (struct malloc_data);
4093 #endif
4094 		minmalloc -= sizeof (struct malloc_data);
4095 		minmalloc += 1;
4096 	}
4097 
4098 	if (dump) {
4099 		for (idx = minmalloc; idx <= maxmalloc; idx++)
4100 			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4101 		mdb_printf("\n");
4102 	} else {
4103 		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4104 		    maxbuckets, minbucketsize, geometric);
4105 	}
4106 
4107 	return (DCMD_OK);
4108 }
4109