xref: /titanic_51/usr/src/cmd/mdb/common/modules/libumem/umem.c (revision d33341fb88062a3afe7066acda297c3a1959176a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2011 Joyent, Inc.  All rights reserved.
28  * Copyright (c) 2013 by Delphix. All rights reserved.
29  */
30 
31 #include "umem.h"
32 
33 #include <sys/vmem_impl_user.h>
34 #include <umem_impl.h>
35 
36 #include <alloca.h>
37 #include <limits.h>
38 #include <mdb/mdb_whatis.h>
39 
40 #include "misc.h"
41 #include "leaky.h"
42 #include "dist.h"
43 
44 #include "umem_pagesize.h"
45 
/*
 * Walk-type bits used by the umem buffer walkers in this file.
 */
#define	UM_ALLOCATED		(1 << 0)	/* walk allocated buffers */
#define	UM_FREE			(1 << 1)	/* walk freed buffers */
#define	UM_BUFCTL		(1 << 2)	/* report bufctls, not buffers */
#define	UM_HASH			(1 << 3)	/* layered on the umem_hash walk */
50 
/*
 * Debugger-side copies of target state, refreshed by
 * umem_update_variables().
 */

/* The target's umem_ready value; 0 when umem could not be found. */
int umem_ready;

/* Set once the "umem_stack_depth corrupted" warning has been printed. */
static int umem_stack_depth_warned;

/* The target's umem_max_ncpus (read from max_ncpus if that fails). */
static uint32_t umem_max_ncpus;

/* The target's umem_stack_depth, forced to 0 when it is out of range. */
uint32_t umem_stack_depth;

/* The target's pagesize. */
size_t umem_pagesize;
58 
/*
 * Read the target variable whose name is the spelling of 'var' into the
 * debugger-side variable 'var'.  Evaluates to 1, after issuing a warning,
 * when the read fails, and to 0 otherwise.  Because the argument is
 * stringified, the local variable must be named exactly like the target
 * symbol.
 */
#define	UMEM_READVAR(var)					\
	((umem_readvar(&(var), #var) == -1) ?			\
	    (mdb_warn("failed to read "#var), 1) : 0)
62 
63 int
64 umem_update_variables(void)
65 {
66 	size_t pagesize;
67 
68 	/*
69 	 * Figure out which type of umem is being used; if it's not there
70 	 * yet, succeed quietly.
71 	 */
72 	if (umem_set_standalone() == -1) {
73 		umem_ready = 0;
74 		return (0);		/* umem not there yet */
75 	}
76 
77 	/*
78 	 * Solaris 9 used a different name for umem_max_ncpus.  It's
79 	 * cheap backwards compatibility to check for both names.
80 	 */
81 	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
82 	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
83 		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
84 		return (-1);
85 	}
86 	if (UMEM_READVAR(umem_ready))
87 		return (-1);
88 	if (UMEM_READVAR(umem_stack_depth))
89 		return (-1);
90 	if (UMEM_READVAR(pagesize))
91 		return (-1);
92 
93 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
94 		if (umem_stack_depth_warned == 0) {
95 			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
96 			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
97 			umem_stack_depth_warned = 1;
98 		}
99 		umem_stack_depth = 0;
100 	}
101 
102 	umem_pagesize = pagesize;
103 
104 	return (0);
105 }
106 
107 /*ARGSUSED*/
108 static int
109 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
110 {
111 	mdb_walker_t w;
112 	char descr[64];
113 
114 	(void) mdb_snprintf(descr, sizeof (descr),
115 	    "walk the %s cache", c->cache_name);
116 
117 	w.walk_name = c->cache_name;
118 	w.walk_descr = descr;
119 	w.walk_init = umem_walk_init;
120 	w.walk_step = umem_walk_step;
121 	w.walk_fini = umem_walk_fini;
122 	w.walk_init_arg = (void *)addr;
123 
124 	if (mdb_add_walker(&w) == -1)
125 		mdb_warn("failed to add %s walker", c->cache_name);
126 
127 	return (WALK_NEXT);
128 }
129 
130 /*ARGSUSED*/
131 static void
132 umem_statechange_cb(void *arg)
133 {
134 	static int been_ready = 0;
135 
136 #ifndef _KMDB
137 	leaky_cleanup(1);	/* state changes invalidate leaky state */
138 #endif
139 
140 	if (umem_update_variables() == -1)
141 		return;
142 
143 	if (been_ready)
144 		return;
145 
146 	if (umem_ready != UMEM_READY)
147 		return;
148 
149 	been_ready = 1;
150 	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL);
151 }
152 
153 int
154 umem_abort_messages(void)
155 {
156 	char *umem_error_buffer;
157 	uint_t umem_error_begin;
158 	GElf_Sym sym;
159 	size_t bufsize;
160 
161 	if (UMEM_READVAR(umem_error_begin))
162 		return (DCMD_ERR);
163 
164 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
165 		mdb_warn("unable to look up umem_error_buffer");
166 		return (DCMD_ERR);
167 	}
168 
169 	bufsize = (size_t)sym.st_size;
170 
171 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
172 
173 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
174 	    != bufsize) {
175 		mdb_warn("unable to read umem_error_buffer");
176 		return (DCMD_ERR);
177 	}
178 	/* put a zero after the end of the buffer to simplify printing */
179 	umem_error_buffer[bufsize] = 0;
180 
181 	if ((umem_error_begin % bufsize) == 0)
182 		mdb_printf("%s\n", umem_error_buffer);
183 	else {
184 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
185 		mdb_printf("%s%s\n",
186 		    &umem_error_buffer[umem_error_begin % bufsize],
187 		    umem_error_buffer);
188 	}
189 
190 	return (DCMD_OK);
191 }
192 
193 static void
194 umem_log_status(const char *name, umem_log_header_t *val)
195 {
196 	umem_log_header_t my_lh;
197 	uintptr_t pos = (uintptr_t)val;
198 	size_t size;
199 
200 	if (pos == NULL)
201 		return;
202 
203 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
204 		mdb_warn("\nunable to read umem_%s_log pointer %p",
205 		    name, pos);
206 		return;
207 	}
208 
209 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
210 
211 	if (size % (1024 * 1024) == 0)
212 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
213 	else if (size % 1024 == 0)
214 		mdb_printf("%s=%dk ", name, size / 1024);
215 	else
216 		mdb_printf("%s=%d ", name, size);
217 }
218 
219 typedef struct umem_debug_flags {
220 	const char	*udf_name;
221 	uint_t		udf_flags;
222 	uint_t		udf_clear;	/* if 0, uses udf_flags */
223 } umem_debug_flags_t;
224 
225 umem_debug_flags_t umem_status_flags[] = {
226 	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
227 	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
228 	{ "audit",	UMF_AUDIT },
229 	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
230 	{ "nosignal",	UMF_CHECKSIGNAL },
231 	{ "firewall",	UMF_FIREWALL },
232 	{ "lite",	UMF_LITE },
233 	{ NULL }
234 };
235 
236 /*ARGSUSED*/
237 int
238 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
239 {
240 	int umem_logging;
241 
242 	umem_log_header_t *umem_transaction_log;
243 	umem_log_header_t *umem_content_log;
244 	umem_log_header_t *umem_failure_log;
245 	umem_log_header_t *umem_slab_log;
246 
247 	mdb_printf("Status:\t\t%s\n",
248 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
249 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
250 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
251 	    umem_ready == UMEM_READY ? "ready and active" :
252 	    umem_ready == 0 ? "not loaded into address space" :
253 	    "unknown (umem_ready invalid)");
254 
255 	if (umem_ready == 0)
256 		return (DCMD_OK);
257 
258 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
259 
260 	if (UMEM_READVAR(umem_logging))
261 		goto err;
262 	if (UMEM_READVAR(umem_transaction_log))
263 		goto err;
264 	if (UMEM_READVAR(umem_content_log))
265 		goto err;
266 	if (UMEM_READVAR(umem_failure_log))
267 		goto err;
268 	if (UMEM_READVAR(umem_slab_log))
269 		goto err;
270 
271 	mdb_printf("Logs:\t\t");
272 	umem_log_status("transaction", umem_transaction_log);
273 	umem_log_status("content", umem_content_log);
274 	umem_log_status("fail", umem_failure_log);
275 	umem_log_status("slab", umem_slab_log);
276 	if (!umem_logging)
277 		mdb_printf("(inactive)");
278 	mdb_printf("\n");
279 
280 	mdb_printf("Message buffer:\n");
281 	return (umem_abort_messages());
282 
283 err:
284 	mdb_printf("Message buffer:\n");
285 	(void) umem_abort_messages();
286 	return (DCMD_ERR);
287 }
288 
/*
 * State for the umem_cache walk over the circular list of caches.
 */
typedef struct {
	uintptr_t ucw_first;	/* address of umem_null_cache, the list head */
	uintptr_t ucw_current;	/* address of the next cache to visit */
} umem_cache_walk_t;
293 
294 int
295 umem_cache_walk_init(mdb_walk_state_t *wsp)
296 {
297 	umem_cache_walk_t *ucw;
298 	umem_cache_t c;
299 	uintptr_t cp;
300 	GElf_Sym sym;
301 
302 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
303 		mdb_warn("couldn't find umem_null_cache");
304 		return (WALK_ERR);
305 	}
306 
307 	cp = (uintptr_t)sym.st_value;
308 
309 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
310 		mdb_warn("couldn't read cache at %p", cp);
311 		return (WALK_ERR);
312 	}
313 
314 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
315 
316 	ucw->ucw_first = cp;
317 	ucw->ucw_current = (uintptr_t)c.cache_next;
318 	wsp->walk_data = ucw;
319 
320 	return (WALK_NEXT);
321 }
322 
323 int
324 umem_cache_walk_step(mdb_walk_state_t *wsp)
325 {
326 	umem_cache_walk_t *ucw = wsp->walk_data;
327 	umem_cache_t c;
328 	int status;
329 
330 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
331 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
332 		return (WALK_DONE);
333 	}
334 
335 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
336 
337 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
338 		return (WALK_DONE);
339 
340 	return (status);
341 }
342 
343 void
344 umem_cache_walk_fini(mdb_walk_state_t *wsp)
345 {
346 	umem_cache_walk_t *ucw = wsp->walk_data;
347 	mdb_free(ucw, sizeof (umem_cache_walk_t));
348 }
349 
350 typedef struct {
351 	umem_cpu_t *ucw_cpus;
352 	uint32_t ucw_current;
353 	uint32_t ucw_max;
354 } umem_cpu_walk_state_t;
355 
356 int
357 umem_cpu_walk_init(mdb_walk_state_t *wsp)
358 {
359 	umem_cpu_t *umem_cpus;
360 
361 	umem_cpu_walk_state_t *ucw;
362 
363 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
364 		mdb_warn("failed to read 'umem_cpus'");
365 		return (WALK_ERR);
366 	}
367 
368 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
369 
370 	ucw->ucw_cpus = umem_cpus;
371 	ucw->ucw_current = 0;
372 	ucw->ucw_max = umem_max_ncpus;
373 
374 	wsp->walk_data = ucw;
375 	return (WALK_NEXT);
376 }
377 
378 int
379 umem_cpu_walk_step(mdb_walk_state_t *wsp)
380 {
381 	umem_cpu_t cpu;
382 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
383 
384 	uintptr_t caddr;
385 
386 	if (ucw->ucw_current >= ucw->ucw_max)
387 		return (WALK_DONE);
388 
389 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
390 
391 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
392 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
393 		return (WALK_ERR);
394 	}
395 
396 	ucw->ucw_current++;
397 
398 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
399 }
400 
401 void
402 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
403 {
404 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
405 
406 	mdb_free(ucw, sizeof (*ucw));
407 }
408 
409 int
410 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
411 {
412 	if (wsp->walk_addr == NULL) {
413 		mdb_warn("umem_cpu_cache doesn't support global walks");
414 		return (WALK_ERR);
415 	}
416 
417 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
418 		mdb_warn("couldn't walk 'umem_cpu'");
419 		return (WALK_ERR);
420 	}
421 
422 	wsp->walk_data = (void *)wsp->walk_addr;
423 
424 	return (WALK_NEXT);
425 }
426 
427 int
428 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
429 {
430 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
431 	const umem_cpu_t *cpu = wsp->walk_layer;
432 	umem_cpu_cache_t cc;
433 
434 	caddr += cpu->cpu_cache_offset;
435 
436 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
437 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
438 		return (WALK_ERR);
439 	}
440 
441 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
442 }
443 
444 int
445 umem_slab_walk_init(mdb_walk_state_t *wsp)
446 {
447 	uintptr_t caddr = wsp->walk_addr;
448 	umem_cache_t c;
449 
450 	if (caddr == NULL) {
451 		mdb_warn("umem_slab doesn't support global walks\n");
452 		return (WALK_ERR);
453 	}
454 
455 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
456 		mdb_warn("couldn't read umem_cache at %p", caddr);
457 		return (WALK_ERR);
458 	}
459 
460 	wsp->walk_data =
461 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
462 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
463 
464 	return (WALK_NEXT);
465 }
466 
467 int
468 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
469 {
470 	uintptr_t caddr = wsp->walk_addr;
471 	umem_cache_t c;
472 
473 	if (caddr == NULL) {
474 		mdb_warn("umem_slab_partial doesn't support global walks\n");
475 		return (WALK_ERR);
476 	}
477 
478 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
479 		mdb_warn("couldn't read umem_cache at %p", caddr);
480 		return (WALK_ERR);
481 	}
482 
483 	wsp->walk_data =
484 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
485 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
486 
487 	/*
488 	 * Some consumers (umem_walk_step(), in particular) require at
489 	 * least one callback if there are any buffers in the cache.  So
490 	 * if there are *no* partial slabs, report the last full slab, if
491 	 * any.
492 	 *
493 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
494 	 */
495 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
496 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
497 
498 	return (WALK_NEXT);
499 }
500 
501 int
502 umem_slab_walk_step(mdb_walk_state_t *wsp)
503 {
504 	umem_slab_t s;
505 	uintptr_t addr = wsp->walk_addr;
506 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
507 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
508 
509 	if (addr == saddr)
510 		return (WALK_DONE);
511 
512 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
513 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
514 		return (WALK_ERR);
515 	}
516 
517 	if ((uintptr_t)s.slab_cache != caddr) {
518 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
519 		    addr, caddr, s.slab_cache);
520 		return (WALK_ERR);
521 	}
522 
523 	wsp->walk_addr = (uintptr_t)s.slab_next;
524 
525 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
526 }
527 
528 int
529 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
530 {
531 	umem_cache_t c;
532 
533 	if (!(flags & DCMD_ADDRSPEC)) {
534 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
535 			mdb_warn("can't walk umem_cache");
536 			return (DCMD_ERR);
537 		}
538 		return (DCMD_OK);
539 	}
540 
541 	if (DCMD_HDRSPEC(flags))
542 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
543 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
544 
545 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
546 		mdb_warn("couldn't read umem_cache at %p", addr);
547 		return (DCMD_ERR);
548 	}
549 
550 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
551 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
552 
553 	return (DCMD_OK);
554 }
555 
/*
 * qsort()/bsearch() comparator ordering uintptr_t values ascending.
 * Returns -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t a = *(const uintptr_t *)lhs;
	const uintptr_t b = *(const uintptr_t *)rhs;

	/* comparing rather than subtracting avoids any overflow concerns */
	return ((a > b) - (a < b));
}
568 
569 static int
570 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
571 {
572 	const umem_bufctl_audit_t *bcp1 = *lhs;
573 	const umem_bufctl_audit_t *bcp2 = *rhs;
574 
575 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
576 		return (-1);
577 
578 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
579 		return (1);
580 
581 	return (0);
582 }
583 
584 typedef struct umem_hash_walk {
585 	uintptr_t *umhw_table;
586 	size_t umhw_nelems;
587 	size_t umhw_pos;
588 	umem_bufctl_t umhw_cur;
589 } umem_hash_walk_t;
590 
591 int
592 umem_hash_walk_init(mdb_walk_state_t *wsp)
593 {
594 	umem_hash_walk_t *umhw;
595 	uintptr_t *hash;
596 	umem_cache_t c;
597 	uintptr_t haddr, addr = wsp->walk_addr;
598 	size_t nelems;
599 	size_t hsize;
600 
601 	if (addr == NULL) {
602 		mdb_warn("umem_hash doesn't support global walks\n");
603 		return (WALK_ERR);
604 	}
605 
606 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
607 		mdb_warn("couldn't read cache at addr %p", addr);
608 		return (WALK_ERR);
609 	}
610 
611 	if (!(c.cache_flags & UMF_HASH)) {
612 		mdb_warn("cache %p doesn't have a hash table\n", addr);
613 		return (WALK_DONE);		/* nothing to do */
614 	}
615 
616 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
617 	umhw->umhw_cur.bc_next = NULL;
618 	umhw->umhw_pos = 0;
619 
620 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
621 	hsize = nelems * sizeof (uintptr_t);
622 	haddr = (uintptr_t)c.cache_hash_table;
623 
624 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
625 	if (mdb_vread(hash, hsize, haddr) == -1) {
626 		mdb_warn("failed to read hash table at %p", haddr);
627 		mdb_free(hash, hsize);
628 		mdb_free(umhw, sizeof (umem_hash_walk_t));
629 		return (WALK_ERR);
630 	}
631 
632 	wsp->walk_data = umhw;
633 
634 	return (WALK_NEXT);
635 }
636 
637 int
638 umem_hash_walk_step(mdb_walk_state_t *wsp)
639 {
640 	umem_hash_walk_t *umhw = wsp->walk_data;
641 	uintptr_t addr = NULL;
642 
643 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
644 		while (umhw->umhw_pos < umhw->umhw_nelems) {
645 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
646 				break;
647 		}
648 	}
649 	if (addr == NULL)
650 		return (WALK_DONE);
651 
652 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
653 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
654 		return (WALK_ERR);
655 	}
656 
657 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
658 }
659 
660 void
661 umem_hash_walk_fini(mdb_walk_state_t *wsp)
662 {
663 	umem_hash_walk_t *umhw = wsp->walk_data;
664 
665 	if (umhw == NULL)
666 		return;
667 
668 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
669 	mdb_free(umhw, sizeof (umem_hash_walk_t));
670 }
671 
672 /*
673  * Find the address of the bufctl structure for the address 'buf' in cache
674  * 'cp', which is at address caddr, and place it in *out.
675  */
676 static int
677 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
678 {
679 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
680 	umem_bufctl_t *bcp;
681 	umem_bufctl_t bc;
682 
683 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
684 		mdb_warn("unable to read hash bucket for %p in cache %p",
685 		    buf, caddr);
686 		return (-1);
687 	}
688 
689 	while (bcp != NULL) {
690 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
691 		    (uintptr_t)bcp) == -1) {
692 			mdb_warn("unable to read bufctl at %p", bcp);
693 			return (-1);
694 		}
695 		if (bc.bc_addr == buf) {
696 			*out = (uintptr_t)bcp;
697 			return (0);
698 		}
699 		bcp = bc.bc_next;
700 	}
701 
702 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
703 	return (-1);
704 }
705 
706 int
707 umem_get_magsize(const umem_cache_t *cp)
708 {
709 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
710 	GElf_Sym mt_sym;
711 	umem_magtype_t mt;
712 	int res;
713 
714 	/*
715 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
716 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
717 	 * it is okay to return 0 for them.
718 	 */
719 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
720 	    (cp->cache_flags & UMF_NOMAGAZINE))
721 		return (res);
722 
723 	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
724 		mdb_warn("unable to read 'umem_magtype'");
725 	} else if (addr < mt_sym.st_value ||
726 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
727 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
728 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
729 		    cp->cache_name, addr);
730 		return (0);
731 	}
732 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
733 		mdb_warn("unable to read magtype at %a", addr);
734 		return (0);
735 	}
736 	return (mt.mt_magsize);
737 }
738 
739 /*ARGSUSED*/
740 static int
741 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
742 {
743 	*est -= (sp->slab_chunks - sp->slab_refcnt);
744 
745 	return (WALK_NEXT);
746 }
747 
748 /*
749  * Returns an upper bound on the number of allocated buffers in a given
750  * cache.
751  */
752 size_t
753 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
754 {
755 	int magsize;
756 	size_t cache_est;
757 
758 	cache_est = cp->cache_buftotal;
759 
760 	(void) mdb_pwalk("umem_slab_partial",
761 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
762 
763 	if ((magsize = umem_get_magsize(cp)) != 0) {
764 		size_t mag_est = cp->cache_full.ml_total * magsize;
765 
766 		if (cache_est >= mag_est) {
767 			cache_est -= mag_est;
768 		} else {
769 			mdb_warn("cache %p's magazine layer holds more buffers "
770 			    "than the slab layer.\n", addr);
771 		}
772 	}
773 	return (cache_est);
774 }
775 
/*
 * Read the magazine at target address 'ump' into the local buffer 'mp'
 * and append its first 'rounds' rounds to maglist[], bumping magcnt.
 *
 * The macro relies on these names in the enclosing function: mp, ump,
 * magbsize, i, maglist, magcnt, magmax, and a label "fail" that it jumps
 * to if the read fails or maglist[] fills up.
 *
 * It is wrapped in do { } while (0) so it acts as a single statement
 * (safe in an unbraced if/else), and 'rounds' is parenthesized so an
 * expression argument cannot rebind against the comparison.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (/*CONSTCOND*/0)
790 
791 int
792 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
793     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
794 {
795 	umem_magazine_t *ump, *mp;
796 	void **maglist = NULL;
797 	int i, cpu;
798 	size_t magsize, magmax, magbsize;
799 	size_t magcnt = 0;
800 
801 	/*
802 	 * Read the magtype out of the cache, after verifying the pointer's
803 	 * correctness.
804 	 */
805 	magsize = umem_get_magsize(cp);
806 	if (magsize == 0) {
807 		*maglistp = NULL;
808 		*magcntp = 0;
809 		*magmaxp = 0;
810 		return (WALK_NEXT);
811 	}
812 
813 	/*
814 	 * There are several places where we need to go buffer hunting:
815 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
816 	 * and the full magazine list in the depot.
817 	 *
818 	 * For an upper bound on the number of buffers in the magazine
819 	 * layer, we have the number of magazines on the cache_full
820 	 * list plus at most two magazines per CPU (the loaded and the
821 	 * spare).  Toss in 100 magazines as a fudge factor in case this
822 	 * is live (the number "100" comes from the same fudge factor in
823 	 * crash(1M)).
824 	 */
825 	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
826 	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
827 
828 	if (magbsize >= PAGESIZE / 2) {
829 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
830 		    addr, magbsize);
831 		return (WALK_ERR);
832 	}
833 
834 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
835 	mp = mdb_alloc(magbsize, alloc_flags);
836 	if (mp == NULL || maglist == NULL)
837 		goto fail;
838 
839 	/*
840 	 * First up: the magazines in the depot (i.e. on the cache_full list).
841 	 */
842 	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
843 		READMAG_ROUNDS(magsize);
844 		ump = mp->mag_next;
845 
846 		if (ump == cp->cache_full.ml_list)
847 			break; /* cache_full list loop detected */
848 	}
849 
850 	dprintf(("cache_full list done\n"));
851 
852 	/*
853 	 * Now whip through the CPUs, snagging the loaded magazines
854 	 * and full spares.
855 	 */
856 	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
857 		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
858 
859 		dprintf(("reading cpu cache %p\n",
860 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
861 
862 		if (ccp->cc_rounds > 0 &&
863 		    (ump = ccp->cc_loaded) != NULL) {
864 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
865 			READMAG_ROUNDS(ccp->cc_rounds);
866 		}
867 
868 		if (ccp->cc_prounds > 0 &&
869 		    (ump = ccp->cc_ploaded) != NULL) {
870 			dprintf(("reading %d previously loaded rounds\n",
871 			    ccp->cc_prounds));
872 			READMAG_ROUNDS(ccp->cc_prounds);
873 		}
874 	}
875 
876 	dprintf(("magazine layer: %d buffers\n", magcnt));
877 
878 	if (!(alloc_flags & UM_GC))
879 		mdb_free(mp, magbsize);
880 
881 	*maglistp = maglist;
882 	*magcntp = magcnt;
883 	*magmaxp = magmax;
884 
885 	return (WALK_NEXT);
886 
887 fail:
888 	if (!(alloc_flags & UM_GC)) {
889 		if (mp)
890 			mdb_free(mp, magbsize);
891 		if (maglist)
892 			mdb_free(maglist, magmax * sizeof (void *));
893 	}
894 	return (WALK_ERR);
895 }
896 
897 static int
898 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
899 {
900 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
901 }
902 
903 static int
904 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
905 {
906 	umem_bufctl_audit_t *b;
907 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
908 
909 	/*
910 	 * if UMF_AUDIT is not set, we know that we're looking at a
911 	 * umem_bufctl_t.
912 	 */
913 	if (!(cp->cache_flags & UMF_AUDIT) ||
914 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
915 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
916 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
917 			mdb_warn("unable to read bufctl at %p", buf);
918 			return (WALK_ERR);
919 		}
920 	}
921 
922 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
923 }
924 
925 typedef struct umem_walk {
926 	int umw_type;
927 
928 	uintptr_t umw_addr;		/* cache address */
929 	umem_cache_t *umw_cp;
930 	size_t umw_csize;
931 
932 	/*
933 	 * magazine layer
934 	 */
935 	void **umw_maglist;
936 	size_t umw_max;
937 	size_t umw_count;
938 	size_t umw_pos;
939 
940 	/*
941 	 * slab layer
942 	 */
943 	char *umw_valid;	/* to keep track of freed buffers */
944 	char *umw_ubase;	/* buffer for slab data */
945 } umem_walk_t;
946 
947 static int
948 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
949 {
950 	umem_walk_t *umw;
951 	int csize;
952 	umem_cache_t *cp;
953 	size_t vm_quantum;
954 
955 	size_t magmax, magcnt;
956 	void **maglist = NULL;
957 	uint_t chunksize, slabsize;
958 	int status = WALK_ERR;
959 	uintptr_t addr = wsp->walk_addr;
960 	const char *layered;
961 
962 	type &= ~UM_HASH;
963 
964 	if (addr == NULL) {
965 		mdb_warn("umem walk doesn't support global walks\n");
966 		return (WALK_ERR);
967 	}
968 
969 	dprintf(("walking %p\n", addr));
970 
971 	/*
972 	 * The number of "cpus" determines how large the cache is.
973 	 */
974 	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
975 	cp = mdb_alloc(csize, UM_SLEEP);
976 
977 	if (mdb_vread(cp, csize, addr) == -1) {
978 		mdb_warn("couldn't read cache at addr %p", addr);
979 		goto out2;
980 	}
981 
982 	/*
983 	 * It's easy for someone to hand us an invalid cache address.
984 	 * Unfortunately, it is hard for this walker to survive an
985 	 * invalid cache cleanly.  So we make sure that:
986 	 *
987 	 *	1. the vmem arena for the cache is readable,
988 	 *	2. the vmem arena's quantum is a power of 2,
989 	 *	3. our slabsize is a multiple of the quantum, and
990 	 *	4. our chunksize is >0 and less than our slabsize.
991 	 */
992 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
993 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
994 	    vm_quantum == 0 ||
995 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
996 	    cp->cache_slabsize < vm_quantum ||
997 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
998 	    cp->cache_chunksize == 0 ||
999 	    cp->cache_chunksize > cp->cache_slabsize) {
1000 		mdb_warn("%p is not a valid umem_cache_t\n", addr);
1001 		goto out2;
1002 	}
1003 
1004 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1005 
1006 	if (cp->cache_buftotal == 0) {
1007 		mdb_free(cp, csize);
1008 		return (WALK_DONE);
1009 	}
1010 
1011 	/*
1012 	 * If they ask for bufctls, but it's a small-slab cache,
1013 	 * there is nothing to report.
1014 	 */
1015 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1016 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1017 		    cp->cache_flags));
1018 		mdb_free(cp, csize);
1019 		return (WALK_DONE);
1020 	}
1021 
1022 	/*
1023 	 * Read in the contents of the magazine layer
1024 	 */
1025 	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax,
1026 	    UM_SLEEP) == WALK_ERR)
1027 		goto out2;
1028 
1029 	/*
1030 	 * We have all of the buffers from the magazines;  if we are walking
1031 	 * allocated buffers, sort them so we can bsearch them later.
1032 	 */
1033 	if (type & UM_ALLOCATED)
1034 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1035 
1036 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1037 
1038 	umw->umw_type = type;
1039 	umw->umw_addr = addr;
1040 	umw->umw_cp = cp;
1041 	umw->umw_csize = csize;
1042 	umw->umw_maglist = maglist;
1043 	umw->umw_max = magmax;
1044 	umw->umw_count = magcnt;
1045 	umw->umw_pos = 0;
1046 
1047 	/*
1048 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
1049 	 * hash table instead of the slab layer.
1050 	 */
1051 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1052 		layered = "umem_hash";
1053 
1054 		umw->umw_type |= UM_HASH;
1055 	} else {
1056 		/*
1057 		 * If we are walking freed buffers, we only need the
1058 		 * magazine layer plus the partially allocated slabs.
1059 		 * To walk allocated buffers, we need all of the slabs.
1060 		 */
1061 		if (type & UM_ALLOCATED)
1062 			layered = "umem_slab";
1063 		else
1064 			layered = "umem_slab_partial";
1065 
1066 		/*
1067 		 * for small-slab caches, we read in the entire slab.  For
1068 		 * freed buffers, we can just walk the freelist.  For
1069 		 * allocated buffers, we use a 'valid' array to track
1070 		 * the freed buffers.
1071 		 */
1072 		if (!(cp->cache_flags & UMF_HASH)) {
1073 			chunksize = cp->cache_chunksize;
1074 			slabsize = cp->cache_slabsize;
1075 
1076 			umw->umw_ubase = mdb_alloc(slabsize +
1077 			    sizeof (umem_bufctl_t), UM_SLEEP);
1078 
1079 			if (type & UM_ALLOCATED)
1080 				umw->umw_valid =
1081 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1082 		}
1083 	}
1084 
1085 	status = WALK_NEXT;
1086 
1087 	if (mdb_layered_walk(layered, wsp) == -1) {
1088 		mdb_warn("unable to start layered '%s' walk", layered);
1089 		status = WALK_ERR;
1090 	}
1091 
1092 out1:
1093 	if (status == WALK_ERR) {
1094 		if (umw->umw_valid)
1095 			mdb_free(umw->umw_valid, slabsize / chunksize);
1096 
1097 		if (umw->umw_ubase)
1098 			mdb_free(umw->umw_ubase, slabsize +
1099 			    sizeof (umem_bufctl_t));
1100 
1101 		if (umw->umw_maglist)
1102 			mdb_free(umw->umw_maglist, umw->umw_max *
1103 			    sizeof (uintptr_t));
1104 
1105 		mdb_free(umw, sizeof (umem_walk_t));
1106 		wsp->walk_data = NULL;
1107 	}
1108 
1109 out2:
1110 	if (status == WALK_ERR)
1111 		mdb_free(cp, csize);
1112 
1113 	return (status);
1114 }
1115 
1116 int
1117 umem_walk_step(mdb_walk_state_t *wsp)
1118 {
1119 	umem_walk_t *umw = wsp->walk_data;
1120 	int type = umw->umw_type;
1121 	umem_cache_t *cp = umw->umw_cp;
1122 
1123 	void **maglist = umw->umw_maglist;
1124 	int magcnt = umw->umw_count;
1125 
1126 	uintptr_t chunksize, slabsize;
1127 	uintptr_t addr;
1128 	const umem_slab_t *sp;
1129 	const umem_bufctl_t *bcp;
1130 	umem_bufctl_t bc;
1131 
1132 	int chunks;
1133 	char *kbase;
1134 	void *buf;
1135 	int i, ret;
1136 
1137 	char *valid, *ubase;
1138 
1139 	/*
1140 	 * first, handle the 'umem_hash' layered walk case
1141 	 */
1142 	if (type & UM_HASH) {
1143 		/*
1144 		 * We have a buffer which has been allocated out of the
1145 		 * global layer. We need to make sure that it's not
1146 		 * actually sitting in a magazine before we report it as
1147 		 * an allocated buffer.
1148 		 */
1149 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1150 
1151 		if (magcnt > 0 &&
1152 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1153 		    addrcmp) != NULL)
1154 			return (WALK_NEXT);
1155 
1156 		if (type & UM_BUFCTL)
1157 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1158 
1159 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1160 	}
1161 
1162 	ret = WALK_NEXT;
1163 
1164 	addr = umw->umw_addr;
1165 
1166 	/*
1167 	 * If we're walking freed buffers, report everything in the
1168 	 * magazine layer before processing the first slab.
1169 	 */
1170 	if ((type & UM_FREE) && magcnt != 0) {
1171 		umw->umw_count = 0;		/* only do this once */
1172 		for (i = 0; i < magcnt; i++) {
1173 			buf = maglist[i];
1174 
1175 			if (type & UM_BUFCTL) {
1176 				uintptr_t out;
1177 
1178 				if (cp->cache_flags & UMF_BUFTAG) {
1179 					umem_buftag_t *btp;
1180 					umem_buftag_t tag;
1181 
1182 					/* LINTED - alignment */
1183 					btp = UMEM_BUFTAG(cp, buf);
1184 					if (mdb_vread(&tag, sizeof (tag),
1185 					    (uintptr_t)btp) == -1) {
1186 						mdb_warn("reading buftag for "
1187 						    "%p at %p", buf, btp);
1188 						continue;
1189 					}
1190 					out = (uintptr_t)tag.bt_bufctl;
1191 				} else {
1192 					if (umem_hash_lookup(cp, addr, buf,
1193 					    &out) == -1)
1194 						continue;
1195 				}
1196 				ret = bufctl_walk_callback(cp, wsp, out);
1197 			} else {
1198 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1199 			}
1200 
1201 			if (ret != WALK_NEXT)
1202 				return (ret);
1203 		}
1204 	}
1205 
1206 	/*
1207 	 * Handle the buffers in the current slab
1208 	 */
1209 	chunksize = cp->cache_chunksize;
1210 	slabsize = cp->cache_slabsize;
1211 
1212 	sp = wsp->walk_layer;
1213 	chunks = sp->slab_chunks;
1214 	kbase = sp->slab_base;
1215 
1216 	dprintf(("kbase is %p\n", kbase));
1217 
1218 	if (!(cp->cache_flags & UMF_HASH)) {
1219 		valid = umw->umw_valid;
1220 		ubase = umw->umw_ubase;
1221 
1222 		if (mdb_vread(ubase, chunks * chunksize,
1223 		    (uintptr_t)kbase) == -1) {
1224 			mdb_warn("failed to read slab contents at %p", kbase);
1225 			return (WALK_ERR);
1226 		}
1227 
1228 		/*
1229 		 * Set up the valid map as fully allocated -- we'll punch
1230 		 * out the freelist.
1231 		 */
1232 		if (type & UM_ALLOCATED)
1233 			(void) memset(valid, 1, chunks);
1234 	} else {
1235 		valid = NULL;
1236 		ubase = NULL;
1237 	}
1238 
1239 	/*
1240 	 * walk the slab's freelist
1241 	 */
1242 	bcp = sp->slab_head;
1243 
1244 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1245 
1246 	/*
1247 	 * since we could be in the middle of allocating a buffer,
1248 	 * our refcnt could be one higher than it aught.  So we
1249 	 * check one further on the freelist than the count allows.
1250 	 */
1251 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1252 		uint_t ndx;
1253 
1254 		dprintf(("bcp is %p\n", bcp));
1255 
1256 		if (bcp == NULL) {
1257 			if (i == chunks)
1258 				break;
1259 			mdb_warn(
1260 			    "slab %p in cache %p freelist too short by %d\n",
1261 			    sp, addr, chunks - i);
1262 			break;
1263 		}
1264 
1265 		if (cp->cache_flags & UMF_HASH) {
1266 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1267 				mdb_warn("failed to read bufctl ptr at %p",
1268 				    bcp);
1269 				break;
1270 			}
1271 			buf = bc.bc_addr;
1272 		} else {
1273 			/*
1274 			 * Otherwise the buffer is (or should be) in the slab
1275 			 * that we've read in; determine its offset in the
1276 			 * slab, validate that it's not corrupt, and add to
1277 			 * our base address to find the umem_bufctl_t.  (Note
1278 			 * that we don't need to add the size of the bufctl
1279 			 * to our offset calculation because of the slop that's
1280 			 * allocated for the buffer at ubase.)
1281 			 */
1282 			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1283 
1284 			if (offs > chunks * chunksize) {
1285 				mdb_warn("found corrupt bufctl ptr %p"
1286 				    " in slab %p in cache %p\n", bcp,
1287 				    wsp->walk_addr, addr);
1288 				break;
1289 			}
1290 
1291 			bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1292 			buf = UMEM_BUF(cp, bcp);
1293 		}
1294 
1295 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1296 
1297 		if (ndx > slabsize / cp->cache_bufsize) {
1298 			/*
1299 			 * This is very wrong; we have managed to find
1300 			 * a buffer in the slab which shouldn't
1301 			 * actually be here.  Emit a warning, and
1302 			 * try to continue.
1303 			 */
1304 			mdb_warn("buf %p is out of range for "
1305 			    "slab %p, cache %p\n", buf, sp, addr);
1306 		} else if (type & UM_ALLOCATED) {
1307 			/*
1308 			 * we have found a buffer on the slab's freelist;
1309 			 * clear its entry
1310 			 */
1311 			valid[ndx] = 0;
1312 		} else {
1313 			/*
1314 			 * Report this freed buffer
1315 			 */
1316 			if (type & UM_BUFCTL) {
1317 				ret = bufctl_walk_callback(cp, wsp,
1318 				    (uintptr_t)bcp);
1319 			} else {
1320 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1321 			}
1322 			if (ret != WALK_NEXT)
1323 				return (ret);
1324 		}
1325 
1326 		bcp = bc.bc_next;
1327 	}
1328 
1329 	if (bcp != NULL) {
1330 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1331 		    sp, addr, bcp));
1332 	}
1333 
1334 	/*
1335 	 * If we are walking freed buffers, the loop above handled reporting
1336 	 * them.
1337 	 */
1338 	if (type & UM_FREE)
1339 		return (WALK_NEXT);
1340 
1341 	if (type & UM_BUFCTL) {
1342 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1343 		    "cache %p\n", addr);
1344 		return (WALK_ERR);
1345 	}
1346 
1347 	/*
1348 	 * Report allocated buffers, skipping buffers in the magazine layer.
1349 	 * We only get this far for small-slab caches.
1350 	 */
1351 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1352 		buf = (char *)kbase + i * chunksize;
1353 
1354 		if (!valid[i])
1355 			continue;		/* on slab freelist */
1356 
1357 		if (magcnt > 0 &&
1358 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1359 		    addrcmp) != NULL)
1360 			continue;		/* in magazine layer */
1361 
1362 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1363 	}
1364 	return (ret);
1365 }
1366 
1367 void
1368 umem_walk_fini(mdb_walk_state_t *wsp)
1369 {
1370 	umem_walk_t *umw = wsp->walk_data;
1371 	uintptr_t chunksize;
1372 	uintptr_t slabsize;
1373 
1374 	if (umw == NULL)
1375 		return;
1376 
1377 	if (umw->umw_maglist != NULL)
1378 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1379 
1380 	chunksize = umw->umw_cp->cache_chunksize;
1381 	slabsize = umw->umw_cp->cache_slabsize;
1382 
1383 	if (umw->umw_valid != NULL)
1384 		mdb_free(umw->umw_valid, slabsize / chunksize);
1385 	if (umw->umw_ubase != NULL)
1386 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1387 
1388 	mdb_free(umw->umw_cp, umw->umw_csize);
1389 	mdb_free(umw, sizeof (umem_walk_t));
1390 }
1391 
1392 /*ARGSUSED*/
1393 static int
1394 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1395 {
1396 	/*
1397 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1398 	 * memory in other caches.  This can be a little confusing, so we
1399 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1400 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1401 	 */
1402 	if (c->cache_cflags & UMC_NOTOUCH)
1403 		return (WALK_NEXT);
1404 
1405 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1406 	    wsp->walk_cbdata, addr) == -1)
1407 		return (WALK_DONE);
1408 
1409 	return (WALK_NEXT);
1410 }
1411 
/*
 * Global-walk helper: stores the walker name in (wsp)->walk_data and walks
 * every umem cache with umem_walk_all().
 *
 * NOTE: this macro returns from the calling function -- WALK_ERR when the
 * umem_cache walk fails, WALK_DONE otherwise.
 */
#define	UMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1418 
1419 int
1420 umem_walk_init(mdb_walk_state_t *wsp)
1421 {
1422 	if (wsp->walk_arg != NULL)
1423 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1424 
1425 	if (wsp->walk_addr == NULL)
1426 		UMEM_WALK_ALL("umem", wsp);
1427 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1428 }
1429 
1430 int
1431 bufctl_walk_init(mdb_walk_state_t *wsp)
1432 {
1433 	if (wsp->walk_addr == NULL)
1434 		UMEM_WALK_ALL("bufctl", wsp);
1435 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1436 }
1437 
1438 int
1439 freemem_walk_init(mdb_walk_state_t *wsp)
1440 {
1441 	if (wsp->walk_addr == NULL)
1442 		UMEM_WALK_ALL("freemem", wsp);
1443 	return (umem_walk_init_common(wsp, UM_FREE));
1444 }
1445 
1446 int
1447 freectl_walk_init(mdb_walk_state_t *wsp)
1448 {
1449 	if (wsp->walk_addr == NULL)
1450 		UMEM_WALK_ALL("freectl", wsp);
1451 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1452 }
1453 
1454 typedef struct bufctl_history_walk {
1455 	void		*bhw_next;
1456 	umem_cache_t	*bhw_cache;
1457 	umem_slab_t	*bhw_slab;
1458 	hrtime_t	bhw_timestamp;
1459 } bufctl_history_walk_t;
1460 
1461 int
1462 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1463 {
1464 	bufctl_history_walk_t *bhw;
1465 	umem_bufctl_audit_t bc;
1466 	umem_bufctl_audit_t bcn;
1467 
1468 	if (wsp->walk_addr == NULL) {
1469 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1470 		return (WALK_ERR);
1471 	}
1472 
1473 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1474 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1475 		return (WALK_ERR);
1476 	}
1477 
1478 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1479 	bhw->bhw_timestamp = 0;
1480 	bhw->bhw_cache = bc.bc_cache;
1481 	bhw->bhw_slab = bc.bc_slab;
1482 
1483 	/*
1484 	 * sometimes the first log entry matches the base bufctl;  in that
1485 	 * case, skip the base bufctl.
1486 	 */
1487 	if (bc.bc_lastlog != NULL &&
1488 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1489 	    bc.bc_addr == bcn.bc_addr &&
1490 	    bc.bc_cache == bcn.bc_cache &&
1491 	    bc.bc_slab == bcn.bc_slab &&
1492 	    bc.bc_timestamp == bcn.bc_timestamp &&
1493 	    bc.bc_thread == bcn.bc_thread)
1494 		bhw->bhw_next = bc.bc_lastlog;
1495 	else
1496 		bhw->bhw_next = (void *)wsp->walk_addr;
1497 
1498 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1499 	wsp->walk_data = bhw;
1500 
1501 	return (WALK_NEXT);
1502 }
1503 
1504 int
1505 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1506 {
1507 	bufctl_history_walk_t *bhw = wsp->walk_data;
1508 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1509 	uintptr_t baseaddr = wsp->walk_addr;
1510 	umem_bufctl_audit_t *b;
1511 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1512 
1513 	if (addr == NULL)
1514 		return (WALK_DONE);
1515 
1516 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1517 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1518 		return (WALK_ERR);
1519 	}
1520 
1521 	/*
1522 	 * The bufctl is only valid if the address, cache, and slab are
1523 	 * correct.  We also check that the timestamp is decreasing, to
1524 	 * prevent infinite loops.
1525 	 */
1526 	if ((uintptr_t)b->bc_addr != baseaddr ||
1527 	    b->bc_cache != bhw->bhw_cache ||
1528 	    b->bc_slab != bhw->bhw_slab ||
1529 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1530 		return (WALK_DONE);
1531 
1532 	bhw->bhw_next = b->bc_lastlog;
1533 	bhw->bhw_timestamp = b->bc_timestamp;
1534 
1535 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1536 }
1537 
1538 void
1539 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1540 {
1541 	bufctl_history_walk_t *bhw = wsp->walk_data;
1542 
1543 	mdb_free(bhw, sizeof (*bhw));
1544 }
1545 
1546 typedef struct umem_log_walk {
1547 	umem_bufctl_audit_t *ulw_base;
1548 	umem_bufctl_audit_t **ulw_sorted;
1549 	umem_log_header_t ulw_lh;
1550 	size_t ulw_size;
1551 	size_t ulw_maxndx;
1552 	size_t ulw_ndx;
1553 } umem_log_walk_t;
1554 
1555 int
1556 umem_log_walk_init(mdb_walk_state_t *wsp)
1557 {
1558 	uintptr_t lp = wsp->walk_addr;
1559 	umem_log_walk_t *ulw;
1560 	umem_log_header_t *lhp;
1561 	int maxndx, i, j, k;
1562 
1563 	/*
1564 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1565 	 * read the log whose umem_log_header_t is stored at walk_addr.
1566 	 */
1567 	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1568 		mdb_warn("failed to read 'umem_transaction_log'");
1569 		return (WALK_ERR);
1570 	}
1571 
1572 	if (lp == NULL) {
1573 		mdb_warn("log is disabled\n");
1574 		return (WALK_ERR);
1575 	}
1576 
1577 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1578 	lhp = &ulw->ulw_lh;
1579 
1580 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1581 		mdb_warn("failed to read log header at %p", lp);
1582 		mdb_free(ulw, sizeof (umem_log_walk_t));
1583 		return (WALK_ERR);
1584 	}
1585 
1586 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1587 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1588 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1589 
1590 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1591 	    (uintptr_t)lhp->lh_base) == -1) {
1592 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1593 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1594 		mdb_free(ulw, sizeof (umem_log_walk_t));
1595 		return (WALK_ERR);
1596 	}
1597 
1598 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1599 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1600 
1601 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1602 		caddr_t chunk = (caddr_t)
1603 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1604 
1605 		for (j = 0; j < maxndx; j++) {
1606 			/* LINTED align */
1607 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1608 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1609 		}
1610 	}
1611 
1612 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1613 	    (int(*)(const void *, const void *))bufctlcmp);
1614 
1615 	ulw->ulw_maxndx = k;
1616 	wsp->walk_data = ulw;
1617 
1618 	return (WALK_NEXT);
1619 }
1620 
1621 int
1622 umem_log_walk_step(mdb_walk_state_t *wsp)
1623 {
1624 	umem_log_walk_t *ulw = wsp->walk_data;
1625 	umem_bufctl_audit_t *bcp;
1626 
1627 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1628 		return (WALK_DONE);
1629 
1630 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1631 
1632 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1633 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1634 }
1635 
1636 void
1637 umem_log_walk_fini(mdb_walk_state_t *wsp)
1638 {
1639 	umem_log_walk_t *ulw = wsp->walk_data;
1640 
1641 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1642 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1643 	    sizeof (umem_bufctl_audit_t *));
1644 	mdb_free(ulw, sizeof (umem_log_walk_t));
1645 }
1646 
1647 typedef struct allocdby_bufctl {
1648 	uintptr_t abb_addr;
1649 	hrtime_t abb_ts;
1650 } allocdby_bufctl_t;
1651 
1652 typedef struct allocdby_walk {
1653 	const char *abw_walk;
1654 	uintptr_t abw_thread;
1655 	size_t abw_nbufs;
1656 	size_t abw_size;
1657 	allocdby_bufctl_t *abw_buf;
1658 	size_t abw_ndx;
1659 } allocdby_walk_t;
1660 
1661 int
1662 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1663     allocdby_walk_t *abw)
1664 {
1665 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1666 		return (WALK_NEXT);
1667 
1668 	if (abw->abw_nbufs == abw->abw_size) {
1669 		allocdby_bufctl_t *buf;
1670 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1671 
1672 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1673 
1674 		bcopy(abw->abw_buf, buf, oldsize);
1675 		mdb_free(abw->abw_buf, oldsize);
1676 
1677 		abw->abw_size <<= 1;
1678 		abw->abw_buf = buf;
1679 	}
1680 
1681 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1682 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1683 	abw->abw_nbufs++;
1684 
1685 	return (WALK_NEXT);
1686 }
1687 
1688 /*ARGSUSED*/
1689 int
1690 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1691 {
1692 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1693 	    abw, addr) == -1) {
1694 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1695 		return (WALK_DONE);
1696 	}
1697 
1698 	return (WALK_NEXT);
1699 }
1700 
1701 static int
1702 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1703 {
1704 	if (lhs->abb_ts < rhs->abb_ts)
1705 		return (1);
1706 	if (lhs->abb_ts > rhs->abb_ts)
1707 		return (-1);
1708 	return (0);
1709 }
1710 
1711 static int
1712 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1713 {
1714 	allocdby_walk_t *abw;
1715 
1716 	if (wsp->walk_addr == NULL) {
1717 		mdb_warn("allocdby walk doesn't support global walks\n");
1718 		return (WALK_ERR);
1719 	}
1720 
1721 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1722 
1723 	abw->abw_thread = wsp->walk_addr;
1724 	abw->abw_walk = walk;
1725 	abw->abw_size = 128;	/* something reasonable */
1726 	abw->abw_buf =
1727 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1728 
1729 	wsp->walk_data = abw;
1730 
1731 	if (mdb_walk("umem_cache",
1732 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1733 		mdb_warn("couldn't walk umem_cache");
1734 		allocdby_walk_fini(wsp);
1735 		return (WALK_ERR);
1736 	}
1737 
1738 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1739 	    (int(*)(const void *, const void *))allocdby_cmp);
1740 
1741 	return (WALK_NEXT);
1742 }
1743 
1744 int
1745 allocdby_walk_init(mdb_walk_state_t *wsp)
1746 {
1747 	return (allocdby_walk_init_common(wsp, "bufctl"));
1748 }
1749 
1750 int
1751 freedby_walk_init(mdb_walk_state_t *wsp)
1752 {
1753 	return (allocdby_walk_init_common(wsp, "freectl"));
1754 }
1755 
1756 int
1757 allocdby_walk_step(mdb_walk_state_t *wsp)
1758 {
1759 	allocdby_walk_t *abw = wsp->walk_data;
1760 	uintptr_t addr;
1761 	umem_bufctl_audit_t *bcp;
1762 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1763 
1764 	if (abw->abw_ndx == abw->abw_nbufs)
1765 		return (WALK_DONE);
1766 
1767 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1768 
1769 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1770 		mdb_warn("couldn't read bufctl at %p", addr);
1771 		return (WALK_DONE);
1772 	}
1773 
1774 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1775 }
1776 
1777 void
1778 allocdby_walk_fini(mdb_walk_state_t *wsp)
1779 {
1780 	allocdby_walk_t *abw = wsp->walk_data;
1781 
1782 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1783 	mdb_free(abw, sizeof (allocdby_walk_t));
1784 }
1785 
1786 /*ARGSUSED*/
1787 int
1788 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1789 {
1790 	char c[MDB_SYM_NAMLEN];
1791 	GElf_Sym sym;
1792 	int i;
1793 
1794 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1795 	for (i = 0; i < bcp->bc_depth; i++) {
1796 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1797 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1798 			continue;
1799 		if (is_umem_sym(c, "umem_"))
1800 			continue;
1801 		mdb_printf("%s+0x%lx",
1802 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1803 		break;
1804 	}
1805 	mdb_printf("\n");
1806 
1807 	return (WALK_NEXT);
1808 }
1809 
1810 static int
1811 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1812 {
1813 	if (!(flags & DCMD_ADDRSPEC))
1814 		return (DCMD_USAGE);
1815 
1816 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1817 
1818 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1819 		mdb_warn("can't walk '%s' for %p", w, addr);
1820 		return (DCMD_ERR);
1821 	}
1822 
1823 	return (DCMD_OK);
1824 }
1825 
1826 /*ARGSUSED*/
1827 int
1828 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1829 {
1830 	return (allocdby_common(addr, flags, "allocdby"));
1831 }
1832 
1833 /*ARGSUSED*/
1834 int
1835 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1836 {
1837 	return (allocdby_common(addr, flags, "freedby"));
1838 }
1839 
1840 typedef struct whatis_info {
1841 	mdb_whatis_t *wi_w;
1842 	const umem_cache_t *wi_cache;
1843 	const vmem_t *wi_vmem;
1844 	vmem_t *wi_msb_arena;
1845 	size_t wi_slab_size;
1846 	int wi_slab_found;
1847 	uint_t wi_freemem;
1848 } whatis_info_t;
1849 
1850 /* call one of our dcmd functions with "-v" and the provided address */
1851 static void
1852 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
1853 {
1854 	mdb_arg_t a;
1855 	a.a_type = MDB_TYPE_STRING;
1856 	a.a_un.a_str = "-v";
1857 
1858 	mdb_printf(":\n");
1859 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
1860 }
1861 
1862 static void
1863 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
1864     uintptr_t baddr)
1865 {
1866 	mdb_whatis_t *w = wi->wi_w;
1867 	const umem_cache_t *cp = wi->wi_cache;
1868 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
1869 
1870 	int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
1871 
1872 	mdb_whatis_report_object(w, maddr, addr, "");
1873 
1874 	if (baddr != 0 && !call_printer)
1875 		mdb_printf("bufctl %p ", baddr);
1876 
1877 	mdb_printf("%s from %s",
1878 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
1879 
1880 	if (call_printer && baddr != 0) {
1881 		whatis_call_printer(bufctl, baddr);
1882 		return;
1883 	}
1884 	mdb_printf("\n");
1885 }
1886 
1887 /*ARGSUSED*/
1888 static int
1889 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
1890 {
1891 	mdb_whatis_t *w = wi->wi_w;
1892 
1893 	uintptr_t cur;
1894 	size_t size = wi->wi_cache->cache_bufsize;
1895 
1896 	while (mdb_whatis_match(w, addr, size, &cur))
1897 		whatis_print_umem(wi, cur, addr, NULL);
1898 
1899 	return (WHATIS_WALKRET(w));
1900 }
1901 
1902 /*ARGSUSED*/
1903 static int
1904 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
1905 {
1906 	mdb_whatis_t *w = wi->wi_w;
1907 
1908 	uintptr_t cur;
1909 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
1910 	size_t size = wi->wi_cache->cache_bufsize;
1911 
1912 	while (mdb_whatis_match(w, addr, size, &cur))
1913 		whatis_print_umem(wi, cur, addr, baddr);
1914 
1915 	return (WHATIS_WALKRET(w));
1916 }
1917 
1918 
1919 static int
1920 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
1921 {
1922 	mdb_whatis_t *w = wi->wi_w;
1923 
1924 	size_t size = vs->vs_end - vs->vs_start;
1925 	uintptr_t cur;
1926 
1927 	/* We're not interested in anything but alloc and free segments */
1928 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
1929 		return (WALK_NEXT);
1930 
1931 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
1932 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
1933 
1934 		/*
1935 		 * If we're not printing it seperately, provide the vmem_seg
1936 		 * pointer if it has a stack trace.
1937 		 */
1938 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
1939 		    ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
1940 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
1941 			mdb_printf("vmem_seg %p ", addr);
1942 		}
1943 
1944 		mdb_printf("%s from %s vmem arena",
1945 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
1946 		    wi->wi_vmem->vm_name);
1947 
1948 		if (!mdb_whatis_flags(w) & WHATIS_QUIET)
1949 			whatis_call_printer(vmem_seg, addr);
1950 		else
1951 			mdb_printf("\n");
1952 	}
1953 
1954 	return (WHATIS_WALKRET(w));
1955 }
1956 
1957 static int
1958 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
1959 {
1960 	mdb_whatis_t *w = wi->wi_w;
1961 	const char *nm = vmem->vm_name;
1962 	wi->wi_vmem = vmem;
1963 
1964 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
1965 		mdb_printf("Searching vmem arena %s...\n", nm);
1966 
1967 	if (mdb_pwalk("vmem_seg",
1968 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
1969 		mdb_warn("can't walk vmem seg for %p", addr);
1970 		return (WALK_NEXT);
1971 	}
1972 
1973 	return (WHATIS_WALKRET(w));
1974 }
1975 
1976 /*ARGSUSED*/
1977 static int
1978 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
1979 {
1980 	mdb_whatis_t *w = wi->wi_w;
1981 
1982 	/* It must overlap with the slab data, or it's not interesting */
1983 	if (mdb_whatis_overlaps(w,
1984 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
1985 		wi->wi_slab_found++;
1986 		return (WALK_DONE);
1987 	}
1988 	return (WALK_NEXT);
1989 }
1990 
1991 static int
1992 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
1993 {
1994 	mdb_whatis_t *w = wi->wi_w;
1995 	char *walk, *freewalk;
1996 	mdb_walk_cb_t func;
1997 	int do_bufctl;
1998 
1999 	/* Override the '-b' flag as necessary */
2000 	if (!(c->cache_flags & UMF_HASH))
2001 		do_bufctl = FALSE;	/* no bufctls to walk */
2002 	else if (c->cache_flags & UMF_AUDIT)
2003 		do_bufctl = TRUE;	/* we always want debugging info */
2004 	else
2005 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2006 
2007 	if (do_bufctl) {
2008 		walk = "bufctl";
2009 		freewalk = "freectl";
2010 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2011 	} else {
2012 		walk = "umem";
2013 		freewalk = "freemem";
2014 		func = (mdb_walk_cb_t)whatis_walk_umem;
2015 	}
2016 
2017 	wi->wi_cache = c;
2018 
2019 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2020 		mdb_printf("Searching %s...\n", c->cache_name);
2021 
2022 	/*
2023 	 * If more then two buffers live on each slab, figure out if we're
2024 	 * interested in anything in any slab before doing the more expensive
2025 	 * umem/freemem (bufctl/freectl) walkers.
2026 	 */
2027 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2028 	if (!(c->cache_flags & UMF_HASH))
2029 		wi->wi_slab_size -= sizeof (umem_slab_t);
2030 
2031 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2032 		wi->wi_slab_found = 0;
2033 		if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2034 		    addr) == -1) {
2035 			mdb_warn("can't find umem_slab walker");
2036 			return (WALK_DONE);
2037 		}
2038 		if (wi->wi_slab_found == 0)
2039 			return (WALK_NEXT);
2040 	}
2041 
2042 	wi->wi_freemem = FALSE;
2043 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2044 		mdb_warn("can't find %s walker", walk);
2045 		return (WALK_DONE);
2046 	}
2047 
2048 	if (mdb_whatis_done(w))
2049 		return (WALK_DONE);
2050 
2051 	/*
2052 	 * We have searched for allocated memory; now search for freed memory.
2053 	 */
2054 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2055 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2056 
2057 	wi->wi_freemem = TRUE;
2058 
2059 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2060 		mdb_warn("can't find %s walker", freewalk);
2061 		return (WALK_DONE);
2062 	}
2063 
2064 	return (WHATIS_WALKRET(w));
2065 }
2066 
2067 static int
2068 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2069 {
2070 	if (c->cache_arena == wi->wi_msb_arena ||
2071 	    (c->cache_cflags & UMC_NOTOUCH))
2072 		return (WALK_NEXT);
2073 
2074 	return (whatis_walk_cache(addr, c, wi));
2075 }
2076 
2077 static int
2078 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2079 {
2080 	if (c->cache_arena != wi->wi_msb_arena)
2081 		return (WALK_NEXT);
2082 
2083 	return (whatis_walk_cache(addr, c, wi));
2084 }
2085 
2086 static int
2087 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2088 {
2089 	if (c->cache_arena == wi->wi_msb_arena ||
2090 	    !(c->cache_cflags & UMC_NOTOUCH))
2091 		return (WALK_NEXT);
2092 
2093 	return (whatis_walk_cache(addr, c, wi));
2094 }
2095 
2096 /*ARGSUSED*/
2097 static int
2098 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2099 {
2100 	whatis_info_t wi;
2101 
2102 	bzero(&wi, sizeof (wi));
2103 	wi.wi_w = w;
2104 
2105 	/* umem's metadata is allocated from the umem_internal_arena */
2106 	if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2107 		mdb_warn("unable to readvar \"umem_internal_arena\"");
2108 
2109 	/*
2110 	 * We process umem caches in the following order:
2111 	 *
2112 	 *	non-UMC_NOTOUCH, non-metadata	(typically the most interesting)
2113 	 *	metadata			(can be huge with UMF_AUDIT)
2114 	 *	UMC_NOTOUCH, non-metadata	(see umem_walk_all())
2115 	 */
2116 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2117 	    &wi) == -1 ||
2118 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2119 	    &wi) == -1 ||
2120 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2121 	    &wi) == -1) {
2122 		mdb_warn("couldn't find umem_cache walker");
2123 		return (1);
2124 	}
2125 	return (0);
2126 }
2127 
2128 /*ARGSUSED*/
2129 static int
2130 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2131 {
2132 	whatis_info_t wi;
2133 
2134 	bzero(&wi, sizeof (wi));
2135 	wi.wi_w = w;
2136 
2137 	if (mdb_walk("vmem_postfix",
2138 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2139 		mdb_warn("couldn't find vmem_postfix walker");
2140 		return (1);
2141 	}
2142 	return (0);
2143 }
2144 
2145 int
2146 umem_init(void)
2147 {
2148 	mdb_walker_t w = {
2149 		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
2150 		umem_cache_walk_step, umem_cache_walk_fini
2151 	};
2152 
2153 	if (mdb_add_walker(&w) == -1) {
2154 		mdb_warn("failed to add umem_cache walker");
2155 		return (-1);
2156 	}
2157 
2158 	if (umem_update_variables() == -1)
2159 		return (-1);
2160 
2161 	/* install a callback so that our variables are always up-to-date */
2162 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2163 	umem_statechange_cb(NULL);
2164 
2165 	/*
2166 	 * Register our ::whatis callbacks.
2167 	 */
2168 	mdb_whatis_register("umem", whatis_run_umem, NULL,
2169 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2170 	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2171 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2172 
2173 	return (0);
2174 }
2175 
/*
 * Address range [umc_low, umc_high) of one CPU's current log chunk, as
 * computed by umem_log().
 */
struct umem_log_cpu {
	uintptr_t umc_low;	/* start of the CPU's current chunk */
	uintptr_t umc_high;	/* the CPU's current log position */
};
typedef struct umem_log_cpu umem_log_cpu_t;
2180 
2181 int
2182 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2183 {
2184 	int i;
2185 
2186 	for (i = 0; i < umem_max_ncpus; i++) {
2187 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2188 			break;
2189 	}
2190 
2191 	if (i == umem_max_ncpus)
2192 		mdb_printf("   ");
2193 	else
2194 		mdb_printf("%3d", i);
2195 
2196 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2197 	    b->bc_timestamp, b->bc_thread);
2198 
2199 	return (WALK_NEXT);
2200 }
2201 
2202 /*ARGSUSED*/
2203 int
2204 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2205 {
2206 	umem_log_header_t lh;
2207 	umem_cpu_log_header_t clh;
2208 	uintptr_t lhp, clhp;
2209 	umem_log_cpu_t *umc;
2210 	int i;
2211 
2212 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2213 		mdb_warn("failed to read 'umem_transaction_log'");
2214 		return (DCMD_ERR);
2215 	}
2216 
2217 	if (lhp == NULL) {
2218 		mdb_warn("no umem transaction log\n");
2219 		return (DCMD_ERR);
2220 	}
2221 
2222 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2223 		mdb_warn("failed to read log header at %p", lhp);
2224 		return (DCMD_ERR);
2225 	}
2226 
2227 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2228 
2229 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2230 	    UM_SLEEP | UM_GC);
2231 
2232 	for (i = 0; i < umem_max_ncpus; i++) {
2233 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2234 			mdb_warn("cannot read cpu %d's log header at %p",
2235 			    i, clhp);
2236 			return (DCMD_ERR);
2237 		}
2238 
2239 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2240 		    (uintptr_t)lh.lh_base;
2241 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2242 
2243 		clhp += sizeof (umem_cpu_log_header_t);
2244 	}
2245 
2246 	if (DCMD_HDRSPEC(flags)) {
2247 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2248 		    "BUFADDR", "TIMESTAMP", "THREAD");
2249 	}
2250 
2251 	/*
2252 	 * If we have been passed an address, we'll just print out that
2253 	 * log entry.
2254 	 */
2255 	if (flags & DCMD_ADDRSPEC) {
2256 		umem_bufctl_audit_t *bp;
2257 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2258 
2259 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2260 			mdb_warn("failed to read bufctl at %p", addr);
2261 			return (DCMD_ERR);
2262 		}
2263 
2264 		(void) umem_log_walk(addr, bp, umc);
2265 
2266 		return (DCMD_OK);
2267 	}
2268 
2269 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2270 		mdb_warn("can't find umem log walker");
2271 		return (DCMD_ERR);
2272 	}
2273 
2274 	return (DCMD_OK);
2275 }
2276 
/*
 * State shared between bufctl() and bufctl_history_callback() while the
 * "bufctl_history" walk is in progress.
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags passed to bufctl() */
	int		bhc_argc;	/* argument count passed to bufctl() */
	const mdb_arg_t	*bhc_argv;	/* argument vector passed to bufctl() */
	int		bhc_ret;	/* result of the latest bufctl() call */
} bufctl_history_cb_t;
2283 
2284 /*ARGSUSED*/
2285 static int
2286 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2287 {
2288 	bufctl_history_cb_t *bhc = arg;
2289 
2290 	bhc->bhc_ret =
2291 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2292 
2293 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2294 
2295 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2296 }
2297 
/*
 * Help text for the bufctl dcmd: a one-line summary followed by an OPTIONS
 * section.  The OPTIONS heading is printed with the indent reduced by two
 * columns, after which the indent is restored.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2320 
2321 int
2322 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2323 {
2324 	uint_t verbose = FALSE;
2325 	uint_t history = FALSE;
2326 	uint_t in_history = FALSE;
2327 	uintptr_t caller = NULL, thread = NULL;
2328 	uintptr_t laddr, haddr, baddr = NULL;
2329 	hrtime_t earliest = 0, latest = 0;
2330 	int i, depth;
2331 	char c[MDB_SYM_NAMLEN];
2332 	GElf_Sym sym;
2333 	umem_bufctl_audit_t *bcp;
2334 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2335 
2336 	if (mdb_getopts(argc, argv,
2337 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2338 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2339 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2340 	    'c', MDB_OPT_UINTPTR, &caller,
2341 	    't', MDB_OPT_UINTPTR, &thread,
2342 	    'e', MDB_OPT_UINT64, &earliest,
2343 	    'l', MDB_OPT_UINT64, &latest,
2344 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2345 		return (DCMD_USAGE);
2346 
2347 	if (!(flags & DCMD_ADDRSPEC))
2348 		return (DCMD_USAGE);
2349 
2350 	if (in_history && !history)
2351 		return (DCMD_USAGE);
2352 
2353 	if (history && !in_history) {
2354 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2355 		    UM_SLEEP | UM_GC);
2356 		bufctl_history_cb_t bhc;
2357 
2358 		nargv[0].a_type = MDB_TYPE_STRING;
2359 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2360 
2361 		for (i = 0; i < argc; i++)
2362 			nargv[i + 1] = argv[i];
2363 
2364 		/*
2365 		 * When in history mode, we treat each element as if it
2366 		 * were in a seperate loop, so that the headers group
2367 		 * bufctls with similar histories.
2368 		 */
2369 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2370 		bhc.bhc_argc = argc + 1;
2371 		bhc.bhc_argv = nargv;
2372 		bhc.bhc_ret = DCMD_OK;
2373 
2374 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2375 		    addr) == -1) {
2376 			mdb_warn("unable to walk bufctl_history");
2377 			return (DCMD_ERR);
2378 		}
2379 
2380 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2381 			mdb_printf("\n");
2382 
2383 		return (bhc.bhc_ret);
2384 	}
2385 
2386 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2387 		if (verbose) {
2388 			mdb_printf("%16s %16s %16s %16s\n"
2389 			    "%<u>%16s %16s %16s %16s%</u>\n",
2390 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2391 			    "", "CACHE", "LASTLOG", "CONTENTS");
2392 		} else {
2393 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2394 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2395 		}
2396 	}
2397 
2398 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2399 		mdb_warn("couldn't read bufctl at %p", addr);
2400 		return (DCMD_ERR);
2401 	}
2402 
2403 	/*
2404 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2405 	 * the address does not really refer to a bufctl.
2406 	 */
2407 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2408 
2409 	if (caller != NULL) {
2410 		laddr = caller;
2411 		haddr = caller + sizeof (caller);
2412 
2413 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2414 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2415 			/*
2416 			 * We were provided an exact symbol value; any
2417 			 * address in the function is valid.
2418 			 */
2419 			laddr = (uintptr_t)sym.st_value;
2420 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2421 		}
2422 
2423 		for (i = 0; i < depth; i++)
2424 			if (bcp->bc_stack[i] >= laddr &&
2425 			    bcp->bc_stack[i] < haddr)
2426 				break;
2427 
2428 		if (i == depth)
2429 			return (DCMD_OK);
2430 	}
2431 
2432 	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2433 		return (DCMD_OK);
2434 
2435 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2436 		return (DCMD_OK);
2437 
2438 	if (latest != 0 && bcp->bc_timestamp > latest)
2439 		return (DCMD_OK);
2440 
2441 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2442 		return (DCMD_OK);
2443 
2444 	if (flags & DCMD_PIPE_OUT) {
2445 		mdb_printf("%#r\n", addr);
2446 		return (DCMD_OK);
2447 	}
2448 
2449 	if (verbose) {
2450 		mdb_printf(
2451 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2452 		    "%16s %16p %16p %16p\n",
2453 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2454 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2455 
2456 		mdb_inc_indent(17);
2457 		for (i = 0; i < depth; i++)
2458 			mdb_printf("%a\n", bcp->bc_stack[i]);
2459 		mdb_dec_indent(17);
2460 		mdb_printf("\n");
2461 	} else {
2462 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2463 		    bcp->bc_timestamp, bcp->bc_thread);
2464 
2465 		for (i = 0; i < depth; i++) {
2466 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2467 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2468 				continue;
2469 			if (is_umem_sym(c, "umem_"))
2470 				continue;
2471 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2472 			break;
2473 		}
2474 
2475 		if (i >= depth)
2476 			mdb_printf("\n");
2477 	}
2478 
2479 	return (DCMD_OK);
2480 }
2481 
2482 /*ARGSUSED*/
2483 int
2484 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2485 {
2486 	mdb_arg_t a;
2487 
2488 	if (!(flags & DCMD_ADDRSPEC))
2489 		return (DCMD_USAGE);
2490 
2491 	if (argc != 0)
2492 		return (DCMD_USAGE);
2493 
2494 	a.a_type = MDB_TYPE_STRING;
2495 	a.a_un.a_str = "-v";
2496 
2497 	return (bufctl(addr, flags, 1, &a));
2498 }
2499 
/*
 * Per-cache state for umem_verify(); passed as the callback argument to
 * verify_alloc() and verify_free().
 */
typedef struct umem_verify {
	uint64_t *umv_buf;		/* buffer to read cache contents into */
	size_t umv_size;		/* number of bytes in umv_buf */
	int umv_corruption;		/* > 0 if corruption found. */
	int umv_besilent;		/* non-zero: don't report each site */
	struct umem_cache umv_cache;	/* the cache we're operating on */
} umem_verify_t;
2507 
/*
 * verify_pattern()
 *	Check that the 'size' bytes starting at buf_arg consist entirely of
 *	64-bit words equal to pat.  Returns the byte offset of the first
 *	word that differs, or -1 when every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	/*LINTED*/
	uint64_t *limit = (uint64_t *)((char *)buf_arg + size);
	uint64_t *cur = buf_arg;

	while (cur < limit) {
		if (*cur != pat)
			return ((uintptr_t)cur - (uintptr_t)buf_arg);
		cur++;
	}

	return (-1);
}
2524 
2525 /*
2526  * verify_buftag()
2527  *	verify that btp->bt_bxstat == (bcp ^ pat)
2528  */
2529 static int
2530 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2531 {
2532 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2533 }
2534 
2535 /*
2536  * verify_free()
2537  *	verify the integrity of a free block of memory by checking
2538  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2539  */
2540 /*ARGSUSED1*/
2541 static int
2542 verify_free(uintptr_t addr, const void *data, void *private)
2543 {
2544 	umem_verify_t *umv = (umem_verify_t *)private;
2545 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2546 	int64_t corrupt;		/* corruption offset */
2547 	umem_buftag_t *buftagp;		/* ptr to buftag */
2548 	umem_cache_t *cp = &umv->umv_cache;
2549 	int besilent = umv->umv_besilent;
2550 
2551 	/*LINTED*/
2552 	buftagp = UMEM_BUFTAG(cp, buf);
2553 
2554 	/*
2555 	 * Read the buffer to check.
2556 	 */
2557 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2558 		if (!besilent)
2559 			mdb_warn("couldn't read %p", addr);
2560 		return (WALK_NEXT);
2561 	}
2562 
2563 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2564 	    UMEM_FREE_PATTERN)) >= 0) {
2565 		if (!besilent)
2566 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2567 			    addr, (uintptr_t)addr + corrupt);
2568 		goto corrupt;
2569 	}
2570 
2571 	if ((cp->cache_flags & UMF_HASH) &&
2572 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2573 		if (!besilent)
2574 			mdb_printf("buffer %p (free) seems to "
2575 			    "have a corrupt redzone pattern\n", addr);
2576 		goto corrupt;
2577 	}
2578 
2579 	/*
2580 	 * confirm bufctl pointer integrity.
2581 	 */
2582 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2583 		if (!besilent)
2584 			mdb_printf("buffer %p (free) has a corrupt "
2585 			    "buftag\n", addr);
2586 		goto corrupt;
2587 	}
2588 
2589 	return (WALK_NEXT);
2590 corrupt:
2591 	umv->umv_corruption++;
2592 	return (WALK_NEXT);
2593 }
2594 
2595 /*
2596  * verify_alloc()
2597  *	Verify that the buftag of an allocated buffer makes sense with respect
2598  *	to the buffer.
2599  */
2600 /*ARGSUSED1*/
2601 static int
2602 verify_alloc(uintptr_t addr, const void *data, void *private)
2603 {
2604 	umem_verify_t *umv = (umem_verify_t *)private;
2605 	umem_cache_t *cp = &umv->umv_cache;
2606 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2607 	/*LINTED*/
2608 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2609 	uint32_t *ip = (uint32_t *)buftagp;
2610 	uint8_t *bp = (uint8_t *)buf;
2611 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2612 	int besilent = umv->umv_besilent;
2613 
2614 	/*
2615 	 * Read the buffer to check.
2616 	 */
2617 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2618 		if (!besilent)
2619 			mdb_warn("couldn't read %p", addr);
2620 		return (WALK_NEXT);
2621 	}
2622 
2623 	/*
2624 	 * There are two cases to handle:
2625 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2626 	 *    0xfeedfacefeedface at the end of it
2627 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2628 	 *    0xbb just past the end of the region in use.  At the buftag,
2629 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2630 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2631 	 *    endianness), followed by 32 bits containing the offset of the
2632 	 *    0xbb byte in the buffer.
2633 	 *
2634 	 * Finally, the two 32-bit words that comprise the second half of the
2635 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2636 	 */
2637 
2638 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2639 		looks_ok = 1;
2640 	else if (!UMEM_SIZE_VALID(ip[1]))
2641 		size_ok = 0;
2642 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2643 		looks_ok = 1;
2644 	else
2645 		size_ok = 0;
2646 
2647 	if (!size_ok) {
2648 		if (!besilent)
2649 			mdb_printf("buffer %p (allocated) has a corrupt "
2650 			    "redzone size encoding\n", addr);
2651 		goto corrupt;
2652 	}
2653 
2654 	if (!looks_ok) {
2655 		if (!besilent)
2656 			mdb_printf("buffer %p (allocated) has a corrupt "
2657 			    "redzone signature\n", addr);
2658 		goto corrupt;
2659 	}
2660 
2661 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2662 		if (!besilent)
2663 			mdb_printf("buffer %p (allocated) has a "
2664 			    "corrupt buftag\n", addr);
2665 		goto corrupt;
2666 	}
2667 
2668 	return (WALK_NEXT);
2669 corrupt:
2670 	umv->umv_corruption++;
2671 	return (WALK_NEXT);
2672 }
2673 
2674 /*ARGSUSED2*/
2675 int
2676 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2677 {
2678 	if (flags & DCMD_ADDRSPEC) {
2679 		int check_alloc = 0, check_free = 0;
2680 		umem_verify_t umv;
2681 
2682 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2683 		    addr) == -1) {
2684 			mdb_warn("couldn't read umem_cache %p", addr);
2685 			return (DCMD_ERR);
2686 		}
2687 
2688 		umv.umv_size = umv.umv_cache.cache_buftag +
2689 		    sizeof (umem_buftag_t);
2690 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2691 		umv.umv_corruption = 0;
2692 
2693 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2694 			check_alloc = 1;
2695 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2696 				check_free = 1;
2697 		} else {
2698 			if (!(flags & DCMD_LOOP)) {
2699 				mdb_warn("cache %p (%s) does not have "
2700 				    "redzone checking enabled\n", addr,
2701 				    umv.umv_cache.cache_name);
2702 			}
2703 			return (DCMD_ERR);
2704 		}
2705 
2706 		if (flags & DCMD_LOOP) {
2707 			/*
2708 			 * table mode, don't print out every corrupt buffer
2709 			 */
2710 			umv.umv_besilent = 1;
2711 		} else {
2712 			mdb_printf("Summary for cache '%s'\n",
2713 			    umv.umv_cache.cache_name);
2714 			mdb_inc_indent(2);
2715 			umv.umv_besilent = 0;
2716 		}
2717 
2718 		if (check_alloc)
2719 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2720 		if (check_free)
2721 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2722 
2723 		if (flags & DCMD_LOOP) {
2724 			if (umv.umv_corruption == 0) {
2725 				mdb_printf("%-*s %?p clean\n",
2726 				    UMEM_CACHE_NAMELEN,
2727 				    umv.umv_cache.cache_name, addr);
2728 			} else {
2729 				char *s = "";	/* optional s in "buffer[s]" */
2730 				if (umv.umv_corruption > 1)
2731 					s = "s";
2732 
2733 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2734 				    UMEM_CACHE_NAMELEN,
2735 				    umv.umv_cache.cache_name, addr,
2736 				    umv.umv_corruption, s);
2737 			}
2738 		} else {
2739 			/*
2740 			 * This is the more verbose mode, when the user has
2741 			 * type addr::umem_verify.  If the cache was clean,
2742 			 * nothing will have yet been printed. So say something.
2743 			 */
2744 			if (umv.umv_corruption == 0)
2745 				mdb_printf("clean\n");
2746 
2747 			mdb_dec_indent(2);
2748 		}
2749 	} else {
2750 		/*
2751 		 * If the user didn't specify a cache to verify, we'll walk all
2752 		 * umem_cache's, specifying ourself as a callback for each...
2753 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2754 		 */
2755 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2756 		    "Cache Name", "Addr", "Cache Integrity");
2757 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2758 	}
2759 
2760 	return (DCMD_OK);
2761 }
2762 
/*
 * One vmem arena as seen by the vmem walkers.  vmem_walk_init() builds one
 * node per arena and links the nodes into a tree keyed on vm_source.
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next node in the flat list */
	struct vmem_node *vn_parent;	/* node of vm_source, if any */
	struct vmem_node *vn_sibling;	/* next node with the same parent */
	struct vmem_node *vn_children;	/* first child node */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* set once visited by postfix walk */
	vmem_t vn_vmem;			/* local copy of the vmem_t */
} vmem_node_t;
2772 
/*
 * Walk state shared by the vmem walkers' init, step and fini routines.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;		/* head of the list of root nodes */
	vmem_node_t *vw_current;	/* node the step function is at */
} vmem_walk_t;
2777 
2778 int
2779 vmem_walk_init(mdb_walk_state_t *wsp)
2780 {
2781 	uintptr_t vaddr, paddr;
2782 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2783 	vmem_walk_t *vw;
2784 
2785 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2786 		mdb_warn("couldn't read 'vmem_list'");
2787 		return (WALK_ERR);
2788 	}
2789 
2790 	while (vaddr != NULL) {
2791 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2792 		vp->vn_addr = vaddr;
2793 		vp->vn_next = head;
2794 		head = vp;
2795 
2796 		if (vaddr == wsp->walk_addr)
2797 			current = vp;
2798 
2799 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2800 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2801 			goto err;
2802 		}
2803 
2804 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2805 	}
2806 
2807 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2808 
2809 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2810 			vp->vn_sibling = root;
2811 			root = vp;
2812 			continue;
2813 		}
2814 
2815 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2816 			if (parent->vn_addr != paddr)
2817 				continue;
2818 			vp->vn_sibling = parent->vn_children;
2819 			parent->vn_children = vp;
2820 			vp->vn_parent = parent;
2821 			break;
2822 		}
2823 
2824 		if (parent == NULL) {
2825 			mdb_warn("couldn't find %p's parent (%p)\n",
2826 			    vp->vn_addr, paddr);
2827 			goto err;
2828 		}
2829 	}
2830 
2831 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2832 	vw->vw_root = root;
2833 
2834 	if (current != NULL)
2835 		vw->vw_current = current;
2836 	else
2837 		vw->vw_current = root;
2838 
2839 	wsp->walk_data = vw;
2840 	return (WALK_NEXT);
2841 err:
2842 	for (vp = head; head != NULL; vp = head) {
2843 		head = vp->vn_next;
2844 		mdb_free(vp, sizeof (vmem_node_t));
2845 	}
2846 
2847 	return (WALK_ERR);
2848 }
2849 
2850 int
2851 vmem_walk_step(mdb_walk_state_t *wsp)
2852 {
2853 	vmem_walk_t *vw = wsp->walk_data;
2854 	vmem_node_t *vp;
2855 	int rval;
2856 
2857 	if ((vp = vw->vw_current) == NULL)
2858 		return (WALK_DONE);
2859 
2860 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2861 
2862 	if (vp->vn_children != NULL) {
2863 		vw->vw_current = vp->vn_children;
2864 		return (rval);
2865 	}
2866 
2867 	do {
2868 		vw->vw_current = vp->vn_sibling;
2869 		vp = vp->vn_parent;
2870 	} while (vw->vw_current == NULL && vp != NULL);
2871 
2872 	return (rval);
2873 }
2874 
2875 /*
2876  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
2877  * children are visited before their parent.  We perform the postfix walk
2878  * iteratively (rather than recursively) to allow mdb to regain control
2879  * after each callback.
2880  */
2881 int
2882 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2883 {
2884 	vmem_walk_t *vw = wsp->walk_data;
2885 	vmem_node_t *vp = vw->vw_current;
2886 	int rval;
2887 
2888 	/*
2889 	 * If this node is marked, then we know that we have already visited
2890 	 * all of its children.  If the node has any siblings, they need to
2891 	 * be visited next; otherwise, we need to visit the parent.  Note
2892 	 * that vp->vn_marked will only be zero on the first invocation of
2893 	 * the step function.
2894 	 */
2895 	if (vp->vn_marked) {
2896 		if (vp->vn_sibling != NULL)
2897 			vp = vp->vn_sibling;
2898 		else if (vp->vn_parent != NULL)
2899 			vp = vp->vn_parent;
2900 		else {
2901 			/*
2902 			 * We have neither a parent, nor a sibling, and we
2903 			 * have already been visited; we're done.
2904 			 */
2905 			return (WALK_DONE);
2906 		}
2907 	}
2908 
2909 	/*
2910 	 * Before we visit this node, visit its children.
2911 	 */
2912 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2913 		vp = vp->vn_children;
2914 
2915 	vp->vn_marked = 1;
2916 	vw->vw_current = vp;
2917 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2918 
2919 	return (rval);
2920 }
2921 
2922 void
2923 vmem_walk_fini(mdb_walk_state_t *wsp)
2924 {
2925 	vmem_walk_t *vw = wsp->walk_data;
2926 	vmem_node_t *root = vw->vw_root;
2927 	int done;
2928 
2929 	if (root == NULL)
2930 		return;
2931 
2932 	if ((vw->vw_root = root->vn_children) != NULL)
2933 		vmem_walk_fini(wsp);
2934 
2935 	vw->vw_root = root->vn_sibling;
2936 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2937 	mdb_free(root, sizeof (vmem_node_t));
2938 
2939 	if (done) {
2940 		mdb_free(vw, sizeof (vmem_walk_t));
2941 	} else {
2942 		vmem_walk_fini(wsp);
2943 	}
2944 }
2945 
/*
 * Walk state for the vmem segment walkers.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report, or VMEM_NONE */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
} vmem_seg_walk_t;
2951 
2952 /*ARGSUSED*/
2953 int
2954 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2955 {
2956 	vmem_seg_walk_t *vsw;
2957 
2958 	if (wsp->walk_addr == NULL) {
2959 		mdb_warn("vmem_%s does not support global walks\n", name);
2960 		return (WALK_ERR);
2961 	}
2962 
2963 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2964 
2965 	vsw->vsw_type = type;
2966 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2967 	vsw->vsw_current = vsw->vsw_start;
2968 
2969 	return (WALK_NEXT);
2970 }
2971 
/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
 * The segment walkers use VMEM_NONE as the "no type filter" value: a walk
 * whose vsw_type is VMEM_NONE reports segments of every type.
 */
#define	VMEM_NONE	0
2976 
/*
 * Segment walker init that records VMEM_ALLOC as the walk's segment type;
 * "alloc" is the name used in vmem_seg_walk_common_init()'s error message.
 */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
2982 
/*
 * Segment walker init that records VMEM_FREE as the walk's segment type;
 * "free" is the name used in vmem_seg_walk_common_init()'s error message.
 */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
2988 
/*
 * Segment walker init that records VMEM_SPAN as the walk's segment type;
 * "span" is the name used in vmem_seg_walk_common_init()'s error message.
 */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
2994 
/*
 * Segment walker init that records VMEM_NONE (no type filter) as the walk's
 * segment type; "seg" is the name used in vmem_seg_walk_common_init()'s
 * error message.
 */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
3000 
3001 int
3002 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3003 {
3004 	vmem_seg_t seg;
3005 	vmem_seg_walk_t *vsw = wsp->walk_data;
3006 	uintptr_t addr = vsw->vsw_current;
3007 	static size_t seg_size = 0;
3008 	int rval;
3009 
3010 	if (!seg_size) {
3011 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3012 			mdb_warn("failed to read 'vmem_seg_size'");
3013 			seg_size = sizeof (vmem_seg_t);
3014 		}
3015 	}
3016 
3017 	if (seg_size < sizeof (seg))
3018 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3019 
3020 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3021 		mdb_warn("couldn't read vmem_seg at %p", addr);
3022 		return (WALK_ERR);
3023 	}
3024 
3025 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3026 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3027 		rval = WALK_NEXT;
3028 	} else {
3029 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3030 	}
3031 
3032 	if (vsw->vsw_current == vsw->vsw_start)
3033 		return (WALK_DONE);
3034 
3035 	return (rval);
3036 }
3037 
3038 void
3039 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3040 {
3041 	vmem_seg_walk_t *vsw = wsp->walk_data;
3042 
3043 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3044 }
3045 
/*
 * Width of the NAME column printed by the vmem dcmd, and the size of the
 * buffer it formats the (indented) arena name into.
 */
#define	VMEM_NAMEWIDTH	22
3047 
3048 int
3049 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3050 {
3051 	vmem_t v, parent;
3052 	uintptr_t paddr;
3053 	int ident = 0;
3054 	char c[VMEM_NAMEWIDTH];
3055 
3056 	if (!(flags & DCMD_ADDRSPEC)) {
3057 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3058 			mdb_warn("can't walk vmem");
3059 			return (DCMD_ERR);
3060 		}
3061 		return (DCMD_OK);
3062 	}
3063 
3064 	if (DCMD_HDRSPEC(flags))
3065 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3066 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3067 		    "TOTAL", "SUCCEED", "FAIL");
3068 
3069 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3070 		mdb_warn("couldn't read vmem at %p", addr);
3071 		return (DCMD_ERR);
3072 	}
3073 
3074 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3075 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3076 			mdb_warn("couldn't trace %p's ancestry", addr);
3077 			ident = 0;
3078 			break;
3079 		}
3080 		paddr = (uintptr_t)parent.vm_source;
3081 	}
3082 
3083 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3084 
3085 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3086 	    addr, VMEM_NAMEWIDTH, c,
3087 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3088 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3089 
3090 	return (DCMD_OK);
3091 }
3092 
/*
 * Help text for the vmem_seg dcmd: a short description followed by an
 * OPTIONS section.  The OPTIONS heading is printed with the indent reduced
 * by two columns, after which the indent is restored.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3124 
3125 
3126 /*ARGSUSED*/
3127 int
3128 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3129 {
3130 	vmem_seg_t vs;
3131 	uintptr_t *stk = vs.vs_stack;
3132 	uintptr_t sz;
3133 	uint8_t t;
3134 	const char *type = NULL;
3135 	GElf_Sym sym;
3136 	char c[MDB_SYM_NAMLEN];
3137 	int no_debug;
3138 	int i;
3139 	int depth;
3140 	uintptr_t laddr, haddr;
3141 
3142 	uintptr_t caller = NULL, thread = NULL;
3143 	uintptr_t minsize = 0, maxsize = 0;
3144 
3145 	hrtime_t earliest = 0, latest = 0;
3146 
3147 	uint_t size = 0;
3148 	uint_t verbose = 0;
3149 
3150 	if (!(flags & DCMD_ADDRSPEC))
3151 		return (DCMD_USAGE);
3152 
3153 	if (mdb_getopts(argc, argv,
3154 	    'c', MDB_OPT_UINTPTR, &caller,
3155 	    'e', MDB_OPT_UINT64, &earliest,
3156 	    'l', MDB_OPT_UINT64, &latest,
3157 	    's', MDB_OPT_SETBITS, TRUE, &size,
3158 	    'm', MDB_OPT_UINTPTR, &minsize,
3159 	    'M', MDB_OPT_UINTPTR, &maxsize,
3160 	    't', MDB_OPT_UINTPTR, &thread,
3161 	    'T', MDB_OPT_STR, &type,
3162 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3163 	    NULL) != argc)
3164 		return (DCMD_USAGE);
3165 
3166 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3167 		if (verbose) {
3168 			mdb_printf("%16s %4s %16s %16s %16s\n"
3169 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3170 			    "ADDR", "TYPE", "START", "END", "SIZE",
3171 			    "", "", "THREAD", "TIMESTAMP", "");
3172 		} else {
3173 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3174 			    "START", size? "SIZE" : "END", "WHO");
3175 		}
3176 	}
3177 
3178 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3179 		mdb_warn("couldn't read vmem_seg at %p", addr);
3180 		return (DCMD_ERR);
3181 	}
3182 
3183 	if (type != NULL) {
3184 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3185 			t = VMEM_ALLOC;
3186 		else if (strcmp(type, "FREE") == 0)
3187 			t = VMEM_FREE;
3188 		else if (strcmp(type, "SPAN") == 0)
3189 			t = VMEM_SPAN;
3190 		else if (strcmp(type, "ROTR") == 0 ||
3191 		    strcmp(type, "ROTOR") == 0)
3192 			t = VMEM_ROTOR;
3193 		else if (strcmp(type, "WLKR") == 0 ||
3194 		    strcmp(type, "WALKER") == 0)
3195 			t = VMEM_WALKER;
3196 		else {
3197 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3198 			    type);
3199 			return (DCMD_ERR);
3200 		}
3201 
3202 		if (vs.vs_type != t)
3203 			return (DCMD_OK);
3204 	}
3205 
3206 	sz = vs.vs_end - vs.vs_start;
3207 
3208 	if (minsize != 0 && sz < minsize)
3209 		return (DCMD_OK);
3210 
3211 	if (maxsize != 0 && sz > maxsize)
3212 		return (DCMD_OK);
3213 
3214 	t = vs.vs_type;
3215 	depth = vs.vs_depth;
3216 
3217 	/*
3218 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3219 	 */
3220 	no_debug = (t != VMEM_ALLOC) ||
3221 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3222 
3223 	if (no_debug) {
3224 		if (caller != NULL || thread != NULL || earliest != 0 ||
3225 		    latest != 0)
3226 			return (DCMD_OK);		/* not enough info */
3227 	} else {
3228 		if (caller != NULL) {
3229 			laddr = caller;
3230 			haddr = caller + sizeof (caller);
3231 
3232 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3233 			    sizeof (c), &sym) != -1 &&
3234 			    caller == (uintptr_t)sym.st_value) {
3235 				/*
3236 				 * We were provided an exact symbol value; any
3237 				 * address in the function is valid.
3238 				 */
3239 				laddr = (uintptr_t)sym.st_value;
3240 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3241 			}
3242 
3243 			for (i = 0; i < depth; i++)
3244 				if (vs.vs_stack[i] >= laddr &&
3245 				    vs.vs_stack[i] < haddr)
3246 					break;
3247 
3248 			if (i == depth)
3249 				return (DCMD_OK);
3250 		}
3251 
3252 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3253 			return (DCMD_OK);
3254 
3255 		if (earliest != 0 && vs.vs_timestamp < earliest)
3256 			return (DCMD_OK);
3257 
3258 		if (latest != 0 && vs.vs_timestamp > latest)
3259 			return (DCMD_OK);
3260 	}
3261 
3262 	type = (t == VMEM_ALLOC ? "ALLC" :
3263 	    t == VMEM_FREE ? "FREE" :
3264 	    t == VMEM_SPAN ? "SPAN" :
3265 	    t == VMEM_ROTOR ? "ROTR" :
3266 	    t == VMEM_WALKER ? "WLKR" :
3267 	    "????");
3268 
3269 	if (flags & DCMD_PIPE_OUT) {
3270 		mdb_printf("%#r\n", addr);
3271 		return (DCMD_OK);
3272 	}
3273 
3274 	if (verbose) {
3275 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3276 		    addr, type, vs.vs_start, vs.vs_end, sz);
3277 
3278 		if (no_debug)
3279 			return (DCMD_OK);
3280 
3281 		mdb_printf("%16s %4s %16d %16llx\n",
3282 		    "", "", vs.vs_thread, vs.vs_timestamp);
3283 
3284 		mdb_inc_indent(17);
3285 		for (i = 0; i < depth; i++) {
3286 			mdb_printf("%a\n", stk[i]);
3287 		}
3288 		mdb_dec_indent(17);
3289 		mdb_printf("\n");
3290 	} else {
3291 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3292 		    vs.vs_start, size? sz : vs.vs_end);
3293 
3294 		if (no_debug) {
3295 			mdb_printf("\n");
3296 			return (DCMD_OK);
3297 		}
3298 
3299 		for (i = 0; i < depth; i++) {
3300 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3301 			    c, sizeof (c), &sym) == -1)
3302 				continue;
3303 			if (is_umem_sym(c, "vmem_"))
3304 				continue;
3305 			break;
3306 		}
3307 		mdb_printf(" %a\n", stk[i]);
3308 	}
3309 	return (DCMD_OK);
3310 }
3311 
3312 /*ARGSUSED*/
3313 static int
3314 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3315 {
3316 	char name[UMEM_CACHE_NAMELEN + 1];
3317 	hrtime_t delta;
3318 	int i, depth;
3319 
3320 	if (bcp->bc_timestamp == 0)
3321 		return (WALK_DONE);
3322 
3323 	if (*newest == 0)
3324 		*newest = bcp->bc_timestamp;
3325 
3326 	delta = *newest - bcp->bc_timestamp;
3327 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3328 
3329 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3330 	    &bcp->bc_cache->cache_name) <= 0)
3331 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3332 
3333 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3334 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3335 
3336 	for (i = 0; i < depth; i++)
3337 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3338 
3339 	return (WALK_NEXT);
3340 }
3341 
3342 int
3343 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3344 {
3345 	const char *logname = "umem_transaction_log";
3346 	hrtime_t newest = 0;
3347 
3348 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3349 		return (DCMD_USAGE);
3350 
3351 	if (argc > 0) {
3352 		if (argv->a_type != MDB_TYPE_STRING)
3353 			return (DCMD_USAGE);
3354 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3355 			logname = "umem_failure_log";
3356 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3357 			logname = "umem_slab_log";
3358 		else
3359 			return (DCMD_USAGE);
3360 	}
3361 
3362 	if (umem_readvar(&addr, logname) == -1) {
3363 		mdb_warn("failed to read %s log header pointer");
3364 		return (DCMD_ERR);
3365 	}
3366 
3367 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3368 		mdb_warn("failed to walk umem log");
3369 		return (DCMD_ERR);
3370 	}
3371 
3372 	return (DCMD_OK);
3373 }
3374 
3375 /*
3376  * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3377  * The first piece is a structure which we use to accumulate umem_cache_t
3378  * addresses of interest.  The umc_add is used as a callback for the umem_cache
3379  * walker; we either add all caches, or ones named explicitly as arguments.
3380  */
3381 
typedef struct umclist {
	const char *umc_name;		/* cache name wanted; NULL for all */
	uintptr_t *umc_caches;		/* collected umem_cache_t addresses */
	int umc_nelems;			/* slots of umc_caches filled in */
	int umc_size;			/* slots of umc_caches allocated */
} umclist_t;
3388 
3389 static int
3390 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3391 {
3392 	void *p;
3393 	int s;
3394 
3395 	if (umc->umc_name == NULL ||
3396 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3397 		/*
3398 		 * If we have a match, grow our array (if necessary), and then
3399 		 * add the virtual address of the matching cache to our list.
3400 		 */
3401 		if (umc->umc_nelems >= umc->umc_size) {
3402 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3403 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3404 
3405 			bcopy(umc->umc_caches, p,
3406 			    sizeof (uintptr_t) * umc->umc_size);
3407 
3408 			umc->umc_caches = p;
3409 			umc->umc_size = s;
3410 		}
3411 
3412 		umc->umc_caches[umc->umc_nelems++] = addr;
3413 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3414 	}
3415 
3416 	return (WALK_NEXT);
3417 }
3418 
3419 /*
3420  * The second piece of ::umausers is a hash table of allocations.  Each
3421  * allocation owner is identified by its stack trace and data_size.  We then
3422  * track the total bytes of all such allocations, and the number of allocations
3423  * to report at the end.  Once we have a list of caches, we walk through the
3424  * allocated bufctls of each, and update our hash table accordingly.
3425  */
3426 
3427 typedef struct umowner {
3428 	struct umowner *umo_head;		/* First hash elt in bucket */
3429 	struct umowner *umo_next;		/* Next hash elt in chain */
3430 	size_t umo_signature;			/* Hash table signature */
3431 	uint_t umo_num;				/* Number of allocations */
3432 	size_t umo_data_size;			/* Size of each allocation */
3433 	size_t umo_total_size;			/* Total bytes of allocation */
3434 	int umo_depth;				/* Depth of stack trace */
3435 	uintptr_t *umo_stack;			/* Stack trace */
3436 } umowner_t;
3437 
3438 typedef struct umusers {
3439 	const umem_cache_t *umu_cache;		/* Current umem cache */
3440 	umowner_t *umu_hash;			/* Hash table of owners */
3441 	uintptr_t *umu_stacks;			/* stacks for owners */
3442 	int umu_nelems;				/* Number of entries in use */
3443 	int umu_size;				/* Total number of entries */
3444 } umusers_t;
3445 
3446 static void
3447 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3448     size_t size, size_t data_size)
3449 {
3450 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3451 	size_t bucket, signature = data_size;
3452 	umowner_t *umo, *umoend;
3453 
3454 	/*
3455 	 * If the hash table is full, double its size and rehash everything.
3456 	 */
3457 	if (umu->umu_nelems >= umu->umu_size) {
3458 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3459 		size_t umowner_size = sizeof (umowner_t);
3460 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3461 		uintptr_t *new_stacks;
3462 
3463 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3464 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3465 
3466 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3467 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3468 		umu->umu_hash = umo;
3469 		umu->umu_stacks = new_stacks;
3470 		umu->umu_size = s;
3471 
3472 		umoend = umu->umu_hash + umu->umu_size;
3473 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3474 			umo->umo_head = NULL;
3475 			umo->umo_stack = &umu->umu_stacks[
3476 			    umem_stack_depth * (umo - umu->umu_hash)];
3477 		}
3478 
3479 		umoend = umu->umu_hash + umu->umu_nelems;
3480 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3481 			bucket = umo->umo_signature & (umu->umu_size - 1);
3482 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3483 			umu->umu_hash[bucket].umo_head = umo;
3484 		}
3485 	}
3486 
3487 	/*
3488 	 * Finish computing the hash signature from the stack trace, and then
3489 	 * see if the owner is in the hash table.  If so, update our stats.
3490 	 */
3491 	for (i = 0; i < depth; i++)
3492 		signature += bcp->bc_stack[i];
3493 
3494 	bucket = signature & (umu->umu_size - 1);
3495 
3496 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3497 		if (umo->umo_signature == signature) {
3498 			size_t difference = 0;
3499 
3500 			difference |= umo->umo_data_size - data_size;
3501 			difference |= umo->umo_depth - depth;
3502 
3503 			for (i = 0; i < depth; i++) {
3504 				difference |= umo->umo_stack[i] -
3505 				    bcp->bc_stack[i];
3506 			}
3507 
3508 			if (difference == 0) {
3509 				umo->umo_total_size += size;
3510 				umo->umo_num++;
3511 				return;
3512 			}
3513 		}
3514 	}
3515 
3516 	/*
3517 	 * If the owner is not yet hashed, grab the next element and fill it
3518 	 * in based on the allocation information.
3519 	 */
3520 	umo = &umu->umu_hash[umu->umu_nelems++];
3521 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3522 	umu->umu_hash[bucket].umo_head = umo;
3523 
3524 	umo->umo_signature = signature;
3525 	umo->umo_num = 1;
3526 	umo->umo_data_size = data_size;
3527 	umo->umo_total_size = size;
3528 	umo->umo_depth = depth;
3529 
3530 	for (i = 0; i < depth; i++)
3531 		umo->umo_stack[i] = bcp->bc_stack[i];
3532 }
3533 
3534 /*
3535  * When ::umausers is invoked without the -f flag, we simply update our hash
3536  * table with the information from each allocated bufctl.
3537  */
3538 /*ARGSUSED*/
3539 static int
3540 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3541 {
3542 	const umem_cache_t *cp = umu->umu_cache;
3543 
3544 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3545 	return (WALK_NEXT);
3546 }
3547 
3548 /*
3549  * When ::umausers is invoked with the -f flag, we print out the information
3550  * for each bufctl as well as updating the hash table.
3551  */
3552 static int
3553 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3554 {
3555 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3556 	const umem_cache_t *cp = umu->umu_cache;
3557 
3558 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3559 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3560 
3561 	for (i = 0; i < depth; i++)
3562 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3563 
3564 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3565 	return (WALK_NEXT);
3566 }
3567 
3568 /*
3569  * We sort our results by allocation size before printing them.
3570  */
3571 static int
3572 umownercmp(const void *lp, const void *rp)
3573 {
3574 	const umowner_t *lhs = lp;
3575 	const umowner_t *rhs = rp;
3576 
3577 	return (rhs->umo_total_size - lhs->umo_total_size);
3578 }
3579 
3580 /*
3581  * The main engine of ::umausers is relatively straightforward: First we
3582  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3583  * iterate over the allocated bufctls of each cache in the list.  Finally,
3584  * we sort and print our results.
3585  */
3586 /*ARGSUSED*/
3587 int
3588 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3589 {
3590 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3591 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3592 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3593 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3594 	int opt_e = FALSE;		/* Include "small" users */
3595 	int opt_f = FALSE;		/* Print stack traces */
3596 
3597 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3598 	umowner_t *umo, *umoend;
3599 	int i, oelems;
3600 
3601 	umclist_t umc;
3602 	umusers_t umu;
3603 
3604 	if (flags & DCMD_ADDRSPEC)
3605 		return (DCMD_USAGE);
3606 
3607 	bzero(&umc, sizeof (umc));
3608 	bzero(&umu, sizeof (umu));
3609 
3610 	while ((i = mdb_getopts(argc, argv,
3611 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3612 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3613 
3614 		argv += i;	/* skip past options we just processed */
3615 		argc -= i;	/* adjust argc */
3616 
3617 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3618 			return (DCMD_USAGE);
3619 
3620 		oelems = umc.umc_nelems;
3621 		umc.umc_name = argv->a_un.a_str;
3622 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3623 
3624 		if (umc.umc_nelems == oelems) {
3625 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3626 			return (DCMD_ERR);
3627 		}
3628 
3629 		do_all_caches = 0;
3630 		argv++;
3631 		argc--;
3632 	}
3633 
3634 	if (opt_e)
3635 		mem_threshold = cnt_threshold = 0;
3636 
3637 	if (opt_f)
3638 		callback = (mdb_walk_cb_t)umause2;
3639 
3640 	if (do_all_caches) {
3641 		umc.umc_name = NULL; /* match all cache names */
3642 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3643 	}
3644 
3645 	for (i = 0; i < umc.umc_nelems; i++) {
3646 		uintptr_t cp = umc.umc_caches[i];
3647 		umem_cache_t c;
3648 
3649 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3650 			mdb_warn("failed to read cache at %p", cp);
3651 			continue;
3652 		}
3653 
3654 		if (!(c.cache_flags & UMF_AUDIT)) {
3655 			if (!do_all_caches) {
3656 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3657 				    c.cache_name);
3658 			}
3659 			continue;
3660 		}
3661 
3662 		umu.umu_cache = &c;
3663 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3664 		audited_caches++;
3665 	}
3666 
3667 	if (audited_caches == 0 && do_all_caches) {
3668 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3669 		return (DCMD_ERR);
3670 	}
3671 
3672 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3673 	umoend = umu.umu_hash + umu.umu_nelems;
3674 
3675 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3676 		if (umo->umo_total_size < mem_threshold &&
3677 		    umo->umo_num < cnt_threshold)
3678 			continue;
3679 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3680 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3681 		for (i = 0; i < umo->umo_depth; i++)
3682 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3683 	}
3684 
3685 	return (DCMD_OK);
3686 }
3687 
/*
 * The tag found at the front of a malloc(3C) buffer, read from the target
 * by um_umem_buffer_cb().
 */
struct malloc_data {
	uint32_t malloc_size;	/* size that was passed to umem_alloc() */
	uint32_t malloc_stat;	/* UMEM_MALLOC_ENCODE(state, malloc_size) */
};
3692 
/*
 * Largest malloc size that gets its own slot in a um_bucket array: the
 * biggest umem buffer less the tag, which is counted twice on 64-bit.
 */
#ifndef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#endif
3698 
3699 typedef struct umem_malloc_info {
3700 	size_t um_total;	/* total allocated buffers */
3701 	size_t um_malloc;	/* malloc buffers */
3702 	size_t um_malloc_size;	/* sum of malloc buffer sizes */
3703 	size_t um_malloc_overhead; /* sum of in-chunk overheads */
3704 
3705 	umem_cache_t *um_cp;
3706 
3707 	uint_t *um_bucket;
3708 } umem_malloc_info_t;
3709 
3710 static void
3711 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3712     size_t maxbuckets, size_t minbucketsize, int geometric)
3713 {
3714 	uint64_t um_malloc;
3715 	int minb = -1;
3716 	int maxb = -1;
3717 	int buckets;
3718 	int nbucks;
3719 	int i;
3720 	int b;
3721 	const int *distarray;
3722 
3723 	minb = (int)minmalloc;
3724 	maxb = (int)maxmalloc;
3725 
3726 	nbucks = buckets = maxb - minb + 1;
3727 
3728 	um_malloc = 0;
3729 	for (b = minb; b <= maxb; b++)
3730 		um_malloc += um_bucket[b];
3731 
3732 	if (maxbuckets != 0)
3733 		buckets = MIN(buckets, maxbuckets);
3734 
3735 	if (minbucketsize > 1) {
3736 		buckets = MIN(buckets, nbucks/minbucketsize);
3737 		if (buckets == 0) {
3738 			buckets = 1;
3739 			minbucketsize = nbucks;
3740 		}
3741 	}
3742 
3743 	if (geometric)
3744 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3745 	else
3746 		distarray = dist_linear(buckets, minb, maxb);
3747 
3748 	dist_print_header("malloc size", 11, "count");
3749 	for (i = 0; i < buckets; i++) {
3750 		dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3751 	}
3752 	mdb_printf("\n");
3753 }
3754 
3755 /*
3756  * A malloc()ed buffer looks like:
3757  *
3758  *	<----------- mi.malloc_size --->
3759  *	<----------- cp.cache_bufsize ------------------>
3760  *	<----------- cp.cache_chunksize -------------------------------->
3761  *	+-------+-----------------------+---------------+---------------+
3762  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3763  *	+-------+---------------------------------------+---------------+
3764  *		<-- usable space ------>
3765  *
3766  * mallocsz is the argument to malloc(3C).
3767  * mi.malloc_size is the actual size passed to umem_alloc(), which
3768  * is rounded up to the smallest available cache size, which is
3769  * cache_bufsize.  If there is debugging or alignment overhead in
3770  * the cache, that is reflected in a larger cache_chunksize.
3771  *
3772  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3773  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3774  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3775  * the tag has 8 bytes of padding before it.
3776  *
 * 32-bit, 64-bit buffers <= 8 bytes:
3778  *	+-------+-------+--------- ...
3779  *	|/size//|/stat//| mallocsz ...
3780  *	+-------+-------+--------- ...
3781  *			^
3782  *			pointer returned from malloc(3C)
3783  *
 * 64-bit buffers > 8 bytes:
3785  *	+---------------+-------+-------+--------- ...
3786  *	|/padding///////|/size//|/stat//| mallocsz ...
3787  *	+---------------+-------+-------+--------- ...
3788  *					^
3789  *					pointer returned from malloc(3C)
3790  *
3791  * The "size" field is "malloc_size", which is mallocsz + the padding.
3792  * The "stat" field is derived from malloc_size, and functions as a
3793  * validation that this buffer is actually from malloc(3C).
3794  */
3795 /*ARGSUSED*/
3796 static int
3797 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3798 {
3799 	struct malloc_data md;
3800 	size_t m_addr = addr;
3801 	size_t overhead = sizeof (md);
3802 	size_t mallocsz;
3803 
3804 	ump->um_total++;
3805 
3806 #ifdef _LP64
3807 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3808 		m_addr += overhead;
3809 		overhead += sizeof (md);
3810 	}
3811 #endif
3812 
3813 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3814 		mdb_warn("unable to read malloc header at %p", m_addr);
3815 		return (WALK_NEXT);
3816 	}
3817 
3818 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3819 	case MALLOC_MAGIC:
3820 #ifdef _LP64
3821 	case MALLOC_SECOND_MAGIC:
3822 #endif
3823 		mallocsz = md.malloc_size - overhead;
3824 
3825 		ump->um_malloc++;
3826 		ump->um_malloc_size += mallocsz;
3827 		ump->um_malloc_overhead += overhead;
3828 
3829 		/* include round-off and debug overhead */
3830 		ump->um_malloc_overhead +=
3831 		    ump->um_cp->cache_chunksize - md.malloc_size;
3832 
3833 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
3834 			ump->um_bucket[mallocsz]++;
3835 
3836 		break;
3837 	default:
3838 		break;
3839 	}
3840 
3841 	return (WALK_NEXT);
3842 }
3843 
3844 int
3845 get_umem_alloc_sizes(int **out, size_t *out_num)
3846 {
3847 	GElf_Sym sym;
3848 
3849 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
3850 		mdb_warn("unable to look up umem_alloc_sizes");
3851 		return (-1);
3852 	}
3853 
3854 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
3855 	*out_num = sym.st_size / sizeof (int);
3856 
3857 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
3858 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
3859 		*out = NULL;
3860 		return (-1);
3861 	}
3862 
3863 	return (0);
3864 }
3865 
3866 
3867 static int
3868 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
3869 {
3870 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
3871 		return (WALK_NEXT);
3872 
3873 	ump->um_cp = cp;
3874 
3875 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
3876 	    -1) {
3877 		mdb_warn("can't walk 'umem' for cache %p", addr);
3878 		return (WALK_ERR);
3879 	}
3880 
3881 	return (WALK_NEXT);
3882 }
3883 
/*
 * Print the help text for ::umem_malloc_dist: a one-line summary followed
 * by the list of options.
 */
void
umem_malloc_dist_help(void)
{
	static const char options[] =
	    "  -b maxbins\n"
	    "        Use at most maxbins bins for the data\n"
	    "  -B minbinsize\n"
	    "        Make the bins at least minbinsize bytes apart\n"
	    "  -d    dump the raw data out, without binning\n"
	    "  -g    use geometric binning instead of linear binning\n";

	mdb_printf("%s\n", "report distribution of outstanding malloc()s");

	/* the OPTIONS heading is printed two columns to the left */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3900 
3901 /*ARGSUSED*/
3902 int
3903 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3904 {
3905 	umem_malloc_info_t mi;
3906 	uint_t geometric = 0;
3907 	uint_t dump = 0;
3908 	size_t maxbuckets = 0;
3909 	size_t minbucketsize = 0;
3910 
3911 	size_t minalloc = 0;
3912 	size_t maxalloc = UMI_MAX_BUCKET;
3913 
3914 	if (flags & DCMD_ADDRSPEC)
3915 		return (DCMD_USAGE);
3916 
3917 	if (mdb_getopts(argc, argv,
3918 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3919 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3920 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3921 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3922 	    0) != argc)
3923 		return (DCMD_USAGE);
3924 
3925 	bzero(&mi, sizeof (mi));
3926 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
3927 	    UM_SLEEP | UM_GC);
3928 
3929 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
3930 	    &mi) == -1) {
3931 		mdb_warn("unable to walk 'umem_cache'");
3932 		return (DCMD_ERR);
3933 	}
3934 
3935 	if (dump) {
3936 		int i;
3937 		for (i = minalloc; i <= maxalloc; i++)
3938 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
3939 
3940 		return (DCMD_OK);
3941 	}
3942 
3943 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
3944 	    maxbuckets, minbucketsize, geometric);
3945 
3946 	return (DCMD_OK);
3947 }
3948 
/*
 * Print the help text for ::umem_malloc_info: a one-line summary followed
 * by the list of options.  The -g line is left out when built for kmdb.
 */
void
umem_malloc_info_help(void)
{
	static const char options[] =
	    "  -b maxbins\n"
	    "        Use at most maxbins bins for the data\n"
	    "  -B minbinsize\n"
	    "        Make the bins at least minbinsize bytes apart\n"
	    "  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
	    "  -g    use geometric binning instead of linear binning\n"
#endif
	    "";

	mdb_printf("%s\n", "report information about malloc()s by cache.  ");

	/* the OPTIONS heading is printed two columns to the left */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3968 int
3969 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3970 {
3971 	umem_cache_t c;
3972 	umem_malloc_info_t mi;
3973 
3974 	int skip = 0;
3975 
3976 	size_t maxmalloc;
3977 	size_t overhead;
3978 	size_t allocated;
3979 	size_t avg_malloc;
3980 	size_t overhead_pct;	/* 1000 * overhead_percent */
3981 
3982 	uint_t verbose = 0;
3983 	uint_t dump = 0;
3984 	uint_t geometric = 0;
3985 	size_t maxbuckets = 0;
3986 	size_t minbucketsize = 0;
3987 
3988 	int *alloc_sizes;
3989 	int idx;
3990 	size_t num;
3991 	size_t minmalloc;
3992 
3993 	if (mdb_getopts(argc, argv,
3994 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3995 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3996 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3997 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3998 	    0) != argc)
3999 		return (DCMD_USAGE);
4000 
4001 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4002 		verbose = 1;
4003 
4004 	if (!(flags & DCMD_ADDRSPEC)) {
4005 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4006 		    argc, argv) == -1) {
4007 			mdb_warn("can't walk umem_cache");
4008 			return (DCMD_ERR);
4009 		}
4010 		return (DCMD_OK);
4011 	}
4012 
4013 	if (!mdb_vread(&c, sizeof (c), addr)) {
4014 		mdb_warn("unable to read cache at %p", addr);
4015 		return (DCMD_ERR);
4016 	}
4017 
4018 	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4019 		if (!(flags & DCMD_LOOP))
4020 			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4021 			    "by malloc()\n", c.cache_name);
4022 		skip = 1;
4023 	}
4024 
4025 	/*
4026 	 * normally, print the header only the first time.  In verbose mode,
4027 	 * print the header on every non-skipped buffer
4028 	 */
4029 	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4030 		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4031 		    "CACHE", "BUFSZ", "MAXMAL",
4032 		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4033 
4034 	if (skip)
4035 		return (DCMD_OK);
4036 
4037 	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4038 #ifdef _LP64
4039 	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4040 		maxmalloc -= sizeof (struct malloc_data);
4041 #endif
4042 
4043 	bzero(&mi, sizeof (mi));
4044 	mi.um_cp = &c;
4045 	if (verbose)
4046 		mi.um_bucket =
4047 		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4048 		    UM_SLEEP | UM_GC);
4049 
4050 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4051 	    -1) {
4052 		mdb_warn("can't walk 'umem'");
4053 		return (DCMD_ERR);
4054 	}
4055 
4056 	overhead = mi.um_malloc_overhead;
4057 	allocated = mi.um_malloc_size;
4058 
4059 	/* do integer round off for the average */
4060 	if (mi.um_malloc != 0)
4061 		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4062 	else
4063 		avg_malloc = 0;
4064 
4065 	/*
4066 	 * include per-slab overhead
4067 	 *
4068 	 * Each slab in a given cache is the same size, and has the same
4069 	 * number of chunks in it;  we read in the first slab on the
4070 	 * slab list to get the number of chunks for all slabs.  To
4071 	 * compute the per-slab overhead, we just subtract the chunk usage
4072 	 * from the slabsize:
4073 	 *
4074 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4075 	 * |////////////|	|	| ...	|	|///////|///////|
4076 	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
4077 	 * |////////////|	|	| ...	|	|///////|///////|
4078 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4079 	 * |		\_______chunksize * chunks_____/		|
4080 	 * \__________________________slabsize__________________________/
4081 	 *
4082 	 * For UMF_HASH caches, there is an additional source of overhead;
4083 	 * the external umem_slab_t and per-chunk bufctl structures.  We
4084 	 * include those in our per-slab overhead.
4085 	 *
4086 	 * Once we have a number for the per-slab overhead, we estimate
4087 	 * the actual overhead by treating the malloc()ed buffers as if
4088 	 * they were densely packed:
4089 	 *
4090 	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
4091 	 *
4092 	 * carefully ordering the multiply before the divide, to avoid
4093 	 * round-off error.
4094 	 */
4095 	if (mi.um_malloc != 0) {
4096 		umem_slab_t slab;
4097 		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4098 
4099 		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4100 			mdb_warn("unable to read slab at %p\n", saddr);
4101 		} else {
4102 			long chunks = slab.slab_chunks;
4103 			if (chunks != 0 && c.cache_chunksize != 0 &&
4104 			    chunks <= c.cache_slabsize / c.cache_chunksize) {
4105 				uintmax_t perslab =
4106 				    c.cache_slabsize -
4107 				    (c.cache_chunksize * chunks);
4108 
4109 				if (c.cache_flags & UMF_HASH) {
4110 					perslab += sizeof (umem_slab_t) +
4111 					    chunks *
4112 					    ((c.cache_flags & UMF_AUDIT) ?
4113 					    sizeof (umem_bufctl_audit_t) :
4114 					    sizeof (umem_bufctl_t));
4115 				}
4116 				overhead +=
4117 				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
4118 			} else {
4119 				mdb_warn("invalid #chunks (%d) in slab %p\n",
4120 				    chunks, saddr);
4121 			}
4122 		}
4123 	}
4124 
4125 	if (allocated != 0)
4126 		overhead_pct = (1000ULL * overhead) / allocated;
4127 	else
4128 		overhead_pct = 0;
4129 
4130 	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4131 	    addr, c.cache_bufsize, maxmalloc,
4132 	    mi.um_malloc, avg_malloc, allocated, overhead,
4133 	    overhead_pct / 10, overhead_pct % 10);
4134 
4135 	if (!verbose)
4136 		return (DCMD_OK);
4137 
4138 	if (!dump)
4139 		mdb_printf("\n");
4140 
4141 	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4142 		return (DCMD_ERR);
4143 
4144 	for (idx = 0; idx < num; idx++) {
4145 		if (alloc_sizes[idx] == c.cache_bufsize)
4146 			break;
4147 		if (alloc_sizes[idx] == 0) {
4148 			idx = num;	/* 0-terminated array */
4149 			break;
4150 		}
4151 	}
4152 	if (idx == num) {
4153 		mdb_warn(
4154 		    "cache %p's size (%d) not in umem_alloc_sizes\n",
4155 		    addr, c.cache_bufsize);
4156 		return (DCMD_ERR);
4157 	}
4158 
4159 	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4160 	if (minmalloc > 0) {
4161 #ifdef _LP64
4162 		if (minmalloc > UMEM_SECOND_ALIGN)
4163 			minmalloc -= sizeof (struct malloc_data);
4164 #endif
4165 		minmalloc -= sizeof (struct malloc_data);
4166 		minmalloc += 1;
4167 	}
4168 
4169 	if (dump) {
4170 		for (idx = minmalloc; idx <= maxmalloc; idx++)
4171 			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4172 		mdb_printf("\n");
4173 	} else {
4174 		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4175 		    maxbuckets, minbucketsize, geometric);
4176 	}
4177 
4178 	return (DCMD_OK);
4179 }
4180