/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "umem.h"

#include <sys/vmem_impl_user.h>
#include <umem_impl.h>

#include <alloca.h>
#include <limits.h>

#include "misc.h"
#include "leaky.h"
#include "dist.h"

#include "umem_pagesize.h"

#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

int umem_ready;

static int umem_stack_depth_warned;
static uint32_t umem_max_ncpus;
uint32_t umem_stack_depth;

size_t umem_pagesize;

#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    (mdb_warn("failed to read "#var), 1))
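/*
 * UMEM_READVAR expands to an expression: it reads the target's variable
 * of the same name into the local `var', and evaluates to nonzero (after
 * issuing a warning) only if the read fails.  Typical use, as below:
 *
 *	if (UMEM_READVAR(umem_ready))
 *		return (-1);
 */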

int
umem_update_variables(void)
{
	size_t pagesize;

	/*
	 * Figure out which type of umem is being used; if it's not there
	 * yet, succeed quietly.
	 */
	if (umem_set_standalone() == -1) {
		umem_ready = 0;
		return (0);		/* umem not there yet */
	}

	/*
	 * Solaris 9 used a different name for umem_max_ncpus.  It's
	 * cheap backwards compatibility to check for both names.
	 */
	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
		return (-1);
	}
	if (UMEM_READVAR(umem_ready))
		return (-1);
	if (UMEM_READVAR(umem_stack_depth))
		return (-1);
	if (UMEM_READVAR(pagesize))
		return (-1);

	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
		if (umem_stack_depth_warned == 0) {
			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
			umem_stack_depth_warned = 1;
		}
		umem_stack_depth = 0;
	}

	umem_pagesize = pagesize;

	return (0);
}

/*ARGSUSED*/
static int
umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = umem_walk_init;
	w.walk_step = umem_walk_step;
	w.walk_fini = umem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
static void
umem_statechange_cb(void *arg)
{
	static int been_ready = 0;

#ifndef _KMDB
	leaky_cleanup(1);	/* state changes invalidate leaky state */
#endif

	if (umem_update_variables() == -1)
		return;

	if (been_ready)
		return;

	if (umem_ready != UMEM_READY)
		return;

	been_ready = 1;
	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL);
}

int
umem_init(void)
{
	mdb_walker_t w = {
		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
		umem_cache_walk_step, umem_cache_walk_fini
	};

	if (mdb_add_walker(&w) == -1) {
		mdb_warn("failed to add umem_cache walker");
		return (-1);
	}

	if (umem_update_variables() == -1)
		return (-1);

	/* install a callback so that our variables are always up-to-date */
	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
	umem_statechange_cb(NULL);

	return (0);
}

int
umem_abort_messages(void)
{
	char *umem_error_buffer;
	uint_t umem_error_begin;
	GElf_Sym sym;
	size_t bufsize;

	if (UMEM_READVAR(umem_error_begin))
		return (DCMD_ERR);

	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
		mdb_warn("unable to look up umem_error_buffer");
		return (DCMD_ERR);
	}

	bufsize = (size_t)sym.st_size;

	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);

	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
	    != bufsize) {
		mdb_warn("unable to read umem_error_buffer");
		return (DCMD_ERR);
	}
	/* put a zero after the end of the buffer to simplify printing */
	umem_error_buffer[bufsize] = 0;

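	/*
	 * umem_error_buffer is circular; umem_error_begin gives (modulo
	 * the buffer size) the offset where the next byte would land, so
	 * the oldest data starts there.  If that offset is 0 we can print
	 * the buffer straight through; otherwise we print from the offset
	 * to the end, then from the beginning up to the offset.
	 */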
	if ((umem_error_begin % bufsize) == 0)
		mdb_printf("%s\n", umem_error_buffer);
	else {
		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
		mdb_printf("%s%s\n",
		    &umem_error_buffer[umem_error_begin % bufsize],
		    umem_error_buffer);
	}

	return (DCMD_OK);
}

static void
umem_log_status(const char *name, umem_log_header_t *val)
{
	umem_log_header_t my_lh;
	uintptr_t pos = (uintptr_t)val;
	size_t size;

	if (pos == NULL)
		return;

	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
		mdb_warn("\nunable to read umem_%s_log pointer %p",
		    name, pos);
		return;
	}

	size = my_lh.lh_chunksize * my_lh.lh_nchunks;

	if (size % (1024 * 1024) == 0)
		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
	else if (size % 1024 == 0)
		mdb_printf("%s=%dk ", name, size / 1024);
	else
		mdb_printf("%s=%d ", name, size);
}

typedef struct umem_debug_flags {
	const char	*udf_name;
	uint_t		udf_flags;
	uint_t		udf_clear;	/* if 0, uses udf_flags */
} umem_debug_flags_t;

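/*
 * Names for the umem debugging flag bits.  The option names match those
 * accepted in the UMEM_DEBUG environment variable (umem_debug(3MALLOC));
 * presumably this table is consumed by flag-reporting code later in the
 * file, beyond this excerpt.
 */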
umem_debug_flags_t umem_status_flags[] = {
	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
	{ "audit",	UMF_AUDIT },
	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
	{ "nosignal",	UMF_CHECKSIGNAL },
	{ "firewall",	UMF_FIREWALL },
	{ "lite",	UMF_LITE },
	{ NULL }
};

/*ARGSUSED*/
int
umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	int umem_logging;

	umem_log_header_t *umem_transaction_log;
	umem_log_header_t *umem_content_log;
	umem_log_header_t *umem_failure_log;
	umem_log_header_t *umem_slab_log;

	mdb_printf("Status:\t\t%s\n",
	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
	    umem_ready == UMEM_READY ? "ready and active" :
	    umem_ready == 0 ? "not loaded into address space" :
	    "unknown (umem_ready invalid)");

	if (umem_ready == 0)
		return (DCMD_OK);

	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);

	if (UMEM_READVAR(umem_logging))
		goto err;
	if (UMEM_READVAR(umem_transaction_log))
		goto err;
	if (UMEM_READVAR(umem_content_log))
		goto err;
	if (UMEM_READVAR(umem_failure_log))
		goto err;
	if (UMEM_READVAR(umem_slab_log))
		goto err;

	mdb_printf("Logs:\t\t");
	umem_log_status("transaction", umem_transaction_log);
	umem_log_status("content", umem_content_log);
	umem_log_status("fail", umem_failure_log);
	umem_log_status("slab", umem_slab_log);
	if (!umem_logging)
		mdb_printf("(inactive)");
	mdb_printf("\n");

	mdb_printf("Message buffer:\n");
	return (umem_abort_messages());

err:
	mdb_printf("Message buffer:\n");
	(void) umem_abort_messages();
	return (DCMD_ERR);
}

typedef struct {
	uintptr_t ucw_first;
	uintptr_t ucw_current;
} umem_cache_walk_t;

int
umem_cache_walk_init(mdb_walk_state_t *wsp)
{
	umem_cache_walk_t *ucw;
	umem_cache_t c;
	uintptr_t cp;
	GElf_Sym sym;

	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
		mdb_warn("couldn't find umem_null_cache");
		return (WALK_ERR);
	}

	cp = (uintptr_t)sym.st_value;

	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
		mdb_warn("couldn't read cache at %p", cp);
		return (WALK_ERR);
	}

	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);

	ucw->ucw_first = cp;
	ucw->ucw_current = (uintptr_t)c.cache_next;
	wsp->walk_data = ucw;

	return (WALK_NEXT);
}

int
umem_cache_walk_step(mdb_walk_state_t *wsp)
{
	umem_cache_walk_t *ucw = wsp->walk_data;
	umem_cache_t c;
	int status;

	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
		return (WALK_DONE);
	}

	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);

	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
		return (WALK_DONE);

	return (status);
}

void
umem_cache_walk_fini(mdb_walk_state_t *wsp)
{
	umem_cache_walk_t *ucw = wsp->walk_data;
	mdb_free(ucw, sizeof (umem_cache_walk_t));
}

typedef struct {
	umem_cpu_t *ucw_cpus;
	uint32_t ucw_current;
	uint32_t ucw_max;
} umem_cpu_walk_state_t;

int
umem_cpu_walk_init(mdb_walk_state_t *wsp)
{
	umem_cpu_t *umem_cpus;

	umem_cpu_walk_state_t *ucw;

	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
		mdb_warn("failed to read 'umem_cpus'");
		return (WALK_ERR);
	}

	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);

	ucw->ucw_cpus = umem_cpus;
	ucw->ucw_current = 0;
	ucw->ucw_max = umem_max_ncpus;

	wsp->walk_data = ucw;
	return (WALK_NEXT);
}

int
umem_cpu_walk_step(mdb_walk_state_t *wsp)
{
	umem_cpu_t cpu;
	umem_cpu_walk_state_t *ucw = wsp->walk_data;

	uintptr_t caddr;

	if (ucw->ucw_current >= ucw->ucw_max)
		return (WALK_DONE);

	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);

	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
		mdb_warn("failed to read cpu %d", ucw->ucw_current);
		return (WALK_ERR);
	}

	ucw->ucw_current++;

	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
}

void
umem_cpu_walk_fini(mdb_walk_state_t *wsp)
{
	umem_cpu_walk_state_t *ucw = wsp->walk_data;

	mdb_free(ucw, sizeof (*ucw));
}

int
umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("umem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'umem_cpu'");
		return (WALK_ERR);
	}

	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}

int
umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const umem_cpu_t *cpu = wsp->walk_layer;
	umem_cpu_cache_t cc;

	caddr += cpu->cpu_cache_offset;

	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}

int
umem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	umem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("umem_slab doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read umem_cache at %p", caddr);
		return (WALK_ERR);
	}

	wsp->walk_data =
	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;

	return (WALK_NEXT);
}

int
umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	umem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("umem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read umem_cache at %p", caddr);
		return (WALK_ERR);
	}

	wsp->walk_data =
	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_freelist;

	/*
	 * Some consumers (umem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache.  So
	 * if there are *no* partial slabs, report the last full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;

	return (WALK_NEXT);
}

int
umem_slab_walk_step(mdb_walk_state_t *wsp)
{
	umem_slab_t s;
	uintptr_t addr = wsp->walk_addr;
	uintptr_t saddr = (uintptr_t)wsp->walk_data;
	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);

	if (addr == saddr)
		return (WALK_DONE);

	if (mdb_vread(&s, sizeof (s), addr) == -1) {
		mdb_warn("failed to read slab at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	if ((uintptr_t)s.slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    addr, caddr, s.slab_cache);
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)s.slab_next;

	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
}

int
umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	umem_cache_t c;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
			mdb_warn("can't walk umem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read umem_cache at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}

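/*
 * Comparator for qsort(3C)/bsearch(3C) over arrays of buffer addresses,
 * such as the magazine list assembled by umem_read_magazines() below.
 */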
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}

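/*
 * Comparator used to sort bufctl audit records newest-first (by
 * descending bc_timestamp); see umem_log_walk_init().
 */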
static int
bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
{
	const umem_bufctl_audit_t *bcp1 = *lhs;
	const umem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
		return (-1);

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
		return (1);

	return (0);
}

typedef struct umem_hash_walk {
	uintptr_t *umhw_table;
	size_t umhw_nelems;
	size_t umhw_pos;
	umem_bufctl_t umhw_cur;
} umem_hash_walk_t;

int
umem_hash_walk_init(mdb_walk_state_t *wsp)
{
	umem_hash_walk_t *umhw;
	uintptr_t *hash;
	umem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == NULL) {
		mdb_warn("umem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & UMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
	umhw->umhw_cur.bc_next = NULL;
	umhw->umhw_pos = 0;

	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(umhw, sizeof (umem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = umhw;

	return (WALK_NEXT);
}

int
umem_hash_walk_step(mdb_walk_state_t *wsp)
{
	umem_hash_walk_t *umhw = wsp->walk_data;
	uintptr_t addr = NULL;

	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
		while (umhw->umhw_pos < umhw->umhw_nelems) {
			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
				break;
		}
	}
	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
}

void
umem_hash_walk_fini(mdb_walk_state_t *wsp)
{
	umem_hash_walk_t *umhw = wsp->walk_data;

	if (umhw == NULL)
		return;

	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
	mdb_free(umhw, sizeof (umem_hash_walk_t));
}

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
	umem_bufctl_t *bcp;
	umem_bufctl_t bc;

	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",
		    buf, caddr);
		return (-1);
	}

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
			return (-1);
		}
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;
			return (0);
		}
		bcp = bc.bc_next;
	}

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
	return (-1);
}

int
umem_get_magsize(const umem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	umem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & UMF_NOMAGAZINE))
		return (res);

	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'umem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("umem_slab_partial",
	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);

	if ((magsize = umem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}

#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
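
/*
 * READMAG_ROUNDS deliberately leans on its caller's locals: `ump' is the
 * target address of the magazine to read, `mp' is a scratch copy of
 * `magbsize' bytes, and maglist/magcnt/magmax accumulate the rounds.  On
 * any failure it jumps to the caller's `fail' label.
 */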

int
umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	umem_magazine_t *ump, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = umem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
		READMAG_ROUNDS(magsize);
		ump = mp->mag_next;

		if (ump == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (ump = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (ump = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	umem_bufctl_audit_t *b;
	UMEM_LOCAL_BUFCTL_AUDIT(&b);

	/*
	 * if UMF_AUDIT is not set, we know that we're looking at a
	 * umem_bufctl_t.
	 */
	if (!(cp->cache_flags & UMF_AUDIT) ||
	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
}

typedef struct umem_walk {
	int umw_type;

	int umw_addr;			/* cache address */
	umem_cache_t *umw_cp;
	size_t umw_csize;

	/*
	 * magazine layer
	 */
	void **umw_maglist;
	size_t umw_max;
	size_t umw_count;
	size_t umw_pos;

	/*
	 * slab layer
	 */
	char *umw_valid;	/* to keep track of freed buffers */
	char *umw_ubase;	/* buffer for slab data */
} umem_walk_t;

static int
umem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	umem_walk_t *umw;
	int csize;
	umem_cache_t *cp;
	size_t vm_quantum;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	type &= ~UM_HASH;

	if (addr == NULL) {
		mdb_warn("umem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * The number of "cpus" determines how large the cache is.
	 */
	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    vm_quantum == 0 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid umem_cache_t\n", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax,
	    UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines;  if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & UM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);

	umw->umw_type = type;
	umw->umw_addr = addr;
	umw->umw_cp = cp;
	umw->umw_csize = csize;
	umw->umw_maglist = maglist;
	umw->umw_max = magmax;
	umw->umw_count = magcnt;
	umw->umw_pos = 0;

	/*
	 * When walking allocated buffers in a UMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
		layered = "umem_hash";

		umw->umw_type |= UM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & UM_ALLOCATED)
			layered = "umem_slab";
		else
			layered = "umem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & UMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			umw->umw_ubase = mdb_alloc(slabsize +
			    sizeof (umem_bufctl_t), UM_SLEEP);

			if (type & UM_ALLOCATED)
				umw->umw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

out1:
	if (status == WALK_ERR) {
		if (umw->umw_valid)
			mdb_free(umw->umw_valid, slabsize / chunksize);

		if (umw->umw_ubase)
			mdb_free(umw->umw_ubase, slabsize +
			    sizeof (umem_bufctl_t));

		if (umw->umw_maglist)
			mdb_free(umw->umw_maglist, umw->umw_max *
			    sizeof (uintptr_t));

		mdb_free(umw, sizeof (umem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}

int
umem_walk_step(mdb_walk_state_t *wsp)
{
	umem_walk_t *umw = wsp->walk_data;
	int type = umw->umw_type;
	umem_cache_t *cp = umw->umw_cp;

	void **maglist = umw->umw_maglist;
	int magcnt = umw->umw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const umem_slab_t *sp;
	const umem_bufctl_t *bcp;
	umem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'umem_hash' layered walk case
	 */
	if (type & UM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & UM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (umem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = umw->umw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & UM_FREE) && magcnt != 0) {
		umw->umw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & UM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & UMF_BUFTAG) {
					umem_buftag_t *btp;
					umem_buftag_t tag;

					/* LINTED - alignment */
					btp = UMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (umem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & UMF_HASH)) {
		valid = umw->umw_valid;
		ubase = umw->umw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & UM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & UMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in;  we just need to determine
			 * its offset in the slab to find the
			 * umem_bufctl_t.
			 */
			bc = *((umem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = UMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & UM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & UM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & UM_FREE)
		return (WALK_NEXT);

	if (type & UM_BUFCTL) {
		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = umem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
umem_walk_fini(mdb_walk_state_t *wsp)
{
	umem_walk_t *umw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (umw == NULL)
		return;

	if (umw->umw_maglist != NULL)
		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));

	chunksize = umw->umw_cp->cache_chunksize;
	slabsize = umw->umw_cp->cache_slabsize;

	if (umw->umw_valid != NULL)
		mdb_free(umw->umw_valid, slabsize / chunksize);
	if (umw->umw_ubase != NULL)
		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));

	mdb_free(umw->umw_cp, umw->umw_csize);
	mdb_free(umw, sizeof (umem_walk_t));
}

/*ARGSUSED*/
static int
umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk umem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & UMC_NOTOUCH)
		return (WALK_NEXT);

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
		return (WALK_DONE);

	return (WALK_NEXT);
}

#define	UMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
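
/*
 * UMEM_WALK_ALL handles the global-walk case for the walk_init functions
 * below: with no cache address, it re-invokes the named walker once per
 * cache (via the umem_cache walk) and returns from the calling function.
 */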

int
umem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("umem", wsp);
	return (umem_walk_init_common(wsp, UM_ALLOCATED));
}

int
bufctl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("bufctl", wsp);
	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
}

int
freemem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("freemem", wsp);
	return (umem_walk_init_common(wsp, UM_FREE));
}

int
freectl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("freectl", wsp);
	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
}

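/*
 * The bufctl_history walk starts from a bufctl and chases its bc_lastlog
 * pointers back through the transaction log, yielding progressively older
 * records for the same buffer.
 */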
typedef struct bufctl_history_walk {
	void		*bhw_next;
	umem_cache_t	*bhw_cache;
	umem_slab_t	*bhw_slab;
	hrtime_t	bhw_timestamp;
} bufctl_history_walk_t;

int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	umem_bufctl_audit_t bc;
	umem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl;  in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}

int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	umem_bufctl_audit_t *b;
	UMEM_LOCAL_BUFCTL_AUDIT(&b);

	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)b->bc_addr != baseaddr ||
	    b->bc_cache != bhw->bhw_cache ||
	    b->bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = b->bc_lastlog;
	bhw->bhw_timestamp = b->bc_timestamp;

	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
}

void
bufctl_history_walk_fini(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;

	mdb_free(bhw, sizeof (*bhw));
}

typedef struct umem_log_walk {
	umem_bufctl_audit_t *ulw_base;
	umem_bufctl_audit_t **ulw_sorted;
	umem_log_header_t ulw_lh;
	size_t ulw_size;
	size_t ulw_maxndx;
	size_t ulw_ndx;
} umem_log_walk_t;

int
umem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	umem_log_walk_t *ulw;
	umem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the umem_transaction_log.  Otherwise
	 * read the log whose umem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
		mdb_warn("failed to read 'umem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == NULL) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
	lhp = &ulw->ulw_lh;

	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(ulw, sizeof (umem_log_walk_t));
		return (WALK_ERR);
	}

	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;

	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(ulw->ulw_base, ulw->ulw_size);
		mdb_free(ulw, sizeof (umem_log_walk_t));
		return (WALK_ERR);
	}

	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		caddr_t chunk = (caddr_t)
		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++) {
			/* LINTED align */
			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
			chunk += UMEM_BUFCTL_AUDIT_SIZE;
		}
	}

	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	ulw->ulw_maxndx = k;
	wsp->walk_data = ulw;

	return (WALK_NEXT);
}

int
umem_log_walk_step(mdb_walk_state_t *wsp)
{
	umem_log_walk_t *ulw = wsp->walk_data;
	umem_bufctl_audit_t *bcp;

	if (ulw->ulw_ndx == ulw->ulw_maxndx)
		return (WALK_DONE);

	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];

	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
}

void
umem_log_walk_fini(mdb_walk_state_t *wsp)
{
	umem_log_walk_t *ulw = wsp->walk_data;

	mdb_free(ulw->ulw_base, ulw->ulw_size);
	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
	    sizeof (umem_bufctl_audit_t *));
	mdb_free(ulw, sizeof (umem_log_walk_t));
}

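/*
 * The allocdby/freedby walks collect, across every cache, the audit
 * bufctls whose bc_thread matches the given thread address, then report
 * them sorted newest-first.
 */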
1655 typedef struct allocdby_bufctl {
1656 	uintptr_t abb_addr;
1657 	hrtime_t abb_ts;
1658 } allocdby_bufctl_t;
1659 
1660 typedef struct allocdby_walk {
1661 	const char *abw_walk;
1662 	uintptr_t abw_thread;
1663 	size_t abw_nbufs;
1664 	size_t abw_size;
1665 	allocdby_bufctl_t *abw_buf;
1666 	size_t abw_ndx;
1667 } allocdby_walk_t;
1668 
1669 int
1670 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1671     allocdby_walk_t *abw)
1672 {
1673 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1674 		return (WALK_NEXT);
1675 
1676 	if (abw->abw_nbufs == abw->abw_size) {
1677 		allocdby_bufctl_t *buf;
1678 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1679 
1680 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1681 
1682 		bcopy(abw->abw_buf, buf, oldsize);
1683 		mdb_free(abw->abw_buf, oldsize);
1684 
1685 		abw->abw_size <<= 1;
1686 		abw->abw_buf = buf;
1687 	}
1688 
1689 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1690 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1691 	abw->abw_nbufs++;
1692 
1693 	return (WALK_NEXT);
1694 }
1695 
1696 /*ARGSUSED*/
1697 int
1698 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1699 {
1700 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1701 	    abw, addr) == -1) {
1702 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1703 		return (WALK_DONE);
1704 	}
1705 
1706 	return (WALK_NEXT);
1707 }
1708 
1709 static int
1710 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1711 {
1712 	if (lhs->abb_ts < rhs->abb_ts)
1713 		return (1);
1714 	if (lhs->abb_ts > rhs->abb_ts)
1715 		return (-1);
1716 	return (0);
1717 }
1718 
1719 static int
1720 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1721 {
1722 	allocdby_walk_t *abw;
1723 
1724 	if (wsp->walk_addr == NULL) {
1725 		mdb_warn("allocdby walk doesn't support global walks\n");
1726 		return (WALK_ERR);
1727 	}
1728 
1729 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1730 
1731 	abw->abw_thread = wsp->walk_addr;
1732 	abw->abw_walk = walk;
1733 	abw->abw_size = 128;	/* something reasonable */
1734 	abw->abw_buf =
1735 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1736 
1737 	wsp->walk_data = abw;
1738 
1739 	if (mdb_walk("umem_cache",
1740 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1741 		mdb_warn("couldn't walk umem_cache");
1742 		allocdby_walk_fini(wsp);
1743 		return (WALK_ERR);
1744 	}
1745 
1746 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1747 	    (int(*)(const void *, const void *))allocdby_cmp);
1748 
1749 	return (WALK_NEXT);
1750 }
1751 
1752 int
1753 allocdby_walk_init(mdb_walk_state_t *wsp)
1754 {
1755 	return (allocdby_walk_init_common(wsp, "bufctl"));
1756 }
1757 
1758 int
1759 freedby_walk_init(mdb_walk_state_t *wsp)
1760 {
1761 	return (allocdby_walk_init_common(wsp, "freectl"));
1762 }
1763 
1764 int
1765 allocdby_walk_step(mdb_walk_state_t *wsp)
1766 {
1767 	allocdby_walk_t *abw = wsp->walk_data;
1768 	uintptr_t addr;
1769 	umem_bufctl_audit_t *bcp;
1770 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1771 
1772 	if (abw->abw_ndx == abw->abw_nbufs)
1773 		return (WALK_DONE);
1774 
1775 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1776 
1777 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1778 		mdb_warn("couldn't read bufctl at %p", addr);
1779 		return (WALK_DONE);
1780 	}
1781 
1782 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1783 }
1784 
1785 void
1786 allocdby_walk_fini(mdb_walk_state_t *wsp)
1787 {
1788 	allocdby_walk_t *abw = wsp->walk_data;
1789 
1790 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1791 	mdb_free(abw, sizeof (allocdby_walk_t));
1792 }
1793 
1794 /*ARGSUSED*/
1795 int
1796 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1797 {
1798 	char c[MDB_SYM_NAMLEN];
1799 	GElf_Sym sym;
1800 	int i;
1801 
1802 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1803 	for (i = 0; i < bcp->bc_depth; i++) {
1804 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1805 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1806 			continue;
1807 		if (is_umem_sym(c, "umem_"))
1808 			continue;
1809 		mdb_printf("%s+0x%lx",
1810 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1811 		break;
1812 	}
1813 	mdb_printf("\n");
1814 
1815 	return (WALK_NEXT);
1816 }
1817 
1818 static int
1819 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1820 {
1821 	if (!(flags & DCMD_ADDRSPEC))
1822 		return (DCMD_USAGE);
1823 
1824 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1825 
1826 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1827 		mdb_warn("can't walk '%s' for %p", w, addr);
1828 		return (DCMD_ERR);
1829 	}
1830 
1831 	return (DCMD_OK);
1832 }
1833 
1834 /*ARGSUSED*/
1835 int
1836 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1837 {
1838 	return (allocdby_common(addr, flags, "allocdby"));
1839 }
1840 
1841 /*ARGSUSED*/
1842 int
1843 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1844 {
1845 	return (allocdby_common(addr, flags, "freedby"));
1846 }
1847 
1848 typedef struct whatis {
1849 	uintptr_t w_addr;
1850 	const umem_cache_t *w_cache;
1851 	const vmem_t *w_vmem;
1852 	int w_found;
1853 	uint_t w_all;
1854 	uint_t w_bufctl;
1855 	uint_t w_freemem;
1856 	uint_t w_quiet;
1857 	uint_t w_verbose;
1858 } whatis_t;
1859 
1860 /* nicely report pointers as offsets from a base */
1861 static void
1862 whatis_report_pointer(uintptr_t addr, uintptr_t base, const char *description)
1863 {
1864 	if (addr == base)
1865 		mdb_printf("%p is %s",
1866 		    addr, description);
1867 	else
1868 		mdb_printf("%p is %p+%p, %s",
1869 		    addr, base, addr - base, description);
1870 }
1871 
1872 /* call one of our dcmd functions with "-v" and the provided address */
1873 static void
1874 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
1875 {
1876 	mdb_arg_t a;
1877 	a.a_type = MDB_TYPE_STRING;
1878 	a.a_un.a_str = "-v";
1879 
1880 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
1881 }
1882 
1883 static void
1884 whatis_print_umem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
1885 {
1886 	const umem_cache_t *cp = w->w_cache;
1887 	/* LINTED pointer cast may result in improper alignment */
1888 	uintptr_t btaddr = (uintptr_t)UMEM_BUFTAG(cp, addr);
1889 	intptr_t stat;
1890 	int call_printer;
1891 
1892 	if (cp->cache_flags & UMF_REDZONE) {
1893 		umem_buftag_t bt;
1894 
1895 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
1896 			goto done;
1897 
1898 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
1899 
1900 		if (stat != UMEM_BUFTAG_ALLOC && stat != UMEM_BUFTAG_FREE)
1901 			goto done;
1902 
1903 		/*
1904 		 * provide the bufctl ptr if it has useful information
1905 		 */
1906 		if (baddr == 0 && (cp->cache_flags & UMF_AUDIT))
1907 			baddr = (uintptr_t)bt.bt_bufctl;
1908 	}
1909 
1910 done:
1911 	call_printer =
1912 	    (!w->w_quiet && baddr != 0 && (cp->cache_flags & UMF_AUDIT));
1913 
1914 	whatis_report_pointer(w->w_addr, addr, "");
1915 
1916 	if (baddr != 0 && !call_printer)
1917 		mdb_printf("bufctl %p ", baddr);
1918 
1919 	mdb_printf("%s from %s%s\n",
1920 	    (w->w_freemem == FALSE) ? "allocated" : "freed", cp->cache_name,
1921 	    call_printer ? ":" : "");
1922 
1923 	if (call_printer)
1924 		whatis_call_printer(bufctl, baddr);
1925 }
1926 
1927 /*ARGSUSED*/
1928 static int
1929 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_t *w)
1930 {
1931 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1932 		return (WALK_NEXT);
1933 
1934 	whatis_print_umem(addr, 0, w);
1935 	w->w_found++;
1936 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1937 }
1938 
1939 static int
1940 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
1941 {
1942 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
1943 		return (WALK_NEXT);
1944 
1945 	whatis_report_pointer(w->w_addr, vs->vs_start, "");
1946 
1947 	/*
1948 	 * If we're not going to print it anyway, provide the vmem_seg pointer
1949 	 * if it has a stack trace.
1950 	 */
1951 	if (w->w_quiet && (w->w_bufctl ||
1952 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
1953 		mdb_printf("vmem_seg %p ", addr);
1954 	}
1955 
1956 	mdb_printf("%s from %s vmem arena%s\n",
1957 	    (w->w_freemem == FALSE) ? "allocated" : "freed",
1958 	    w->w_vmem->vm_name, !w->w_quiet ? ":" : "");
1959 
1960 	if (!w->w_quiet)
1961 		whatis_call_printer(vmem_seg, addr);
1962 
1963 	w->w_found++;
1964 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1965 }
1966 
1967 static int
1968 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
1969 {
1970 	const char *nm = vmem->vm_name;
1971 	w->w_vmem = vmem;
1972 	w->w_freemem = FALSE;
1973 
1974 	if (w->w_verbose)
1975 		mdb_printf("Searching vmem arena %s...\n", nm);
1976 
1977 	if (mdb_pwalk("vmem_alloc",
1978 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1979 		mdb_warn("can't walk vmem seg for %p", addr);
1980 		return (WALK_NEXT);
1981 	}
1982 
1983 	if (w->w_found && w->w_all == FALSE)
1984 		return (WALK_DONE);
1985 
1986 	if (w->w_verbose)
1987 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
1988 
1989 	w->w_freemem = TRUE;
1990 
1991 	if (mdb_pwalk("vmem_free",
1992 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1993 		mdb_warn("can't walk vmem seg for %p", addr);
1994 		return (WALK_NEXT);
1995 	}
1996 
1997 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
1998 }
1999 
2000 /*ARGSUSED*/
2001 static int
2002 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_t *w)
2003 {
2004 	uintptr_t addr;
2005 
2006 	if (bcp == NULL)
2007 		return (WALK_NEXT);
2008 
2009 	addr = (uintptr_t)bcp->bc_addr;
2010 
2011 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
2012 		return (WALK_NEXT);
2013 
2014 	whatis_print_umem(addr, baddr, w);
2015 	w->w_found++;
2016 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
2017 }
2018 
2019 static int
2020 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2021 {
2022 	char *walk, *freewalk;
2023 	mdb_walk_cb_t func;
2024 
2025 	/* For caches with auditing info, we always walk the bufctls */
2026 	if (w->w_bufctl || (c->cache_flags & UMF_AUDIT)) {
2027 		walk = "bufctl";
2028 		freewalk = "freectl";
2029 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2030 	} else {
2031 		walk = "umem";
2032 		freewalk = "freemem";
2033 		func = (mdb_walk_cb_t)whatis_walk_umem;
2034 	}
2035 
2036 	if (w->w_verbose)
2037 		mdb_printf("Searching %s...\n", c->cache_name);
2038 
2039 	w->w_cache = c;
2040 	w->w_freemem = FALSE;
2041 
2042 	if (mdb_pwalk(walk, func, w, addr) == -1) {
2043 		mdb_warn("can't find %s walker", walk);
2044 		return (WALK_DONE);
2045 	}
2046 
2047 	if (w->w_found && w->w_all == FALSE)
2048 		return (WALK_DONE);
2049 
2050 	/*
2051 	 * We have searched for allocated memory; now search for freed memory.
2052 	 */
2053 	if (w->w_verbose)
2054 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2055 
2056 	w->w_freemem = TRUE;
2057 
2058 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
2059 		mdb_warn("can't find %s walker", freewalk);
2060 		return (WALK_DONE);
2061 	}
2062 
2063 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
2064 }
2065 
2066 static int
2067 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2068 {
2069 	if (c->cache_cflags & UMC_NOTOUCH)
2070 		return (WALK_NEXT);
2071 
2072 	return (whatis_walk_cache(addr, c, w));
2073 }
2074 
2075 static int
2076 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
2077 {
2078 	if (!(c->cache_cflags & UMC_NOTOUCH))
2079 		return (WALK_NEXT);
2080 
2081 	return (whatis_walk_cache(addr, c, w));
2082 }
2083 
2084 int
2085 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2086 {
2087 	whatis_t w;
2088 
2089 	if (!(flags & DCMD_ADDRSPEC))
2090 		return (DCMD_USAGE);
2091 
2092 	w.w_all = FALSE;
2093 	w.w_bufctl = FALSE;
2094 	w.w_quiet = FALSE;
2095 	w.w_verbose = FALSE;
2096 
2097 	if (mdb_getopts(argc, argv,
2098 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
2099 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl,
2100 	    'q', MDB_OPT_SETBITS, TRUE, &w.w_quiet,
2101 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
2102 	    NULL) != argc)
2103 		return (DCMD_USAGE);
2104 
2105 	w.w_addr = addr;
2106 	w.w_found = 0;
2107 
2108 	/*
2109 	 * Mappings and threads should eventually be added here.
2110 	 */
2111 	if (mdb_walk("umem_cache",
2112 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
2113 		mdb_warn("couldn't find umem_cache walker");
2114 		return (DCMD_ERR);
2115 	}
2116 
2117 	if (w.w_found && w.w_all == FALSE)
2118 		return (DCMD_OK);
2119 
2120 	if (mdb_walk("umem_cache",
2121 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
2122 		mdb_warn("couldn't find umem_cache walker");
2123 		return (DCMD_ERR);
2124 	}
2125 
2126 	if (w.w_found && w.w_all == FALSE)
2127 		return (DCMD_OK);
2128 
2129 	if (mdb_walk("vmem_postfix",
2130 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2131 		mdb_warn("couldn't find vmem_postfix walker");
2132 		return (DCMD_ERR);
2133 	}
2134 
2135 	if (w.w_found == 0)
2136 		mdb_printf("%p is unknown\n", addr);
2137 
2138 	return (DCMD_OK);
2139 }
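
/*
 * A hedged usage sketch (editorial; addresses and cache names are
 * illustrative only):
 *
 *	> 0x81234568::whatis
 *	81234568 is 81234560+8, allocated from umem_alloc_24
 *
 * -a reports every match instead of stopping at the first; -b prints
 * bufctl addresses even when a detailed printer would run; -q
 * suppresses the detailed printers; -v narrates each cache and arena
 * as it is searched.
 */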
2140 
2141 typedef struct umem_log_cpu {
2142 	uintptr_t umc_low;
2143 	uintptr_t umc_high;
2144 } umem_log_cpu_t;
2145 
2146 int
2147 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2148 {
2149 	int i;
2150 
2151 	for (i = 0; i < umem_max_ncpus; i++) {
2152 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2153 			break;
2154 	}
2155 
2156 	if (i == umem_max_ncpus)
2157 		mdb_printf("   ");
2158 	else
2159 		mdb_printf("%3d", i);
2160 
2161 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2162 	    b->bc_timestamp, b->bc_thread);
2163 
2164 	return (WALK_NEXT);
2165 }
2166 
2167 /*ARGSUSED*/
2168 int
2169 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2170 {
2171 	umem_log_header_t lh;
2172 	umem_cpu_log_header_t clh;
2173 	uintptr_t lhp, clhp;
2174 	umem_log_cpu_t *umc;
2175 	int i;
2176 
2177 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2178 		mdb_warn("failed to read 'umem_transaction_log'");
2179 		return (DCMD_ERR);
2180 	}
2181 
2182 	if (lhp == NULL) {
2183 		mdb_warn("no umem transaction log\n");
2184 		return (DCMD_ERR);
2185 	}
2186 
2187 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2188 		mdb_warn("failed to read log header at %p", lhp);
2189 		return (DCMD_ERR);
2190 	}
2191 
2192 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2193 
2194 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2195 	    UM_SLEEP | UM_GC);
2196 
2197 	for (i = 0; i < umem_max_ncpus; i++) {
2198 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2199 			mdb_warn("cannot read cpu %d's log header at %p",
2200 			    i, clhp);
2201 			return (DCMD_ERR);
2202 		}
2203 
2204 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2205 		    (uintptr_t)lh.lh_base;
2206 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2207 
2208 		clhp += sizeof (umem_cpu_log_header_t);
2209 	}
2210 
2211 	if (DCMD_HDRSPEC(flags)) {
2212 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2213 		    "BUFADDR", "TIMESTAMP", "THREAD");
2214 	}
2215 
2216 	/*
2217 	 * If we have been passed an address, we'll just print out that
2218 	 * log entry.
2219 	 */
2220 	if (flags & DCMD_ADDRSPEC) {
2221 		umem_bufctl_audit_t *bp;
2222 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2223 
2224 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2225 			mdb_warn("failed to read bufctl at %p", addr);
2226 			return (DCMD_ERR);
2227 		}
2228 
2229 		(void) umem_log_walk(addr, bp, umc);
2230 
2231 		return (DCMD_OK);
2232 	}
2233 
2234 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2235 		mdb_warn("can't find umem log walker");
2236 		return (DCMD_ERR);
2237 	}
2238 
2239 	return (DCMD_OK);
2240 }
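
/*
 * A hedged usage sketch: with no address, ::umem_log dumps every entry
 * in the transaction log; with an address, it decodes that single
 * audit record.  The output might resemble (values illustrative only):
 *
 *	> ::umem_log
 *	CPU ADDR             BUFADDR          TIMESTAMP        THREAD
 *	  0 00007fffd4200000 0000000000461c88     6e4862d5b2f0 00000001
 */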
2241 
2242 typedef struct bufctl_history_cb {
2243 	int		bhc_flags;
2244 	int		bhc_argc;
2245 	const mdb_arg_t	*bhc_argv;
2246 	int		bhc_ret;
2247 } bufctl_history_cb_t;
2248 
2249 /*ARGSUSED*/
2250 static int
2251 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2252 {
2253 	bufctl_history_cb_t *bhc = arg;
2254 
2255 	bhc->bhc_ret =
2256 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2257 
2258 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2259 
2260 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2261 }
2262 
2263 void
2264 bufctl_help(void)
2265 {
2266 	mdb_printf("%s\n",
2267 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n");
2268 	mdb_dec_indent(2);
2269 	mdb_printf("%<b>OPTIONS%</b>\n");
2270 	mdb_inc_indent(2);
2271 	mdb_printf("%s",
2272 "  -v    Display the full content of the bufctl, including its stack trace\n"
2273 "  -h    retrieve the bufctl's transaction history, if available\n"
2274 "  -a addr\n"
2275 "        filter out bufctls not involving the buffer at addr\n"
2276 "  -c caller\n"
2277 "        filter out bufctls without the function/PC in their stack trace\n"
2278 "  -e earliest\n"
2279 "        filter out bufctls timestamped before earliest\n"
2280 "  -l latest\n"
2281 "        filter out bufctls timestamped after latest\n"
2282 "  -t thread\n"
2283 "        filter out bufctls not involving thread\n");
2284 }
2285 
2286 int
2287 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2288 {
2289 	uint_t verbose = FALSE;
2290 	uint_t history = FALSE;
2291 	uint_t in_history = FALSE;
2292 	uintptr_t caller = NULL, thread = NULL;
2293 	uintptr_t laddr, haddr, baddr = NULL;
2294 	hrtime_t earliest = 0, latest = 0;
2295 	int i, depth;
2296 	char c[MDB_SYM_NAMLEN];
2297 	GElf_Sym sym;
2298 	umem_bufctl_audit_t *bcp;
2299 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2300 
2301 	if (mdb_getopts(argc, argv,
2302 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2303 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2304 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2305 	    'c', MDB_OPT_UINTPTR, &caller,
2306 	    't', MDB_OPT_UINTPTR, &thread,
2307 	    'e', MDB_OPT_UINT64, &earliest,
2308 	    'l', MDB_OPT_UINT64, &latest,
2309 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2310 		return (DCMD_USAGE);
2311 
2312 	if (!(flags & DCMD_ADDRSPEC))
2313 		return (DCMD_USAGE);
2314 
2315 	if (in_history && !history)
2316 		return (DCMD_USAGE);
2317 
2318 	if (history && !in_history) {
2319 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2320 		    UM_SLEEP | UM_GC);
2321 		bufctl_history_cb_t bhc;
2322 
2323 		nargv[0].a_type = MDB_TYPE_STRING;
2324 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2325 
2326 		for (i = 0; i < argc; i++)
2327 			nargv[i + 1] = argv[i];
2328 
2329 		/*
2330 		 * When in history mode, we treat each element as if it
2331 		 * were in a separate loop, so that the headers group
2332 		 * bufctls with similar histories.
2333 		 */
2334 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2335 		bhc.bhc_argc = argc + 1;
2336 		bhc.bhc_argv = nargv;
2337 		bhc.bhc_ret = DCMD_OK;
2338 
2339 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2340 		    addr) == -1) {
2341 			mdb_warn("unable to walk bufctl_history");
2342 			return (DCMD_ERR);
2343 		}
2344 
2345 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2346 			mdb_printf("\n");
2347 
2348 		return (bhc.bhc_ret);
2349 	}
2350 
2351 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2352 		if (verbose) {
2353 			mdb_printf("%16s %16s %16s %16s\n"
2354 			    "%<u>%16s %16s %16s %16s%</u>\n",
2355 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2356 			    "", "CACHE", "LASTLOG", "CONTENTS");
2357 		} else {
2358 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2359 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2360 		}
2361 	}
2362 
2363 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2364 		mdb_warn("couldn't read bufctl at %p", addr);
2365 		return (DCMD_ERR);
2366 	}
2367 
2368 	/*
2369 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2370 	 * the address does not really refer to a bufctl.
2371 	 */
2372 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2373 
2374 	if (caller != NULL) {
2375 		laddr = caller;
2376 		haddr = caller + sizeof (caller);
2377 
2378 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2379 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2380 			/*
2381 			 * We were provided an exact symbol value; any
2382 			 * address in the function is valid.
2383 			 */
2384 			laddr = (uintptr_t)sym.st_value;
2385 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2386 		}
2387 
2388 		for (i = 0; i < depth; i++)
2389 			if (bcp->bc_stack[i] >= laddr &&
2390 			    bcp->bc_stack[i] < haddr)
2391 				break;
2392 
2393 		if (i == depth)
2394 			return (DCMD_OK);
2395 	}
2396 
2397 	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2398 		return (DCMD_OK);
2399 
2400 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2401 		return (DCMD_OK);
2402 
2403 	if (latest != 0 && bcp->bc_timestamp > latest)
2404 		return (DCMD_OK);
2405 
2406 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2407 		return (DCMD_OK);
2408 
2409 	if (flags & DCMD_PIPE_OUT) {
2410 		mdb_printf("%#r\n", addr);
2411 		return (DCMD_OK);
2412 	}
2413 
2414 	if (verbose) {
2415 		mdb_printf(
2416 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2417 		    "%16s %16p %16p %16p\n",
2418 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2419 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2420 
2421 		mdb_inc_indent(17);
2422 		for (i = 0; i < depth; i++)
2423 			mdb_printf("%a\n", bcp->bc_stack[i]);
2424 		mdb_dec_indent(17);
2425 		mdb_printf("\n");
2426 	} else {
2427 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2428 		    bcp->bc_timestamp, bcp->bc_thread);
2429 
2430 		for (i = 0; i < depth; i++) {
2431 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2432 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2433 				continue;
2434 			if (is_umem_sym(c, "umem_"))
2435 				continue;
2436 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2437 			break;
2438 		}
2439 
2440 		if (i >= depth)
2441 			mdb_printf("\n");
2442 	}
2443 
2444 	return (DCMD_OK);
2445 }
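
/*
 * A hedged usage sketch: bufctls are typically reached by walking an
 * audited cache and filtering downstream.  Assuming a hypothetical
 * function foo_alloc() and a cache at cache_addr:
 *
 *	> cache_addr::walk bufctl | ::bufctl -c foo_alloc -t 3
 *
 * prints only the bufctls whose stack traces include foo_alloc() and
 * whose allocating thread was thread 3.
 */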
2446 
2447 /*ARGSUSED*/
2448 int
2449 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2450 {
2451 	mdb_arg_t a;
2452 
2453 	if (!(flags & DCMD_ADDRSPEC))
2454 		return (DCMD_USAGE);
2455 
2456 	if (argc != 0)
2457 		return (DCMD_USAGE);
2458 
2459 	a.a_type = MDB_TYPE_STRING;
2460 	a.a_un.a_str = "-v";
2461 
2462 	return (bufctl(addr, flags, 1, &a));
2463 }
2464 
2465 typedef struct umem_verify {
2466 	uint64_t *umv_buf;		/* buffer to read cache contents into */
2467 	size_t umv_size;		/* number of bytes in umv_buf */
2468 	int umv_corruption;		/* > 0 if corruption found. */
2469 	int umv_besilent;		/* nonzero to suppress per-buffer reports */
2470 	struct umem_cache umv_cache;	/* the cache we're operating on */
2471 } umem_verify_t;
2472 
2473 /*
2474  * verify_pattern()
2475  *	verify that buf is filled with the pattern pat.
2476  */
2477 static int64_t
2478 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2479 {
2480 	/*LINTED*/
2481 	uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2482 	uint64_t *buf;
2483 
2484 	for (buf = buf_arg; buf < bufend; buf++)
2485 		if (*buf != pat)
2486 			return ((uintptr_t)buf - (uintptr_t)buf_arg);
2487 	return (-1);
2488 }
2489 
2490 /*
2491  * verify_buftag()
2492  *	verify that btp->bt_bxstat == (bcp ^ pat)
2493  */
2494 static int
2495 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2496 {
2497 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2498 }
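
/*
 * A hedged worked example of the check above: if a free buffer's buftag
 * records bt_bufctl == 0x461c88, a sane tag must have
 * bt_bxstat == (0x461c88 ^ UMEM_BUFTAG_FREE); any other value means the
 * buftag (or the bufctl pointer stored beside it) was overwritten.
 */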
2499 
2500 /*
2501  * verify_free()
2502  *	verify the integrity of a free block of memory by checking
2503  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2504  */
2505 /*ARGSUSED1*/
2506 static int
2507 verify_free(uintptr_t addr, const void *data, void *private)
2508 {
2509 	umem_verify_t *umv = (umem_verify_t *)private;
2510 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2511 	int64_t corrupt;		/* corruption offset */
2512 	umem_buftag_t *buftagp;		/* ptr to buftag */
2513 	umem_cache_t *cp = &umv->umv_cache;
2514 	int besilent = umv->umv_besilent;
2515 
2516 	/*LINTED*/
2517 	buftagp = UMEM_BUFTAG(cp, buf);
2518 
2519 	/*
2520 	 * Read the buffer to check.
2521 	 */
2522 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2523 		if (!besilent)
2524 			mdb_warn("couldn't read %p", addr);
2525 		return (WALK_NEXT);
2526 	}
2527 
2528 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2529 	    UMEM_FREE_PATTERN)) >= 0) {
2530 		if (!besilent)
2531 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2532 			    addr, (uintptr_t)addr + corrupt);
2533 		goto corrupt;
2534 	}
2535 
2536 	if ((cp->cache_flags & UMF_HASH) &&
2537 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2538 		if (!besilent)
2539 			mdb_printf("buffer %p (free) seems to "
2540 			    "have a corrupt redzone pattern\n", addr);
2541 		goto corrupt;
2542 	}
2543 
2544 	/*
2545 	 * confirm bufctl pointer integrity.
2546 	 */
2547 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2548 		if (!besilent)
2549 			mdb_printf("buffer %p (free) has a corrupt "
2550 			    "buftag\n", addr);
2551 		goto corrupt;
2552 	}
2553 
2554 	return (WALK_NEXT);
2555 corrupt:
2556 	umv->umv_corruption++;
2557 	return (WALK_NEXT);
2558 }
2559 
2560 /*
2561  * verify_alloc()
2562  *	Verify that the buftag of an allocated buffer makes sense with respect
2563  *	to the buffer.
2564  */
2565 /*ARGSUSED1*/
2566 static int
2567 verify_alloc(uintptr_t addr, const void *data, void *private)
2568 {
2569 	umem_verify_t *umv = (umem_verify_t *)private;
2570 	umem_cache_t *cp = &umv->umv_cache;
2571 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2572 	/*LINTED*/
2573 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2574 	uint32_t *ip = (uint32_t *)buftagp;
2575 	uint8_t *bp = (uint8_t *)buf;
2576 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2577 	int besilent = umv->umv_besilent;
2578 
2579 	/*
2580 	 * Read the buffer to check.
2581 	 */
2582 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2583 		if (!besilent)
2584 			mdb_warn("couldn't read %p", addr);
2585 		return (WALK_NEXT);
2586 	}
2587 
2588 	/*
2589 	 * There are two cases to handle:
2590 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2591 	 *    0xfeedfacefeedface at the end of it
2592 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2593 	 *    0xbb just past the end of the region in use.  At the buftag,
2594 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2595 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2596 	 *    endianness), followed by 32 bits containing the offset of the
2597 	 *    0xbb byte in the buffer.
2598 	 *
2599 	 * Finally, the two 32-bit words that comprise the second half of the
2600 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2601 	 */
2602 
2603 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2604 		looks_ok = 1;
2605 	else if (!UMEM_SIZE_VALID(ip[1]))
2606 		size_ok = 0;
2607 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2608 		looks_ok = 1;
2609 	else
2610 		size_ok = 0;
2611 
2612 	if (!size_ok) {
2613 		if (!besilent)
2614 			mdb_printf("buffer %p (allocated) has a corrupt "
2615 			    "redzone size encoding\n", addr);
2616 		goto corrupt;
2617 	}
2618 
2619 	if (!looks_ok) {
2620 		if (!besilent)
2621 			mdb_printf("buffer %p (allocated) has a corrupt "
2622 			    "redzone signature\n", addr);
2623 		goto corrupt;
2624 	}
2625 
2626 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2627 		if (!besilent)
2628 			mdb_printf("buffer %p (allocated) has a "
2629 			    "corrupt buftag\n", addr);
2630 		goto corrupt;
2631 	}
2632 
2633 	return (WALK_NEXT);
2634 corrupt:
2635 	umv->umv_corruption++;
2636 	return (WALK_NEXT);
2637 }
2638 
2639 /*ARGSUSED2*/
2640 int
2641 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2642 {
2643 	if (flags & DCMD_ADDRSPEC) {
2644 		int check_alloc = 0, check_free = 0;
2645 		umem_verify_t umv;
2646 
2647 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2648 		    addr) == -1) {
2649 			mdb_warn("couldn't read umem_cache %p", addr);
2650 			return (DCMD_ERR);
2651 		}
2652 
2653 		umv.umv_size = umv.umv_cache.cache_buftag +
2654 		    sizeof (umem_buftag_t);
2655 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2656 		umv.umv_corruption = 0;
2657 
2658 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2659 			check_alloc = 1;
2660 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2661 				check_free = 1;
2662 		} else {
2663 			if (!(flags & DCMD_LOOP)) {
2664 				mdb_warn("cache %p (%s) does not have "
2665 				    "redzone checking enabled\n", addr,
2666 				    umv.umv_cache.cache_name);
2667 			}
2668 			return (DCMD_ERR);
2669 		}
2670 
2671 		if (flags & DCMD_LOOP) {
2672 			/*
2673 			 * table mode, don't print out every corrupt buffer
2674 			 */
2675 			umv.umv_besilent = 1;
2676 		} else {
2677 			mdb_printf("Summary for cache '%s'\n",
2678 			    umv.umv_cache.cache_name);
2679 			mdb_inc_indent(2);
2680 			umv.umv_besilent = 0;
2681 		}
2682 
2683 		if (check_alloc)
2684 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2685 		if (check_free)
2686 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2687 
2688 		if (flags & DCMD_LOOP) {
2689 			if (umv.umv_corruption == 0) {
2690 				mdb_printf("%-*s %?p clean\n",
2691 				    UMEM_CACHE_NAMELEN,
2692 				    umv.umv_cache.cache_name, addr);
2693 			} else {
2694 				char *s = "";	/* optional s in "buffer[s]" */
2695 				if (umv.umv_corruption > 1)
2696 					s = "s";
2697 
2698 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2699 				    UMEM_CACHE_NAMELEN,
2700 				    umv.umv_cache.cache_name, addr,
2701 				    umv.umv_corruption, s);
2702 			}
2703 		} else {
2704 			/*
2705 			 * This is the more verbose mode, when the user has
2706 			 * typed addr::umem_verify.  If the cache was clean,
2707 			 * nothing will have been printed yet, so say something.
2708 			 */
2709 			if (umv.umv_corruption == 0)
2710 				mdb_printf("clean\n");
2711 
2712 			mdb_dec_indent(2);
2713 		}
2714 	} else {
2715 		/*
2716 		 * If the user didn't specify a cache to verify, we'll walk all
2717 		 * umem_cache's, specifying ourself as a callback for each...
2718 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2719 		 */
2720 		mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", UMEM_CACHE_NAMELEN,
2721 		    "Cache Name", "Addr", "Cache Integrity");
2722 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2723 	}
2724 
2725 	return (DCMD_OK);
2726 }
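
/*
 * A hedged usage sketch: "::umem_verify" tabulates every cache, while
 * "addr::umem_verify" details a single one.  The table output might
 * resemble (addresses illustrative only):
 *
 *	> ::umem_verify
 *	Cache Name                     Addr     Cache Integrity
 *	umem_alloc_8                   8071e08  clean
 *	umem_alloc_16                  8071f10  1 corrupt buffer
 */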
2727 
2728 typedef struct vmem_node {
2729 	struct vmem_node *vn_next;
2730 	struct vmem_node *vn_parent;
2731 	struct vmem_node *vn_sibling;
2732 	struct vmem_node *vn_children;
2733 	uintptr_t vn_addr;
2734 	int vn_marked;
2735 	vmem_t vn_vmem;
2736 } vmem_node_t;
2737 
2738 typedef struct vmem_walk {
2739 	vmem_node_t *vw_root;
2740 	vmem_node_t *vw_current;
2741 } vmem_walk_t;
2742 
2743 int
2744 vmem_walk_init(mdb_walk_state_t *wsp)
2745 {
2746 	uintptr_t vaddr, paddr;
2747 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2748 	vmem_walk_t *vw;
2749 
2750 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2751 		mdb_warn("couldn't read 'vmem_list'");
2752 		return (WALK_ERR);
2753 	}
2754 
2755 	while (vaddr != NULL) {
2756 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2757 		vp->vn_addr = vaddr;
2758 		vp->vn_next = head;
2759 		head = vp;
2760 
2761 		if (vaddr == wsp->walk_addr)
2762 			current = vp;
2763 
2764 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2765 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2766 			goto err;
2767 		}
2768 
2769 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2770 	}
2771 
2772 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2773 
2774 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2775 			vp->vn_sibling = root;
2776 			root = vp;
2777 			continue;
2778 		}
2779 
2780 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2781 			if (parent->vn_addr != paddr)
2782 				continue;
2783 			vp->vn_sibling = parent->vn_children;
2784 			parent->vn_children = vp;
2785 			vp->vn_parent = parent;
2786 			break;
2787 		}
2788 
2789 		if (parent == NULL) {
2790 			mdb_warn("couldn't find %p's parent (%p)\n",
2791 			    vp->vn_addr, paddr);
2792 			goto err;
2793 		}
2794 	}
2795 
2796 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2797 	vw->vw_root = root;
2798 
2799 	if (current != NULL)
2800 		vw->vw_current = current;
2801 	else
2802 		vw->vw_current = root;
2803 
2804 	wsp->walk_data = vw;
2805 	return (WALK_NEXT);
2806 err:
2807 	for (vp = head; head != NULL; vp = head) {
2808 		head = vp->vn_next;
2809 		mdb_free(vp, sizeof (vmem_node_t));
2810 	}
2811 
2812 	return (WALK_ERR);
2813 }
2814 
2815 int
2816 vmem_walk_step(mdb_walk_state_t *wsp)
2817 {
2818 	vmem_walk_t *vw = wsp->walk_data;
2819 	vmem_node_t *vp;
2820 	int rval;
2821 
2822 	if ((vp = vw->vw_current) == NULL)
2823 		return (WALK_DONE);
2824 
2825 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2826 
2827 	if (vp->vn_children != NULL) {
2828 		vw->vw_current = vp->vn_children;
2829 		return (rval);
2830 	}
2831 
2832 	do {
2833 		vw->vw_current = vp->vn_sibling;
2834 		vp = vp->vn_parent;
2835 	} while (vw->vw_current == NULL && vp != NULL);
2836 
2837 	return (rval);
2838 }
2839 
2840 /*
2841  * The "vmem_postfix" walk visits the vmem arenas in postfix order; all
2842  * children are visited before their parent.  We perform the postfix walk
2843  * iteratively (rather than recursively) to allow mdb to regain control
2844  * after each callback.
2845  */
2846 int
2847 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2848 {
2849 	vmem_walk_t *vw = wsp->walk_data;
2850 	vmem_node_t *vp = vw->vw_current;
2851 	int rval;
2852 
2853 	/*
2854 	 * If this node is marked, then we know that we have already visited
2855 	 * all of its children.  If the node has any siblings, they need to
2856 	 * be visited next; otherwise, we need to visit the parent.  Note
2857 	 * that vp->vn_marked will only be zero on the first invocation of
2858 	 * the step function.
2859 	 */
2860 	if (vp->vn_marked) {
2861 		if (vp->vn_sibling != NULL)
2862 			vp = vp->vn_sibling;
2863 		else if (vp->vn_parent != NULL)
2864 			vp = vp->vn_parent;
2865 		else {
2866 			/*
2867 			 * We have neither a parent, nor a sibling, and we
2868 			 * have already been visited; we're done.
2869 			 */
2870 			return (WALK_DONE);
2871 		}
2872 	}
2873 
2874 	/*
2875 	 * Before we visit this node, visit its children.
2876 	 */
2877 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2878 		vp = vp->vn_children;
2879 
2880 	vp->vn_marked = 1;
2881 	vw->vw_current = vp;
2882 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2883 
2884 	return (rval);
2885 }
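
/*
 * A hedged illustration of the postfix order: if arenas B and C both
 * import from arena A, the walk visits B and C (in sibling order)
 * before visiting A, so a consumer such as ::whatis reports the most
 * specific arena covering an address before its source.
 */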
2886 
2887 void
2888 vmem_walk_fini(mdb_walk_state_t *wsp)
2889 {
2890 	vmem_walk_t *vw = wsp->walk_data;
2891 	vmem_node_t *root = vw->vw_root;
2892 	int done;
2893 
2894 	if (root == NULL)
2895 		return;
2896 
2897 	if ((vw->vw_root = root->vn_children) != NULL)
2898 		vmem_walk_fini(wsp);
2899 
2900 	vw->vw_root = root->vn_sibling;
2901 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2902 	mdb_free(root, sizeof (vmem_node_t));
2903 
2904 	if (done) {
2905 		mdb_free(vw, sizeof (vmem_walk_t));
2906 	} else {
2907 		vmem_walk_fini(wsp);
2908 	}
2909 }
2910 
2911 typedef struct vmem_seg_walk {
2912 	uint8_t vsw_type;
2913 	uintptr_t vsw_start;
2914 	uintptr_t vsw_current;
2915 } vmem_seg_walk_t;
2916 
2917 /*ARGSUSED*/
2918 int
2919 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2920 {
2921 	vmem_seg_walk_t *vsw;
2922 
2923 	if (wsp->walk_addr == NULL) {
2924 		mdb_warn("vmem_%s does not support global walks\n", name);
2925 		return (WALK_ERR);
2926 	}
2927 
2928 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2929 
2930 	vsw->vsw_type = type;
2931 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2932 	vsw->vsw_current = vsw->vsw_start;
2933 
2934 	return (WALK_NEXT);
2935 }
2936 
2937 /*
2938  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
2939  */
2940 #define	VMEM_NONE	0
2941 
2942 int
2943 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
2944 {
2945 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
2946 }
2947 
2948 int
2949 vmem_free_walk_init(mdb_walk_state_t *wsp)
2950 {
2951 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
2952 }
2953 
2954 int
2955 vmem_span_walk_init(mdb_walk_state_t *wsp)
2956 {
2957 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
2958 }
2959 
2960 int
2961 vmem_seg_walk_init(mdb_walk_state_t *wsp)
2962 {
2963 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
2964 }
2965 
2966 int
2967 vmem_seg_walk_step(mdb_walk_state_t *wsp)
2968 {
2969 	vmem_seg_t seg;
2970 	vmem_seg_walk_t *vsw = wsp->walk_data;
2971 	uintptr_t addr = vsw->vsw_current;
2972 	static size_t seg_size = 0;
2973 	int rval;
2974 
2975 	if (!seg_size) {
2976 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
2977 			mdb_warn("failed to read 'vmem_seg_size'");
2978 			seg_size = sizeof (vmem_seg_t);
2979 		}
2980 	}
2981 
2982 	if (seg_size < sizeof (seg))
2983 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
2984 
2985 	if (mdb_vread(&seg, seg_size, addr) == -1) {
2986 		mdb_warn("couldn't read vmem_seg at %p", addr);
2987 		return (WALK_ERR);
2988 	}
2989 
2990 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
2991 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
2992 		rval = WALK_NEXT;
2993 	} else {
2994 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
2995 	}
2996 
2997 	if (vsw->vsw_current == vsw->vsw_start)
2998 		return (WALK_DONE);
2999 
3000 	return (rval);
3001 }
3002 
3003 void
3004 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3005 {
3006 	vmem_seg_walk_t *vsw = wsp->walk_data;
3007 
3008 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3009 }
3010 
3011 #define	VMEM_NAMEWIDTH	22
3012 
3013 int
3014 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3015 {
3016 	vmem_t v, parent;
3017 	uintptr_t paddr;
3018 	int ident = 0;
3019 	char c[VMEM_NAMEWIDTH];
3020 
3021 	if (!(flags & DCMD_ADDRSPEC)) {
3022 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3023 			mdb_warn("can't walk vmem");
3024 			return (DCMD_ERR);
3025 		}
3026 		return (DCMD_OK);
3027 	}
3028 
3029 	if (DCMD_HDRSPEC(flags))
3030 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3031 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3032 		    "TOTAL", "SUCCEED", "FAIL");
3033 
3034 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3035 		mdb_warn("couldn't read vmem at %p", addr);
3036 		return (DCMD_ERR);
3037 	}
3038 
3039 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3040 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3041 			mdb_warn("couldn't trace %p's ancestry", addr);
3042 			ident = 0;
3043 			break;
3044 		}
3045 		paddr = (uintptr_t)parent.vm_source;
3046 	}
3047 
3048 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3049 
3050 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3051 	    addr, VMEM_NAMEWIDTH, c,
3052 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3053 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3054 
3055 	return (DCMD_OK);
3056 }
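
/*
 * A hedged output sketch: ::vmem indents each arena two spaces per
 * level beneath its source arena, e.g. (all numbers illustrative only):
 *
 *	ADDR     NAME                        INUSE        TOTAL SUCCEED  FAIL
 *	08070c94 sbrk_top                  1036288      1323008    7624     0
 *	08070e34   sbrk_heap               1036288      1036288    7624     0
 */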
3057 
3058 void
3059 vmem_seg_help(void)
3060 {
3061 	mdb_printf("%s\n",
3062 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3063 "\n"
3064 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3065 "representing a single chunk of data.  Only ALLOC segments have debugging\n"
3066 "information.\n");
3067 	mdb_dec_indent(2);
3068 	mdb_printf("%<b>OPTIONS%</b>\n");
3069 	mdb_inc_indent(2);
3070 	mdb_printf("%s",
3071 "  -v    Display the full content of the vmem_seg, including its stack trace\n"
3072 "  -s    report the size of the segment, instead of the end address\n"
3073 "  -c caller\n"
3074 "        filter out segments without the function/PC in their stack trace\n"
3075 "  -e earliest\n"
3076 "        filter out segments timestamped before earliest\n"
3077 "  -l latest\n"
3078 "        filter out segments timestamped after latest\n"
3079 "  -m minsize\n"
3080 "        filer out segments smaller than minsize\n"
3081 "  -M maxsize\n"
3082 "        filer out segments larger than maxsize\n"
3083 "  -t thread\n"
3084 "        filter out segments not involving thread\n"
3085 "  -T type\n"
3086 "        filter out segments not of type 'type'\n"
3087 "        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3088 }
3089 
3090 
3091 /*ARGSUSED*/
3092 int
3093 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3094 {
3095 	vmem_seg_t vs;
3096 	uintptr_t *stk = vs.vs_stack;
3097 	uintptr_t sz;
3098 	uint8_t t;
3099 	const char *type = NULL;
3100 	GElf_Sym sym;
3101 	char c[MDB_SYM_NAMLEN];
3102 	int no_debug;
3103 	int i;
3104 	int depth;
3105 	uintptr_t laddr, haddr;
3106 
3107 	uintptr_t caller = NULL, thread = NULL;
3108 	uintptr_t minsize = 0, maxsize = 0;
3109 
3110 	hrtime_t earliest = 0, latest = 0;
3111 
3112 	uint_t size = 0;
3113 	uint_t verbose = 0;
3114 
3115 	if (!(flags & DCMD_ADDRSPEC))
3116 		return (DCMD_USAGE);
3117 
3118 	if (mdb_getopts(argc, argv,
3119 	    'c', MDB_OPT_UINTPTR, &caller,
3120 	    'e', MDB_OPT_UINT64, &earliest,
3121 	    'l', MDB_OPT_UINT64, &latest,
3122 	    's', MDB_OPT_SETBITS, TRUE, &size,
3123 	    'm', MDB_OPT_UINTPTR, &minsize,
3124 	    'M', MDB_OPT_UINTPTR, &maxsize,
3125 	    't', MDB_OPT_UINTPTR, &thread,
3126 	    'T', MDB_OPT_STR, &type,
3127 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3128 	    NULL) != argc)
3129 		return (DCMD_USAGE);
3130 
3131 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3132 		if (verbose) {
3133 			mdb_printf("%16s %4s %16s %16s %16s\n"
3134 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3135 			    "ADDR", "TYPE", "START", "END", "SIZE",
3136 			    "", "", "THREAD", "TIMESTAMP", "");
3137 		} else {
3138 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3139 			    "START", size? "SIZE" : "END", "WHO");
3140 		}
3141 	}
3142 
3143 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3144 		mdb_warn("couldn't read vmem_seg at %p", addr);
3145 		return (DCMD_ERR);
3146 	}
3147 
3148 	if (type != NULL) {
3149 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3150 			t = VMEM_ALLOC;
3151 		else if (strcmp(type, "FREE") == 0)
3152 			t = VMEM_FREE;
3153 		else if (strcmp(type, "SPAN") == 0)
3154 			t = VMEM_SPAN;
3155 		else if (strcmp(type, "ROTR") == 0 ||
3156 		    strcmp(type, "ROTOR") == 0)
3157 			t = VMEM_ROTOR;
3158 		else if (strcmp(type, "WLKR") == 0 ||
3159 		    strcmp(type, "WALKER") == 0)
3160 			t = VMEM_WALKER;
3161 		else {
3162 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3163 			    type);
3164 			return (DCMD_ERR);
3165 		}
3166 
3167 		if (vs.vs_type != t)
3168 			return (DCMD_OK);
3169 	}
3170 
3171 	sz = vs.vs_end - vs.vs_start;
3172 
3173 	if (minsize != 0 && sz < minsize)
3174 		return (DCMD_OK);
3175 
3176 	if (maxsize != 0 && sz > maxsize)
3177 		return (DCMD_OK);
3178 
3179 	t = vs.vs_type;
3180 	depth = vs.vs_depth;
3181 
3182 	/*
3183 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3184 	 */
3185 	no_debug = (t != VMEM_ALLOC) ||
3186 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3187 
3188 	if (no_debug) {
3189 		if (caller != NULL || thread != NULL || earliest != 0 ||
3190 		    latest != 0)
3191 			return (DCMD_OK);		/* not enough info */
3192 	} else {
3193 		if (caller != NULL) {
3194 			laddr = caller;
3195 			haddr = caller + sizeof (caller);
3196 
3197 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3198 			    sizeof (c), &sym) != -1 &&
3199 			    caller == (uintptr_t)sym.st_value) {
3200 				/*
3201 				 * We were provided an exact symbol value; any
3202 				 * address in the function is valid.
3203 				 */
3204 				laddr = (uintptr_t)sym.st_value;
3205 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3206 			}
3207 
3208 			for (i = 0; i < depth; i++)
3209 				if (vs.vs_stack[i] >= laddr &&
3210 				    vs.vs_stack[i] < haddr)
3211 					break;
3212 
3213 			if (i == depth)
3214 				return (DCMD_OK);
3215 		}
3216 
3217 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3218 			return (DCMD_OK);
3219 
3220 		if (earliest != 0 && vs.vs_timestamp < earliest)
3221 			return (DCMD_OK);
3222 
3223 		if (latest != 0 && vs.vs_timestamp > latest)
3224 			return (DCMD_OK);
3225 	}
3226 
3227 	type = (t == VMEM_ALLOC ? "ALLC" :
3228 	    t == VMEM_FREE ? "FREE" :
3229 	    t == VMEM_SPAN ? "SPAN" :
3230 	    t == VMEM_ROTOR ? "ROTR" :
3231 	    t == VMEM_WALKER ? "WLKR" :
3232 	    "????");
3233 
3234 	if (flags & DCMD_PIPE_OUT) {
3235 		mdb_printf("%#r\n", addr);
3236 		return (DCMD_OK);
3237 	}
3238 
3239 	if (verbose) {
3240 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3241 		    addr, type, vs.vs_start, vs.vs_end, sz);
3242 
3243 		if (no_debug)
3244 			return (DCMD_OK);
3245 
3246 		mdb_printf("%16s %4s %16d %16llx\n",
3247 		    "", "", vs.vs_thread, vs.vs_timestamp);
3248 
3249 		mdb_inc_indent(17);
3250 		for (i = 0; i < depth; i++) {
3251 			mdb_printf("%a\n", stk[i]);
3252 		}
3253 		mdb_dec_indent(17);
3254 		mdb_printf("\n");
3255 	} else {
3256 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3257 		    vs.vs_start, size? sz : vs.vs_end);
3258 
3259 		if (no_debug) {
3260 			mdb_printf("\n");
3261 			return (DCMD_OK);
3262 		}
3263 
3264 		for (i = 0; i < depth; i++) {
3265 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3266 			    c, sizeof (c), &sym) == -1)
3267 				continue;
3268 			if (is_umem_sym(c, "vmem_"))
3269 				continue;
3270 			break;
3271 		}
3272 		mdb_printf(i < depth ? " %a\n" : "\n", i < depth ? stk[i] : 0);
3273 	}
3274 	return (DCMD_OK);
3275 }
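
/*
 * A hedged usage sketch: segments are normally reached through the
 * per-arena walkers and filtered downstream.  For example, to list the
 * ALLOC segments of at least 64K in a (hypothetical) arena at
 * arena_addr:
 *
 *	> arena_addr::walk vmem_seg | ::vmem_seg -T ALLOC -m 0t65536
 */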
3276 
3277 /*ARGSUSED*/
3278 static int
3279 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3280 {
3281 	char name[UMEM_CACHE_NAMELEN + 1];
3282 	hrtime_t delta;
3283 	int i, depth;
3284 
3285 	if (bcp->bc_timestamp == 0)
3286 		return (WALK_DONE);
3287 
3288 	if (*newest == 0)
3289 		*newest = bcp->bc_timestamp;
3290 
3291 	delta = *newest - bcp->bc_timestamp;
3292 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3293 
3294 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3295 	    &bcp->bc_cache->cache_name) <= 0)
3296 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3297 
3298 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3299 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3300 
3301 	for (i = 0; i < depth; i++)
3302 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3303 
3304 	return (WALK_NEXT);
3305 }
3306 
3307 int
3308 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3309 {
3310 	const char *logname = "umem_transaction_log";
3311 	hrtime_t newest = 0;
3312 
3313 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3314 		return (DCMD_USAGE);
3315 
3316 	if (argc > 0) {
3317 		if (argv->a_type != MDB_TYPE_STRING)
3318 			return (DCMD_USAGE);
3319 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3320 			logname = "umem_failure_log";
3321 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3322 			logname = "umem_slab_log";
3323 		else
3324 			return (DCMD_USAGE);
3325 	}
3326 
3327 	if (umem_readvar(&addr, logname) == -1) {
3328 		mdb_warn("failed to read %s log header pointer", logname);
3329 		return (DCMD_ERR);
3330 	}
3331 
3332 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3333 		mdb_warn("failed to walk umem log");
3334 		return (DCMD_ERR);
3335 	}
3336 
3337 	return (DCMD_OK);
3338 }
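
/*
 * A hedged usage sketch: "::umalog" replays the transaction log with
 * T-minus timestamps relative to the newest entry; "::umalog fail" and
 * "::umalog slab" select the failure and slab logs instead.
 */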
3339 
3340 /*
3341  * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3342  * The first piece is a structure which we use to accumulate umem_cache_t
3343  * addresses of interest.  The umc_add is used as a callback for the umem_cache
3344  * walker; we either add all caches, or ones named explicitly as arguments.
3345  */
3346 
3347 typedef struct umclist {
3348 	const char *umc_name;			/* Name to match (or NULL) */
3349 	uintptr_t *umc_caches;			/* List of umem_cache_t addrs */
3350 	int umc_nelems;				/* Num entries in umc_caches */
3351 	int umc_size;				/* Size of umc_caches array */
3352 } umclist_t;
3353 
3354 static int
3355 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3356 {
3357 	void *p;
3358 	int s;
3359 
3360 	if (umc->umc_name == NULL ||
3361 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3362 		/*
3363 		 * If we have a match, grow our array (if necessary), and then
3364 		 * add the virtual address of the matching cache to our list.
3365 		 */
3366 		if (umc->umc_nelems >= umc->umc_size) {
3367 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3368 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3369 
3370 			bcopy(umc->umc_caches, p,
3371 			    sizeof (uintptr_t) * umc->umc_size);
3372 
3373 			umc->umc_caches = p;
3374 			umc->umc_size = s;
3375 		}
3376 
3377 		umc->umc_caches[umc->umc_nelems++] = addr;
3378 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3379 	}
3380 
3381 	return (WALK_NEXT);
3382 }
3383 
3384 /*
3385  * The second piece of ::umausers is a hash table of allocations.  Each
3386  * allocation owner is identified by its stack trace and data_size.  We then
3387  * track the total bytes of all such allocations, and the number of allocations
3388  * to report at the end.  Once we have a list of caches, we walk through the
3389  * allocated bufctls of each, and update our hash table accordingly.
3390  */
3391 
3392 typedef struct umowner {
3393 	struct umowner *umo_head;		/* First hash elt in bucket */
3394 	struct umowner *umo_next;		/* Next hash elt in chain */
3395 	size_t umo_signature;			/* Hash table signature */
3396 	uint_t umo_num;				/* Number of allocations */
3397 	size_t umo_data_size;			/* Size of each allocation */
3398 	size_t umo_total_size;			/* Total bytes of allocation */
3399 	int umo_depth;				/* Depth of stack trace */
3400 	uintptr_t *umo_stack;			/* Stack trace */
3401 } umowner_t;
3402 
3403 typedef struct umusers {
3404 	const umem_cache_t *umu_cache;		/* Current umem cache */
3405 	umowner_t *umu_hash;			/* Hash table of owners */
3406 	uintptr_t *umu_stacks;			/* stacks for owners */
3407 	int umu_nelems;				/* Number of entries in use */
3408 	int umu_size;				/* Total number of entries */
3409 } umusers_t;
3410 
3411 static void
3412 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3413     size_t size, size_t data_size)
3414 {
3415 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3416 	size_t bucket, signature = data_size;
3417 	umowner_t *umo, *umoend;
3418 
3419 	/*
3420 	 * If the hash table is full, double its size and rehash everything.
3421 	 */
3422 	if (umu->umu_nelems >= umu->umu_size) {
3423 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3424 		size_t umowner_size = sizeof (umowner_t);
3425 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3426 		uintptr_t *new_stacks;
3427 
3428 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3429 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3430 
3431 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3432 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3433 		umu->umu_hash = umo;
3434 		umu->umu_stacks = new_stacks;
3435 		umu->umu_size = s;
3436 
3437 		umoend = umu->umu_hash + umu->umu_size;
3438 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3439 			umo->umo_head = NULL;
3440 			umo->umo_stack = &umu->umu_stacks[
3441 			    umem_stack_depth * (umo - umu->umu_hash)];
3442 		}
3443 
3444 		umoend = umu->umu_hash + umu->umu_nelems;
3445 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3446 			bucket = umo->umo_signature & (umu->umu_size - 1);
3447 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3448 			umu->umu_hash[bucket].umo_head = umo;
3449 		}
3450 	}
3451 
3452 	/*
3453 	 * Finish computing the hash signature from the stack trace, and then
3454 	 * see if the owner is in the hash table.  If so, update our stats.
3455 	 */
3456 	for (i = 0; i < depth; i++)
3457 		signature += bcp->bc_stack[i];
3458 
3459 	bucket = signature & (umu->umu_size - 1);
3460 
3461 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3462 		if (umo->umo_signature == signature) {
3463 			size_t difference = 0;
3464 
3465 			difference |= umo->umo_data_size - data_size;
3466 			difference |= umo->umo_depth - depth;
3467 
3468 			for (i = 0; i < depth; i++) {
3469 				difference |= umo->umo_stack[i] -
3470 				    bcp->bc_stack[i];
3471 			}
3472 
3473 			if (difference == 0) {
3474 				umo->umo_total_size += size;
3475 				umo->umo_num++;
3476 				return;
3477 			}
3478 		}
3479 	}
3480 
3481 	/*
3482 	 * If the owner is not yet hashed, grab the next element and fill it
3483 	 * in based on the allocation information.
3484 	 */
3485 	umo = &umu->umu_hash[umu->umu_nelems++];
3486 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3487 	umu->umu_hash[bucket].umo_head = umo;
3488 
3489 	umo->umo_signature = signature;
3490 	umo->umo_num = 1;
3491 	umo->umo_data_size = data_size;
3492 	umo->umo_total_size = size;
3493 	umo->umo_depth = depth;
3494 
3495 	for (i = 0; i < depth; i++)
3496 		umo->umo_stack[i] = bcp->bc_stack[i];
3497 }
3498 
3499 /*
3500  * When ::umausers is invoked without the -f flag, we simply update our hash
3501  * table with the information from each allocated bufctl.
3502  */
3503 /*ARGSUSED*/
3504 static int
3505 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3506 {
3507 	const umem_cache_t *cp = umu->umu_cache;
3508 
3509 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3510 	return (WALK_NEXT);
3511 }
3512 
3513 /*
3514  * When ::umausers is invoked with the -f flag, we print out the information
3515  * for each bufctl as well as updating the hash table.
3516  */
3517 static int
3518 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3519 {
3520 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3521 	const umem_cache_t *cp = umu->umu_cache;
3522 
3523 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3524 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3525 
3526 	for (i = 0; i < depth; i++)
3527 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3528 
3529 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3530 	return (WALK_NEXT);
3531 }
3532 
3533 /*
3534  * We sort our results by allocation size before printing them.
3535  */
3536 static int
3537 umownercmp(const void *lp, const void *rp)
3538 {
3539 	const umowner_t *lhs = lp;
3540 	const umowner_t *rhs = rp;
3541 
3542 	return (rhs->umo_total_size - lhs->umo_total_size);
3543 }
3544 
3545 /*
3546  * The main engine of ::umausers is relatively straightforward: First we
3547  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3548  * iterate over the allocated bufctls of each cache in the list.  Finally,
3549  * we sort and print our results.
3550  */
3551 /*ARGSUSED*/
3552 int
3553 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3554 {
3555 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3556 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3557 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3558 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3559 	int opt_e = FALSE;		/* Include "small" users */
3560 	int opt_f = FALSE;		/* Print stack traces */
3561 
3562 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3563 	umowner_t *umo, *umoend;
3564 	int i, oelems;
3565 
3566 	umclist_t umc;
3567 	umusers_t umu;
3568 
3569 	if (flags & DCMD_ADDRSPEC)
3570 		return (DCMD_USAGE);
3571 
3572 	bzero(&umc, sizeof (umc));
3573 	bzero(&umu, sizeof (umu));
3574 
3575 	while ((i = mdb_getopts(argc, argv,
3576 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3577 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3578 
3579 		argv += i;	/* skip past options we just processed */
3580 		argc -= i;	/* adjust argc */
3581 
3582 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3583 			return (DCMD_USAGE);
3584 
3585 		oelems = umc.umc_nelems;
3586 		umc.umc_name = argv->a_un.a_str;
3587 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3588 
3589 		if (umc.umc_nelems == oelems) {
3590 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3591 			return (DCMD_ERR);
3592 		}
3593 
3594 		do_all_caches = 0;
3595 		argv++;
3596 		argc--;
3597 	}
3598 
3599 	if (opt_e)
3600 		mem_threshold = cnt_threshold = 0;
3601 
3602 	if (opt_f)
3603 		callback = (mdb_walk_cb_t)umause2;
3604 
3605 	if (do_all_caches) {
3606 		umc.umc_name = NULL; /* match all cache names */
3607 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3608 	}
3609 
3610 	for (i = 0; i < umc.umc_nelems; i++) {
3611 		uintptr_t cp = umc.umc_caches[i];
3612 		umem_cache_t c;
3613 
3614 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3615 			mdb_warn("failed to read cache at %p", cp);
3616 			continue;
3617 		}
3618 
3619 		if (!(c.cache_flags & UMF_AUDIT)) {
3620 			if (!do_all_caches) {
3621 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3622 				    c.cache_name);
3623 			}
3624 			continue;
3625 		}
3626 
3627 		umu.umu_cache = &c;
3628 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3629 		audited_caches++;
3630 	}
3631 
3632 	if (audited_caches == 0 && do_all_caches) {
3633 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3634 		return (DCMD_ERR);
3635 	}
3636 
3637 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3638 	umoend = umu.umu_hash + umu.umu_nelems;
3639 
3640 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3641 		if (umo->umo_total_size < mem_threshold &&
3642 		    umo->umo_num < cnt_threshold)
3643 			continue;
3644 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3645 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3646 		for (i = 0; i < umo->umo_depth; i++)
3647 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3648 	}
3649 
3650 	return (DCMD_OK);
3651 }
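
/*
 * A hedged usage sketch: "::umausers" summarizes every audited cache;
 * "::umausers -e -f umem_alloc_256" would also include "small" owners
 * (-e) and print a stack trace for each bufctl (-f) while restricting
 * the search to the umem_alloc_256 cache.
 */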
3652 
3653 struct malloc_data {
3654 	uint32_t malloc_size;
3655 	uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
3656 };
3657 
3658 #ifdef _LP64
3659 #define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
3660 #else
3661 #define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
3662 #endif
3663 
3664 typedef struct umem_malloc_info {
3665 	size_t um_total;	/* total allocated buffers */
3666 	size_t um_malloc;	/* malloc buffers */
3667 	size_t um_malloc_size;	/* sum of malloc buffer sizes */
3668 	size_t um_malloc_overhead; /* sum of in-chunk overheads */
3669 
3670 	umem_cache_t *um_cp;
3671 
3672 	uint_t *um_bucket;
3673 } umem_malloc_info_t;
3674 
3675 static void
3676 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3677     size_t maxbuckets, size_t minbucketsize, int geometric)
3678 {
3679 	uint64_t um_malloc;
3680 	int minb = -1;
3681 	int maxb = -1;
3682 	int buckets;
3683 	int nbucks;
3684 	int i;
3685 	int b;
3686 	const int *distarray;
3687 
3688 	minb = (int)minmalloc;
3689 	maxb = (int)maxmalloc;
3690 
3691 	nbucks = buckets = maxb - minb + 1;
3692 
3693 	um_malloc = 0;
3694 	for (b = minb; b <= maxb; b++)
3695 		um_malloc += um_bucket[b];
3696 
3697 	if (maxbuckets != 0)
3698 		buckets = MIN(buckets, maxbuckets);
3699 
3700 	if (minbucketsize > 1) {
3701 		buckets = MIN(buckets, nbucks/minbucketsize);
3702 		if (buckets == 0) {
3703 			buckets = 1;
3704 			minbucketsize = nbucks;
3705 		}
3706 	}
3707 
3708 	if (geometric)
3709 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3710 	else
3711 		distarray = dist_linear(buckets, minb, maxb);
3712 
3713 	dist_print_header("malloc size", 11, "count");
3714 	for (i = 0; i < buckets; i++) {
3715 		dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3716 	}
3717 	mdb_printf("\n");
3718 }
3719 
3720 /*
3721  * A malloc()ed buffer looks like:
3722  *
3723  *	<----------- mi.malloc_size --->
3724  *	<----------- cp.cache_bufsize ------------------>
3725  *	<----------- cp.cache_chunksize -------------------------------->
3726  *	+-------+-----------------------+---------------+---------------+
3727  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3728  *	+-------+---------------------------------------+---------------+
3729  *		<-- usable space ------>
3730  *
3731  * mallocsz is the argument to malloc(3C).
3732  * mi.malloc_size is the actual size passed to umem_alloc(), which
3733  * is rounded up to the smallest available cache size, which is
3734  * cache_bufsize.  If there is debugging or alignment overhead in
3735  * the cache, that is reflected in a larger cache_chunksize.
3736  *
3737  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3738  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3739  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3740  * the tag has 8 bytes of padding before it.
3741  *
3742  * 32-byte, 64-byte buffers <= 8 bytes:
3743  *	+-------+-------+--------- ...
3744  *	|/size//|/stat//| mallocsz ...
3745  *	+-------+-------+--------- ...
3746  *			^
3747  *			pointer returned from malloc(3C)
3748  *
3749  * 64-byte buffers > 8 bytes:
3750  *	+---------------+-------+-------+--------- ...
3751  *	|/padding///////|/size//|/stat//| mallocsz ...
3752  *	+---------------+-------+-------+--------- ...
3753  *					^
3754  *					pointer returned from malloc(3C)
3755  *
3756  * The "size" field is "malloc_size", which is mallocsz + the padding.
3757  * The "stat" field is derived from malloc_size, and functions as a
3758  * validation that this buffer is actually from malloc(3C).
3759  */
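
/*
 * A hedged worked example (sizes illustrative): in a 32-bit process,
 * malloc(20) stores an 8-byte tag, so malloc_size is 28, which
 * umem_alloc() rounds up to the 32-byte cache.  um_umem_buffer_cb()
 * below recovers mallocsz as 28 - 8 = 20 and charges the 4 round-off
 * bytes (plus any debug space in the chunk) to um_malloc_overhead.
 */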
3760 /*ARGSUSED*/
3761 static int
3762 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3763 {
3764 	struct malloc_data md;
3765 	size_t m_addr = addr;
3766 	size_t overhead = sizeof (md);
3767 	size_t mallocsz;
3768 
3769 	ump->um_total++;
3770 
3771 #ifdef _LP64
3772 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3773 		m_addr += overhead;
3774 		overhead += sizeof (md);
3775 	}
3776 #endif
3777 
3778 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3779 		mdb_warn("unable to read malloc header at %p", m_addr);
3780 		return (WALK_NEXT);
3781 	}
3782 
3783 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3784 	case MALLOC_MAGIC:
3785 #ifdef _LP64
3786 	case MALLOC_SECOND_MAGIC:
3787 #endif
3788 		mallocsz = md.malloc_size - overhead;
3789 
3790 		ump->um_malloc++;
3791 		ump->um_malloc_size += mallocsz;
3792 		ump->um_malloc_overhead += overhead;
3793 
3794 		/* include round-off and debug overhead */
3795 		ump->um_malloc_overhead +=
3796 		    ump->um_cp->cache_chunksize - md.malloc_size;
3797 
3798 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
3799 			ump->um_bucket[mallocsz]++;
3800 
3801 		break;
3802 	default:
3803 		break;
3804 	}
3805 
3806 	return (WALK_NEXT);
3807 }
3808 
3809 int
3810 get_umem_alloc_sizes(int **out, size_t *out_num)
3811 {
3812 	GElf_Sym sym;
3813 
3814 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
3815 		mdb_warn("unable to look up umem_alloc_sizes");
3816 		return (-1);
3817 	}
3818 
3819 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
3820 	*out_num = sym.st_size / sizeof (int);
3821 
3822 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
3823 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
3824 		*out = NULL;
3825 		return (-1);
3826 	}
3827 
3828 	return (0);
3829 }
3830 
3831 
3832 static int
3833 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
3834 {
3835 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
3836 		return (WALK_NEXT);
3837 
3838 	ump->um_cp = cp;
3839 
3840 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
3841 	    -1) {
3842 		mdb_warn("can't walk 'umem' for cache %p", addr);
3843 		return (WALK_ERR);
3844 	}
3845 
3846 	return (WALK_NEXT);
3847 }
3848 
3849 void
3850 umem_malloc_dist_help(void)
3851 {
3852 	mdb_printf("%s\n",
3853 	    "report distribution of outstanding malloc()s");
3854 	mdb_dec_indent(2);
3855 	mdb_printf("%<b>OPTIONS%</b>\n");
3856 	mdb_inc_indent(2);
3857 	mdb_printf("%s",
3858 "  -b maxbins\n"
3859 "        Use at most maxbins bins for the data\n"
3860 "  -B minbinsize\n"
3861 "        Make the bins at least minbinsize bytes apart\n"
3862 "  -d    dump the raw data out, without binning\n"
3863 "  -g    use geometric binning instead of linear binning\n");
3864 }
3865 
3866 /*ARGSUSED*/
3867 int
3868 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3869 {
3870 	umem_malloc_info_t mi;
3871 	uint_t geometric = 0;
3872 	uint_t dump = 0;
3873 	size_t maxbuckets = 0;
3874 	size_t minbucketsize = 0;
3875 
3876 	size_t minalloc = 0;
3877 	size_t maxalloc = UMI_MAX_BUCKET;
3878 
3879 	if (flags & DCMD_ADDRSPEC)
3880 		return (DCMD_USAGE);
3881 
3882 	if (mdb_getopts(argc, argv,
3883 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3884 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3885 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3886 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3887 	    0) != argc)
3888 		return (DCMD_USAGE);
3889 
3890 	bzero(&mi, sizeof (mi));
3891 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
3892 	    UM_SLEEP | UM_GC);
3893 
3894 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
3895 	    &mi) == -1) {
3896 		mdb_warn("unable to walk 'umem_cache'");
3897 		return (DCMD_ERR);
3898 	}
3899 
3900 	if (dump) {
3901 		int i;
3902 		for (i = minalloc; i <= maxalloc; i++)
3903 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
3904 
3905 		return (DCMD_OK);
3906 	}
3907 
3908 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
3909 	    maxbuckets, minbucketsize, geometric);
3910 
3911 	return (DCMD_OK);
3912 }
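
/*
 * A hedged usage sketch: "::umem_malloc_dist -g" bins the outstanding
 * allocations geometrically; "-b 16" caps the report at 16 bins; "-d"
 * emits raw "size count" pairs, one per line, for external tools.
 */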
3913 
3914 void
3915 umem_malloc_info_help(void)
3916 {
3917 	mdb_printf("%s\n",
3918 	    "report information about malloc()s by cache.");
3919 	mdb_dec_indent(2);
3920 	mdb_printf("%<b>OPTIONS%</b>\n");
3921 	mdb_inc_indent(2);
3922 	mdb_printf("%s",
3923 "  -b maxbins\n"
3924 "        Use at most maxbins bins for the data\n"
3925 "  -B minbinsize\n"
3926 "        Make the bins at least minbinsize bytes apart\n"
3927 "  -d    dump the raw distribution data without binning\n"
3928 #ifndef _KMDB
3929 "  -g    use geometric binning instead of linear binning\n"
3930 #endif
3931 	    "");
3932 }

3933 int
3934 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3935 {
3936 	umem_cache_t c;
3937 	umem_malloc_info_t mi;
3938 
3939 	int skip = 0;
3940 
3941 	size_t maxmalloc;
3942 	size_t overhead;
3943 	size_t allocated;
3944 	size_t avg_malloc;
3945 	size_t overhead_pct;	/* 1000 * overhead_percent */
3946 
3947 	uint_t verbose = 0;
3948 	uint_t dump = 0;
3949 	uint_t geometric = 0;
3950 	size_t maxbuckets = 0;
3951 	size_t minbucketsize = 0;
3952 
3953 	int *alloc_sizes;
3954 	int idx;
3955 	size_t num;
3956 	size_t minmalloc;
3957 
3958 	if (mdb_getopts(argc, argv,
3959 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3960 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3961 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3962 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3963 	    0) != argc)
3964 		return (DCMD_USAGE);
3965 
3966 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
3967 		verbose = 1;
3968 
3969 	if (!(flags & DCMD_ADDRSPEC)) {
3970 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
3971 		    argc, argv) == -1) {
3972 			mdb_warn("can't walk umem_cache");
3973 			return (DCMD_ERR);
3974 		}
3975 		return (DCMD_OK);
3976 	}
3977 
3978 	if (!mdb_vread(&c, sizeof (c), addr)) {
		mdb_warn("unable to read cache at %p", addr);
		return (DCMD_ERR);
	}

	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
		if (!(flags & DCMD_LOOP))
			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
			    "by malloc()\n", c.cache_name);
		skip = 1;
	}

	/*
	 * Normally, print the header only the first time.  In verbose mode,
	 * print the header on every non-skipped cache.
	 */
	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
		    "CACHE", "BUFSZ", "MAXMAL",
		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");

	if (skip)
		return (DCMD_OK);

	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
#ifdef _LP64
	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
		maxmalloc -= sizeof (struct malloc_data);
#endif
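
	/*
	 * Illustrative numbers: assuming the malloc_data tag is the
	 * 8-byte size/magic header, a 64-byte cache in a 32-bit process
	 * yields maxmalloc = 64 - 8 = 56; 64-bit buffers larger than
	 * UMEM_SECOND_ALIGN carry a second tag.
	 */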

	bzero(&mi, sizeof (mi));
	mi.um_cp = &c;
	if (verbose)
		mi.um_bucket =
		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
		    UM_SLEEP | UM_GC);

	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
	    -1) {
		mdb_warn("can't walk 'umem'");
		return (DCMD_ERR);
	}

	overhead = mi.um_malloc_overhead;
	allocated = mi.um_malloc_size;

	/* do integer round off for the average */
	if (mi.um_malloc != 0)
		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
	else
		avg_malloc = 0;
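
	/*
	 * For example, 102 bytes over 7 mallocs gives
	 * (102 + 3) / 7 == 15, i.e. 14.57 rounded to the nearest integer
	 * rather than truncated to 14.
	 */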

	/*
	 * include per-slab overhead
	 *
	 * Each slab in a given cache is the same size, and has the same
	 * number of chunks in it;  we read in the first slab on the
	 * slab list to get the number of chunks for all slabs.  To
	 * compute the per-slab overhead, we just subtract the chunk usage
	 * from the slabsize:
	 *
	 * +------------+-------+-------+ ... --+-------+-------+-------+
	 * |////////////|	|	| ...	|	|///////|///////|
	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
	 * |////////////|	|	| ...	|	|///////|///////|
	 * +------------+-------+-------+ ... --+-------+-------+-------+
	 * |		\_______chunksize * chunks_____/		|
	 * \__________________________slabsize__________________________/
	 *
	 * For UMF_HASH caches, there is an additional source of overhead:
	 * the external umem_slab_t and per-chunk bufctl structures.  We
	 * include those in our per-slab overhead.
	 *
	 * Once we have a number for the per-slab overhead, we estimate
	 * the actual overhead by treating the malloc()ed buffers as if
	 * they were densely packed:
	 *
	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
	 *
	 * carefully ordering the multiply before the divide, to avoid
	 * round-off error.
	 */
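	/*
	 * Worked example (illustrative numbers only): an 8K slab holding
	 * ten 800-byte chunks leaves 8192 - 8000 = 192 bytes of per-slab
	 * overhead.  With 1000 outstanding mallocs from such slabs, we
	 * would charge 192 * 1000 / 10 = 19200 additional bytes.
	 */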
	if (mi.um_malloc != 0) {
		umem_slab_t slab;
		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;

		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
			mdb_warn("unable to read slab at %p\n", saddr);
		} else {
			long chunks = slab.slab_chunks;
			if (chunks != 0 && c.cache_chunksize != 0 &&
			    chunks <= c.cache_slabsize / c.cache_chunksize) {
				uintmax_t perslab =
				    c.cache_slabsize -
				    (c.cache_chunksize * chunks);

				if (c.cache_flags & UMF_HASH) {
					perslab += sizeof (umem_slab_t) +
					    chunks *
					    ((c.cache_flags & UMF_AUDIT) ?
					    sizeof (umem_bufctl_audit_t) :
					    sizeof (umem_bufctl_t));
				}
				overhead +=
				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
			} else {
				mdb_warn("invalid #chunks (%d) in slab %p\n",
				    chunks, saddr);
			}
		}
	}

	if (allocated != 0)
		overhead_pct = (1000ULL * overhead) / allocated;
	else
		overhead_pct = 0;
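
	/*
	 * overhead_pct carries one decimal digit: e.g. overhead = 1234
	 * against allocated = 10000 gives overhead_pct = 123, printed
	 * below as "12.3%".
	 */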

	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
	    addr, c.cache_bufsize, maxmalloc,
	    mi.um_malloc, avg_malloc, allocated, overhead,
	    overhead_pct / 10, overhead_pct % 10);

	if (!verbose)
		return (DCMD_OK);

	if (!dump)
		mdb_printf("\n");

	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
		return (DCMD_ERR);

	for (idx = 0; idx < num; idx++) {
		if (alloc_sizes[idx] == c.cache_bufsize)
			break;
		if (alloc_sizes[idx] == 0) {
			idx = num;	/* 0-terminated array */
			break;
		}
	}
	if (idx == num) {
		mdb_warn(
		    "cache %p's size (%d) not in umem_alloc_sizes\n",
		    addr, c.cache_bufsize);
		return (DCMD_ERR);
	}

	minmalloc = (idx == 0) ? 0 : alloc_sizes[idx - 1];
	if (minmalloc > 0) {
#ifdef _LP64
		if (minmalloc > UMEM_SECOND_ALIGN)
			minmalloc -= sizeof (struct malloc_data);
#endif
		minmalloc -= sizeof (struct malloc_data);
		minmalloc += 1;
	}
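
	/*
	 * Illustrative numbers, again assuming an 8-byte malloc_data tag:
	 * if the next-smaller cache is umem_alloc_32, its largest
	 * malloc()able request is 32 - 8 = 24 bytes, so this cache's
	 * smallest is minmalloc = 24 + 1 = 25 bytes.
	 */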

	if (dump) {
		for (idx = minmalloc; idx <= maxmalloc; idx++)
			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
		mdb_printf("\n");
	} else {
		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
		    maxbuckets, minbucketsize, geometric);
	}

	return (DCMD_OK);
}