xref: /titanic_52/usr/src/cmd/mdb/common/modules/libumem/umem.c (revision 7386f4319a95797d8397f24d609435b2c0e027a6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2011 Joyent, Inc.  All rights reserved.
28  */
29 
30 #include "umem.h"
31 
32 #include <sys/vmem_impl_user.h>
33 #include <umem_impl.h>
34 
35 #include <alloca.h>
36 #include <limits.h>
37 #include <mdb/mdb_whatis.h>
38 
39 #include "misc.h"
40 #include "leaky.h"
41 #include "dist.h"
42 
43 #include "umem_pagesize.h"
44 
/*
 * Walk-type flags.  umem_walk_init_common() and umem_walk_step() test
 * these bits in their "type" value to decide which buffers to report
 * (allocated or free), whether to report bufctls rather than buffers, and
 * whether the layered walk is over the cache's hash table.
 */
#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

/*
 * Debugger-side copy of the target's umem_ready, refreshed by
 * umem_update_variables(); forced to 0 there when umem is not present.
 */
int umem_ready;

/* Nonzero once the "umem_stack_depth corrupted" warning has been issued. */
static int umem_stack_depth_warned;
/* Read from the target's umem_max_ncpus (or max_ncpus) variable. */
static uint32_t umem_max_ncpus;
/*
 * Copy of the target's umem_stack_depth; reset to 0 by
 * umem_update_variables() when it exceeds UMEM_MAX_STACK_DEPTH.
 */
uint32_t umem_stack_depth;

/* Copy of the target's "pagesize" variable. */
size_t umem_pagesize;
57 
/*
 * Read the target variable whose name is the spelling of "var" into the
 * local or global object "var" via umem_readvar().  The expression is
 * nonzero (after issuing a warning) when umem_readvar() returns -1, and
 * zero otherwise, so it can be used directly as an "if (failed)" test.
 * Because the name is stringified, the C identifier passed in must match
 * the target symbol name exactly.
 */
#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    (mdb_warn("failed to read "#var), 1))
61 
62 int
63 umem_update_variables(void)
64 {
65 	size_t pagesize;
66 
67 	/*
68 	 * Figure out which type of umem is being used; if it's not there
69 	 * yet, succeed quietly.
70 	 */
71 	if (umem_set_standalone() == -1) {
72 		umem_ready = 0;
73 		return (0);		/* umem not there yet */
74 	}
75 
76 	/*
77 	 * Solaris 9 used a different name for umem_max_ncpus.  It's
78 	 * cheap backwards compatibility to check for both names.
79 	 */
80 	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
81 	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
82 		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
83 		return (-1);
84 	}
85 	if (UMEM_READVAR(umem_ready))
86 		return (-1);
87 	if (UMEM_READVAR(umem_stack_depth))
88 		return (-1);
89 	if (UMEM_READVAR(pagesize))
90 		return (-1);
91 
92 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
93 		if (umem_stack_depth_warned == 0) {
94 			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
95 			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
96 			umem_stack_depth_warned = 1;
97 		}
98 		umem_stack_depth = 0;
99 	}
100 
101 	umem_pagesize = pagesize;
102 
103 	return (0);
104 }
105 
106 /*ARGSUSED*/
107 static int
108 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
109 {
110 	mdb_walker_t w;
111 	char descr[64];
112 
113 	(void) mdb_snprintf(descr, sizeof (descr),
114 	    "walk the %s cache", c->cache_name);
115 
116 	w.walk_name = c->cache_name;
117 	w.walk_descr = descr;
118 	w.walk_init = umem_walk_init;
119 	w.walk_step = umem_walk_step;
120 	w.walk_fini = umem_walk_fini;
121 	w.walk_init_arg = (void *)addr;
122 
123 	if (mdb_add_walker(&w) == -1)
124 		mdb_warn("failed to add %s walker", c->cache_name);
125 
126 	return (WALK_NEXT);
127 }
128 
129 /*ARGSUSED*/
130 static void
131 umem_statechange_cb(void *arg)
132 {
133 	static int been_ready = 0;
134 
135 #ifndef _KMDB
136 	leaky_cleanup(1);	/* state changes invalidate leaky state */
137 #endif
138 
139 	if (umem_update_variables() == -1)
140 		return;
141 
142 	if (been_ready)
143 		return;
144 
145 	if (umem_ready != UMEM_READY)
146 		return;
147 
148 	been_ready = 1;
149 	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL);
150 }
151 
152 int
153 umem_abort_messages(void)
154 {
155 	char *umem_error_buffer;
156 	uint_t umem_error_begin;
157 	GElf_Sym sym;
158 	size_t bufsize;
159 
160 	if (UMEM_READVAR(umem_error_begin))
161 		return (DCMD_ERR);
162 
163 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
164 		mdb_warn("unable to look up umem_error_buffer");
165 		return (DCMD_ERR);
166 	}
167 
168 	bufsize = (size_t)sym.st_size;
169 
170 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
171 
172 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
173 	    != bufsize) {
174 		mdb_warn("unable to read umem_error_buffer");
175 		return (DCMD_ERR);
176 	}
177 	/* put a zero after the end of the buffer to simplify printing */
178 	umem_error_buffer[bufsize] = 0;
179 
180 	if ((umem_error_begin % bufsize) == 0)
181 		mdb_printf("%s\n", umem_error_buffer);
182 	else {
183 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
184 		mdb_printf("%s%s\n",
185 		    &umem_error_buffer[umem_error_begin % bufsize],
186 		    umem_error_buffer);
187 	}
188 
189 	return (DCMD_OK);
190 }
191 
192 static void
193 umem_log_status(const char *name, umem_log_header_t *val)
194 {
195 	umem_log_header_t my_lh;
196 	uintptr_t pos = (uintptr_t)val;
197 	size_t size;
198 
199 	if (pos == NULL)
200 		return;
201 
202 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
203 		mdb_warn("\nunable to read umem_%s_log pointer %p",
204 		    name, pos);
205 		return;
206 	}
207 
208 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
209 
210 	if (size % (1024 * 1024) == 0)
211 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
212 	else if (size % 1024 == 0)
213 		mdb_printf("%s=%dk ", name, size / 1024);
214 	else
215 		mdb_printf("%s=%d ", name, size);
216 }
217 
/*
 * One named set of umem debugging flags, as used by the
 * umem_status_flags[] table.
 */
typedef struct umem_debug_flags {
	const char	*udf_name;	/* name of the flag set */
	uint_t		udf_flags;	/* UMF_* bits associated with the name */
	uint_t		udf_clear;	/* if 0, uses udf_flags */
} umem_debug_flags_t;
223 
/*
 * Table mapping flag-set names to UMF_* bits.  The list ends with an
 * entry whose udf_name is NULL.  Entries that leave udf_clear out get 0
 * for it, which per the structure definition means udf_flags is used
 * in its place.
 * NOTE(review): no consumer of this table is visible in this part of the
 * file -- confirm where it is used before changing entry order.
 */
umem_debug_flags_t umem_status_flags[] = {
	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
	{ "audit",	UMF_AUDIT },
	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
	{ "nosignal",	UMF_CHECKSIGNAL },
	{ "firewall",	UMF_FIREWALL },
	{ "lite",	UMF_LITE },
	{ NULL }
};
234 
235 /*ARGSUSED*/
236 int
237 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
238 {
239 	int umem_logging;
240 
241 	umem_log_header_t *umem_transaction_log;
242 	umem_log_header_t *umem_content_log;
243 	umem_log_header_t *umem_failure_log;
244 	umem_log_header_t *umem_slab_log;
245 
246 	mdb_printf("Status:\t\t%s\n",
247 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
248 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
249 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
250 	    umem_ready == UMEM_READY ? "ready and active" :
251 	    umem_ready == 0 ? "not loaded into address space" :
252 	    "unknown (umem_ready invalid)");
253 
254 	if (umem_ready == 0)
255 		return (DCMD_OK);
256 
257 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
258 
259 	if (UMEM_READVAR(umem_logging))
260 		goto err;
261 	if (UMEM_READVAR(umem_transaction_log))
262 		goto err;
263 	if (UMEM_READVAR(umem_content_log))
264 		goto err;
265 	if (UMEM_READVAR(umem_failure_log))
266 		goto err;
267 	if (UMEM_READVAR(umem_slab_log))
268 		goto err;
269 
270 	mdb_printf("Logs:\t\t");
271 	umem_log_status("transaction", umem_transaction_log);
272 	umem_log_status("content", umem_content_log);
273 	umem_log_status("fail", umem_failure_log);
274 	umem_log_status("slab", umem_slab_log);
275 	if (!umem_logging)
276 		mdb_printf("(inactive)");
277 	mdb_printf("\n");
278 
279 	mdb_printf("Message buffer:\n");
280 	return (umem_abort_messages());
281 
282 err:
283 	mdb_printf("Message buffer:\n");
284 	(void) umem_abort_messages();
285 	return (DCMD_ERR);
286 }
287 
/*
 * State for the umem_cache walker.
 */
typedef struct {
	/* address of umem_null_cache; the walk ends on returning to it */
	uintptr_t ucw_first;
	/* address of the next cache to read and report */
	uintptr_t ucw_current;
} umem_cache_walk_t;
292 
293 int
294 umem_cache_walk_init(mdb_walk_state_t *wsp)
295 {
296 	umem_cache_walk_t *ucw;
297 	umem_cache_t c;
298 	uintptr_t cp;
299 	GElf_Sym sym;
300 
301 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
302 		mdb_warn("couldn't find umem_null_cache");
303 		return (WALK_ERR);
304 	}
305 
306 	cp = (uintptr_t)sym.st_value;
307 
308 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
309 		mdb_warn("couldn't read cache at %p", cp);
310 		return (WALK_ERR);
311 	}
312 
313 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
314 
315 	ucw->ucw_first = cp;
316 	ucw->ucw_current = (uintptr_t)c.cache_next;
317 	wsp->walk_data = ucw;
318 
319 	return (WALK_NEXT);
320 }
321 
322 int
323 umem_cache_walk_step(mdb_walk_state_t *wsp)
324 {
325 	umem_cache_walk_t *ucw = wsp->walk_data;
326 	umem_cache_t c;
327 	int status;
328 
329 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
330 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
331 		return (WALK_DONE);
332 	}
333 
334 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
335 
336 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
337 		return (WALK_DONE);
338 
339 	return (status);
340 }
341 
342 void
343 umem_cache_walk_fini(mdb_walk_state_t *wsp)
344 {
345 	umem_cache_walk_t *ucw = wsp->walk_data;
346 	mdb_free(ucw, sizeof (umem_cache_walk_t));
347 }
348 
/*
 * State for the umem_cpu walker.
 */
typedef struct {
	/* value of the target's umem_cpus pointer (a target address) */
	umem_cpu_t *ucw_cpus;
	/* index of the next umem_cpu_t to report */
	uint32_t ucw_current;
	/* number of entries to walk; set from umem_max_ncpus */
	uint32_t ucw_max;
} umem_cpu_walk_state_t;
354 
355 int
356 umem_cpu_walk_init(mdb_walk_state_t *wsp)
357 {
358 	umem_cpu_t *umem_cpus;
359 
360 	umem_cpu_walk_state_t *ucw;
361 
362 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
363 		mdb_warn("failed to read 'umem_cpus'");
364 		return (WALK_ERR);
365 	}
366 
367 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
368 
369 	ucw->ucw_cpus = umem_cpus;
370 	ucw->ucw_current = 0;
371 	ucw->ucw_max = umem_max_ncpus;
372 
373 	wsp->walk_data = ucw;
374 	return (WALK_NEXT);
375 }
376 
377 int
378 umem_cpu_walk_step(mdb_walk_state_t *wsp)
379 {
380 	umem_cpu_t cpu;
381 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
382 
383 	uintptr_t caddr;
384 
385 	if (ucw->ucw_current >= ucw->ucw_max)
386 		return (WALK_DONE);
387 
388 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
389 
390 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
391 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
392 		return (WALK_ERR);
393 	}
394 
395 	ucw->ucw_current++;
396 
397 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
398 }
399 
400 void
401 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
402 {
403 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
404 
405 	mdb_free(ucw, sizeof (*ucw));
406 }
407 
408 int
409 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
410 {
411 	if (wsp->walk_addr == NULL) {
412 		mdb_warn("umem_cpu_cache doesn't support global walks");
413 		return (WALK_ERR);
414 	}
415 
416 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
417 		mdb_warn("couldn't walk 'umem_cpu'");
418 		return (WALK_ERR);
419 	}
420 
421 	wsp->walk_data = (void *)wsp->walk_addr;
422 
423 	return (WALK_NEXT);
424 }
425 
426 int
427 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
428 {
429 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
430 	const umem_cpu_t *cpu = wsp->walk_layer;
431 	umem_cpu_cache_t cc;
432 
433 	caddr += cpu->cpu_cache_offset;
434 
435 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
436 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
437 		return (WALK_ERR);
438 	}
439 
440 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
441 }
442 
443 int
444 umem_slab_walk_init(mdb_walk_state_t *wsp)
445 {
446 	uintptr_t caddr = wsp->walk_addr;
447 	umem_cache_t c;
448 
449 	if (caddr == NULL) {
450 		mdb_warn("umem_slab doesn't support global walks\n");
451 		return (WALK_ERR);
452 	}
453 
454 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
455 		mdb_warn("couldn't read umem_cache at %p", caddr);
456 		return (WALK_ERR);
457 	}
458 
459 	wsp->walk_data =
460 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
461 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
462 
463 	return (WALK_NEXT);
464 }
465 
466 int
467 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
468 {
469 	uintptr_t caddr = wsp->walk_addr;
470 	umem_cache_t c;
471 
472 	if (caddr == NULL) {
473 		mdb_warn("umem_slab_partial doesn't support global walks\n");
474 		return (WALK_ERR);
475 	}
476 
477 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
478 		mdb_warn("couldn't read umem_cache at %p", caddr);
479 		return (WALK_ERR);
480 	}
481 
482 	wsp->walk_data =
483 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
484 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
485 
486 	/*
487 	 * Some consumers (umem_walk_step(), in particular) require at
488 	 * least one callback if there are any buffers in the cache.  So
489 	 * if there are *no* partial slabs, report the last full slab, if
490 	 * any.
491 	 *
492 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
493 	 */
494 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
495 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
496 
497 	return (WALK_NEXT);
498 }
499 
500 int
501 umem_slab_walk_step(mdb_walk_state_t *wsp)
502 {
503 	umem_slab_t s;
504 	uintptr_t addr = wsp->walk_addr;
505 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
506 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
507 
508 	if (addr == saddr)
509 		return (WALK_DONE);
510 
511 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
512 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
513 		return (WALK_ERR);
514 	}
515 
516 	if ((uintptr_t)s.slab_cache != caddr) {
517 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
518 		    addr, caddr, s.slab_cache);
519 		return (WALK_ERR);
520 	}
521 
522 	wsp->walk_addr = (uintptr_t)s.slab_next;
523 
524 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
525 }
526 
527 int
528 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
529 {
530 	umem_cache_t c;
531 
532 	if (!(flags & DCMD_ADDRSPEC)) {
533 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
534 			mdb_warn("can't walk umem_cache");
535 			return (DCMD_ERR);
536 		}
537 		return (DCMD_OK);
538 	}
539 
540 	if (DCMD_HDRSPEC(flags))
541 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
542 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
543 
544 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
545 		mdb_warn("couldn't read umem_cache at %p", addr);
546 		return (DCMD_ERR);
547 	}
548 
549 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
550 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
551 
552 	return (DCMD_OK);
553 }
554 
/*
 * qsort()/bsearch() comparator for arrays of addresses: orders the two
 * uintptr_t values pointed to in ascending order, returning -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t a = *(const uintptr_t *)lhs;
	const uintptr_t b = *(const uintptr_t *)rhs;

	/* (a > b) and (a < b) are each 0 or 1, so this yields -1, 0 or 1 */
	return ((a > b) - (a < b));
}
567 
568 static int
569 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
570 {
571 	const umem_bufctl_audit_t *bcp1 = *lhs;
572 	const umem_bufctl_audit_t *bcp2 = *rhs;
573 
574 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
575 		return (-1);
576 
577 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
578 		return (1);
579 
580 	return (0);
581 }
582 
/*
 * State for the umem_hash walker.
 */
typedef struct umem_hash_walk {
	/* local copy of the cache's hash bucket array (target addresses) */
	uintptr_t *umhw_table;
	/* number of buckets: cache_hash_mask + 1 */
	size_t umhw_nelems;
	/* index of the next bucket to examine */
	size_t umhw_pos;
	/* the most recently read bufctl; its bc_next continues the chain */
	umem_bufctl_t umhw_cur;
} umem_hash_walk_t;
589 
590 int
591 umem_hash_walk_init(mdb_walk_state_t *wsp)
592 {
593 	umem_hash_walk_t *umhw;
594 	uintptr_t *hash;
595 	umem_cache_t c;
596 	uintptr_t haddr, addr = wsp->walk_addr;
597 	size_t nelems;
598 	size_t hsize;
599 
600 	if (addr == NULL) {
601 		mdb_warn("umem_hash doesn't support global walks\n");
602 		return (WALK_ERR);
603 	}
604 
605 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
606 		mdb_warn("couldn't read cache at addr %p", addr);
607 		return (WALK_ERR);
608 	}
609 
610 	if (!(c.cache_flags & UMF_HASH)) {
611 		mdb_warn("cache %p doesn't have a hash table\n", addr);
612 		return (WALK_DONE);		/* nothing to do */
613 	}
614 
615 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
616 	umhw->umhw_cur.bc_next = NULL;
617 	umhw->umhw_pos = 0;
618 
619 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
620 	hsize = nelems * sizeof (uintptr_t);
621 	haddr = (uintptr_t)c.cache_hash_table;
622 
623 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
624 	if (mdb_vread(hash, hsize, haddr) == -1) {
625 		mdb_warn("failed to read hash table at %p", haddr);
626 		mdb_free(hash, hsize);
627 		mdb_free(umhw, sizeof (umem_hash_walk_t));
628 		return (WALK_ERR);
629 	}
630 
631 	wsp->walk_data = umhw;
632 
633 	return (WALK_NEXT);
634 }
635 
636 int
637 umem_hash_walk_step(mdb_walk_state_t *wsp)
638 {
639 	umem_hash_walk_t *umhw = wsp->walk_data;
640 	uintptr_t addr = NULL;
641 
642 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
643 		while (umhw->umhw_pos < umhw->umhw_nelems) {
644 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
645 				break;
646 		}
647 	}
648 	if (addr == NULL)
649 		return (WALK_DONE);
650 
651 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
652 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
653 		return (WALK_ERR);
654 	}
655 
656 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
657 }
658 
659 void
660 umem_hash_walk_fini(mdb_walk_state_t *wsp)
661 {
662 	umem_hash_walk_t *umhw = wsp->walk_data;
663 
664 	if (umhw == NULL)
665 		return;
666 
667 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
668 	mdb_free(umhw, sizeof (umem_hash_walk_t));
669 }
670 
671 /*
672  * Find the address of the bufctl structure for the address 'buf' in cache
673  * 'cp', which is at address caddr, and place it in *out.
674  */
675 static int
676 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
677 {
678 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
679 	umem_bufctl_t *bcp;
680 	umem_bufctl_t bc;
681 
682 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
683 		mdb_warn("unable to read hash bucket for %p in cache %p",
684 		    buf, caddr);
685 		return (-1);
686 	}
687 
688 	while (bcp != NULL) {
689 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
690 		    (uintptr_t)bcp) == -1) {
691 			mdb_warn("unable to read bufctl at %p", bcp);
692 			return (-1);
693 		}
694 		if (bc.bc_addr == buf) {
695 			*out = (uintptr_t)bcp;
696 			return (0);
697 		}
698 		bcp = bc.bc_next;
699 	}
700 
701 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
702 	return (-1);
703 }
704 
705 int
706 umem_get_magsize(const umem_cache_t *cp)
707 {
708 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
709 	GElf_Sym mt_sym;
710 	umem_magtype_t mt;
711 	int res;
712 
713 	/*
714 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
715 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
716 	 * it is okay to return 0 for them.
717 	 */
718 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
719 	    (cp->cache_flags & UMF_NOMAGAZINE))
720 		return (res);
721 
722 	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
723 		mdb_warn("unable to read 'umem_magtype'");
724 	} else if (addr < mt_sym.st_value ||
725 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
726 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
727 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
728 		    cp->cache_name, addr);
729 		return (0);
730 	}
731 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
732 		mdb_warn("unable to read magtype at %a", addr);
733 		return (0);
734 	}
735 	return (mt.mt_magsize);
736 }
737 
738 /*ARGSUSED*/
739 static int
740 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
741 {
742 	*est -= (sp->slab_chunks - sp->slab_refcnt);
743 
744 	return (WALK_NEXT);
745 }
746 
747 /*
748  * Returns an upper bound on the number of allocated buffers in a given
749  * cache.
750  */
751 size_t
752 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
753 {
754 	int magsize;
755 	size_t cache_est;
756 
757 	cache_est = cp->cache_buftotal;
758 
759 	(void) mdb_pwalk("umem_slab_partial",
760 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
761 
762 	if ((magsize = umem_get_magsize(cp)) != 0) {
763 		size_t mag_est = cp->cache_full.ml_total * magsize;
764 
765 		if (cache_est >= mag_est) {
766 			cache_est -= mag_est;
767 		} else {
768 			mdb_warn("cache %p's magazine layer holds more buffers "
769 			    "than the slab layer.\n", addr);
770 		}
771 	}
772 	return (cache_est);
773 }
774 
/*
 * Helper for umem_read_magazines(): read the magazine at target address
 * 'ump' into the local buffer 'mp' and append its first 'rounds' rounds
 * to maglist[], bumping magcnt.  Jumps to the caller's 'fail' label if
 * the magazine cannot be read or magcnt reaches magmax.
 *
 * The macro relies on the caller's locals mp, ump, magbsize, maglist,
 * magcnt, magmax and i.  It is wrapped in do { } while (0) so that it
 * behaves as a single statement when followed by a semicolon, and the
 * argument is parenthesized so that any expression may be passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
789 
790 int
791 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
792     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
793 {
794 	umem_magazine_t *ump, *mp;
795 	void **maglist = NULL;
796 	int i, cpu;
797 	size_t magsize, magmax, magbsize;
798 	size_t magcnt = 0;
799 
800 	/*
801 	 * Read the magtype out of the cache, after verifying the pointer's
802 	 * correctness.
803 	 */
804 	magsize = umem_get_magsize(cp);
805 	if (magsize == 0) {
806 		*maglistp = NULL;
807 		*magcntp = 0;
808 		*magmaxp = 0;
809 		return (WALK_NEXT);
810 	}
811 
812 	/*
813 	 * There are several places where we need to go buffer hunting:
814 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
815 	 * and the full magazine list in the depot.
816 	 *
817 	 * For an upper bound on the number of buffers in the magazine
818 	 * layer, we have the number of magazines on the cache_full
819 	 * list plus at most two magazines per CPU (the loaded and the
820 	 * spare).  Toss in 100 magazines as a fudge factor in case this
821 	 * is live (the number "100" comes from the same fudge factor in
822 	 * crash(1M)).
823 	 */
824 	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
825 	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
826 
827 	if (magbsize >= PAGESIZE / 2) {
828 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
829 		    addr, magbsize);
830 		return (WALK_ERR);
831 	}
832 
833 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
834 	mp = mdb_alloc(magbsize, alloc_flags);
835 	if (mp == NULL || maglist == NULL)
836 		goto fail;
837 
838 	/*
839 	 * First up: the magazines in the depot (i.e. on the cache_full list).
840 	 */
841 	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
842 		READMAG_ROUNDS(magsize);
843 		ump = mp->mag_next;
844 
845 		if (ump == cp->cache_full.ml_list)
846 			break; /* cache_full list loop detected */
847 	}
848 
849 	dprintf(("cache_full list done\n"));
850 
851 	/*
852 	 * Now whip through the CPUs, snagging the loaded magazines
853 	 * and full spares.
854 	 */
855 	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
856 		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
857 
858 		dprintf(("reading cpu cache %p\n",
859 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
860 
861 		if (ccp->cc_rounds > 0 &&
862 		    (ump = ccp->cc_loaded) != NULL) {
863 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
864 			READMAG_ROUNDS(ccp->cc_rounds);
865 		}
866 
867 		if (ccp->cc_prounds > 0 &&
868 		    (ump = ccp->cc_ploaded) != NULL) {
869 			dprintf(("reading %d previously loaded rounds\n",
870 			    ccp->cc_prounds));
871 			READMAG_ROUNDS(ccp->cc_prounds);
872 		}
873 	}
874 
875 	dprintf(("magazine layer: %d buffers\n", magcnt));
876 
877 	if (!(alloc_flags & UM_GC))
878 		mdb_free(mp, magbsize);
879 
880 	*maglistp = maglist;
881 	*magcntp = magcnt;
882 	*magmaxp = magmax;
883 
884 	return (WALK_NEXT);
885 
886 fail:
887 	if (!(alloc_flags & UM_GC)) {
888 		if (mp)
889 			mdb_free(mp, magbsize);
890 		if (maglist)
891 			mdb_free(maglist, magmax * sizeof (void *));
892 	}
893 	return (WALK_ERR);
894 }
895 
896 static int
897 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
898 {
899 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
900 }
901 
902 static int
903 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
904 {
905 	umem_bufctl_audit_t *b;
906 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
907 
908 	/*
909 	 * if UMF_AUDIT is not set, we know that we're looking at a
910 	 * umem_bufctl_t.
911 	 */
912 	if (!(cp->cache_flags & UMF_AUDIT) ||
913 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
914 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
915 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
916 			mdb_warn("unable to read bufctl at %p", buf);
917 			return (WALK_ERR);
918 		}
919 	}
920 
921 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
922 }
923 
/*
 * State for the buffer/bufctl walkers set up by umem_walk_init_common().
 */
typedef struct umem_walk {
	int umw_type;			/* UM_* flags for this walk */

	uintptr_t umw_addr;		/* cache address */
	umem_cache_t *umw_cp;		/* local copy of the cache */
	size_t umw_csize;		/* allocated size of umw_cp */

	/*
	 * magazine layer
	 */
	void **umw_maglist;		/* buffers found in magazines */
	size_t umw_max;			/* entries allocated in umw_maglist */
	size_t umw_count;		/* entries used in umw_maglist */
	size_t umw_pos;			/* initialized to 0 by the init code */

	/*
	 * slab layer
	 */
	char *umw_valid;	/* to keep track of freed buffers */
	char *umw_ubase;	/* buffer for slab data */
} umem_walk_t;
945 
946 static int
947 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
948 {
949 	umem_walk_t *umw;
950 	int csize;
951 	umem_cache_t *cp;
952 	size_t vm_quantum;
953 
954 	size_t magmax, magcnt;
955 	void **maglist = NULL;
956 	uint_t chunksize, slabsize;
957 	int status = WALK_ERR;
958 	uintptr_t addr = wsp->walk_addr;
959 	const char *layered;
960 
961 	type &= ~UM_HASH;
962 
963 	if (addr == NULL) {
964 		mdb_warn("umem walk doesn't support global walks\n");
965 		return (WALK_ERR);
966 	}
967 
968 	dprintf(("walking %p\n", addr));
969 
970 	/*
971 	 * The number of "cpus" determines how large the cache is.
972 	 */
973 	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
974 	cp = mdb_alloc(csize, UM_SLEEP);
975 
976 	if (mdb_vread(cp, csize, addr) == -1) {
977 		mdb_warn("couldn't read cache at addr %p", addr);
978 		goto out2;
979 	}
980 
981 	/*
982 	 * It's easy for someone to hand us an invalid cache address.
983 	 * Unfortunately, it is hard for this walker to survive an
984 	 * invalid cache cleanly.  So we make sure that:
985 	 *
986 	 *	1. the vmem arena for the cache is readable,
987 	 *	2. the vmem arena's quantum is a power of 2,
988 	 *	3. our slabsize is a multiple of the quantum, and
989 	 *	4. our chunksize is >0 and less than our slabsize.
990 	 */
991 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
992 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
993 	    vm_quantum == 0 ||
994 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
995 	    cp->cache_slabsize < vm_quantum ||
996 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
997 	    cp->cache_chunksize == 0 ||
998 	    cp->cache_chunksize > cp->cache_slabsize) {
999 		mdb_warn("%p is not a valid umem_cache_t\n", addr);
1000 		goto out2;
1001 	}
1002 
1003 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1004 
1005 	if (cp->cache_buftotal == 0) {
1006 		mdb_free(cp, csize);
1007 		return (WALK_DONE);
1008 	}
1009 
1010 	/*
1011 	 * If they ask for bufctls, but it's a small-slab cache,
1012 	 * there is nothing to report.
1013 	 */
1014 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1015 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1016 		    cp->cache_flags));
1017 		mdb_free(cp, csize);
1018 		return (WALK_DONE);
1019 	}
1020 
1021 	/*
1022 	 * Read in the contents of the magazine layer
1023 	 */
1024 	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax,
1025 	    UM_SLEEP) == WALK_ERR)
1026 		goto out2;
1027 
1028 	/*
1029 	 * We have all of the buffers from the magazines;  if we are walking
1030 	 * allocated buffers, sort them so we can bsearch them later.
1031 	 */
1032 	if (type & UM_ALLOCATED)
1033 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1034 
1035 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1036 
1037 	umw->umw_type = type;
1038 	umw->umw_addr = addr;
1039 	umw->umw_cp = cp;
1040 	umw->umw_csize = csize;
1041 	umw->umw_maglist = maglist;
1042 	umw->umw_max = magmax;
1043 	umw->umw_count = magcnt;
1044 	umw->umw_pos = 0;
1045 
1046 	/*
1047 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
1048 	 * hash table instead of the slab layer.
1049 	 */
1050 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1051 		layered = "umem_hash";
1052 
1053 		umw->umw_type |= UM_HASH;
1054 	} else {
1055 		/*
1056 		 * If we are walking freed buffers, we only need the
1057 		 * magazine layer plus the partially allocated slabs.
1058 		 * To walk allocated buffers, we need all of the slabs.
1059 		 */
1060 		if (type & UM_ALLOCATED)
1061 			layered = "umem_slab";
1062 		else
1063 			layered = "umem_slab_partial";
1064 
1065 		/*
1066 		 * for small-slab caches, we read in the entire slab.  For
1067 		 * freed buffers, we can just walk the freelist.  For
1068 		 * allocated buffers, we use a 'valid' array to track
1069 		 * the freed buffers.
1070 		 */
1071 		if (!(cp->cache_flags & UMF_HASH)) {
1072 			chunksize = cp->cache_chunksize;
1073 			slabsize = cp->cache_slabsize;
1074 
1075 			umw->umw_ubase = mdb_alloc(slabsize +
1076 			    sizeof (umem_bufctl_t), UM_SLEEP);
1077 
1078 			if (type & UM_ALLOCATED)
1079 				umw->umw_valid =
1080 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1081 		}
1082 	}
1083 
1084 	status = WALK_NEXT;
1085 
1086 	if (mdb_layered_walk(layered, wsp) == -1) {
1087 		mdb_warn("unable to start layered '%s' walk", layered);
1088 		status = WALK_ERR;
1089 	}
1090 
1091 out1:
1092 	if (status == WALK_ERR) {
1093 		if (umw->umw_valid)
1094 			mdb_free(umw->umw_valid, slabsize / chunksize);
1095 
1096 		if (umw->umw_ubase)
1097 			mdb_free(umw->umw_ubase, slabsize +
1098 			    sizeof (umem_bufctl_t));
1099 
1100 		if (umw->umw_maglist)
1101 			mdb_free(umw->umw_maglist, umw->umw_max *
1102 			    sizeof (uintptr_t));
1103 
1104 		mdb_free(umw, sizeof (umem_walk_t));
1105 		wsp->walk_data = NULL;
1106 	}
1107 
1108 out2:
1109 	if (status == WALK_ERR)
1110 		mdb_free(cp, csize);
1111 
1112 	return (status);
1113 }
1114 
1115 int
1116 umem_walk_step(mdb_walk_state_t *wsp)
1117 {
1118 	umem_walk_t *umw = wsp->walk_data;
1119 	int type = umw->umw_type;
1120 	umem_cache_t *cp = umw->umw_cp;
1121 
1122 	void **maglist = umw->umw_maglist;
1123 	int magcnt = umw->umw_count;
1124 
1125 	uintptr_t chunksize, slabsize;
1126 	uintptr_t addr;
1127 	const umem_slab_t *sp;
1128 	const umem_bufctl_t *bcp;
1129 	umem_bufctl_t bc;
1130 
1131 	int chunks;
1132 	char *kbase;
1133 	void *buf;
1134 	int i, ret;
1135 
1136 	char *valid, *ubase;
1137 
1138 	/*
1139 	 * first, handle the 'umem_hash' layered walk case
1140 	 */
1141 	if (type & UM_HASH) {
1142 		/*
1143 		 * We have a buffer which has been allocated out of the
1144 		 * global layer. We need to make sure that it's not
1145 		 * actually sitting in a magazine before we report it as
1146 		 * an allocated buffer.
1147 		 */
1148 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1149 
1150 		if (magcnt > 0 &&
1151 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1152 		    addrcmp) != NULL)
1153 			return (WALK_NEXT);
1154 
1155 		if (type & UM_BUFCTL)
1156 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1157 
1158 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1159 	}
1160 
1161 	ret = WALK_NEXT;
1162 
1163 	addr = umw->umw_addr;
1164 
1165 	/*
1166 	 * If we're walking freed buffers, report everything in the
1167 	 * magazine layer before processing the first slab.
1168 	 */
1169 	if ((type & UM_FREE) && magcnt != 0) {
1170 		umw->umw_count = 0;		/* only do this once */
1171 		for (i = 0; i < magcnt; i++) {
1172 			buf = maglist[i];
1173 
1174 			if (type & UM_BUFCTL) {
1175 				uintptr_t out;
1176 
1177 				if (cp->cache_flags & UMF_BUFTAG) {
1178 					umem_buftag_t *btp;
1179 					umem_buftag_t tag;
1180 
1181 					/* LINTED - alignment */
1182 					btp = UMEM_BUFTAG(cp, buf);
1183 					if (mdb_vread(&tag, sizeof (tag),
1184 					    (uintptr_t)btp) == -1) {
1185 						mdb_warn("reading buftag for "
1186 						    "%p at %p", buf, btp);
1187 						continue;
1188 					}
1189 					out = (uintptr_t)tag.bt_bufctl;
1190 				} else {
1191 					if (umem_hash_lookup(cp, addr, buf,
1192 					    &out) == -1)
1193 						continue;
1194 				}
1195 				ret = bufctl_walk_callback(cp, wsp, out);
1196 			} else {
1197 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1198 			}
1199 
1200 			if (ret != WALK_NEXT)
1201 				return (ret);
1202 		}
1203 	}
1204 
1205 	/*
1206 	 * Handle the buffers in the current slab
1207 	 */
1208 	chunksize = cp->cache_chunksize;
1209 	slabsize = cp->cache_slabsize;
1210 
1211 	sp = wsp->walk_layer;
1212 	chunks = sp->slab_chunks;
1213 	kbase = sp->slab_base;
1214 
1215 	dprintf(("kbase is %p\n", kbase));
1216 
1217 	if (!(cp->cache_flags & UMF_HASH)) {
1218 		valid = umw->umw_valid;
1219 		ubase = umw->umw_ubase;
1220 
1221 		if (mdb_vread(ubase, chunks * chunksize,
1222 		    (uintptr_t)kbase) == -1) {
1223 			mdb_warn("failed to read slab contents at %p", kbase);
1224 			return (WALK_ERR);
1225 		}
1226 
1227 		/*
1228 		 * Set up the valid map as fully allocated -- we'll punch
1229 		 * out the freelist.
1230 		 */
1231 		if (type & UM_ALLOCATED)
1232 			(void) memset(valid, 1, chunks);
1233 	} else {
1234 		valid = NULL;
1235 		ubase = NULL;
1236 	}
1237 
1238 	/*
1239 	 * walk the slab's freelist
1240 	 */
1241 	bcp = sp->slab_head;
1242 
1243 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1244 
1245 	/*
1246 	 * since we could be in the middle of allocating a buffer,
1247 	 * our refcnt could be one higher than it aught.  So we
1248 	 * check one further on the freelist than the count allows.
1249 	 */
1250 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1251 		uint_t ndx;
1252 
1253 		dprintf(("bcp is %p\n", bcp));
1254 
1255 		if (bcp == NULL) {
1256 			if (i == chunks)
1257 				break;
1258 			mdb_warn(
1259 			    "slab %p in cache %p freelist too short by %d\n",
1260 			    sp, addr, chunks - i);
1261 			break;
1262 		}
1263 
1264 		if (cp->cache_flags & UMF_HASH) {
1265 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1266 				mdb_warn("failed to read bufctl ptr at %p",
1267 				    bcp);
1268 				break;
1269 			}
1270 			buf = bc.bc_addr;
1271 		} else {
1272 			/*
1273 			 * Otherwise the buffer is (or should be) in the slab
1274 			 * that we've read in; determine its offset in the
1275 			 * slab, validate that it's not corrupt, and add to
1276 			 * our base address to find the umem_bufctl_t.  (Note
1277 			 * that we don't need to add the size of the bufctl
1278 			 * to our offset calculation because of the slop that's
1279 			 * allocated for the buffer at ubase.)
1280 			 */
1281 			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1282 
1283 			if (offs > chunks * chunksize) {
1284 				mdb_warn("found corrupt bufctl ptr %p"
1285 				    " in slab %p in cache %p\n", bcp,
1286 				    wsp->walk_addr, addr);
1287 				break;
1288 			}
1289 
1290 			bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1291 			buf = UMEM_BUF(cp, bcp);
1292 		}
1293 
1294 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1295 
1296 		if (ndx > slabsize / cp->cache_bufsize) {
1297 			/*
1298 			 * This is very wrong; we have managed to find
1299 			 * a buffer in the slab which shouldn't
1300 			 * actually be here.  Emit a warning, and
1301 			 * try to continue.
1302 			 */
1303 			mdb_warn("buf %p is out of range for "
1304 			    "slab %p, cache %p\n", buf, sp, addr);
1305 		} else if (type & UM_ALLOCATED) {
1306 			/*
1307 			 * we have found a buffer on the slab's freelist;
1308 			 * clear its entry
1309 			 */
1310 			valid[ndx] = 0;
1311 		} else {
1312 			/*
1313 			 * Report this freed buffer
1314 			 */
1315 			if (type & UM_BUFCTL) {
1316 				ret = bufctl_walk_callback(cp, wsp,
1317 				    (uintptr_t)bcp);
1318 			} else {
1319 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1320 			}
1321 			if (ret != WALK_NEXT)
1322 				return (ret);
1323 		}
1324 
1325 		bcp = bc.bc_next;
1326 	}
1327 
1328 	if (bcp != NULL) {
1329 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1330 		    sp, addr, bcp));
1331 	}
1332 
1333 	/*
1334 	 * If we are walking freed buffers, the loop above handled reporting
1335 	 * them.
1336 	 */
1337 	if (type & UM_FREE)
1338 		return (WALK_NEXT);
1339 
1340 	if (type & UM_BUFCTL) {
1341 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1342 		    "cache %p\n", addr);
1343 		return (WALK_ERR);
1344 	}
1345 
1346 	/*
1347 	 * Report allocated buffers, skipping buffers in the magazine layer.
1348 	 * We only get this far for small-slab caches.
1349 	 */
1350 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1351 		buf = (char *)kbase + i * chunksize;
1352 
1353 		if (!valid[i])
1354 			continue;		/* on slab freelist */
1355 
1356 		if (magcnt > 0 &&
1357 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1358 		    addrcmp) != NULL)
1359 			continue;		/* in magazine layer */
1360 
1361 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1362 	}
1363 	return (ret);
1364 }
1365 
1366 void
1367 umem_walk_fini(mdb_walk_state_t *wsp)
1368 {
1369 	umem_walk_t *umw = wsp->walk_data;
1370 	uintptr_t chunksize;
1371 	uintptr_t slabsize;
1372 
1373 	if (umw == NULL)
1374 		return;
1375 
1376 	if (umw->umw_maglist != NULL)
1377 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1378 
1379 	chunksize = umw->umw_cp->cache_chunksize;
1380 	slabsize = umw->umw_cp->cache_slabsize;
1381 
1382 	if (umw->umw_valid != NULL)
1383 		mdb_free(umw->umw_valid, slabsize / chunksize);
1384 	if (umw->umw_ubase != NULL)
1385 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1386 
1387 	mdb_free(umw->umw_cp, umw->umw_csize);
1388 	mdb_free(umw, sizeof (umem_walk_t));
1389 }
1390 
1391 /*ARGSUSED*/
1392 static int
1393 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1394 {
1395 	/*
1396 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1397 	 * memory in other caches.  This can be a little confusing, so we
1398 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1399 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1400 	 */
1401 	if (c->cache_cflags & UMC_NOTOUCH)
1402 		return (WALK_NEXT);
1403 
1404 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1405 	    wsp->walk_cbdata, addr) == -1)
1406 		return (WALK_DONE);
1407 
1408 	return (WALK_NEXT);
1409 }
1410 
/*
 * Global-walk helper for the walker init routines.  Records the per-cache
 * walker name in walk_data, runs umem_walk_all() over every umem cache, and
 * then returns from the *enclosing* function: WALK_ERR if the umem_cache
 * walk failed, WALK_DONE otherwise.  Control never continues past it.
 */
#define	UMEM_WALK_ALL(name, wsp) {					\
	(wsp)->walk_data = (name);					\
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all,	\
	    (wsp)) == -1)						\
		return (WALK_ERR);					\
	return (WALK_DONE);						\
}
1417 
1418 int
1419 umem_walk_init(mdb_walk_state_t *wsp)
1420 {
1421 	if (wsp->walk_arg != NULL)
1422 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1423 
1424 	if (wsp->walk_addr == NULL)
1425 		UMEM_WALK_ALL("umem", wsp);
1426 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1427 }
1428 
1429 int
1430 bufctl_walk_init(mdb_walk_state_t *wsp)
1431 {
1432 	if (wsp->walk_addr == NULL)
1433 		UMEM_WALK_ALL("bufctl", wsp);
1434 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1435 }
1436 
1437 int
1438 freemem_walk_init(mdb_walk_state_t *wsp)
1439 {
1440 	if (wsp->walk_addr == NULL)
1441 		UMEM_WALK_ALL("freemem", wsp);
1442 	return (umem_walk_init_common(wsp, UM_FREE));
1443 }
1444 
1445 int
1446 freectl_walk_init(mdb_walk_state_t *wsp)
1447 {
1448 	if (wsp->walk_addr == NULL)
1449 		UMEM_WALK_ALL("freectl", wsp);
1450 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1451 }
1452 
1453 typedef struct bufctl_history_walk {
1454 	void		*bhw_next;
1455 	umem_cache_t	*bhw_cache;
1456 	umem_slab_t	*bhw_slab;
1457 	hrtime_t	bhw_timestamp;
1458 } bufctl_history_walk_t;
1459 
1460 int
1461 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1462 {
1463 	bufctl_history_walk_t *bhw;
1464 	umem_bufctl_audit_t bc;
1465 	umem_bufctl_audit_t bcn;
1466 
1467 	if (wsp->walk_addr == NULL) {
1468 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1469 		return (WALK_ERR);
1470 	}
1471 
1472 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1473 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1474 		return (WALK_ERR);
1475 	}
1476 
1477 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1478 	bhw->bhw_timestamp = 0;
1479 	bhw->bhw_cache = bc.bc_cache;
1480 	bhw->bhw_slab = bc.bc_slab;
1481 
1482 	/*
1483 	 * sometimes the first log entry matches the base bufctl;  in that
1484 	 * case, skip the base bufctl.
1485 	 */
1486 	if (bc.bc_lastlog != NULL &&
1487 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1488 	    bc.bc_addr == bcn.bc_addr &&
1489 	    bc.bc_cache == bcn.bc_cache &&
1490 	    bc.bc_slab == bcn.bc_slab &&
1491 	    bc.bc_timestamp == bcn.bc_timestamp &&
1492 	    bc.bc_thread == bcn.bc_thread)
1493 		bhw->bhw_next = bc.bc_lastlog;
1494 	else
1495 		bhw->bhw_next = (void *)wsp->walk_addr;
1496 
1497 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1498 	wsp->walk_data = bhw;
1499 
1500 	return (WALK_NEXT);
1501 }
1502 
1503 int
1504 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1505 {
1506 	bufctl_history_walk_t *bhw = wsp->walk_data;
1507 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1508 	uintptr_t baseaddr = wsp->walk_addr;
1509 	umem_bufctl_audit_t *b;
1510 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1511 
1512 	if (addr == NULL)
1513 		return (WALK_DONE);
1514 
1515 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1516 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1517 		return (WALK_ERR);
1518 	}
1519 
1520 	/*
1521 	 * The bufctl is only valid if the address, cache, and slab are
1522 	 * correct.  We also check that the timestamp is decreasing, to
1523 	 * prevent infinite loops.
1524 	 */
1525 	if ((uintptr_t)b->bc_addr != baseaddr ||
1526 	    b->bc_cache != bhw->bhw_cache ||
1527 	    b->bc_slab != bhw->bhw_slab ||
1528 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1529 		return (WALK_DONE);
1530 
1531 	bhw->bhw_next = b->bc_lastlog;
1532 	bhw->bhw_timestamp = b->bc_timestamp;
1533 
1534 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1535 }
1536 
1537 void
1538 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1539 {
1540 	bufctl_history_walk_t *bhw = wsp->walk_data;
1541 
1542 	mdb_free(bhw, sizeof (*bhw));
1543 }
1544 
1545 typedef struct umem_log_walk {
1546 	umem_bufctl_audit_t *ulw_base;
1547 	umem_bufctl_audit_t **ulw_sorted;
1548 	umem_log_header_t ulw_lh;
1549 	size_t ulw_size;
1550 	size_t ulw_maxndx;
1551 	size_t ulw_ndx;
1552 } umem_log_walk_t;
1553 
1554 int
1555 umem_log_walk_init(mdb_walk_state_t *wsp)
1556 {
1557 	uintptr_t lp = wsp->walk_addr;
1558 	umem_log_walk_t *ulw;
1559 	umem_log_header_t *lhp;
1560 	int maxndx, i, j, k;
1561 
1562 	/*
1563 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1564 	 * read the log whose umem_log_header_t is stored at walk_addr.
1565 	 */
1566 	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1567 		mdb_warn("failed to read 'umem_transaction_log'");
1568 		return (WALK_ERR);
1569 	}
1570 
1571 	if (lp == NULL) {
1572 		mdb_warn("log is disabled\n");
1573 		return (WALK_ERR);
1574 	}
1575 
1576 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1577 	lhp = &ulw->ulw_lh;
1578 
1579 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1580 		mdb_warn("failed to read log header at %p", lp);
1581 		mdb_free(ulw, sizeof (umem_log_walk_t));
1582 		return (WALK_ERR);
1583 	}
1584 
1585 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1586 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1587 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1588 
1589 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1590 	    (uintptr_t)lhp->lh_base) == -1) {
1591 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1592 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1593 		mdb_free(ulw, sizeof (umem_log_walk_t));
1594 		return (WALK_ERR);
1595 	}
1596 
1597 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1598 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1599 
1600 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1601 		caddr_t chunk = (caddr_t)
1602 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1603 
1604 		for (j = 0; j < maxndx; j++) {
1605 			/* LINTED align */
1606 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1607 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1608 		}
1609 	}
1610 
1611 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1612 	    (int(*)(const void *, const void *))bufctlcmp);
1613 
1614 	ulw->ulw_maxndx = k;
1615 	wsp->walk_data = ulw;
1616 
1617 	return (WALK_NEXT);
1618 }
1619 
1620 int
1621 umem_log_walk_step(mdb_walk_state_t *wsp)
1622 {
1623 	umem_log_walk_t *ulw = wsp->walk_data;
1624 	umem_bufctl_audit_t *bcp;
1625 
1626 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1627 		return (WALK_DONE);
1628 
1629 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1630 
1631 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1632 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1633 }
1634 
1635 void
1636 umem_log_walk_fini(mdb_walk_state_t *wsp)
1637 {
1638 	umem_log_walk_t *ulw = wsp->walk_data;
1639 
1640 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1641 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1642 	    sizeof (umem_bufctl_audit_t *));
1643 	mdb_free(ulw, sizeof (umem_log_walk_t));
1644 }
1645 
1646 typedef struct allocdby_bufctl {
1647 	uintptr_t abb_addr;
1648 	hrtime_t abb_ts;
1649 } allocdby_bufctl_t;
1650 
1651 typedef struct allocdby_walk {
1652 	const char *abw_walk;
1653 	uintptr_t abw_thread;
1654 	size_t abw_nbufs;
1655 	size_t abw_size;
1656 	allocdby_bufctl_t *abw_buf;
1657 	size_t abw_ndx;
1658 } allocdby_walk_t;
1659 
1660 int
1661 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1662     allocdby_walk_t *abw)
1663 {
1664 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1665 		return (WALK_NEXT);
1666 
1667 	if (abw->abw_nbufs == abw->abw_size) {
1668 		allocdby_bufctl_t *buf;
1669 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1670 
1671 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1672 
1673 		bcopy(abw->abw_buf, buf, oldsize);
1674 		mdb_free(abw->abw_buf, oldsize);
1675 
1676 		abw->abw_size <<= 1;
1677 		abw->abw_buf = buf;
1678 	}
1679 
1680 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1681 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1682 	abw->abw_nbufs++;
1683 
1684 	return (WALK_NEXT);
1685 }
1686 
1687 /*ARGSUSED*/
1688 int
1689 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1690 {
1691 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1692 	    abw, addr) == -1) {
1693 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1694 		return (WALK_DONE);
1695 	}
1696 
1697 	return (WALK_NEXT);
1698 }
1699 
1700 static int
1701 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1702 {
1703 	if (lhs->abb_ts < rhs->abb_ts)
1704 		return (1);
1705 	if (lhs->abb_ts > rhs->abb_ts)
1706 		return (-1);
1707 	return (0);
1708 }
1709 
1710 static int
1711 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1712 {
1713 	allocdby_walk_t *abw;
1714 
1715 	if (wsp->walk_addr == NULL) {
1716 		mdb_warn("allocdby walk doesn't support global walks\n");
1717 		return (WALK_ERR);
1718 	}
1719 
1720 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1721 
1722 	abw->abw_thread = wsp->walk_addr;
1723 	abw->abw_walk = walk;
1724 	abw->abw_size = 128;	/* something reasonable */
1725 	abw->abw_buf =
1726 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1727 
1728 	wsp->walk_data = abw;
1729 
1730 	if (mdb_walk("umem_cache",
1731 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1732 		mdb_warn("couldn't walk umem_cache");
1733 		allocdby_walk_fini(wsp);
1734 		return (WALK_ERR);
1735 	}
1736 
1737 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1738 	    (int(*)(const void *, const void *))allocdby_cmp);
1739 
1740 	return (WALK_NEXT);
1741 }
1742 
1743 int
1744 allocdby_walk_init(mdb_walk_state_t *wsp)
1745 {
1746 	return (allocdby_walk_init_common(wsp, "bufctl"));
1747 }
1748 
1749 int
1750 freedby_walk_init(mdb_walk_state_t *wsp)
1751 {
1752 	return (allocdby_walk_init_common(wsp, "freectl"));
1753 }
1754 
1755 int
1756 allocdby_walk_step(mdb_walk_state_t *wsp)
1757 {
1758 	allocdby_walk_t *abw = wsp->walk_data;
1759 	uintptr_t addr;
1760 	umem_bufctl_audit_t *bcp;
1761 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1762 
1763 	if (abw->abw_ndx == abw->abw_nbufs)
1764 		return (WALK_DONE);
1765 
1766 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1767 
1768 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1769 		mdb_warn("couldn't read bufctl at %p", addr);
1770 		return (WALK_DONE);
1771 	}
1772 
1773 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1774 }
1775 
1776 void
1777 allocdby_walk_fini(mdb_walk_state_t *wsp)
1778 {
1779 	allocdby_walk_t *abw = wsp->walk_data;
1780 
1781 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1782 	mdb_free(abw, sizeof (allocdby_walk_t));
1783 }
1784 
1785 /*ARGSUSED*/
1786 int
1787 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1788 {
1789 	char c[MDB_SYM_NAMLEN];
1790 	GElf_Sym sym;
1791 	int i;
1792 
1793 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1794 	for (i = 0; i < bcp->bc_depth; i++) {
1795 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1796 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1797 			continue;
1798 		if (is_umem_sym(c, "umem_"))
1799 			continue;
1800 		mdb_printf("%s+0x%lx",
1801 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1802 		break;
1803 	}
1804 	mdb_printf("\n");
1805 
1806 	return (WALK_NEXT);
1807 }
1808 
1809 static int
1810 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1811 {
1812 	if (!(flags & DCMD_ADDRSPEC))
1813 		return (DCMD_USAGE);
1814 
1815 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1816 
1817 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1818 		mdb_warn("can't walk '%s' for %p", w, addr);
1819 		return (DCMD_ERR);
1820 	}
1821 
1822 	return (DCMD_OK);
1823 }
1824 
1825 /*ARGSUSED*/
1826 int
1827 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1828 {
1829 	return (allocdby_common(addr, flags, "allocdby"));
1830 }
1831 
1832 /*ARGSUSED*/
1833 int
1834 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1835 {
1836 	return (allocdby_common(addr, flags, "freedby"));
1837 }
1838 
1839 typedef struct whatis_info {
1840 	mdb_whatis_t *wi_w;
1841 	const umem_cache_t *wi_cache;
1842 	const vmem_t *wi_vmem;
1843 	vmem_t *wi_msb_arena;
1844 	size_t wi_slab_size;
1845 	int wi_slab_found;
1846 	uint_t wi_freemem;
1847 } whatis_info_t;
1848 
1849 /* call one of our dcmd functions with "-v" and the provided address */
1850 static void
1851 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
1852 {
1853 	mdb_arg_t a;
1854 	a.a_type = MDB_TYPE_STRING;
1855 	a.a_un.a_str = "-v";
1856 
1857 	mdb_printf(":\n");
1858 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
1859 }
1860 
1861 static void
1862 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
1863     uintptr_t baddr)
1864 {
1865 	mdb_whatis_t *w = wi->wi_w;
1866 	const umem_cache_t *cp = wi->wi_cache;
1867 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
1868 
1869 	int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
1870 
1871 	mdb_whatis_report_object(w, maddr, addr, "");
1872 
1873 	if (baddr != 0 && !call_printer)
1874 		mdb_printf("bufctl %p ", baddr);
1875 
1876 	mdb_printf("%s from %s",
1877 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
1878 
1879 	if (call_printer && baddr != 0) {
1880 		whatis_call_printer(bufctl, baddr);
1881 		return;
1882 	}
1883 	mdb_printf("\n");
1884 }
1885 
1886 /*ARGSUSED*/
1887 static int
1888 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
1889 {
1890 	mdb_whatis_t *w = wi->wi_w;
1891 
1892 	uintptr_t cur;
1893 	size_t size = wi->wi_cache->cache_bufsize;
1894 
1895 	while (mdb_whatis_match(w, addr, size, &cur))
1896 		whatis_print_umem(wi, cur, addr, NULL);
1897 
1898 	return (WHATIS_WALKRET(w));
1899 }
1900 
1901 /*ARGSUSED*/
1902 static int
1903 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
1904 {
1905 	mdb_whatis_t *w = wi->wi_w;
1906 
1907 	uintptr_t cur;
1908 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
1909 	size_t size = wi->wi_cache->cache_bufsize;
1910 
1911 	while (mdb_whatis_match(w, addr, size, &cur))
1912 		whatis_print_umem(wi, cur, addr, baddr);
1913 
1914 	return (WHATIS_WALKRET(w));
1915 }
1916 
1917 
1918 static int
1919 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
1920 {
1921 	mdb_whatis_t *w = wi->wi_w;
1922 
1923 	size_t size = vs->vs_end - vs->vs_start;
1924 	uintptr_t cur;
1925 
1926 	/* We're not interested in anything but alloc and free segments */
1927 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
1928 		return (WALK_NEXT);
1929 
1930 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
1931 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
1932 
1933 		/*
1934 		 * If we're not printing it seperately, provide the vmem_seg
1935 		 * pointer if it has a stack trace.
1936 		 */
1937 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
1938 		    ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
1939 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
1940 			mdb_printf("vmem_seg %p ", addr);
1941 		}
1942 
1943 		mdb_printf("%s from %s vmem arena",
1944 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
1945 		    wi->wi_vmem->vm_name);
1946 
1947 		if (!mdb_whatis_flags(w) & WHATIS_QUIET)
1948 			whatis_call_printer(vmem_seg, addr);
1949 		else
1950 			mdb_printf("\n");
1951 	}
1952 
1953 	return (WHATIS_WALKRET(w));
1954 }
1955 
1956 static int
1957 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
1958 {
1959 	mdb_whatis_t *w = wi->wi_w;
1960 	const char *nm = vmem->vm_name;
1961 	wi->wi_vmem = vmem;
1962 
1963 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
1964 		mdb_printf("Searching vmem arena %s...\n", nm);
1965 
1966 	if (mdb_pwalk("vmem_seg",
1967 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
1968 		mdb_warn("can't walk vmem seg for %p", addr);
1969 		return (WALK_NEXT);
1970 	}
1971 
1972 	return (WHATIS_WALKRET(w));
1973 }
1974 
1975 /*ARGSUSED*/
1976 static int
1977 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
1978 {
1979 	mdb_whatis_t *w = wi->wi_w;
1980 
1981 	/* It must overlap with the slab data, or it's not interesting */
1982 	if (mdb_whatis_overlaps(w,
1983 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
1984 		wi->wi_slab_found++;
1985 		return (WALK_DONE);
1986 	}
1987 	return (WALK_NEXT);
1988 }
1989 
1990 static int
1991 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
1992 {
1993 	mdb_whatis_t *w = wi->wi_w;
1994 	char *walk, *freewalk;
1995 	mdb_walk_cb_t func;
1996 	int do_bufctl;
1997 
1998 	/* Override the '-b' flag as necessary */
1999 	if (!(c->cache_flags & UMF_HASH))
2000 		do_bufctl = FALSE;	/* no bufctls to walk */
2001 	else if (c->cache_flags & UMF_AUDIT)
2002 		do_bufctl = TRUE;	/* we always want debugging info */
2003 	else
2004 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2005 
2006 	if (do_bufctl) {
2007 		walk = "bufctl";
2008 		freewalk = "freectl";
2009 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2010 	} else {
2011 		walk = "umem";
2012 		freewalk = "freemem";
2013 		func = (mdb_walk_cb_t)whatis_walk_umem;
2014 	}
2015 
2016 	wi->wi_cache = c;
2017 
2018 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2019 		mdb_printf("Searching %s...\n", c->cache_name);
2020 
2021 	/*
2022 	 * If more then two buffers live on each slab, figure out if we're
2023 	 * interested in anything in any slab before doing the more expensive
2024 	 * umem/freemem (bufctl/freectl) walkers.
2025 	 */
2026 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2027 	if (!(c->cache_flags & UMF_HASH))
2028 		wi->wi_slab_size -= sizeof (umem_slab_t);
2029 
2030 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2031 		wi->wi_slab_found = 0;
2032 		if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2033 		    addr) == -1) {
2034 			mdb_warn("can't find umem_slab walker");
2035 			return (WALK_DONE);
2036 		}
2037 		if (wi->wi_slab_found == 0)
2038 			return (WALK_NEXT);
2039 	}
2040 
2041 	wi->wi_freemem = FALSE;
2042 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2043 		mdb_warn("can't find %s walker", walk);
2044 		return (WALK_DONE);
2045 	}
2046 
2047 	if (mdb_whatis_done(w))
2048 		return (WALK_DONE);
2049 
2050 	/*
2051 	 * We have searched for allocated memory; now search for freed memory.
2052 	 */
2053 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2054 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2055 
2056 	wi->wi_freemem = TRUE;
2057 
2058 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2059 		mdb_warn("can't find %s walker", freewalk);
2060 		return (WALK_DONE);
2061 	}
2062 
2063 	return (WHATIS_WALKRET(w));
2064 }
2065 
2066 static int
2067 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2068 {
2069 	if (c->cache_arena == wi->wi_msb_arena ||
2070 	    (c->cache_cflags & UMC_NOTOUCH))
2071 		return (WALK_NEXT);
2072 
2073 	return (whatis_walk_cache(addr, c, wi));
2074 }
2075 
2076 static int
2077 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2078 {
2079 	if (c->cache_arena != wi->wi_msb_arena)
2080 		return (WALK_NEXT);
2081 
2082 	return (whatis_walk_cache(addr, c, wi));
2083 }
2084 
2085 static int
2086 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2087 {
2088 	if (c->cache_arena == wi->wi_msb_arena ||
2089 	    !(c->cache_cflags & UMC_NOTOUCH))
2090 		return (WALK_NEXT);
2091 
2092 	return (whatis_walk_cache(addr, c, wi));
2093 }
2094 
2095 /*ARGSUSED*/
2096 static int
2097 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2098 {
2099 	whatis_info_t wi;
2100 
2101 	bzero(&wi, sizeof (wi));
2102 	wi.wi_w = w;
2103 
2104 	/* umem's metadata is allocated from the umem_internal_arena */
2105 	if (mdb_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2106 		mdb_warn("unable to readvar \"umem_internal_arena\"");
2107 
2108 	/*
2109 	 * We process umem caches in the following order:
2110 	 *
2111 	 *	non-UMC_NOTOUCH, non-metadata	(typically the most interesting)
2112 	 *	metadata			(can be huge with UMF_AUDIT)
2113 	 *	UMC_NOTOUCH, non-metadata	(see umem_walk_all())
2114 	 */
2115 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2116 	    &wi) == -1 ||
2117 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2118 	    &wi) == -1 ||
2119 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2120 	    &wi) == -1) {
2121 		mdb_warn("couldn't find umem_cache walker");
2122 		return (1);
2123 	}
2124 	return (0);
2125 }
2126 
2127 /*ARGSUSED*/
2128 static int
2129 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2130 {
2131 	whatis_info_t wi;
2132 
2133 	bzero(&wi, sizeof (wi));
2134 	wi.wi_w = w;
2135 
2136 	if (mdb_walk("vmem_postfix",
2137 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2138 		mdb_warn("couldn't find vmem_postfix walker");
2139 		return (1);
2140 	}
2141 	return (0);
2142 }
2143 
2144 int
2145 umem_init(void)
2146 {
2147 	mdb_walker_t w = {
2148 		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
2149 		umem_cache_walk_step, umem_cache_walk_fini
2150 	};
2151 
2152 	if (mdb_add_walker(&w) == -1) {
2153 		mdb_warn("failed to add umem_cache walker");
2154 		return (-1);
2155 	}
2156 
2157 	if (umem_update_variables() == -1)
2158 		return (-1);
2159 
2160 	/* install a callback so that our variables are always up-to-date */
2161 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2162 	umem_statechange_cb(NULL);
2163 
2164 	/*
2165 	 * Register our ::whatis callbacks.
2166 	 */
2167 	mdb_whatis_register("umem", whatis_run_umem, NULL,
2168 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2169 	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2170 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2171 
2172 	return (0);
2173 }
2174 
2175 typedef struct umem_log_cpu {
2176 	uintptr_t umc_low;
2177 	uintptr_t umc_high;
2178 } umem_log_cpu_t;
2179 
2180 int
2181 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2182 {
2183 	int i;
2184 
2185 	for (i = 0; i < umem_max_ncpus; i++) {
2186 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2187 			break;
2188 	}
2189 
2190 	if (i == umem_max_ncpus)
2191 		mdb_printf("   ");
2192 	else
2193 		mdb_printf("%3d", i);
2194 
2195 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2196 	    b->bc_timestamp, b->bc_thread);
2197 
2198 	return (WALK_NEXT);
2199 }
2200 
2201 /*ARGSUSED*/
2202 int
2203 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2204 {
2205 	umem_log_header_t lh;
2206 	umem_cpu_log_header_t clh;
2207 	uintptr_t lhp, clhp;
2208 	umem_log_cpu_t *umc;
2209 	int i;
2210 
2211 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2212 		mdb_warn("failed to read 'umem_transaction_log'");
2213 		return (DCMD_ERR);
2214 	}
2215 
2216 	if (lhp == NULL) {
2217 		mdb_warn("no umem transaction log\n");
2218 		return (DCMD_ERR);
2219 	}
2220 
2221 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2222 		mdb_warn("failed to read log header at %p", lhp);
2223 		return (DCMD_ERR);
2224 	}
2225 
2226 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2227 
2228 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2229 	    UM_SLEEP | UM_GC);
2230 
2231 	for (i = 0; i < umem_max_ncpus; i++) {
2232 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2233 			mdb_warn("cannot read cpu %d's log header at %p",
2234 			    i, clhp);
2235 			return (DCMD_ERR);
2236 		}
2237 
2238 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2239 		    (uintptr_t)lh.lh_base;
2240 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2241 
2242 		clhp += sizeof (umem_cpu_log_header_t);
2243 	}
2244 
2245 	if (DCMD_HDRSPEC(flags)) {
2246 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2247 		    "BUFADDR", "TIMESTAMP", "THREAD");
2248 	}
2249 
2250 	/*
2251 	 * If we have been passed an address, we'll just print out that
2252 	 * log entry.
2253 	 */
2254 	if (flags & DCMD_ADDRSPEC) {
2255 		umem_bufctl_audit_t *bp;
2256 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2257 
2258 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2259 			mdb_warn("failed to read bufctl at %p", addr);
2260 			return (DCMD_ERR);
2261 		}
2262 
2263 		(void) umem_log_walk(addr, bp, umc);
2264 
2265 		return (DCMD_OK);
2266 	}
2267 
2268 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2269 		mdb_warn("can't find umem log walker");
2270 		return (DCMD_ERR);
2271 	}
2272 
2273 	return (DCMD_OK);
2274 }
2275 
2276 typedef struct bufctl_history_cb {
2277 	int		bhc_flags;
2278 	int		bhc_argc;
2279 	const mdb_arg_t	*bhc_argv;
2280 	int		bhc_ret;
2281 } bufctl_history_cb_t;
2282 
2283 /*ARGSUSED*/
2284 static int
2285 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2286 {
2287 	bufctl_history_cb_t *bhc = arg;
2288 
2289 	bhc->bhc_ret =
2290 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2291 
2292 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2293 
2294 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2295 }
2296 
/*
 * Print the help text for the bufctl dcmd: a one-line summary followed by
 * an OPTIONS section.  The indent is temporarily reduced by two columns so
 * that the OPTIONS heading is printed to the left of the option text.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2319 
2320 int
2321 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2322 {
2323 	uint_t verbose = FALSE;
2324 	uint_t history = FALSE;
2325 	uint_t in_history = FALSE;
2326 	uintptr_t caller = NULL, thread = NULL;
2327 	uintptr_t laddr, haddr, baddr = NULL;
2328 	hrtime_t earliest = 0, latest = 0;
2329 	int i, depth;
2330 	char c[MDB_SYM_NAMLEN];
2331 	GElf_Sym sym;
2332 	umem_bufctl_audit_t *bcp;
2333 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2334 
2335 	if (mdb_getopts(argc, argv,
2336 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2337 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2338 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2339 	    'c', MDB_OPT_UINTPTR, &caller,
2340 	    't', MDB_OPT_UINTPTR, &thread,
2341 	    'e', MDB_OPT_UINT64, &earliest,
2342 	    'l', MDB_OPT_UINT64, &latest,
2343 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2344 		return (DCMD_USAGE);
2345 
2346 	if (!(flags & DCMD_ADDRSPEC))
2347 		return (DCMD_USAGE);
2348 
2349 	if (in_history && !history)
2350 		return (DCMD_USAGE);
2351 
2352 	if (history && !in_history) {
2353 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2354 		    UM_SLEEP | UM_GC);
2355 		bufctl_history_cb_t bhc;
2356 
2357 		nargv[0].a_type = MDB_TYPE_STRING;
2358 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2359 
2360 		for (i = 0; i < argc; i++)
2361 			nargv[i + 1] = argv[i];
2362 
2363 		/*
2364 		 * When in history mode, we treat each element as if it
2365 		 * were in a seperate loop, so that the headers group
2366 		 * bufctls with similar histories.
2367 		 */
2368 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2369 		bhc.bhc_argc = argc + 1;
2370 		bhc.bhc_argv = nargv;
2371 		bhc.bhc_ret = DCMD_OK;
2372 
2373 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2374 		    addr) == -1) {
2375 			mdb_warn("unable to walk bufctl_history");
2376 			return (DCMD_ERR);
2377 		}
2378 
2379 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2380 			mdb_printf("\n");
2381 
2382 		return (bhc.bhc_ret);
2383 	}
2384 
2385 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2386 		if (verbose) {
2387 			mdb_printf("%16s %16s %16s %16s\n"
2388 			    "%<u>%16s %16s %16s %16s%</u>\n",
2389 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2390 			    "", "CACHE", "LASTLOG", "CONTENTS");
2391 		} else {
2392 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2393 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2394 		}
2395 	}
2396 
2397 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2398 		mdb_warn("couldn't read bufctl at %p", addr);
2399 		return (DCMD_ERR);
2400 	}
2401 
2402 	/*
2403 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2404 	 * the address does not really refer to a bufctl.
2405 	 */
2406 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2407 
2408 	if (caller != NULL) {
2409 		laddr = caller;
2410 		haddr = caller + sizeof (caller);
2411 
2412 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2413 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2414 			/*
2415 			 * We were provided an exact symbol value; any
2416 			 * address in the function is valid.
2417 			 */
2418 			laddr = (uintptr_t)sym.st_value;
2419 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2420 		}
2421 
2422 		for (i = 0; i < depth; i++)
2423 			if (bcp->bc_stack[i] >= laddr &&
2424 			    bcp->bc_stack[i] < haddr)
2425 				break;
2426 
2427 		if (i == depth)
2428 			return (DCMD_OK);
2429 	}
2430 
2431 	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2432 		return (DCMD_OK);
2433 
2434 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2435 		return (DCMD_OK);
2436 
2437 	if (latest != 0 && bcp->bc_timestamp > latest)
2438 		return (DCMD_OK);
2439 
2440 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2441 		return (DCMD_OK);
2442 
2443 	if (flags & DCMD_PIPE_OUT) {
2444 		mdb_printf("%#r\n", addr);
2445 		return (DCMD_OK);
2446 	}
2447 
2448 	if (verbose) {
2449 		mdb_printf(
2450 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2451 		    "%16s %16p %16p %16p\n",
2452 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2453 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2454 
2455 		mdb_inc_indent(17);
2456 		for (i = 0; i < depth; i++)
2457 			mdb_printf("%a\n", bcp->bc_stack[i]);
2458 		mdb_dec_indent(17);
2459 		mdb_printf("\n");
2460 	} else {
2461 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2462 		    bcp->bc_timestamp, bcp->bc_thread);
2463 
2464 		for (i = 0; i < depth; i++) {
2465 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2466 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2467 				continue;
2468 			if (is_umem_sym(c, "umem_"))
2469 				continue;
2470 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2471 			break;
2472 		}
2473 
2474 		if (i >= depth)
2475 			mdb_printf("\n");
2476 	}
2477 
2478 	return (DCMD_OK);
2479 }
2480 
2481 /*ARGSUSED*/
2482 int
2483 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2484 {
2485 	mdb_arg_t a;
2486 
2487 	if (!(flags & DCMD_ADDRSPEC))
2488 		return (DCMD_USAGE);
2489 
2490 	if (argc != 0)
2491 		return (DCMD_USAGE);
2492 
2493 	a.a_type = MDB_TYPE_STRING;
2494 	a.a_un.a_str = "-v";
2495 
2496 	return (bufctl(addr, flags, 1, &a));
2497 }
2498 
2499 typedef struct umem_verify {
2500 	uint64_t *umv_buf;		/* buffer to read cache contents into */
2501 	size_t umv_size;		/* number of bytes in umv_buf */
2502 	int umv_corruption;		/* > 0 if corruption found. */
2503 	int umv_besilent;		/* report actual corruption sites */
2504 	struct umem_cache umv_cache;	/* the cache we're operating on */
2505 } umem_verify_t;
2506 
/*
 * verify_pattern()
 *	Check that the first size bytes of buf_arg consist entirely of the
 *	64-bit pattern pat.  Returns -1 if they do; otherwise returns the
 *	byte offset (from buf_arg) of the first 64-bit word that differs.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	/*LINTED*/
	const uint64_t *limit = (const uint64_t *)((const char *)buf_arg + size);
	const uint64_t *cur = buf_arg;

	while (cur < limit) {
		if (*cur != pat)
			return ((uintptr_t)cur - (uintptr_t)buf_arg);
		cur++;
	}

	return (-1);
}
2523 
2524 /*
2525  * verify_buftag()
2526  *	verify that btp->bt_bxstat == (bcp ^ pat)
2527  */
2528 static int
2529 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2530 {
2531 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2532 }
2533 
2534 /*
2535  * verify_free()
2536  *	verify the integrity of a free block of memory by checking
2537  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2538  */
2539 /*ARGSUSED1*/
2540 static int
2541 verify_free(uintptr_t addr, const void *data, void *private)
2542 {
2543 	umem_verify_t *umv = (umem_verify_t *)private;
2544 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2545 	int64_t corrupt;		/* corruption offset */
2546 	umem_buftag_t *buftagp;		/* ptr to buftag */
2547 	umem_cache_t *cp = &umv->umv_cache;
2548 	int besilent = umv->umv_besilent;
2549 
2550 	/*LINTED*/
2551 	buftagp = UMEM_BUFTAG(cp, buf);
2552 
2553 	/*
2554 	 * Read the buffer to check.
2555 	 */
2556 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2557 		if (!besilent)
2558 			mdb_warn("couldn't read %p", addr);
2559 		return (WALK_NEXT);
2560 	}
2561 
2562 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2563 	    UMEM_FREE_PATTERN)) >= 0) {
2564 		if (!besilent)
2565 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2566 			    addr, (uintptr_t)addr + corrupt);
2567 		goto corrupt;
2568 	}
2569 
2570 	if ((cp->cache_flags & UMF_HASH) &&
2571 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2572 		if (!besilent)
2573 			mdb_printf("buffer %p (free) seems to "
2574 			    "have a corrupt redzone pattern\n", addr);
2575 		goto corrupt;
2576 	}
2577 
2578 	/*
2579 	 * confirm bufctl pointer integrity.
2580 	 */
2581 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2582 		if (!besilent)
2583 			mdb_printf("buffer %p (free) has a corrupt "
2584 			    "buftag\n", addr);
2585 		goto corrupt;
2586 	}
2587 
2588 	return (WALK_NEXT);
2589 corrupt:
2590 	umv->umv_corruption++;
2591 	return (WALK_NEXT);
2592 }
2593 
2594 /*
2595  * verify_alloc()
2596  *	Verify that the buftag of an allocated buffer makes sense with respect
2597  *	to the buffer.
2598  */
2599 /*ARGSUSED1*/
2600 static int
2601 verify_alloc(uintptr_t addr, const void *data, void *private)
2602 {
2603 	umem_verify_t *umv = (umem_verify_t *)private;
2604 	umem_cache_t *cp = &umv->umv_cache;
2605 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2606 	/*LINTED*/
2607 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2608 	uint32_t *ip = (uint32_t *)buftagp;
2609 	uint8_t *bp = (uint8_t *)buf;
2610 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2611 	int besilent = umv->umv_besilent;
2612 
2613 	/*
2614 	 * Read the buffer to check.
2615 	 */
2616 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2617 		if (!besilent)
2618 			mdb_warn("couldn't read %p", addr);
2619 		return (WALK_NEXT);
2620 	}
2621 
2622 	/*
2623 	 * There are two cases to handle:
2624 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2625 	 *    0xfeedfacefeedface at the end of it
2626 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2627 	 *    0xbb just past the end of the region in use.  At the buftag,
2628 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2629 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2630 	 *    endianness), followed by 32 bits containing the offset of the
2631 	 *    0xbb byte in the buffer.
2632 	 *
2633 	 * Finally, the two 32-bit words that comprise the second half of the
2634 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2635 	 */
2636 
2637 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2638 		looks_ok = 1;
2639 	else if (!UMEM_SIZE_VALID(ip[1]))
2640 		size_ok = 0;
2641 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2642 		looks_ok = 1;
2643 	else
2644 		size_ok = 0;
2645 
2646 	if (!size_ok) {
2647 		if (!besilent)
2648 			mdb_printf("buffer %p (allocated) has a corrupt "
2649 			    "redzone size encoding\n", addr);
2650 		goto corrupt;
2651 	}
2652 
2653 	if (!looks_ok) {
2654 		if (!besilent)
2655 			mdb_printf("buffer %p (allocated) has a corrupt "
2656 			    "redzone signature\n", addr);
2657 		goto corrupt;
2658 	}
2659 
2660 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2661 		if (!besilent)
2662 			mdb_printf("buffer %p (allocated) has a "
2663 			    "corrupt buftag\n", addr);
2664 		goto corrupt;
2665 	}
2666 
2667 	return (WALK_NEXT);
2668 corrupt:
2669 	umv->umv_corruption++;
2670 	return (WALK_NEXT);
2671 }
2672 
2673 /*ARGSUSED2*/
2674 int
2675 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2676 {
2677 	if (flags & DCMD_ADDRSPEC) {
2678 		int check_alloc = 0, check_free = 0;
2679 		umem_verify_t umv;
2680 
2681 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2682 		    addr) == -1) {
2683 			mdb_warn("couldn't read umem_cache %p", addr);
2684 			return (DCMD_ERR);
2685 		}
2686 
2687 		umv.umv_size = umv.umv_cache.cache_buftag +
2688 		    sizeof (umem_buftag_t);
2689 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2690 		umv.umv_corruption = 0;
2691 
2692 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2693 			check_alloc = 1;
2694 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2695 				check_free = 1;
2696 		} else {
2697 			if (!(flags & DCMD_LOOP)) {
2698 				mdb_warn("cache %p (%s) does not have "
2699 				    "redzone checking enabled\n", addr,
2700 				    umv.umv_cache.cache_name);
2701 			}
2702 			return (DCMD_ERR);
2703 		}
2704 
2705 		if (flags & DCMD_LOOP) {
2706 			/*
2707 			 * table mode, don't print out every corrupt buffer
2708 			 */
2709 			umv.umv_besilent = 1;
2710 		} else {
2711 			mdb_printf("Summary for cache '%s'\n",
2712 			    umv.umv_cache.cache_name);
2713 			mdb_inc_indent(2);
2714 			umv.umv_besilent = 0;
2715 		}
2716 
2717 		if (check_alloc)
2718 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2719 		if (check_free)
2720 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2721 
2722 		if (flags & DCMD_LOOP) {
2723 			if (umv.umv_corruption == 0) {
2724 				mdb_printf("%-*s %?p clean\n",
2725 				    UMEM_CACHE_NAMELEN,
2726 				    umv.umv_cache.cache_name, addr);
2727 			} else {
2728 				char *s = "";	/* optional s in "buffer[s]" */
2729 				if (umv.umv_corruption > 1)
2730 					s = "s";
2731 
2732 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2733 				    UMEM_CACHE_NAMELEN,
2734 				    umv.umv_cache.cache_name, addr,
2735 				    umv.umv_corruption, s);
2736 			}
2737 		} else {
2738 			/*
2739 			 * This is the more verbose mode, when the user has
2740 			 * type addr::umem_verify.  If the cache was clean,
2741 			 * nothing will have yet been printed. So say something.
2742 			 */
2743 			if (umv.umv_corruption == 0)
2744 				mdb_printf("clean\n");
2745 
2746 			mdb_dec_indent(2);
2747 		}
2748 	} else {
2749 		/*
2750 		 * If the user didn't specify a cache to verify, we'll walk all
2751 		 * umem_cache's, specifying ourself as a callback for each...
2752 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2753 		 */
2754 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2755 		    "Cache Name", "Addr", "Cache Integrity");
2756 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2757 	}
2758 
2759 	return (DCMD_OK);
2760 }
2761 
2762 typedef struct vmem_node {
2763 	struct vmem_node *vn_next;
2764 	struct vmem_node *vn_parent;
2765 	struct vmem_node *vn_sibling;
2766 	struct vmem_node *vn_children;
2767 	uintptr_t vn_addr;
2768 	int vn_marked;
2769 	vmem_t vn_vmem;
2770 } vmem_node_t;
2771 
2772 typedef struct vmem_walk {
2773 	vmem_node_t *vw_root;
2774 	vmem_node_t *vw_current;
2775 } vmem_walk_t;
2776 
2777 int
2778 vmem_walk_init(mdb_walk_state_t *wsp)
2779 {
2780 	uintptr_t vaddr, paddr;
2781 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2782 	vmem_walk_t *vw;
2783 
2784 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2785 		mdb_warn("couldn't read 'vmem_list'");
2786 		return (WALK_ERR);
2787 	}
2788 
2789 	while (vaddr != NULL) {
2790 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2791 		vp->vn_addr = vaddr;
2792 		vp->vn_next = head;
2793 		head = vp;
2794 
2795 		if (vaddr == wsp->walk_addr)
2796 			current = vp;
2797 
2798 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2799 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2800 			goto err;
2801 		}
2802 
2803 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2804 	}
2805 
2806 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2807 
2808 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2809 			vp->vn_sibling = root;
2810 			root = vp;
2811 			continue;
2812 		}
2813 
2814 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2815 			if (parent->vn_addr != paddr)
2816 				continue;
2817 			vp->vn_sibling = parent->vn_children;
2818 			parent->vn_children = vp;
2819 			vp->vn_parent = parent;
2820 			break;
2821 		}
2822 
2823 		if (parent == NULL) {
2824 			mdb_warn("couldn't find %p's parent (%p)\n",
2825 			    vp->vn_addr, paddr);
2826 			goto err;
2827 		}
2828 	}
2829 
2830 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2831 	vw->vw_root = root;
2832 
2833 	if (current != NULL)
2834 		vw->vw_current = current;
2835 	else
2836 		vw->vw_current = root;
2837 
2838 	wsp->walk_data = vw;
2839 	return (WALK_NEXT);
2840 err:
2841 	for (vp = head; head != NULL; vp = head) {
2842 		head = vp->vn_next;
2843 		mdb_free(vp, sizeof (vmem_node_t));
2844 	}
2845 
2846 	return (WALK_ERR);
2847 }
2848 
2849 int
2850 vmem_walk_step(mdb_walk_state_t *wsp)
2851 {
2852 	vmem_walk_t *vw = wsp->walk_data;
2853 	vmem_node_t *vp;
2854 	int rval;
2855 
2856 	if ((vp = vw->vw_current) == NULL)
2857 		return (WALK_DONE);
2858 
2859 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2860 
2861 	if (vp->vn_children != NULL) {
2862 		vw->vw_current = vp->vn_children;
2863 		return (rval);
2864 	}
2865 
2866 	do {
2867 		vw->vw_current = vp->vn_sibling;
2868 		vp = vp->vn_parent;
2869 	} while (vw->vw_current == NULL && vp != NULL);
2870 
2871 	return (rval);
2872 }
2873 
2874 /*
2875  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
2876  * children are visited before their parent.  We perform the postfix walk
2877  * iteratively (rather than recursively) to allow mdb to regain control
2878  * after each callback.
2879  */
2880 int
2881 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2882 {
2883 	vmem_walk_t *vw = wsp->walk_data;
2884 	vmem_node_t *vp = vw->vw_current;
2885 	int rval;
2886 
2887 	/*
2888 	 * If this node is marked, then we know that we have already visited
2889 	 * all of its children.  If the node has any siblings, they need to
2890 	 * be visited next; otherwise, we need to visit the parent.  Note
2891 	 * that vp->vn_marked will only be zero on the first invocation of
2892 	 * the step function.
2893 	 */
2894 	if (vp->vn_marked) {
2895 		if (vp->vn_sibling != NULL)
2896 			vp = vp->vn_sibling;
2897 		else if (vp->vn_parent != NULL)
2898 			vp = vp->vn_parent;
2899 		else {
2900 			/*
2901 			 * We have neither a parent, nor a sibling, and we
2902 			 * have already been visited; we're done.
2903 			 */
2904 			return (WALK_DONE);
2905 		}
2906 	}
2907 
2908 	/*
2909 	 * Before we visit this node, visit its children.
2910 	 */
2911 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2912 		vp = vp->vn_children;
2913 
2914 	vp->vn_marked = 1;
2915 	vw->vw_current = vp;
2916 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2917 
2918 	return (rval);
2919 }
2920 
2921 void
2922 vmem_walk_fini(mdb_walk_state_t *wsp)
2923 {
2924 	vmem_walk_t *vw = wsp->walk_data;
2925 	vmem_node_t *root = vw->vw_root;
2926 	int done;
2927 
2928 	if (root == NULL)
2929 		return;
2930 
2931 	if ((vw->vw_root = root->vn_children) != NULL)
2932 		vmem_walk_fini(wsp);
2933 
2934 	vw->vw_root = root->vn_sibling;
2935 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2936 	mdb_free(root, sizeof (vmem_node_t));
2937 
2938 	if (done) {
2939 		mdb_free(vw, sizeof (vmem_walk_t));
2940 	} else {
2941 		vmem_walk_fini(wsp);
2942 	}
2943 }
2944 
/*
 * Walk state for the vmem segment walkers.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report; VMEM_NONE for all */
	uintptr_t vsw_start;	/* target address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* target address of the next segment */
} vmem_seg_walk_t;
2950 
2951 /*ARGSUSED*/
2952 int
2953 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2954 {
2955 	vmem_seg_walk_t *vsw;
2956 
2957 	if (wsp->walk_addr == NULL) {
2958 		mdb_warn("vmem_%s does not support global walks\n", name);
2959 		return (WALK_ERR);
2960 	}
2961 
2962 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2963 
2964 	vsw->vsw_type = type;
2965 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2966 	vsw->vsw_current = vsw->vsw_start;
2967 
2968 	return (WALK_NEXT);
2969 }
2970 
2971 /*
2972  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
2973  */
2974 #define	VMEM_NONE	0
2975 
2976 int
2977 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
2978 {
2979 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
2980 }
2981 
2982 int
2983 vmem_free_walk_init(mdb_walk_state_t *wsp)
2984 {
2985 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
2986 }
2987 
2988 int
2989 vmem_span_walk_init(mdb_walk_state_t *wsp)
2990 {
2991 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
2992 }
2993 
2994 int
2995 vmem_seg_walk_init(mdb_walk_state_t *wsp)
2996 {
2997 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
2998 }
2999 
3000 int
3001 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3002 {
3003 	vmem_seg_t seg;
3004 	vmem_seg_walk_t *vsw = wsp->walk_data;
3005 	uintptr_t addr = vsw->vsw_current;
3006 	static size_t seg_size = 0;
3007 	int rval;
3008 
3009 	if (!seg_size) {
3010 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3011 			mdb_warn("failed to read 'vmem_seg_size'");
3012 			seg_size = sizeof (vmem_seg_t);
3013 		}
3014 	}
3015 
3016 	if (seg_size < sizeof (seg))
3017 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3018 
3019 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3020 		mdb_warn("couldn't read vmem_seg at %p", addr);
3021 		return (WALK_ERR);
3022 	}
3023 
3024 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3025 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3026 		rval = WALK_NEXT;
3027 	} else {
3028 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3029 	}
3030 
3031 	if (vsw->vsw_current == vsw->vsw_start)
3032 		return (WALK_DONE);
3033 
3034 	return (rval);
3035 }
3036 
3037 void
3038 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3039 {
3040 	vmem_seg_walk_t *vsw = wsp->walk_data;
3041 
3042 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3043 }
3044 
3045 #define	VMEM_NAMEWIDTH	22
3046 
3047 int
3048 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3049 {
3050 	vmem_t v, parent;
3051 	uintptr_t paddr;
3052 	int ident = 0;
3053 	char c[VMEM_NAMEWIDTH];
3054 
3055 	if (!(flags & DCMD_ADDRSPEC)) {
3056 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3057 			mdb_warn("can't walk vmem");
3058 			return (DCMD_ERR);
3059 		}
3060 		return (DCMD_OK);
3061 	}
3062 
3063 	if (DCMD_HDRSPEC(flags))
3064 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3065 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3066 		    "TOTAL", "SUCCEED", "FAIL");
3067 
3068 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3069 		mdb_warn("couldn't read vmem at %p", addr);
3070 		return (DCMD_ERR);
3071 	}
3072 
3073 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3074 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3075 			mdb_warn("couldn't trace %p's ancestry", addr);
3076 			ident = 0;
3077 			break;
3078 		}
3079 		paddr = (uintptr_t)parent.vm_source;
3080 	}
3081 
3082 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3083 
3084 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3085 	    addr, VMEM_NAMEWIDTH, c,
3086 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3087 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3088 
3089 	return (DCMD_OK);
3090 }
3091 
/*
 * Print the help text for the vmem_seg dcmd: a summary followed by an
 * OPTIONS section.  The indent is temporarily reduced by two columns so
 * that the OPTIONS heading is printed to the left of the option text.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3123 
3124 
3125 /*ARGSUSED*/
3126 int
3127 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3128 {
3129 	vmem_seg_t vs;
3130 	uintptr_t *stk = vs.vs_stack;
3131 	uintptr_t sz;
3132 	uint8_t t;
3133 	const char *type = NULL;
3134 	GElf_Sym sym;
3135 	char c[MDB_SYM_NAMLEN];
3136 	int no_debug;
3137 	int i;
3138 	int depth;
3139 	uintptr_t laddr, haddr;
3140 
3141 	uintptr_t caller = NULL, thread = NULL;
3142 	uintptr_t minsize = 0, maxsize = 0;
3143 
3144 	hrtime_t earliest = 0, latest = 0;
3145 
3146 	uint_t size = 0;
3147 	uint_t verbose = 0;
3148 
3149 	if (!(flags & DCMD_ADDRSPEC))
3150 		return (DCMD_USAGE);
3151 
3152 	if (mdb_getopts(argc, argv,
3153 	    'c', MDB_OPT_UINTPTR, &caller,
3154 	    'e', MDB_OPT_UINT64, &earliest,
3155 	    'l', MDB_OPT_UINT64, &latest,
3156 	    's', MDB_OPT_SETBITS, TRUE, &size,
3157 	    'm', MDB_OPT_UINTPTR, &minsize,
3158 	    'M', MDB_OPT_UINTPTR, &maxsize,
3159 	    't', MDB_OPT_UINTPTR, &thread,
3160 	    'T', MDB_OPT_STR, &type,
3161 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3162 	    NULL) != argc)
3163 		return (DCMD_USAGE);
3164 
3165 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3166 		if (verbose) {
3167 			mdb_printf("%16s %4s %16s %16s %16s\n"
3168 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3169 			    "ADDR", "TYPE", "START", "END", "SIZE",
3170 			    "", "", "THREAD", "TIMESTAMP", "");
3171 		} else {
3172 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3173 			    "START", size? "SIZE" : "END", "WHO");
3174 		}
3175 	}
3176 
3177 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3178 		mdb_warn("couldn't read vmem_seg at %p", addr);
3179 		return (DCMD_ERR);
3180 	}
3181 
3182 	if (type != NULL) {
3183 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3184 			t = VMEM_ALLOC;
3185 		else if (strcmp(type, "FREE") == 0)
3186 			t = VMEM_FREE;
3187 		else if (strcmp(type, "SPAN") == 0)
3188 			t = VMEM_SPAN;
3189 		else if (strcmp(type, "ROTR") == 0 ||
3190 		    strcmp(type, "ROTOR") == 0)
3191 			t = VMEM_ROTOR;
3192 		else if (strcmp(type, "WLKR") == 0 ||
3193 		    strcmp(type, "WALKER") == 0)
3194 			t = VMEM_WALKER;
3195 		else {
3196 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3197 			    type);
3198 			return (DCMD_ERR);
3199 		}
3200 
3201 		if (vs.vs_type != t)
3202 			return (DCMD_OK);
3203 	}
3204 
3205 	sz = vs.vs_end - vs.vs_start;
3206 
3207 	if (minsize != 0 && sz < minsize)
3208 		return (DCMD_OK);
3209 
3210 	if (maxsize != 0 && sz > maxsize)
3211 		return (DCMD_OK);
3212 
3213 	t = vs.vs_type;
3214 	depth = vs.vs_depth;
3215 
3216 	/*
3217 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3218 	 */
3219 	no_debug = (t != VMEM_ALLOC) ||
3220 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3221 
3222 	if (no_debug) {
3223 		if (caller != NULL || thread != NULL || earliest != 0 ||
3224 		    latest != 0)
3225 			return (DCMD_OK);		/* not enough info */
3226 	} else {
3227 		if (caller != NULL) {
3228 			laddr = caller;
3229 			haddr = caller + sizeof (caller);
3230 
3231 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3232 			    sizeof (c), &sym) != -1 &&
3233 			    caller == (uintptr_t)sym.st_value) {
3234 				/*
3235 				 * We were provided an exact symbol value; any
3236 				 * address in the function is valid.
3237 				 */
3238 				laddr = (uintptr_t)sym.st_value;
3239 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3240 			}
3241 
3242 			for (i = 0; i < depth; i++)
3243 				if (vs.vs_stack[i] >= laddr &&
3244 				    vs.vs_stack[i] < haddr)
3245 					break;
3246 
3247 			if (i == depth)
3248 				return (DCMD_OK);
3249 		}
3250 
3251 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3252 			return (DCMD_OK);
3253 
3254 		if (earliest != 0 && vs.vs_timestamp < earliest)
3255 			return (DCMD_OK);
3256 
3257 		if (latest != 0 && vs.vs_timestamp > latest)
3258 			return (DCMD_OK);
3259 	}
3260 
3261 	type = (t == VMEM_ALLOC ? "ALLC" :
3262 	    t == VMEM_FREE ? "FREE" :
3263 	    t == VMEM_SPAN ? "SPAN" :
3264 	    t == VMEM_ROTOR ? "ROTR" :
3265 	    t == VMEM_WALKER ? "WLKR" :
3266 	    "????");
3267 
3268 	if (flags & DCMD_PIPE_OUT) {
3269 		mdb_printf("%#r\n", addr);
3270 		return (DCMD_OK);
3271 	}
3272 
3273 	if (verbose) {
3274 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3275 		    addr, type, vs.vs_start, vs.vs_end, sz);
3276 
3277 		if (no_debug)
3278 			return (DCMD_OK);
3279 
3280 		mdb_printf("%16s %4s %16d %16llx\n",
3281 		    "", "", vs.vs_thread, vs.vs_timestamp);
3282 
3283 		mdb_inc_indent(17);
3284 		for (i = 0; i < depth; i++) {
3285 			mdb_printf("%a\n", stk[i]);
3286 		}
3287 		mdb_dec_indent(17);
3288 		mdb_printf("\n");
3289 	} else {
3290 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3291 		    vs.vs_start, size? sz : vs.vs_end);
3292 
3293 		if (no_debug) {
3294 			mdb_printf("\n");
3295 			return (DCMD_OK);
3296 		}
3297 
3298 		for (i = 0; i < depth; i++) {
3299 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3300 			    c, sizeof (c), &sym) == -1)
3301 				continue;
3302 			if (is_umem_sym(c, "vmem_"))
3303 				continue;
3304 			break;
3305 		}
3306 		mdb_printf(" %a\n", stk[i]);
3307 	}
3308 	return (DCMD_OK);
3309 }
3310 
3311 /*ARGSUSED*/
3312 static int
3313 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3314 {
3315 	char name[UMEM_CACHE_NAMELEN + 1];
3316 	hrtime_t delta;
3317 	int i, depth;
3318 
3319 	if (bcp->bc_timestamp == 0)
3320 		return (WALK_DONE);
3321 
3322 	if (*newest == 0)
3323 		*newest = bcp->bc_timestamp;
3324 
3325 	delta = *newest - bcp->bc_timestamp;
3326 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3327 
3328 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3329 	    &bcp->bc_cache->cache_name) <= 0)
3330 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3331 
3332 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3333 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3334 
3335 	for (i = 0; i < depth; i++)
3336 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3337 
3338 	return (WALK_NEXT);
3339 }
3340 
3341 int
3342 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3343 {
3344 	const char *logname = "umem_transaction_log";
3345 	hrtime_t newest = 0;
3346 
3347 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3348 		return (DCMD_USAGE);
3349 
3350 	if (argc > 0) {
3351 		if (argv->a_type != MDB_TYPE_STRING)
3352 			return (DCMD_USAGE);
3353 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3354 			logname = "umem_failure_log";
3355 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3356 			logname = "umem_slab_log";
3357 		else
3358 			return (DCMD_USAGE);
3359 	}
3360 
3361 	if (umem_readvar(&addr, logname) == -1) {
3362 		mdb_warn("failed to read %s log header pointer");
3363 		return (DCMD_ERR);
3364 	}
3365 
3366 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3367 		mdb_warn("failed to walk umem log");
3368 		return (DCMD_ERR);
3369 	}
3370 
3371 	return (DCMD_OK);
3372 }
3373 
/*
 * ::umausers, part one: the list of caches to examine.
 *
 * umclist_t accumulates the addresses of the umem_cache_t structures we are
 * interested in.  umc_add() fills it in as the callback of the umem_cache
 * walker, adding either every cache or only the cache whose name was given
 * explicitly as an argument.
 */
typedef struct umclist {
	/* cache name to match, or NULL to accept every cache */
	const char *umc_name;
	/* array of umem_cache_t addresses collected so far */
	uintptr_t *umc_caches;
	/* number of entries of umc_caches currently in use */
	int umc_nelems;
	/* number of entries umc_caches has room for */
	int umc_size;
} umclist_t;
3387 
3388 static int
3389 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3390 {
3391 	void *p;
3392 	int s;
3393 
3394 	if (umc->umc_name == NULL ||
3395 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3396 		/*
3397 		 * If we have a match, grow our array (if necessary), and then
3398 		 * add the virtual address of the matching cache to our list.
3399 		 */
3400 		if (umc->umc_nelems >= umc->umc_size) {
3401 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3402 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3403 
3404 			bcopy(umc->umc_caches, p,
3405 			    sizeof (uintptr_t) * umc->umc_size);
3406 
3407 			umc->umc_caches = p;
3408 			umc->umc_size = s;
3409 		}
3410 
3411 		umc->umc_caches[umc->umc_nelems++] = addr;
3412 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3413 	}
3414 
3415 	return (WALK_NEXT);
3416 }
3417 
3418 /*
3419  * The second piece of ::umausers is a hash table of allocations.  Each
3420  * allocation owner is identified by its stack trace and data_size.  We then
3421  * track the total bytes of all such allocations, and the number of allocations
3422  * to report at the end.  Once we have a list of caches, we walk through the
3423  * allocated bufctls of each, and update our hash table accordingly.
3424  */
3425 
3426 typedef struct umowner {
3427 	struct umowner *umo_head;		/* First hash elt in bucket */
3428 	struct umowner *umo_next;		/* Next hash elt in chain */
3429 	size_t umo_signature;			/* Hash table signature */
3430 	uint_t umo_num;				/* Number of allocations */
3431 	size_t umo_data_size;			/* Size of each allocation */
3432 	size_t umo_total_size;			/* Total bytes of allocation */
3433 	int umo_depth;				/* Depth of stack trace */
3434 	uintptr_t *umo_stack;			/* Stack trace */
3435 } umowner_t;
3436 
3437 typedef struct umusers {
3438 	const umem_cache_t *umu_cache;		/* Current umem cache */
3439 	umowner_t *umu_hash;			/* Hash table of owners */
3440 	uintptr_t *umu_stacks;			/* stacks for owners */
3441 	int umu_nelems;				/* Number of entries in use */
3442 	int umu_size;				/* Total number of entries */
3443 } umusers_t;
3444 
3445 static void
3446 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3447     size_t size, size_t data_size)
3448 {
3449 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3450 	size_t bucket, signature = data_size;
3451 	umowner_t *umo, *umoend;
3452 
3453 	/*
3454 	 * If the hash table is full, double its size and rehash everything.
3455 	 */
3456 	if (umu->umu_nelems >= umu->umu_size) {
3457 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3458 		size_t umowner_size = sizeof (umowner_t);
3459 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3460 		uintptr_t *new_stacks;
3461 
3462 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3463 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3464 
3465 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3466 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3467 		umu->umu_hash = umo;
3468 		umu->umu_stacks = new_stacks;
3469 		umu->umu_size = s;
3470 
3471 		umoend = umu->umu_hash + umu->umu_size;
3472 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3473 			umo->umo_head = NULL;
3474 			umo->umo_stack = &umu->umu_stacks[
3475 			    umem_stack_depth * (umo - umu->umu_hash)];
3476 		}
3477 
3478 		umoend = umu->umu_hash + umu->umu_nelems;
3479 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3480 			bucket = umo->umo_signature & (umu->umu_size - 1);
3481 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3482 			umu->umu_hash[bucket].umo_head = umo;
3483 		}
3484 	}
3485 
3486 	/*
3487 	 * Finish computing the hash signature from the stack trace, and then
3488 	 * see if the owner is in the hash table.  If so, update our stats.
3489 	 */
3490 	for (i = 0; i < depth; i++)
3491 		signature += bcp->bc_stack[i];
3492 
3493 	bucket = signature & (umu->umu_size - 1);
3494 
3495 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3496 		if (umo->umo_signature == signature) {
3497 			size_t difference = 0;
3498 
3499 			difference |= umo->umo_data_size - data_size;
3500 			difference |= umo->umo_depth - depth;
3501 
3502 			for (i = 0; i < depth; i++) {
3503 				difference |= umo->umo_stack[i] -
3504 				    bcp->bc_stack[i];
3505 			}
3506 
3507 			if (difference == 0) {
3508 				umo->umo_total_size += size;
3509 				umo->umo_num++;
3510 				return;
3511 			}
3512 		}
3513 	}
3514 
3515 	/*
3516 	 * If the owner is not yet hashed, grab the next element and fill it
3517 	 * in based on the allocation information.
3518 	 */
3519 	umo = &umu->umu_hash[umu->umu_nelems++];
3520 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3521 	umu->umu_hash[bucket].umo_head = umo;
3522 
3523 	umo->umo_signature = signature;
3524 	umo->umo_num = 1;
3525 	umo->umo_data_size = data_size;
3526 	umo->umo_total_size = size;
3527 	umo->umo_depth = depth;
3528 
3529 	for (i = 0; i < depth; i++)
3530 		umo->umo_stack[i] = bcp->bc_stack[i];
3531 }
3532 
3533 /*
3534  * When ::umausers is invoked without the -f flag, we simply update our hash
3535  * table with the information from each allocated bufctl.
3536  */
3537 /*ARGSUSED*/
3538 static int
3539 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3540 {
3541 	const umem_cache_t *cp = umu->umu_cache;
3542 
3543 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3544 	return (WALK_NEXT);
3545 }
3546 
3547 /*
3548  * When ::umausers is invoked with the -f flag, we print out the information
3549  * for each bufctl as well as updating the hash table.
3550  */
3551 static int
3552 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3553 {
3554 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3555 	const umem_cache_t *cp = umu->umu_cache;
3556 
3557 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3558 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3559 
3560 	for (i = 0; i < depth; i++)
3561 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3562 
3563 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3564 	return (WALK_NEXT);
3565 }
3566 
3567 /*
3568  * We sort our results by allocation size before printing them.
3569  */
3570 static int
3571 umownercmp(const void *lp, const void *rp)
3572 {
3573 	const umowner_t *lhs = lp;
3574 	const umowner_t *rhs = rp;
3575 
3576 	return (rhs->umo_total_size - lhs->umo_total_size);
3577 }
3578 
3579 /*
3580  * The main engine of ::umausers is relatively straightforward: First we
3581  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3582  * iterate over the allocated bufctls of each cache in the list.  Finally,
3583  * we sort and print our results.
3584  */
3585 /*ARGSUSED*/
3586 int
3587 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3588 {
3589 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3590 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3591 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3592 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3593 	int opt_e = FALSE;		/* Include "small" users */
3594 	int opt_f = FALSE;		/* Print stack traces */
3595 
3596 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3597 	umowner_t *umo, *umoend;
3598 	int i, oelems;
3599 
3600 	umclist_t umc;
3601 	umusers_t umu;
3602 
3603 	if (flags & DCMD_ADDRSPEC)
3604 		return (DCMD_USAGE);
3605 
3606 	bzero(&umc, sizeof (umc));
3607 	bzero(&umu, sizeof (umu));
3608 
3609 	while ((i = mdb_getopts(argc, argv,
3610 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3611 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3612 
3613 		argv += i;	/* skip past options we just processed */
3614 		argc -= i;	/* adjust argc */
3615 
3616 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3617 			return (DCMD_USAGE);
3618 
3619 		oelems = umc.umc_nelems;
3620 		umc.umc_name = argv->a_un.a_str;
3621 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3622 
3623 		if (umc.umc_nelems == oelems) {
3624 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3625 			return (DCMD_ERR);
3626 		}
3627 
3628 		do_all_caches = 0;
3629 		argv++;
3630 		argc--;
3631 	}
3632 
3633 	if (opt_e)
3634 		mem_threshold = cnt_threshold = 0;
3635 
3636 	if (opt_f)
3637 		callback = (mdb_walk_cb_t)umause2;
3638 
3639 	if (do_all_caches) {
3640 		umc.umc_name = NULL; /* match all cache names */
3641 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3642 	}
3643 
3644 	for (i = 0; i < umc.umc_nelems; i++) {
3645 		uintptr_t cp = umc.umc_caches[i];
3646 		umem_cache_t c;
3647 
3648 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3649 			mdb_warn("failed to read cache at %p", cp);
3650 			continue;
3651 		}
3652 
3653 		if (!(c.cache_flags & UMF_AUDIT)) {
3654 			if (!do_all_caches) {
3655 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3656 				    c.cache_name);
3657 			}
3658 			continue;
3659 		}
3660 
3661 		umu.umu_cache = &c;
3662 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3663 		audited_caches++;
3664 	}
3665 
3666 	if (audited_caches == 0 && do_all_caches) {
3667 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3668 		return (DCMD_ERR);
3669 	}
3670 
3671 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3672 	umoend = umu.umu_hash + umu.umu_nelems;
3673 
3674 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3675 		if (umo->umo_total_size < mem_threshold &&
3676 		    umo->umo_num < cnt_threshold)
3677 			continue;
3678 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3679 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3680 		for (i = 0; i < umo->umo_depth; i++)
3681 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3682 	}
3683 
3684 	return (DCMD_OK);
3685 }
3686 
/*
 * The tag stored in front of each malloc(3C) buffer, as read back from the
 * target by the malloc dcmds.
 */
struct malloc_data {
	/* size recorded in the tag for this buffer */
	uint32_t malloc_size;
	/* == UMEM_MALLOC_ENCODE(state, malloc_size) */
	uint32_t malloc_stat;
};
3691 
/*
 * Largest malloc size that gets its own distribution bucket: UMEM_MAXBUF
 * less the space taken by the malloc tag, which is one struct malloc_data
 * for 32-bit targets and two of them under _LP64.
 */
#ifndef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#endif
3697 
3698 typedef struct umem_malloc_info {
3699 	size_t um_total;	/* total allocated buffers */
3700 	size_t um_malloc;	/* malloc buffers */
3701 	size_t um_malloc_size;	/* sum of malloc buffer sizes */
3702 	size_t um_malloc_overhead; /* sum of in-chunk overheads */
3703 
3704 	umem_cache_t *um_cp;
3705 
3706 	uint_t *um_bucket;
3707 } umem_malloc_info_t;
3708 
3709 static void
3710 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3711     size_t maxbuckets, size_t minbucketsize, int geometric)
3712 {
3713 	uint64_t um_malloc;
3714 	int minb = -1;
3715 	int maxb = -1;
3716 	int buckets;
3717 	int nbucks;
3718 	int i;
3719 	int b;
3720 	const int *distarray;
3721 
3722 	minb = (int)minmalloc;
3723 	maxb = (int)maxmalloc;
3724 
3725 	nbucks = buckets = maxb - minb + 1;
3726 
3727 	um_malloc = 0;
3728 	for (b = minb; b <= maxb; b++)
3729 		um_malloc += um_bucket[b];
3730 
3731 	if (maxbuckets != 0)
3732 		buckets = MIN(buckets, maxbuckets);
3733 
3734 	if (minbucketsize > 1) {
3735 		buckets = MIN(buckets, nbucks/minbucketsize);
3736 		if (buckets == 0) {
3737 			buckets = 1;
3738 			minbucketsize = nbucks;
3739 		}
3740 	}
3741 
3742 	if (geometric)
3743 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3744 	else
3745 		distarray = dist_linear(buckets, minb, maxb);
3746 
3747 	dist_print_header("malloc size", 11, "count");
3748 	for (i = 0; i < buckets; i++) {
3749 		dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3750 	}
3751 	mdb_printf("\n");
3752 }
3753 
3754 /*
3755  * A malloc()ed buffer looks like:
3756  *
3757  *	<----------- mi.malloc_size --->
3758  *	<----------- cp.cache_bufsize ------------------>
3759  *	<----------- cp.cache_chunksize -------------------------------->
3760  *	+-------+-----------------------+---------------+---------------+
3761  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3762  *	+-------+---------------------------------------+---------------+
3763  *		<-- usable space ------>
3764  *
3765  * mallocsz is the argument to malloc(3C).
3766  * mi.malloc_size is the actual size passed to umem_alloc(), which
3767  * is rounded up to the smallest available cache size, which is
3768  * cache_bufsize.  If there is debugging or alignment overhead in
3769  * the cache, that is reflected in a larger cache_chunksize.
3770  *
3771  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3772  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3773  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3774  * the tag has 8 bytes of padding before it.
3775  *
3776  * 32-byte, 64-byte buffers <= 8 bytes:
3777  *	+-------+-------+--------- ...
3778  *	|/size//|/stat//| mallocsz ...
3779  *	+-------+-------+--------- ...
3780  *			^
3781  *			pointer returned from malloc(3C)
3782  *
3783  * 64-byte buffers > 8 bytes:
3784  *	+---------------+-------+-------+--------- ...
3785  *	|/padding///////|/size//|/stat//| mallocsz ...
3786  *	+---------------+-------+-------+--------- ...
3787  *					^
3788  *					pointer returned from malloc(3C)
3789  *
3790  * The "size" field is "malloc_size", which is mallocsz + the padding.
3791  * The "stat" field is derived from malloc_size, and functions as a
3792  * validation that this buffer is actually from malloc(3C).
3793  */
3794 /*ARGSUSED*/
3795 static int
3796 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3797 {
3798 	struct malloc_data md;
3799 	size_t m_addr = addr;
3800 	size_t overhead = sizeof (md);
3801 	size_t mallocsz;
3802 
3803 	ump->um_total++;
3804 
3805 #ifdef _LP64
3806 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3807 		m_addr += overhead;
3808 		overhead += sizeof (md);
3809 	}
3810 #endif
3811 
3812 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3813 		mdb_warn("unable to read malloc header at %p", m_addr);
3814 		return (WALK_NEXT);
3815 	}
3816 
3817 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3818 	case MALLOC_MAGIC:
3819 #ifdef _LP64
3820 	case MALLOC_SECOND_MAGIC:
3821 #endif
3822 		mallocsz = md.malloc_size - overhead;
3823 
3824 		ump->um_malloc++;
3825 		ump->um_malloc_size += mallocsz;
3826 		ump->um_malloc_overhead += overhead;
3827 
3828 		/* include round-off and debug overhead */
3829 		ump->um_malloc_overhead +=
3830 		    ump->um_cp->cache_chunksize - md.malloc_size;
3831 
3832 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
3833 			ump->um_bucket[mallocsz]++;
3834 
3835 		break;
3836 	default:
3837 		break;
3838 	}
3839 
3840 	return (WALK_NEXT);
3841 }
3842 
3843 int
3844 get_umem_alloc_sizes(int **out, size_t *out_num)
3845 {
3846 	GElf_Sym sym;
3847 
3848 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
3849 		mdb_warn("unable to look up umem_alloc_sizes");
3850 		return (-1);
3851 	}
3852 
3853 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
3854 	*out_num = sym.st_size / sizeof (int);
3855 
3856 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
3857 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
3858 		*out = NULL;
3859 		return (-1);
3860 	}
3861 
3862 	return (0);
3863 }
3864 
3865 
3866 static int
3867 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
3868 {
3869 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
3870 		return (WALK_NEXT);
3871 
3872 	ump->um_cp = cp;
3873 
3874 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
3875 	    -1) {
3876 		mdb_warn("can't walk 'umem' for cache %p", addr);
3877 		return (WALK_ERR);
3878 	}
3879 
3880 	return (WALK_NEXT);
3881 }
3882 
/*
 * Help text for ::umem_malloc_dist: a one-line summary followed by the
 * list of options.
 */
void
umem_malloc_dist_help(void)
{
	static const char summary[] =
	    "report distribution of outstanding malloc()s";
	static const char options[] =
	    "  -b maxbins\n"
	    "        Use at most maxbins bins for the data\n"
	    "  -B minbinsize\n"
	    "        Make the bins at least minbinsize bytes apart\n"
	    "  -d    dump the raw data out, without binning\n"
	    "  -g    use geometric binning instead of linear binning\n";

	mdb_printf("%s\n", summary);

	/* Outdent just for the bold OPTIONS heading. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3899 
3900 /*ARGSUSED*/
3901 int
3902 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3903 {
3904 	umem_malloc_info_t mi;
3905 	uint_t geometric = 0;
3906 	uint_t dump = 0;
3907 	size_t maxbuckets = 0;
3908 	size_t minbucketsize = 0;
3909 
3910 	size_t minalloc = 0;
3911 	size_t maxalloc = UMI_MAX_BUCKET;
3912 
3913 	if (flags & DCMD_ADDRSPEC)
3914 		return (DCMD_USAGE);
3915 
3916 	if (mdb_getopts(argc, argv,
3917 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3918 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3919 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3920 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3921 	    0) != argc)
3922 		return (DCMD_USAGE);
3923 
3924 	bzero(&mi, sizeof (mi));
3925 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
3926 	    UM_SLEEP | UM_GC);
3927 
3928 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
3929 	    &mi) == -1) {
3930 		mdb_warn("unable to walk 'umem_cache'");
3931 		return (DCMD_ERR);
3932 	}
3933 
3934 	if (dump) {
3935 		int i;
3936 		for (i = minalloc; i <= maxalloc; i++)
3937 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
3938 
3939 		return (DCMD_OK);
3940 	}
3941 
3942 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
3943 	    maxbuckets, minbucketsize, geometric);
3944 
3945 	return (DCMD_OK);
3946 }
3947 
/*
 * Help text for ::umem_malloc_info: a one-line summary followed by the
 * list of options.  The -g line is left out when built with _KMDB defined.
 */
void
umem_malloc_info_help(void)
{
	static const char summary[] =
	    "report information about malloc()s by cache.  ";
	static const char options[] =
	    "  -b maxbins\n"
	    "        Use at most maxbins bins for the data\n"
	    "  -B minbinsize\n"
	    "        Make the bins at least minbinsize bytes apart\n"
	    "  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
	    "  -g    use geometric binning instead of linear binning\n"
#endif
	    "";

	mdb_printf("%s\n", summary);

	/* Outdent just for the bold OPTIONS heading. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
3967 int
3968 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3969 {
3970 	umem_cache_t c;
3971 	umem_malloc_info_t mi;
3972 
3973 	int skip = 0;
3974 
3975 	size_t maxmalloc;
3976 	size_t overhead;
3977 	size_t allocated;
3978 	size_t avg_malloc;
3979 	size_t overhead_pct;	/* 1000 * overhead_percent */
3980 
3981 	uint_t verbose = 0;
3982 	uint_t dump = 0;
3983 	uint_t geometric = 0;
3984 	size_t maxbuckets = 0;
3985 	size_t minbucketsize = 0;
3986 
3987 	int *alloc_sizes;
3988 	int idx;
3989 	size_t num;
3990 	size_t minmalloc;
3991 
3992 	if (mdb_getopts(argc, argv,
3993 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
3994 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
3995 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
3996 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
3997 	    0) != argc)
3998 		return (DCMD_USAGE);
3999 
4000 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4001 		verbose = 1;
4002 
4003 	if (!(flags & DCMD_ADDRSPEC)) {
4004 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4005 		    argc, argv) == -1) {
4006 			mdb_warn("can't walk umem_cache");
4007 			return (DCMD_ERR);
4008 		}
4009 		return (DCMD_OK);
4010 	}
4011 
4012 	if (!mdb_vread(&c, sizeof (c), addr)) {
4013 		mdb_warn("unable to read cache at %p", addr);
4014 		return (DCMD_ERR);
4015 	}
4016 
4017 	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4018 		if (!(flags & DCMD_LOOP))
4019 			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4020 			    "by malloc()\n", c.cache_name);
4021 		skip = 1;
4022 	}
4023 
4024 	/*
4025 	 * normally, print the header only the first time.  In verbose mode,
4026 	 * print the header on every non-skipped buffer
4027 	 */
4028 	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4029 		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4030 		    "CACHE", "BUFSZ", "MAXMAL",
4031 		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4032 
4033 	if (skip)
4034 		return (DCMD_OK);
4035 
4036 	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4037 #ifdef _LP64
4038 	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4039 		maxmalloc -= sizeof (struct malloc_data);
4040 #endif
4041 
4042 	bzero(&mi, sizeof (mi));
4043 	mi.um_cp = &c;
4044 	if (verbose)
4045 		mi.um_bucket =
4046 		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4047 		    UM_SLEEP | UM_GC);
4048 
4049 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4050 	    -1) {
4051 		mdb_warn("can't walk 'umem'");
4052 		return (DCMD_ERR);
4053 	}
4054 
4055 	overhead = mi.um_malloc_overhead;
4056 	allocated = mi.um_malloc_size;
4057 
4058 	/* do integer round off for the average */
4059 	if (mi.um_malloc != 0)
4060 		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4061 	else
4062 		avg_malloc = 0;
4063 
4064 	/*
4065 	 * include per-slab overhead
4066 	 *
4067 	 * Each slab in a given cache is the same size, and has the same
4068 	 * number of chunks in it;  we read in the first slab on the
4069 	 * slab list to get the number of chunks for all slabs.  To
4070 	 * compute the per-slab overhead, we just subtract the chunk usage
4071 	 * from the slabsize:
4072 	 *
4073 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4074 	 * |////////////|	|	| ...	|	|///////|///////|
4075 	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
4076 	 * |////////////|	|	| ...	|	|///////|///////|
4077 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4078 	 * |		\_______chunksize * chunks_____/		|
4079 	 * \__________________________slabsize__________________________/
4080 	 *
4081 	 * For UMF_HASH caches, there is an additional source of overhead;
4082 	 * the external umem_slab_t and per-chunk bufctl structures.  We
4083 	 * include those in our per-slab overhead.
4084 	 *
4085 	 * Once we have a number for the per-slab overhead, we estimate
4086 	 * the actual overhead by treating the malloc()ed buffers as if
4087 	 * they were densely packed:
4088 	 *
4089 	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
4090 	 *
4091 	 * carefully ordering the multiply before the divide, to avoid
4092 	 * round-off error.
4093 	 */
4094 	if (mi.um_malloc != 0) {
4095 		umem_slab_t slab;
4096 		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4097 
4098 		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4099 			mdb_warn("unable to read slab at %p\n", saddr);
4100 		} else {
4101 			long chunks = slab.slab_chunks;
4102 			if (chunks != 0 && c.cache_chunksize != 0 &&
4103 			    chunks <= c.cache_slabsize / c.cache_chunksize) {
4104 				uintmax_t perslab =
4105 				    c.cache_slabsize -
4106 				    (c.cache_chunksize * chunks);
4107 
4108 				if (c.cache_flags & UMF_HASH) {
4109 					perslab += sizeof (umem_slab_t) +
4110 					    chunks *
4111 					    ((c.cache_flags & UMF_AUDIT) ?
4112 					    sizeof (umem_bufctl_audit_t) :
4113 					    sizeof (umem_bufctl_t));
4114 				}
4115 				overhead +=
4116 				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
4117 			} else {
4118 				mdb_warn("invalid #chunks (%d) in slab %p\n",
4119 				    chunks, saddr);
4120 			}
4121 		}
4122 	}
4123 
4124 	if (allocated != 0)
4125 		overhead_pct = (1000ULL * overhead) / allocated;
4126 	else
4127 		overhead_pct = 0;
4128 
4129 	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4130 	    addr, c.cache_bufsize, maxmalloc,
4131 	    mi.um_malloc, avg_malloc, allocated, overhead,
4132 	    overhead_pct / 10, overhead_pct % 10);
4133 
4134 	if (!verbose)
4135 		return (DCMD_OK);
4136 
4137 	if (!dump)
4138 		mdb_printf("\n");
4139 
4140 	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4141 		return (DCMD_ERR);
4142 
4143 	for (idx = 0; idx < num; idx++) {
4144 		if (alloc_sizes[idx] == c.cache_bufsize)
4145 			break;
4146 		if (alloc_sizes[idx] == 0) {
4147 			idx = num;	/* 0-terminated array */
4148 			break;
4149 		}
4150 	}
4151 	if (idx == num) {
4152 		mdb_warn(
4153 		    "cache %p's size (%d) not in umem_alloc_sizes\n",
4154 		    addr, c.cache_bufsize);
4155 		return (DCMD_ERR);
4156 	}
4157 
4158 	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4159 	if (minmalloc > 0) {
4160 #ifdef _LP64
4161 		if (minmalloc > UMEM_SECOND_ALIGN)
4162 			minmalloc -= sizeof (struct malloc_data);
4163 #endif
4164 		minmalloc -= sizeof (struct malloc_data);
4165 		minmalloc += 1;
4166 	}
4167 
4168 	if (dump) {
4169 		for (idx = minmalloc; idx <= maxmalloc; idx++)
4170 			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4171 		mdb_printf("\n");
4172 	} else {
4173 		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4174 		    maxbuckets, minbucketsize, geometric);
4175 	}
4176 
4177 	return (DCMD_OK);
4178 }
4179