xref: /titanic_52/usr/src/cmd/mdb/common/modules/libumem/umem.c (revision 06e46062ef4f5f4b687cbafb4518fb123fe23920)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "umem.h"
30 
31 #include <sys/vmem_impl_user.h>
32 #include <umem_impl.h>
33 
34 #include <alloca.h>
35 
36 #include "misc.h"
37 
38 #include "umem_pagesize.h"
39 
/*
 * Type bits for the buffer walkers (see umem_walk_init_common()):
 *
 *	UM_ALLOCATED	walk allocated buffers
 *	UM_FREE		walk freed buffers
 *	UM_BUFCTL	report bufctls rather than buffer addresses
 *	UM_HASH		set internally when the walk is layered on "umem_hash"
 */
#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8
44 
/*
 * Debugger-side copies of target variables; all three are filled in by
 * umem_init().
 */
uint32_t umem_max_ncpus;

uint32_t umem_stack_depth;
size_t umem_pagesize;
49 
/*
 * Read the target variable whose name matches the local variable 'var'
 * into that local variable via umem_readvar().  The expression is non-zero
 * (after emitting a warning) when the read fails, and zero on success, so
 * it can be used directly as an 'if' condition.
 */
#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    ((void) mdb_warn("failed to read "#var), 1))
53 
54 int
55 umem_init(void)
56 {
57 	size_t pagesize;
58 	GElf_Sym sym;
59 
60 	/*
61 	 * Figure out which type of umem is being used
62 	 */
63 	if (mdb_lookup_by_obj("libumem.so.1", "umem_alloc", &sym) != 0)
64 		umem_set_standalone();
65 
66 	if (UMEM_READVAR(umem_max_ncpus))
67 		return (-1);
68 	if (UMEM_READVAR(umem_stack_depth))
69 		return (-1);
70 	if (UMEM_READVAR(pagesize))
71 		return (-1);
72 
73 	umem_pagesize = pagesize;
74 
75 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
76 		mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
77 		    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
78 		umem_stack_depth = 0;
79 	}
80 	return (0);
81 }
82 
83 /*ARGSUSED*/
84 int
85 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
86 {
87 	mdb_walker_t w;
88 	char descr[64];
89 
90 	(void) mdb_snprintf(descr, sizeof (descr),
91 	    "walk the %s cache", c->cache_name);
92 
93 	w.walk_name = c->cache_name;
94 	w.walk_descr = descr;
95 	w.walk_init = umem_walk_init;
96 	w.walk_step = umem_walk_step;
97 	w.walk_fini = umem_walk_fini;
98 	w.walk_init_arg = (void *)addr;
99 
100 	if (mdb_add_walker(&w) == -1)
101 		mdb_warn("failed to add %s walker", c->cache_name);
102 
103 	return (WALK_NEXT);
104 }
105 
106 int
107 umem_abort_messages(void)
108 {
109 	char *umem_error_buffer;
110 	uint_t umem_error_begin;
111 	GElf_Sym sym;
112 	size_t bufsize;
113 
114 	if (UMEM_READVAR(umem_error_begin))
115 		return (DCMD_ERR);
116 
117 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
118 		mdb_warn("unable to look up umem_error_buffer");
119 		return (DCMD_ERR);
120 	}
121 
122 	bufsize = (size_t)sym.st_size;
123 
124 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
125 
126 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
127 	    != bufsize) {
128 		mdb_warn("unable to read umem_error_buffer");
129 		return (DCMD_ERR);
130 	}
131 	/* put a zero after the end of the buffer to simplify printing */
132 	umem_error_buffer[bufsize] = 0;
133 
134 	if ((umem_error_begin % bufsize) == 0)
135 		mdb_printf("%s\n", umem_error_buffer);
136 	else {
137 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
138 		mdb_printf("%s%s\n",
139 		    &umem_error_buffer[umem_error_begin % bufsize],
140 		    umem_error_buffer);
141 	}
142 
143 	return (DCMD_OK);
144 }
145 
146 static void
147 umem_log_status(const char *name, umem_log_header_t *val)
148 {
149 	umem_log_header_t my_lh;
150 	uintptr_t pos = (uintptr_t)val;
151 	size_t size;
152 
153 	if (pos == NULL)
154 		return;
155 
156 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
157 		mdb_warn("\nunable to read umem_%s_log pointer %p",
158 		    name, pos);
159 		return;
160 	}
161 
162 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
163 
164 	if (size % (1024 * 1024) == 0)
165 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
166 	else if (size % 1024 == 0)
167 		mdb_printf("%s=%dk ", name, size / 1024);
168 	else
169 		mdb_printf("%s=%d ", name, size);
170 }
171 
172 typedef struct umem_debug_flags {
173 	const char	*udf_name;
174 	uint_t		udf_flags;
175 	uint_t		udf_clear;	/* if 0, uses udf_flags */
176 } umem_debug_flags_t;
177 
178 umem_debug_flags_t umem_status_flags[] = {
179 	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
180 	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
181 	{ "audit",	UMF_AUDIT },
182 	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
183 	{ "nosignal",	UMF_CHECKSIGNAL },
184 	{ "firewall",	UMF_FIREWALL },
185 	{ "lite",	UMF_LITE },
186 	{ NULL }
187 };
188 
189 /*ARGSUSED*/
190 int
191 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
192 {
193 	int umem_ready;
194 	int umem_logging;
195 
196 	umem_log_header_t *umem_transaction_log;
197 	umem_log_header_t *umem_content_log;
198 	umem_log_header_t *umem_failure_log;
199 	umem_log_header_t *umem_slab_log;
200 
201 	if (UMEM_READVAR(umem_ready))
202 		goto err;
203 
204 	mdb_printf("Status:\t\t%s\n",
205 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
206 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
207 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
208 	    umem_ready == UMEM_READY ? "ready and active" :
209 	    "unknown (umem_ready invalid)");
210 
211 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
212 
213 	if (UMEM_READVAR(umem_logging))
214 		goto err;
215 	if (UMEM_READVAR(umem_transaction_log))
216 		goto err;
217 	if (UMEM_READVAR(umem_content_log))
218 		goto err;
219 	if (UMEM_READVAR(umem_failure_log))
220 		goto err;
221 	if (UMEM_READVAR(umem_slab_log))
222 		goto err;
223 
224 	mdb_printf("Logs:\t\t");
225 	umem_log_status("transaction", umem_transaction_log);
226 	umem_log_status("content", umem_content_log);
227 	umem_log_status("fail", umem_failure_log);
228 	umem_log_status("slab", umem_slab_log);
229 	if (!umem_logging)
230 		mdb_printf("(inactive)");
231 	mdb_printf("\n");
232 
233 	mdb_printf("Message buffer:\n");
234 	return (umem_abort_messages());
235 
236 err:
237 	mdb_printf("Message buffer:\n");
238 	(void) umem_abort_messages();
239 	return (DCMD_ERR);
240 }
241 
/*
 * State for the umem_cache walker, which follows the cache_next links
 * starting from umem_null_cache.
 */
typedef struct {
	uintptr_t ucw_first;	/* address of umem_null_cache (list head) */
	uintptr_t ucw_current;	/* address of the next cache to visit */
} umem_cache_walk_t;
246 
247 int
248 umem_cache_walk_init(mdb_walk_state_t *wsp)
249 {
250 	umem_cache_walk_t *ucw;
251 	umem_cache_t c;
252 	uintptr_t cp;
253 	GElf_Sym sym;
254 
255 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
256 		mdb_warn("couldn't find umem_null_cache");
257 		return (WALK_ERR);
258 	}
259 
260 	cp = (uintptr_t)sym.st_value;
261 
262 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
263 		mdb_warn("couldn't read cache at %p", cp);
264 		return (WALK_ERR);
265 	}
266 
267 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
268 
269 	ucw->ucw_first = cp;
270 	ucw->ucw_current = (uintptr_t)c.cache_next;
271 	wsp->walk_data = ucw;
272 
273 	return (WALK_NEXT);
274 }
275 
276 int
277 umem_cache_walk_step(mdb_walk_state_t *wsp)
278 {
279 	umem_cache_walk_t *ucw = wsp->walk_data;
280 	umem_cache_t c;
281 	int status;
282 
283 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
284 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
285 		return (WALK_DONE);
286 	}
287 
288 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
289 
290 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
291 		return (WALK_DONE);
292 
293 	return (status);
294 }
295 
296 void
297 umem_cache_walk_fini(mdb_walk_state_t *wsp)
298 {
299 	umem_cache_walk_t *ucw = wsp->walk_data;
300 	mdb_free(ucw, sizeof (umem_cache_walk_t));
301 }
302 
/*
 * State for the umem_cpu walker, which iterates over the target's
 * umem_cpus array.
 */
typedef struct {
	umem_cpu_t *ucw_cpus;	/* target address of the umem_cpus array */
	uint32_t ucw_current;	/* index of the next entry to visit */
	uint32_t ucw_max;	/* number of entries (umem_max_ncpus) */
} umem_cpu_walk_state_t;
308 
309 int
310 umem_cpu_walk_init(mdb_walk_state_t *wsp)
311 {
312 	umem_cpu_t *umem_cpus;
313 
314 	umem_cpu_walk_state_t *ucw;
315 
316 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
317 		mdb_warn("failed to read 'umem_cpus'");
318 		return (WALK_ERR);
319 	}
320 
321 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
322 
323 	ucw->ucw_cpus = umem_cpus;
324 	ucw->ucw_current = 0;
325 	ucw->ucw_max = umem_max_ncpus;
326 
327 	wsp->walk_data = ucw;
328 	return (WALK_NEXT);
329 }
330 
331 int
332 umem_cpu_walk_step(mdb_walk_state_t *wsp)
333 {
334 	umem_cpu_t cpu;
335 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
336 
337 	uintptr_t caddr;
338 
339 	if (ucw->ucw_current >= ucw->ucw_max)
340 		return (WALK_DONE);
341 
342 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
343 
344 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
345 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
346 		return (WALK_ERR);
347 	}
348 
349 	ucw->ucw_current++;
350 
351 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
352 }
353 
354 void
355 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
356 {
357 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
358 
359 	mdb_free(ucw, sizeof (*ucw));
360 }
361 
362 int
363 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
364 {
365 	if (wsp->walk_addr == NULL) {
366 		mdb_warn("umem_cpu_cache doesn't support global walks");
367 		return (WALK_ERR);
368 	}
369 
370 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
371 		mdb_warn("couldn't walk 'umem_cpu'");
372 		return (WALK_ERR);
373 	}
374 
375 	wsp->walk_data = (void *)wsp->walk_addr;
376 
377 	return (WALK_NEXT);
378 }
379 
380 int
381 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
382 {
383 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
384 	const umem_cpu_t *cpu = wsp->walk_layer;
385 	umem_cpu_cache_t cc;
386 
387 	caddr += cpu->cpu_cache_offset;
388 
389 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
390 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
391 		return (WALK_ERR);
392 	}
393 
394 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
395 }
396 
397 int
398 umem_slab_walk_init(mdb_walk_state_t *wsp)
399 {
400 	uintptr_t caddr = wsp->walk_addr;
401 	umem_cache_t c;
402 
403 	if (caddr == NULL) {
404 		mdb_warn("umem_slab doesn't support global walks\n");
405 		return (WALK_ERR);
406 	}
407 
408 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
409 		mdb_warn("couldn't read umem_cache at %p", caddr);
410 		return (WALK_ERR);
411 	}
412 
413 	wsp->walk_data =
414 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
415 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
416 
417 	return (WALK_NEXT);
418 }
419 
420 int
421 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
422 {
423 	uintptr_t caddr = wsp->walk_addr;
424 	umem_cache_t c;
425 
426 	if (caddr == NULL) {
427 		mdb_warn("umem_slab_partial doesn't support global walks\n");
428 		return (WALK_ERR);
429 	}
430 
431 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
432 		mdb_warn("couldn't read umem_cache at %p", caddr);
433 		return (WALK_ERR);
434 	}
435 
436 	wsp->walk_data =
437 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
438 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
439 
440 	/*
441 	 * Some consumers (umem_walk_step(), in particular) require at
442 	 * least one callback if there are any buffers in the cache.  So
443 	 * if there are *no* partial slabs, report the last full slab, if
444 	 * any.
445 	 *
446 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
447 	 */
448 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
449 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
450 
451 	return (WALK_NEXT);
452 }
453 
454 int
455 umem_slab_walk_step(mdb_walk_state_t *wsp)
456 {
457 	umem_slab_t s;
458 	uintptr_t addr = wsp->walk_addr;
459 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
460 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
461 
462 	if (addr == saddr)
463 		return (WALK_DONE);
464 
465 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
466 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
467 		return (WALK_ERR);
468 	}
469 
470 	if ((uintptr_t)s.slab_cache != caddr) {
471 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
472 		    addr, caddr, s.slab_cache);
473 		return (WALK_ERR);
474 	}
475 
476 	wsp->walk_addr = (uintptr_t)s.slab_next;
477 
478 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
479 }
480 
481 int
482 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
483 {
484 	umem_cache_t c;
485 
486 	if (!(flags & DCMD_ADDRSPEC)) {
487 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
488 			mdb_warn("can't walk umem_cache");
489 			return (DCMD_ERR);
490 		}
491 		return (DCMD_OK);
492 	}
493 
494 	if (DCMD_HDRSPEC(flags))
495 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
496 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
497 
498 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
499 		mdb_warn("couldn't read umem_cache at %p", addr);
500 		return (DCMD_ERR);
501 	}
502 
503 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
504 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
505 
506 	return (DCMD_OK);
507 }
508 
/*
 * qsort()/bsearch() comparator for arrays of addresses: orders the two
 * uintptr_t values pointed to in ascending numeric order, returning -1, 0
 * or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	const uintptr_t a = *(const uintptr_t *)lhs;
	const uintptr_t b = *(const uintptr_t *)rhs;

	/* (a > b) and (a < b) are each 0 or 1, so this yields -1, 0 or 1 */
	return ((a > b) - (a < b));
}
521 
522 static int
523 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
524 {
525 	const umem_bufctl_audit_t *bcp1 = *lhs;
526 	const umem_bufctl_audit_t *bcp2 = *rhs;
527 
528 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
529 		return (-1);
530 
531 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
532 		return (1);
533 
534 	return (0);
535 }
536 
/*
 * State for the umem_hash walker, which visits every bufctl in a cache's
 * hash table.
 */
typedef struct umem_hash_walk {
	uintptr_t *umhw_table;	/* local copy of the hash bucket array */
	size_t umhw_nelems;	/* number of buckets in umhw_table */
	size_t umhw_pos;	/* index of the next bucket to examine */
	umem_bufctl_t umhw_cur;	/* most recently read bufctl */
} umem_hash_walk_t;
543 
544 int
545 umem_hash_walk_init(mdb_walk_state_t *wsp)
546 {
547 	umem_hash_walk_t *umhw;
548 	uintptr_t *hash;
549 	umem_cache_t c;
550 	uintptr_t haddr, addr = wsp->walk_addr;
551 	size_t nelems;
552 	size_t hsize;
553 
554 	if (addr == NULL) {
555 		mdb_warn("umem_hash doesn't support global walks\n");
556 		return (WALK_ERR);
557 	}
558 
559 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
560 		mdb_warn("couldn't read cache at addr %p", addr);
561 		return (WALK_ERR);
562 	}
563 
564 	if (!(c.cache_flags & UMF_HASH)) {
565 		mdb_warn("cache %p doesn't have a hash table\n", addr);
566 		return (WALK_DONE);		/* nothing to do */
567 	}
568 
569 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
570 	umhw->umhw_cur.bc_next = NULL;
571 	umhw->umhw_pos = 0;
572 
573 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
574 	hsize = nelems * sizeof (uintptr_t);
575 	haddr = (uintptr_t)c.cache_hash_table;
576 
577 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
578 	if (mdb_vread(hash, hsize, haddr) == -1) {
579 		mdb_warn("failed to read hash table at %p", haddr);
580 		mdb_free(hash, hsize);
581 		mdb_free(umhw, sizeof (umem_hash_walk_t));
582 		return (WALK_ERR);
583 	}
584 
585 	wsp->walk_data = umhw;
586 
587 	return (WALK_NEXT);
588 }
589 
590 int
591 umem_hash_walk_step(mdb_walk_state_t *wsp)
592 {
593 	umem_hash_walk_t *umhw = wsp->walk_data;
594 	uintptr_t addr = NULL;
595 
596 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
597 		while (umhw->umhw_pos < umhw->umhw_nelems) {
598 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
599 				break;
600 		}
601 	}
602 	if (addr == NULL)
603 		return (WALK_DONE);
604 
605 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
606 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
607 		return (WALK_ERR);
608 	}
609 
610 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
611 }
612 
613 void
614 umem_hash_walk_fini(mdb_walk_state_t *wsp)
615 {
616 	umem_hash_walk_t *umhw = wsp->walk_data;
617 
618 	if (umhw == NULL)
619 		return;
620 
621 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
622 	mdb_free(umhw, sizeof (umem_hash_walk_t));
623 }
624 
625 /*
626  * Find the address of the bufctl structure for the address 'buf' in cache
627  * 'cp', which is at address caddr, and place it in *out.
628  */
629 static int
630 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
631 {
632 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
633 	umem_bufctl_t *bcp;
634 	umem_bufctl_t bc;
635 
636 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
637 		mdb_warn("unable to read hash bucket for %p in cache %p",
638 		    buf, caddr);
639 		return (-1);
640 	}
641 
642 	while (bcp != NULL) {
643 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
644 		    (uintptr_t)bcp) == -1) {
645 			mdb_warn("unable to read bufctl at %p", bcp);
646 			return (-1);
647 		}
648 		if (bc.bc_addr == buf) {
649 			*out = (uintptr_t)bcp;
650 			return (0);
651 		}
652 		bcp = bc.bc_next;
653 	}
654 
655 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
656 	return (-1);
657 }
658 
659 int
660 umem_get_magsize(const umem_cache_t *cp)
661 {
662 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
663 	GElf_Sym mt_sym;
664 	umem_magtype_t mt;
665 	int res;
666 
667 	/*
668 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
669 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
670 	 * it is okay to return 0 for them.
671 	 */
672 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
673 	    (cp->cache_flags & UMF_NOMAGAZINE))
674 		return (res);
675 
676 	if (mdb_lookup_by_name("umem_magtype", &mt_sym) == -1) {
677 		mdb_warn("unable to read 'umem_magtype'");
678 	} else if (addr < mt_sym.st_value ||
679 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
680 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
681 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
682 		    cp->cache_name, addr);
683 		return (0);
684 	}
685 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
686 		mdb_warn("unable to read magtype at %a", addr);
687 		return (0);
688 	}
689 	return (mt.mt_magsize);
690 }
691 
692 /*ARGSUSED*/
693 static int
694 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
695 {
696 	*est -= (sp->slab_chunks - sp->slab_refcnt);
697 
698 	return (WALK_NEXT);
699 }
700 
701 /*
702  * Returns an upper bound on the number of allocated buffers in a given
703  * cache.
704  */
705 size_t
706 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
707 {
708 	int magsize;
709 	size_t cache_est;
710 
711 	cache_est = cp->cache_buftotal;
712 
713 	(void) mdb_pwalk("umem_slab_partial",
714 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
715 
716 	if ((magsize = umem_get_magsize(cp)) != 0) {
717 		size_t mag_est = cp->cache_full.ml_total * magsize;
718 
719 		if (cache_est >= mag_est) {
720 			cache_est -= mag_est;
721 		} else {
722 			mdb_warn("cache %p's magazine layer holds more buffers "
723 			    "than the slab layer.\n", addr);
724 		}
725 	}
726 	return (cache_est);
727 }
728 
/*
 * Helper for umem_read_magazines(): read the magazine at target address
 * 'ump' into 'mp' and append its first 'rounds' rounds to maglist.  Jumps
 * to the enclosing function's 'fail' label if the magazine cannot be read
 * or if maglist fills up.  Relies on the caller's locals mp, ump,
 * magbsize, maglist, magcnt, magmax and i.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * with 'rounds' parenthesized so that any expression may be passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
/*CONSTCOND*/ \
} while (0)
743 
744 int
745 umem_read_magazines(umem_cache_t *cp, uintptr_t addr, int ncpus,
746     void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
747 {
748 	umem_magazine_t *ump, *mp;
749 	void **maglist = NULL;
750 	int i, cpu;
751 	size_t magsize, magmax, magbsize;
752 	size_t magcnt = 0;
753 
754 	/*
755 	 * Read the magtype out of the cache, after verifying the pointer's
756 	 * correctness.
757 	 */
758 	magsize = umem_get_magsize(cp);
759 	if (magsize == 0)
760 		magsize = 1;
761 
762 	/*
763 	 * There are several places where we need to go buffer hunting:
764 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
765 	 * and the full magazine list in the depot.
766 	 *
767 	 * For an upper bound on the number of buffers in the magazine
768 	 * layer, we have the number of magazines on the cache_full
769 	 * list plus at most two magazines per CPU (the loaded and the
770 	 * spare).  Toss in 100 magazines as a fudge factor in case this
771 	 * is live (the number "100" comes from the same fudge factor in
772 	 * crash(1M)).
773 	 */
774 	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
775 	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
776 
777 	if (magbsize >= PAGESIZE / 2) {
778 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
779 		    addr, magbsize);
780 		goto fail;
781 	}
782 
783 	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
784 	mp = mdb_alloc(magbsize, alloc_flags);
785 	if (mp == NULL || maglist == NULL)
786 		goto fail;
787 
788 	/*
789 	 * First up: the magazines in the depot (i.e. on the cache_full list).
790 	 */
791 	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
792 		READMAG_ROUNDS(magsize);
793 		ump = mp->mag_next;
794 
795 		if (ump == cp->cache_full.ml_list)
796 			break; /* cache_full list loop detected */
797 	}
798 
799 	dprintf(("cache_full list done\n"));
800 
801 	/*
802 	 * Now whip through the CPUs, snagging the loaded magazines
803 	 * and full spares.
804 	 */
805 	for (cpu = 0; cpu < ncpus; cpu++) {
806 		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
807 
808 		dprintf(("reading cpu cache %p\n",
809 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
810 
811 		if (ccp->cc_rounds > 0 &&
812 		    (ump = ccp->cc_loaded) != NULL) {
813 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
814 			READMAG_ROUNDS(ccp->cc_rounds);
815 		}
816 
817 		if (ccp->cc_prounds > 0 &&
818 		    (ump = ccp->cc_ploaded) != NULL) {
819 			dprintf(("reading %d previously loaded rounds\n",
820 			    ccp->cc_prounds));
821 			READMAG_ROUNDS(ccp->cc_prounds);
822 		}
823 	}
824 
825 	dprintf(("magazine layer: %d buffers\n", magcnt));
826 
827 	if (!(alloc_flags & UM_GC))
828 		mdb_free(mp, magbsize);
829 
830 	*maglistp = maglist;
831 	*magcntp = magcnt;
832 	*magmaxp = magmax;
833 
834 	return (WALK_NEXT);
835 
836 fail:
837 	if (!(alloc_flags & UM_GC)) {
838 		if (mp)
839 			mdb_free(mp, magbsize);
840 		if (maglist)
841 			mdb_free(maglist, magmax * sizeof (void *));
842 	}
843 	return (WALK_ERR);
844 }
845 
846 static int
847 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
848 {
849 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
850 }
851 
852 static int
853 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
854 {
855 	umem_bufctl_audit_t *b;
856 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
857 
858 	/*
859 	 * if UMF_AUDIT is not set, we know that we're looking at a
860 	 * umem_bufctl_t.
861 	 */
862 	if (!(cp->cache_flags & UMF_AUDIT) ||
863 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
864 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
865 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
866 			mdb_warn("unable to read bufctl at %p", buf);
867 			return (WALK_ERR);
868 		}
869 	}
870 
871 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
872 }
873 
874 typedef struct umem_walk {
875 	int umw_type;
876 
877 	int umw_addr;			/* cache address */
878 	umem_cache_t *umw_cp;
879 	size_t umw_csize;
880 
881 	/*
882 	 * magazine layer
883 	 */
884 	void **umw_maglist;
885 	size_t umw_max;
886 	size_t umw_count;
887 	size_t umw_pos;
888 
889 	/*
890 	 * slab layer
891 	 */
892 	char *umw_valid;	/* to keep track of freed buffers */
893 	char *umw_ubase;	/* buffer for slab data */
894 } umem_walk_t;
895 
896 static int
897 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
898 {
899 	umem_walk_t *umw;
900 	int ncpus, csize;
901 	umem_cache_t *cp;
902 
903 	size_t magmax, magcnt;
904 	void **maglist = NULL;
905 	uint_t chunksize, slabsize;
906 	int status = WALK_ERR;
907 	uintptr_t addr = wsp->walk_addr;
908 	const char *layered;
909 
910 	type &= ~UM_HASH;
911 
912 	if (addr == NULL) {
913 		mdb_warn("umem walk doesn't support global walks\n");
914 		return (WALK_ERR);
915 	}
916 
917 	dprintf(("walking %p\n", addr));
918 
919 	/*
920 	 * First we need to figure out how many CPUs are configured in the
921 	 * system to know how much to slurp out.
922 	 */
923 	umem_readvar(&ncpus, "umem_max_ncpus");
924 
925 	csize = UMEM_CACHE_SIZE(ncpus);
926 	cp = mdb_alloc(csize, UM_SLEEP);
927 
928 	if (mdb_vread(cp, csize, addr) == -1) {
929 		mdb_warn("couldn't read cache at addr %p", addr);
930 		goto out2;
931 	}
932 
933 	dprintf(("buf total is %d\n", cp->cache_buftotal));
934 
935 	if (cp->cache_buftotal == 0) {
936 		mdb_free(cp, csize);
937 		return (WALK_DONE);
938 	}
939 
940 	/*
941 	 * If they ask for bufctls, but it's a small-slab cache,
942 	 * there is nothing to report.
943 	 */
944 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
945 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
946 		    cp->cache_flags));
947 		mdb_free(cp, csize);
948 		return (WALK_DONE);
949 	}
950 
951 	/*
952 	 * Read in the contents of the magazine layer
953 	 */
954 	if (umem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
955 	    &magmax, UM_SLEEP) == WALK_ERR)
956 		goto out2;
957 
958 	/*
959 	 * We have all of the buffers from the magazines;  if we are walking
960 	 * allocated buffers, sort them so we can bsearch them later.
961 	 */
962 	if (type & UM_ALLOCATED)
963 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
964 
965 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
966 
967 	umw->umw_type = type;
968 	umw->umw_addr = addr;
969 	umw->umw_cp = cp;
970 	umw->umw_csize = csize;
971 	umw->umw_maglist = maglist;
972 	umw->umw_max = magmax;
973 	umw->umw_count = magcnt;
974 	umw->umw_pos = 0;
975 
976 	/*
977 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
978 	 * hash table instead of the slab layer.
979 	 */
980 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
981 		layered = "umem_hash";
982 
983 		umw->umw_type |= UM_HASH;
984 	} else {
985 		/*
986 		 * If we are walking freed buffers, we only need the
987 		 * magazine layer plus the partially allocated slabs.
988 		 * To walk allocated buffers, we need all of the slabs.
989 		 */
990 		if (type & UM_ALLOCATED)
991 			layered = "umem_slab";
992 		else
993 			layered = "umem_slab_partial";
994 
995 		/*
996 		 * for small-slab caches, we read in the entire slab.  For
997 		 * freed buffers, we can just walk the freelist.  For
998 		 * allocated buffers, we use a 'valid' array to track
999 		 * the freed buffers.
1000 		 */
1001 		if (!(cp->cache_flags & UMF_HASH)) {
1002 			chunksize = cp->cache_chunksize;
1003 			slabsize = cp->cache_slabsize;
1004 
1005 			umw->umw_ubase = mdb_alloc(slabsize +
1006 			    sizeof (umem_bufctl_t), UM_SLEEP);
1007 
1008 			if (type & UM_ALLOCATED)
1009 				umw->umw_valid =
1010 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1011 		}
1012 	}
1013 
1014 	status = WALK_NEXT;
1015 
1016 	if (mdb_layered_walk(layered, wsp) == -1) {
1017 		mdb_warn("unable to start layered '%s' walk", layered);
1018 		status = WALK_ERR;
1019 	}
1020 
1021 out1:
1022 	if (status == WALK_ERR) {
1023 		if (umw->umw_valid)
1024 			mdb_free(umw->umw_valid, slabsize / chunksize);
1025 
1026 		if (umw->umw_ubase)
1027 			mdb_free(umw->umw_ubase, slabsize +
1028 			    sizeof (umem_bufctl_t));
1029 
1030 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (uintptr_t));
1031 		mdb_free(umw, sizeof (umem_walk_t));
1032 		wsp->walk_data = NULL;
1033 	}
1034 
1035 out2:
1036 	if (status == WALK_ERR)
1037 		mdb_free(cp, csize);
1038 
1039 	return (status);
1040 }
1041 
1042 int
1043 umem_walk_step(mdb_walk_state_t *wsp)
1044 {
1045 	umem_walk_t *umw = wsp->walk_data;
1046 	int type = umw->umw_type;
1047 	umem_cache_t *cp = umw->umw_cp;
1048 
1049 	void **maglist = umw->umw_maglist;
1050 	int magcnt = umw->umw_count;
1051 
1052 	uintptr_t chunksize, slabsize;
1053 	uintptr_t addr;
1054 	const umem_slab_t *sp;
1055 	const umem_bufctl_t *bcp;
1056 	umem_bufctl_t bc;
1057 
1058 	int chunks;
1059 	char *kbase;
1060 	void *buf;
1061 	int i, ret;
1062 
1063 	char *valid, *ubase;
1064 
1065 	/*
1066 	 * first, handle the 'umem_hash' layered walk case
1067 	 */
1068 	if (type & UM_HASH) {
1069 		/*
1070 		 * We have a buffer which has been allocated out of the
1071 		 * global layer. We need to make sure that it's not
1072 		 * actually sitting in a magazine before we report it as
1073 		 * an allocated buffer.
1074 		 */
1075 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1076 
1077 		if (magcnt > 0 &&
1078 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1079 		    addrcmp) != NULL)
1080 			return (WALK_NEXT);
1081 
1082 		if (type & UM_BUFCTL)
1083 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1084 
1085 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1086 	}
1087 
1088 	ret = WALK_NEXT;
1089 
1090 	addr = umw->umw_addr;
1091 
1092 	/*
1093 	 * If we're walking freed buffers, report everything in the
1094 	 * magazine layer before processing the first slab.
1095 	 */
1096 	if ((type & UM_FREE) && magcnt != 0) {
1097 		umw->umw_count = 0;		/* only do this once */
1098 		for (i = 0; i < magcnt; i++) {
1099 			buf = maglist[i];
1100 
1101 			if (type & UM_BUFCTL) {
1102 				uintptr_t out;
1103 
1104 				if (cp->cache_flags & UMF_BUFTAG) {
1105 					umem_buftag_t *btp;
1106 					umem_buftag_t tag;
1107 
1108 					/* LINTED - alignment */
1109 					btp = UMEM_BUFTAG(cp, buf);
1110 					if (mdb_vread(&tag, sizeof (tag),
1111 					    (uintptr_t)btp) == -1) {
1112 						mdb_warn("reading buftag for "
1113 						    "%p at %p", buf, btp);
1114 						continue;
1115 					}
1116 					out = (uintptr_t)tag.bt_bufctl;
1117 				} else {
1118 					if (umem_hash_lookup(cp, addr, buf,
1119 					    &out) == -1)
1120 						continue;
1121 				}
1122 				ret = bufctl_walk_callback(cp, wsp, out);
1123 			} else {
1124 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1125 			}
1126 
1127 			if (ret != WALK_NEXT)
1128 				return (ret);
1129 		}
1130 	}
1131 
1132 	/*
1133 	 * Handle the buffers in the current slab
1134 	 */
1135 	chunksize = cp->cache_chunksize;
1136 	slabsize = cp->cache_slabsize;
1137 
1138 	sp = wsp->walk_layer;
1139 	chunks = sp->slab_chunks;
1140 	kbase = sp->slab_base;
1141 
1142 	dprintf(("kbase is %p\n", kbase));
1143 
1144 	if (!(cp->cache_flags & UMF_HASH)) {
1145 		valid = umw->umw_valid;
1146 		ubase = umw->umw_ubase;
1147 
1148 		if (mdb_vread(ubase, chunks * chunksize,
1149 		    (uintptr_t)kbase) == -1) {
1150 			mdb_warn("failed to read slab contents at %p", kbase);
1151 			return (WALK_ERR);
1152 		}
1153 
1154 		/*
1155 		 * Set up the valid map as fully allocated -- we'll punch
1156 		 * out the freelist.
1157 		 */
1158 		if (type & UM_ALLOCATED)
1159 			(void) memset(valid, 1, chunks);
1160 	} else {
1161 		valid = NULL;
1162 		ubase = NULL;
1163 	}
1164 
1165 	/*
1166 	 * walk the slab's freelist
1167 	 */
1168 	bcp = sp->slab_head;
1169 
1170 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1171 
1172 	/*
1173 	 * since we could be in the middle of allocating a buffer,
1174 	 * our refcnt could be one higher than it aught.  So we
1175 	 * check one further on the freelist than the count allows.
1176 	 */
1177 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1178 		uint_t ndx;
1179 
1180 		dprintf(("bcp is %p\n", bcp));
1181 
1182 		if (bcp == NULL) {
1183 			if (i == chunks)
1184 				break;
1185 			mdb_warn(
1186 			    "slab %p in cache %p freelist too short by %d\n",
1187 			    sp, addr, chunks - i);
1188 			break;
1189 		}
1190 
1191 		if (cp->cache_flags & UMF_HASH) {
1192 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1193 				mdb_warn("failed to read bufctl ptr at %p",
1194 				    bcp);
1195 				break;
1196 			}
1197 			buf = bc.bc_addr;
1198 		} else {
1199 			/*
1200 			 * Otherwise the buffer is in the slab which
1201 			 * we've read in;  we just need to determine
1202 			 * its offset in the slab to find the
1203 			 * umem_bufctl_t.
1204 			 */
1205 			bc = *((umem_bufctl_t *)
1206 			    ((uintptr_t)bcp - (uintptr_t)kbase +
1207 			    (uintptr_t)ubase));
1208 
1209 			buf = UMEM_BUF(cp, bcp);
1210 		}
1211 
1212 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1213 
1214 		if (ndx > slabsize / cp->cache_bufsize) {
1215 			/*
1216 			 * This is very wrong; we have managed to find
1217 			 * a buffer in the slab which shouldn't
1218 			 * actually be here.  Emit a warning, and
1219 			 * try to continue.
1220 			 */
1221 			mdb_warn("buf %p is out of range for "
1222 			    "slab %p, cache %p\n", buf, sp, addr);
1223 		} else if (type & UM_ALLOCATED) {
1224 			/*
1225 			 * we have found a buffer on the slab's freelist;
1226 			 * clear its entry
1227 			 */
1228 			valid[ndx] = 0;
1229 		} else {
1230 			/*
1231 			 * Report this freed buffer
1232 			 */
1233 			if (type & UM_BUFCTL) {
1234 				ret = bufctl_walk_callback(cp, wsp,
1235 				    (uintptr_t)bcp);
1236 			} else {
1237 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1238 			}
1239 			if (ret != WALK_NEXT)
1240 				return (ret);
1241 		}
1242 
1243 		bcp = bc.bc_next;
1244 	}
1245 
1246 	if (bcp != NULL) {
1247 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1248 		    sp, addr, bcp));
1249 	}
1250 
1251 	/*
1252 	 * If we are walking freed buffers, the loop above handled reporting
1253 	 * them.
1254 	 */
1255 	if (type & UM_FREE)
1256 		return (WALK_NEXT);
1257 
1258 	if (type & UM_BUFCTL) {
1259 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1260 		    "cache %p\n", addr);
1261 		return (WALK_ERR);
1262 	}
1263 
1264 	/*
1265 	 * Report allocated buffers, skipping buffers in the magazine layer.
1266 	 * We only get this far for small-slab caches.
1267 	 */
1268 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1269 		buf = (char *)kbase + i * chunksize;
1270 
1271 		if (!valid[i])
1272 			continue;		/* on slab freelist */
1273 
1274 		if (magcnt > 0 &&
1275 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1276 		    addrcmp) != NULL)
1277 			continue;		/* in magazine layer */
1278 
1279 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1280 	}
1281 	return (ret);
1282 }
1283 
1284 void
1285 umem_walk_fini(mdb_walk_state_t *wsp)
1286 {
1287 	umem_walk_t *umw = wsp->walk_data;
1288 	uintptr_t chunksize;
1289 	uintptr_t slabsize;
1290 
1291 	if (umw == NULL)
1292 		return;
1293 
1294 	if (umw->umw_maglist != NULL)
1295 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1296 
1297 	chunksize = umw->umw_cp->cache_chunksize;
1298 	slabsize = umw->umw_cp->cache_slabsize;
1299 
1300 	if (umw->umw_valid != NULL)
1301 		mdb_free(umw->umw_valid, slabsize / chunksize);
1302 	if (umw->umw_ubase != NULL)
1303 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1304 
1305 	mdb_free(umw->umw_cp, umw->umw_csize);
1306 	mdb_free(umw, sizeof (umem_walk_t));
1307 }
1308 
1309 /*ARGSUSED*/
1310 static int
1311 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1312 {
1313 	/*
1314 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1315 	 * memory in other caches.  This can be a little confusing, so we
1316 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1317 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1318 	 */
1319 	if (c->cache_cflags & UMC_NOTOUCH)
1320 		return (WALK_NEXT);
1321 
1322 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1323 	    wsp->walk_cbdata, addr) == -1)
1324 		return (WALK_DONE);
1325 
1326 	return (WALK_NEXT);
1327 }
1328 
/*
 * Perform a global walk on behalf of a per-cache walker's init routine:
 * record the walker name in walk_data and run umem_walk_all() over every
 * umem_cache.
 *
 * NOTE: this macro always returns from the *calling* function -- WALK_ERR
 * if the umem_cache walk fails, WALK_DONE otherwise.  It is deliberately a
 * bare brace block rather than do { } while (0), since nothing after the
 * returns is reachable.  The wsp argument is parenthesized so that any
 * pointer-valued expression may be passed safely.
 */
#define	UMEM_WALK_ALL(name, wsp) { \
	(wsp)->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, \
	    (wsp)) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1335 
1336 int
1337 umem_walk_init(mdb_walk_state_t *wsp)
1338 {
1339 	if (wsp->walk_arg != NULL)
1340 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1341 
1342 	if (wsp->walk_addr == NULL)
1343 		UMEM_WALK_ALL("umem", wsp);
1344 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1345 }
1346 
1347 int
1348 bufctl_walk_init(mdb_walk_state_t *wsp)
1349 {
1350 	if (wsp->walk_addr == NULL)
1351 		UMEM_WALK_ALL("bufctl", wsp);
1352 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1353 }
1354 
1355 int
1356 freemem_walk_init(mdb_walk_state_t *wsp)
1357 {
1358 	if (wsp->walk_addr == NULL)
1359 		UMEM_WALK_ALL("freemem", wsp);
1360 	return (umem_walk_init_common(wsp, UM_FREE));
1361 }
1362 
1363 int
1364 freectl_walk_init(mdb_walk_state_t *wsp)
1365 {
1366 	if (wsp->walk_addr == NULL)
1367 		UMEM_WALK_ALL("freectl", wsp);
1368 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1369 }
1370 
/*
 * State for the bufctl_history walker, which follows a bufctl's bc_lastlog
 * chain back through earlier transactions on the same buffer.
 */
typedef struct bufctl_history_walk {
	void		*bhw_next;	/* target address of next bufctl */
	umem_cache_t	*bhw_cache;	/* bc_cache of the starting bufctl */
	umem_slab_t	*bhw_slab;	/* bc_slab of the starting bufctl */
	hrtime_t	bhw_timestamp;	/* last entry reported; 0 if none */
} bufctl_history_walk_t;
1377 
1378 int
1379 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1380 {
1381 	bufctl_history_walk_t *bhw;
1382 	umem_bufctl_audit_t bc;
1383 	umem_bufctl_audit_t bcn;
1384 
1385 	if (wsp->walk_addr == NULL) {
1386 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1387 		return (WALK_ERR);
1388 	}
1389 
1390 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1391 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1392 		return (WALK_ERR);
1393 	}
1394 
1395 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1396 	bhw->bhw_timestamp = 0;
1397 	bhw->bhw_cache = bc.bc_cache;
1398 	bhw->bhw_slab = bc.bc_slab;
1399 
1400 	/*
1401 	 * sometimes the first log entry matches the base bufctl;  in that
1402 	 * case, skip the base bufctl.
1403 	 */
1404 	if (bc.bc_lastlog != NULL &&
1405 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1406 	    bc.bc_addr == bcn.bc_addr &&
1407 	    bc.bc_cache == bcn.bc_cache &&
1408 	    bc.bc_slab == bcn.bc_slab &&
1409 	    bc.bc_timestamp == bcn.bc_timestamp &&
1410 	    bc.bc_thread == bcn.bc_thread)
1411 		bhw->bhw_next = bc.bc_lastlog;
1412 	else
1413 		bhw->bhw_next = (void *)wsp->walk_addr;
1414 
1415 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1416 	wsp->walk_data = bhw;
1417 
1418 	return (WALK_NEXT);
1419 }
1420 
1421 int
1422 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1423 {
1424 	bufctl_history_walk_t *bhw = wsp->walk_data;
1425 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1426 	uintptr_t baseaddr = wsp->walk_addr;
1427 	umem_bufctl_audit_t *b;
1428 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1429 
1430 	if (addr == NULL)
1431 		return (WALK_DONE);
1432 
1433 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1434 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1435 		return (WALK_ERR);
1436 	}
1437 
1438 	/*
1439 	 * The bufctl is only valid if the address, cache, and slab are
1440 	 * correct.  We also check that the timestamp is decreasing, to
1441 	 * prevent infinite loops.
1442 	 */
1443 	if ((uintptr_t)b->bc_addr != baseaddr ||
1444 	    b->bc_cache != bhw->bhw_cache ||
1445 	    b->bc_slab != bhw->bhw_slab ||
1446 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1447 		return (WALK_DONE);
1448 
1449 	bhw->bhw_next = b->bc_lastlog;
1450 	bhw->bhw_timestamp = b->bc_timestamp;
1451 
1452 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1453 }
1454 
1455 void
1456 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1457 {
1458 	bufctl_history_walk_t *bhw = wsp->walk_data;
1459 
1460 	mdb_free(bhw, sizeof (*bhw));
1461 }
1462 
/*
 * State for the umem_log walker, which reads an entire transaction log into
 * the debugger and reports its entries in sorted order.
 */
typedef struct umem_log_walk {
	umem_bufctl_audit_t *ulw_base;	/* local copy of the log contents */
	umem_bufctl_audit_t **ulw_sorted; /* sorted pointers into ulw_base */
	umem_log_header_t ulw_lh;	/* local copy of the log header */
	size_t ulw_size;		/* size of ulw_base, in bytes */
	size_t ulw_maxndx;		/* number of entries in ulw_sorted */
	size_t ulw_ndx;			/* next ulw_sorted index to report */
} umem_log_walk_t;
1471 
1472 int
1473 umem_log_walk_init(mdb_walk_state_t *wsp)
1474 {
1475 	uintptr_t lp = wsp->walk_addr;
1476 	umem_log_walk_t *ulw;
1477 	umem_log_header_t *lhp;
1478 	int maxndx, i, j, k;
1479 
1480 	/*
1481 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1482 	 * read the log whose umem_log_header_t is stored at walk_addr.
1483 	 */
1484 	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1485 		mdb_warn("failed to read 'umem_transaction_log'");
1486 		return (WALK_ERR);
1487 	}
1488 
1489 	if (lp == NULL) {
1490 		mdb_warn("log is disabled\n");
1491 		return (WALK_ERR);
1492 	}
1493 
1494 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1495 	lhp = &ulw->ulw_lh;
1496 
1497 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1498 		mdb_warn("failed to read log header at %p", lp);
1499 		mdb_free(ulw, sizeof (umem_log_walk_t));
1500 		return (WALK_ERR);
1501 	}
1502 
1503 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1504 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1505 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1506 
1507 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1508 	    (uintptr_t)lhp->lh_base) == -1) {
1509 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1510 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1511 		mdb_free(ulw, sizeof (umem_log_walk_t));
1512 		return (WALK_ERR);
1513 	}
1514 
1515 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1516 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1517 
1518 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1519 		caddr_t chunk = (caddr_t)
1520 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1521 
1522 		for (j = 0; j < maxndx; j++) {
1523 			/* LINTED align */
1524 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1525 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1526 		}
1527 	}
1528 
1529 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1530 	    (int(*)(const void *, const void *))bufctlcmp);
1531 
1532 	ulw->ulw_maxndx = k;
1533 	wsp->walk_data = ulw;
1534 
1535 	return (WALK_NEXT);
1536 }
1537 
1538 int
1539 umem_log_walk_step(mdb_walk_state_t *wsp)
1540 {
1541 	umem_log_walk_t *ulw = wsp->walk_data;
1542 	umem_bufctl_audit_t *bcp;
1543 
1544 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1545 		return (WALK_DONE);
1546 
1547 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1548 
1549 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1550 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1551 }
1552 
1553 void
1554 umem_log_walk_fini(mdb_walk_state_t *wsp)
1555 {
1556 	umem_log_walk_t *ulw = wsp->walk_data;
1557 
1558 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1559 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1560 	    sizeof (umem_bufctl_audit_t *));
1561 	mdb_free(ulw, sizeof (umem_log_walk_t));
1562 }
1563 
/*
 * One bufctl collected by the allocdby/freedby walkers.
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;	/* target address of the bufctl */
	hrtime_t abb_ts;	/* its bc_timestamp, used as the sort key */
} allocdby_bufctl_t;
1568 
/*
 * State for the allocdby/freedby walkers, which gather every bufctl that
 * belongs to a given thread and then report them sorted by timestamp.
 */
typedef struct allocdby_walk {
	const char *abw_walk;		/* per-cache walker to run */
	uintptr_t abw_thread;		/* thread whose bufctls are wanted */
	size_t abw_nbufs;		/* entries of abw_buf in use */
	size_t abw_size;		/* capacity of abw_buf, in entries */
	allocdby_bufctl_t *abw_buf;	/* collected bufctls */
	size_t abw_ndx;			/* next abw_buf index to report */
} allocdby_walk_t;
1577 
1578 int
1579 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1580     allocdby_walk_t *abw)
1581 {
1582 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1583 		return (WALK_NEXT);
1584 
1585 	if (abw->abw_nbufs == abw->abw_size) {
1586 		allocdby_bufctl_t *buf;
1587 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1588 
1589 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1590 
1591 		bcopy(abw->abw_buf, buf, oldsize);
1592 		mdb_free(abw->abw_buf, oldsize);
1593 
1594 		abw->abw_size <<= 1;
1595 		abw->abw_buf = buf;
1596 	}
1597 
1598 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1599 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1600 	abw->abw_nbufs++;
1601 
1602 	return (WALK_NEXT);
1603 }
1604 
1605 /*ARGSUSED*/
1606 int
1607 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1608 {
1609 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1610 	    abw, addr) == -1) {
1611 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1612 		return (WALK_DONE);
1613 	}
1614 
1615 	return (WALK_NEXT);
1616 }
1617 
1618 static int
1619 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1620 {
1621 	if (lhs->abb_ts < rhs->abb_ts)
1622 		return (1);
1623 	if (lhs->abb_ts > rhs->abb_ts)
1624 		return (-1);
1625 	return (0);
1626 }
1627 
1628 static int
1629 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1630 {
1631 	allocdby_walk_t *abw;
1632 
1633 	if (wsp->walk_addr == NULL) {
1634 		mdb_warn("allocdby walk doesn't support global walks\n");
1635 		return (WALK_ERR);
1636 	}
1637 
1638 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1639 
1640 	abw->abw_thread = wsp->walk_addr;
1641 	abw->abw_walk = walk;
1642 	abw->abw_size = 128;	/* something reasonable */
1643 	abw->abw_buf =
1644 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1645 
1646 	wsp->walk_data = abw;
1647 
1648 	if (mdb_walk("umem_cache",
1649 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1650 		mdb_warn("couldn't walk umem_cache");
1651 		allocdby_walk_fini(wsp);
1652 		return (WALK_ERR);
1653 	}
1654 
1655 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1656 	    (int(*)(const void *, const void *))allocdby_cmp);
1657 
1658 	return (WALK_NEXT);
1659 }
1660 
1661 int
1662 allocdby_walk_init(mdb_walk_state_t *wsp)
1663 {
1664 	return (allocdby_walk_init_common(wsp, "bufctl"));
1665 }
1666 
1667 int
1668 freedby_walk_init(mdb_walk_state_t *wsp)
1669 {
1670 	return (allocdby_walk_init_common(wsp, "freectl"));
1671 }
1672 
1673 int
1674 allocdby_walk_step(mdb_walk_state_t *wsp)
1675 {
1676 	allocdby_walk_t *abw = wsp->walk_data;
1677 	uintptr_t addr;
1678 	umem_bufctl_audit_t *bcp;
1679 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1680 
1681 	if (abw->abw_ndx == abw->abw_nbufs)
1682 		return (WALK_DONE);
1683 
1684 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1685 
1686 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1687 		mdb_warn("couldn't read bufctl at %p", addr);
1688 		return (WALK_DONE);
1689 	}
1690 
1691 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1692 }
1693 
1694 void
1695 allocdby_walk_fini(mdb_walk_state_t *wsp)
1696 {
1697 	allocdby_walk_t *abw = wsp->walk_data;
1698 
1699 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1700 	mdb_free(abw, sizeof (allocdby_walk_t));
1701 }
1702 
1703 /*ARGSUSED*/
1704 int
1705 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1706 {
1707 	char c[MDB_SYM_NAMLEN];
1708 	GElf_Sym sym;
1709 	int i;
1710 
1711 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1712 	for (i = 0; i < bcp->bc_depth; i++) {
1713 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1714 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1715 			continue;
1716 		if (is_umem_sym(c, "umem_"))
1717 			continue;
1718 		mdb_printf("%s+0x%lx",
1719 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1720 		break;
1721 	}
1722 	mdb_printf("\n");
1723 
1724 	return (WALK_NEXT);
1725 }
1726 
1727 static int
1728 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1729 {
1730 	if (!(flags & DCMD_ADDRSPEC))
1731 		return (DCMD_USAGE);
1732 
1733 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1734 
1735 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1736 		mdb_warn("can't walk '%s' for %p", w, addr);
1737 		return (DCMD_ERR);
1738 	}
1739 
1740 	return (DCMD_OK);
1741 }
1742 
1743 /*ARGSUSED*/
1744 int
1745 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1746 {
1747 	return (allocdby_common(addr, flags, "allocdby"));
1748 }
1749 
1750 /*ARGSUSED*/
1751 int
1752 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1753 {
1754 	return (allocdby_common(addr, flags, "freedby"));
1755 }
1756 
/*
 * State shared by the ::whatis dcmd and its walker callbacks.
 */
typedef struct whatis {
	uintptr_t w_addr;		/* address being identified */
	const umem_cache_t *w_cache;	/* cache currently being searched */
	const vmem_t *w_vmem;		/* vmem arena currently being searched */
	int w_found;			/* number of matches reported so far */
	uint_t w_verbose;		/* -v: announce each cache/arena */
	uint_t w_freemem;		/* TRUE while searching freed memory */
	uint_t w_all;			/* -a: keep going after a match */
	uint_t w_bufctl;		/* -b: search by bufctl, not buffer */
} whatis_t;
1767 
1768 static void
1769 whatis_print_umem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
1770 {
1771 	/* LINTED pointer cast may result in improper alignment */
1772 	uintptr_t btaddr = (uintptr_t)UMEM_BUFTAG(w->w_cache, addr);
1773 	intptr_t stat;
1774 
1775 	if (w->w_cache->cache_flags & UMF_REDZONE) {
1776 		umem_buftag_t bt;
1777 
1778 		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
1779 			goto done;
1780 
1781 		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
1782 
1783 		if (stat != UMEM_BUFTAG_ALLOC && stat != UMEM_BUFTAG_FREE)
1784 			goto done;
1785 
1786 		/*
1787 		 * provide the bufctl ptr if it has useful information
1788 		 */
1789 		if (baddr == 0 && (w->w_cache->cache_flags & UMF_AUDIT))
1790 			baddr = (uintptr_t)bt.bt_bufctl;
1791 	}
1792 
1793 done:
1794 	if (baddr == 0)
1795 		mdb_printf("%p is %p+%p, %s from %s\n",
1796 		    w->w_addr, addr, w->w_addr - addr,
1797 		    w->w_freemem == FALSE ? "allocated" : "freed",
1798 		    w->w_cache->cache_name);
1799 	else
1800 		mdb_printf("%p is %p+%p, bufctl %p %s from %s\n",
1801 		    w->w_addr, addr, w->w_addr - addr, baddr,
1802 		    w->w_freemem == FALSE ? "allocated" : "freed",
1803 		    w->w_cache->cache_name);
1804 }
1805 
1806 /*ARGSUSED*/
1807 static int
1808 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_t *w)
1809 {
1810 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1811 		return (WALK_NEXT);
1812 
1813 	whatis_print_umem(addr, 0, w);
1814 	w->w_found++;
1815 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1816 }
1817 
1818 static int
1819 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w)
1820 {
1821 	if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end)
1822 		return (WALK_NEXT);
1823 
1824 	mdb_printf("%p is %p+%p ", w->w_addr,
1825 	    vs->vs_start, w->w_addr - vs->vs_start);
1826 
1827 	/*
1828 	 * Always provide the vmem_seg pointer if it has a stack trace.
1829 	 */
1830 	if (w->w_bufctl == TRUE ||
1831 	    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) {
1832 		mdb_printf("(vmem_seg %p) ", addr);
1833 	}
1834 
1835 	mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ?
1836 	    "freed " : "", w->w_vmem->vm_name);
1837 
1838 	w->w_found++;
1839 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1840 }
1841 
1842 static int
1843 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w)
1844 {
1845 	const char *nm = vmem->vm_name;
1846 	w->w_vmem = vmem;
1847 	w->w_freemem = FALSE;
1848 
1849 	if (w->w_verbose)
1850 		mdb_printf("Searching vmem arena %s...\n", nm);
1851 
1852 	if (mdb_pwalk("vmem_alloc",
1853 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1854 		mdb_warn("can't walk vmem seg for %p", addr);
1855 		return (WALK_NEXT);
1856 	}
1857 
1858 	if (w->w_found && w->w_all == FALSE)
1859 		return (WALK_DONE);
1860 
1861 	if (w->w_verbose)
1862 		mdb_printf("Searching vmem arena %s for free virtual...\n", nm);
1863 
1864 	w->w_freemem = TRUE;
1865 
1866 	if (mdb_pwalk("vmem_free",
1867 	    (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) {
1868 		mdb_warn("can't walk vmem seg for %p", addr);
1869 		return (WALK_NEXT);
1870 	}
1871 
1872 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
1873 }
1874 
1875 /*ARGSUSED*/
1876 static int
1877 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_t *w)
1878 {
1879 	uintptr_t addr;
1880 
1881 	if (bcp == NULL)
1882 		return (WALK_NEXT);
1883 
1884 	addr = (uintptr_t)bcp->bc_addr;
1885 
1886 	if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize)
1887 		return (WALK_NEXT);
1888 
1889 	whatis_print_umem(addr, baddr, w);
1890 	w->w_found++;
1891 	return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE);
1892 }
1893 
1894 static int
1895 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
1896 {
1897 	char *walk, *freewalk;
1898 	mdb_walk_cb_t func;
1899 
1900 	if (w->w_bufctl == FALSE) {
1901 		walk = "umem";
1902 		freewalk = "freemem";
1903 		func = (mdb_walk_cb_t)whatis_walk_umem;
1904 	} else {
1905 		walk = "bufctl";
1906 		freewalk = "freectl";
1907 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
1908 	}
1909 
1910 	if (w->w_verbose)
1911 		mdb_printf("Searching %s...\n", c->cache_name);
1912 
1913 	w->w_cache = c;
1914 	w->w_freemem = FALSE;
1915 
1916 	if (mdb_pwalk(walk, func, w, addr) == -1) {
1917 		mdb_warn("can't find %s walker", walk);
1918 		return (WALK_DONE);
1919 	}
1920 
1921 	if (w->w_found && w->w_all == FALSE)
1922 		return (WALK_DONE);
1923 
1924 	/*
1925 	 * We have searched for allocated memory; now search for freed memory.
1926 	 */
1927 	if (w->w_verbose)
1928 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
1929 
1930 	w->w_freemem = TRUE;
1931 
1932 	if (mdb_pwalk(freewalk, func, w, addr) == -1) {
1933 		mdb_warn("can't find %s walker", freewalk);
1934 		return (WALK_DONE);
1935 	}
1936 
1937 	return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT);
1938 }
1939 
1940 static int
1941 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
1942 {
1943 	if (c->cache_cflags & UMC_NOTOUCH)
1944 		return (WALK_NEXT);
1945 
1946 	return (whatis_walk_cache(addr, c, w));
1947 }
1948 
1949 static int
1950 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_t *w)
1951 {
1952 	if (!(c->cache_cflags & UMC_NOTOUCH))
1953 		return (WALK_NEXT);
1954 
1955 	return (whatis_walk_cache(addr, c, w));
1956 }
1957 
1958 int
1959 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1960 {
1961 	whatis_t w;
1962 
1963 	if (!(flags & DCMD_ADDRSPEC))
1964 		return (DCMD_USAGE);
1965 
1966 	w.w_verbose = FALSE;
1967 	w.w_bufctl = FALSE;
1968 	w.w_all = FALSE;
1969 
1970 	if (mdb_getopts(argc, argv,
1971 	    'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose,
1972 	    'a', MDB_OPT_SETBITS, TRUE, &w.w_all,
1973 	    'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc)
1974 		return (DCMD_USAGE);
1975 
1976 	w.w_addr = addr;
1977 	w.w_found = 0;
1978 
1979 	/*
1980 	 * Mappings and threads should eventually be added here.
1981 	 */
1982 	if (mdb_walk("umem_cache",
1983 	    (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) {
1984 		mdb_warn("couldn't find umem_cache walker");
1985 		return (DCMD_ERR);
1986 	}
1987 
1988 	if (w.w_found && w.w_all == FALSE)
1989 		return (DCMD_OK);
1990 
1991 	if (mdb_walk("umem_cache",
1992 	    (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) {
1993 		mdb_warn("couldn't find umem_cache walker");
1994 		return (DCMD_ERR);
1995 	}
1996 
1997 	if (w.w_found && w.w_all == FALSE)
1998 		return (DCMD_OK);
1999 
2000 	if (mdb_walk("vmem_postfix",
2001 	    (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) {
2002 		mdb_warn("couldn't find vmem_postfix walker");
2003 		return (DCMD_ERR);
2004 	}
2005 
2006 	if (w.w_found == 0)
2007 		mdb_printf("%p is unknown\n", addr);
2008 
2009 	return (DCMD_OK);
2010 }
2011 
/*
 * Address range of one CPU's current log chunk, used by ::umem_log to
 * attribute a log entry to a CPU.  An entry at address a matches when
 * umc_low <= a < umc_high.
 */
typedef struct umem_log_cpu {
	uintptr_t umc_low;	/* start of the CPU's current chunk */
	uintptr_t umc_high;	/* the CPU's clh_current pointer */
} umem_log_cpu_t;
2016 
2017 int
2018 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2019 {
2020 	int i;
2021 
2022 	for (i = 0; i < umem_max_ncpus; i++) {
2023 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2024 			break;
2025 	}
2026 
2027 	if (i == umem_max_ncpus)
2028 		mdb_printf("   ");
2029 	else
2030 		mdb_printf("%3d", i);
2031 
2032 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2033 	    b->bc_timestamp, b->bc_thread);
2034 
2035 	return (WALK_NEXT);
2036 }
2037 
2038 /*ARGSUSED*/
2039 int
2040 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2041 {
2042 	umem_log_header_t lh;
2043 	umem_cpu_log_header_t clh;
2044 	uintptr_t lhp, clhp;
2045 	umem_log_cpu_t *umc;
2046 	int i;
2047 
2048 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2049 		mdb_warn("failed to read 'umem_transaction_log'");
2050 		return (DCMD_ERR);
2051 	}
2052 
2053 	if (lhp == NULL) {
2054 		mdb_warn("no umem transaction log\n");
2055 		return (DCMD_ERR);
2056 	}
2057 
2058 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2059 		mdb_warn("failed to read log header at %p", lhp);
2060 		return (DCMD_ERR);
2061 	}
2062 
2063 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2064 
2065 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2066 	    UM_SLEEP | UM_GC);
2067 
2068 	for (i = 0; i < umem_max_ncpus; i++) {
2069 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2070 			mdb_warn("cannot read cpu %d's log header at %p",
2071 			    i, clhp);
2072 			return (DCMD_ERR);
2073 		}
2074 
2075 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2076 		    (uintptr_t)lh.lh_base;
2077 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2078 
2079 		clhp += sizeof (umem_cpu_log_header_t);
2080 	}
2081 
2082 	if (DCMD_HDRSPEC(flags)) {
2083 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2084 		    "BUFADDR", "TIMESTAMP", "THREAD");
2085 	}
2086 
2087 	/*
2088 	 * If we have been passed an address, we'll just print out that
2089 	 * log entry.
2090 	 */
2091 	if (flags & DCMD_ADDRSPEC) {
2092 		umem_bufctl_audit_t *bp;
2093 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2094 
2095 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2096 			mdb_warn("failed to read bufctl at %p", addr);
2097 			return (DCMD_ERR);
2098 		}
2099 
2100 		(void) umem_log_walk(addr, bp, umc);
2101 
2102 		return (DCMD_OK);
2103 	}
2104 
2105 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2106 		mdb_warn("can't find umem log walker");
2107 		return (DCMD_ERR);
2108 	}
2109 
2110 	return (DCMD_OK);
2111 }
2112 
/*
 * Arguments handed to bufctl_history_callback() so that it can invoke
 * bufctl() on each entry of a bufctl's history.
 */
typedef struct bufctl_history_cb {
	int		bhc_flags;	/* dcmd flags to pass to bufctl() */
	int		bhc_argc;	/* argument count to pass */
	const mdb_arg_t	*bhc_argv;	/* argument vector to pass */
	int		bhc_ret;	/* result of the latest bufctl() call */
} bufctl_history_cb_t;
2119 
2120 /*ARGSUSED*/
2121 static int
2122 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2123 {
2124 	bufctl_history_cb_t *bhc = arg;
2125 
2126 	bhc->bhc_ret =
2127 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2128 
2129 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2130 
2131 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2132 }
2133 
/*
 * Help routine for the ::bufctl dcmd: print a summary line followed by the
 * list of supported options.
 */
void
bufctl_help(void)
{
	static const char summary[] =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	static const char options[] =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	/* the OPTIONS heading is printed two columns to the left */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2156 
2157 int
2158 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2159 {
2160 	uint_t verbose = FALSE;
2161 	uint_t history = FALSE;
2162 	uint_t in_history = FALSE;
2163 	uintptr_t caller = NULL, thread = NULL;
2164 	uintptr_t laddr, haddr, baddr = NULL;
2165 	hrtime_t earliest = 0, latest = 0;
2166 	int i, depth;
2167 	char c[MDB_SYM_NAMLEN];
2168 	GElf_Sym sym;
2169 	umem_bufctl_audit_t *bcp;
2170 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2171 
2172 	if (mdb_getopts(argc, argv,
2173 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2174 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2175 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2176 	    'c', MDB_OPT_UINTPTR, &caller,
2177 	    't', MDB_OPT_UINTPTR, &thread,
2178 	    'e', MDB_OPT_UINT64, &earliest,
2179 	    'l', MDB_OPT_UINT64, &latest,
2180 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2181 		return (DCMD_USAGE);
2182 
2183 	if (!(flags & DCMD_ADDRSPEC))
2184 		return (DCMD_USAGE);
2185 
2186 	if (in_history && !history)
2187 		return (DCMD_USAGE);
2188 
2189 	if (history && !in_history) {
2190 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2191 		    UM_SLEEP | UM_GC);
2192 		bufctl_history_cb_t bhc;
2193 
2194 		nargv[0].a_type = MDB_TYPE_STRING;
2195 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2196 
2197 		for (i = 0; i < argc; i++)
2198 			nargv[i + 1] = argv[i];
2199 
2200 		/*
2201 		 * When in history mode, we treat each element as if it
2202 		 * were in a seperate loop, so that the headers group
2203 		 * bufctls with similar histories.
2204 		 */
2205 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2206 		bhc.bhc_argc = argc + 1;
2207 		bhc.bhc_argv = nargv;
2208 		bhc.bhc_ret = DCMD_OK;
2209 
2210 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2211 		    addr) == -1) {
2212 			mdb_warn("unable to walk bufctl_history");
2213 			return (DCMD_ERR);
2214 		}
2215 
2216 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2217 			mdb_printf("\n");
2218 
2219 		return (bhc.bhc_ret);
2220 	}
2221 
2222 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2223 		if (verbose) {
2224 			mdb_printf("%16s %16s %16s %16s\n"
2225 			    "%<u>%16s %16s %16s %16s%</u>\n",
2226 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2227 			    "", "CACHE", "LASTLOG", "CONTENTS");
2228 		} else {
2229 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2230 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2231 		}
2232 	}
2233 
2234 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2235 		mdb_warn("couldn't read bufctl at %p", addr);
2236 		return (DCMD_ERR);
2237 	}
2238 
2239 	/*
2240 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2241 	 * the address does not really refer to a bufctl.
2242 	 */
2243 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2244 
2245 	if (caller != NULL) {
2246 		laddr = caller;
2247 		haddr = caller + sizeof (caller);
2248 
2249 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2250 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2251 			/*
2252 			 * We were provided an exact symbol value; any
2253 			 * address in the function is valid.
2254 			 */
2255 			laddr = (uintptr_t)sym.st_value;
2256 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2257 		}
2258 
2259 		for (i = 0; i < depth; i++)
2260 			if (bcp->bc_stack[i] >= laddr &&
2261 			    bcp->bc_stack[i] < haddr)
2262 				break;
2263 
2264 		if (i == depth)
2265 			return (DCMD_OK);
2266 	}
2267 
2268 	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2269 		return (DCMD_OK);
2270 
2271 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2272 		return (DCMD_OK);
2273 
2274 	if (latest != 0 && bcp->bc_timestamp > latest)
2275 		return (DCMD_OK);
2276 
2277 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2278 		return (DCMD_OK);
2279 
2280 	if (flags & DCMD_PIPE_OUT) {
2281 		mdb_printf("%#r\n", addr);
2282 		return (DCMD_OK);
2283 	}
2284 
2285 	if (verbose) {
2286 		mdb_printf(
2287 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2288 		    "%16s %16p %16p %16p\n",
2289 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2290 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2291 
2292 		mdb_inc_indent(17);
2293 		for (i = 0; i < depth; i++)
2294 			mdb_printf("%a\n", bcp->bc_stack[i]);
2295 		mdb_dec_indent(17);
2296 		mdb_printf("\n");
2297 	} else {
2298 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2299 		    bcp->bc_timestamp, bcp->bc_thread);
2300 
2301 		for (i = 0; i < depth; i++) {
2302 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2303 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2304 				continue;
2305 			if (is_umem_sym(c, "umem_"))
2306 				continue;
2307 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2308 			break;
2309 		}
2310 
2311 		if (i >= depth)
2312 			mdb_printf("\n");
2313 	}
2314 
2315 	return (DCMD_OK);
2316 }
2317 
2318 /*ARGSUSED*/
2319 int
2320 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2321 {
2322 	mdb_arg_t a;
2323 
2324 	if (!(flags & DCMD_ADDRSPEC))
2325 		return (DCMD_USAGE);
2326 
2327 	if (argc != 0)
2328 		return (DCMD_USAGE);
2329 
2330 	a.a_type = MDB_TYPE_STRING;
2331 	a.a_un.a_str = "-v";
2332 
2333 	return (bufctl(addr, flags, 1, &a));
2334 }
2335 
2336 typedef struct umem_verify {
2337 	uint64_t *umv_buf;		/* buffer to read cache contents into */
2338 	size_t umv_size;		/* number of bytes in umv_buf */
2339 	int umv_corruption;		/* > 0 if corruption found. */
2340 	int umv_besilent;		/* report actual corruption sites */
2341 	struct umem_cache umv_cache;	/* the cache we're operating on */
2342 } umem_verify_t;
2343 
/*
 * verify_pattern()
 *	Scan the size bytes starting at buf_arg, one 64-bit word at a time,
 *	and check that every word equals pat.  Returns the byte offset of
 *	the first word that differs, or -1 if the whole region matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	/*LINTED*/
	const uint64_t *limit = (const uint64_t *)((char *)buf_arg + size);
	const uint64_t *cur = buf_arg;

	while (cur < limit) {
		if (*cur != pat)
			return ((uintptr_t)cur - (uintptr_t)buf_arg);
		cur++;
	}

	return (-1);
}
2360 
2361 /*
2362  * verify_buftag()
2363  *	verify that btp->bt_bxstat == (bcp ^ pat)
2364  */
2365 static int
2366 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2367 {
2368 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2369 }
2370 
2371 /*
2372  * verify_free()
2373  *	verify the integrity of a free block of memory by checking
2374  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2375  */
2376 /*ARGSUSED1*/
2377 static int
2378 verify_free(uintptr_t addr, const void *data, void *private)
2379 {
2380 	umem_verify_t *umv = (umem_verify_t *)private;
2381 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2382 	int64_t corrupt;		/* corruption offset */
2383 	umem_buftag_t *buftagp;		/* ptr to buftag */
2384 	umem_cache_t *cp = &umv->umv_cache;
2385 	int besilent = umv->umv_besilent;
2386 
2387 	/*LINTED*/
2388 	buftagp = UMEM_BUFTAG(cp, buf);
2389 
2390 	/*
2391 	 * Read the buffer to check.
2392 	 */
2393 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2394 		if (!besilent)
2395 			mdb_warn("couldn't read %p", addr);
2396 		return (WALK_NEXT);
2397 	}
2398 
2399 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2400 	    UMEM_FREE_PATTERN)) >= 0) {
2401 		if (!besilent)
2402 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2403 			    addr, (uintptr_t)addr + corrupt);
2404 		goto corrupt;
2405 	}
2406 
2407 	if ((cp->cache_flags & UMF_HASH) &&
2408 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2409 		if (!besilent)
2410 			mdb_printf("buffer %p (free) seems to "
2411 			    "have a corrupt redzone pattern\n", addr);
2412 		goto corrupt;
2413 	}
2414 
2415 	/*
2416 	 * confirm bufctl pointer integrity.
2417 	 */
2418 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2419 		if (!besilent)
2420 			mdb_printf("buffer %p (free) has a corrupt "
2421 			    "buftag\n", addr);
2422 		goto corrupt;
2423 	}
2424 
2425 	return (WALK_NEXT);
2426 corrupt:
2427 	umv->umv_corruption++;
2428 	return (WALK_NEXT);
2429 }
2430 
2431 /*
2432  * verify_alloc()
2433  *	Verify that the buftag of an allocated buffer makes sense with respect
2434  *	to the buffer.
2435  */
2436 /*ARGSUSED1*/
2437 static int
2438 verify_alloc(uintptr_t addr, const void *data, void *private)
2439 {
2440 	umem_verify_t *umv = (umem_verify_t *)private;
2441 	umem_cache_t *cp = &umv->umv_cache;
2442 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2443 	/*LINTED*/
2444 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2445 	uint32_t *ip = (uint32_t *)buftagp;
2446 	uint8_t *bp = (uint8_t *)buf;
2447 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2448 	int besilent = umv->umv_besilent;
2449 
2450 	/*
2451 	 * Read the buffer to check.
2452 	 */
2453 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2454 		if (!besilent)
2455 			mdb_warn("couldn't read %p", addr);
2456 		return (WALK_NEXT);
2457 	}
2458 
2459 	/*
2460 	 * There are two cases to handle:
2461 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2462 	 *    0xfeedfacefeedface at the end of it
2463 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2464 	 *    0xbb just past the end of the region in use.  At the buftag,
2465 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2466 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2467 	 *    endianness), followed by 32 bits containing the offset of the
2468 	 *    0xbb byte in the buffer.
2469 	 *
2470 	 * Finally, the two 32-bit words that comprise the second half of the
2471 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2472 	 */
2473 
2474 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2475 		looks_ok = 1;
2476 	else if (!UMEM_SIZE_VALID(ip[1]))
2477 		size_ok = 0;
2478 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2479 		looks_ok = 1;
2480 	else
2481 		size_ok = 0;
2482 
2483 	if (!size_ok) {
2484 		if (!besilent)
2485 			mdb_printf("buffer %p (allocated) has a corrupt "
2486 			    "redzone size encoding\n", addr);
2487 		goto corrupt;
2488 	}
2489 
2490 	if (!looks_ok) {
2491 		if (!besilent)
2492 			mdb_printf("buffer %p (allocated) has a corrupt "
2493 			    "redzone signature\n", addr);
2494 		goto corrupt;
2495 	}
2496 
2497 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2498 		if (!besilent)
2499 			mdb_printf("buffer %p (allocated) has a "
2500 			    "corrupt buftag\n", addr);
2501 		goto corrupt;
2502 	}
2503 
2504 	return (WALK_NEXT);
2505 corrupt:
2506 	umv->umv_corruption++;
2507 	return (WALK_NEXT);
2508 }
2509 
2510 /*ARGSUSED2*/
2511 int
2512 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2513 {
2514 	if (flags & DCMD_ADDRSPEC) {
2515 		int check_alloc = 0, check_free = 0;
2516 		umem_verify_t umv;
2517 
2518 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2519 		    addr) == -1) {
2520 			mdb_warn("couldn't read umem_cache %p", addr);
2521 			return (DCMD_ERR);
2522 		}
2523 
2524 		umv.umv_size = umv.umv_cache.cache_buftag +
2525 		    sizeof (umem_buftag_t);
2526 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2527 		umv.umv_corruption = 0;
2528 
2529 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2530 			check_alloc = 1;
2531 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2532 				check_free = 1;
2533 		} else {
2534 			if (!(flags & DCMD_LOOP)) {
2535 				mdb_warn("cache %p (%s) does not have "
2536 				    "redzone checking enabled\n", addr,
2537 				    umv.umv_cache.cache_name);
2538 			}
2539 			return (DCMD_ERR);
2540 		}
2541 
2542 		if (flags & DCMD_LOOP) {
2543 			/*
2544 			 * table mode, don't print out every corrupt buffer
2545 			 */
2546 			umv.umv_besilent = 1;
2547 		} else {
2548 			mdb_printf("Summary for cache '%s'\n",
2549 			    umv.umv_cache.cache_name);
2550 			mdb_inc_indent(2);
2551 			umv.umv_besilent = 0;
2552 		}
2553 
2554 		if (check_alloc)
2555 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2556 		if (check_free)
2557 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2558 
2559 		if (flags & DCMD_LOOP) {
2560 			if (umv.umv_corruption == 0) {
2561 				mdb_printf("%-*s %?p clean\n",
2562 				    UMEM_CACHE_NAMELEN,
2563 				    umv.umv_cache.cache_name, addr);
2564 			} else {
2565 				char *s = "";	/* optional s in "buffer[s]" */
2566 				if (umv.umv_corruption > 1)
2567 					s = "s";
2568 
2569 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2570 				    UMEM_CACHE_NAMELEN,
2571 				    umv.umv_cache.cache_name, addr,
2572 				    umv.umv_corruption, s);
2573 			}
2574 		} else {
2575 			/*
2576 			 * This is the more verbose mode, when the user has
2577 			 * type addr::umem_verify.  If the cache was clean,
2578 			 * nothing will have yet been printed. So say something.
2579 			 */
2580 			if (umv.umv_corruption == 0)
2581 				mdb_printf("clean\n");
2582 
2583 			mdb_dec_indent(2);
2584 		}
2585 	} else {
2586 		/*
2587 		 * If the user didn't specify a cache to verify, we'll walk all
2588 		 * umem_cache's, specifying ourself as a callback for each...
2589 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2590 		 */
2591 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2592 		    "Cache Name", "Addr", "Cache Integrity");
2593 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2594 	}
2595 
2596 	return (DCMD_OK);
2597 }
2598 
2599 typedef struct vmem_node {
2600 	struct vmem_node *vn_next;
2601 	struct vmem_node *vn_parent;
2602 	struct vmem_node *vn_sibling;
2603 	struct vmem_node *vn_children;
2604 	uintptr_t vn_addr;
2605 	int vn_marked;
2606 	vmem_t vn_vmem;
2607 } vmem_node_t;
2608 
2609 typedef struct vmem_walk {
2610 	vmem_node_t *vw_root;
2611 	vmem_node_t *vw_current;
2612 } vmem_walk_t;
2613 
2614 int
2615 vmem_walk_init(mdb_walk_state_t *wsp)
2616 {
2617 	uintptr_t vaddr, paddr;
2618 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2619 	vmem_walk_t *vw;
2620 
2621 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2622 		mdb_warn("couldn't read 'vmem_list'");
2623 		return (WALK_ERR);
2624 	}
2625 
2626 	while (vaddr != NULL) {
2627 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2628 		vp->vn_addr = vaddr;
2629 		vp->vn_next = head;
2630 		head = vp;
2631 
2632 		if (vaddr == wsp->walk_addr)
2633 			current = vp;
2634 
2635 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2636 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2637 			goto err;
2638 		}
2639 
2640 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2641 	}
2642 
2643 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2644 
2645 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2646 			vp->vn_sibling = root;
2647 			root = vp;
2648 			continue;
2649 		}
2650 
2651 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2652 			if (parent->vn_addr != paddr)
2653 				continue;
2654 			vp->vn_sibling = parent->vn_children;
2655 			parent->vn_children = vp;
2656 			vp->vn_parent = parent;
2657 			break;
2658 		}
2659 
2660 		if (parent == NULL) {
2661 			mdb_warn("couldn't find %p's parent (%p)\n",
2662 			    vp->vn_addr, paddr);
2663 			goto err;
2664 		}
2665 	}
2666 
2667 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
2668 	vw->vw_root = root;
2669 
2670 	if (current != NULL)
2671 		vw->vw_current = current;
2672 	else
2673 		vw->vw_current = root;
2674 
2675 	wsp->walk_data = vw;
2676 	return (WALK_NEXT);
2677 err:
2678 	for (vp = head; head != NULL; vp = head) {
2679 		head = vp->vn_next;
2680 		mdb_free(vp, sizeof (vmem_node_t));
2681 	}
2682 
2683 	return (WALK_ERR);
2684 }
2685 
2686 int
2687 vmem_walk_step(mdb_walk_state_t *wsp)
2688 {
2689 	vmem_walk_t *vw = wsp->walk_data;
2690 	vmem_node_t *vp;
2691 	int rval;
2692 
2693 	if ((vp = vw->vw_current) == NULL)
2694 		return (WALK_DONE);
2695 
2696 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2697 
2698 	if (vp->vn_children != NULL) {
2699 		vw->vw_current = vp->vn_children;
2700 		return (rval);
2701 	}
2702 
2703 	do {
2704 		vw->vw_current = vp->vn_sibling;
2705 		vp = vp->vn_parent;
2706 	} while (vw->vw_current == NULL && vp != NULL);
2707 
2708 	return (rval);
2709 }
2710 
2711 /*
2712  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
2713  * children are visited before their parent.  We perform the postfix walk
2714  * iteratively (rather than recursively) to allow mdb to regain control
2715  * after each callback.
2716  */
2717 int
2718 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
2719 {
2720 	vmem_walk_t *vw = wsp->walk_data;
2721 	vmem_node_t *vp = vw->vw_current;
2722 	int rval;
2723 
2724 	/*
2725 	 * If this node is marked, then we know that we have already visited
2726 	 * all of its children.  If the node has any siblings, they need to
2727 	 * be visited next; otherwise, we need to visit the parent.  Note
2728 	 * that vp->vn_marked will only be zero on the first invocation of
2729 	 * the step function.
2730 	 */
2731 	if (vp->vn_marked) {
2732 		if (vp->vn_sibling != NULL)
2733 			vp = vp->vn_sibling;
2734 		else if (vp->vn_parent != NULL)
2735 			vp = vp->vn_parent;
2736 		else {
2737 			/*
2738 			 * We have neither a parent, nor a sibling, and we
2739 			 * have already been visited; we're done.
2740 			 */
2741 			return (WALK_DONE);
2742 		}
2743 	}
2744 
2745 	/*
2746 	 * Before we visit this node, visit its children.
2747 	 */
2748 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
2749 		vp = vp->vn_children;
2750 
2751 	vp->vn_marked = 1;
2752 	vw->vw_current = vp;
2753 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2754 
2755 	return (rval);
2756 }
2757 
2758 void
2759 vmem_walk_fini(mdb_walk_state_t *wsp)
2760 {
2761 	vmem_walk_t *vw = wsp->walk_data;
2762 	vmem_node_t *root = vw->vw_root;
2763 	int done;
2764 
2765 	if (root == NULL)
2766 		return;
2767 
2768 	if ((vw->vw_root = root->vn_children) != NULL)
2769 		vmem_walk_fini(wsp);
2770 
2771 	vw->vw_root = root->vn_sibling;
2772 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
2773 	mdb_free(root, sizeof (vmem_node_t));
2774 
2775 	if (done) {
2776 		mdb_free(vw, sizeof (vmem_walk_t));
2777 	} else {
2778 		vmem_walk_fini(wsp);
2779 	}
2780 }
2781 
/*
 * Per-walk state for the vmem segment walkers.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report, or VMEM_NONE for all */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
} vmem_seg_walk_t;
2787 
2788 /*ARGSUSED*/
2789 int
2790 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2791 {
2792 	vmem_seg_walk_t *vsw;
2793 
2794 	if (wsp->walk_addr == NULL) {
2795 		mdb_warn("vmem_%s does not support global walks\n", name);
2796 		return (WALK_ERR);
2797 	}
2798 
2799 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2800 
2801 	vsw->vsw_type = type;
2802 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2803 	vsw->vsw_current = vsw->vsw_start;
2804 
2805 	return (WALK_NEXT);
2806 }
2807 
2808 /*
2809  * vmem segments can't have type 0 (this should be added to vmem_impl.h).
2810  */
2811 #define	VMEM_NONE	0
2812 
2813 int
2814 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
2815 {
2816 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
2817 }
2818 
2819 int
2820 vmem_free_walk_init(mdb_walk_state_t *wsp)
2821 {
2822 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
2823 }
2824 
2825 int
2826 vmem_span_walk_init(mdb_walk_state_t *wsp)
2827 {
2828 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
2829 }
2830 
2831 int
2832 vmem_seg_walk_init(mdb_walk_state_t *wsp)
2833 {
2834 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
2835 }
2836 
2837 int
2838 vmem_seg_walk_step(mdb_walk_state_t *wsp)
2839 {
2840 	vmem_seg_t seg;
2841 	vmem_seg_walk_t *vsw = wsp->walk_data;
2842 	uintptr_t addr = vsw->vsw_current;
2843 	static size_t seg_size = 0;
2844 	int rval;
2845 
2846 	if (!seg_size) {
2847 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
2848 			mdb_warn("failed to read 'vmem_seg_size'");
2849 			seg_size = sizeof (vmem_seg_t);
2850 		}
2851 	}
2852 
2853 	if (seg_size < sizeof (seg))
2854 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
2855 
2856 	if (mdb_vread(&seg, seg_size, addr) == -1) {
2857 		mdb_warn("couldn't read vmem_seg at %p", addr);
2858 		return (WALK_ERR);
2859 	}
2860 
2861 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
2862 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
2863 		rval = WALK_NEXT;
2864 	} else {
2865 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
2866 	}
2867 
2868 	if (vsw->vsw_current == vsw->vsw_start)
2869 		return (WALK_DONE);
2870 
2871 	return (rval);
2872 }
2873 
2874 void
2875 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
2876 {
2877 	vmem_seg_walk_t *vsw = wsp->walk_data;
2878 
2879 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
2880 }
2881 
2882 #define	VMEM_NAMEWIDTH	22
2883 
2884 int
2885 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2886 {
2887 	vmem_t v, parent;
2888 	uintptr_t paddr;
2889 	int ident = 0;
2890 	char c[VMEM_NAMEWIDTH];
2891 
2892 	if (!(flags & DCMD_ADDRSPEC)) {
2893 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
2894 			mdb_warn("can't walk vmem");
2895 			return (DCMD_ERR);
2896 		}
2897 		return (DCMD_OK);
2898 	}
2899 
2900 	if (DCMD_HDRSPEC(flags))
2901 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
2902 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
2903 		    "TOTAL", "SUCCEED", "FAIL");
2904 
2905 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
2906 		mdb_warn("couldn't read vmem at %p", addr);
2907 		return (DCMD_ERR);
2908 	}
2909 
2910 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
2911 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
2912 			mdb_warn("couldn't trace %p's ancestry", addr);
2913 			ident = 0;
2914 			break;
2915 		}
2916 		paddr = (uintptr_t)parent.vm_source;
2917 	}
2918 
2919 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
2920 
2921 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
2922 	    addr, VMEM_NAMEWIDTH, c,
2923 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
2924 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
2925 
2926 	return (DCMD_OK);
2927 }
2928 
/*
 * Print the help text for the vmem_seg dcmd: a short description followed
 * by the list of options that vmem_seg() accepts.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
2960 
2961 
2962 /*ARGSUSED*/
2963 int
2964 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2965 {
2966 	vmem_seg_t vs;
2967 	uintptr_t *stk = vs.vs_stack;
2968 	uintptr_t sz;
2969 	uint8_t t;
2970 	const char *type = NULL;
2971 	GElf_Sym sym;
2972 	char c[MDB_SYM_NAMLEN];
2973 	int no_debug;
2974 	int i;
2975 	int depth;
2976 	uintptr_t laddr, haddr;
2977 
2978 	uintptr_t caller = NULL, thread = NULL;
2979 	uintptr_t minsize = 0, maxsize = 0;
2980 
2981 	hrtime_t earliest = 0, latest = 0;
2982 
2983 	uint_t size = 0;
2984 	uint_t verbose = 0;
2985 
2986 	if (!(flags & DCMD_ADDRSPEC))
2987 		return (DCMD_USAGE);
2988 
2989 	if (mdb_getopts(argc, argv,
2990 	    'c', MDB_OPT_UINTPTR, &caller,
2991 	    'e', MDB_OPT_UINT64, &earliest,
2992 	    'l', MDB_OPT_UINT64, &latest,
2993 	    's', MDB_OPT_SETBITS, TRUE, &size,
2994 	    'm', MDB_OPT_UINTPTR, &minsize,
2995 	    'M', MDB_OPT_UINTPTR, &maxsize,
2996 	    't', MDB_OPT_UINTPTR, &thread,
2997 	    'T', MDB_OPT_STR, &type,
2998 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2999 	    NULL) != argc)
3000 		return (DCMD_USAGE);
3001 
3002 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3003 		if (verbose) {
3004 			mdb_printf("%16s %4s %16s %16s %16s\n"
3005 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3006 			    "ADDR", "TYPE", "START", "END", "SIZE",
3007 			    "", "", "THREAD", "TIMESTAMP", "");
3008 		} else {
3009 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3010 			    "START", size? "SIZE" : "END", "WHO");
3011 		}
3012 	}
3013 
3014 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3015 		mdb_warn("couldn't read vmem_seg at %p", addr);
3016 		return (DCMD_ERR);
3017 	}
3018 
3019 	if (type != NULL) {
3020 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3021 			t = VMEM_ALLOC;
3022 		else if (strcmp(type, "FREE") == 0)
3023 			t = VMEM_FREE;
3024 		else if (strcmp(type, "SPAN") == 0)
3025 			t = VMEM_SPAN;
3026 		else if (strcmp(type, "ROTR") == 0 ||
3027 		    strcmp(type, "ROTOR") == 0)
3028 			t = VMEM_ROTOR;
3029 		else if (strcmp(type, "WLKR") == 0 ||
3030 		    strcmp(type, "WALKER") == 0)
3031 			t = VMEM_WALKER;
3032 		else {
3033 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3034 			    type);
3035 			return (DCMD_ERR);
3036 		}
3037 
3038 		if (vs.vs_type != t)
3039 			return (DCMD_OK);
3040 	}
3041 
3042 	sz = vs.vs_end - vs.vs_start;
3043 
3044 	if (minsize != 0 && sz < minsize)
3045 		return (DCMD_OK);
3046 
3047 	if (maxsize != 0 && sz > maxsize)
3048 		return (DCMD_OK);
3049 
3050 	t = vs.vs_type;
3051 	depth = vs.vs_depth;
3052 
3053 	/*
3054 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3055 	 */
3056 	no_debug = (t != VMEM_ALLOC) ||
3057 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3058 
3059 	if (no_debug) {
3060 		if (caller != NULL || thread != NULL || earliest != 0 ||
3061 		    latest != 0)
3062 			return (DCMD_OK);		/* not enough info */
3063 	} else {
3064 		if (caller != NULL) {
3065 			laddr = caller;
3066 			haddr = caller + sizeof (caller);
3067 
3068 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3069 			    sizeof (c), &sym) != -1 &&
3070 			    caller == (uintptr_t)sym.st_value) {
3071 				/*
3072 				 * We were provided an exact symbol value; any
3073 				 * address in the function is valid.
3074 				 */
3075 				laddr = (uintptr_t)sym.st_value;
3076 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3077 			}
3078 
3079 			for (i = 0; i < depth; i++)
3080 				if (vs.vs_stack[i] >= laddr &&
3081 				    vs.vs_stack[i] < haddr)
3082 					break;
3083 
3084 			if (i == depth)
3085 				return (DCMD_OK);
3086 		}
3087 
3088 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3089 			return (DCMD_OK);
3090 
3091 		if (earliest != 0 && vs.vs_timestamp < earliest)
3092 			return (DCMD_OK);
3093 
3094 		if (latest != 0 && vs.vs_timestamp > latest)
3095 			return (DCMD_OK);
3096 	}
3097 
3098 	type = (t == VMEM_ALLOC ? "ALLC" :
3099 	    t == VMEM_FREE ? "FREE" :
3100 	    t == VMEM_SPAN ? "SPAN" :
3101 	    t == VMEM_ROTOR ? "ROTR" :
3102 	    t == VMEM_WALKER ? "WLKR" :
3103 	    "????");
3104 
3105 	if (flags & DCMD_PIPE_OUT) {
3106 		mdb_printf("%#r\n", addr);
3107 		return (DCMD_OK);
3108 	}
3109 
3110 	if (verbose) {
3111 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3112 		    addr, type, vs.vs_start, vs.vs_end, sz);
3113 
3114 		if (no_debug)
3115 			return (DCMD_OK);
3116 
3117 		mdb_printf("%16s %4s %16d %16llx\n",
3118 		    "", "", vs.vs_thread, vs.vs_timestamp);
3119 
3120 		mdb_inc_indent(17);
3121 		for (i = 0; i < depth; i++) {
3122 			mdb_printf("%a\n", stk[i]);
3123 		}
3124 		mdb_dec_indent(17);
3125 		mdb_printf("\n");
3126 	} else {
3127 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3128 		    vs.vs_start, size? sz : vs.vs_end);
3129 
3130 		if (no_debug) {
3131 			mdb_printf("\n");
3132 			return (DCMD_OK);
3133 		}
3134 
3135 		for (i = 0; i < depth; i++) {
3136 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3137 			    c, sizeof (c), &sym) == -1)
3138 				continue;
3139 			if (is_umem_sym(c, "vmem_"))
3140 				continue;
3141 			break;
3142 		}
3143 		mdb_printf(" %a\n", stk[i]);
3144 	}
3145 	return (DCMD_OK);
3146 }
3147 
3148 /*ARGSUSED*/
3149 static int
3150 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3151 {
3152 	char name[UMEM_CACHE_NAMELEN + 1];
3153 	hrtime_t delta;
3154 	int i, depth;
3155 
3156 	if (bcp->bc_timestamp == 0)
3157 		return (WALK_DONE);
3158 
3159 	if (*newest == 0)
3160 		*newest = bcp->bc_timestamp;
3161 
3162 	delta = *newest - bcp->bc_timestamp;
3163 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3164 
3165 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3166 	    &bcp->bc_cache->cache_name) <= 0)
3167 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3168 
3169 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3170 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3171 
3172 	for (i = 0; i < depth; i++)
3173 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3174 
3175 	return (WALK_NEXT);
3176 }
3177 
3178 int
3179 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3180 {
3181 	const char *logname = "umem_transaction_log";
3182 	hrtime_t newest = 0;
3183 
3184 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3185 		return (DCMD_USAGE);
3186 
3187 	if (argc > 0) {
3188 		if (argv->a_type != MDB_TYPE_STRING)
3189 			return (DCMD_USAGE);
3190 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3191 			logname = "umem_failure_log";
3192 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3193 			logname = "umem_slab_log";
3194 		else
3195 			return (DCMD_USAGE);
3196 	}
3197 
3198 	if (umem_readvar(&addr, logname) == -1) {
3199 		mdb_warn("failed to read %s log header pointer");
3200 		return (DCMD_ERR);
3201 	}
3202 
3203 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3204 		mdb_warn("failed to walk umem log");
3205 		return (DCMD_ERR);
3206 	}
3207 
3208 	return (DCMD_OK);
3209 }
3210 
/*
 * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
 * The first piece is a structure which we use to accumulate the umem_cache_t
 * addresses of interest.  umc_add() is used as a callback for the umem_cache
 * walker; we either add all caches, or only ones named explicitly as
 * arguments.
 */

typedef struct umclist {
	const char *umc_name;		/* name to match, or NULL for all */
	uintptr_t *umc_caches;		/* array of umem_cache_t addresses */
	int umc_nelems;			/* entries of umc_caches in use */
	int umc_size;			/* capacity of umc_caches */
} umclist_t;
3224 
3225 static int
3226 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3227 {
3228 	void *p;
3229 	int s;
3230 
3231 	if (umc->umc_name == NULL ||
3232 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3233 		/*
3234 		 * If we have a match, grow our array (if necessary), and then
3235 		 * add the virtual address of the matching cache to our list.
3236 		 */
3237 		if (umc->umc_nelems >= umc->umc_size) {
3238 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3239 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3240 
3241 			bcopy(umc->umc_caches, p,
3242 			    sizeof (uintptr_t) * umc->umc_size);
3243 
3244 			umc->umc_caches = p;
3245 			umc->umc_size = s;
3246 		}
3247 
3248 		umc->umc_caches[umc->umc_nelems++] = addr;
3249 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3250 	}
3251 
3252 	return (WALK_NEXT);
3253 }
3254 
3255 /*
3256  * The second piece of ::umausers is a hash table of allocations.  Each
3257  * allocation owner is identified by its stack trace and data_size.  We then
3258  * track the total bytes of all such allocations, and the number of allocations
3259  * to report at the end.  Once we have a list of caches, we walk through the
3260  * allocated bufctls of each, and update our hash table accordingly.
3261  */
3262 
3263 typedef struct umowner {
3264 	struct umowner *umo_head;		/* First hash elt in bucket */
3265 	struct umowner *umo_next;		/* Next hash elt in chain */
3266 	size_t umo_signature;			/* Hash table signature */
3267 	uint_t umo_num;				/* Number of allocations */
3268 	size_t umo_data_size;			/* Size of each allocation */
3269 	size_t umo_total_size;			/* Total bytes of allocation */
3270 	int umo_depth;				/* Depth of stack trace */
3271 	uintptr_t *umo_stack;			/* Stack trace */
3272 } umowner_t;
3273 
3274 typedef struct umusers {
3275 	const umem_cache_t *umu_cache;		/* Current umem cache */
3276 	umowner_t *umu_hash;			/* Hash table of owners */
3277 	uintptr_t *umu_stacks;			/* stacks for owners */
3278 	int umu_nelems;				/* Number of entries in use */
3279 	int umu_size;				/* Total number of entries */
3280 } umusers_t;
3281 
3282 static void
3283 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3284     size_t size, size_t data_size)
3285 {
3286 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3287 	size_t bucket, signature = data_size;
3288 	umowner_t *umo, *umoend;
3289 
3290 	/*
3291 	 * If the hash table is full, double its size and rehash everything.
3292 	 */
3293 	if (umu->umu_nelems >= umu->umu_size) {
3294 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3295 		size_t umowner_size = sizeof (umowner_t);
3296 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3297 		uintptr_t *new_stacks;
3298 
3299 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3300 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3301 
3302 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3303 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3304 		umu->umu_hash = umo;
3305 		umu->umu_stacks = new_stacks;
3306 		umu->umu_size = s;
3307 
3308 		umoend = umu->umu_hash + umu->umu_size;
3309 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3310 			umo->umo_head = NULL;
3311 			umo->umo_stack = &umu->umu_stacks[
3312 			    umem_stack_depth * (umo - umu->umu_hash)];
3313 		}
3314 
3315 		umoend = umu->umu_hash + umu->umu_nelems;
3316 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3317 			bucket = umo->umo_signature & (umu->umu_size - 1);
3318 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3319 			umu->umu_hash[bucket].umo_head = umo;
3320 		}
3321 	}
3322 
3323 	/*
3324 	 * Finish computing the hash signature from the stack trace, and then
3325 	 * see if the owner is in the hash table.  If so, update our stats.
3326 	 */
3327 	for (i = 0; i < depth; i++)
3328 		signature += bcp->bc_stack[i];
3329 
3330 	bucket = signature & (umu->umu_size - 1);
3331 
3332 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3333 		if (umo->umo_signature == signature) {
3334 			size_t difference = 0;
3335 
3336 			difference |= umo->umo_data_size - data_size;
3337 			difference |= umo->umo_depth - depth;
3338 
3339 			for (i = 0; i < depth; i++) {
3340 				difference |= umo->umo_stack[i] -
3341 				    bcp->bc_stack[i];
3342 			}
3343 
3344 			if (difference == 0) {
3345 				umo->umo_total_size += size;
3346 				umo->umo_num++;
3347 				return;
3348 			}
3349 		}
3350 	}
3351 
3352 	/*
3353 	 * If the owner is not yet hashed, grab the next element and fill it
3354 	 * in based on the allocation information.
3355 	 */
3356 	umo = &umu->umu_hash[umu->umu_nelems++];
3357 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3358 	umu->umu_hash[bucket].umo_head = umo;
3359 
3360 	umo->umo_signature = signature;
3361 	umo->umo_num = 1;
3362 	umo->umo_data_size = data_size;
3363 	umo->umo_total_size = size;
3364 	umo->umo_depth = depth;
3365 
3366 	for (i = 0; i < depth; i++)
3367 		umo->umo_stack[i] = bcp->bc_stack[i];
3368 }
3369 
3370 /*
3371  * When ::umausers is invoked without the -f flag, we simply update our hash
3372  * table with the information from each allocated bufctl.
3373  */
3374 /*ARGSUSED*/
3375 static int
3376 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3377 {
3378 	const umem_cache_t *cp = umu->umu_cache;
3379 
3380 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3381 	return (WALK_NEXT);
3382 }
3383 
3384 /*
3385  * When ::umausers is invoked with the -f flag, we print out the information
3386  * for each bufctl as well as updating the hash table.
3387  */
3388 static int
3389 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3390 {
3391 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3392 	const umem_cache_t *cp = umu->umu_cache;
3393 
3394 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3395 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3396 
3397 	for (i = 0; i < depth; i++)
3398 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3399 
3400 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3401 	return (WALK_NEXT);
3402 }
3403 
3404 /*
3405  * We sort our results by allocation size before printing them.
3406  */
3407 static int
3408 umownercmp(const void *lp, const void *rp)
3409 {
3410 	const umowner_t *lhs = lp;
3411 	const umowner_t *rhs = rp;
3412 
3413 	return (rhs->umo_total_size - lhs->umo_total_size);
3414 }
3415 
3416 /*
3417  * The main engine of ::umausers is relatively straightforward: First we
3418  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3419  * iterate over the allocated bufctls of each cache in the list.  Finally,
3420  * we sort and print our results.
3421  */
3422 /*ARGSUSED*/
3423 int
3424 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3425 {
3426 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3427 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3428 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3429 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3430 	int opt_e = FALSE;		/* Include "small" users */
3431 	int opt_f = FALSE;		/* Print stack traces */
3432 
3433 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3434 	umowner_t *umo, *umoend;
3435 	int i, oelems;
3436 
3437 	umclist_t umc;
3438 	umusers_t umu;
3439 
3440 	if (flags & DCMD_ADDRSPEC)
3441 		return (DCMD_USAGE);
3442 
3443 	bzero(&umc, sizeof (umc));
3444 	bzero(&umu, sizeof (umu));
3445 
3446 	while ((i = mdb_getopts(argc, argv,
3447 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3448 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3449 
3450 		argv += i;	/* skip past options we just processed */
3451 		argc -= i;	/* adjust argc */
3452 
3453 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3454 			return (DCMD_USAGE);
3455 
3456 		oelems = umc.umc_nelems;
3457 		umc.umc_name = argv->a_un.a_str;
3458 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3459 
3460 		if (umc.umc_nelems == oelems) {
3461 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3462 			return (DCMD_ERR);
3463 		}
3464 
3465 		do_all_caches = 0;
3466 		argv++;
3467 		argc--;
3468 	}
3469 
3470 	if (opt_e)
3471 		mem_threshold = cnt_threshold = 0;
3472 
3473 	if (opt_f)
3474 		callback = (mdb_walk_cb_t)umause2;
3475 
3476 	if (do_all_caches) {
3477 		umc.umc_name = NULL; /* match all cache names */
3478 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3479 	}
3480 
3481 	for (i = 0; i < umc.umc_nelems; i++) {
3482 		uintptr_t cp = umc.umc_caches[i];
3483 		umem_cache_t c;
3484 
3485 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3486 			mdb_warn("failed to read cache at %p", cp);
3487 			continue;
3488 		}
3489 
3490 		if (!(c.cache_flags & UMF_AUDIT)) {
3491 			if (!do_all_caches) {
3492 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3493 				    c.cache_name);
3494 			}
3495 			continue;
3496 		}
3497 
3498 		umu.umu_cache = &c;
3499 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3500 		audited_caches++;
3501 	}
3502 
3503 	if (audited_caches == 0 && do_all_caches) {
3504 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3505 		return (DCMD_ERR);
3506 	}
3507 
3508 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3509 	umoend = umu.umu_hash + umu.umu_nelems;
3510 
3511 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3512 		if (umo->umo_total_size < mem_threshold &&
3513 		    umo->umo_num < cnt_threshold)
3514 			continue;
3515 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3516 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3517 		for (i = 0; i < umo->umo_depth; i++)
3518 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3519 	}
3520 
3521 	return (DCMD_OK);
3522 }
3523