xref: /titanic_41/usr/src/cmd/mdb/common/modules/libumem/umem.c (revision 4b0d01e9d944e10498c80bc88d80a2f5cdd9be22)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2012 Joyent, Inc.  All rights reserved.
28  * Copyright (c) 2013 by Delphix. All rights reserved.
29  */
30 
31 #include "umem.h"
32 
33 #include <sys/vmem_impl_user.h>
34 #include <umem_impl.h>
35 
36 #include <alloca.h>
37 #include <limits.h>
38 #include <mdb/mdb_whatis.h>
39 #include <thr_uberdata.h>
40 
41 #include "misc.h"
42 #include "leaky.h"
43 #include "dist.h"
44 
45 #include "umem_pagesize.h"
46 
/*
 * Bit flags describing a umem walk.  umem_walk_init_common() masks UM_HASH
 * out of the type it is handed; the consumers of the remaining bits are not
 * visible in this part of the file.
 */
#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

/*
 * Debugger-side copy of the target's umem_ready.  umem_update_variables()
 * clears it to 0 when umem_set_standalone() fails.
 */
int umem_ready;

/* Non-zero once the "umem_stack_depth corrupted" warning has been printed. */
static int umem_stack_depth_warned;
/* Read from the target's umem_max_ncpus (or, failing that, max_ncpus). */
static uint32_t umem_max_ncpus;
/* Read from the target; reset to 0 if it exceeds UMEM_MAX_STACK_DEPTH. */
uint32_t umem_stack_depth;

/* Copy of the target's "pagesize", set by umem_update_variables(). */
size_t umem_pagesize;

/*
 * Read a target variable through umem_readvar() into the C variable of the
 * same name.  Evaluates to non-zero (after issuing a warning) when the read
 * fails and to zero otherwise.  The identifier is stringified, so the C
 * variable must be spelled exactly like the name handed to umem_readvar().
 */
#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    (mdb_warn("failed to read "#var), 1))
63 
64 int
umem_update_variables(void)65 umem_update_variables(void)
66 {
67 	size_t pagesize;
68 
69 	/*
70 	 * Figure out which type of umem is being used; if it's not there
71 	 * yet, succeed quietly.
72 	 */
73 	if (umem_set_standalone() == -1) {
74 		umem_ready = 0;
75 		return (0);		/* umem not there yet */
76 	}
77 
78 	/*
79 	 * Solaris 9 used a different name for umem_max_ncpus.  It's
80 	 * cheap backwards compatibility to check for both names.
81 	 */
82 	if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
83 	    umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
84 		mdb_warn("unable to read umem_max_ncpus or max_ncpus");
85 		return (-1);
86 	}
87 	if (UMEM_READVAR(umem_ready))
88 		return (-1);
89 	if (UMEM_READVAR(umem_stack_depth))
90 		return (-1);
91 	if (UMEM_READVAR(pagesize))
92 		return (-1);
93 
94 	if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
95 		if (umem_stack_depth_warned == 0) {
96 			mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
97 			    umem_stack_depth, UMEM_MAX_STACK_DEPTH);
98 			umem_stack_depth_warned = 1;
99 		}
100 		umem_stack_depth = 0;
101 	}
102 
103 	umem_pagesize = pagesize;
104 
105 	return (0);
106 }
107 
108 static int
umem_ptc_walk_init(mdb_walk_state_t * wsp)109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
110 {
111 	if (wsp->walk_addr == NULL) {
112 		if (mdb_layered_walk("ulwp", wsp) == -1) {
113 			mdb_warn("couldn't walk 'ulwp'");
114 			return (WALK_ERR);
115 		}
116 	}
117 
118 	return (WALK_NEXT);
119 }
120 
121 static int
umem_ptc_walk_step(mdb_walk_state_t * wsp)122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
123 {
124 	uintptr_t this;
125 	int rval;
126 
127 	if (wsp->walk_layer != NULL) {
128 		this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
129 		    (uintptr_t)wsp->walk_arg;
130 	} else {
131 		this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
132 	}
133 
134 	for (;;) {
135 		if (mdb_vread(&this, sizeof (void *), this) == -1) {
136 			mdb_warn("couldn't read ptc buffer at %p", this);
137 			return (WALK_ERR);
138 		}
139 
140 		if (this == NULL)
141 			break;
142 
143 		rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
144 
145 		if (rval != WALK_NEXT)
146 			return (rval);
147 	}
148 
149 	return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
150 }
151 
152 /*ARGSUSED*/
153 static int
umem_init_walkers(uintptr_t addr,const umem_cache_t * c,int * sizes)154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
155 {
156 	mdb_walker_t w;
157 	char descr[64];
158 	char name[64];
159 	int i;
160 
161 	(void) mdb_snprintf(descr, sizeof (descr),
162 	    "walk the %s cache", c->cache_name);
163 
164 	w.walk_name = c->cache_name;
165 	w.walk_descr = descr;
166 	w.walk_init = umem_walk_init;
167 	w.walk_step = umem_walk_step;
168 	w.walk_fini = umem_walk_fini;
169 	w.walk_init_arg = (void *)addr;
170 
171 	if (mdb_add_walker(&w) == -1)
172 		mdb_warn("failed to add %s walker", c->cache_name);
173 
174 	if (!(c->cache_flags & UMF_PTC))
175 		return (WALK_NEXT);
176 
177 	/*
178 	 * For the per-thread cache walker, the address is the offset in the
179 	 * tm_roots[] array of the ulwp_t.
180 	 */
181 	for (i = 0; sizes[i] != 0; i++) {
182 		if (sizes[i] == c->cache_bufsize)
183 			break;
184 	}
185 
186 	if (sizes[i] == 0) {
187 		mdb_warn("cache %s is cached per-thread, but could not find "
188 		    "size in umem_alloc_sizes\n", c->cache_name);
189 		return (WALK_NEXT);
190 	}
191 
192 	if (i >= NTMEMBASE) {
193 		mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
194 		    c->cache_name, i, NTMEMBASE);
195 		return (WALK_NEXT);
196 	}
197 
198 	(void) mdb_snprintf(name, sizeof (name),
199 	    "umem_ptc_%d", c->cache_bufsize);
200 	(void) mdb_snprintf(descr, sizeof (descr),
201 	    "walk the per-thread cache for %s", c->cache_name);
202 
203 	w.walk_name = name;
204 	w.walk_descr = descr;
205 	w.walk_init = umem_ptc_walk_init;
206 	w.walk_step = umem_ptc_walk_step;
207 	w.walk_fini = NULL;
208 	w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
209 
210 	if (mdb_add_walker(&w) == -1)
211 		mdb_warn("failed to add %s walker", w.walk_name);
212 
213 	return (WALK_NEXT);
214 }
215 
216 /*ARGSUSED*/
217 static void
umem_statechange_cb(void * arg)218 umem_statechange_cb(void *arg)
219 {
220 	static int been_ready = 0;
221 	GElf_Sym sym;
222 	int *sizes;
223 
224 #ifndef _KMDB
225 	leaky_cleanup(1);	/* state changes invalidate leaky state */
226 #endif
227 
228 	if (umem_update_variables() == -1)
229 		return;
230 
231 	if (been_ready)
232 		return;
233 
234 	if (umem_ready != UMEM_READY)
235 		return;
236 
237 	been_ready = 1;
238 
239 	/*
240 	 * In order to determine the tm_roots offset of any cache that is
241 	 * cached per-thread, we need to have the umem_alloc_sizes array.
242 	 * Read this, assuring that it is zero-terminated.
243 	 */
244 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
245 		mdb_warn("unable to lookup 'umem_alloc_sizes'");
246 		return;
247 	}
248 
249 	sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
250 
251 	if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
252 		mdb_warn("couldn't read 'umem_alloc_sizes'");
253 		return;
254 	}
255 
256 	(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
257 }
258 
259 int
umem_abort_messages(void)260 umem_abort_messages(void)
261 {
262 	char *umem_error_buffer;
263 	uint_t umem_error_begin;
264 	GElf_Sym sym;
265 	size_t bufsize;
266 
267 	if (UMEM_READVAR(umem_error_begin))
268 		return (DCMD_ERR);
269 
270 	if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
271 		mdb_warn("unable to look up umem_error_buffer");
272 		return (DCMD_ERR);
273 	}
274 
275 	bufsize = (size_t)sym.st_size;
276 
277 	umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
278 
279 	if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
280 	    != bufsize) {
281 		mdb_warn("unable to read umem_error_buffer");
282 		return (DCMD_ERR);
283 	}
284 	/* put a zero after the end of the buffer to simplify printing */
285 	umem_error_buffer[bufsize] = 0;
286 
287 	if ((umem_error_begin % bufsize) == 0)
288 		mdb_printf("%s\n", umem_error_buffer);
289 	else {
290 		umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
291 		mdb_printf("%s%s\n",
292 		    &umem_error_buffer[umem_error_begin % bufsize],
293 		    umem_error_buffer);
294 	}
295 
296 	return (DCMD_OK);
297 }
298 
299 static void
umem_log_status(const char * name,umem_log_header_t * val)300 umem_log_status(const char *name, umem_log_header_t *val)
301 {
302 	umem_log_header_t my_lh;
303 	uintptr_t pos = (uintptr_t)val;
304 	size_t size;
305 
306 	if (pos == NULL)
307 		return;
308 
309 	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
310 		mdb_warn("\nunable to read umem_%s_log pointer %p",
311 		    name, pos);
312 		return;
313 	}
314 
315 	size = my_lh.lh_chunksize * my_lh.lh_nchunks;
316 
317 	if (size % (1024 * 1024) == 0)
318 		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
319 	else if (size % 1024 == 0)
320 		mdb_printf("%s=%dk ", name, size / 1024);
321 	else
322 		mdb_printf("%s=%d ", name, size);
323 }
324 
/*
 * One named group of umem debugging flags: a name, the flag bits that go
 * with it, and an optional separate mask of bits to clear.
 */
typedef struct umem_debug_flags {
	const char	*udf_name;
	uint_t		udf_flags;
	uint_t		udf_clear;	/* if 0, uses udf_flags */
} umem_debug_flags_t;

/*
 * Table of named flag groups, terminated by an entry with a NULL name.
 * Entries that omit udf_clear leave it 0.  Nothing in the visible part of
 * this file references the table.
 */
umem_debug_flags_t umem_status_flags[] = {
	{ "random",	UMF_RANDOMIZE,	UMF_RANDOM },
	{ "default",	UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
	{ "audit",	UMF_AUDIT },
	{ "guards",	UMF_DEADBEEF | UMF_REDZONE },
	{ "nosignal",	UMF_CHECKSIGNAL },
	{ "firewall",	UMF_FIREWALL },
	{ "lite",	UMF_LITE },
	{ NULL }
};
341 
342 /*ARGSUSED*/
343 int
umem_status(uintptr_t addr,uint_t flags,int ac,const mdb_arg_t * argv)344 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
345 {
346 	int umem_logging;
347 
348 	umem_log_header_t *umem_transaction_log;
349 	umem_log_header_t *umem_content_log;
350 	umem_log_header_t *umem_failure_log;
351 	umem_log_header_t *umem_slab_log;
352 
353 	mdb_printf("Status:\t\t%s\n",
354 	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
355 	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
356 	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
357 	    umem_ready == UMEM_READY ? "ready and active" :
358 	    umem_ready == 0 ? "not loaded into address space" :
359 	    "unknown (umem_ready invalid)");
360 
361 	if (umem_ready == 0)
362 		return (DCMD_OK);
363 
364 	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
365 
366 	if (UMEM_READVAR(umem_logging))
367 		goto err;
368 	if (UMEM_READVAR(umem_transaction_log))
369 		goto err;
370 	if (UMEM_READVAR(umem_content_log))
371 		goto err;
372 	if (UMEM_READVAR(umem_failure_log))
373 		goto err;
374 	if (UMEM_READVAR(umem_slab_log))
375 		goto err;
376 
377 	mdb_printf("Logs:\t\t");
378 	umem_log_status("transaction", umem_transaction_log);
379 	umem_log_status("content", umem_content_log);
380 	umem_log_status("fail", umem_failure_log);
381 	umem_log_status("slab", umem_slab_log);
382 	if (!umem_logging)
383 		mdb_printf("(inactive)");
384 	mdb_printf("\n");
385 
386 	mdb_printf("Message buffer:\n");
387 	return (umem_abort_messages());
388 
389 err:
390 	mdb_printf("Message buffer:\n");
391 	(void) umem_abort_messages();
392 	return (DCMD_ERR);
393 }
394 
/*
 * State for the umem_cache walker: the address of umem_null_cache, which
 * marks the end of the circular list, and the address of the next cache to
 * visit.
 */
typedef struct {
	uintptr_t ucw_first;
	uintptr_t ucw_current;
} umem_cache_walk_t;
399 
400 int
umem_cache_walk_init(mdb_walk_state_t * wsp)401 umem_cache_walk_init(mdb_walk_state_t *wsp)
402 {
403 	umem_cache_walk_t *ucw;
404 	umem_cache_t c;
405 	uintptr_t cp;
406 	GElf_Sym sym;
407 
408 	if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
409 		mdb_warn("couldn't find umem_null_cache");
410 		return (WALK_ERR);
411 	}
412 
413 	cp = (uintptr_t)sym.st_value;
414 
415 	if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
416 		mdb_warn("couldn't read cache at %p", cp);
417 		return (WALK_ERR);
418 	}
419 
420 	ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
421 
422 	ucw->ucw_first = cp;
423 	ucw->ucw_current = (uintptr_t)c.cache_next;
424 	wsp->walk_data = ucw;
425 
426 	return (WALK_NEXT);
427 }
428 
429 int
umem_cache_walk_step(mdb_walk_state_t * wsp)430 umem_cache_walk_step(mdb_walk_state_t *wsp)
431 {
432 	umem_cache_walk_t *ucw = wsp->walk_data;
433 	umem_cache_t c;
434 	int status;
435 
436 	if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
437 		mdb_warn("couldn't read cache at %p", ucw->ucw_current);
438 		return (WALK_DONE);
439 	}
440 
441 	status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
442 
443 	if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
444 		return (WALK_DONE);
445 
446 	return (status);
447 }
448 
449 void
umem_cache_walk_fini(mdb_walk_state_t * wsp)450 umem_cache_walk_fini(mdb_walk_state_t *wsp)
451 {
452 	umem_cache_walk_t *ucw = wsp->walk_data;
453 	mdb_free(ucw, sizeof (umem_cache_walk_t));
454 }
455 
/*
 * State for the umem_cpu walker: the target address of the umem_cpus array,
 * the index of the next entry to visit, and the number of entries
 * (umem_max_ncpus at init time).
 */
typedef struct {
	umem_cpu_t *ucw_cpus;
	uint32_t ucw_current;
	uint32_t ucw_max;
} umem_cpu_walk_state_t;
461 
462 int
umem_cpu_walk_init(mdb_walk_state_t * wsp)463 umem_cpu_walk_init(mdb_walk_state_t *wsp)
464 {
465 	umem_cpu_t *umem_cpus;
466 
467 	umem_cpu_walk_state_t *ucw;
468 
469 	if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
470 		mdb_warn("failed to read 'umem_cpus'");
471 		return (WALK_ERR);
472 	}
473 
474 	ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
475 
476 	ucw->ucw_cpus = umem_cpus;
477 	ucw->ucw_current = 0;
478 	ucw->ucw_max = umem_max_ncpus;
479 
480 	wsp->walk_data = ucw;
481 	return (WALK_NEXT);
482 }
483 
484 int
umem_cpu_walk_step(mdb_walk_state_t * wsp)485 umem_cpu_walk_step(mdb_walk_state_t *wsp)
486 {
487 	umem_cpu_t cpu;
488 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
489 
490 	uintptr_t caddr;
491 
492 	if (ucw->ucw_current >= ucw->ucw_max)
493 		return (WALK_DONE);
494 
495 	caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
496 
497 	if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
498 		mdb_warn("failed to read cpu %d", ucw->ucw_current);
499 		return (WALK_ERR);
500 	}
501 
502 	ucw->ucw_current++;
503 
504 	return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
505 }
506 
507 void
umem_cpu_walk_fini(mdb_walk_state_t * wsp)508 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
509 {
510 	umem_cpu_walk_state_t *ucw = wsp->walk_data;
511 
512 	mdb_free(ucw, sizeof (*ucw));
513 }
514 
515 int
umem_cpu_cache_walk_init(mdb_walk_state_t * wsp)516 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
517 {
518 	if (wsp->walk_addr == NULL) {
519 		mdb_warn("umem_cpu_cache doesn't support global walks");
520 		return (WALK_ERR);
521 	}
522 
523 	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
524 		mdb_warn("couldn't walk 'umem_cpu'");
525 		return (WALK_ERR);
526 	}
527 
528 	wsp->walk_data = (void *)wsp->walk_addr;
529 
530 	return (WALK_NEXT);
531 }
532 
533 int
umem_cpu_cache_walk_step(mdb_walk_state_t * wsp)534 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
535 {
536 	uintptr_t caddr = (uintptr_t)wsp->walk_data;
537 	const umem_cpu_t *cpu = wsp->walk_layer;
538 	umem_cpu_cache_t cc;
539 
540 	caddr += cpu->cpu_cache_offset;
541 
542 	if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
543 		mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
544 		return (WALK_ERR);
545 	}
546 
547 	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
548 }
549 
550 int
umem_slab_walk_init(mdb_walk_state_t * wsp)551 umem_slab_walk_init(mdb_walk_state_t *wsp)
552 {
553 	uintptr_t caddr = wsp->walk_addr;
554 	umem_cache_t c;
555 
556 	if (caddr == NULL) {
557 		mdb_warn("umem_slab doesn't support global walks\n");
558 		return (WALK_ERR);
559 	}
560 
561 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
562 		mdb_warn("couldn't read umem_cache at %p", caddr);
563 		return (WALK_ERR);
564 	}
565 
566 	wsp->walk_data =
567 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
568 	wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
569 
570 	return (WALK_NEXT);
571 }
572 
573 int
umem_slab_walk_partial_init(mdb_walk_state_t * wsp)574 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
575 {
576 	uintptr_t caddr = wsp->walk_addr;
577 	umem_cache_t c;
578 
579 	if (caddr == NULL) {
580 		mdb_warn("umem_slab_partial doesn't support global walks\n");
581 		return (WALK_ERR);
582 	}
583 
584 	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
585 		mdb_warn("couldn't read umem_cache at %p", caddr);
586 		return (WALK_ERR);
587 	}
588 
589 	wsp->walk_data =
590 	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
591 	wsp->walk_addr = (uintptr_t)c.cache_freelist;
592 
593 	/*
594 	 * Some consumers (umem_walk_step(), in particular) require at
595 	 * least one callback if there are any buffers in the cache.  So
596 	 * if there are *no* partial slabs, report the last full slab, if
597 	 * any.
598 	 *
599 	 * Yes, this is ugly, but it's cleaner than the other possibilities.
600 	 */
601 	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
602 		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
603 
604 	return (WALK_NEXT);
605 }
606 
607 int
umem_slab_walk_step(mdb_walk_state_t * wsp)608 umem_slab_walk_step(mdb_walk_state_t *wsp)
609 {
610 	umem_slab_t s;
611 	uintptr_t addr = wsp->walk_addr;
612 	uintptr_t saddr = (uintptr_t)wsp->walk_data;
613 	uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
614 
615 	if (addr == saddr)
616 		return (WALK_DONE);
617 
618 	if (mdb_vread(&s, sizeof (s), addr) == -1) {
619 		mdb_warn("failed to read slab at %p", wsp->walk_addr);
620 		return (WALK_ERR);
621 	}
622 
623 	if ((uintptr_t)s.slab_cache != caddr) {
624 		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
625 		    addr, caddr, s.slab_cache);
626 		return (WALK_ERR);
627 	}
628 
629 	wsp->walk_addr = (uintptr_t)s.slab_next;
630 
631 	return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
632 }
633 
634 int
umem_cache(uintptr_t addr,uint_t flags,int ac,const mdb_arg_t * argv)635 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
636 {
637 	umem_cache_t c;
638 
639 	if (!(flags & DCMD_ADDRSPEC)) {
640 		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
641 			mdb_warn("can't walk umem_cache");
642 			return (DCMD_ERR);
643 		}
644 		return (DCMD_OK);
645 	}
646 
647 	if (DCMD_HDRSPEC(flags))
648 		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
649 		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
650 
651 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
652 		mdb_warn("couldn't read umem_cache at %p", addr);
653 		return (DCMD_ERR);
654 	}
655 
656 	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
657 	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
658 
659 	return (DCMD_OK);
660 }
661 
/*
 * qsort(3C)-style comparator for arrays of uintptr_t: orders the addresses
 * ascending and returns -1, 0 or 1.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t a = *(uintptr_t *)lhs;
	uintptr_t b = *(uintptr_t *)rhs;

	if (a == b)
		return (0);

	return (a < b ? -1 : 1);
}
674 
675 static int
bufctlcmp(const umem_bufctl_audit_t ** lhs,const umem_bufctl_audit_t ** rhs)676 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
677 {
678 	const umem_bufctl_audit_t *bcp1 = *lhs;
679 	const umem_bufctl_audit_t *bcp2 = *rhs;
680 
681 	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
682 		return (-1);
683 
684 	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
685 		return (1);
686 
687 	return (0);
688 }
689 
/*
 * State for the umem_hash walker: a local copy of the cache's hash table
 * (umhw_nelems bucket heads), the index of the next bucket to examine, and
 * the most recently read bufctl, whose bc_next is followed before moving on
 * to the next bucket.
 */
typedef struct umem_hash_walk {
	uintptr_t *umhw_table;
	size_t umhw_nelems;
	size_t umhw_pos;
	umem_bufctl_t umhw_cur;
} umem_hash_walk_t;
696 
697 int
umem_hash_walk_init(mdb_walk_state_t * wsp)698 umem_hash_walk_init(mdb_walk_state_t *wsp)
699 {
700 	umem_hash_walk_t *umhw;
701 	uintptr_t *hash;
702 	umem_cache_t c;
703 	uintptr_t haddr, addr = wsp->walk_addr;
704 	size_t nelems;
705 	size_t hsize;
706 
707 	if (addr == NULL) {
708 		mdb_warn("umem_hash doesn't support global walks\n");
709 		return (WALK_ERR);
710 	}
711 
712 	if (mdb_vread(&c, sizeof (c), addr) == -1) {
713 		mdb_warn("couldn't read cache at addr %p", addr);
714 		return (WALK_ERR);
715 	}
716 
717 	if (!(c.cache_flags & UMF_HASH)) {
718 		mdb_warn("cache %p doesn't have a hash table\n", addr);
719 		return (WALK_DONE);		/* nothing to do */
720 	}
721 
722 	umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
723 	umhw->umhw_cur.bc_next = NULL;
724 	umhw->umhw_pos = 0;
725 
726 	umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
727 	hsize = nelems * sizeof (uintptr_t);
728 	haddr = (uintptr_t)c.cache_hash_table;
729 
730 	umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
731 	if (mdb_vread(hash, hsize, haddr) == -1) {
732 		mdb_warn("failed to read hash table at %p", haddr);
733 		mdb_free(hash, hsize);
734 		mdb_free(umhw, sizeof (umem_hash_walk_t));
735 		return (WALK_ERR);
736 	}
737 
738 	wsp->walk_data = umhw;
739 
740 	return (WALK_NEXT);
741 }
742 
743 int
umem_hash_walk_step(mdb_walk_state_t * wsp)744 umem_hash_walk_step(mdb_walk_state_t *wsp)
745 {
746 	umem_hash_walk_t *umhw = wsp->walk_data;
747 	uintptr_t addr = NULL;
748 
749 	if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
750 		while (umhw->umhw_pos < umhw->umhw_nelems) {
751 			if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
752 				break;
753 		}
754 	}
755 	if (addr == NULL)
756 		return (WALK_DONE);
757 
758 	if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
759 		mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
760 		return (WALK_ERR);
761 	}
762 
763 	return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
764 }
765 
766 void
umem_hash_walk_fini(mdb_walk_state_t * wsp)767 umem_hash_walk_fini(mdb_walk_state_t *wsp)
768 {
769 	umem_hash_walk_t *umhw = wsp->walk_data;
770 
771 	if (umhw == NULL)
772 		return;
773 
774 	mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
775 	mdb_free(umhw, sizeof (umem_hash_walk_t));
776 }
777 
778 /*
779  * Find the address of the bufctl structure for the address 'buf' in cache
780  * 'cp', which is at address caddr, and place it in *out.
781  */
782 static int
umem_hash_lookup(umem_cache_t * cp,uintptr_t caddr,void * buf,uintptr_t * out)783 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
784 {
785 	uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
786 	umem_bufctl_t *bcp;
787 	umem_bufctl_t bc;
788 
789 	if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
790 		mdb_warn("unable to read hash bucket for %p in cache %p",
791 		    buf, caddr);
792 		return (-1);
793 	}
794 
795 	while (bcp != NULL) {
796 		if (mdb_vread(&bc, sizeof (umem_bufctl_t),
797 		    (uintptr_t)bcp) == -1) {
798 			mdb_warn("unable to read bufctl at %p", bcp);
799 			return (-1);
800 		}
801 		if (bc.bc_addr == buf) {
802 			*out = (uintptr_t)bcp;
803 			return (0);
804 		}
805 		bcp = bc.bc_next;
806 	}
807 
808 	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
809 	return (-1);
810 }
811 
812 int
umem_get_magsize(const umem_cache_t * cp)813 umem_get_magsize(const umem_cache_t *cp)
814 {
815 	uintptr_t addr = (uintptr_t)cp->cache_magtype;
816 	GElf_Sym mt_sym;
817 	umem_magtype_t mt;
818 	int res;
819 
820 	/*
821 	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
822 	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
823 	 * it is okay to return 0 for them.
824 	 */
825 	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
826 	    (cp->cache_flags & UMF_NOMAGAZINE))
827 		return (res);
828 
829 	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
830 		mdb_warn("unable to read 'umem_magtype'");
831 	} else if (addr < mt_sym.st_value ||
832 	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
833 	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
834 		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
835 		    cp->cache_name, addr);
836 		return (0);
837 	}
838 	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
839 		mdb_warn("unable to read magtype at %a", addr);
840 		return (0);
841 	}
842 	return (mt.mt_magsize);
843 }
844 
845 /*ARGSUSED*/
846 static int
umem_estimate_slab(uintptr_t addr,const umem_slab_t * sp,size_t * est)847 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
848 {
849 	*est -= (sp->slab_chunks - sp->slab_refcnt);
850 
851 	return (WALK_NEXT);
852 }
853 
854 /*
855  * Returns an upper bound on the number of allocated buffers in a given
856  * cache.
857  */
858 size_t
umem_estimate_allocated(uintptr_t addr,const umem_cache_t * cp)859 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
860 {
861 	int magsize;
862 	size_t cache_est;
863 
864 	cache_est = cp->cache_buftotal;
865 
866 	(void) mdb_pwalk("umem_slab_partial",
867 	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
868 
869 	if ((magsize = umem_get_magsize(cp)) != 0) {
870 		size_t mag_est = cp->cache_full.ml_total * magsize;
871 
872 		if (cache_est >= mag_est) {
873 			cache_est -= mag_est;
874 		} else {
875 			mdb_warn("cache %p's magazine layer holds more buffers "
876 			    "than the slab layer.\n", addr);
877 		}
878 	}
879 	return (cache_est);
880 }
881 
/*
 * Read the magazine at target address `ump' into the local buffer `mp' and
 * append its first `rounds' entries to maglist[].
 *
 * This macro is tied to the locals of umem_read_magazines(): it uses mp,
 * ump, magbsize, maglist, magcnt, magmax and i, and jumps to that
 * function's `fail' label if the read fails or maglist[] fills up.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is a single statement wherever it is used, and the argument
 * is parenthesized so that any expression may be passed.
 */
#define	READMAG_ROUNDS(rounds) do { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < (rounds); i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
} while (0)
896 
897 static int
umem_read_magazines(umem_cache_t * cp,uintptr_t addr,void *** maglistp,size_t * magcntp,size_t * magmaxp)898 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
899     void ***maglistp, size_t *magcntp, size_t *magmaxp)
900 {
901 	umem_magazine_t *ump, *mp;
902 	void **maglist = NULL;
903 	int i, cpu;
904 	size_t magsize, magmax, magbsize;
905 	size_t magcnt = 0;
906 
907 	/*
908 	 * Read the magtype out of the cache, after verifying the pointer's
909 	 * correctness.
910 	 */
911 	magsize = umem_get_magsize(cp);
912 	if (magsize == 0) {
913 		*maglistp = NULL;
914 		*magcntp = 0;
915 		*magmaxp = 0;
916 		return (0);
917 	}
918 
919 	/*
920 	 * There are several places where we need to go buffer hunting:
921 	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
922 	 * and the full magazine list in the depot.
923 	 *
924 	 * For an upper bound on the number of buffers in the magazine
925 	 * layer, we have the number of magazines on the cache_full
926 	 * list plus at most two magazines per CPU (the loaded and the
927 	 * spare).  Toss in 100 magazines as a fudge factor in case this
928 	 * is live (the number "100" comes from the same fudge factor in
929 	 * crash(1M)).
930 	 */
931 	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
932 	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
933 
934 	if (magbsize >= PAGESIZE / 2) {
935 		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
936 		    addr, magbsize);
937 		return (-1);
938 	}
939 
940 	maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
941 	mp = mdb_alloc(magbsize, UM_SLEEP);
942 	if (mp == NULL || maglist == NULL)
943 		goto fail;
944 
945 	/*
946 	 * First up: the magazines in the depot (i.e. on the cache_full list).
947 	 */
948 	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
949 		READMAG_ROUNDS(magsize);
950 		ump = mp->mag_next;
951 
952 		if (ump == cp->cache_full.ml_list)
953 			break; /* cache_full list loop detected */
954 	}
955 
956 	dprintf(("cache_full list done\n"));
957 
958 	/*
959 	 * Now whip through the CPUs, snagging the loaded magazines
960 	 * and full spares.
961 	 */
962 	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
963 		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
964 
965 		dprintf(("reading cpu cache %p\n",
966 		    (uintptr_t)ccp - (uintptr_t)cp + addr));
967 
968 		if (ccp->cc_rounds > 0 &&
969 		    (ump = ccp->cc_loaded) != NULL) {
970 			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
971 			READMAG_ROUNDS(ccp->cc_rounds);
972 		}
973 
974 		if (ccp->cc_prounds > 0 &&
975 		    (ump = ccp->cc_ploaded) != NULL) {
976 			dprintf(("reading %d previously loaded rounds\n",
977 			    ccp->cc_prounds));
978 			READMAG_ROUNDS(ccp->cc_prounds);
979 		}
980 	}
981 
982 	dprintf(("magazine layer: %d buffers\n", magcnt));
983 
984 	mdb_free(mp, magbsize);
985 
986 	*maglistp = maglist;
987 	*magcntp = magcnt;
988 	*magmaxp = magmax;
989 
990 	return (0);
991 
992 fail:
993 	if (mp)
994 		mdb_free(mp, magbsize);
995 	if (maglist)
996 		mdb_free(maglist, magmax * sizeof (void *));
997 
998 	return (-1);
999 }
1000 
/*
 * Growable array of buffer addresses filled in by umem_read_ptc_walk_buf():
 * the array itself, the number of entries in use, and its capacity in
 * entries.
 */
typedef struct umem_read_ptc_walk {
	void **urpw_buf;
	size_t urpw_cnt;
	size_t urpw_max;
} umem_read_ptc_walk_t;
1006 
1007 /*ARGSUSED*/
1008 static int
umem_read_ptc_walk_buf(uintptr_t addr,const void * ignored,umem_read_ptc_walk_t * urpw)1009 umem_read_ptc_walk_buf(uintptr_t addr,
1010     const void *ignored, umem_read_ptc_walk_t *urpw)
1011 {
1012 	if (urpw->urpw_cnt == urpw->urpw_max) {
1013 		size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1014 		void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1015 
1016 		if (nmax > 1) {
1017 			size_t osize = urpw->urpw_max * sizeof (void *);
1018 			bcopy(urpw->urpw_buf, new, osize);
1019 			mdb_free(urpw->urpw_buf, osize);
1020 		}
1021 
1022 		urpw->urpw_buf = new;
1023 		urpw->urpw_max = nmax;
1024 	}
1025 
1026 	urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1027 
1028 	return (WALK_NEXT);
1029 }
1030 
1031 static int
umem_read_ptc(umem_cache_t * cp,void *** buflistp,size_t * bufcntp,size_t * bufmaxp)1032 umem_read_ptc(umem_cache_t *cp,
1033     void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1034 {
1035 	umem_read_ptc_walk_t urpw;
1036 	char walk[60];
1037 	int rval;
1038 
1039 	if (!(cp->cache_flags & UMF_PTC))
1040 		return (0);
1041 
1042 	(void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1043 	    cp->cache_bufsize);
1044 
1045 	urpw.urpw_buf = *buflistp;
1046 	urpw.urpw_cnt = *bufcntp;
1047 	urpw.urpw_max = *bufmaxp;
1048 
1049 	if ((rval = mdb_walk(walk,
1050 	    (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1051 		mdb_warn("couldn't walk %s", walk);
1052 	}
1053 
1054 	*buflistp = urpw.urpw_buf;
1055 	*bufcntp = urpw.urpw_cnt;
1056 	*bufmaxp = urpw.urpw_max;
1057 
1058 	return (rval);
1059 }
1060 
1061 static int
umem_walk_callback(mdb_walk_state_t * wsp,uintptr_t buf)1062 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1063 {
1064 	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1065 }
1066 
1067 static int
bufctl_walk_callback(umem_cache_t * cp,mdb_walk_state_t * wsp,uintptr_t buf)1068 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1069 {
1070 	umem_bufctl_audit_t *b;
1071 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1072 
1073 	/*
1074 	 * if UMF_AUDIT is not set, we know that we're looking at a
1075 	 * umem_bufctl_t.
1076 	 */
1077 	if (!(cp->cache_flags & UMF_AUDIT) ||
1078 	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1079 		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1080 		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1081 			mdb_warn("unable to read bufctl at %p", buf);
1082 			return (WALK_ERR);
1083 		}
1084 	}
1085 
1086 	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1087 }
1088 
/*
 * State for the umem buffer walkers.  It is set up by
 * umem_walk_init_common(), most of which lies outside the visible part of
 * this file, so the notes below are limited to what the declarations and
 * their original comments state.
 */
typedef struct umem_walk {
	int umw_type;

	uintptr_t umw_addr;		/* cache address */
	umem_cache_t *umw_cp;
	size_t umw_csize;

	/*
	 * magazine layer
	 */
	void **umw_maglist;
	size_t umw_max;
	size_t umw_count;
	size_t umw_pos;

	/*
	 * slab layer
	 */
	char *umw_valid;	/* to keep track of freed buffers */
	char *umw_ubase;	/* buffer for slab data */
} umem_walk_t;
1110 
1111 static int
umem_walk_init_common(mdb_walk_state_t * wsp,int type)1112 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1113 {
1114 	umem_walk_t *umw;
1115 	int csize;
1116 	umem_cache_t *cp;
1117 	size_t vm_quantum;
1118 
1119 	size_t magmax, magcnt;
1120 	void **maglist = NULL;
1121 	uint_t chunksize, slabsize;
1122 	int status = WALK_ERR;
1123 	uintptr_t addr = wsp->walk_addr;
1124 	const char *layered;
1125 
1126 	type &= ~UM_HASH;
1127 
1128 	if (addr == NULL) {
1129 		mdb_warn("umem walk doesn't support global walks\n");
1130 		return (WALK_ERR);
1131 	}
1132 
1133 	dprintf(("walking %p\n", addr));
1134 
1135 	/*
1136 	 * The number of "cpus" determines how large the cache is.
1137 	 */
1138 	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1139 	cp = mdb_alloc(csize, UM_SLEEP);
1140 
1141 	if (mdb_vread(cp, csize, addr) == -1) {
1142 		mdb_warn("couldn't read cache at addr %p", addr);
1143 		goto out2;
1144 	}
1145 
1146 	/*
1147 	 * It's easy for someone to hand us an invalid cache address.
1148 	 * Unfortunately, it is hard for this walker to survive an
1149 	 * invalid cache cleanly.  So we make sure that:
1150 	 *
1151 	 *	1. the vmem arena for the cache is readable,
1152 	 *	2. the vmem arena's quantum is a power of 2,
1153 	 *	3. our slabsize is a multiple of the quantum, and
1154 	 *	4. our chunksize is >0 and less than our slabsize.
1155 	 */
1156 	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1157 	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1158 	    vm_quantum == 0 ||
1159 	    (vm_quantum & (vm_quantum - 1)) != 0 ||
1160 	    cp->cache_slabsize < vm_quantum ||
1161 	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1162 	    cp->cache_chunksize == 0 ||
1163 	    cp->cache_chunksize > cp->cache_slabsize) {
1164 		mdb_warn("%p is not a valid umem_cache_t\n", addr);
1165 		goto out2;
1166 	}
1167 
1168 	dprintf(("buf total is %d\n", cp->cache_buftotal));
1169 
1170 	if (cp->cache_buftotal == 0) {
1171 		mdb_free(cp, csize);
1172 		return (WALK_DONE);
1173 	}
1174 
1175 	/*
1176 	 * If they ask for bufctls, but it's a small-slab cache,
1177 	 * there is nothing to report.
1178 	 */
1179 	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1180 		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1181 		    cp->cache_flags));
1182 		mdb_free(cp, csize);
1183 		return (WALK_DONE);
1184 	}
1185 
1186 	/*
1187 	 * Read in the contents of the magazine layer
1188 	 */
1189 	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1190 		goto out2;
1191 
1192 	/*
1193 	 * Read in the contents of the per-thread caches, if any
1194 	 */
1195 	if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1196 		goto out2;
1197 
1198 	/*
1199 	 * We have all of the buffers from the magazines and from the
1200 	 * per-thread cache (if any);  if we are walking allocated buffers,
1201 	 * sort them so we can bsearch them later.
1202 	 */
1203 	if (type & UM_ALLOCATED)
1204 		qsort(maglist, magcnt, sizeof (void *), addrcmp);
1205 
1206 	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1207 
1208 	umw->umw_type = type;
1209 	umw->umw_addr = addr;
1210 	umw->umw_cp = cp;
1211 	umw->umw_csize = csize;
1212 	umw->umw_maglist = maglist;
1213 	umw->umw_max = magmax;
1214 	umw->umw_count = magcnt;
1215 	umw->umw_pos = 0;
1216 
1217 	/*
1218 	 * When walking allocated buffers in a UMF_HASH cache, we walk the
1219 	 * hash table instead of the slab layer.
1220 	 */
1221 	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1222 		layered = "umem_hash";
1223 
1224 		umw->umw_type |= UM_HASH;
1225 	} else {
1226 		/*
1227 		 * If we are walking freed buffers, we only need the
1228 		 * magazine layer plus the partially allocated slabs.
1229 		 * To walk allocated buffers, we need all of the slabs.
1230 		 */
1231 		if (type & UM_ALLOCATED)
1232 			layered = "umem_slab";
1233 		else
1234 			layered = "umem_slab_partial";
1235 
1236 		/*
1237 		 * for small-slab caches, we read in the entire slab.  For
1238 		 * freed buffers, we can just walk the freelist.  For
1239 		 * allocated buffers, we use a 'valid' array to track
1240 		 * the freed buffers.
1241 		 */
1242 		if (!(cp->cache_flags & UMF_HASH)) {
1243 			chunksize = cp->cache_chunksize;
1244 			slabsize = cp->cache_slabsize;
1245 
1246 			umw->umw_ubase = mdb_alloc(slabsize +
1247 			    sizeof (umem_bufctl_t), UM_SLEEP);
1248 
1249 			if (type & UM_ALLOCATED)
1250 				umw->umw_valid =
1251 				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
1252 		}
1253 	}
1254 
1255 	status = WALK_NEXT;
1256 
1257 	if (mdb_layered_walk(layered, wsp) == -1) {
1258 		mdb_warn("unable to start layered '%s' walk", layered);
1259 		status = WALK_ERR;
1260 	}
1261 
1262 out1:
1263 	if (status == WALK_ERR) {
1264 		if (umw->umw_valid)
1265 			mdb_free(umw->umw_valid, slabsize / chunksize);
1266 
1267 		if (umw->umw_ubase)
1268 			mdb_free(umw->umw_ubase, slabsize +
1269 			    sizeof (umem_bufctl_t));
1270 
1271 		if (umw->umw_maglist)
1272 			mdb_free(umw->umw_maglist, umw->umw_max *
1273 			    sizeof (uintptr_t));
1274 
1275 		mdb_free(umw, sizeof (umem_walk_t));
1276 		wsp->walk_data = NULL;
1277 	}
1278 
1279 out2:
1280 	if (status == WALK_ERR)
1281 		mdb_free(cp, csize);
1282 
1283 	return (status);
1284 }
1285 
1286 int
umem_walk_step(mdb_walk_state_t * wsp)1287 umem_walk_step(mdb_walk_state_t *wsp)
1288 {
1289 	umem_walk_t *umw = wsp->walk_data;
1290 	int type = umw->umw_type;
1291 	umem_cache_t *cp = umw->umw_cp;
1292 
1293 	void **maglist = umw->umw_maglist;
1294 	int magcnt = umw->umw_count;
1295 
1296 	uintptr_t chunksize, slabsize;
1297 	uintptr_t addr;
1298 	const umem_slab_t *sp;
1299 	const umem_bufctl_t *bcp;
1300 	umem_bufctl_t bc;
1301 
1302 	int chunks;
1303 	char *kbase;
1304 	void *buf;
1305 	int i, ret;
1306 
1307 	char *valid, *ubase;
1308 
1309 	/*
1310 	 * first, handle the 'umem_hash' layered walk case
1311 	 */
1312 	if (type & UM_HASH) {
1313 		/*
1314 		 * We have a buffer which has been allocated out of the
1315 		 * global layer. We need to make sure that it's not
1316 		 * actually sitting in a magazine before we report it as
1317 		 * an allocated buffer.
1318 		 */
1319 		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1320 
1321 		if (magcnt > 0 &&
1322 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1323 		    addrcmp) != NULL)
1324 			return (WALK_NEXT);
1325 
1326 		if (type & UM_BUFCTL)
1327 			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1328 
1329 		return (umem_walk_callback(wsp, (uintptr_t)buf));
1330 	}
1331 
1332 	ret = WALK_NEXT;
1333 
1334 	addr = umw->umw_addr;
1335 
1336 	/*
1337 	 * If we're walking freed buffers, report everything in the
1338 	 * magazine layer before processing the first slab.
1339 	 */
1340 	if ((type & UM_FREE) && magcnt != 0) {
1341 		umw->umw_count = 0;		/* only do this once */
1342 		for (i = 0; i < magcnt; i++) {
1343 			buf = maglist[i];
1344 
1345 			if (type & UM_BUFCTL) {
1346 				uintptr_t out;
1347 
1348 				if (cp->cache_flags & UMF_BUFTAG) {
1349 					umem_buftag_t *btp;
1350 					umem_buftag_t tag;
1351 
1352 					/* LINTED - alignment */
1353 					btp = UMEM_BUFTAG(cp, buf);
1354 					if (mdb_vread(&tag, sizeof (tag),
1355 					    (uintptr_t)btp) == -1) {
1356 						mdb_warn("reading buftag for "
1357 						    "%p at %p", buf, btp);
1358 						continue;
1359 					}
1360 					out = (uintptr_t)tag.bt_bufctl;
1361 				} else {
1362 					if (umem_hash_lookup(cp, addr, buf,
1363 					    &out) == -1)
1364 						continue;
1365 				}
1366 				ret = bufctl_walk_callback(cp, wsp, out);
1367 			} else {
1368 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1369 			}
1370 
1371 			if (ret != WALK_NEXT)
1372 				return (ret);
1373 		}
1374 	}
1375 
1376 	/*
1377 	 * Handle the buffers in the current slab
1378 	 */
1379 	chunksize = cp->cache_chunksize;
1380 	slabsize = cp->cache_slabsize;
1381 
1382 	sp = wsp->walk_layer;
1383 	chunks = sp->slab_chunks;
1384 	kbase = sp->slab_base;
1385 
1386 	dprintf(("kbase is %p\n", kbase));
1387 
1388 	if (!(cp->cache_flags & UMF_HASH)) {
1389 		valid = umw->umw_valid;
1390 		ubase = umw->umw_ubase;
1391 
1392 		if (mdb_vread(ubase, chunks * chunksize,
1393 		    (uintptr_t)kbase) == -1) {
1394 			mdb_warn("failed to read slab contents at %p", kbase);
1395 			return (WALK_ERR);
1396 		}
1397 
1398 		/*
1399 		 * Set up the valid map as fully allocated -- we'll punch
1400 		 * out the freelist.
1401 		 */
1402 		if (type & UM_ALLOCATED)
1403 			(void) memset(valid, 1, chunks);
1404 	} else {
1405 		valid = NULL;
1406 		ubase = NULL;
1407 	}
1408 
1409 	/*
1410 	 * walk the slab's freelist
1411 	 */
1412 	bcp = sp->slab_head;
1413 
1414 	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1415 
1416 	/*
1417 	 * since we could be in the middle of allocating a buffer,
1418 	 * our refcnt could be one higher than it aught.  So we
1419 	 * check one further on the freelist than the count allows.
1420 	 */
1421 	for (i = sp->slab_refcnt; i <= chunks; i++) {
1422 		uint_t ndx;
1423 
1424 		dprintf(("bcp is %p\n", bcp));
1425 
1426 		if (bcp == NULL) {
1427 			if (i == chunks)
1428 				break;
1429 			mdb_warn(
1430 			    "slab %p in cache %p freelist too short by %d\n",
1431 			    sp, addr, chunks - i);
1432 			break;
1433 		}
1434 
1435 		if (cp->cache_flags & UMF_HASH) {
1436 			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1437 				mdb_warn("failed to read bufctl ptr at %p",
1438 				    bcp);
1439 				break;
1440 			}
1441 			buf = bc.bc_addr;
1442 		} else {
1443 			/*
1444 			 * Otherwise the buffer is (or should be) in the slab
1445 			 * that we've read in; determine its offset in the
1446 			 * slab, validate that it's not corrupt, and add to
1447 			 * our base address to find the umem_bufctl_t.  (Note
1448 			 * that we don't need to add the size of the bufctl
1449 			 * to our offset calculation because of the slop that's
1450 			 * allocated for the buffer at ubase.)
1451 			 */
1452 			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1453 
1454 			if (offs > chunks * chunksize) {
1455 				mdb_warn("found corrupt bufctl ptr %p"
1456 				    " in slab %p in cache %p\n", bcp,
1457 				    wsp->walk_addr, addr);
1458 				break;
1459 			}
1460 
1461 			bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1462 			buf = UMEM_BUF(cp, bcp);
1463 		}
1464 
1465 		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1466 
1467 		if (ndx > slabsize / cp->cache_bufsize) {
1468 			/*
1469 			 * This is very wrong; we have managed to find
1470 			 * a buffer in the slab which shouldn't
1471 			 * actually be here.  Emit a warning, and
1472 			 * try to continue.
1473 			 */
1474 			mdb_warn("buf %p is out of range for "
1475 			    "slab %p, cache %p\n", buf, sp, addr);
1476 		} else if (type & UM_ALLOCATED) {
1477 			/*
1478 			 * we have found a buffer on the slab's freelist;
1479 			 * clear its entry
1480 			 */
1481 			valid[ndx] = 0;
1482 		} else {
1483 			/*
1484 			 * Report this freed buffer
1485 			 */
1486 			if (type & UM_BUFCTL) {
1487 				ret = bufctl_walk_callback(cp, wsp,
1488 				    (uintptr_t)bcp);
1489 			} else {
1490 				ret = umem_walk_callback(wsp, (uintptr_t)buf);
1491 			}
1492 			if (ret != WALK_NEXT)
1493 				return (ret);
1494 		}
1495 
1496 		bcp = bc.bc_next;
1497 	}
1498 
1499 	if (bcp != NULL) {
1500 		dprintf(("slab %p in cache %p freelist too long (%p)\n",
1501 		    sp, addr, bcp));
1502 	}
1503 
1504 	/*
1505 	 * If we are walking freed buffers, the loop above handled reporting
1506 	 * them.
1507 	 */
1508 	if (type & UM_FREE)
1509 		return (WALK_NEXT);
1510 
1511 	if (type & UM_BUFCTL) {
1512 		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1513 		    "cache %p\n", addr);
1514 		return (WALK_ERR);
1515 	}
1516 
1517 	/*
1518 	 * Report allocated buffers, skipping buffers in the magazine layer.
1519 	 * We only get this far for small-slab caches.
1520 	 */
1521 	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1522 		buf = (char *)kbase + i * chunksize;
1523 
1524 		if (!valid[i])
1525 			continue;		/* on slab freelist */
1526 
1527 		if (magcnt > 0 &&
1528 		    bsearch(&buf, maglist, magcnt, sizeof (void *),
1529 		    addrcmp) != NULL)
1530 			continue;		/* in magazine layer */
1531 
1532 		ret = umem_walk_callback(wsp, (uintptr_t)buf);
1533 	}
1534 	return (ret);
1535 }
1536 
1537 void
umem_walk_fini(mdb_walk_state_t * wsp)1538 umem_walk_fini(mdb_walk_state_t *wsp)
1539 {
1540 	umem_walk_t *umw = wsp->walk_data;
1541 	uintptr_t chunksize;
1542 	uintptr_t slabsize;
1543 
1544 	if (umw == NULL)
1545 		return;
1546 
1547 	if (umw->umw_maglist != NULL)
1548 		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1549 
1550 	chunksize = umw->umw_cp->cache_chunksize;
1551 	slabsize = umw->umw_cp->cache_slabsize;
1552 
1553 	if (umw->umw_valid != NULL)
1554 		mdb_free(umw->umw_valid, slabsize / chunksize);
1555 	if (umw->umw_ubase != NULL)
1556 		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1557 
1558 	mdb_free(umw->umw_cp, umw->umw_csize);
1559 	mdb_free(umw, sizeof (umem_walk_t));
1560 }
1561 
1562 /*ARGSUSED*/
1563 static int
umem_walk_all(uintptr_t addr,const umem_cache_t * c,mdb_walk_state_t * wsp)1564 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1565 {
1566 	/*
1567 	 * Buffers allocated from NOTOUCH caches can also show up as freed
1568 	 * memory in other caches.  This can be a little confusing, so we
1569 	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1570 	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1571 	 */
1572 	if (c->cache_cflags & UMC_NOTOUCH)
1573 		return (WALK_NEXT);
1574 
1575 	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1576 	    wsp->walk_cbdata, addr) == -1)
1577 		return (WALK_DONE);
1578 
1579 	return (WALK_NEXT);
1580 }
1581 
/*
 * Perform a global walk: stash the walker name in (wsp)->walk_data and run
 * umem_walk_all() over every cache via the "umem_cache" walker.
 *
 * NOTE: this macro always returns from the enclosing function -- WALK_ERR
 * if the "umem_cache" walk could not be run, WALK_DONE otherwise.  It is
 * wrapped in do { } while (0) so that it behaves as a single statement
 * (e.g. as the body of an if), and its wsp argument is parenthesized so
 * that any pointer expression may be passed.
 */
#define	UMEM_WALK_ALL(name, wsp) \
	do { \
		(wsp)->walk_data = (name); \
		if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, \
		    (wsp)) == -1) \
			return (WALK_ERR); \
		return (WALK_DONE); \
	/*CONSTCOND*/ \
	} while (0)
1588 
1589 int
umem_walk_init(mdb_walk_state_t * wsp)1590 umem_walk_init(mdb_walk_state_t *wsp)
1591 {
1592 	if (wsp->walk_arg != NULL)
1593 		wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1594 
1595 	if (wsp->walk_addr == NULL)
1596 		UMEM_WALK_ALL("umem", wsp);
1597 	return (umem_walk_init_common(wsp, UM_ALLOCATED));
1598 }
1599 
1600 int
bufctl_walk_init(mdb_walk_state_t * wsp)1601 bufctl_walk_init(mdb_walk_state_t *wsp)
1602 {
1603 	if (wsp->walk_addr == NULL)
1604 		UMEM_WALK_ALL("bufctl", wsp);
1605 	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1606 }
1607 
1608 int
freemem_walk_init(mdb_walk_state_t * wsp)1609 freemem_walk_init(mdb_walk_state_t *wsp)
1610 {
1611 	if (wsp->walk_addr == NULL)
1612 		UMEM_WALK_ALL("freemem", wsp);
1613 	return (umem_walk_init_common(wsp, UM_FREE));
1614 }
1615 
1616 int
freectl_walk_init(mdb_walk_state_t * wsp)1617 freectl_walk_init(mdb_walk_state_t *wsp)
1618 {
1619 	if (wsp->walk_addr == NULL)
1620 		UMEM_WALK_ALL("freectl", wsp);
1621 	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1622 }
1623 
1624 typedef struct bufctl_history_walk {
1625 	void		*bhw_next;
1626 	umem_cache_t	*bhw_cache;
1627 	umem_slab_t	*bhw_slab;
1628 	hrtime_t	bhw_timestamp;
1629 } bufctl_history_walk_t;
1630 
1631 int
bufctl_history_walk_init(mdb_walk_state_t * wsp)1632 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1633 {
1634 	bufctl_history_walk_t *bhw;
1635 	umem_bufctl_audit_t bc;
1636 	umem_bufctl_audit_t bcn;
1637 
1638 	if (wsp->walk_addr == NULL) {
1639 		mdb_warn("bufctl_history walk doesn't support global walks\n");
1640 		return (WALK_ERR);
1641 	}
1642 
1643 	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1644 		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1645 		return (WALK_ERR);
1646 	}
1647 
1648 	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1649 	bhw->bhw_timestamp = 0;
1650 	bhw->bhw_cache = bc.bc_cache;
1651 	bhw->bhw_slab = bc.bc_slab;
1652 
1653 	/*
1654 	 * sometimes the first log entry matches the base bufctl;  in that
1655 	 * case, skip the base bufctl.
1656 	 */
1657 	if (bc.bc_lastlog != NULL &&
1658 	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1659 	    bc.bc_addr == bcn.bc_addr &&
1660 	    bc.bc_cache == bcn.bc_cache &&
1661 	    bc.bc_slab == bcn.bc_slab &&
1662 	    bc.bc_timestamp == bcn.bc_timestamp &&
1663 	    bc.bc_thread == bcn.bc_thread)
1664 		bhw->bhw_next = bc.bc_lastlog;
1665 	else
1666 		bhw->bhw_next = (void *)wsp->walk_addr;
1667 
1668 	wsp->walk_addr = (uintptr_t)bc.bc_addr;
1669 	wsp->walk_data = bhw;
1670 
1671 	return (WALK_NEXT);
1672 }
1673 
1674 int
bufctl_history_walk_step(mdb_walk_state_t * wsp)1675 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1676 {
1677 	bufctl_history_walk_t *bhw = wsp->walk_data;
1678 	uintptr_t addr = (uintptr_t)bhw->bhw_next;
1679 	uintptr_t baseaddr = wsp->walk_addr;
1680 	umem_bufctl_audit_t *b;
1681 	UMEM_LOCAL_BUFCTL_AUDIT(&b);
1682 
1683 	if (addr == NULL)
1684 		return (WALK_DONE);
1685 
1686 	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1687 		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1688 		return (WALK_ERR);
1689 	}
1690 
1691 	/*
1692 	 * The bufctl is only valid if the address, cache, and slab are
1693 	 * correct.  We also check that the timestamp is decreasing, to
1694 	 * prevent infinite loops.
1695 	 */
1696 	if ((uintptr_t)b->bc_addr != baseaddr ||
1697 	    b->bc_cache != bhw->bhw_cache ||
1698 	    b->bc_slab != bhw->bhw_slab ||
1699 	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1700 		return (WALK_DONE);
1701 
1702 	bhw->bhw_next = b->bc_lastlog;
1703 	bhw->bhw_timestamp = b->bc_timestamp;
1704 
1705 	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1706 }
1707 
1708 void
bufctl_history_walk_fini(mdb_walk_state_t * wsp)1709 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1710 {
1711 	bufctl_history_walk_t *bhw = wsp->walk_data;
1712 
1713 	mdb_free(bhw, sizeof (*bhw));
1714 }
1715 
1716 typedef struct umem_log_walk {
1717 	umem_bufctl_audit_t *ulw_base;
1718 	umem_bufctl_audit_t **ulw_sorted;
1719 	umem_log_header_t ulw_lh;
1720 	size_t ulw_size;
1721 	size_t ulw_maxndx;
1722 	size_t ulw_ndx;
1723 } umem_log_walk_t;
1724 
1725 int
umem_log_walk_init(mdb_walk_state_t * wsp)1726 umem_log_walk_init(mdb_walk_state_t *wsp)
1727 {
1728 	uintptr_t lp = wsp->walk_addr;
1729 	umem_log_walk_t *ulw;
1730 	umem_log_header_t *lhp;
1731 	int maxndx, i, j, k;
1732 
1733 	/*
1734 	 * By default (global walk), walk the umem_transaction_log.  Otherwise
1735 	 * read the log whose umem_log_header_t is stored at walk_addr.
1736 	 */
1737 	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1738 		mdb_warn("failed to read 'umem_transaction_log'");
1739 		return (WALK_ERR);
1740 	}
1741 
1742 	if (lp == NULL) {
1743 		mdb_warn("log is disabled\n");
1744 		return (WALK_ERR);
1745 	}
1746 
1747 	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1748 	lhp = &ulw->ulw_lh;
1749 
1750 	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1751 		mdb_warn("failed to read log header at %p", lp);
1752 		mdb_free(ulw, sizeof (umem_log_walk_t));
1753 		return (WALK_ERR);
1754 	}
1755 
1756 	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1757 	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1758 	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1759 
1760 	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1761 	    (uintptr_t)lhp->lh_base) == -1) {
1762 		mdb_warn("failed to read log at base %p", lhp->lh_base);
1763 		mdb_free(ulw->ulw_base, ulw->ulw_size);
1764 		mdb_free(ulw, sizeof (umem_log_walk_t));
1765 		return (WALK_ERR);
1766 	}
1767 
1768 	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1769 	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1770 
1771 	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1772 		caddr_t chunk = (caddr_t)
1773 		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1774 
1775 		for (j = 0; j < maxndx; j++) {
1776 			/* LINTED align */
1777 			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1778 			chunk += UMEM_BUFCTL_AUDIT_SIZE;
1779 		}
1780 	}
1781 
1782 	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1783 	    (int(*)(const void *, const void *))bufctlcmp);
1784 
1785 	ulw->ulw_maxndx = k;
1786 	wsp->walk_data = ulw;
1787 
1788 	return (WALK_NEXT);
1789 }
1790 
1791 int
umem_log_walk_step(mdb_walk_state_t * wsp)1792 umem_log_walk_step(mdb_walk_state_t *wsp)
1793 {
1794 	umem_log_walk_t *ulw = wsp->walk_data;
1795 	umem_bufctl_audit_t *bcp;
1796 
1797 	if (ulw->ulw_ndx == ulw->ulw_maxndx)
1798 		return (WALK_DONE);
1799 
1800 	bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1801 
1802 	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1803 	    (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1804 }
1805 
1806 void
umem_log_walk_fini(mdb_walk_state_t * wsp)1807 umem_log_walk_fini(mdb_walk_state_t *wsp)
1808 {
1809 	umem_log_walk_t *ulw = wsp->walk_data;
1810 
1811 	mdb_free(ulw->ulw_base, ulw->ulw_size);
1812 	mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1813 	    sizeof (umem_bufctl_audit_t *));
1814 	mdb_free(ulw, sizeof (umem_log_walk_t));
1815 }
1816 
1817 typedef struct allocdby_bufctl {
1818 	uintptr_t abb_addr;
1819 	hrtime_t abb_ts;
1820 } allocdby_bufctl_t;
1821 
1822 typedef struct allocdby_walk {
1823 	const char *abw_walk;
1824 	uintptr_t abw_thread;
1825 	size_t abw_nbufs;
1826 	size_t abw_size;
1827 	allocdby_bufctl_t *abw_buf;
1828 	size_t abw_ndx;
1829 } allocdby_walk_t;
1830 
1831 int
allocdby_walk_bufctl(uintptr_t addr,const umem_bufctl_audit_t * bcp,allocdby_walk_t * abw)1832 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1833     allocdby_walk_t *abw)
1834 {
1835 	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1836 		return (WALK_NEXT);
1837 
1838 	if (abw->abw_nbufs == abw->abw_size) {
1839 		allocdby_bufctl_t *buf;
1840 		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1841 
1842 		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1843 
1844 		bcopy(abw->abw_buf, buf, oldsize);
1845 		mdb_free(abw->abw_buf, oldsize);
1846 
1847 		abw->abw_size <<= 1;
1848 		abw->abw_buf = buf;
1849 	}
1850 
1851 	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1852 	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1853 	abw->abw_nbufs++;
1854 
1855 	return (WALK_NEXT);
1856 }
1857 
1858 /*ARGSUSED*/
1859 int
allocdby_walk_cache(uintptr_t addr,const umem_cache_t * c,allocdby_walk_t * abw)1860 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1861 {
1862 	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1863 	    abw, addr) == -1) {
1864 		mdb_warn("couldn't walk bufctl for cache %p", addr);
1865 		return (WALK_DONE);
1866 	}
1867 
1868 	return (WALK_NEXT);
1869 }
1870 
1871 static int
allocdby_cmp(const allocdby_bufctl_t * lhs,const allocdby_bufctl_t * rhs)1872 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1873 {
1874 	if (lhs->abb_ts < rhs->abb_ts)
1875 		return (1);
1876 	if (lhs->abb_ts > rhs->abb_ts)
1877 		return (-1);
1878 	return (0);
1879 }
1880 
1881 static int
allocdby_walk_init_common(mdb_walk_state_t * wsp,const char * walk)1882 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1883 {
1884 	allocdby_walk_t *abw;
1885 
1886 	if (wsp->walk_addr == NULL) {
1887 		mdb_warn("allocdby walk doesn't support global walks\n");
1888 		return (WALK_ERR);
1889 	}
1890 
1891 	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1892 
1893 	abw->abw_thread = wsp->walk_addr;
1894 	abw->abw_walk = walk;
1895 	abw->abw_size = 128;	/* something reasonable */
1896 	abw->abw_buf =
1897 	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1898 
1899 	wsp->walk_data = abw;
1900 
1901 	if (mdb_walk("umem_cache",
1902 	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1903 		mdb_warn("couldn't walk umem_cache");
1904 		allocdby_walk_fini(wsp);
1905 		return (WALK_ERR);
1906 	}
1907 
1908 	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1909 	    (int(*)(const void *, const void *))allocdby_cmp);
1910 
1911 	return (WALK_NEXT);
1912 }
1913 
1914 int
allocdby_walk_init(mdb_walk_state_t * wsp)1915 allocdby_walk_init(mdb_walk_state_t *wsp)
1916 {
1917 	return (allocdby_walk_init_common(wsp, "bufctl"));
1918 }
1919 
1920 int
freedby_walk_init(mdb_walk_state_t * wsp)1921 freedby_walk_init(mdb_walk_state_t *wsp)
1922 {
1923 	return (allocdby_walk_init_common(wsp, "freectl"));
1924 }
1925 
1926 int
allocdby_walk_step(mdb_walk_state_t * wsp)1927 allocdby_walk_step(mdb_walk_state_t *wsp)
1928 {
1929 	allocdby_walk_t *abw = wsp->walk_data;
1930 	uintptr_t addr;
1931 	umem_bufctl_audit_t *bcp;
1932 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1933 
1934 	if (abw->abw_ndx == abw->abw_nbufs)
1935 		return (WALK_DONE);
1936 
1937 	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1938 
1939 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1940 		mdb_warn("couldn't read bufctl at %p", addr);
1941 		return (WALK_DONE);
1942 	}
1943 
1944 	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1945 }
1946 
1947 void
allocdby_walk_fini(mdb_walk_state_t * wsp)1948 allocdby_walk_fini(mdb_walk_state_t *wsp)
1949 {
1950 	allocdby_walk_t *abw = wsp->walk_data;
1951 
1952 	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1953 	mdb_free(abw, sizeof (allocdby_walk_t));
1954 }
1955 
1956 /*ARGSUSED*/
1957 int
allocdby_walk(uintptr_t addr,const umem_bufctl_audit_t * bcp,void * ignored)1958 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1959 {
1960 	char c[MDB_SYM_NAMLEN];
1961 	GElf_Sym sym;
1962 	int i;
1963 
1964 	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1965 	for (i = 0; i < bcp->bc_depth; i++) {
1966 		if (mdb_lookup_by_addr(bcp->bc_stack[i],
1967 		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1968 			continue;
1969 		if (is_umem_sym(c, "umem_"))
1970 			continue;
1971 		mdb_printf("%s+0x%lx",
1972 		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1973 		break;
1974 	}
1975 	mdb_printf("\n");
1976 
1977 	return (WALK_NEXT);
1978 }
1979 
1980 static int
allocdby_common(uintptr_t addr,uint_t flags,const char * w)1981 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1982 {
1983 	if (!(flags & DCMD_ADDRSPEC))
1984 		return (DCMD_USAGE);
1985 
1986 	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1987 
1988 	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1989 		mdb_warn("can't walk '%s' for %p", w, addr);
1990 		return (DCMD_ERR);
1991 	}
1992 
1993 	return (DCMD_OK);
1994 }
1995 
1996 /*ARGSUSED*/
1997 int
allocdby(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)1998 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1999 {
2000 	return (allocdby_common(addr, flags, "allocdby"));
2001 }
2002 
2003 /*ARGSUSED*/
2004 int
freedby(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2005 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2006 {
2007 	return (allocdby_common(addr, flags, "freedby"));
2008 }
2009 
2010 typedef struct whatis_info {
2011 	mdb_whatis_t *wi_w;
2012 	const umem_cache_t *wi_cache;
2013 	const vmem_t *wi_vmem;
2014 	vmem_t *wi_msb_arena;
2015 	size_t wi_slab_size;
2016 	int wi_slab_found;
2017 	uint_t wi_freemem;
2018 } whatis_info_t;
2019 
2020 /* call one of our dcmd functions with "-v" and the provided address */
2021 static void
whatis_call_printer(mdb_dcmd_f * dcmd,uintptr_t addr)2022 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2023 {
2024 	mdb_arg_t a;
2025 	a.a_type = MDB_TYPE_STRING;
2026 	a.a_un.a_str = "-v";
2027 
2028 	mdb_printf(":\n");
2029 	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2030 }
2031 
2032 static void
whatis_print_umem(whatis_info_t * wi,uintptr_t maddr,uintptr_t addr,uintptr_t baddr)2033 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2034     uintptr_t baddr)
2035 {
2036 	mdb_whatis_t *w = wi->wi_w;
2037 	const umem_cache_t *cp = wi->wi_cache;
2038 	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2039 
2040 	int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2041 
2042 	mdb_whatis_report_object(w, maddr, addr, "");
2043 
2044 	if (baddr != 0 && !call_printer)
2045 		mdb_printf("bufctl %p ", baddr);
2046 
2047 	mdb_printf("%s from %s",
2048 	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2049 
2050 	if (call_printer && baddr != 0) {
2051 		whatis_call_printer(bufctl, baddr);
2052 		return;
2053 	}
2054 	mdb_printf("\n");
2055 }
2056 
2057 /*ARGSUSED*/
2058 static int
whatis_walk_umem(uintptr_t addr,void * ignored,whatis_info_t * wi)2059 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2060 {
2061 	mdb_whatis_t *w = wi->wi_w;
2062 
2063 	uintptr_t cur;
2064 	size_t size = wi->wi_cache->cache_bufsize;
2065 
2066 	while (mdb_whatis_match(w, addr, size, &cur))
2067 		whatis_print_umem(wi, cur, addr, NULL);
2068 
2069 	return (WHATIS_WALKRET(w));
2070 }
2071 
2072 /*ARGSUSED*/
2073 static int
whatis_walk_bufctl(uintptr_t baddr,const umem_bufctl_t * bcp,whatis_info_t * wi)2074 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2075 {
2076 	mdb_whatis_t *w = wi->wi_w;
2077 
2078 	uintptr_t cur;
2079 	uintptr_t addr = (uintptr_t)bcp->bc_addr;
2080 	size_t size = wi->wi_cache->cache_bufsize;
2081 
2082 	while (mdb_whatis_match(w, addr, size, &cur))
2083 		whatis_print_umem(wi, cur, addr, baddr);
2084 
2085 	return (WHATIS_WALKRET(w));
2086 }
2087 
2088 
2089 static int
whatis_walk_seg(uintptr_t addr,const vmem_seg_t * vs,whatis_info_t * wi)2090 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2091 {
2092 	mdb_whatis_t *w = wi->wi_w;
2093 
2094 	size_t size = vs->vs_end - vs->vs_start;
2095 	uintptr_t cur;
2096 
2097 	/* We're not interested in anything but alloc and free segments */
2098 	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2099 		return (WALK_NEXT);
2100 
2101 	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2102 		mdb_whatis_report_object(w, cur, vs->vs_start, "");
2103 
2104 		/*
2105 		 * If we're not printing it seperately, provide the vmem_seg
2106 		 * pointer if it has a stack trace.
2107 		 */
2108 		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2109 		    ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2110 		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2111 			mdb_printf("vmem_seg %p ", addr);
2112 		}
2113 
2114 		mdb_printf("%s from %s vmem arena",
2115 		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2116 		    wi->wi_vmem->vm_name);
2117 
2118 		if (!mdb_whatis_flags(w) & WHATIS_QUIET)
2119 			whatis_call_printer(vmem_seg, addr);
2120 		else
2121 			mdb_printf("\n");
2122 	}
2123 
2124 	return (WHATIS_WALKRET(w));
2125 }
2126 
2127 static int
whatis_walk_vmem(uintptr_t addr,const vmem_t * vmem,whatis_info_t * wi)2128 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2129 {
2130 	mdb_whatis_t *w = wi->wi_w;
2131 	const char *nm = vmem->vm_name;
2132 	wi->wi_vmem = vmem;
2133 
2134 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2135 		mdb_printf("Searching vmem arena %s...\n", nm);
2136 
2137 	if (mdb_pwalk("vmem_seg",
2138 	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2139 		mdb_warn("can't walk vmem seg for %p", addr);
2140 		return (WALK_NEXT);
2141 	}
2142 
2143 	return (WHATIS_WALKRET(w));
2144 }
2145 
2146 /*ARGSUSED*/
2147 static int
whatis_walk_slab(uintptr_t saddr,const umem_slab_t * sp,whatis_info_t * wi)2148 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2149 {
2150 	mdb_whatis_t *w = wi->wi_w;
2151 
2152 	/* It must overlap with the slab data, or it's not interesting */
2153 	if (mdb_whatis_overlaps(w,
2154 	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2155 		wi->wi_slab_found++;
2156 		return (WALK_DONE);
2157 	}
2158 	return (WALK_NEXT);
2159 }
2160 
2161 static int
whatis_walk_cache(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2162 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2163 {
2164 	mdb_whatis_t *w = wi->wi_w;
2165 	char *walk, *freewalk;
2166 	mdb_walk_cb_t func;
2167 	int do_bufctl;
2168 
2169 	/* Override the '-b' flag as necessary */
2170 	if (!(c->cache_flags & UMF_HASH))
2171 		do_bufctl = FALSE;	/* no bufctls to walk */
2172 	else if (c->cache_flags & UMF_AUDIT)
2173 		do_bufctl = TRUE;	/* we always want debugging info */
2174 	else
2175 		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2176 
2177 	if (do_bufctl) {
2178 		walk = "bufctl";
2179 		freewalk = "freectl";
2180 		func = (mdb_walk_cb_t)whatis_walk_bufctl;
2181 	} else {
2182 		walk = "umem";
2183 		freewalk = "freemem";
2184 		func = (mdb_walk_cb_t)whatis_walk_umem;
2185 	}
2186 
2187 	wi->wi_cache = c;
2188 
2189 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2190 		mdb_printf("Searching %s...\n", c->cache_name);
2191 
2192 	/*
2193 	 * If more then two buffers live on each slab, figure out if we're
2194 	 * interested in anything in any slab before doing the more expensive
2195 	 * umem/freemem (bufctl/freectl) walkers.
2196 	 */
2197 	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2198 	if (!(c->cache_flags & UMF_HASH))
2199 		wi->wi_slab_size -= sizeof (umem_slab_t);
2200 
2201 	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2202 		wi->wi_slab_found = 0;
2203 		if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2204 		    addr) == -1) {
2205 			mdb_warn("can't find umem_slab walker");
2206 			return (WALK_DONE);
2207 		}
2208 		if (wi->wi_slab_found == 0)
2209 			return (WALK_NEXT);
2210 	}
2211 
2212 	wi->wi_freemem = FALSE;
2213 	if (mdb_pwalk(walk, func, wi, addr) == -1) {
2214 		mdb_warn("can't find %s walker", walk);
2215 		return (WALK_DONE);
2216 	}
2217 
2218 	if (mdb_whatis_done(w))
2219 		return (WALK_DONE);
2220 
2221 	/*
2222 	 * We have searched for allocated memory; now search for freed memory.
2223 	 */
2224 	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2225 		mdb_printf("Searching %s for free memory...\n", c->cache_name);
2226 
2227 	wi->wi_freemem = TRUE;
2228 
2229 	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2230 		mdb_warn("can't find %s walker", freewalk);
2231 		return (WALK_DONE);
2232 	}
2233 
2234 	return (WHATIS_WALKRET(w));
2235 }
2236 
2237 static int
whatis_walk_touch(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2238 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2239 {
2240 	if (c->cache_arena == wi->wi_msb_arena ||
2241 	    (c->cache_cflags & UMC_NOTOUCH))
2242 		return (WALK_NEXT);
2243 
2244 	return (whatis_walk_cache(addr, c, wi));
2245 }
2246 
2247 static int
whatis_walk_metadata(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2248 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2249 {
2250 	if (c->cache_arena != wi->wi_msb_arena)
2251 		return (WALK_NEXT);
2252 
2253 	return (whatis_walk_cache(addr, c, wi));
2254 }
2255 
2256 static int
whatis_walk_notouch(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2257 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2258 {
2259 	if (c->cache_arena == wi->wi_msb_arena ||
2260 	    !(c->cache_cflags & UMC_NOTOUCH))
2261 		return (WALK_NEXT);
2262 
2263 	return (whatis_walk_cache(addr, c, wi));
2264 }
2265 
2266 /*ARGSUSED*/
2267 static int
whatis_run_umem(mdb_whatis_t * w,void * ignored)2268 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2269 {
2270 	whatis_info_t wi;
2271 
2272 	bzero(&wi, sizeof (wi));
2273 	wi.wi_w = w;
2274 
2275 	/* umem's metadata is allocated from the umem_internal_arena */
2276 	if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2277 		mdb_warn("unable to readvar \"umem_internal_arena\"");
2278 
2279 	/*
2280 	 * We process umem caches in the following order:
2281 	 *
2282 	 *	non-UMC_NOTOUCH, non-metadata	(typically the most interesting)
2283 	 *	metadata			(can be huge with UMF_AUDIT)
2284 	 *	UMC_NOTOUCH, non-metadata	(see umem_walk_all())
2285 	 */
2286 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2287 	    &wi) == -1 ||
2288 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2289 	    &wi) == -1 ||
2290 	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2291 	    &wi) == -1) {
2292 		mdb_warn("couldn't find umem_cache walker");
2293 		return (1);
2294 	}
2295 	return (0);
2296 }
2297 
2298 /*ARGSUSED*/
2299 static int
whatis_run_vmem(mdb_whatis_t * w,void * ignored)2300 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2301 {
2302 	whatis_info_t wi;
2303 
2304 	bzero(&wi, sizeof (wi));
2305 	wi.wi_w = w;
2306 
2307 	if (mdb_walk("vmem_postfix",
2308 	    (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2309 		mdb_warn("couldn't find vmem_postfix walker");
2310 		return (1);
2311 	}
2312 	return (0);
2313 }
2314 
2315 int
umem_init(void)2316 umem_init(void)
2317 {
2318 	mdb_walker_t w = {
2319 		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
2320 		umem_cache_walk_step, umem_cache_walk_fini
2321 	};
2322 
2323 	if (mdb_add_walker(&w) == -1) {
2324 		mdb_warn("failed to add umem_cache walker");
2325 		return (-1);
2326 	}
2327 
2328 	if (umem_update_variables() == -1)
2329 		return (-1);
2330 
2331 	/* install a callback so that our variables are always up-to-date */
2332 	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2333 	umem_statechange_cb(NULL);
2334 
2335 	/*
2336 	 * Register our ::whatis callbacks.
2337 	 */
2338 	mdb_whatis_register("umem", whatis_run_umem, NULL,
2339 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2340 	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2341 	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2342 
2343 	return (0);
2344 }
2345 
/*
 * Address range [umc_low, umc_high) of one CPU's portion of the umem
 * transaction log, as computed by umem_log().
 */
struct umem_log_cpu {
	uintptr_t umc_low;	/* first address attributed to the CPU */
	uintptr_t umc_high;	/* first address past the CPU's entries */
};

typedef struct umem_log_cpu umem_log_cpu_t;
2350 
2351 int
umem_log_walk(uintptr_t addr,const umem_bufctl_audit_t * b,umem_log_cpu_t * umc)2352 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2353 {
2354 	int i;
2355 
2356 	for (i = 0; i < umem_max_ncpus; i++) {
2357 		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2358 			break;
2359 	}
2360 
2361 	if (i == umem_max_ncpus)
2362 		mdb_printf("   ");
2363 	else
2364 		mdb_printf("%3d", i);
2365 
2366 	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2367 	    b->bc_timestamp, b->bc_thread);
2368 
2369 	return (WALK_NEXT);
2370 }
2371 
2372 /*ARGSUSED*/
2373 int
umem_log(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2374 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2375 {
2376 	umem_log_header_t lh;
2377 	umem_cpu_log_header_t clh;
2378 	uintptr_t lhp, clhp;
2379 	umem_log_cpu_t *umc;
2380 	int i;
2381 
2382 	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2383 		mdb_warn("failed to read 'umem_transaction_log'");
2384 		return (DCMD_ERR);
2385 	}
2386 
2387 	if (lhp == NULL) {
2388 		mdb_warn("no umem transaction log\n");
2389 		return (DCMD_ERR);
2390 	}
2391 
2392 	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2393 		mdb_warn("failed to read log header at %p", lhp);
2394 		return (DCMD_ERR);
2395 	}
2396 
2397 	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2398 
2399 	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2400 	    UM_SLEEP | UM_GC);
2401 
2402 	for (i = 0; i < umem_max_ncpus; i++) {
2403 		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2404 			mdb_warn("cannot read cpu %d's log header at %p",
2405 			    i, clhp);
2406 			return (DCMD_ERR);
2407 		}
2408 
2409 		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2410 		    (uintptr_t)lh.lh_base;
2411 		umc[i].umc_high = (uintptr_t)clh.clh_current;
2412 
2413 		clhp += sizeof (umem_cpu_log_header_t);
2414 	}
2415 
2416 	if (DCMD_HDRSPEC(flags)) {
2417 		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2418 		    "BUFADDR", "TIMESTAMP", "THREAD");
2419 	}
2420 
2421 	/*
2422 	 * If we have been passed an address, we'll just print out that
2423 	 * log entry.
2424 	 */
2425 	if (flags & DCMD_ADDRSPEC) {
2426 		umem_bufctl_audit_t *bp;
2427 		UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2428 
2429 		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2430 			mdb_warn("failed to read bufctl at %p", addr);
2431 			return (DCMD_ERR);
2432 		}
2433 
2434 		(void) umem_log_walk(addr, bp, umc);
2435 
2436 		return (DCMD_OK);
2437 	}
2438 
2439 	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2440 		mdb_warn("can't find umem log walker");
2441 		return (DCMD_ERR);
2442 	}
2443 
2444 	return (DCMD_OK);
2445 }
2446 
2447 typedef struct bufctl_history_cb {
2448 	int		bhc_flags;
2449 	int		bhc_argc;
2450 	const mdb_arg_t	*bhc_argv;
2451 	int		bhc_ret;
2452 } bufctl_history_cb_t;
2453 
2454 /*ARGSUSED*/
2455 static int
bufctl_history_callback(uintptr_t addr,const void * ign,void * arg)2456 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2457 {
2458 	bufctl_history_cb_t *bhc = arg;
2459 
2460 	bhc->bhc_ret =
2461 	    bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2462 
2463 	bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2464 
2465 	return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2466 }
2467 
/*
 * Print the help text for the bufctl dcmd: a one-line summary followed by
 * an OPTIONS section listing each supported flag.
 */
void
bufctl_help(void)
{
	const char *summary =
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n";
	const char *options =
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n";

	mdb_printf("%s\n", summary);

	/* The section heading is printed two columns to the left. */
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
2490 
2491 int
bufctl(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2492 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2493 {
2494 	uint_t verbose = FALSE;
2495 	uint_t history = FALSE;
2496 	uint_t in_history = FALSE;
2497 	uintptr_t caller = NULL, thread = NULL;
2498 	uintptr_t laddr, haddr, baddr = NULL;
2499 	hrtime_t earliest = 0, latest = 0;
2500 	int i, depth;
2501 	char c[MDB_SYM_NAMLEN];
2502 	GElf_Sym sym;
2503 	umem_bufctl_audit_t *bcp;
2504 	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2505 
2506 	if (mdb_getopts(argc, argv,
2507 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
2508 	    'h', MDB_OPT_SETBITS, TRUE, &history,
2509 	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
2510 	    'c', MDB_OPT_UINTPTR, &caller,
2511 	    't', MDB_OPT_UINTPTR, &thread,
2512 	    'e', MDB_OPT_UINT64, &earliest,
2513 	    'l', MDB_OPT_UINT64, &latest,
2514 	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2515 		return (DCMD_USAGE);
2516 
2517 	if (!(flags & DCMD_ADDRSPEC))
2518 		return (DCMD_USAGE);
2519 
2520 	if (in_history && !history)
2521 		return (DCMD_USAGE);
2522 
2523 	if (history && !in_history) {
2524 		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2525 		    UM_SLEEP | UM_GC);
2526 		bufctl_history_cb_t bhc;
2527 
2528 		nargv[0].a_type = MDB_TYPE_STRING;
2529 		nargv[0].a_un.a_str = "-H";		/* prevent recursion */
2530 
2531 		for (i = 0; i < argc; i++)
2532 			nargv[i + 1] = argv[i];
2533 
2534 		/*
2535 		 * When in history mode, we treat each element as if it
2536 		 * were in a seperate loop, so that the headers group
2537 		 * bufctls with similar histories.
2538 		 */
2539 		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2540 		bhc.bhc_argc = argc + 1;
2541 		bhc.bhc_argv = nargv;
2542 		bhc.bhc_ret = DCMD_OK;
2543 
2544 		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2545 		    addr) == -1) {
2546 			mdb_warn("unable to walk bufctl_history");
2547 			return (DCMD_ERR);
2548 		}
2549 
2550 		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2551 			mdb_printf("\n");
2552 
2553 		return (bhc.bhc_ret);
2554 	}
2555 
2556 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2557 		if (verbose) {
2558 			mdb_printf("%16s %16s %16s %16s\n"
2559 			    "%<u>%16s %16s %16s %16s%</u>\n",
2560 			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2561 			    "", "CACHE", "LASTLOG", "CONTENTS");
2562 		} else {
2563 			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2564 			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2565 		}
2566 	}
2567 
2568 	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2569 		mdb_warn("couldn't read bufctl at %p", addr);
2570 		return (DCMD_ERR);
2571 	}
2572 
2573 	/*
2574 	 * Guard against bogus bc_depth in case the bufctl is corrupt or
2575 	 * the address does not really refer to a bufctl.
2576 	 */
2577 	depth = MIN(bcp->bc_depth, umem_stack_depth);
2578 
2579 	if (caller != NULL) {
2580 		laddr = caller;
2581 		haddr = caller + sizeof (caller);
2582 
2583 		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2584 		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2585 			/*
2586 			 * We were provided an exact symbol value; any
2587 			 * address in the function is valid.
2588 			 */
2589 			laddr = (uintptr_t)sym.st_value;
2590 			haddr = (uintptr_t)sym.st_value + sym.st_size;
2591 		}
2592 
2593 		for (i = 0; i < depth; i++)
2594 			if (bcp->bc_stack[i] >= laddr &&
2595 			    bcp->bc_stack[i] < haddr)
2596 				break;
2597 
2598 		if (i == depth)
2599 			return (DCMD_OK);
2600 	}
2601 
2602 	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2603 		return (DCMD_OK);
2604 
2605 	if (earliest != 0 && bcp->bc_timestamp < earliest)
2606 		return (DCMD_OK);
2607 
2608 	if (latest != 0 && bcp->bc_timestamp > latest)
2609 		return (DCMD_OK);
2610 
2611 	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2612 		return (DCMD_OK);
2613 
2614 	if (flags & DCMD_PIPE_OUT) {
2615 		mdb_printf("%#r\n", addr);
2616 		return (DCMD_OK);
2617 	}
2618 
2619 	if (verbose) {
2620 		mdb_printf(
2621 		    "%<b>%16p%</b> %16p %16llx %16d\n"
2622 		    "%16s %16p %16p %16p\n",
2623 		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2624 		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2625 
2626 		mdb_inc_indent(17);
2627 		for (i = 0; i < depth; i++)
2628 			mdb_printf("%a\n", bcp->bc_stack[i]);
2629 		mdb_dec_indent(17);
2630 		mdb_printf("\n");
2631 	} else {
2632 		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2633 		    bcp->bc_timestamp, bcp->bc_thread);
2634 
2635 		for (i = 0; i < depth; i++) {
2636 			if (mdb_lookup_by_addr(bcp->bc_stack[i],
2637 			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2638 				continue;
2639 			if (is_umem_sym(c, "umem_"))
2640 				continue;
2641 			mdb_printf(" %a\n", bcp->bc_stack[i]);
2642 			break;
2643 		}
2644 
2645 		if (i >= depth)
2646 			mdb_printf("\n");
2647 	}
2648 
2649 	return (DCMD_OK);
2650 }
2651 
2652 /*ARGSUSED*/
2653 int
bufctl_audit(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2654 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2655 {
2656 	mdb_arg_t a;
2657 
2658 	if (!(flags & DCMD_ADDRSPEC))
2659 		return (DCMD_USAGE);
2660 
2661 	if (argc != 0)
2662 		return (DCMD_USAGE);
2663 
2664 	a.a_type = MDB_TYPE_STRING;
2665 	a.a_un.a_str = "-v";
2666 
2667 	return (bufctl(addr, flags, 1, &a));
2668 }
2669 
2670 typedef struct umem_verify {
2671 	uint64_t *umv_buf;		/* buffer to read cache contents into */
2672 	size_t umv_size;		/* number of bytes in umv_buf */
2673 	int umv_corruption;		/* > 0 if corruption found. */
2674 	int umv_besilent;		/* report actual corruption sites */
2675 	struct umem_cache umv_cache;	/* the cache we're operating on */
2676 } umem_verify_t;
2677 
/*
 * verify_pattern()
 *	Check that the first size bytes of buf_arg, taken 64 bits at a time,
 *	all equal pat.  Returns the byte offset of the first word that
 *	differs, or -1 if every word matches.
 */
static int64_t
verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
{
	/*LINTED*/
	uint64_t *limit = (uint64_t *)((char *)buf_arg + size);
	uint64_t *word = buf_arg;

	while (word < limit) {
		if (*word != pat)
			return ((uintptr_t)word - (uintptr_t)buf_arg);
		word++;
	}

	return (-1);
}
2694 
2695 /*
2696  * verify_buftag()
2697  *	verify that btp->bt_bxstat == (bcp ^ pat)
2698  */
2699 static int
verify_buftag(umem_buftag_t * btp,uintptr_t pat)2700 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2701 {
2702 	return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2703 }
2704 
2705 /*
2706  * verify_free()
2707  *	verify the integrity of a free block of memory by checking
2708  *	that it is filled with 0xdeadbeef and that its buftag is sane.
2709  */
2710 /*ARGSUSED1*/
2711 static int
verify_free(uintptr_t addr,const void * data,void * private)2712 verify_free(uintptr_t addr, const void *data, void *private)
2713 {
2714 	umem_verify_t *umv = (umem_verify_t *)private;
2715 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2716 	int64_t corrupt;		/* corruption offset */
2717 	umem_buftag_t *buftagp;		/* ptr to buftag */
2718 	umem_cache_t *cp = &umv->umv_cache;
2719 	int besilent = umv->umv_besilent;
2720 
2721 	/*LINTED*/
2722 	buftagp = UMEM_BUFTAG(cp, buf);
2723 
2724 	/*
2725 	 * Read the buffer to check.
2726 	 */
2727 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2728 		if (!besilent)
2729 			mdb_warn("couldn't read %p", addr);
2730 		return (WALK_NEXT);
2731 	}
2732 
2733 	if ((corrupt = verify_pattern(buf, cp->cache_verify,
2734 	    UMEM_FREE_PATTERN)) >= 0) {
2735 		if (!besilent)
2736 			mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2737 			    addr, (uintptr_t)addr + corrupt);
2738 		goto corrupt;
2739 	}
2740 
2741 	if ((cp->cache_flags & UMF_HASH) &&
2742 	    buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2743 		if (!besilent)
2744 			mdb_printf("buffer %p (free) seems to "
2745 			    "have a corrupt redzone pattern\n", addr);
2746 		goto corrupt;
2747 	}
2748 
2749 	/*
2750 	 * confirm bufctl pointer integrity.
2751 	 */
2752 	if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2753 		if (!besilent)
2754 			mdb_printf("buffer %p (free) has a corrupt "
2755 			    "buftag\n", addr);
2756 		goto corrupt;
2757 	}
2758 
2759 	return (WALK_NEXT);
2760 corrupt:
2761 	umv->umv_corruption++;
2762 	return (WALK_NEXT);
2763 }
2764 
2765 /*
2766  * verify_alloc()
2767  *	Verify that the buftag of an allocated buffer makes sense with respect
2768  *	to the buffer.
2769  */
2770 /*ARGSUSED1*/
2771 static int
verify_alloc(uintptr_t addr,const void * data,void * private)2772 verify_alloc(uintptr_t addr, const void *data, void *private)
2773 {
2774 	umem_verify_t *umv = (umem_verify_t *)private;
2775 	umem_cache_t *cp = &umv->umv_cache;
2776 	uint64_t *buf = umv->umv_buf;	/* buf to validate */
2777 	/*LINTED*/
2778 	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2779 	uint32_t *ip = (uint32_t *)buftagp;
2780 	uint8_t *bp = (uint8_t *)buf;
2781 	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
2782 	int besilent = umv->umv_besilent;
2783 
2784 	/*
2785 	 * Read the buffer to check.
2786 	 */
2787 	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2788 		if (!besilent)
2789 			mdb_warn("couldn't read %p", addr);
2790 		return (WALK_NEXT);
2791 	}
2792 
2793 	/*
2794 	 * There are two cases to handle:
2795 	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2796 	 *    0xfeedfacefeedface at the end of it
2797 	 * 2. If the buf was alloc'd using umem_alloc, it will have
2798 	 *    0xbb just past the end of the region in use.  At the buftag,
2799 	 *    it will have 0xfeedface (or, if the whole buffer is in use,
2800 	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2801 	 *    endianness), followed by 32 bits containing the offset of the
2802 	 *    0xbb byte in the buffer.
2803 	 *
2804 	 * Finally, the two 32-bit words that comprise the second half of the
2805 	 * buftag should xor to UMEM_BUFTAG_ALLOC
2806 	 */
2807 
2808 	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2809 		looks_ok = 1;
2810 	else if (!UMEM_SIZE_VALID(ip[1]))
2811 		size_ok = 0;
2812 	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2813 		looks_ok = 1;
2814 	else
2815 		size_ok = 0;
2816 
2817 	if (!size_ok) {
2818 		if (!besilent)
2819 			mdb_printf("buffer %p (allocated) has a corrupt "
2820 			    "redzone size encoding\n", addr);
2821 		goto corrupt;
2822 	}
2823 
2824 	if (!looks_ok) {
2825 		if (!besilent)
2826 			mdb_printf("buffer %p (allocated) has a corrupt "
2827 			    "redzone signature\n", addr);
2828 		goto corrupt;
2829 	}
2830 
2831 	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2832 		if (!besilent)
2833 			mdb_printf("buffer %p (allocated) has a "
2834 			    "corrupt buftag\n", addr);
2835 		goto corrupt;
2836 	}
2837 
2838 	return (WALK_NEXT);
2839 corrupt:
2840 	umv->umv_corruption++;
2841 	return (WALK_NEXT);
2842 }
2843 
2844 /*ARGSUSED2*/
2845 int
umem_verify(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2846 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2847 {
2848 	if (flags & DCMD_ADDRSPEC) {
2849 		int check_alloc = 0, check_free = 0;
2850 		umem_verify_t umv;
2851 
2852 		if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2853 		    addr) == -1) {
2854 			mdb_warn("couldn't read umem_cache %p", addr);
2855 			return (DCMD_ERR);
2856 		}
2857 
2858 		umv.umv_size = umv.umv_cache.cache_buftag +
2859 		    sizeof (umem_buftag_t);
2860 		umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2861 		umv.umv_corruption = 0;
2862 
2863 		if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2864 			check_alloc = 1;
2865 			if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2866 				check_free = 1;
2867 		} else {
2868 			if (!(flags & DCMD_LOOP)) {
2869 				mdb_warn("cache %p (%s) does not have "
2870 				    "redzone checking enabled\n", addr,
2871 				    umv.umv_cache.cache_name);
2872 			}
2873 			return (DCMD_ERR);
2874 		}
2875 
2876 		if (flags & DCMD_LOOP) {
2877 			/*
2878 			 * table mode, don't print out every corrupt buffer
2879 			 */
2880 			umv.umv_besilent = 1;
2881 		} else {
2882 			mdb_printf("Summary for cache '%s'\n",
2883 			    umv.umv_cache.cache_name);
2884 			mdb_inc_indent(2);
2885 			umv.umv_besilent = 0;
2886 		}
2887 
2888 		if (check_alloc)
2889 			(void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2890 		if (check_free)
2891 			(void) mdb_pwalk("freemem", verify_free, &umv, addr);
2892 
2893 		if (flags & DCMD_LOOP) {
2894 			if (umv.umv_corruption == 0) {
2895 				mdb_printf("%-*s %?p clean\n",
2896 				    UMEM_CACHE_NAMELEN,
2897 				    umv.umv_cache.cache_name, addr);
2898 			} else {
2899 				char *s = "";	/* optional s in "buffer[s]" */
2900 				if (umv.umv_corruption > 1)
2901 					s = "s";
2902 
2903 				mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2904 				    UMEM_CACHE_NAMELEN,
2905 				    umv.umv_cache.cache_name, addr,
2906 				    umv.umv_corruption, s);
2907 			}
2908 		} else {
2909 			/*
2910 			 * This is the more verbose mode, when the user has
2911 			 * type addr::umem_verify.  If the cache was clean,
2912 			 * nothing will have yet been printed. So say something.
2913 			 */
2914 			if (umv.umv_corruption == 0)
2915 				mdb_printf("clean\n");
2916 
2917 			mdb_dec_indent(2);
2918 		}
2919 	} else {
2920 		/*
2921 		 * If the user didn't specify a cache to verify, we'll walk all
2922 		 * umem_cache's, specifying ourself as a callback for each...
2923 		 * this is the equivalent of '::walk umem_cache .::umem_verify'
2924 		 */
2925 		mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2926 		    "Cache Name", "Addr", "Cache Integrity");
2927 		(void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2928 	}
2929 
2930 	return (DCMD_OK);
2931 }
2932 
2933 typedef struct vmem_node {
2934 	struct vmem_node *vn_next;
2935 	struct vmem_node *vn_parent;
2936 	struct vmem_node *vn_sibling;
2937 	struct vmem_node *vn_children;
2938 	uintptr_t vn_addr;
2939 	int vn_marked;
2940 	vmem_t vn_vmem;
2941 } vmem_node_t;
2942 
2943 typedef struct vmem_walk {
2944 	vmem_node_t *vw_root;
2945 	vmem_node_t *vw_current;
2946 } vmem_walk_t;
2947 
2948 int
vmem_walk_init(mdb_walk_state_t * wsp)2949 vmem_walk_init(mdb_walk_state_t *wsp)
2950 {
2951 	uintptr_t vaddr, paddr;
2952 	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2953 	vmem_walk_t *vw;
2954 
2955 	if (umem_readvar(&vaddr, "vmem_list") == -1) {
2956 		mdb_warn("couldn't read 'vmem_list'");
2957 		return (WALK_ERR);
2958 	}
2959 
2960 	while (vaddr != NULL) {
2961 		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2962 		vp->vn_addr = vaddr;
2963 		vp->vn_next = head;
2964 		head = vp;
2965 
2966 		if (vaddr == wsp->walk_addr)
2967 			current = vp;
2968 
2969 		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2970 			mdb_warn("couldn't read vmem_t at %p", vaddr);
2971 			goto err;
2972 		}
2973 
2974 		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2975 	}
2976 
2977 	for (vp = head; vp != NULL; vp = vp->vn_next) {
2978 
2979 		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2980 			vp->vn_sibling = root;
2981 			root = vp;
2982 			continue;
2983 		}
2984 
2985 		for (parent = head; parent != NULL; parent = parent->vn_next) {
2986 			if (parent->vn_addr != paddr)
2987 				continue;
2988 			vp->vn_sibling = parent->vn_children;
2989 			parent->vn_children = vp;
2990 			vp->vn_parent = parent;
2991 			break;
2992 		}
2993 
2994 		if (parent == NULL) {
2995 			mdb_warn("couldn't find %p's parent (%p)\n",
2996 			    vp->vn_addr, paddr);
2997 			goto err;
2998 		}
2999 	}
3000 
3001 	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3002 	vw->vw_root = root;
3003 
3004 	if (current != NULL)
3005 		vw->vw_current = current;
3006 	else
3007 		vw->vw_current = root;
3008 
3009 	wsp->walk_data = vw;
3010 	return (WALK_NEXT);
3011 err:
3012 	for (vp = head; head != NULL; vp = head) {
3013 		head = vp->vn_next;
3014 		mdb_free(vp, sizeof (vmem_node_t));
3015 	}
3016 
3017 	return (WALK_ERR);
3018 }
3019 
3020 int
vmem_walk_step(mdb_walk_state_t * wsp)3021 vmem_walk_step(mdb_walk_state_t *wsp)
3022 {
3023 	vmem_walk_t *vw = wsp->walk_data;
3024 	vmem_node_t *vp;
3025 	int rval;
3026 
3027 	if ((vp = vw->vw_current) == NULL)
3028 		return (WALK_DONE);
3029 
3030 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3031 
3032 	if (vp->vn_children != NULL) {
3033 		vw->vw_current = vp->vn_children;
3034 		return (rval);
3035 	}
3036 
3037 	do {
3038 		vw->vw_current = vp->vn_sibling;
3039 		vp = vp->vn_parent;
3040 	} while (vw->vw_current == NULL && vp != NULL);
3041 
3042 	return (rval);
3043 }
3044 
3045 /*
3046  * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3047  * children are visited before their parent.  We perform the postfix walk
3048  * iteratively (rather than recursively) to allow mdb to regain control
3049  * after each callback.
3050  */
3051 int
vmem_postfix_walk_step(mdb_walk_state_t * wsp)3052 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3053 {
3054 	vmem_walk_t *vw = wsp->walk_data;
3055 	vmem_node_t *vp = vw->vw_current;
3056 	int rval;
3057 
3058 	/*
3059 	 * If this node is marked, then we know that we have already visited
3060 	 * all of its children.  If the node has any siblings, they need to
3061 	 * be visited next; otherwise, we need to visit the parent.  Note
3062 	 * that vp->vn_marked will only be zero on the first invocation of
3063 	 * the step function.
3064 	 */
3065 	if (vp->vn_marked) {
3066 		if (vp->vn_sibling != NULL)
3067 			vp = vp->vn_sibling;
3068 		else if (vp->vn_parent != NULL)
3069 			vp = vp->vn_parent;
3070 		else {
3071 			/*
3072 			 * We have neither a parent, nor a sibling, and we
3073 			 * have already been visited; we're done.
3074 			 */
3075 			return (WALK_DONE);
3076 		}
3077 	}
3078 
3079 	/*
3080 	 * Before we visit this node, visit its children.
3081 	 */
3082 	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3083 		vp = vp->vn_children;
3084 
3085 	vp->vn_marked = 1;
3086 	vw->vw_current = vp;
3087 	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3088 
3089 	return (rval);
3090 }
3091 
3092 void
vmem_walk_fini(mdb_walk_state_t * wsp)3093 vmem_walk_fini(mdb_walk_state_t *wsp)
3094 {
3095 	vmem_walk_t *vw = wsp->walk_data;
3096 	vmem_node_t *root = vw->vw_root;
3097 	int done;
3098 
3099 	if (root == NULL)
3100 		return;
3101 
3102 	if ((vw->vw_root = root->vn_children) != NULL)
3103 		vmem_walk_fini(wsp);
3104 
3105 	vw->vw_root = root->vn_sibling;
3106 	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3107 	mdb_free(root, sizeof (vmem_node_t));
3108 
3109 	if (done) {
3110 		mdb_free(vw, sizeof (vmem_walk_t));
3111 	} else {
3112 		vmem_walk_fini(wsp);
3113 	}
3114 }
3115 
/*
 * Walk state for the vmem segment walkers.
 */
struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report; VMEM_NONE for all */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 */
	uintptr_t vsw_current;	/* address of the next segment to read */
};

typedef struct vmem_seg_walk vmem_seg_walk_t;
3121 
3122 /*ARGSUSED*/
3123 int
vmem_seg_walk_common_init(mdb_walk_state_t * wsp,uint8_t type,char * name)3124 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3125 {
3126 	vmem_seg_walk_t *vsw;
3127 
3128 	if (wsp->walk_addr == NULL) {
3129 		mdb_warn("vmem_%s does not support global walks\n", name);
3130 		return (WALK_ERR);
3131 	}
3132 
3133 	wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3134 
3135 	vsw->vsw_type = type;
3136 	vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3137 	vsw->vsw_current = vsw->vsw_start;
3138 
3139 	return (WALK_NEXT);
3140 }
3141 
/*
 * vmem segments can't have type 0 (this should be added to vmem_impl.h), so
 * 0 is available to mean "no particular segment type" in the walkers below.
 */
3145 #define	VMEM_NONE	0
3146 
3147 int
vmem_alloc_walk_init(mdb_walk_state_t * wsp)3148 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3149 {
3150 	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3151 }
3152 
3153 int
vmem_free_walk_init(mdb_walk_state_t * wsp)3154 vmem_free_walk_init(mdb_walk_state_t *wsp)
3155 {
3156 	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3157 }
3158 
3159 int
vmem_span_walk_init(mdb_walk_state_t * wsp)3160 vmem_span_walk_init(mdb_walk_state_t *wsp)
3161 {
3162 	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3163 }
3164 
3165 int
vmem_seg_walk_init(mdb_walk_state_t * wsp)3166 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3167 {
3168 	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3169 }
3170 
3171 int
vmem_seg_walk_step(mdb_walk_state_t * wsp)3172 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3173 {
3174 	vmem_seg_t seg;
3175 	vmem_seg_walk_t *vsw = wsp->walk_data;
3176 	uintptr_t addr = vsw->vsw_current;
3177 	static size_t seg_size = 0;
3178 	int rval;
3179 
3180 	if (!seg_size) {
3181 		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3182 			mdb_warn("failed to read 'vmem_seg_size'");
3183 			seg_size = sizeof (vmem_seg_t);
3184 		}
3185 	}
3186 
3187 	if (seg_size < sizeof (seg))
3188 		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3189 
3190 	if (mdb_vread(&seg, seg_size, addr) == -1) {
3191 		mdb_warn("couldn't read vmem_seg at %p", addr);
3192 		return (WALK_ERR);
3193 	}
3194 
3195 	vsw->vsw_current = (uintptr_t)seg.vs_anext;
3196 	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3197 		rval = WALK_NEXT;
3198 	} else {
3199 		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3200 	}
3201 
3202 	if (vsw->vsw_current == vsw->vsw_start)
3203 		return (WALK_DONE);
3204 
3205 	return (rval);
3206 }
3207 
3208 void
vmem_seg_walk_fini(mdb_walk_state_t * wsp)3209 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3210 {
3211 	vmem_seg_walk_t *vsw = wsp->walk_data;
3212 
3213 	mdb_free(vsw, sizeof (vmem_seg_walk_t));
3214 }
3215 
3216 #define	VMEM_NAMEWIDTH	22
3217 
3218 int
vmem(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3219 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3220 {
3221 	vmem_t v, parent;
3222 	uintptr_t paddr;
3223 	int ident = 0;
3224 	char c[VMEM_NAMEWIDTH];
3225 
3226 	if (!(flags & DCMD_ADDRSPEC)) {
3227 		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3228 			mdb_warn("can't walk vmem");
3229 			return (DCMD_ERR);
3230 		}
3231 		return (DCMD_OK);
3232 	}
3233 
3234 	if (DCMD_HDRSPEC(flags))
3235 		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3236 		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3237 		    "TOTAL", "SUCCEED", "FAIL");
3238 
3239 	if (mdb_vread(&v, sizeof (v), addr) == -1) {
3240 		mdb_warn("couldn't read vmem at %p", addr);
3241 		return (DCMD_ERR);
3242 	}
3243 
3244 	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3245 		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3246 			mdb_warn("couldn't trace %p's ancestry", addr);
3247 			ident = 0;
3248 			break;
3249 		}
3250 		paddr = (uintptr_t)parent.vm_source;
3251 	}
3252 
3253 	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3254 
3255 	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3256 	    addr, VMEM_NAMEWIDTH, c,
3257 	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3258 	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3259 
3260 	return (DCMD_OK);
3261 }
3262 
/*
 * Help text for the ::vmem_seg dcmd: a short description followed by the
 * list of options.  The indent is pulled back by two columns around the
 * OPTIONS heading and then restored.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3294 
3295 
3296 /*ARGSUSED*/
3297 int
vmem_seg(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3298 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3299 {
3300 	vmem_seg_t vs;
3301 	uintptr_t *stk = vs.vs_stack;
3302 	uintptr_t sz;
3303 	uint8_t t;
3304 	const char *type = NULL;
3305 	GElf_Sym sym;
3306 	char c[MDB_SYM_NAMLEN];
3307 	int no_debug;
3308 	int i;
3309 	int depth;
3310 	uintptr_t laddr, haddr;
3311 
3312 	uintptr_t caller = NULL, thread = NULL;
3313 	uintptr_t minsize = 0, maxsize = 0;
3314 
3315 	hrtime_t earliest = 0, latest = 0;
3316 
3317 	uint_t size = 0;
3318 	uint_t verbose = 0;
3319 
3320 	if (!(flags & DCMD_ADDRSPEC))
3321 		return (DCMD_USAGE);
3322 
3323 	if (mdb_getopts(argc, argv,
3324 	    'c', MDB_OPT_UINTPTR, &caller,
3325 	    'e', MDB_OPT_UINT64, &earliest,
3326 	    'l', MDB_OPT_UINT64, &latest,
3327 	    's', MDB_OPT_SETBITS, TRUE, &size,
3328 	    'm', MDB_OPT_UINTPTR, &minsize,
3329 	    'M', MDB_OPT_UINTPTR, &maxsize,
3330 	    't', MDB_OPT_UINTPTR, &thread,
3331 	    'T', MDB_OPT_STR, &type,
3332 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
3333 	    NULL) != argc)
3334 		return (DCMD_USAGE);
3335 
3336 	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3337 		if (verbose) {
3338 			mdb_printf("%16s %4s %16s %16s %16s\n"
3339 			    "%<u>%16s %4s %16s %16s %16s%</u>\n",
3340 			    "ADDR", "TYPE", "START", "END", "SIZE",
3341 			    "", "", "THREAD", "TIMESTAMP", "");
3342 		} else {
3343 			mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3344 			    "START", size? "SIZE" : "END", "WHO");
3345 		}
3346 	}
3347 
3348 	if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3349 		mdb_warn("couldn't read vmem_seg at %p", addr);
3350 		return (DCMD_ERR);
3351 	}
3352 
3353 	if (type != NULL) {
3354 		if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3355 			t = VMEM_ALLOC;
3356 		else if (strcmp(type, "FREE") == 0)
3357 			t = VMEM_FREE;
3358 		else if (strcmp(type, "SPAN") == 0)
3359 			t = VMEM_SPAN;
3360 		else if (strcmp(type, "ROTR") == 0 ||
3361 		    strcmp(type, "ROTOR") == 0)
3362 			t = VMEM_ROTOR;
3363 		else if (strcmp(type, "WLKR") == 0 ||
3364 		    strcmp(type, "WALKER") == 0)
3365 			t = VMEM_WALKER;
3366 		else {
3367 			mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3368 			    type);
3369 			return (DCMD_ERR);
3370 		}
3371 
3372 		if (vs.vs_type != t)
3373 			return (DCMD_OK);
3374 	}
3375 
3376 	sz = vs.vs_end - vs.vs_start;
3377 
3378 	if (minsize != 0 && sz < minsize)
3379 		return (DCMD_OK);
3380 
3381 	if (maxsize != 0 && sz > maxsize)
3382 		return (DCMD_OK);
3383 
3384 	t = vs.vs_type;
3385 	depth = vs.vs_depth;
3386 
3387 	/*
3388 	 * debug info, when present, is only accurate for VMEM_ALLOC segments
3389 	 */
3390 	no_debug = (t != VMEM_ALLOC) ||
3391 	    (depth == 0 || depth > VMEM_STACK_DEPTH);
3392 
3393 	if (no_debug) {
3394 		if (caller != NULL || thread != NULL || earliest != 0 ||
3395 		    latest != 0)
3396 			return (DCMD_OK);		/* not enough info */
3397 	} else {
3398 		if (caller != NULL) {
3399 			laddr = caller;
3400 			haddr = caller + sizeof (caller);
3401 
3402 			if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3403 			    sizeof (c), &sym) != -1 &&
3404 			    caller == (uintptr_t)sym.st_value) {
3405 				/*
3406 				 * We were provided an exact symbol value; any
3407 				 * address in the function is valid.
3408 				 */
3409 				laddr = (uintptr_t)sym.st_value;
3410 				haddr = (uintptr_t)sym.st_value + sym.st_size;
3411 			}
3412 
3413 			for (i = 0; i < depth; i++)
3414 				if (vs.vs_stack[i] >= laddr &&
3415 				    vs.vs_stack[i] < haddr)
3416 					break;
3417 
3418 			if (i == depth)
3419 				return (DCMD_OK);
3420 		}
3421 
3422 		if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3423 			return (DCMD_OK);
3424 
3425 		if (earliest != 0 && vs.vs_timestamp < earliest)
3426 			return (DCMD_OK);
3427 
3428 		if (latest != 0 && vs.vs_timestamp > latest)
3429 			return (DCMD_OK);
3430 	}
3431 
3432 	type = (t == VMEM_ALLOC ? "ALLC" :
3433 	    t == VMEM_FREE ? "FREE" :
3434 	    t == VMEM_SPAN ? "SPAN" :
3435 	    t == VMEM_ROTOR ? "ROTR" :
3436 	    t == VMEM_WALKER ? "WLKR" :
3437 	    "????");
3438 
3439 	if (flags & DCMD_PIPE_OUT) {
3440 		mdb_printf("%#r\n", addr);
3441 		return (DCMD_OK);
3442 	}
3443 
3444 	if (verbose) {
3445 		mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3446 		    addr, type, vs.vs_start, vs.vs_end, sz);
3447 
3448 		if (no_debug)
3449 			return (DCMD_OK);
3450 
3451 		mdb_printf("%16s %4s %16d %16llx\n",
3452 		    "", "", vs.vs_thread, vs.vs_timestamp);
3453 
3454 		mdb_inc_indent(17);
3455 		for (i = 0; i < depth; i++) {
3456 			mdb_printf("%a\n", stk[i]);
3457 		}
3458 		mdb_dec_indent(17);
3459 		mdb_printf("\n");
3460 	} else {
3461 		mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3462 		    vs.vs_start, size? sz : vs.vs_end);
3463 
3464 		if (no_debug) {
3465 			mdb_printf("\n");
3466 			return (DCMD_OK);
3467 		}
3468 
3469 		for (i = 0; i < depth; i++) {
3470 			if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3471 			    c, sizeof (c), &sym) == -1)
3472 				continue;
3473 			if (is_umem_sym(c, "vmem_"))
3474 				continue;
3475 			break;
3476 		}
3477 		mdb_printf(" %a\n", stk[i]);
3478 	}
3479 	return (DCMD_OK);
3480 }
3481 
3482 /*ARGSUSED*/
3483 static int
showbc(uintptr_t addr,const umem_bufctl_audit_t * bcp,hrtime_t * newest)3484 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3485 {
3486 	char name[UMEM_CACHE_NAMELEN + 1];
3487 	hrtime_t delta;
3488 	int i, depth;
3489 
3490 	if (bcp->bc_timestamp == 0)
3491 		return (WALK_DONE);
3492 
3493 	if (*newest == 0)
3494 		*newest = bcp->bc_timestamp;
3495 
3496 	delta = *newest - bcp->bc_timestamp;
3497 	depth = MIN(bcp->bc_depth, umem_stack_depth);
3498 
3499 	if (mdb_readstr(name, sizeof (name), (uintptr_t)
3500 	    &bcp->bc_cache->cache_name) <= 0)
3501 		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3502 
3503 	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
3504 	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3505 
3506 	for (i = 0; i < depth; i++)
3507 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3508 
3509 	return (WALK_NEXT);
3510 }
3511 
3512 int
umalog(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3513 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3514 {
3515 	const char *logname = "umem_transaction_log";
3516 	hrtime_t newest = 0;
3517 
3518 	if ((flags & DCMD_ADDRSPEC) || argc > 1)
3519 		return (DCMD_USAGE);
3520 
3521 	if (argc > 0) {
3522 		if (argv->a_type != MDB_TYPE_STRING)
3523 			return (DCMD_USAGE);
3524 		if (strcmp(argv->a_un.a_str, "fail") == 0)
3525 			logname = "umem_failure_log";
3526 		else if (strcmp(argv->a_un.a_str, "slab") == 0)
3527 			logname = "umem_slab_log";
3528 		else
3529 			return (DCMD_USAGE);
3530 	}
3531 
3532 	if (umem_readvar(&addr, logname) == -1) {
3533 		mdb_warn("failed to read %s log header pointer");
3534 		return (DCMD_ERR);
3535 	}
3536 
3537 	if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3538 		mdb_warn("failed to walk umem log");
3539 		return (DCMD_ERR);
3540 	}
3541 
3542 	return (DCMD_OK);
3543 }
3544 
3545 /*
3546  * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3547  * The first piece is a structure which we use to accumulate umem_cache_t
3548  * addresses of interest.  The umc_add is used as a callback for the umem_cache
3549  * walker; we either add all caches, or ones named explicitly as arguments.
3550  */
3551 
typedef struct umclist {
	const char *umc_name;	/* cache name to look for; NULL matches all */
	uintptr_t *umc_caches;	/* array of matching umem_cache_t addresses */
	int umc_nelems;		/* number of umc_caches slots filled in */
	int umc_size;		/* number of slots allocated in umc_caches */
} umclist_t;
3558 
3559 static int
umc_add(uintptr_t addr,const umem_cache_t * cp,umclist_t * umc)3560 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3561 {
3562 	void *p;
3563 	int s;
3564 
3565 	if (umc->umc_name == NULL ||
3566 	    strcmp(cp->cache_name, umc->umc_name) == 0) {
3567 		/*
3568 		 * If we have a match, grow our array (if necessary), and then
3569 		 * add the virtual address of the matching cache to our list.
3570 		 */
3571 		if (umc->umc_nelems >= umc->umc_size) {
3572 			s = umc->umc_size ? umc->umc_size * 2 : 256;
3573 			p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3574 
3575 			bcopy(umc->umc_caches, p,
3576 			    sizeof (uintptr_t) * umc->umc_size);
3577 
3578 			umc->umc_caches = p;
3579 			umc->umc_size = s;
3580 		}
3581 
3582 		umc->umc_caches[umc->umc_nelems++] = addr;
3583 		return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3584 	}
3585 
3586 	return (WALK_NEXT);
3587 }
3588 
3589 /*
3590  * The second piece of ::umausers is a hash table of allocations.  Each
3591  * allocation owner is identified by its stack trace and data_size.  We then
3592  * track the total bytes of all such allocations, and the number of allocations
3593  * to report at the end.  Once we have a list of caches, we walk through the
3594  * allocated bufctls of each, and update our hash table accordingly.
3595  */
3596 
3597 typedef struct umowner {
3598 	struct umowner *umo_head;		/* First hash elt in bucket */
3599 	struct umowner *umo_next;		/* Next hash elt in chain */
3600 	size_t umo_signature;			/* Hash table signature */
3601 	uint_t umo_num;				/* Number of allocations */
3602 	size_t umo_data_size;			/* Size of each allocation */
3603 	size_t umo_total_size;			/* Total bytes of allocation */
3604 	int umo_depth;				/* Depth of stack trace */
3605 	uintptr_t *umo_stack;			/* Stack trace */
3606 } umowner_t;
3607 
3608 typedef struct umusers {
3609 	const umem_cache_t *umu_cache;		/* Current umem cache */
3610 	umowner_t *umu_hash;			/* Hash table of owners */
3611 	uintptr_t *umu_stacks;			/* stacks for owners */
3612 	int umu_nelems;				/* Number of entries in use */
3613 	int umu_size;				/* Total number of entries */
3614 } umusers_t;
3615 
3616 static void
umu_add(umusers_t * umu,const umem_bufctl_audit_t * bcp,size_t size,size_t data_size)3617 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3618     size_t size, size_t data_size)
3619 {
3620 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3621 	size_t bucket, signature = data_size;
3622 	umowner_t *umo, *umoend;
3623 
3624 	/*
3625 	 * If the hash table is full, double its size and rehash everything.
3626 	 */
3627 	if (umu->umu_nelems >= umu->umu_size) {
3628 		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3629 		size_t umowner_size = sizeof (umowner_t);
3630 		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3631 		uintptr_t *new_stacks;
3632 
3633 		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3634 		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3635 
3636 		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3637 		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3638 		umu->umu_hash = umo;
3639 		umu->umu_stacks = new_stacks;
3640 		umu->umu_size = s;
3641 
3642 		umoend = umu->umu_hash + umu->umu_size;
3643 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3644 			umo->umo_head = NULL;
3645 			umo->umo_stack = &umu->umu_stacks[
3646 			    umem_stack_depth * (umo - umu->umu_hash)];
3647 		}
3648 
3649 		umoend = umu->umu_hash + umu->umu_nelems;
3650 		for (umo = umu->umu_hash; umo < umoend; umo++) {
3651 			bucket = umo->umo_signature & (umu->umu_size - 1);
3652 			umo->umo_next = umu->umu_hash[bucket].umo_head;
3653 			umu->umu_hash[bucket].umo_head = umo;
3654 		}
3655 	}
3656 
3657 	/*
3658 	 * Finish computing the hash signature from the stack trace, and then
3659 	 * see if the owner is in the hash table.  If so, update our stats.
3660 	 */
3661 	for (i = 0; i < depth; i++)
3662 		signature += bcp->bc_stack[i];
3663 
3664 	bucket = signature & (umu->umu_size - 1);
3665 
3666 	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3667 		if (umo->umo_signature == signature) {
3668 			size_t difference = 0;
3669 
3670 			difference |= umo->umo_data_size - data_size;
3671 			difference |= umo->umo_depth - depth;
3672 
3673 			for (i = 0; i < depth; i++) {
3674 				difference |= umo->umo_stack[i] -
3675 				    bcp->bc_stack[i];
3676 			}
3677 
3678 			if (difference == 0) {
3679 				umo->umo_total_size += size;
3680 				umo->umo_num++;
3681 				return;
3682 			}
3683 		}
3684 	}
3685 
3686 	/*
3687 	 * If the owner is not yet hashed, grab the next element and fill it
3688 	 * in based on the allocation information.
3689 	 */
3690 	umo = &umu->umu_hash[umu->umu_nelems++];
3691 	umo->umo_next = umu->umu_hash[bucket].umo_head;
3692 	umu->umu_hash[bucket].umo_head = umo;
3693 
3694 	umo->umo_signature = signature;
3695 	umo->umo_num = 1;
3696 	umo->umo_data_size = data_size;
3697 	umo->umo_total_size = size;
3698 	umo->umo_depth = depth;
3699 
3700 	for (i = 0; i < depth; i++)
3701 		umo->umo_stack[i] = bcp->bc_stack[i];
3702 }
3703 
3704 /*
3705  * When ::umausers is invoked without the -f flag, we simply update our hash
3706  * table with the information from each allocated bufctl.
3707  */
3708 /*ARGSUSED*/
3709 static int
umause1(uintptr_t addr,const umem_bufctl_audit_t * bcp,umusers_t * umu)3710 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3711 {
3712 	const umem_cache_t *cp = umu->umu_cache;
3713 
3714 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3715 	return (WALK_NEXT);
3716 }
3717 
3718 /*
3719  * When ::umausers is invoked with the -f flag, we print out the information
3720  * for each bufctl as well as updating the hash table.
3721  */
3722 static int
umause2(uintptr_t addr,const umem_bufctl_audit_t * bcp,umusers_t * umu)3723 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3724 {
3725 	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3726 	const umem_cache_t *cp = umu->umu_cache;
3727 
3728 	mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3729 	    cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3730 
3731 	for (i = 0; i < depth; i++)
3732 		mdb_printf("\t %a\n", bcp->bc_stack[i]);
3733 
3734 	umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3735 	return (WALK_NEXT);
3736 }
3737 
3738 /*
3739  * We sort our results by allocation size before printing them.
3740  */
3741 static int
umownercmp(const void * lp,const void * rp)3742 umownercmp(const void *lp, const void *rp)
3743 {
3744 	const umowner_t *lhs = lp;
3745 	const umowner_t *rhs = rp;
3746 
3747 	return (rhs->umo_total_size - lhs->umo_total_size);
3748 }
3749 
3750 /*
3751  * The main engine of ::umausers is relatively straightforward: First we
3752  * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3753  * iterate over the allocated bufctls of each cache in the list.  Finally,
3754  * we sort and print our results.
3755  */
3756 /*ARGSUSED*/
3757 int
umausers(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3758 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3759 {
3760 	int mem_threshold = 8192;	/* Minimum # bytes for printing */
3761 	int cnt_threshold = 100;	/* Minimum # blocks for printing */
3762 	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
3763 	int do_all_caches = 1;		/* Do all caches (no arguments) */
3764 	int opt_e = FALSE;		/* Include "small" users */
3765 	int opt_f = FALSE;		/* Print stack traces */
3766 
3767 	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3768 	umowner_t *umo, *umoend;
3769 	int i, oelems;
3770 
3771 	umclist_t umc;
3772 	umusers_t umu;
3773 
3774 	if (flags & DCMD_ADDRSPEC)
3775 		return (DCMD_USAGE);
3776 
3777 	bzero(&umc, sizeof (umc));
3778 	bzero(&umu, sizeof (umu));
3779 
3780 	while ((i = mdb_getopts(argc, argv,
3781 	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3782 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3783 
3784 		argv += i;	/* skip past options we just processed */
3785 		argc -= i;	/* adjust argc */
3786 
3787 		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3788 			return (DCMD_USAGE);
3789 
3790 		oelems = umc.umc_nelems;
3791 		umc.umc_name = argv->a_un.a_str;
3792 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3793 
3794 		if (umc.umc_nelems == oelems) {
3795 			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3796 			return (DCMD_ERR);
3797 		}
3798 
3799 		do_all_caches = 0;
3800 		argv++;
3801 		argc--;
3802 	}
3803 
3804 	if (opt_e)
3805 		mem_threshold = cnt_threshold = 0;
3806 
3807 	if (opt_f)
3808 		callback = (mdb_walk_cb_t)umause2;
3809 
3810 	if (do_all_caches) {
3811 		umc.umc_name = NULL; /* match all cache names */
3812 		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3813 	}
3814 
3815 	for (i = 0; i < umc.umc_nelems; i++) {
3816 		uintptr_t cp = umc.umc_caches[i];
3817 		umem_cache_t c;
3818 
3819 		if (mdb_vread(&c, sizeof (c), cp) == -1) {
3820 			mdb_warn("failed to read cache at %p", cp);
3821 			continue;
3822 		}
3823 
3824 		if (!(c.cache_flags & UMF_AUDIT)) {
3825 			if (!do_all_caches) {
3826 				mdb_warn("UMF_AUDIT is not enabled for %s\n",
3827 				    c.cache_name);
3828 			}
3829 			continue;
3830 		}
3831 
3832 		umu.umu_cache = &c;
3833 		(void) mdb_pwalk("bufctl", callback, &umu, cp);
3834 		audited_caches++;
3835 	}
3836 
3837 	if (audited_caches == 0 && do_all_caches) {
3838 		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3839 		return (DCMD_ERR);
3840 	}
3841 
3842 	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3843 	umoend = umu.umu_hash + umu.umu_nelems;
3844 
3845 	for (umo = umu.umu_hash; umo < umoend; umo++) {
3846 		if (umo->umo_total_size < mem_threshold &&
3847 		    umo->umo_num < cnt_threshold)
3848 			continue;
3849 		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3850 		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3851 		for (i = 0; i < umo->umo_depth; i++)
3852 			mdb_printf("\t %a\n", umo->umo_stack[i]);
3853 	}
3854 
3855 	return (DCMD_OK);
3856 }
3857 
/* The tag found in front of each malloc()ed buffer. */
struct malloc_data {
	uint32_t malloc_size;	/* size recorded for the buffer */
	uint32_t malloc_stat;	/* == UMEM_MALLOC_ENCODE(state, malloc_size) */
};

/*
 * Largest malloc size that gets a bucket of its own: UMEM_MAXBUF less the
 * tag, which is counted twice under _LP64.
 */
#ifdef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#endif
3868 
3869 typedef struct umem_malloc_info {
3870 	size_t um_total;	/* total allocated buffers */
3871 	size_t um_malloc;	/* malloc buffers */
3872 	size_t um_malloc_size;	/* sum of malloc buffer sizes */
3873 	size_t um_malloc_overhead; /* sum of in-chunk overheads */
3874 
3875 	umem_cache_t *um_cp;
3876 
3877 	uint_t *um_bucket;
3878 } umem_malloc_info_t;
3879 
3880 static void
umem_malloc_print_dist(uint_t * um_bucket,size_t minmalloc,size_t maxmalloc,size_t maxbuckets,size_t minbucketsize,int geometric)3881 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3882     size_t maxbuckets, size_t minbucketsize, int geometric)
3883 {
3884 	uint64_t um_malloc;
3885 	int minb = -1;
3886 	int maxb = -1;
3887 	int buckets;
3888 	int nbucks;
3889 	int i;
3890 	int b;
3891 	const int *distarray;
3892 
3893 	minb = (int)minmalloc;
3894 	maxb = (int)maxmalloc;
3895 
3896 	nbucks = buckets = maxb - minb + 1;
3897 
3898 	um_malloc = 0;
3899 	for (b = minb; b <= maxb; b++)
3900 		um_malloc += um_bucket[b];
3901 
3902 	if (maxbuckets != 0)
3903 		buckets = MIN(buckets, maxbuckets);
3904 
3905 	if (minbucketsize > 1) {
3906 		buckets = MIN(buckets, nbucks/minbucketsize);
3907 		if (buckets == 0) {
3908 			buckets = 1;
3909 			minbucketsize = nbucks;
3910 		}
3911 	}
3912 
3913 	if (geometric)
3914 		distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3915 	else
3916 		distarray = dist_linear(buckets, minb, maxb);
3917 
3918 	dist_print_header("malloc size", 11, "count");
3919 	for (i = 0; i < buckets; i++) {
3920 		dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3921 	}
3922 	mdb_printf("\n");
3923 }
3924 
3925 /*
3926  * A malloc()ed buffer looks like:
3927  *
3928  *	<----------- mi.malloc_size --->
3929  *	<----------- cp.cache_bufsize ------------------>
3930  *	<----------- cp.cache_chunksize -------------------------------->
3931  *	+-------+-----------------------+---------------+---------------+
3932  *	|/tag///| mallocsz		|/round-off/////|/debug info////|
3933  *	+-------+---------------------------------------+---------------+
3934  *		<-- usable space ------>
3935  *
3936  * mallocsz is the argument to malloc(3C).
3937  * mi.malloc_size is the actual size passed to umem_alloc(), which
3938  * is rounded up to the smallest available cache size, which is
3939  * cache_bufsize.  If there is debugging or alignment overhead in
3940  * the cache, that is reflected in a larger cache_chunksize.
3941  *
3942  * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3943  * depending upon the ISA's alignment requirements.  For 32-bit allocations,
3944  * it is always a 8-byte tag.  For 64-bit allocations larger than 8 bytes,
3945  * the tag has 8 bytes of padding before it.
3946  *
 * 32-bit, 64-bit buffers <= 8 bytes:
3948  *	+-------+-------+--------- ...
3949  *	|/size//|/stat//| mallocsz ...
3950  *	+-------+-------+--------- ...
3951  *			^
3952  *			pointer returned from malloc(3C)
3953  *
 * 64-bit buffers > 8 bytes:
3955  *	+---------------+-------+-------+--------- ...
3956  *	|/padding///////|/size//|/stat//| mallocsz ...
3957  *	+---------------+-------+-------+--------- ...
3958  *					^
3959  *					pointer returned from malloc(3C)
3960  *
3961  * The "size" field is "malloc_size", which is mallocsz + the padding.
3962  * The "stat" field is derived from malloc_size, and functions as a
3963  * validation that this buffer is actually from malloc(3C).
3964  */
3965 /*ARGSUSED*/
3966 static int
um_umem_buffer_cb(uintptr_t addr,void * buf,umem_malloc_info_t * ump)3967 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3968 {
3969 	struct malloc_data md;
3970 	size_t m_addr = addr;
3971 	size_t overhead = sizeof (md);
3972 	size_t mallocsz;
3973 
3974 	ump->um_total++;
3975 
3976 #ifdef _LP64
3977 	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3978 		m_addr += overhead;
3979 		overhead += sizeof (md);
3980 	}
3981 #endif
3982 
3983 	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3984 		mdb_warn("unable to read malloc header at %p", m_addr);
3985 		return (WALK_NEXT);
3986 	}
3987 
3988 	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3989 	case MALLOC_MAGIC:
3990 #ifdef _LP64
3991 	case MALLOC_SECOND_MAGIC:
3992 #endif
3993 		mallocsz = md.malloc_size - overhead;
3994 
3995 		ump->um_malloc++;
3996 		ump->um_malloc_size += mallocsz;
3997 		ump->um_malloc_overhead += overhead;
3998 
3999 		/* include round-off and debug overhead */
4000 		ump->um_malloc_overhead +=
4001 		    ump->um_cp->cache_chunksize - md.malloc_size;
4002 
4003 		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
4004 			ump->um_bucket[mallocsz]++;
4005 
4006 		break;
4007 	default:
4008 		break;
4009 	}
4010 
4011 	return (WALK_NEXT);
4012 }
4013 
4014 int
get_umem_alloc_sizes(int ** out,size_t * out_num)4015 get_umem_alloc_sizes(int **out, size_t *out_num)
4016 {
4017 	GElf_Sym sym;
4018 
4019 	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
4020 		mdb_warn("unable to look up umem_alloc_sizes");
4021 		return (-1);
4022 	}
4023 
4024 	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
4025 	*out_num = sym.st_size / sizeof (int);
4026 
4027 	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
4028 		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
4029 		*out = NULL;
4030 		return (-1);
4031 	}
4032 
4033 	return (0);
4034 }
4035 
4036 
4037 static int
um_umem_cache_cb(uintptr_t addr,umem_cache_t * cp,umem_malloc_info_t * ump)4038 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
4039 {
4040 	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
4041 		return (WALK_NEXT);
4042 
4043 	ump->um_cp = cp;
4044 
4045 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
4046 	    -1) {
4047 		mdb_warn("can't walk 'umem' for cache %p", addr);
4048 		return (WALK_ERR);
4049 	}
4050 
4051 	return (WALK_NEXT);
4052 }
4053 
/*
 * Help text for the ::umem_malloc_dist dcmd: a one-line summary followed by
 * the option list, with the indent pulled back by two columns around the
 * OPTIONS heading.
 */
void
umem_malloc_dist_help(void)
{
	static const char summary[] =
	    "report distribution of outstanding malloc()s";
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw data out, without binning\n"
"  -g    use geometric binning instead of linear binning\n";

	mdb_printf("%s\n", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
4070 
4071 /*ARGSUSED*/
4072 int
umem_malloc_dist(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)4073 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4074 {
4075 	umem_malloc_info_t mi;
4076 	uint_t geometric = 0;
4077 	uint_t dump = 0;
4078 	size_t maxbuckets = 0;
4079 	size_t minbucketsize = 0;
4080 
4081 	size_t minalloc = 0;
4082 	size_t maxalloc = UMI_MAX_BUCKET;
4083 
4084 	if (flags & DCMD_ADDRSPEC)
4085 		return (DCMD_USAGE);
4086 
4087 	if (mdb_getopts(argc, argv,
4088 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
4089 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
4090 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
4091 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
4092 	    0) != argc)
4093 		return (DCMD_USAGE);
4094 
4095 	bzero(&mi, sizeof (mi));
4096 	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4097 	    UM_SLEEP | UM_GC);
4098 
4099 	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
4100 	    &mi) == -1) {
4101 		mdb_warn("unable to walk 'umem_cache'");
4102 		return (DCMD_ERR);
4103 	}
4104 
4105 	if (dump) {
4106 		int i;
4107 		for (i = minalloc; i <= maxalloc; i++)
4108 			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
4109 
4110 		return (DCMD_OK);
4111 	}
4112 
4113 	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
4114 	    maxbuckets, minbucketsize, geometric);
4115 
4116 	return (DCMD_OK);
4117 }
4118 
/*
 * Help text for the ::umem_malloc_info dcmd: a one-line summary followed by
 * the option list.  The -g line is left out when built with _KMDB defined.
 */
void
umem_malloc_info_help(void)
{
	static const char summary[] =
	    "report information about malloc()s by cache.  ";
	static const char options[] =
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
"  -g    use geometric binning instead of linear binning\n"
#endif
	    "";

	mdb_printf("%s\n", summary);

	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);

	mdb_printf("%s", options);
}
4138 int
umem_malloc_info(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)4139 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4140 {
4141 	umem_cache_t c;
4142 	umem_malloc_info_t mi;
4143 
4144 	int skip = 0;
4145 
4146 	size_t maxmalloc;
4147 	size_t overhead;
4148 	size_t allocated;
4149 	size_t avg_malloc;
4150 	size_t overhead_pct;	/* 1000 * overhead_percent */
4151 
4152 	uint_t verbose = 0;
4153 	uint_t dump = 0;
4154 	uint_t geometric = 0;
4155 	size_t maxbuckets = 0;
4156 	size_t minbucketsize = 0;
4157 
4158 	int *alloc_sizes;
4159 	int idx;
4160 	size_t num;
4161 	size_t minmalloc;
4162 
4163 	if (mdb_getopts(argc, argv,
4164 	    'd', MDB_OPT_SETBITS, TRUE, &dump,
4165 	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
4166 	    'b', MDB_OPT_UINTPTR, &maxbuckets,
4167 	    'B', MDB_OPT_UINTPTR, &minbucketsize,
4168 	    0) != argc)
4169 		return (DCMD_USAGE);
4170 
4171 	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4172 		verbose = 1;
4173 
4174 	if (!(flags & DCMD_ADDRSPEC)) {
4175 		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4176 		    argc, argv) == -1) {
4177 			mdb_warn("can't walk umem_cache");
4178 			return (DCMD_ERR);
4179 		}
4180 		return (DCMD_OK);
4181 	}
4182 
4183 	if (!mdb_vread(&c, sizeof (c), addr)) {
4184 		mdb_warn("unable to read cache at %p", addr);
4185 		return (DCMD_ERR);
4186 	}
4187 
4188 	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4189 		if (!(flags & DCMD_LOOP))
4190 			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4191 			    "by malloc()\n", c.cache_name);
4192 		skip = 1;
4193 	}
4194 
4195 	/*
4196 	 * normally, print the header only the first time.  In verbose mode,
4197 	 * print the header on every non-skipped buffer
4198 	 */
4199 	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4200 		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4201 		    "CACHE", "BUFSZ", "MAXMAL",
4202 		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4203 
4204 	if (skip)
4205 		return (DCMD_OK);
4206 
4207 	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4208 #ifdef _LP64
4209 	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4210 		maxmalloc -= sizeof (struct malloc_data);
4211 #endif
4212 
4213 	bzero(&mi, sizeof (mi));
4214 	mi.um_cp = &c;
4215 	if (verbose)
4216 		mi.um_bucket =
4217 		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4218 		    UM_SLEEP | UM_GC);
4219 
4220 	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4221 	    -1) {
4222 		mdb_warn("can't walk 'umem'");
4223 		return (DCMD_ERR);
4224 	}
4225 
4226 	overhead = mi.um_malloc_overhead;
4227 	allocated = mi.um_malloc_size;
4228 
4229 	/* do integer round off for the average */
4230 	if (mi.um_malloc != 0)
4231 		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4232 	else
4233 		avg_malloc = 0;
4234 
4235 	/*
4236 	 * include per-slab overhead
4237 	 *
4238 	 * Each slab in a given cache is the same size, and has the same
4239 	 * number of chunks in it;  we read in the first slab on the
4240 	 * slab list to get the number of chunks for all slabs.  To
4241 	 * compute the per-slab overhead, we just subtract the chunk usage
4242 	 * from the slabsize:
4243 	 *
4244 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4245 	 * |////////////|	|	| ...	|	|///////|///////|
4246 	 * |////color///| chunk	| chunk	| ...	| chunk	|/color/|/slab//|
4247 	 * |////////////|	|	| ...	|	|///////|///////|
4248 	 * +------------+-------+-------+ ... --+-------+-------+-------+
4249 	 * |		\_______chunksize * chunks_____/		|
4250 	 * \__________________________slabsize__________________________/
4251 	 *
4252 	 * For UMF_HASH caches, there is an additional source of overhead;
4253 	 * the external umem_slab_t and per-chunk bufctl structures.  We
4254 	 * include those in our per-slab overhead.
4255 	 *
4256 	 * Once we have a number for the per-slab overhead, we estimate
4257 	 * the actual overhead by treating the malloc()ed buffers as if
4258 	 * they were densely packed:
4259 	 *
4260 	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
4261 	 *
4262 	 * carefully ordering the multiply before the divide, to avoid
4263 	 * round-off error.
4264 	 */
4265 	if (mi.um_malloc != 0) {
4266 		umem_slab_t slab;
4267 		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4268 
4269 		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4270 			mdb_warn("unable to read slab at %p\n", saddr);
4271 		} else {
4272 			long chunks = slab.slab_chunks;
4273 			if (chunks != 0 && c.cache_chunksize != 0 &&
4274 			    chunks <= c.cache_slabsize / c.cache_chunksize) {
4275 				uintmax_t perslab =
4276 				    c.cache_slabsize -
4277 				    (c.cache_chunksize * chunks);
4278 
4279 				if (c.cache_flags & UMF_HASH) {
4280 					perslab += sizeof (umem_slab_t) +
4281 					    chunks *
4282 					    ((c.cache_flags & UMF_AUDIT) ?
4283 					    sizeof (umem_bufctl_audit_t) :
4284 					    sizeof (umem_bufctl_t));
4285 				}
4286 				overhead +=
4287 				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
4288 			} else {
4289 				mdb_warn("invalid #chunks (%d) in slab %p\n",
4290 				    chunks, saddr);
4291 			}
4292 		}
4293 	}
4294 
4295 	if (allocated != 0)
4296 		overhead_pct = (1000ULL * overhead) / allocated;
4297 	else
4298 		overhead_pct = 0;
4299 
4300 	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4301 	    addr, c.cache_bufsize, maxmalloc,
4302 	    mi.um_malloc, avg_malloc, allocated, overhead,
4303 	    overhead_pct / 10, overhead_pct % 10);
4304 
4305 	if (!verbose)
4306 		return (DCMD_OK);
4307 
4308 	if (!dump)
4309 		mdb_printf("\n");
4310 
4311 	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4312 		return (DCMD_ERR);
4313 
4314 	for (idx = 0; idx < num; idx++) {
4315 		if (alloc_sizes[idx] == c.cache_bufsize)
4316 			break;
4317 		if (alloc_sizes[idx] == 0) {
4318 			idx = num;	/* 0-terminated array */
4319 			break;
4320 		}
4321 	}
4322 	if (idx == num) {
4323 		mdb_warn(
4324 		    "cache %p's size (%d) not in umem_alloc_sizes\n",
4325 		    addr, c.cache_bufsize);
4326 		return (DCMD_ERR);
4327 	}
4328 
4329 	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4330 	if (minmalloc > 0) {
4331 #ifdef _LP64
4332 		if (minmalloc > UMEM_SECOND_ALIGN)
4333 			minmalloc -= sizeof (struct malloc_data);
4334 #endif
4335 		minmalloc -= sizeof (struct malloc_data);
4336 		minmalloc += 1;
4337 	}
4338 
4339 	if (dump) {
4340 		for (idx = minmalloc; idx <= maxmalloc; idx++)
4341 			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4342 		mdb_printf("\n");
4343 	} else {
4344 		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4345 		    maxbuckets, minbucketsize, geometric);
4346 	}
4347 
4348 	return (DCMD_OK);
4349 }
4350