1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2019 Joyent, Inc.
28 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
29 */
30
31 #include "umem.h"
32
33 #include <sys/vmem_impl_user.h>
34 #include <umem_impl.h>
35
36 #include <alloca.h>
37 #include <limits.h>
38 #include <mdb/mdb_whatis.h>
39 #include <thr_uberdata.h>
40
41 #include "misc.h"
42 #include "leaky.h"
43 #include "dist.h"
44
45 #include "umem_pagesize.h"
46
47 #define UM_ALLOCATED 0x1
48 #define UM_FREE 0x2
49 #define UM_BUFCTL 0x4
50 #define UM_HASH 0x8
51
52 int umem_ready;
53
54 static int umem_stack_depth_warned;
55 static uint32_t umem_max_ncpus;
56 uint32_t umem_stack_depth;
57
58 size_t umem_pagesize;
59
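/*
 * Read the target's value of the named variable into our local copy.
 * On failure the macro warns and evaluates to true, so callers can simply
 * write "if (UMEM_READVAR(var)) return (...);".
 */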
60 #define UMEM_READVAR(var) \
61 (umem_readvar(&(var), #var) == -1 && \
62 (mdb_warn("failed to read "#var), 1))
63
64 int
65 umem_update_variables(void)
66 {
67 size_t pagesize;
68
69 /*
70 * Figure out which type of umem is being used; if it's not there
71 * yet, succeed quietly.
72 */
73 if (umem_set_standalone() == -1) {
74 umem_ready = 0;
75 return (0); /* umem not there yet */
76 }
77
78 /*
79 * Solaris 9 used a different name for umem_max_ncpus. It's
80 * cheap backwards compatibility to check for both names.
81 */
82 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
83 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
84 mdb_warn("unable to read umem_max_ncpus or max_ncpus");
85 return (-1);
86 }
87 if (UMEM_READVAR(umem_ready))
88 return (-1);
89 if (UMEM_READVAR(umem_stack_depth))
90 return (-1);
91 if (UMEM_READVAR(pagesize))
92 return (-1);
93
94 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
95 if (umem_stack_depth_warned == 0) {
96 mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
97 umem_stack_depth, UMEM_MAX_STACK_DEPTH);
98 umem_stack_depth_warned = 1;
99 }
100 umem_stack_depth = 0;
101 }
102
103 umem_pagesize = pagesize;
104
105 return (0);
106 }
107
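/*
 * Per-thread cache (ptc) walker.  With no starting address we layer on the
 * 'ulwp' walk, visiting each thread's tm_roots[] slot at the offset passed
 * in walk_arg.  Each free buffer in a per-thread cache stores a pointer to
 * the next free buffer in its first word, so the step function simply
 * chases that chain until it reads a NULL link.
 */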
108 static int
109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
110 {
111 if (wsp->walk_addr == 0) {
112 if (mdb_layered_walk("ulwp", wsp) == -1) {
113 mdb_warn("couldn't walk 'ulwp'");
114 return (WALK_ERR);
115 }
116 }
117
118 return (WALK_NEXT);
119 }
120
121 static int
122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
123 {
124 uintptr_t this;
125 int rval;
126
127 if (wsp->walk_layer != NULL) {
128 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
129 (uintptr_t)wsp->walk_arg;
130 } else {
131 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
132 }
133
134 for (;;) {
135 if (mdb_vread(&this, sizeof (void *), this) == -1) {
136 mdb_warn("couldn't read ptc buffer at %p", this);
137 return (WALK_ERR);
138 }
139
140 if (this == 0)
141 break;
142
143 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
144
145 if (rval != WALK_NEXT)
146 return (rval);
147 }
148
149 return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
150 }
151
152 /*ARGSUSED*/
153 static int
154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
155 {
156 mdb_walker_t w;
157 char descr[64];
158 char name[64];
159 int i;
160
161 (void) mdb_snprintf(descr, sizeof (descr),
162 "walk the %s cache", c->cache_name);
163
164 w.walk_name = c->cache_name;
165 w.walk_descr = descr;
166 w.walk_init = umem_walk_init;
167 w.walk_step = umem_walk_step;
168 w.walk_fini = umem_walk_fini;
169 w.walk_init_arg = (void *)addr;
170
171 if (mdb_add_walker(&w) == -1)
172 mdb_warn("failed to add %s walker", c->cache_name);
173
174 if (!(c->cache_flags & UMF_PTC))
175 return (WALK_NEXT);
176
177 /*
178 * For the per-thread cache walker, the address is the offset in the
179 * tm_roots[] array of the ulwp_t.
180 */
181 for (i = 0; sizes[i] != 0; i++) {
182 if (sizes[i] == c->cache_bufsize)
183 break;
184 }
185
186 if (sizes[i] == 0) {
187 mdb_warn("cache %s is cached per-thread, but could not find "
188 "size in umem_alloc_sizes\n", c->cache_name);
189 return (WALK_NEXT);
190 }
191
192 if (i >= NTMEMBASE) {
193 mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
194 c->cache_name, i, NTMEMBASE);
195 return (WALK_NEXT);
196 }
197
198 (void) mdb_snprintf(name, sizeof (name),
199 "umem_ptc_%d", c->cache_bufsize);
200 (void) mdb_snprintf(descr, sizeof (descr),
201 "walk the per-thread cache for %s", c->cache_name);
202
203 w.walk_name = name;
204 w.walk_descr = descr;
205 w.walk_init = umem_ptc_walk_init;
206 w.walk_step = umem_ptc_walk_step;
207 w.walk_fini = NULL;
208 w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
209
210 if (mdb_add_walker(&w) == -1)
211 mdb_warn("failed to add %s walker", w.walk_name);
212
213 return (WALK_NEXT);
214 }
215
216 /*ARGSUSED*/
217 static void
218 umem_statechange_cb(void *arg)
219 {
220 static int been_ready = 0;
221 GElf_Sym sym;
222 int *sizes;
223
224 #ifndef _KMDB
225 leaky_cleanup(1); /* state changes invalidate leaky state */
226 #endif
227
228 if (umem_update_variables() == -1)
229 return;
230
231 if (been_ready)
232 return;
233
234 if (umem_ready != UMEM_READY)
235 return;
236
237 been_ready = 1;
238
239 /*
240 * In order to determine the tm_roots offset of any cache that is
241 * cached per-thread, we need to have the umem_alloc_sizes array.
242 * Read this, ensuring that it is zero-terminated.
243 */
244 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
245 mdb_warn("unable to lookup 'umem_alloc_sizes'");
246 return;
247 }
248
249 sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
250
251 if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
252 mdb_warn("couldn't read 'umem_alloc_sizes'");
253 return;
254 }
255
256 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
257 }
258
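/*
 * Dump umem's internal error message buffer.  umem_error_buffer is a ring
 * buffer indexed by umem_error_begin, so we print the older portion (from
 * the current position to the end) followed by the newer portion.
 */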
259 int
260 umem_abort_messages(void)
261 {
262 char *umem_error_buffer;
263 uint_t umem_error_begin;
264 GElf_Sym sym;
265 size_t bufsize;
266
267 if (UMEM_READVAR(umem_error_begin))
268 return (DCMD_ERR);
269
270 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
271 mdb_warn("unable to look up umem_error_buffer");
272 return (DCMD_ERR);
273 }
274
275 bufsize = (size_t)sym.st_size;
276
277 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
278
279 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
280 != bufsize) {
281 mdb_warn("unable to read umem_error_buffer");
282 return (DCMD_ERR);
283 }
284 /* put a zero after the end of the buffer to simplify printing */
285 umem_error_buffer[bufsize] = 0;
286
287 if ((umem_error_begin % bufsize) == 0)
288 mdb_printf("%s\n", umem_error_buffer);
289 else {
290 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
291 mdb_printf("%s%s\n",
292 &umem_error_buffer[umem_error_begin % bufsize],
293 umem_error_buffer);
294 }
295
296 return (DCMD_OK);
297 }
298
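/*
 * Print "name=size" for one of umem's logs, scaling the total log size to
 * megabytes or kilobytes when it divides evenly.
 */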
299 static void
300 umem_log_status(const char *name, umem_log_header_t *val)
301 {
302 umem_log_header_t my_lh;
303 uintptr_t pos = (uintptr_t)val;
304 size_t size;
305
306 if (pos == 0)
307 return;
308
309 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
310 mdb_warn("\nunable to read umem_%s_log pointer %p",
311 name, pos);
312 return;
313 }
314
315 size = my_lh.lh_chunksize * my_lh.lh_nchunks;
316
317 if (size % (1024 * 1024) == 0)
318 mdb_printf("%s=%dm ", name, size / (1024 * 1024));
319 else if (size % 1024 == 0)
320 mdb_printf("%s=%dk ", name, size / 1024);
321 else
322 mdb_printf("%s=%d ", name, size);
323 }
324
325 typedef struct umem_debug_flags {
326 const char *udf_name;
327 uint_t udf_flags;
328 uint_t udf_clear; /* if 0, uses udf_flags */
329 } umem_debug_flags_t;
330
331 umem_debug_flags_t umem_status_flags[] = {
332 { "random", UMF_RANDOMIZE, UMF_RANDOM },
333 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
334 { "audit", UMF_AUDIT },
335 { "guards", UMF_DEADBEEF | UMF_REDZONE },
336 { "nosignal", UMF_CHECKSIGNAL },
337 { "firewall", UMF_FIREWALL },
338 { "lite", UMF_LITE },
339 { "checknull", UMF_CHECKNULL },
340 { NULL }
341 };
342
343 /*ARGSUSED*/
344 int
345 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
346 {
347 int umem_logging;
348
349 umem_log_header_t *umem_transaction_log;
350 umem_log_header_t *umem_content_log;
351 umem_log_header_t *umem_failure_log;
352 umem_log_header_t *umem_slab_log;
353
354 mdb_printf("Status:\t\t%s\n",
355 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
356 umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
357 umem_ready == UMEM_READY_INITING ? "initialization in process" :
358 umem_ready == UMEM_READY ? "ready and active" :
359 umem_ready == 0 ? "not loaded into address space" :
360 "unknown (umem_ready invalid)");
361
362 if (umem_ready == 0)
363 return (DCMD_OK);
364
365 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
366
367 if (UMEM_READVAR(umem_logging))
368 goto err;
369 if (UMEM_READVAR(umem_transaction_log))
370 goto err;
371 if (UMEM_READVAR(umem_content_log))
372 goto err;
373 if (UMEM_READVAR(umem_failure_log))
374 goto err;
375 if (UMEM_READVAR(umem_slab_log))
376 goto err;
377
378 mdb_printf("Logs:\t\t");
379 umem_log_status("transaction", umem_transaction_log);
380 umem_log_status("content", umem_content_log);
381 umem_log_status("fail", umem_failure_log);
382 umem_log_status("slab", umem_slab_log);
383 if (!umem_logging)
384 mdb_printf("(inactive)");
385 mdb_printf("\n");
386
387 mdb_printf("Message buffer:\n");
388 return (umem_abort_messages());
389
390 err:
391 mdb_printf("Message buffer:\n");
392 (void) umem_abort_messages();
393 return (DCMD_ERR);
394 }
395
396 typedef struct {
397 uintptr_t ucw_first;
398 uintptr_t ucw_current;
399 } umem_cache_walk_t;
400
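/*
 * umem caches are kept on a circular list anchored at umem_null_cache; the
 * walk starts at the anchor's cache_next and terminates when it wraps back
 * around to the anchor.
 */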
401 int
402 umem_cache_walk_init(mdb_walk_state_t *wsp)
403 {
404 umem_cache_walk_t *ucw;
405 umem_cache_t c;
406 uintptr_t cp;
407 GElf_Sym sym;
408
409 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
410 mdb_warn("couldn't find umem_null_cache");
411 return (WALK_ERR);
412 }
413
414 cp = (uintptr_t)sym.st_value;
415
416 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
417 mdb_warn("couldn't read cache at %p", cp);
418 return (WALK_ERR);
419 }
420
421 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
422
423 ucw->ucw_first = cp;
424 ucw->ucw_current = (uintptr_t)c.cache_next;
425 wsp->walk_data = ucw;
426
427 return (WALK_NEXT);
428 }
429
430 int
431 umem_cache_walk_step(mdb_walk_state_t *wsp)
432 {
433 umem_cache_walk_t *ucw = wsp->walk_data;
434 umem_cache_t c;
435 int status;
436
437 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
438 mdb_warn("couldn't read cache at %p", ucw->ucw_current);
439 return (WALK_DONE);
440 }
441
442 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
443
444 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
445 return (WALK_DONE);
446
447 return (status);
448 }
449
450 void
451 umem_cache_walk_fini(mdb_walk_state_t *wsp)
452 {
453 umem_cache_walk_t *ucw = wsp->walk_data;
454 mdb_free(ucw, sizeof (umem_cache_walk_t));
455 }
456
457 typedef struct {
458 umem_cpu_t *ucw_cpus;
459 uint32_t ucw_current;
460 uint32_t ucw_max;
461 } umem_cpu_walk_state_t;
462
463 int
464 umem_cpu_walk_init(mdb_walk_state_t *wsp)
465 {
466 umem_cpu_t *umem_cpus;
467
468 umem_cpu_walk_state_t *ucw;
469
470 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
471 mdb_warn("failed to read 'umem_cpus'");
472 return (WALK_ERR);
473 }
474
475 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
476
477 ucw->ucw_cpus = umem_cpus;
478 ucw->ucw_current = 0;
479 ucw->ucw_max = umem_max_ncpus;
480
481 wsp->walk_data = ucw;
482 return (WALK_NEXT);
483 }
484
485 int
486 umem_cpu_walk_step(mdb_walk_state_t *wsp)
487 {
488 umem_cpu_t cpu;
489 umem_cpu_walk_state_t *ucw = wsp->walk_data;
490
491 uintptr_t caddr;
492
493 if (ucw->ucw_current >= ucw->ucw_max)
494 return (WALK_DONE);
495
496 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
497
498 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
499 mdb_warn("failed to read cpu %d", ucw->ucw_current);
500 return (WALK_ERR);
501 }
502
503 ucw->ucw_current++;
504
505 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
506 }
507
508 void
509 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
510 {
511 umem_cpu_walk_state_t *ucw = wsp->walk_data;
512
513 mdb_free(ucw, sizeof (*ucw));
514 }
515
516 int
517 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
518 {
519 if (wsp->walk_addr == 0) {
520 mdb_warn("umem_cpu_cache doesn't support global walks");
521 return (WALK_ERR);
522 }
523
524 if (mdb_layered_walk("umem_cpu", wsp) == -1) {
525 mdb_warn("couldn't walk 'umem_cpu'");
526 return (WALK_ERR);
527 }
528
529 wsp->walk_data = (void *)wsp->walk_addr;
530
531 return (WALK_NEXT);
532 }
533
534 int
535 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
536 {
537 uintptr_t caddr = (uintptr_t)wsp->walk_data;
538 const umem_cpu_t *cpu = wsp->walk_layer;
539 umem_cpu_cache_t cc;
540
541 caddr += cpu->cpu_cache_offset;
542
543 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
544 mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
545 return (WALK_ERR);
546 }
547
548 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
549 }
550
551 int
552 umem_slab_walk_init(mdb_walk_state_t *wsp)
553 {
554 uintptr_t caddr = wsp->walk_addr;
555 umem_cache_t c;
556
557 if (caddr == 0) {
558 mdb_warn("umem_slab doesn't support global walks\n");
559 return (WALK_ERR);
560 }
561
562 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
563 mdb_warn("couldn't read umem_cache at %p", caddr);
564 return (WALK_ERR);
565 }
566
567 wsp->walk_data =
568 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
569 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
570
571 return (WALK_NEXT);
572 }
573
574 int
575 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
576 {
577 uintptr_t caddr = wsp->walk_addr;
578 umem_cache_t c;
579
580 if (caddr == 0) {
581 mdb_warn("umem_slab_partial doesn't support global walks\n");
582 return (WALK_ERR);
583 }
584
585 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
586 mdb_warn("couldn't read umem_cache at %p", caddr);
587 return (WALK_ERR);
588 }
589
590 wsp->walk_data =
591 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
592 wsp->walk_addr = (uintptr_t)c.cache_freelist;
593
594 /*
595 * Some consumers (umem_walk_step(), in particular) require at
596 * least one callback if there are any buffers in the cache. So
597 * if there are *no* partial slabs, report the last full slab, if
598 * any.
599 *
600 * Yes, this is ugly, but it's cleaner than the other possibilities.
601 */
602 if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
603 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
604
605 return (WALK_NEXT);
606 }
607
608 int
609 umem_slab_walk_step(mdb_walk_state_t *wsp)
610 {
611 umem_slab_t s;
612 uintptr_t addr = wsp->walk_addr;
613 uintptr_t saddr = (uintptr_t)wsp->walk_data;
614 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
615
616 if (addr == saddr)
617 return (WALK_DONE);
618
619 if (mdb_vread(&s, sizeof (s), addr) == -1) {
620 mdb_warn("failed to read slab at %p", wsp->walk_addr);
621 return (WALK_ERR);
622 }
623
624 if ((uintptr_t)s.slab_cache != caddr) {
625 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
626 addr, caddr, s.slab_cache);
627 return (WALK_ERR);
628 }
629
630 wsp->walk_addr = (uintptr_t)s.slab_next;
631
632 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
633 }
634
635 int
636 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
637 {
638 umem_cache_t c;
639
640 if (!(flags & DCMD_ADDRSPEC)) {
641 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
642 mdb_warn("can't walk umem_cache");
643 return (DCMD_ERR);
644 }
645 return (DCMD_OK);
646 }
647
648 if (DCMD_HDRSPEC(flags))
649 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
650 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
651
652 if (mdb_vread(&c, sizeof (c), addr) == -1) {
653 mdb_warn("couldn't read umem_cache at %p", addr);
654 return (DCMD_ERR);
655 }
656
657 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
658 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
659
660 return (DCMD_OK);
661 }
662
663 static int
664 addrcmp(const void *lhs, const void *rhs)
665 {
666 uintptr_t p1 = *((uintptr_t *)lhs);
667 uintptr_t p2 = *((uintptr_t *)rhs);
668
669 if (p1 < p2)
670 return (-1);
671 if (p1 > p2)
672 return (1);
673 return (0);
674 }
675
676 static int
677 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
678 {
679 const umem_bufctl_audit_t *bcp1 = *lhs;
680 const umem_bufctl_audit_t *bcp2 = *rhs;
681
682 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
683 return (-1);
684
685 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
686 return (1);
687
688 return (0);
689 }
690
691 typedef struct umem_hash_walk {
692 uintptr_t *umhw_table;
693 size_t umhw_nelems;
694 size_t umhw_pos;
695 umem_bufctl_t umhw_cur;
696 } umem_hash_walk_t;
697
698 int
699 umem_hash_walk_init(mdb_walk_state_t *wsp)
700 {
701 umem_hash_walk_t *umhw;
702 uintptr_t *hash;
703 umem_cache_t c;
704 uintptr_t haddr, addr = wsp->walk_addr;
705 size_t nelems;
706 size_t hsize;
707
708 if (addr == 0) {
709 mdb_warn("umem_hash doesn't support global walks\n");
710 return (WALK_ERR);
711 }
712
713 if (mdb_vread(&c, sizeof (c), addr) == -1) {
714 mdb_warn("couldn't read cache at addr %p", addr);
715 return (WALK_ERR);
716 }
717
718 if (!(c.cache_flags & UMF_HASH)) {
719 mdb_warn("cache %p doesn't have a hash table\n", addr);
720 return (WALK_DONE); /* nothing to do */
721 }
722
723 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
724 umhw->umhw_cur.bc_next = NULL;
725 umhw->umhw_pos = 0;
726
727 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
728 hsize = nelems * sizeof (uintptr_t);
729 haddr = (uintptr_t)c.cache_hash_table;
730
731 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
732 if (mdb_vread(hash, hsize, haddr) == -1) {
733 mdb_warn("failed to read hash table at %p", haddr);
734 mdb_free(hash, hsize);
735 mdb_free(umhw, sizeof (umem_hash_walk_t));
736 return (WALK_ERR);
737 }
738
739 wsp->walk_data = umhw;
740
741 return (WALK_NEXT);
742 }
743
744 int
745 umem_hash_walk_step(mdb_walk_state_t *wsp)
746 {
747 umem_hash_walk_t *umhw = wsp->walk_data;
748 uintptr_t addr = 0;
749
750 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == 0) {
751 while (umhw->umhw_pos < umhw->umhw_nelems) {
752 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != 0)
753 break;
754 }
755 }
756 if (addr == 0)
757 return (WALK_DONE);
758
759 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
760 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
761 return (WALK_ERR);
762 }
763
764 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
765 }
766
767 void
768 umem_hash_walk_fini(mdb_walk_state_t *wsp)
769 {
770 umem_hash_walk_t *umhw = wsp->walk_data;
771
772 if (umhw == NULL)
773 return;
774
775 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
776 mdb_free(umhw, sizeof (umem_hash_walk_t));
777 }
778
779 /*
780 * Find the address of the bufctl structure for the address 'buf' in cache
781 * 'cp', which is at address caddr, and place it in *out.
782 */
783 static int
784 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
785 {
786 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
787 umem_bufctl_t *bcp;
788 umem_bufctl_t bc;
789
790 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
791 mdb_warn("unable to read hash bucket for %p in cache %p",
792 buf, caddr);
793 return (-1);
794 }
795
796 while (bcp != NULL) {
797 if (mdb_vread(&bc, sizeof (umem_bufctl_t),
798 (uintptr_t)bcp) == -1) {
799 mdb_warn("unable to read bufctl at %p", bcp);
800 return (-1);
801 }
802 if (bc.bc_addr == buf) {
803 *out = (uintptr_t)bcp;
804 return (0);
805 }
806 bcp = bc.bc_next;
807 }
808
809 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
810 return (-1);
811 }
812
813 int
814 umem_get_magsize(const umem_cache_t *cp)
815 {
816 uintptr_t addr = (uintptr_t)cp->cache_magtype;
817 GElf_Sym mt_sym;
818 umem_magtype_t mt;
819 int res;
820
821 /*
822 * if cpu 0 has a non-zero magsize, it must be correct. caches
823 * with UMF_NOMAGAZINE have disabled their magazine layers, so
824 * it is okay to return 0 for them.
825 */
826 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
827 (cp->cache_flags & UMF_NOMAGAZINE))
828 return (res);
829
830 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
831 mdb_warn("unable to read 'umem_magtype'");
832 } else if (addr < mt_sym.st_value ||
833 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
834 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
835 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
836 cp->cache_name, addr);
837 return (0);
838 }
839 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
840 mdb_warn("unable to read magtype at %a", addr);
841 return (0);
842 }
843 return (mt.mt_magsize);
844 }
845
846 /*ARGSUSED*/
847 static int
848 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
849 {
850 *est -= (sp->slab_chunks - sp->slab_refcnt);
851
852 return (WALK_NEXT);
853 }
854
855 /*
856 * Returns an upper bound on the number of allocated buffers in a given
857 * cache.
858 */
859 size_t
860 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
861 {
862 int magsize;
863 size_t cache_est;
864
865 cache_est = cp->cache_buftotal;
866
867 (void) mdb_pwalk("umem_slab_partial",
868 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
869
870 if ((magsize = umem_get_magsize(cp)) != 0) {
871 size_t mag_est = cp->cache_full.ml_total * magsize;
872
873 if (cache_est >= mag_est) {
874 cache_est -= mag_est;
875 } else {
876 mdb_warn("cache %p's magazine layer holds more buffers "
877 "than the slab layer.\n", addr);
878 }
879 }
880 return (cache_est);
881 }
882
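/*
 * Read the magazine at 'ump' and append 'rounds' of its round pointers to
 * maglist[], bailing out if we would exceed the magmax fudge-factor bound.
 */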
883 #define READMAG_ROUNDS(rounds) { \
884 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
885 mdb_warn("couldn't read magazine at %p", ump); \
886 goto fail; \
887 } \
888 for (i = 0; i < rounds; i++) { \
889 maglist[magcnt++] = mp->mag_round[i]; \
890 if (magcnt == magmax) { \
891 mdb_warn("%d magazines exceeds fudge factor\n", \
892 magcnt); \
893 goto fail; \
894 } \
895 } \
896 }
897
898 static int
899 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
900 void ***maglistp, size_t *magcntp, size_t *magmaxp)
901 {
902 umem_magazine_t *ump, *mp;
903 void **maglist = NULL;
904 int i, cpu;
905 size_t magsize, magmax, magbsize;
906 size_t magcnt = 0;
907
908 /*
909 * Read the magtype out of the cache, after verifying the pointer's
910 * correctness.
911 */
912 magsize = umem_get_magsize(cp);
913 if (magsize == 0) {
914 *maglistp = NULL;
915 *magcntp = 0;
916 *magmaxp = 0;
917 return (0);
918 }
919
920 /*
921 * There are several places where we need to go buffer hunting:
922 * the per-CPU loaded magazine, the per-CPU spare full magazine,
923 * and the full magazine list in the depot.
924 *
925 * For an upper bound on the number of buffers in the magazine
926 * layer, we have the number of magazines on the cache_full
927 * list plus at most two magazines per CPU (the loaded and the
928 * spare). Toss in 100 magazines as a fudge factor in case this
929 * is live (the number "100" comes from the same fudge factor in
930 * crash(8)).
931 */
932 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
933 magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
934
935 if (magbsize >= PAGESIZE / 2) {
936 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
937 addr, magbsize);
938 return (-1);
939 }
940
941 maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
942 mp = mdb_alloc(magbsize, UM_SLEEP);
943 if (mp == NULL || maglist == NULL)
944 goto fail;
945
946 /*
947 * First up: the magazines in the depot (i.e. on the cache_full list).
948 */
949 for (ump = cp->cache_full.ml_list; ump != NULL; ) {
950 READMAG_ROUNDS(magsize);
951 ump = mp->mag_next;
952
953 if (ump == cp->cache_full.ml_list)
954 break; /* cache_full list loop detected */
955 }
956
957 dprintf(("cache_full list done\n"));
958
959 /*
960 * Now whip through the CPUs, snagging the loaded magazines
961 * and full spares.
962 */
963 for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
964 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
965
966 dprintf(("reading cpu cache %p\n",
967 (uintptr_t)ccp - (uintptr_t)cp + addr));
968
969 if (ccp->cc_rounds > 0 &&
970 (ump = ccp->cc_loaded) != NULL) {
971 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
972 READMAG_ROUNDS(ccp->cc_rounds);
973 }
974
975 if (ccp->cc_prounds > 0 &&
976 (ump = ccp->cc_ploaded) != NULL) {
977 dprintf(("reading %d previously loaded rounds\n",
978 ccp->cc_prounds));
979 READMAG_ROUNDS(ccp->cc_prounds);
980 }
981 }
982
983 dprintf(("magazine layer: %d buffers\n", magcnt));
984
985 mdb_free(mp, magbsize);
986
987 *maglistp = maglist;
988 *magcntp = magcnt;
989 *magmaxp = magmax;
990
991 return (0);
992
993 fail:
994 if (mp)
995 mdb_free(mp, magbsize);
996 if (maglist)
997 mdb_free(maglist, magmax * sizeof (void *));
998
999 return (-1);
1000 }
1001
1002 typedef struct umem_read_ptc_walk {
1003 void **urpw_buf;
1004 size_t urpw_cnt;
1005 size_t urpw_max;
1006 } umem_read_ptc_walk_t;
1007
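/*
 * Callback for the per-thread cache walk: append each buffer address to a
 * dynamically grown array, doubling its size whenever it fills up.
 */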
1008 /*ARGSUSED*/
1009 static int
1010 umem_read_ptc_walk_buf(uintptr_t addr,
1011 const void *ignored, umem_read_ptc_walk_t *urpw)
1012 {
1013 if (urpw->urpw_cnt == urpw->urpw_max) {
1014 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1015 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1016
1017 if (nmax > 1) {
1018 size_t osize = urpw->urpw_max * sizeof (void *);
1019 bcopy(urpw->urpw_buf, new, osize);
1020 mdb_free(urpw->urpw_buf, osize);
1021 }
1022
1023 urpw->urpw_buf = new;
1024 urpw->urpw_max = nmax;
1025 }
1026
1027 urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1028
1029 return (WALK_NEXT);
1030 }
1031
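/*
 * Gather the buffers sitting in the per-thread caches (for caches with
 * UMF_PTC set) by running the matching umem_ptc_<size> walker and appending
 * the results to the magazine-layer buffer list.
 */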
1032 static int
1033 umem_read_ptc(umem_cache_t *cp,
1034 void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1035 {
1036 umem_read_ptc_walk_t urpw;
1037 char walk[60];
1038 int rval;
1039
1040 if (!(cp->cache_flags & UMF_PTC))
1041 return (0);
1042
1043 (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1044 cp->cache_bufsize);
1045
1046 urpw.urpw_buf = *buflistp;
1047 urpw.urpw_cnt = *bufcntp;
1048 urpw.urpw_max = *bufmaxp;
1049
1050 if ((rval = mdb_walk(walk,
1051 (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1052 mdb_warn("couldn't walk %s", walk);
1053 }
1054
1055 *buflistp = urpw.urpw_buf;
1056 *bufcntp = urpw.urpw_cnt;
1057 *bufmaxp = urpw.urpw_max;
1058
1059 return (rval);
1060 }
1061
1062 static int
1063 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1064 {
1065 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1066 }
1067
1068 static int
1069 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1070 {
1071 umem_bufctl_audit_t *b;
1072 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1073
1074 /*
1075 * if UMF_AUDIT is not set, we know that we're looking at a
1076 * umem_bufctl_t.
1077 */
1078 if (!(cp->cache_flags & UMF_AUDIT) ||
1079 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1080 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1081 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1082 mdb_warn("unable to read bufctl at %p", buf);
1083 return (WALK_ERR);
1084 }
1085 }
1086
1087 return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1088 }
1089
1090 typedef struct umem_walk {
1091 int umw_type;
1092
1093 uintptr_t umw_addr; /* cache address */
1094 umem_cache_t *umw_cp;
1095 size_t umw_csize;
1096
1097 /*
1098 * magazine layer
1099 */
1100 void **umw_maglist;
1101 size_t umw_max;
1102 size_t umw_count;
1103 size_t umw_pos;
1104
1105 /*
1106 * slab layer
1107 */
1108 char *umw_valid; /* to keep track of freed buffers */
1109 char *umw_ubase; /* buffer for slab data */
1110 } umem_walk_t;
1111
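/*
 * Common setup for the umem/bufctl/freemem/freectl walkers: read in the
 * (CPU-count sized) cache, sanity-check it, collect every buffer in the
 * magazine and per-thread layers, and then hand control to a layered walk
 * over either the allocated-buffer hash table or the slab list.
 * umem_walk_step() does the actual filtering and reporting.
 */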
1112 static int
1113 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1114 {
1115 umem_walk_t *umw;
1116 int csize;
1117 umem_cache_t *cp;
1118 size_t vm_quantum;
1119
1120 size_t magmax, magcnt;
1121 void **maglist = NULL;
1122 uint_t chunksize = 1, slabsize = 1;
1123 int status = WALK_ERR;
1124 uintptr_t addr = wsp->walk_addr;
1125 const char *layered;
1126
1127 type &= ~UM_HASH;
1128
1129 if (addr == 0) {
1130 mdb_warn("umem walk doesn't support global walks\n");
1131 return (WALK_ERR);
1132 }
1133
1134 dprintf(("walking %p\n", addr));
1135
1136 /*
1137 * The number of "cpus" determines how large the cache is.
1138 */
1139 csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1140 cp = mdb_alloc(csize, UM_SLEEP);
1141
1142 if (mdb_vread(cp, csize, addr) == -1) {
1143 mdb_warn("couldn't read cache at addr %p", addr);
1144 goto out2;
1145 }
1146
1147 /*
1148 * It's easy for someone to hand us an invalid cache address.
1149 * Unfortunately, it is hard for this walker to survive an
1150 * invalid cache cleanly. So we make sure that:
1151 *
1152 * 1. the vmem arena for the cache is readable,
1153 * 2. the vmem arena's quantum is a power of 2,
1154 * 3. our slabsize is a multiple of the quantum, and
1155 * 4. our chunksize is >0 and less than our slabsize.
1156 */
1157 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1158 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1159 vm_quantum == 0 ||
1160 (vm_quantum & (vm_quantum - 1)) != 0 ||
1161 cp->cache_slabsize < vm_quantum ||
1162 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1163 cp->cache_chunksize == 0 ||
1164 cp->cache_chunksize > cp->cache_slabsize) {
1165 mdb_warn("%p is not a valid umem_cache_t\n", addr);
1166 goto out2;
1167 }
1168
1169 dprintf(("buf total is %d\n", cp->cache_buftotal));
1170
1171 if (cp->cache_buftotal == 0) {
1172 mdb_free(cp, csize);
1173 return (WALK_DONE);
1174 }
1175
1176 /*
1177 * If they ask for bufctls, but it's a small-slab cache,
1178 * there is nothing to report.
1179 */
1180 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1181 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1182 cp->cache_flags));
1183 mdb_free(cp, csize);
1184 return (WALK_DONE);
1185 }
1186
1187 /*
1188 * Read in the contents of the magazine layer
1189 */
1190 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1191 goto out2;
1192
1193 /*
1194 * Read in the contents of the per-thread caches, if any
1195 */
1196 if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1197 goto out2;
1198
1199 /*
1200 * We have all of the buffers from the magazines and from the
1201 * per-thread cache (if any); if we are walking allocated buffers,
1202 * sort them so we can bsearch them later.
1203 */
1204 if (type & UM_ALLOCATED)
1205 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1206
1207 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1208
1209 umw->umw_type = type;
1210 umw->umw_addr = addr;
1211 umw->umw_cp = cp;
1212 umw->umw_csize = csize;
1213 umw->umw_maglist = maglist;
1214 umw->umw_max = magmax;
1215 umw->umw_count = magcnt;
1216 umw->umw_pos = 0;
1217
1218 /*
1219 * When walking allocated buffers in a UMF_HASH cache, we walk the
1220 * hash table instead of the slab layer.
1221 */
1222 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1223 layered = "umem_hash";
1224
1225 umw->umw_type |= UM_HASH;
1226 } else {
1227 /*
1228 * If we are walking freed buffers, we only need the
1229 * magazine layer plus the partially allocated slabs.
1230 * To walk allocated buffers, we need all of the slabs.
1231 */
1232 if (type & UM_ALLOCATED)
1233 layered = "umem_slab";
1234 else
1235 layered = "umem_slab_partial";
1236
1237 /*
1238 * for small-slab caches, we read in the entire slab. For
1239 * freed buffers, we can just walk the freelist. For
1240 * allocated buffers, we use a 'valid' array to track
1241 * the freed buffers.
1242 */
1243 if (!(cp->cache_flags & UMF_HASH)) {
1244 chunksize = cp->cache_chunksize;
1245 slabsize = cp->cache_slabsize;
1246
1247 umw->umw_ubase = mdb_alloc(slabsize +
1248 sizeof (umem_bufctl_t), UM_SLEEP);
1249
1250 if (type & UM_ALLOCATED)
1251 umw->umw_valid =
1252 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1253 }
1254 }
1255
1256 status = WALK_NEXT;
1257
1258 if (mdb_layered_walk(layered, wsp) == -1) {
1259 mdb_warn("unable to start layered '%s' walk", layered);
1260 status = WALK_ERR;
1261 }
1262
1263 if (status == WALK_ERR) {
1264 if (umw->umw_valid)
1265 mdb_free(umw->umw_valid, slabsize / chunksize);
1266
1267 if (umw->umw_ubase)
1268 mdb_free(umw->umw_ubase, slabsize +
1269 sizeof (umem_bufctl_t));
1270
1271 if (umw->umw_maglist)
1272 mdb_free(umw->umw_maglist, umw->umw_max *
1273 sizeof (uintptr_t));
1274
1275 mdb_free(umw, sizeof (umem_walk_t));
1276 wsp->walk_data = NULL;
1277 }
1278
1279 out2:
1280 if (status == WALK_ERR)
1281 mdb_free(cp, csize);
1282
1283 return (status);
1284 }
1285
1286 int
1287 umem_walk_step(mdb_walk_state_t *wsp)
1288 {
1289 umem_walk_t *umw = wsp->walk_data;
1290 int type = umw->umw_type;
1291 umem_cache_t *cp = umw->umw_cp;
1292
1293 void **maglist = umw->umw_maglist;
1294 int magcnt = umw->umw_count;
1295
1296 uintptr_t chunksize, slabsize;
1297 uintptr_t addr;
1298 const umem_slab_t *sp;
1299 const umem_bufctl_t *bcp;
1300 umem_bufctl_t bc;
1301
1302 int chunks;
1303 char *kbase;
1304 void *buf;
1305 int i, ret;
1306
1307 char *valid, *ubase;
1308
1309 /*
1310 * first, handle the 'umem_hash' layered walk case
1311 */
1312 if (type & UM_HASH) {
1313 /*
1314 * We have a buffer which has been allocated out of the
1315 * global layer. We need to make sure that it's not
1316 * actually sitting in a magazine before we report it as
1317 * an allocated buffer.
1318 */
1319 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1320
1321 if (magcnt > 0 &&
1322 bsearch(&buf, maglist, magcnt, sizeof (void *),
1323 addrcmp) != NULL)
1324 return (WALK_NEXT);
1325
1326 if (type & UM_BUFCTL)
1327 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1328
1329 return (umem_walk_callback(wsp, (uintptr_t)buf));
1330 }
1331
1332 ret = WALK_NEXT;
1333
1334 addr = umw->umw_addr;
1335
1336 /*
1337 * If we're walking freed buffers, report everything in the
1338 * magazine layer before processing the first slab.
1339 */
1340 if ((type & UM_FREE) && magcnt != 0) {
1341 umw->umw_count = 0; /* only do this once */
1342 for (i = 0; i < magcnt; i++) {
1343 buf = maglist[i];
1344
1345 if (type & UM_BUFCTL) {
1346 uintptr_t out;
1347
1348 if (cp->cache_flags & UMF_BUFTAG) {
1349 umem_buftag_t *btp;
1350 umem_buftag_t tag;
1351
1352 /* LINTED - alignment */
1353 btp = UMEM_BUFTAG(cp, buf);
1354 if (mdb_vread(&tag, sizeof (tag),
1355 (uintptr_t)btp) == -1) {
1356 mdb_warn("reading buftag for "
1357 "%p at %p", buf, btp);
1358 continue;
1359 }
1360 out = (uintptr_t)tag.bt_bufctl;
1361 } else {
1362 if (umem_hash_lookup(cp, addr, buf,
1363 &out) == -1)
1364 continue;
1365 }
1366 ret = bufctl_walk_callback(cp, wsp, out);
1367 } else {
1368 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1369 }
1370
1371 if (ret != WALK_NEXT)
1372 return (ret);
1373 }
1374 }
1375
1376 /*
1377 * Handle the buffers in the current slab
1378 */
1379 chunksize = cp->cache_chunksize;
1380 slabsize = cp->cache_slabsize;
1381
1382 sp = wsp->walk_layer;
1383 chunks = sp->slab_chunks;
1384 kbase = sp->slab_base;
1385
1386 dprintf(("kbase is %p\n", kbase));
1387
1388 if (!(cp->cache_flags & UMF_HASH)) {
1389 valid = umw->umw_valid;
1390 ubase = umw->umw_ubase;
1391
1392 if (mdb_vread(ubase, chunks * chunksize,
1393 (uintptr_t)kbase) == -1) {
1394 mdb_warn("failed to read slab contents at %p", kbase);
1395 return (WALK_ERR);
1396 }
1397
1398 /*
1399 * Set up the valid map as fully allocated -- we'll punch
1400 * out the freelist.
1401 */
1402 if (type & UM_ALLOCATED)
1403 (void) memset(valid, 1, chunks);
1404 } else {
1405 valid = NULL;
1406 ubase = NULL;
1407 }
1408
1409 /*
1410 * walk the slab's freelist
1411 */
1412 bcp = sp->slab_head;
1413
1414 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1415
1416 /*
1417 * since we could be in the middle of allocating a buffer,
1418 * our refcnt could be one higher than it ought to be. So we
1419 * check one further on the freelist than the count allows.
1420 */
1421 for (i = sp->slab_refcnt; i <= chunks; i++) {
1422 uint_t ndx;
1423
1424 dprintf(("bcp is %p\n", bcp));
1425
1426 if (bcp == NULL) {
1427 if (i == chunks)
1428 break;
1429 mdb_warn(
1430 "slab %p in cache %p freelist too short by %d\n",
1431 sp, addr, chunks - i);
1432 break;
1433 }
1434
1435 if (cp->cache_flags & UMF_HASH) {
1436 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1437 mdb_warn("failed to read bufctl ptr at %p",
1438 bcp);
1439 break;
1440 }
1441 buf = bc.bc_addr;
1442 } else {
1443 /*
1444 * Otherwise the buffer is (or should be) in the slab
1445 * that we've read in; determine its offset in the
1446 * slab, validate that it's not corrupt, and add to
1447 * our base address to find the umem_bufctl_t. (Note
1448 * that we don't need to add the size of the bufctl
1449 * to our offset calculation because of the slop that's
1450 * allocated for the buffer at ubase.)
1451 */
1452 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1453
1454 if (offs > chunks * chunksize) {
1455 mdb_warn("found corrupt bufctl ptr %p"
1456 " in slab %p in cache %p\n", bcp,
1457 wsp->walk_addr, addr);
1458 break;
1459 }
1460
1461 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1462 buf = UMEM_BUF(cp, bcp);
1463 }
1464
1465 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1466
1467 if (ndx > slabsize / cp->cache_bufsize) {
1468 /*
1469 * This is very wrong; we have managed to find
1470 * a buffer in the slab which shouldn't
1471 * actually be here. Emit a warning, and
1472 * try to continue.
1473 */
1474 mdb_warn("buf %p is out of range for "
1475 "slab %p, cache %p\n", buf, sp, addr);
1476 } else if (type & UM_ALLOCATED) {
1477 /*
1478 * we have found a buffer on the slab's freelist;
1479 * clear its entry
1480 */
1481 valid[ndx] = 0;
1482 } else {
1483 /*
1484 * Report this freed buffer
1485 */
1486 if (type & UM_BUFCTL) {
1487 ret = bufctl_walk_callback(cp, wsp,
1488 (uintptr_t)bcp);
1489 } else {
1490 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1491 }
1492 if (ret != WALK_NEXT)
1493 return (ret);
1494 }
1495
1496 bcp = bc.bc_next;
1497 }
1498
1499 if (bcp != NULL) {
1500 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1501 sp, addr, bcp));
1502 }
1503
1504 /*
1505 * If we are walking freed buffers, the loop above handled reporting
1506 * them.
1507 */
1508 if (type & UM_FREE)
1509 return (WALK_NEXT);
1510
1511 if (type & UM_BUFCTL) {
1512 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1513 "cache %p\n", addr);
1514 return (WALK_ERR);
1515 }
1516
1517 /*
1518 * Report allocated buffers, skipping buffers in the magazine layer.
1519 * We only get this far for small-slab caches.
1520 */
1521 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1522 buf = (char *)kbase + i * chunksize;
1523
1524 if (!valid[i])
1525 continue; /* on slab freelist */
1526
1527 if (magcnt > 0 &&
1528 bsearch(&buf, maglist, magcnt, sizeof (void *),
1529 addrcmp) != NULL)
1530 continue; /* in magazine layer */
1531
1532 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1533 }
1534 return (ret);
1535 }
1536
1537 void
1538 umem_walk_fini(mdb_walk_state_t *wsp)
1539 {
1540 umem_walk_t *umw = wsp->walk_data;
1541 uintptr_t chunksize;
1542 uintptr_t slabsize;
1543
1544 if (umw == NULL)
1545 return;
1546
1547 if (umw->umw_maglist != NULL)
1548 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1549
1550 chunksize = umw->umw_cp->cache_chunksize;
1551 slabsize = umw->umw_cp->cache_slabsize;
1552
1553 if (umw->umw_valid != NULL)
1554 mdb_free(umw->umw_valid, slabsize / chunksize);
1555 if (umw->umw_ubase != NULL)
1556 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1557
1558 mdb_free(umw->umw_cp, umw->umw_csize);
1559 mdb_free(umw, sizeof (umem_walk_t));
1560 }
1561
1562 /*ARGSUSED*/
1563 static int
1564 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1565 {
1566 /*
1567 * Buffers allocated from NOTOUCH caches can also show up as freed
1568 * memory in other caches. This can be a little confusing, so we
1569 * don't walk NOTOUCH caches when walking all caches (thereby ensuring
1570 * that "::walk umem" and "::walk freemem" yield disjoint output).
1571 */
1572 if (c->cache_cflags & UMC_NOTOUCH)
1573 return (WALK_NEXT);
1574
1575 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1576 wsp->walk_cbdata, addr) == -1)
1577 return (WALK_DONE);
1578
1579 return (WALK_NEXT);
1580 }
1581
1582 #define UMEM_WALK_ALL(name, wsp) { \
1583 wsp->walk_data = (name); \
1584 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \
1585 return (WALK_ERR); \
1586 return (WALK_DONE); \
1587 }
1588
1589 int
1590 umem_walk_init(mdb_walk_state_t *wsp)
1591 {
1592 if (wsp->walk_arg != NULL)
1593 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1594
1595 if (wsp->walk_addr == 0)
1596 UMEM_WALK_ALL("umem", wsp);
1597 return (umem_walk_init_common(wsp, UM_ALLOCATED));
1598 }
1599
1600 int
1601 bufctl_walk_init(mdb_walk_state_t *wsp)
1602 {
1603 if (wsp->walk_addr == 0)
1604 UMEM_WALK_ALL("bufctl", wsp);
1605 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1606 }
1607
1608 int
1609 freemem_walk_init(mdb_walk_state_t *wsp)
1610 {
1611 if (wsp->walk_addr == 0)
1612 UMEM_WALK_ALL("freemem", wsp);
1613 return (umem_walk_init_common(wsp, UM_FREE));
1614 }
1615
1616 int
1617 freectl_walk_init(mdb_walk_state_t *wsp)
1618 {
1619 if (wsp->walk_addr == 0)
1620 UMEM_WALK_ALL("freectl", wsp);
1621 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1622 }
1623
1624 typedef struct bufctl_history_walk {
1625 void *bhw_next;
1626 umem_cache_t *bhw_cache;
1627 umem_slab_t *bhw_slab;
1628 hrtime_t bhw_timestamp;
1629 } bufctl_history_walk_t;
1630
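/*
 * The bufctl_history walk visits the logged transactions for a single
 * buffer, starting from a bufctl and repeatedly following bc_lastlog.
 * Entries are only accepted while the address, cache, and slab still match
 * and the timestamps keep decreasing.
 */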
1631 int
1632 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1633 {
1634 bufctl_history_walk_t *bhw;
1635 umem_bufctl_audit_t bc;
1636 umem_bufctl_audit_t bcn;
1637
1638 if (wsp->walk_addr == 0) {
1639 mdb_warn("bufctl_history walk doesn't support global walks\n");
1640 return (WALK_ERR);
1641 }
1642
1643 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1644 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1645 return (WALK_ERR);
1646 }
1647
1648 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1649 bhw->bhw_timestamp = 0;
1650 bhw->bhw_cache = bc.bc_cache;
1651 bhw->bhw_slab = bc.bc_slab;
1652
1653 /*
1654 * sometimes the first log entry matches the base bufctl; in that
1655 * case, skip the base bufctl.
1656 */
1657 if (bc.bc_lastlog != NULL &&
1658 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1659 bc.bc_addr == bcn.bc_addr &&
1660 bc.bc_cache == bcn.bc_cache &&
1661 bc.bc_slab == bcn.bc_slab &&
1662 bc.bc_timestamp == bcn.bc_timestamp &&
1663 bc.bc_thread == bcn.bc_thread)
1664 bhw->bhw_next = bc.bc_lastlog;
1665 else
1666 bhw->bhw_next = (void *)wsp->walk_addr;
1667
1668 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1669 wsp->walk_data = bhw;
1670
1671 return (WALK_NEXT);
1672 }
1673
1674 int
1675 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1676 {
1677 bufctl_history_walk_t *bhw = wsp->walk_data;
1678 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1679 uintptr_t baseaddr = wsp->walk_addr;
1680 umem_bufctl_audit_t *b;
1681 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1682
1683 if (addr == 0)
1684 return (WALK_DONE);
1685
1686 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1687 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1688 return (WALK_ERR);
1689 }
1690
1691 /*
1692 * The bufctl is only valid if the address, cache, and slab are
1693 * correct. We also check that the timestamp is decreasing, to
1694 * prevent infinite loops.
1695 */
1696 if ((uintptr_t)b->bc_addr != baseaddr ||
1697 b->bc_cache != bhw->bhw_cache ||
1698 b->bc_slab != bhw->bhw_slab ||
1699 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1700 return (WALK_DONE);
1701
1702 bhw->bhw_next = b->bc_lastlog;
1703 bhw->bhw_timestamp = b->bc_timestamp;
1704
1705 return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1706 }
1707
1708 void
1709 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1710 {
1711 bufctl_history_walk_t *bhw = wsp->walk_data;
1712
1713 mdb_free(bhw, sizeof (*bhw));
1714 }
1715
1716 typedef struct umem_log_walk {
1717 umem_bufctl_audit_t *ulw_base;
1718 umem_bufctl_audit_t **ulw_sorted;
1719 umem_log_header_t ulw_lh;
1720 size_t ulw_size;
1721 size_t ulw_maxndx;
1722 size_t ulw_ndx;
1723 } umem_log_walk_t;
1724
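/*
 * The umem_log walk snapshots an entire log, builds an array of pointers to
 * the bufctl audit records it contains, and sorts them by timestamp (most
 * recent first) before handing them to the callback.
 */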
1725 int
1726 umem_log_walk_init(mdb_walk_state_t *wsp)
1727 {
1728 uintptr_t lp = wsp->walk_addr;
1729 umem_log_walk_t *ulw;
1730 umem_log_header_t *lhp;
1731 int maxndx, i, j, k;
1732
1733 /*
1734 * By default (global walk), walk the umem_transaction_log. Otherwise
1735 * read the log whose umem_log_header_t is stored at walk_addr.
1736 */
1737 if (lp == 0 && umem_readvar(&lp, "umem_transaction_log") == -1) {
1738 mdb_warn("failed to read 'umem_transaction_log'");
1739 return (WALK_ERR);
1740 }
1741
1742 if (lp == 0) {
1743 mdb_warn("log is disabled\n");
1744 return (WALK_ERR);
1745 }
1746
1747 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1748 lhp = &ulw->ulw_lh;
1749
1750 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1751 mdb_warn("failed to read log header at %p", lp);
1752 mdb_free(ulw, sizeof (umem_log_walk_t));
1753 return (WALK_ERR);
1754 }
1755
1756 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1757 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1758 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1759
1760 if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1761 (uintptr_t)lhp->lh_base) == -1) {
1762 mdb_warn("failed to read log at base %p", lhp->lh_base);
1763 mdb_free(ulw->ulw_base, ulw->ulw_size);
1764 mdb_free(ulw, sizeof (umem_log_walk_t));
1765 return (WALK_ERR);
1766 }
1767
1768 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1769 sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1770
1771 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1772 caddr_t chunk = (caddr_t)
1773 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1774
1775 for (j = 0; j < maxndx; j++) {
1776 /* LINTED align */
1777 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1778 chunk += UMEM_BUFCTL_AUDIT_SIZE;
1779 }
1780 }
1781
1782 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1783 (int(*)(const void *, const void *))bufctlcmp);
1784
1785 ulw->ulw_maxndx = k;
1786 wsp->walk_data = ulw;
1787
1788 return (WALK_NEXT);
1789 }
1790
1791 int
1792 umem_log_walk_step(mdb_walk_state_t *wsp)
1793 {
1794 umem_log_walk_t *ulw = wsp->walk_data;
1795 umem_bufctl_audit_t *bcp;
1796
1797 if (ulw->ulw_ndx == ulw->ulw_maxndx)
1798 return (WALK_DONE);
1799
1800 bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1801
1802 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1803 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1804 }
1805
1806 void
1807 umem_log_walk_fini(mdb_walk_state_t *wsp)
1808 {
1809 umem_log_walk_t *ulw = wsp->walk_data;
1810
1811 mdb_free(ulw->ulw_base, ulw->ulw_size);
1812 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1813 sizeof (umem_bufctl_audit_t *));
1814 mdb_free(ulw, sizeof (umem_log_walk_t));
1815 }
1816
1817 typedef struct allocdby_bufctl {
1818 uintptr_t abb_addr;
1819 hrtime_t abb_ts;
1820 } allocdby_bufctl_t;
1821
1822 typedef struct allocdby_walk {
1823 const char *abw_walk;
1824 uintptr_t abw_thread;
1825 size_t abw_nbufs;
1826 size_t abw_size;
1827 allocdby_bufctl_t *abw_buf;
1828 size_t abw_ndx;
1829 } allocdby_walk_t;
1830
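/*
 * The allocdby/freedby walks gather, from every cache, the bufctls whose
 * bc_thread matches the thread address supplied to the walk, then sort them
 * by timestamp (most recent first).
 */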
1831 int
1832 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1833 allocdby_walk_t *abw)
1834 {
1835 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1836 return (WALK_NEXT);
1837
1838 if (abw->abw_nbufs == abw->abw_size) {
1839 allocdby_bufctl_t *buf;
1840 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1841
1842 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1843
1844 bcopy(abw->abw_buf, buf, oldsize);
1845 mdb_free(abw->abw_buf, oldsize);
1846
1847 abw->abw_size <<= 1;
1848 abw->abw_buf = buf;
1849 }
1850
1851 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1852 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1853 abw->abw_nbufs++;
1854
1855 return (WALK_NEXT);
1856 }
1857
1858 /*ARGSUSED*/
1859 int
1860 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1861 {
1862 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1863 abw, addr) == -1) {
1864 mdb_warn("couldn't walk bufctl for cache %p", addr);
1865 return (WALK_DONE);
1866 }
1867
1868 return (WALK_NEXT);
1869 }
1870
1871 static int
1872 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1873 {
1874 if (lhs->abb_ts < rhs->abb_ts)
1875 return (1);
1876 if (lhs->abb_ts > rhs->abb_ts)
1877 return (-1);
1878 return (0);
1879 }
1880
1881 static int
1882 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1883 {
1884 allocdby_walk_t *abw;
1885
1886 if (wsp->walk_addr == 0) {
1887 mdb_warn("allocdby walk doesn't support global walks\n");
1888 return (WALK_ERR);
1889 }
1890
1891 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1892
1893 abw->abw_thread = wsp->walk_addr;
1894 abw->abw_walk = walk;
1895 abw->abw_size = 128; /* something reasonable */
1896 abw->abw_buf =
1897 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1898
1899 wsp->walk_data = abw;
1900
1901 if (mdb_walk("umem_cache",
1902 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1903 mdb_warn("couldn't walk umem_cache");
1904 allocdby_walk_fini(wsp);
1905 return (WALK_ERR);
1906 }
1907
1908 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1909 (int(*)(const void *, const void *))allocdby_cmp);
1910
1911 return (WALK_NEXT);
1912 }
1913
1914 int
1915 allocdby_walk_init(mdb_walk_state_t *wsp)
1916 {
1917 return (allocdby_walk_init_common(wsp, "bufctl"));
1918 }
1919
1920 int
1921 freedby_walk_init(mdb_walk_state_t *wsp)
1922 {
1923 return (allocdby_walk_init_common(wsp, "freectl"));
1924 }
1925
1926 int
1927 allocdby_walk_step(mdb_walk_state_t *wsp)
1928 {
1929 allocdby_walk_t *abw = wsp->walk_data;
1930 uintptr_t addr;
1931 umem_bufctl_audit_t *bcp;
1932 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1933
1934 if (abw->abw_ndx == abw->abw_nbufs)
1935 return (WALK_DONE);
1936
1937 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1938
1939 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1940 mdb_warn("couldn't read bufctl at %p", addr);
1941 return (WALK_DONE);
1942 }
1943
1944 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1945 }
1946
1947 void
1948 allocdby_walk_fini(mdb_walk_state_t *wsp)
1949 {
1950 allocdby_walk_t *abw = wsp->walk_data;
1951
1952 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1953 mdb_free(abw, sizeof (allocdby_walk_t));
1954 }
1955
1956 /*ARGSUSED*/
1957 int
1958 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1959 {
1960 char c[MDB_SYM_NAMLEN];
1961 GElf_Sym sym;
1962 int i;
1963
1964 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1965 for (i = 0; i < bcp->bc_depth; i++) {
1966 if (mdb_lookup_by_addr(bcp->bc_stack[i],
1967 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1968 continue;
1969 if (is_umem_sym(c, "umem_"))
1970 continue;
1971 mdb_printf("%s+0x%lx",
1972 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1973 break;
1974 }
1975 mdb_printf("\n");
1976
1977 return (WALK_NEXT);
1978 }
1979
1980 static int
1981 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1982 {
1983 if (!(flags & DCMD_ADDRSPEC))
1984 return (DCMD_USAGE);
1985
1986 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1987
1988 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1989 mdb_warn("can't walk '%s' for %p", w, addr);
1990 return (DCMD_ERR);
1991 }
1992
1993 return (DCMD_OK);
1994 }
1995
1996 /*ARGSUSED*/
1997 int
1998 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1999 {
2000 return (allocdby_common(addr, flags, "allocdby"));
2001 }
2002
2003 /*ARGSUSED*/
2004 int
2005 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2006 {
2007 return (allocdby_common(addr, flags, "freedby"));
2008 }
2009
2010 typedef struct whatis_info {
2011 mdb_whatis_t *wi_w;
2012 const umem_cache_t *wi_cache;
2013 const vmem_t *wi_vmem;
2014 vmem_t *wi_msb_arena;
2015 size_t wi_slab_size;
2016 int wi_slab_found;
2017 uint_t wi_freemem;
2018 } whatis_info_t;
2019
2020 /* call one of our dcmd functions with "-v" and the provided address */
2021 static void
2022 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2023 {
2024 mdb_arg_t a;
2025 a.a_type = MDB_TYPE_STRING;
2026 a.a_un.a_str = "-v";
2027
2028 mdb_printf(":\n");
2029 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2030 }
2031
2032 static void
2033 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2034 uintptr_t baddr)
2035 {
2036 mdb_whatis_t *w = wi->wi_w;
2037 const umem_cache_t *cp = wi->wi_cache;
2038 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2039
2040 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2041
2042 mdb_whatis_report_object(w, maddr, addr, "");
2043
2044 if (baddr != 0 && !call_printer)
2045 mdb_printf("bufctl %p ", baddr);
2046
2047 mdb_printf("%s from %s",
2048 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2049
2050 if (call_printer && baddr != 0) {
2051 whatis_call_printer(bufctl, baddr);
2052 return;
2053 }
2054 mdb_printf("\n");
2055 }
2056
2057 /*ARGSUSED*/
2058 static int
2059 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2060 {
2061 mdb_whatis_t *w = wi->wi_w;
2062
2063 uintptr_t cur;
2064 size_t size = wi->wi_cache->cache_bufsize;
2065
2066 while (mdb_whatis_match(w, addr, size, &cur))
2067 whatis_print_umem(wi, cur, addr, 0);
2068
2069 return (WHATIS_WALKRET(w));
2070 }
2071
2072 /*ARGSUSED*/
2073 static int
2074 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2075 {
2076 mdb_whatis_t *w = wi->wi_w;
2077
2078 uintptr_t cur;
2079 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2080 size_t size = wi->wi_cache->cache_bufsize;
2081
2082 while (mdb_whatis_match(w, addr, size, &cur))
2083 whatis_print_umem(wi, cur, addr, baddr);
2084
2085 return (WHATIS_WALKRET(w));
2086 }
2087
2088
2089 static int
2090 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2091 {
2092 mdb_whatis_t *w = wi->wi_w;
2093
2094 size_t size = vs->vs_end - vs->vs_start;
2095 uintptr_t cur;
2096
2097 /* We're not interested in anything but alloc and free segments */
2098 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2099 return (WALK_NEXT);
2100
2101 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2102 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2103
2104 /*
2105 * If we're not printing it separately, provide the vmem_seg
2106 * pointer if it has a stack trace.
2107 */
2108 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2109 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2110 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2111 mdb_printf("vmem_seg %p ", addr);
2112 }
2113
2114 mdb_printf("%s from %s vmem arena",
2115 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2116 wi->wi_vmem->vm_name);
2117
2118 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2119 whatis_call_printer(vmem_seg, addr);
2120 else
2121 mdb_printf("\n");
2122 }
2123
2124 return (WHATIS_WALKRET(w));
2125 }
2126
2127 static int
2128 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2129 {
2130 mdb_whatis_t *w = wi->wi_w;
2131 const char *nm = vmem->vm_name;
2132 wi->wi_vmem = vmem;
2133
2134 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2135 mdb_printf("Searching vmem arena %s...\n", nm);
2136
2137 if (mdb_pwalk("vmem_seg",
2138 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2139 mdb_warn("can't walk vmem seg for %p", addr);
2140 return (WALK_NEXT);
2141 }
2142
2143 return (WHATIS_WALKRET(w));
2144 }
2145
2146 /*ARGSUSED*/
2147 static int
2148 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2149 {
2150 mdb_whatis_t *w = wi->wi_w;
2151
2152 /* It must overlap with the slab data, or it's not interesting */
2153 if (mdb_whatis_overlaps(w,
2154 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2155 wi->wi_slab_found++;
2156 return (WALK_DONE);
2157 }
2158 return (WALK_NEXT);
2159 }
2160
2161 static int
2162 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2163 {
2164 mdb_whatis_t *w = wi->wi_w;
2165 char *walk, *freewalk;
2166 mdb_walk_cb_t func;
2167 int do_bufctl;
2168
2169 /* Override the '-b' flag as necessary */
2170 if (!(c->cache_flags & UMF_HASH))
2171 do_bufctl = FALSE; /* no bufctls to walk */
2172 else if (c->cache_flags & UMF_AUDIT)
2173 do_bufctl = TRUE; /* we always want debugging info */
2174 else
2175 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2176
2177 if (do_bufctl) {
2178 walk = "bufctl";
2179 freewalk = "freectl";
2180 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2181 } else {
2182 walk = "umem";
2183 freewalk = "freemem";
2184 func = (mdb_walk_cb_t)whatis_walk_umem;
2185 }
2186
2187 wi->wi_cache = c;
2188
2189 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2190 mdb_printf("Searching %s...\n", c->cache_name);
2191
2192 /*
2193 * If more than two buffers live on each slab, figure out if we're
2194 * interested in anything in any slab before doing the more expensive
2195 * umem/freemem (bufctl/freectl) walkers.
2196 */
2197 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2198 if (!(c->cache_flags & UMF_HASH))
2199 wi->wi_slab_size -= sizeof (umem_slab_t);
2200
2201 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2202 wi->wi_slab_found = 0;
2203 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2204 addr) == -1) {
2205 mdb_warn("can't find umem_slab walker");
2206 return (WALK_DONE);
2207 }
2208 if (wi->wi_slab_found == 0)
2209 return (WALK_NEXT);
2210 }
2211
2212 wi->wi_freemem = FALSE;
2213 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2214 mdb_warn("can't find %s walker", walk);
2215 return (WALK_DONE);
2216 }
2217
2218 if (mdb_whatis_done(w))
2219 return (WALK_DONE);
2220
2221 /*
2222 * We have searched for allocated memory; now search for freed memory.
2223 */
2224 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2225 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2226
2227 wi->wi_freemem = TRUE;
2228
2229 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2230 mdb_warn("can't find %s walker", freewalk);
2231 return (WALK_DONE);
2232 }
2233
2234 return (WHATIS_WALKRET(w));
2235 }
2236
2237 static int
2238 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2239 {
2240 if (c->cache_arena == wi->wi_msb_arena ||
2241 (c->cache_cflags & UMC_NOTOUCH))
2242 return (WALK_NEXT);
2243
2244 return (whatis_walk_cache(addr, c, wi));
2245 }
2246
2247 static int
2248 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2249 {
2250 if (c->cache_arena != wi->wi_msb_arena)
2251 return (WALK_NEXT);
2252
2253 return (whatis_walk_cache(addr, c, wi));
2254 }
2255
2256 static int
2257 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2258 {
2259 if (c->cache_arena == wi->wi_msb_arena ||
2260 !(c->cache_cflags & UMC_NOTOUCH))
2261 return (WALK_NEXT);
2262
2263 return (whatis_walk_cache(addr, c, wi));
2264 }
2265
2266 /*ARGSUSED*/
2267 static int
2268 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2269 {
2270 whatis_info_t wi;
2271
2272 bzero(&wi, sizeof (wi));
2273 wi.wi_w = w;
2274
2275 /* umem's metadata is allocated from the umem_internal_arena */
2276 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2277 mdb_warn("unable to readvar \"umem_internal_arena\"");
2278
2279 /*
2280 * We process umem caches in the following order:
2281 *
2282 * non-UMC_NOTOUCH, non-metadata (typically the most interesting)
2283 * metadata (can be huge with UMF_AUDIT)
2284 * UMC_NOTOUCH, non-metadata (see umem_walk_all())
2285 */
2286 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2287 &wi) == -1 ||
2288 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2289 &wi) == -1 ||
2290 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2291 &wi) == -1) {
2292 mdb_warn("couldn't find umem_cache walker");
2293 return (1);
2294 }
2295 return (0);
2296 }
2297
2298 /*ARGSUSED*/
2299 static int
2300 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2301 {
2302 whatis_info_t wi;
2303
2304 bzero(&wi, sizeof (wi));
2305 wi.wi_w = w;
2306
2307 if (mdb_walk("vmem_postfix",
2308 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2309 mdb_warn("couldn't find vmem_postfix walker");
2310 return (1);
2311 }
2312 return (0);
2313 }
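/*
 * These two engines are registered with the generic ::whatis framework in
 * umem_init() below, so a plain invocation such as (address hypothetical)
 *
 *	> 0x8062000::whatis
 *
 * reports whether the address lies in a umem buffer (allocated or freed, and
 * from which cache) or in a vmem segment of some arena.
 */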
2314
2315 int
2316 umem_init(void)
2317 {
2318 mdb_walker_t w = {
2319 "umem_cache", "walk list of umem caches", umem_cache_walk_init,
2320 umem_cache_walk_step, umem_cache_walk_fini
2321 };
2322
2323 if (mdb_add_walker(&w) == -1) {
2324 mdb_warn("failed to add umem_cache walker");
2325 return (-1);
2326 }
2327
2328 if (umem_update_variables() == -1)
2329 return (-1);
2330
2331 /* install a callback so that our variables are always up-to-date */
2332 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2333 umem_statechange_cb(NULL);
2334
2335 /*
2336 * Register our ::whatis callbacks.
2337 */
2338 mdb_whatis_register("umem", whatis_run_umem, NULL,
2339 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2340 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2341 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2342
2343 return (0);
2344 }
2345
2346 typedef struct umem_log_cpu {
2347 uintptr_t umc_low;
2348 uintptr_t umc_high;
2349 } umem_log_cpu_t;
2350
2351 int
2352 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2353 {
2354 int i;
2355
2356 for (i = 0; i < umem_max_ncpus; i++) {
2357 if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2358 break;
2359 }
2360
2361 if (i == umem_max_ncpus)
2362 mdb_printf("   ");
2363 else
2364 mdb_printf("%3d", i);
2365
2366 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2367 b->bc_timestamp, b->bc_thread);
2368
2369 return (WALK_NEXT);
2370 }
2371
2372 /*ARGSUSED*/
2373 int
2374 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2375 {
2376 umem_log_header_t lh;
2377 umem_cpu_log_header_t clh;
2378 uintptr_t lhp, clhp;
2379 umem_log_cpu_t *umc;
2380 int i;
2381
2382 if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2383 mdb_warn("failed to read 'umem_transaction_log'");
2384 return (DCMD_ERR);
2385 }
2386
2387 if (lhp == 0) {
2388 mdb_warn("no umem transaction log\n");
2389 return (DCMD_ERR);
2390 }
2391
2392 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2393 mdb_warn("failed to read log header at %p", lhp);
2394 return (DCMD_ERR);
2395 }
2396
2397 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2398
2399 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2400 UM_SLEEP | UM_GC);
2401
2402 for (i = 0; i < umem_max_ncpus; i++) {
2403 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2404 mdb_warn("cannot read cpu %d's log header at %p",
2405 i, clhp);
2406 return (DCMD_ERR);
2407 }
2408
2409 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2410 (uintptr_t)lh.lh_base;
2411 umc[i].umc_high = (uintptr_t)clh.clh_current;
2412
2413 clhp += sizeof (umem_cpu_log_header_t);
2414 }
2415
2416 if (DCMD_HDRSPEC(flags)) {
2417 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2418 "BUFADDR", "TIMESTAMP", "THREAD");
2419 }
2420
2421 /*
2422 * If we have been passed an address, we'll just print out that
2423 * log entry.
2424 */
2425 if (flags & DCMD_ADDRSPEC) {
2426 umem_bufctl_audit_t *bp;
2427 UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2428
2429 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2430 mdb_warn("failed to read bufctl at %p", addr);
2431 return (DCMD_ERR);
2432 }
2433
2434 (void) umem_log_walk(addr, bp, umc);
2435
2436 return (DCMD_OK);
2437 }
2438
2439 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2440 mdb_warn("can't find umem log walker");
2441 return (DCMD_ERR);
2442 }
2443
2444 return (DCMD_OK);
2445 }
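/*
 * Example invocations of ::umem_log (the bufctl address is hypothetical):
 *
 *	> ::umem_log
 *	> 0x8090a00::umem_log
 *
 * The first form dumps every entry in the transaction log; the second decodes
 * a single entry. The log only exists when transaction logging was enabled in
 * the target (typically via UMEM_LOGGING=transaction).
 */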
2446
2447 typedef struct bufctl_history_cb {
2448 int bhc_flags;
2449 int bhc_argc;
2450 const mdb_arg_t *bhc_argv;
2451 int bhc_ret;
2452 } bufctl_history_cb_t;
2453
2454 /*ARGSUSED*/
2455 static int
2456 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2457 {
2458 bufctl_history_cb_t *bhc = arg;
2459
2460 bhc->bhc_ret =
2461 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2462
2463 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2464
2465 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2466 }
2467
2468 void
2469 bufctl_help(void)
2470 {
2471 mdb_printf("%s\n",
2472 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n");
2473 mdb_dec_indent(2);
2474 mdb_printf("%<b>OPTIONS%</b>\n");
2475 mdb_inc_indent(2);
2476 mdb_printf("%s",
2477 " -v Display the full content of the bufctl, including its stack trace\n"
2478 " -h retrieve the bufctl's transaction history, if available\n"
2479 " -a addr\n"
2480 " filter out bufctls not involving the buffer at addr\n"
2481 " -c caller\n"
2482 " filter out bufctls without the function/PC in their stack trace\n"
2483 " -e earliest\n"
2484 " filter out bufctls timestamped before earliest\n"
2485 " -l latest\n"
2486 " filter out bufctls timestamped after latest\n"
2487 " -t thread\n"
2488 " filter out bufctls not involving thread\n");
2489 }
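/*
 * For example (cache address and function name hypothetical),
 *
 *	> 0x8069010::walk bufctl | ::bufctl -c my_alloc_func -t 0t7
 *
 * prints only those bufctls whose stack trace includes my_alloc_func and
 * whose recorded thread is 7; filtered-out bufctls return DCMD_OK without
 * printing, so they simply drop out of the pipeline.
 */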
2490
2491 int
2492 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2493 {
2494 uint_t verbose = FALSE;
2495 uint_t history = FALSE;
2496 uint_t in_history = FALSE;
2497 uintptr_t caller = 0, thread = 0;
2498 uintptr_t laddr, haddr, baddr = 0;
2499 hrtime_t earliest = 0, latest = 0;
2500 int i, depth;
2501 char c[MDB_SYM_NAMLEN];
2502 GElf_Sym sym;
2503 umem_bufctl_audit_t *bcp;
2504 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2505
2506 if (mdb_getopts(argc, argv,
2507 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2508 'h', MDB_OPT_SETBITS, TRUE, &history,
2509 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2510 'c', MDB_OPT_UINTPTR, &caller,
2511 't', MDB_OPT_UINTPTR, &thread,
2512 'e', MDB_OPT_UINT64, &earliest,
2513 'l', MDB_OPT_UINT64, &latest,
2514 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2515 return (DCMD_USAGE);
2516
2517 if (!(flags & DCMD_ADDRSPEC))
2518 return (DCMD_USAGE);
2519
2520 if (in_history && !history)
2521 return (DCMD_USAGE);
2522
2523 if (history && !in_history) {
2524 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2525 UM_SLEEP | UM_GC);
2526 bufctl_history_cb_t bhc;
2527
2528 nargv[0].a_type = MDB_TYPE_STRING;
2529 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2530
2531 for (i = 0; i < argc; i++)
2532 nargv[i + 1] = argv[i];
2533
2534 /*
2535 * When in history mode, we treat each element as if it
2536 * were in a separate loop, so that the headers group
2537 * bufctls with similar histories.
2538 */
2539 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2540 bhc.bhc_argc = argc + 1;
2541 bhc.bhc_argv = nargv;
2542 bhc.bhc_ret = DCMD_OK;
2543
2544 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2545 addr) == -1) {
2546 mdb_warn("unable to walk bufctl_history");
2547 return (DCMD_ERR);
2548 }
2549
2550 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2551 mdb_printf("\n");
2552
2553 return (bhc.bhc_ret);
2554 }
2555
2556 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2557 if (verbose) {
2558 mdb_printf("%16s %16s %16s %16s\n"
2559 "%<u>%16s %16s %16s %16s%</u>\n",
2560 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2561 "", "CACHE", "LASTLOG", "CONTENTS");
2562 } else {
2563 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2564 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2565 }
2566 }
2567
2568 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2569 mdb_warn("couldn't read bufctl at %p", addr);
2570 return (DCMD_ERR);
2571 }
2572
2573 /*
2574 * Guard against bogus bc_depth in case the bufctl is corrupt or
2575 * the address does not really refer to a bufctl.
2576 */
2577 depth = MIN(bcp->bc_depth, umem_stack_depth);
2578
2579 if (caller != 0) {
2580 laddr = caller;
2581 haddr = caller + sizeof (caller);
2582
2583 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2584 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2585 /*
2586 * We were provided an exact symbol value; any
2587 * address in the function is valid.
2588 */
2589 laddr = (uintptr_t)sym.st_value;
2590 haddr = (uintptr_t)sym.st_value + sym.st_size;
2591 }
2592
2593 for (i = 0; i < depth; i++)
2594 if (bcp->bc_stack[i] >= laddr &&
2595 bcp->bc_stack[i] < haddr)
2596 break;
2597
2598 if (i == depth)
2599 return (DCMD_OK);
2600 }
2601
2602 if (thread != 0 && (uintptr_t)bcp->bc_thread != thread)
2603 return (DCMD_OK);
2604
2605 if (earliest != 0 && bcp->bc_timestamp < earliest)
2606 return (DCMD_OK);
2607
2608 if (latest != 0 && bcp->bc_timestamp > latest)
2609 return (DCMD_OK);
2610
2611 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2612 return (DCMD_OK);
2613
2614 if (flags & DCMD_PIPE_OUT) {
2615 mdb_printf("%#r\n", addr);
2616 return (DCMD_OK);
2617 }
2618
2619 if (verbose) {
2620 mdb_printf(
2621 "%<b>%16p%</b> %16p %16llx %16d\n"
2622 "%16s %16p %16p %16p\n",
2623 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2624 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2625
2626 mdb_inc_indent(17);
2627 for (i = 0; i < depth; i++)
2628 mdb_printf("%a\n", bcp->bc_stack[i]);
2629 mdb_dec_indent(17);
2630 mdb_printf("\n");
2631 } else {
2632 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2633 bcp->bc_timestamp, bcp->bc_thread);
2634
2635 for (i = 0; i < depth; i++) {
2636 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2637 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2638 continue;
2639 if (is_umem_sym(c, "umem_"))
2640 continue;
2641 mdb_printf(" %a\n", bcp->bc_stack[i]);
2642 break;
2643 }
2644
2645 if (i >= depth)
2646 mdb_printf("\n");
2647 }
2648
2649 return (DCMD_OK);
2650 }
2651
2652 /*ARGSUSED*/
2653 int
2654 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2655 {
2656 mdb_arg_t a;
2657
2658 if (!(flags & DCMD_ADDRSPEC))
2659 return (DCMD_USAGE);
2660
2661 if (argc != 0)
2662 return (DCMD_USAGE);
2663
2664 a.a_type = MDB_TYPE_STRING;
2665 a.a_un.a_str = "-v";
2666
2667 return (bufctl(addr, flags, 1, &a));
2668 }
2669
2670 typedef struct umem_verify {
2671 uint64_t *umv_buf; /* buffer to read cache contents into */
2672 size_t umv_size; /* number of bytes in umv_buf */
2673 int umv_corruption; /* > 0 if corruption found. */
2674 int umv_besilent; /* nonzero to suppress per-buffer reports */
2675 struct umem_cache umv_cache; /* the cache we're operating on */
2676 } umem_verify_t;
2677
2678 /*
2679 * verify_pattern()
2680 * verify that buf is filled with the pattern pat.
2681 */
2682 static int64_t
2683 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2684 {
2685 /*LINTED*/
2686 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2687 uint64_t *buf;
2688
2689 for (buf = buf_arg; buf < bufend; buf++)
2690 if (*buf != pat)
2691 return ((uintptr_t)buf - (uintptr_t)buf_arg);
2692 return (-1);
2693 }
2694
2695 /*
2696 * verify_buftag()
2697 * verify that btp->bt_bxstat == (bcp ^ pat)
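 *
 * bt_bxstat is expected to hold the bufctl pointer XORed with a state
 * pattern (UMEM_BUFTAG_ALLOC or UMEM_BUFTAG_FREE, depending on the caller),
 * so a stale or overwritten buftag fails this check even when its redzone
 * still looks plausible.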
2698 */
2699 static int
2700 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2701 {
2702 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2703 }
2704
2705 /*
2706 * verify_free()
2707 * verify the integrity of a free block of memory by checking
2708 * that it is filled with 0xdeadbeef and that its buftag is sane.
2709 */
2710 /*ARGSUSED1*/
2711 static int
2712 verify_free(uintptr_t addr, const void *data, void *private)
2713 {
2714 umem_verify_t *umv = (umem_verify_t *)private;
2715 uint64_t *buf = umv->umv_buf; /* buf to validate */
2716 int64_t corrupt; /* corruption offset */
2717 umem_buftag_t *buftagp; /* ptr to buftag */
2718 umem_cache_t *cp = &umv->umv_cache;
2719 int besilent = umv->umv_besilent;
2720
2721 /*LINTED*/
2722 buftagp = UMEM_BUFTAG(cp, buf);
2723
2724 /*
2725 * Read the buffer to check.
2726 */
2727 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2728 if (!besilent)
2729 mdb_warn("couldn't read %p", addr);
2730 return (WALK_NEXT);
2731 }
2732
2733 if ((corrupt = verify_pattern(buf, cp->cache_verify,
2734 UMEM_FREE_PATTERN)) >= 0) {
2735 if (!besilent)
2736 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2737 addr, (uintptr_t)addr + corrupt);
2738 goto corrupt;
2739 }
2740
2741 if ((cp->cache_flags & UMF_HASH) &&
2742 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2743 if (!besilent)
2744 mdb_printf("buffer %p (free) seems to "
2745 "have a corrupt redzone pattern\n", addr);
2746 goto corrupt;
2747 }
2748
2749 /*
2750 * confirm bufctl pointer integrity.
2751 */
2752 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2753 if (!besilent)
2754 mdb_printf("buffer %p (free) has a corrupt "
2755 "buftag\n", addr);
2756 goto corrupt;
2757 }
2758
2759 return (WALK_NEXT);
2760 corrupt:
2761 umv->umv_corruption++;
2762 return (WALK_NEXT);
2763 }
2764
2765 /*
2766 * verify_alloc()
2767 * Verify that the buftag of an allocated buffer makes sense with respect
2768 * to the buffer.
2769 */
2770 /*ARGSUSED1*/
2771 static int
2772 verify_alloc(uintptr_t addr, const void *data, void *private)
2773 {
2774 umem_verify_t *umv = (umem_verify_t *)private;
2775 umem_cache_t *cp = &umv->umv_cache;
2776 uint64_t *buf = umv->umv_buf; /* buf to validate */
2777 /*LINTED*/
2778 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2779 uint32_t *ip = (uint32_t *)buftagp;
2780 uint8_t *bp = (uint8_t *)buf;
2781 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
2782 int besilent = umv->umv_besilent;
2783
2784 /*
2785 * Read the buffer to check.
2786 */
2787 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2788 if (!besilent)
2789 mdb_warn("couldn't read %p", addr);
2790 return (WALK_NEXT);
2791 }
2792
2793 /*
2794 * There are two cases to handle:
2795 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2796 * 0xfeedfacefeedface at the end of it
2797 * 2. If the buf was alloc'd using umem_alloc, it will have
2798 * 0xbb just past the end of the region in use. At the buftag,
2799 * it will have 0xfeedface (or, if the whole buffer is in use,
2800 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2801 * endianness), followed by 32 bits containing the offset of the
2802 * 0xbb byte in the buffer.
2803 *
2804 * Finally, the two 32-bit words that comprise the second half of the
2805 * buftag should xor to UMEM_BUFTAG_ALLOC
2806 */
2807
2808 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2809 looks_ok = 1;
2810 else if (!UMEM_SIZE_VALID(ip[1]))
2811 size_ok = 0;
2812 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2813 looks_ok = 1;
2814 else
2815 size_ok = 0;
2816
2817 if (!size_ok) {
2818 if (!besilent)
2819 mdb_printf("buffer %p (allocated) has a corrupt "
2820 "redzone size encoding\n", addr);
2821 goto corrupt;
2822 }
2823
2824 if (!looks_ok) {
2825 if (!besilent)
2826 mdb_printf("buffer %p (allocated) has a corrupt "
2827 "redzone signature\n", addr);
2828 goto corrupt;
2829 }
2830
2831 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2832 if (!besilent)
2833 mdb_printf("buffer %p (allocated) has a "
2834 "corrupt buftag\n", addr);
2835 goto corrupt;
2836 }
2837
2838 return (WALK_NEXT);
2839 corrupt:
2840 umv->umv_corruption++;
2841 return (WALK_NEXT);
2842 }
2843
2844 /*ARGSUSED2*/
2845 int
2846 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2847 {
2848 if (flags & DCMD_ADDRSPEC) {
2849 int check_alloc = 0, check_free = 0;
2850 umem_verify_t umv;
2851
2852 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2853 addr) == -1) {
2854 mdb_warn("couldn't read umem_cache %p", addr);
2855 return (DCMD_ERR);
2856 }
2857
2858 umv.umv_size = umv.umv_cache.cache_buftag +
2859 sizeof (umem_buftag_t);
2860 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2861 umv.umv_corruption = 0;
2862
2863 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2864 check_alloc = 1;
2865 if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2866 check_free = 1;
2867 } else {
2868 if (!(flags & DCMD_LOOP)) {
2869 mdb_warn("cache %p (%s) does not have "
2870 "redzone checking enabled\n", addr,
2871 umv.umv_cache.cache_name);
2872 }
2873 return (DCMD_ERR);
2874 }
2875
2876 if (flags & DCMD_LOOP) {
2877 /*
2878 * table mode, don't print out every corrupt buffer
2879 */
2880 umv.umv_besilent = 1;
2881 } else {
2882 mdb_printf("Summary for cache '%s'\n",
2883 umv.umv_cache.cache_name);
2884 mdb_inc_indent(2);
2885 umv.umv_besilent = 0;
2886 }
2887
2888 if (check_alloc)
2889 (void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2890 if (check_free)
2891 (void) mdb_pwalk("freemem", verify_free, &umv, addr);
2892
2893 if (flags & DCMD_LOOP) {
2894 if (umv.umv_corruption == 0) {
2895 mdb_printf("%-*s %?p clean\n",
2896 UMEM_CACHE_NAMELEN,
2897 umv.umv_cache.cache_name, addr);
2898 } else {
2899 char *s = ""; /* optional s in "buffer[s]" */
2900 if (umv.umv_corruption > 1)
2901 s = "s";
2902
2903 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2904 UMEM_CACHE_NAMELEN,
2905 umv.umv_cache.cache_name, addr,
2906 umv.umv_corruption, s);
2907 }
2908 } else {
2909 /*
2910 * This is the more verbose mode, when the user has
2911 * typed addr::umem_verify. If the cache was clean,
2912 * nothing will have been printed yet, so say something.
2913 */
2914 if (umv.umv_corruption == 0)
2915 mdb_printf("clean\n");
2916
2917 mdb_dec_indent(2);
2918 }
2919 } else {
2920 /*
2921 * If the user didn't specify a cache to verify, we'll walk all
2922 * umem caches, specifying ourselves as the callback for each...
2923 * this is the equivalent of '::walk umem_cache .::umem_verify'
2924 */
2925 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", UMEM_CACHE_NAMELEN,
2926 "Cache Name", "Addr", "Cache Integrity");
2927 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2928 }
2929
2930 return (DCMD_OK);
2931 }
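/*
 * Example invocations of ::umem_verify (cache address hypothetical):
 *
 *	> ::umem_verify			one summary line per cache
 *	> 0x8069010::umem_verify	detailed report for a single cache
 *
 * A cache is only verifiable if redzone checking was enabled in the target,
 * e.g. by running it with UMEM_DEBUG=default.
 */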
2932
2933 typedef struct vmem_node {
2934 struct vmem_node *vn_next;
2935 struct vmem_node *vn_parent;
2936 struct vmem_node *vn_sibling;
2937 struct vmem_node *vn_children;
2938 uintptr_t vn_addr;
2939 int vn_marked;
2940 vmem_t vn_vmem;
2941 } vmem_node_t;
2942
2943 typedef struct vmem_walk {
2944 vmem_node_t *vw_root;
2945 vmem_node_t *vw_current;
2946 } vmem_walk_t;
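/*
 * The walker snapshots every arena on vmem_list into vmem_node_ts and links
 * them into a tree via each arena's vm_source pointer; "vmem" then visits a
 * parent before its children, while "vmem_postfix" (below) visits children
 * first.
 */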
2947
2948 int
2949 vmem_walk_init(mdb_walk_state_t *wsp)
2950 {
2951 uintptr_t vaddr, paddr;
2952 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2953 vmem_walk_t *vw;
2954
2955 if (umem_readvar(&vaddr, "vmem_list") == -1) {
2956 mdb_warn("couldn't read 'vmem_list'");
2957 return (WALK_ERR);
2958 }
2959
2960 while (vaddr != 0) {
2961 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2962 vp->vn_addr = vaddr;
2963 vp->vn_next = head;
2964 head = vp;
2965
2966 if (vaddr == wsp->walk_addr)
2967 current = vp;
2968
2969 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2970 mdb_warn("couldn't read vmem_t at %p", vaddr);
2971 goto err;
2972 }
2973
2974 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2975 }
2976
2977 for (vp = head; vp != NULL; vp = vp->vn_next) {
2978
2979 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) {
2980 vp->vn_sibling = root;
2981 root = vp;
2982 continue;
2983 }
2984
2985 for (parent = head; parent != NULL; parent = parent->vn_next) {
2986 if (parent->vn_addr != paddr)
2987 continue;
2988 vp->vn_sibling = parent->vn_children;
2989 parent->vn_children = vp;
2990 vp->vn_parent = parent;
2991 break;
2992 }
2993
2994 if (parent == NULL) {
2995 mdb_warn("couldn't find %p's parent (%p)\n",
2996 vp->vn_addr, paddr);
2997 goto err;
2998 }
2999 }
3000
3001 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3002 vw->vw_root = root;
3003
3004 if (current != NULL)
3005 vw->vw_current = current;
3006 else
3007 vw->vw_current = root;
3008
3009 wsp->walk_data = vw;
3010 return (WALK_NEXT);
3011 err:
3012 for (vp = head; head != NULL; vp = head) {
3013 head = vp->vn_next;
3014 mdb_free(vp, sizeof (vmem_node_t));
3015 }
3016
3017 return (WALK_ERR);
3018 }
3019
3020 int
3021 vmem_walk_step(mdb_walk_state_t *wsp)
3022 {
3023 vmem_walk_t *vw = wsp->walk_data;
3024 vmem_node_t *vp;
3025 int rval;
3026
3027 if ((vp = vw->vw_current) == NULL)
3028 return (WALK_DONE);
3029
3030 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3031
3032 if (vp->vn_children != NULL) {
3033 vw->vw_current = vp->vn_children;
3034 return (rval);
3035 }
3036
3037 do {
3038 vw->vw_current = vp->vn_sibling;
3039 vp = vp->vn_parent;
3040 } while (vw->vw_current == NULL && vp != NULL);
3041
3042 return (rval);
3043 }
3044
3045 /*
3046 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3047 * children are visited before their parent. We perform the postfix walk
3048 * iteratively (rather than recursively) to allow mdb to regain control
3049 * after each callback.
3050 */
3051 int
3052 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3053 {
3054 vmem_walk_t *vw = wsp->walk_data;
3055 vmem_node_t *vp = vw->vw_current;
3056 int rval;
3057
3058 /*
3059 * If this node is marked, then we know that we have already visited
3060 * all of its children. If the node has any siblings, they need to
3061 * be visited next; otherwise, we need to visit the parent. Note
3062 * that vp->vn_marked will only be zero on the first invocation of
3063 * the step function.
3064 */
3065 if (vp->vn_marked) {
3066 if (vp->vn_sibling != NULL)
3067 vp = vp->vn_sibling;
3068 else if (vp->vn_parent != NULL)
3069 vp = vp->vn_parent;
3070 else {
3071 /*
3072 * We have neither a parent, nor a sibling, and we
3073 * have already been visited; we're done.
3074 */
3075 return (WALK_DONE);
3076 }
3077 }
3078
3079 /*
3080 * Before we visit this node, visit its children.
3081 */
3082 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3083 vp = vp->vn_children;
3084
3085 vp->vn_marked = 1;
3086 vw->vw_current = vp;
3087 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3088
3089 return (rval);
3090 }
3091
3092 void
3093 vmem_walk_fini(mdb_walk_state_t *wsp)
3094 {
3095 vmem_walk_t *vw = wsp->walk_data;
3096 vmem_node_t *root = vw->vw_root;
3097 int done;
3098
3099 if (root == NULL)
3100 return;
3101
3102 if ((vw->vw_root = root->vn_children) != NULL)
3103 vmem_walk_fini(wsp);
3104
3105 vw->vw_root = root->vn_sibling;
3106 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3107 mdb_free(root, sizeof (vmem_node_t));
3108
3109 if (done) {
3110 mdb_free(vw, sizeof (vmem_walk_t));
3111 } else {
3112 vmem_walk_fini(wsp);
3113 }
3114 }
3115
3116 typedef struct vmem_seg_walk {
3117 uint8_t vsw_type;
3118 uintptr_t vsw_start;
3119 uintptr_t vsw_current;
3120 } vmem_seg_walk_t;
3121
3122 /*ARGSUSED*/
3123 int
3124 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3125 {
3126 vmem_seg_walk_t *vsw;
3127
3128 if (wsp->walk_addr == 0) {
3129 mdb_warn("vmem_%s does not support global walks\n", name);
3130 return (WALK_ERR);
3131 }
3132
3133 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3134
3135 vsw->vsw_type = type;
3136 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3137 vsw->vsw_current = vsw->vsw_start;
3138
3139 return (WALK_NEXT);
3140 }
3141
3142 /*
3143 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3144 */
3145 #define VMEM_NONE 0
3146
3147 int
3148 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3149 {
3150 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3151 }
3152
3153 int
3154 vmem_free_walk_init(mdb_walk_state_t *wsp)
3155 {
3156 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3157 }
3158
3159 int
3160 vmem_span_walk_init(mdb_walk_state_t *wsp)
3161 {
3162 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3163 }
3164
3165 int
3166 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3167 {
3168 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3169 }
3170
3171 int
3172 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3173 {
3174 vmem_seg_t seg;
3175 vmem_seg_walk_t *vsw = wsp->walk_data;
3176 uintptr_t addr = vsw->vsw_current;
3177 static size_t seg_size = 0;
3178 int rval;
3179
3180 if (!seg_size) {
3181 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3182 mdb_warn("failed to read 'vmem_seg_size'");
3183 seg_size = sizeof (vmem_seg_t);
3184 }
3185 }
3186
3187 if (seg_size < sizeof (seg))
3188 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3189
3190 if (mdb_vread(&seg, seg_size, addr) == -1) {
3191 mdb_warn("couldn't read vmem_seg at %p", addr);
3192 return (WALK_ERR);
3193 }
3194
3195 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3196 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3197 rval = WALK_NEXT;
3198 } else {
3199 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3200 }
3201
3202 if (vsw->vsw_current == vsw->vsw_start)
3203 return (WALK_DONE);
3204
3205 return (rval);
3206 }
3207
3208 void
3209 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3210 {
3211 vmem_seg_walk_t *vsw = wsp->walk_data;
3212
3213 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3214 }
3215
3216 #define VMEM_NAMEWIDTH 22
3217
3218 int
3219 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3220 {
3221 vmem_t v, parent;
3222 uintptr_t paddr;
3223 int ident = 0;
3224 char c[VMEM_NAMEWIDTH];
3225
3226 if (!(flags & DCMD_ADDRSPEC)) {
3227 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3228 mdb_warn("can't walk vmem");
3229 return (DCMD_ERR);
3230 }
3231 return (DCMD_OK);
3232 }
3233
3234 if (DCMD_HDRSPEC(flags))
3235 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3236 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3237 "TOTAL", "SUCCEED", "FAIL");
3238
3239 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3240 mdb_warn("couldn't read vmem at %p", addr);
3241 return (DCMD_ERR);
3242 }
3243
3244 for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) {
3245 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3246 mdb_warn("couldn't trace %p's ancestry", addr);
3247 ident = 0;
3248 break;
3249 }
3250 paddr = (uintptr_t)parent.vm_source;
3251 }
3252
3253 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3254
3255 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3256 addr, VMEM_NAMEWIDTH, c,
3257 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3258 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3259
3260 return (DCMD_OK);
3261 }
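/*
 * With no address, ::vmem walks every arena, e.g.:
 *
 *	> ::vmem
 *
 * The NAME column is indented two spaces per level of the vm_source
 * hierarchy, so child arenas appear nested beneath their source arena.
 */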
3262
3263 void
3264 vmem_seg_help(void)
3265 {
3266 mdb_printf("%s\n",
3267 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3268 "\n"
3269 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3270 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3271 "information.\n");
3272 mdb_dec_indent(2);
3273 mdb_printf("%<b>OPTIONS%</b>\n");
3274 mdb_inc_indent(2);
3275 mdb_printf("%s",
3276 " -v Display the full content of the vmem_seg, including its stack trace\n"
3277 " -s report the size of the segment, instead of the end address\n"
3278 " -c caller\n"
3279 " filter out segments without the function/PC in their stack trace\n"
3280 " -e earliest\n"
3281 " filter out segments timestamped before earliest\n"
3282 " -l latest\n"
3283 " filter out segments timestamped after latest\n"
3284 " -m minsize\n"
3285 " filter out segments smaller than minsize\n"
3286 " -M maxsize\n"
3287 " filter out segments larger than maxsize\n"
3288 " -t thread\n"
3289 " filter out segments not involving thread\n"
3290 " -T type\n"
3291 " filter out segments not of type 'type'\n"
3292 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3293 }
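/*
 * For example (arena address hypothetical),
 *
 *	> 0x8061000::walk vmem_seg | ::vmem_seg -T ALLOC -m 0t8192 -v
 *
 * gives a verbose report (including stack traces, when recorded) for every
 * allocated segment of at least 8192 bytes in that arena.
 */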
3294
3295
3296 /*ARGSUSED*/
3297 int
3298 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3299 {
3300 vmem_seg_t vs;
3301 uintptr_t *stk = vs.vs_stack;
3302 uintptr_t sz;
3303 uint8_t t;
3304 const char *type = NULL;
3305 GElf_Sym sym;
3306 char c[MDB_SYM_NAMLEN];
3307 int no_debug;
3308 int i;
3309 int depth;
3310 uintptr_t laddr, haddr;
3311
3312 uintptr_t caller = 0, thread = 0;
3313 uintptr_t minsize = 0, maxsize = 0;
3314
3315 hrtime_t earliest = 0, latest = 0;
3316
3317 uint_t size = 0;
3318 uint_t verbose = 0;
3319
3320 if (!(flags & DCMD_ADDRSPEC))
3321 return (DCMD_USAGE);
3322
3323 if (mdb_getopts(argc, argv,
3324 'c', MDB_OPT_UINTPTR, &caller,
3325 'e', MDB_OPT_UINT64, &earliest,
3326 'l', MDB_OPT_UINT64, &latest,
3327 's', MDB_OPT_SETBITS, TRUE, &size,
3328 'm', MDB_OPT_UINTPTR, &minsize,
3329 'M', MDB_OPT_UINTPTR, &maxsize,
3330 't', MDB_OPT_UINTPTR, &thread,
3331 'T', MDB_OPT_STR, &type,
3332 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3333 NULL) != argc)
3334 return (DCMD_USAGE);
3335
3336 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3337 if (verbose) {
3338 mdb_printf("%16s %4s %16s %16s %16s\n"
3339 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3340 "ADDR", "TYPE", "START", "END", "SIZE",
3341 "", "", "THREAD", "TIMESTAMP", "");
3342 } else {
3343 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3344 "START", size? "SIZE" : "END", "WHO");
3345 }
3346 }
3347
3348 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3349 mdb_warn("couldn't read vmem_seg at %p", addr);
3350 return (DCMD_ERR);
3351 }
3352
3353 if (type != NULL) {
3354 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3355 t = VMEM_ALLOC;
3356 else if (strcmp(type, "FREE") == 0)
3357 t = VMEM_FREE;
3358 else if (strcmp(type, "SPAN") == 0)
3359 t = VMEM_SPAN;
3360 else if (strcmp(type, "ROTR") == 0 ||
3361 strcmp(type, "ROTOR") == 0)
3362 t = VMEM_ROTOR;
3363 else if (strcmp(type, "WLKR") == 0 ||
3364 strcmp(type, "WALKER") == 0)
3365 t = VMEM_WALKER;
3366 else {
3367 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3368 type);
3369 return (DCMD_ERR);
3370 }
3371
3372 if (vs.vs_type != t)
3373 return (DCMD_OK);
3374 }
3375
3376 sz = vs.vs_end - vs.vs_start;
3377
3378 if (minsize != 0 && sz < minsize)
3379 return (DCMD_OK);
3380
3381 if (maxsize != 0 && sz > maxsize)
3382 return (DCMD_OK);
3383
3384 t = vs.vs_type;
3385 depth = vs.vs_depth;
3386
3387 /*
3388 * debug info, when present, is only accurate for VMEM_ALLOC segments
3389 */
3390 no_debug = (t != VMEM_ALLOC) ||
3391 (depth == 0 || depth > VMEM_STACK_DEPTH);
3392
3393 if (no_debug) {
3394 if (caller != 0 || thread != 0 || earliest != 0 || latest != 0)
3395 return (DCMD_OK); /* not enough info */
3396 } else {
3397 if (caller != 0) {
3398 laddr = caller;
3399 haddr = caller + sizeof (caller);
3400
3401 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3402 sizeof (c), &sym) != -1 &&
3403 caller == (uintptr_t)sym.st_value) {
3404 /*
3405 * We were provided an exact symbol value; any
3406 * address in the function is valid.
3407 */
3408 laddr = (uintptr_t)sym.st_value;
3409 haddr = (uintptr_t)sym.st_value + sym.st_size;
3410 }
3411
3412 for (i = 0; i < depth; i++)
3413 if (vs.vs_stack[i] >= laddr &&
3414 vs.vs_stack[i] < haddr)
3415 break;
3416
3417 if (i == depth)
3418 return (DCMD_OK);
3419 }
3420
3421 if (thread != 0 && (uintptr_t)vs.vs_thread != thread)
3422 return (DCMD_OK);
3423
3424 if (earliest != 0 && vs.vs_timestamp < earliest)
3425 return (DCMD_OK);
3426
3427 if (latest != 0 && vs.vs_timestamp > latest)
3428 return (DCMD_OK);
3429 }
3430
3431 type = (t == VMEM_ALLOC ? "ALLC" :
3432 t == VMEM_FREE ? "FREE" :
3433 t == VMEM_SPAN ? "SPAN" :
3434 t == VMEM_ROTOR ? "ROTR" :
3435 t == VMEM_WALKER ? "WLKR" :
3436 "????");
3437
3438 if (flags & DCMD_PIPE_OUT) {
3439 mdb_printf("%#r\n", addr);
3440 return (DCMD_OK);
3441 }
3442
3443 if (verbose) {
3444 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3445 addr, type, vs.vs_start, vs.vs_end, sz);
3446
3447 if (no_debug)
3448 return (DCMD_OK);
3449
3450 mdb_printf("%16s %4s %16d %16llx\n",
3451 "", "", vs.vs_thread, vs.vs_timestamp);
3452
3453 mdb_inc_indent(17);
3454 for (i = 0; i < depth; i++) {
3455 mdb_printf("%a\n", stk[i]);
3456 }
3457 mdb_dec_indent(17);
3458 mdb_printf("\n");
3459 } else {
3460 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3461 vs.vs_start, size? sz : vs.vs_end);
3462
3463 if (no_debug) {
3464 mdb_printf("\n");
3465 return (DCMD_OK);
3466 }
3467
3468 for (i = 0; i < depth; i++) {
3469 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3470 c, sizeof (c), &sym) == -1)
3471 continue;
3472 if (is_umem_sym(c, "vmem_"))
3473 continue;
3474 break;
3475 }
3476 mdb_printf(" %a\n", stk[i]);
3477 }
3478 return (DCMD_OK);
3479 }
3480
3481 /*ARGSUSED*/
3482 static int
3483 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3484 {
3485 char name[UMEM_CACHE_NAMELEN + 1];
3486 hrtime_t delta;
3487 int i, depth;
3488
3489 if (bcp->bc_timestamp == 0)
3490 return (WALK_DONE);
3491
3492 if (*newest == 0)
3493 *newest = bcp->bc_timestamp;
3494
3495 delta = *newest - bcp->bc_timestamp;
3496 depth = MIN(bcp->bc_depth, umem_stack_depth);
3497
3498 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3499 &bcp->bc_cache->cache_name) <= 0)
3500 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3501
3502 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3503 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3504
3505 for (i = 0; i < depth; i++)
3506 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3507
3508 return (WALK_NEXT);
3509 }
3510
3511 int
3512 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3513 {
3514 const char *logname = "umem_transaction_log";
3515 hrtime_t newest = 0;
3516
3517 if ((flags & DCMD_ADDRSPEC) || argc > 1)
3518 return (DCMD_USAGE);
3519
3520 if (argc > 0) {
3521 if (argv->a_type != MDB_TYPE_STRING)
3522 return (DCMD_USAGE);
3523 if (strcmp(argv->a_un.a_str, "fail") == 0)
3524 logname = "umem_failure_log";
3525 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3526 logname = "umem_slab_log";
3527 else
3528 return (DCMD_USAGE);
3529 }
3530
3531 if (umem_readvar(&addr, logname) == -1) {
3532 mdb_warn("failed to read %s log header pointer", logname);
3533 return (DCMD_ERR);
3534 }
3535
3536 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3537 mdb_warn("failed to walk umem log");
3538 return (DCMD_ERR);
3539 }
3540
3541 return (DCMD_OK);
3542 }
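/*
 * Example invocations of ::umalog:
 *
 *	> ::umalog		recent transactions
 *	> ::umalog fail		allocation failures
 *	> ::umalog slab		slab creations
 *
 * Each entry is shown with a T-minus offset from the newest entry in the log.
 */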
3543
3544 /*
3545 * As the final lure for die-hard crash(8) users, we provide ::umausers here.
3546 * The first piece is a structure which we use to accumulate umem_cache_t
3547 * addresses of interest. umc_add() is used as a callback for the umem_cache
3548 * walker; we either add all caches, or ones named explicitly as arguments.
3549 */
3550
3551 typedef struct umclist {
3552 const char *umc_name; /* Name to match (or NULL) */
3553 uintptr_t *umc_caches; /* List of umem_cache_t addrs */
3554 int umc_nelems; /* Num entries in umc_caches */
3555 int umc_size; /* Size of umc_caches array */
3556 } umclist_t;
3557
3558 static int
3559 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3560 {
3561 void *p;
3562 int s;
3563
3564 if (umc->umc_name == NULL ||
3565 strcmp(cp->cache_name, umc->umc_name) == 0) {
3566 /*
3567 * If we have a match, grow our array (if necessary), and then
3568 * add the virtual address of the matching cache to our list.
3569 */
3570 if (umc->umc_nelems >= umc->umc_size) {
3571 s = umc->umc_size ? umc->umc_size * 2 : 256;
3572 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3573
3574 bcopy(umc->umc_caches, p,
3575 sizeof (uintptr_t) * umc->umc_size);
3576
3577 umc->umc_caches = p;
3578 umc->umc_size = s;
3579 }
3580
3581 umc->umc_caches[umc->umc_nelems++] = addr;
3582 return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3583 }
3584
3585 return (WALK_NEXT);
3586 }
3587
3588 /*
3589 * The second piece of ::umausers is a hash table of allocations. Each
3590 * allocation owner is identified by its stack trace and data_size. We then
3591 * track the total bytes of all such allocations, and the number of allocations
3592 * to report at the end. Once we have a list of caches, we walk through the
3593 * allocated bufctls of each, and update our hash table accordingly.
3594 */
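/*
 * Concretely, an owner's hash signature is its data_size plus the sum of its
 * stack PCs, and the table is kept at a power-of-two size so that the bucket
 * index is simply (signature & (umu_size - 1)); see umu_add() below.
 */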
3595
3596 typedef struct umowner {
3597 struct umowner *umo_head; /* First hash elt in bucket */
3598 struct umowner *umo_next; /* Next hash elt in chain */
3599 size_t umo_signature; /* Hash table signature */
3600 uint_t umo_num; /* Number of allocations */
3601 size_t umo_data_size; /* Size of each allocation */
3602 size_t umo_total_size; /* Total bytes of allocation */
3603 int umo_depth; /* Depth of stack trace */
3604 uintptr_t *umo_stack; /* Stack trace */
3605 } umowner_t;
3606
3607 typedef struct umusers {
3608 const umem_cache_t *umu_cache; /* Current umem cache */
3609 umowner_t *umu_hash; /* Hash table of owners */
3610 uintptr_t *umu_stacks; /* stacks for owners */
3611 int umu_nelems; /* Number of entries in use */
3612 int umu_size; /* Total number of entries */
3613 } umusers_t;
3614
3615 static void
3616 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3617 size_t size, size_t data_size)
3618 {
3619 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3620 size_t bucket, signature = data_size;
3621 umowner_t *umo, *umoend;
3622
3623 /*
3624 * If the hash table is full, double its size and rehash everything.
3625 */
3626 if (umu->umu_nelems >= umu->umu_size) {
3627 int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3628 size_t umowner_size = sizeof (umowner_t);
3629 size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3630 uintptr_t *new_stacks;
3631
3632 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3633 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3634
3635 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3636 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3637 umu->umu_hash = umo;
3638 umu->umu_stacks = new_stacks;
3639 umu->umu_size = s;
3640
3641 umoend = umu->umu_hash + umu->umu_size;
3642 for (umo = umu->umu_hash; umo < umoend; umo++) {
3643 umo->umo_head = NULL;
3644 umo->umo_stack = &umu->umu_stacks[
3645 umem_stack_depth * (umo - umu->umu_hash)];
3646 }
3647
3648 umoend = umu->umu_hash + umu->umu_nelems;
3649 for (umo = umu->umu_hash; umo < umoend; umo++) {
3650 bucket = umo->umo_signature & (umu->umu_size - 1);
3651 umo->umo_next = umu->umu_hash[bucket].umo_head;
3652 umu->umu_hash[bucket].umo_head = umo;
3653 }
3654 }
3655
3656 /*
3657 * Finish computing the hash signature from the stack trace, and then
3658 * see if the owner is in the hash table. If so, update our stats.
3659 */
3660 for (i = 0; i < depth; i++)
3661 signature += bcp->bc_stack[i];
3662
3663 bucket = signature & (umu->umu_size - 1);
3664
3665 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3666 if (umo->umo_signature == signature) {
3667 size_t difference = 0;
3668
3669 difference |= umo->umo_data_size - data_size;
3670 difference |= umo->umo_depth - depth;
3671
3672 for (i = 0; i < depth; i++) {
3673 difference |= umo->umo_stack[i] -
3674 bcp->bc_stack[i];
3675 }
3676
3677 if (difference == 0) {
3678 umo->umo_total_size += size;
3679 umo->umo_num++;
3680 return;
3681 }
3682 }
3683 }
3684
3685 /*
3686 * If the owner is not yet hashed, grab the next element and fill it
3687 * in based on the allocation information.
3688 */
3689 umo = &umu->umu_hash[umu->umu_nelems++];
3690 umo->umo_next = umu->umu_hash[bucket].umo_head;
3691 umu->umu_hash[bucket].umo_head = umo;
3692
3693 umo->umo_signature = signature;
3694 umo->umo_num = 1;
3695 umo->umo_data_size = data_size;
3696 umo->umo_total_size = size;
3697 umo->umo_depth = depth;
3698
3699 for (i = 0; i < depth; i++)
3700 umo->umo_stack[i] = bcp->bc_stack[i];
3701 }
3702
3703 /*
3704 * When ::umausers is invoked without the -f flag, we simply update our hash
3705 * table with the information from each allocated bufctl.
3706 */
3707 /*ARGSUSED*/
3708 static int
3709 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3710 {
3711 const umem_cache_t *cp = umu->umu_cache;
3712
3713 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3714 return (WALK_NEXT);
3715 }
3716
3717 /*
3718 * When ::umausers is invoked with the -f flag, we print out the information
3719 * for each bufctl as well as updating the hash table.
3720 */
3721 static int
3722 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3723 {
3724 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3725 const umem_cache_t *cp = umu->umu_cache;
3726
3727 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3728 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3729
3730 for (i = 0; i < depth; i++)
3731 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3732
3733 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3734 return (WALK_NEXT);
3735 }
3736
3737 /*
3738 * We sort our results by allocation size before printing them.
3739 */
3740 static int
3741 umownercmp(const void *lp, const void *rp)
3742 {
3743 const umowner_t *lhs = lp;
3744 const umowner_t *rhs = rp;
3745
3746 return (rhs->umo_total_size - lhs->umo_total_size);
3747 }
3748
3749 /*
3750 * The main engine of ::umausers is relatively straightforward: First we
3751 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3752 * iterate over the allocated bufctls of each cache in the list. Finally,
3753 * we sort and print our results.
3754 */
3755 /*ARGSUSED*/
3756 int
3757 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3758 {
3759 int mem_threshold = 8192; /* Minimum # bytes for printing */
3760 int cnt_threshold = 100; /* Minimum # blocks for printing */
3761 int audited_caches = 0; /* Number of UMF_AUDIT caches found */
3762 int do_all_caches = 1; /* Do all caches (no arguments) */
3763 int opt_e = FALSE; /* Include "small" users */
3764 int opt_f = FALSE; /* Print stack traces */
3765
3766 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3767 umowner_t *umo, *umoend;
3768 int i, oelems;
3769
3770 umclist_t umc;
3771 umusers_t umu;
3772
3773 if (flags & DCMD_ADDRSPEC)
3774 return (DCMD_USAGE);
3775
3776 bzero(&umc, sizeof (umc));
3777 bzero(&umu, sizeof (umu));
3778
3779 while ((i = mdb_getopts(argc, argv,
3780 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3781 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3782
3783 argv += i; /* skip past options we just processed */
3784 argc -= i; /* adjust argc */
3785
3786 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3787 return (DCMD_USAGE);
3788
3789 oelems = umc.umc_nelems;
3790 umc.umc_name = argv->a_un.a_str;
3791 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3792
3793 if (umc.umc_nelems == oelems) {
3794 mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3795 return (DCMD_ERR);
3796 }
3797
3798 do_all_caches = 0;
3799 argv++;
3800 argc--;
3801 }
3802
3803 if (opt_e)
3804 mem_threshold = cnt_threshold = 0;
3805
3806 if (opt_f)
3807 callback = (mdb_walk_cb_t)umause2;
3808
3809 if (do_all_caches) {
3810 umc.umc_name = NULL; /* match all cache names */
3811 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3812 }
3813
3814 for (i = 0; i < umc.umc_nelems; i++) {
3815 uintptr_t cp = umc.umc_caches[i];
3816 umem_cache_t c;
3817
3818 if (mdb_vread(&c, sizeof (c), cp) == -1) {
3819 mdb_warn("failed to read cache at %p", cp);
3820 continue;
3821 }
3822
3823 if (!(c.cache_flags & UMF_AUDIT)) {
3824 if (!do_all_caches) {
3825 mdb_warn("UMF_AUDIT is not enabled for %s\n",
3826 c.cache_name);
3827 }
3828 continue;
3829 }
3830
3831 umu.umu_cache = &c;
3832 (void) mdb_pwalk("bufctl", callback, &umu, cp);
3833 audited_caches++;
3834 }
3835
3836 if (audited_caches == 0 && do_all_caches) {
3837 mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3838 return (DCMD_ERR);
3839 }
3840
3841 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3842 umoend = umu.umu_hash + umu.umu_nelems;
3843
3844 for (umo = umu.umu_hash; umo < umoend; umo++) {
3845 if (umo->umo_total_size < mem_threshold &&
3846 umo->umo_num < cnt_threshold)
3847 continue;
3848 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3849 umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3850 for (i = 0; i < umo->umo_depth; i++)
3851 mdb_printf("\t %a\n", umo->umo_stack[i]);
3852 }
3853
3854 return (DCMD_OK);
3855 }
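/*
 * Example invocations of ::umausers (the cache name is only an example):
 *
 *	> ::umausers
 *	> ::umausers -ef umem_alloc_256
 *
 * By default only owners holding at least 8192 bytes or 100 allocations are
 * reported; -e drops those thresholds and -f also prints each bufctl (with
 * its stack) as it is encountered.
 */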
3856
3857 struct malloc_data {
3858 uint32_t malloc_size;
3859 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
3860 };
3861
3862 #ifdef _LP64
3863 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data))
3864 #else
3865 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data))
3866 #endif
3867
3868 typedef struct umem_malloc_info {
3869 size_t um_total; /* total allocated buffers */
3870 size_t um_malloc; /* malloc buffers */
3871 size_t um_malloc_size; /* sum of malloc buffer sizes */
3872 size_t um_malloc_overhead; /* sum of in-chunk overheads */
3873
3874 umem_cache_t *um_cp;
3875
3876 uint_t *um_bucket;
3877 } umem_malloc_info_t;
3878
3879 static void
3880 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3881 size_t maxbuckets, size_t minbucketsize, int geometric)
3882 {
3883 uint64_t um_malloc;
3884 int minb = -1;
3885 int maxb = -1;
3886 int buckets;
3887 int nbucks;
3888 int i;
3889 int b;
3890 const int *distarray;
3891
3892 minb = (int)minmalloc;
3893 maxb = (int)maxmalloc;
3894
3895 nbucks = buckets = maxb - minb + 1;
3896
3897 um_malloc = 0;
3898 for (b = minb; b <= maxb; b++)
3899 um_malloc += um_bucket[b];
3900
3901 if (maxbuckets != 0)
3902 buckets = MIN(buckets, maxbuckets);
3903
3904 if (minbucketsize > 1) {
3905 buckets = MIN(buckets, nbucks/minbucketsize);
3906 if (buckets == 0) {
3907 buckets = 1;
3908 minbucketsize = nbucks;
3909 }
3910 }
3911
3912 if (geometric)
3913 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3914 else
3915 distarray = dist_linear(buckets, minb, maxb);
3916
3917 dist_print_header("malloc size", 11, "count");
3918 for (i = 0; i < buckets; i++) {
3919 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3920 }
3921 mdb_printf("\n");
3922 }
3923
3924 /*
3925 * A malloc()ed buffer looks like:
3926 *
3927 * <----------- mi.malloc_size --->
3928 * <----------- cp.cache_bufsize ------------------>
3929 * <----------- cp.cache_chunksize -------------------------------->
3930 * +-------+-----------------------+---------------+---------------+
3931 * |/tag///| mallocsz              |/round-off/////|/debug info////|
3932 * +-------+---------------------------------------+---------------+
3933 *         <-- usable space ------>
3934 *
3935 * mallocsz is the argument to malloc(3C).
3936 * mi.malloc_size is the actual size passed to umem_alloc(), which
3937 * is rounded up to the smallest available cache size, which is
3938 * cache_bufsize. If there is debugging or alignment overhead in
3939 * the cache, that is reflected in a larger cache_chunksize.
3940 *
3941 * The tag at the beginning of the buffer is either 8 bytes or 16 bytes,
3942 * depending upon the ISA's alignment requirements. For 32-bit allocations,
3943 * it is always an 8-byte tag. For 64-bit allocations larger than 8 bytes,
3944 * the tag has 8 bytes of padding before it.
3945 *
3946 * 32-bit allocations, and 64-bit allocations <= 8 bytes:
3947 * +-------+-------+--------- ...
3948 * |/size//|/stat//| mallocsz ...
3949 * +-------+-------+--------- ...
3950 * ^
3951 * pointer returned from malloc(3C)
3952 *
3953 * 64-bit allocations > 8 bytes:
3954 * +---------------+-------+-------+--------- ...
3955 * |/padding///////|/size//|/stat//| mallocsz ...
3956 * +---------------+-------+-------+--------- ...
3957 * ^
3958 * pointer returned from malloc(3C)
3959 *
3960 * The "size" field is "malloc_size", which is mallocsz + the padding.
3961 * The "stat" field is derived from malloc_size, and functions as a
3962 * validation that this buffer is actually from malloc(3C).
3963 */
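/*
 * Worked example (illustrative only; the available caches come from the
 * target's umem_alloc_sizes[] array): in a 64-bit process, malloc(100)
 * prepends 8 bytes of padding and an 8-byte tag, so malloc_size is 116.
 * If the smallest cache that can hold 116 bytes is umem_alloc_128, then
 * cache_bufsize is 128, leaving 12 bytes of round-off; any debug metadata
 * shows up as cache_chunksize beyond that.
 */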
3964 /*ARGSUSED*/
3965 static int
3966 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
3967 {
3968 struct malloc_data md;
3969 size_t m_addr = addr;
3970 size_t overhead = sizeof (md);
3971 size_t mallocsz;
3972
3973 ump->um_total++;
3974
3975 #ifdef _LP64
3976 if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
3977 m_addr += overhead;
3978 overhead += sizeof (md);
3979 }
3980 #endif
3981
3982 if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
3983 mdb_warn("unable to read malloc header at %p", m_addr);
3984 return (WALK_NEXT);
3985 }
3986
3987 switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
3988 case MALLOC_MAGIC:
3989 #ifdef _LP64
3990 case MALLOC_SECOND_MAGIC:
3991 #endif
3992 mallocsz = md.malloc_size - overhead;
3993
3994 ump->um_malloc++;
3995 ump->um_malloc_size += mallocsz;
3996 ump->um_malloc_overhead += overhead;
3997
3998 /* include round-off and debug overhead */
3999 ump->um_malloc_overhead +=
4000 ump->um_cp->cache_chunksize - md.malloc_size;
4001
4002 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
4003 ump->um_bucket[mallocsz]++;
4004
4005 break;
4006 default:
4007 break;
4008 }
4009
4010 return (WALK_NEXT);
4011 }
4012
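/*
 * Read the target's umem_alloc_sizes[] array (entries may be 0) into
 * storage allocated with UM_GC, which mdb frees automatically when the
 * current dcmd or walk completes; callers must not hold the pointer
 * beyond that.
 */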
4013 int
4014 get_umem_alloc_sizes(int **out, size_t *out_num)
4015 {
4016 GElf_Sym sym;
4017
4018 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
4019 mdb_warn("unable to look up umem_alloc_sizes");
4020 return (-1);
4021 }
4022
4023 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
4024 *out_num = sym.st_size / sizeof (int);
4025
4026 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
4027 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
4028 *out = NULL;
4029 return (-1);
4030 }
4031
4032 return (0);
4033 }
4034
4035
4036 static int
4037 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
4038 {
4039 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
4040 return (WALK_NEXT);
4041
4042 ump->um_cp = cp;
4043
4044 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
4045 -1) {
4046 mdb_warn("can't walk 'umem' for cache %p", addr);
4047 return (WALK_ERR);
4048 }
4049
4050 return (WALK_NEXT);
4051 }
4052
4053 void
4054 umem_malloc_dist_help(void)
4055 {
4056 mdb_printf("%s\n",
4057 "report distribution of outstanding malloc()s");
4058 mdb_dec_indent(2);
4059 mdb_printf("%<b>OPTIONS%</b>\n");
4060 mdb_inc_indent(2);
4061 mdb_printf("%s",
4062 " -b maxbins\n"
4063 " Use at most maxbins bins for the data\n"
4064 " -B minbinsize\n"
4065 " Make the bins at least minbinsize bytes apart\n"
4066 " -d dump the raw data out, without binning\n"
4067 " -g use geometric binning instead of linear binning\n");
4068 }
4069
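/*
 * Illustrative invocation (assuming this dcmd is registered as
 * ::umem_malloc_dist, and remembering that mdb parses bare numbers as
 * hex, hence the 0t prefix):
 *
 *	> ::umem_malloc_dist -g -b 0t20
 *
 * bins all outstanding malloc(3C) buffers into at most 20 geometrically
 * spaced size buckets.
 */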
4070 /*ARGSUSED*/
4071 int
4072 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4073 {
4074 umem_malloc_info_t mi;
4075 uint_t geometric = 0;
4076 uint_t dump = 0;
4077 size_t maxbuckets = 0;
4078 size_t minbucketsize = 0;
4079
4080 size_t minalloc = 0;
4081 size_t maxalloc = UMI_MAX_BUCKET;
4082
4083 if (flags & DCMD_ADDRSPEC)
4084 return (DCMD_USAGE);
4085
4086 if (mdb_getopts(argc, argv,
4087 'd', MDB_OPT_SETBITS, TRUE, &dump,
4088 'g', MDB_OPT_SETBITS, TRUE, &geometric,
4089 'b', MDB_OPT_UINTPTR, &maxbuckets,
4090 'B', MDB_OPT_UINTPTR, &minbucketsize,
4091 NULL) != argc)
4092 return (DCMD_USAGE);
4093
4094 bzero(&mi, sizeof (mi));
4095 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4096 UM_SLEEP | UM_GC);
4097
4098 if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
4099 &mi) == -1) {
4100 mdb_warn("unable to walk 'umem_cache'");
4101 return (DCMD_ERR);
4102 }
4103
4104 if (dump) {
4105 int i;
4106 for (i = minalloc; i <= maxalloc; i++)
4107 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
4108
4109 return (DCMD_OK);
4110 }
4111
4112 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
4113 maxbuckets, minbucketsize, geometric);
4114
4115 return (DCMD_OK);
4116 }
4117
4118 void
4119 umem_malloc_info_help(void)
4120 {
4121 mdb_printf("%s\n",
4122 "report information about malloc()s by cache. ");
4123 mdb_dec_indent(2);
4124 mdb_printf("%<b>OPTIONS%</b>\n");
4125 mdb_inc_indent(2);
4126 mdb_printf("%s",
4127 " -b maxbins\n"
4128 " Use at most maxbins bins for the data\n"
4129 " -B minbinsize\n"
4130 " Make the bins at least minbinsize bytes apart\n"
4131 " -d dump the raw distribution data without binning\n"
4132 #ifndef _KMDB
4133 " -g use geometric binning instead of linear binning\n"
4134 #endif
4135 "");
4136 }
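/*
 * Illustrative invocation (assuming the dcmd is registered as
 * ::umem_malloc_info): without an address it summarizes every
 * umem_alloc_<n> cache; with a cache address (e.g. one taken from
 * ::umem_cache output) and a binning option it also prints that
 * cache's size distribution:
 *
 *	> <cache-addr>::umem_malloc_info -g -B 0t16
 */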
4137 int
4138 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4139 {
4140 umem_cache_t c;
4141 umem_malloc_info_t mi;
4142
4143 int skip = 0;
4144
4145 size_t maxmalloc;
4146 size_t overhead;
4147 size_t allocated;
4148 size_t avg_malloc;
4149 size_t overhead_pct; /* 1000 * overhead_percent */
4150
4151 uint_t verbose = 0;
4152 uint_t dump = 0;
4153 uint_t geometric = 0;
4154 size_t maxbuckets = 0;
4155 size_t minbucketsize = 0;
4156
4157 int *alloc_sizes;
4158 int idx;
4159 size_t num;
4160 size_t minmalloc;
4161
4162 if (mdb_getopts(argc, argv,
4163 'd', MDB_OPT_SETBITS, TRUE, &dump,
4164 'g', MDB_OPT_SETBITS, TRUE, &geometric,
4165 'b', MDB_OPT_UINTPTR, &maxbuckets,
4166 'B', MDB_OPT_UINTPTR, &minbucketsize,
4167 NULL) != argc)
4168 return (DCMD_USAGE);
4169
4170 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4171 verbose = 1;
4172
4173 if (!(flags & DCMD_ADDRSPEC)) {
4174 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4175 argc, argv) == -1) {
4176 mdb_warn("can't walk umem_cache");
4177 return (DCMD_ERR);
4178 }
4179 return (DCMD_OK);
4180 }
4181
4182 if (!mdb_vread(&c, sizeof (c), addr)) {
4183 mdb_warn("unable to read cache at %p", addr);
4184 return (DCMD_ERR);
4185 }
4186
4187 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4188 if (!(flags & DCMD_LOOP))
4189 mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4190 "by malloc()\n", c.cache_name);
4191 skip = 1;
4192 }
4193
4194 /*
4195 * Normally, print the header only the first time. In verbose mode,
4196 * print the header for every non-skipped cache.
4197 */
4198 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4199 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4200 "CACHE", "BUFSZ", "MAXMAL",
4201 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4202
4203 if (skip)
4204 return (DCMD_OK);
4205
4206 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4207 #ifdef _LP64
4208 if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4209 maxmalloc -= sizeof (struct malloc_data);
4210 #endif
4211
4212 bzero(&mi, sizeof (mi));
4213 mi.um_cp = &c;
4214 if (verbose)
4215 mi.um_bucket =
4216 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4217 UM_SLEEP | UM_GC);
4218
4219 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4220 -1) {
4221 mdb_warn("can't walk 'umem'");
4222 return (DCMD_ERR);
4223 }
4224
4225 overhead = mi.um_malloc_overhead;
4226 allocated = mi.um_malloc_size;
4227
4228 /* do integer round off for the average */
4229 if (mi.um_malloc != 0)
4230 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4231 else
4232 avg_malloc = 0;
4233
4234 /*
4235 * include per-slab overhead
4236 *
4237 * Each slab in a given cache is the same size, and has the same
4238 * number of chunks in it; we read in the first slab on the
4239 * slab list to get the number of chunks for all slabs. To
4240 * compute the per-slab overhead, we just subtract the chunk usage
4241 * from the slabsize:
4242 *
4243 * +------------+-------+-------+ ... --+-------+-------+-------+
4244 * |////////////| | | ... | |///////|///////|
4245 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//|
4246 * |////////////| | | ... | |///////|///////|
4247 * +------------+-------+-------+ ... --+-------+-------+-------+
4248 * | \_______chunksize * chunks_____/ |
4249 * \__________________________slabsize__________________________/
4250 *
4251 * For UMF_HASH caches, there is an additional source of overhead:
4252 * the external umem_slab_t and per-chunk bufctl structures. We
4253 * include those in our per-slab overhead.
4254 *
4255 * Once we have a number for the per-slab overhead, we estimate
4256 * the actual overhead by treating the malloc()ed buffers as if
4257 * they were densely packed:
4258 *
4259 * additional overhead = (# mallocs) * (per-slab) / (chunks);
4260 *
4261 * carefully ordering the multiply before the divide, to avoid
4262 * round-off error.
4263 */
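/*
 * Purely illustrative numbers: with cache_slabsize 8192, cache_chunksize
 * 112 and slab_chunks 73, the per-slab overhead is 8192 - (112 * 73) =
 * 16 bytes; if 1000 of this cache's chunks hold outstanding mallocs, the
 * estimate adds (16 * 1000) / 73 = 219 bytes (integer division).
 */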
4264 if (mi.um_malloc != 0) {
4265 umem_slab_t slab;
4266 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4267
4268 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4269 mdb_warn("unable to read slab at %p\n", saddr);
4270 } else {
4271 long chunks = slab.slab_chunks;
4272 if (chunks != 0 && c.cache_chunksize != 0 &&
4273 chunks <= c.cache_slabsize / c.cache_chunksize) {
4274 uintmax_t perslab =
4275 c.cache_slabsize -
4276 (c.cache_chunksize * chunks);
4277
4278 if (c.cache_flags & UMF_HASH) {
4279 perslab += sizeof (umem_slab_t) +
4280 chunks *
4281 ((c.cache_flags & UMF_AUDIT) ?
4282 sizeof (umem_bufctl_audit_t) :
4283 sizeof (umem_bufctl_t));
4284 }
4285 overhead +=
4286 (perslab * (uintmax_t)mi.um_malloc)/chunks;
4287 } else {
4288 mdb_warn("invalid #chunks (%d) in slab %p\n",
4289 chunks, saddr);
4290 }
4291 }
4292 }
4293
4294 if (allocated != 0)
4295 overhead_pct = (1000ULL * overhead) / allocated;
4296 else
4297 overhead_pct = 0;
4298
4299 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4300 addr, c.cache_bufsize, maxmalloc,
4301 mi.um_malloc, avg_malloc, allocated, overhead,
4302 overhead_pct / 10, overhead_pct % 10);
4303
4304 if (!verbose)
4305 return (DCMD_OK);
4306
4307 if (!dump)
4308 mdb_printf("\n");
4309
4310 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4311 return (DCMD_ERR);
4312
4313 for (idx = 0; idx < num; idx++) {
4314 if (alloc_sizes[idx] == c.cache_bufsize)
4315 break;
4316 if (alloc_sizes[idx] == 0) {
4317 idx = num; /* 0-terminated array */
4318 break;
4319 }
4320 }
4321 if (idx == num) {
4322 mdb_warn(
4323 "cache %p's size (%d) not in umem_alloc_sizes\n",
4324 addr, c.cache_bufsize);
4325 return (DCMD_ERR);
4326 }
4327
4328 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4329 if (minmalloc > 0) {
4330 #ifdef _LP64
4331 if (minmalloc > UMEM_SECOND_ALIGN)
4332 minmalloc -= sizeof (struct malloc_data);
4333 #endif
4334 minmalloc -= sizeof (struct malloc_data);
4335 minmalloc += 1;
4336 }
4337
4338 if (dump) {
4339 for (idx = minmalloc; idx <= maxmalloc; idx++)
4340 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4341 mdb_printf("\n");
4342 } else {
4343 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4344 maxbuckets, minbucketsize, geometric);
4345 }
4346
4347 return (DCMD_OK);
4348 }
4349