1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2019 Joyent, Inc.
28 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
29 */
30
31 #include "umem.h"
32
33 #include <sys/vmem_impl_user.h>
34 #include <umem_impl.h>
35
36 #include <alloca.h>
37 #include <limits.h>
38 #include <mdb/mdb_whatis.h>
39 #include <thr_uberdata.h>
40
41 #include "misc.h"
42 #include "leaky.h"
43 #include "dist.h"
44
45 #include "umem_pagesize.h"
46
47 #define UM_ALLOCATED 0x1
48 #define UM_FREE 0x2
49 #define UM_BUFCTL 0x4
50 #define UM_HASH 0x8
51
52 int umem_ready;
53
54 static int umem_stack_depth_warned;
55 static uint32_t umem_max_ncpus;
56 uint32_t umem_stack_depth;
57
58 size_t umem_pagesize;
59
60 #define UMEM_READVAR(var) \
61 (umem_readvar(&(var), #var) == -1 && \
62 (mdb_warn("failed to read "#var), 1))
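
/*
 * UMEM_READVAR() relies on the comma operator: the macro as a whole
 * evaluates to 1 (after mdb_warn() has already been issued) only when
 * the umem_readvar() call fails, so callers can simply write, for
 * example:
 *
 *	if (UMEM_READVAR(umem_ready))
 *		return (-1);
 */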
63
64 int
65 umem_update_variables(void)
66 {
67 size_t pagesize;
68
69 /*
70 * Figure out which type of umem is being used; if it's not there
71 * yet, succeed quietly.
72 */
73 if (umem_set_standalone() == -1) {
74 umem_ready = 0;
75 return (0); /* umem not there yet */
76 }
77
78 /*
79 * Solaris 9 used a different name for umem_max_ncpus. It's
80 * cheap backwards compatibility to check for both names.
81 */
82 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
83 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
84 mdb_warn("unable to read umem_max_ncpus or max_ncpus");
85 return (-1);
86 }
87 if (UMEM_READVAR(umem_ready))
88 return (-1);
89 if (UMEM_READVAR(umem_stack_depth))
90 return (-1);
91 if (UMEM_READVAR(pagesize))
92 return (-1);
93
94 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
95 if (umem_stack_depth_warned == 0) {
96 mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
97 umem_stack_depth, UMEM_MAX_STACK_DEPTH);
98 umem_stack_depth_warned = 1;
99 }
100 umem_stack_depth = 0;
101 }
102
103 umem_pagesize = pagesize;
104
105 return (0);
106 }
107
108 static int
109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
110 {
111 if (wsp->walk_addr == 0) {
112 if (mdb_layered_walk("ulwp", wsp) == -1) {
113 mdb_warn("couldn't walk 'ulwp'");
114 return (WALK_ERR);
115 }
116 }
117
118 return (WALK_NEXT);
119 }
120
121 static int
122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
123 {
124 uintptr_t this;
125 int rval;
126
127 if (wsp->walk_layer != NULL) {
128 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
129 (uintptr_t)wsp->walk_arg;
130 } else {
131 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
132 }
133
134 for (;;) {
135 if (mdb_vread(&this, sizeof (void *), this) == -1) {
136 mdb_warn("couldn't read ptc buffer at %p", this);
137 return (WALK_ERR);
138 }
139
140 if (this == 0)
141 break;
142
143 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
144
145 if (rval != WALK_NEXT)
146 return (rval);
147 }
148
149 return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
150 }
151
152 /*ARGSUSED*/
153 static int
154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
155 {
156 mdb_walker_t w;
157 char descr[64];
158 char name[64];
159 int i;
160
161 (void) mdb_snprintf(descr, sizeof (descr),
162 "walk the %s cache", c->cache_name);
163
164 w.walk_name = c->cache_name;
165 w.walk_descr = descr;
166 w.walk_init = umem_walk_init;
167 w.walk_step = umem_walk_step;
168 w.walk_fini = umem_walk_fini;
169 w.walk_init_arg = (void *)addr;
170
171 if (mdb_add_walker(&w) == -1)
172 mdb_warn("failed to add %s walker", c->cache_name);
173
174 if (!(c->cache_flags & UMF_PTC))
175 return (WALK_NEXT);
176
177 /*
178 * For the per-thread cache walker, the address is the offset in the
179 * tm_roots[] array of the ulwp_t.
180 */
181 for (i = 0; sizes[i] != 0; i++) {
182 if (sizes[i] == c->cache_bufsize)
183 break;
184 }
185
186 if (sizes[i] == 0) {
187 mdb_warn("cache %s is cached per-thread, but could not find "
188 "size in umem_alloc_sizes\n", c->cache_name);
189 return (WALK_NEXT);
190 }
191
192 if (i >= NTMEMBASE) {
193 mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
194 c->cache_name, i, NTMEMBASE);
195 return (WALK_NEXT);
196 }
197
198 (void) mdb_snprintf(name, sizeof (name),
199 "umem_ptc_%d", c->cache_bufsize);
200 (void) mdb_snprintf(descr, sizeof (descr),
201 "walk the per-thread cache for %s", c->cache_name);
202
203 w.walk_name = name;
204 w.walk_descr = descr;
205 w.walk_init = umem_ptc_walk_init;
206 w.walk_step = umem_ptc_walk_step;
207 w.walk_fini = NULL;
208 w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
209
210 if (mdb_add_walker(&w) == -1)
211 mdb_warn("failed to add %s walker", w.walk_name);
212
213 return (WALK_NEXT);
214 }
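
/*
 * Once umem_init_walkers() has run, every cache can be walked by name,
 * and caches with UMF_PTC set additionally get a "umem_ptc_<bufsize>"
 * walker for their per-thread roots.  For example (cache names such as
 * umem_alloc_32 are typical of libumem targets, but the exact set
 * depends on the process being examined):
 *
 *	> ::walk umem_alloc_32		! allocated buffers in that cache
 *	> ::walk umem_ptc_32		! buffers cached per-thread
 */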
215
216 /*ARGSUSED*/
217 static void
218 umem_statechange_cb(void *arg)
219 {
220 static int been_ready = 0;
221 GElf_Sym sym;
222 int *sizes;
223
224 #ifndef _KMDB
225 leaky_cleanup(1); /* state changes invalidate leaky state */
226 #endif
227
228 if (umem_update_variables() == -1)
229 return;
230
231 if (been_ready)
232 return;
233
234 if (umem_ready != UMEM_READY)
235 return;
236
237 been_ready = 1;
238
239 /*
240 * In order to determine the tm_roots offset of any cache that is
241 * cached per-thread, we need to have the umem_alloc_sizes array.
242 * Read this, ensuring that it is zero-terminated.
243 */
244 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
245 mdb_warn("unable to lookup 'umem_alloc_sizes'");
246 return;
247 }
248
249 sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
250
251 if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
252 mdb_warn("couldn't read 'umem_alloc_sizes'");
253 return;
254 }
255
256 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
257 }
258
259 int
260 umem_abort_messages(void)
261 {
262 char *umem_error_buffer;
263 uint_t umem_error_begin;
264 GElf_Sym sym;
265 size_t bufsize;
266
267 if (UMEM_READVAR(umem_error_begin))
268 return (DCMD_ERR);
269
270 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
271 mdb_warn("unable to look up umem_error_buffer");
272 return (DCMD_ERR);
273 }
274
275 bufsize = (size_t)sym.st_size;
276
277 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
278
279 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
280 != bufsize) {
281 mdb_warn("unable to read umem_error_buffer");
282 return (DCMD_ERR);
283 }
284 /* put a zero after the end of the buffer to simplify printing */
285 umem_error_buffer[bufsize] = 0;
286
287 if ((umem_error_begin % bufsize) == 0)
288 mdb_printf("%s\n", umem_error_buffer);
289 else {
290 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
291 mdb_printf("%s%s\n",
292 &umem_error_buffer[umem_error_begin % bufsize],
293 umem_error_buffer);
294 }
295
296 return (DCMD_OK);
297 }
298
299 static void
300 umem_log_status(const char *name, umem_log_header_t *val)
301 {
302 umem_log_header_t my_lh;
303 uintptr_t pos = (uintptr_t)val;
304 size_t size;
305
306 if (pos == 0)
307 return;
308
309 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
310 mdb_warn("\nunable to read umem_%s_log pointer %p",
311 name, pos);
312 return;
313 }
314
315 size = my_lh.lh_chunksize * my_lh.lh_nchunks;
316
317 if (size % (1024 * 1024) == 0)
318 mdb_printf("%s=%dm ", name, size / (1024 * 1024));
319 else if (size % 1024 == 0)
320 mdb_printf("%s=%dk ", name, size / 1024);
321 else
322 mdb_printf("%s=%d ", name, size);
323 }
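
/*
 * Each configured log contributes one "name=size" token to the Logs:
 * line of ::umem_status, with the size scaled to 'm' or 'k' when it
 * divides evenly.  A hypothetical target with 128k transaction and 16k
 * failure logs would show something like:
 *
 *	Logs:		transaction=128k fail=16k
 */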
324
325 typedef struct umem_debug_flags {
326 const char *udf_name;
327 uint_t udf_flags;
328 uint_t udf_clear; /* if 0, uses udf_flags */
329 } umem_debug_flags_t;
330
331 umem_debug_flags_t umem_status_flags[] = {
332 { "random", UMF_RANDOMIZE, UMF_RANDOM },
333 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
334 { "audit", UMF_AUDIT },
335 { "guards", UMF_DEADBEEF | UMF_REDZONE },
336 { "nosignal", UMF_CHECKSIGNAL },
337 { "firewall", UMF_FIREWALL },
338 { "lite", UMF_LITE },
339 { "checknull", UMF_CHECKNULL },
340 { NULL }
341 };
342
343 /*ARGSUSED*/
344 int
345 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
346 {
347 int umem_logging;
348
349 umem_log_header_t *umem_transaction_log;
350 umem_log_header_t *umem_content_log;
351 umem_log_header_t *umem_failure_log;
352 umem_log_header_t *umem_slab_log;
353
354 mdb_printf("Status:\t\t%s\n",
355 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
356 umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
357 umem_ready == UMEM_READY_INITING ? "initialization in process" :
358 umem_ready == UMEM_READY ? "ready and active" :
359 umem_ready == 0 ? "not loaded into address space" :
360 "unknown (umem_ready invalid)");
361
362 if (umem_ready == 0)
363 return (DCMD_OK);
364
365 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
366
367 if (UMEM_READVAR(umem_logging))
368 goto err;
369 if (UMEM_READVAR(umem_transaction_log))
370 goto err;
371 if (UMEM_READVAR(umem_content_log))
372 goto err;
373 if (UMEM_READVAR(umem_failure_log))
374 goto err;
375 if (UMEM_READVAR(umem_slab_log))
376 goto err;
377
378 mdb_printf("Logs:\t\t");
379 umem_log_status("transaction", umem_transaction_log);
380 umem_log_status("content", umem_content_log);
381 umem_log_status("fail", umem_failure_log);
382 umem_log_status("slab", umem_slab_log);
383 if (!umem_logging)
384 mdb_printf("(inactive)");
385 mdb_printf("\n");
386
387 mdb_printf("Message buffer:\n");
388 return (umem_abort_messages());
389
390 err:
391 mdb_printf("Message buffer:\n");
392 (void) umem_abort_messages();
393 return (DCMD_ERR);
394 }
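
/*
 * Typical interactive use (the status and concurrency values shown here
 * are, of course, entirely target-dependent):
 *
 *	> ::umem_status
 *	Status:		ready and active
 *	Concurrency:	4
 *	Logs:		(inactive)
 *	Message buffer:
 */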
395
396 typedef struct {
397 uintptr_t ucw_first;
398 uintptr_t ucw_current;
399 } umem_cache_walk_t;
400
401 int
402 umem_cache_walk_init(mdb_walk_state_t *wsp)
403 {
404 umem_cache_walk_t *ucw;
405 umem_cache_t c;
406 uintptr_t cp;
407 GElf_Sym sym;
408
409 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
410 mdb_warn("couldn't find umem_null_cache");
411 return (WALK_ERR);
412 }
413
414 cp = (uintptr_t)sym.st_value;
415
416 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
417 mdb_warn("couldn't read cache at %p", cp);
418 return (WALK_ERR);
419 }
420
421 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
422
423 ucw->ucw_first = cp;
424 ucw->ucw_current = (uintptr_t)c.cache_next;
425 wsp->walk_data = ucw;
426
427 return (WALK_NEXT);
428 }
429
430 int
431 umem_cache_walk_step(mdb_walk_state_t *wsp)
432 {
433 umem_cache_walk_t *ucw = wsp->walk_data;
434 umem_cache_t c;
435 int status;
436
437 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
438 mdb_warn("couldn't read cache at %p", ucw->ucw_current);
439 return (WALK_DONE);
440 }
441
442 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
443
444 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
445 return (WALK_DONE);
446
447 return (status);
448 }
449
450 void
451 umem_cache_walk_fini(mdb_walk_state_t *wsp)
452 {
453 umem_cache_walk_t *ucw = wsp->walk_data;
454 mdb_free(ucw, sizeof (umem_cache_walk_t));
455 }
456
457 typedef struct {
458 umem_cpu_t *ucw_cpus;
459 uint32_t ucw_current;
460 uint32_t ucw_max;
461 } umem_cpu_walk_state_t;
462
463 int
464 umem_cpu_walk_init(mdb_walk_state_t *wsp)
465 {
466 umem_cpu_t *umem_cpus;
467
468 umem_cpu_walk_state_t *ucw;
469
470 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
471 mdb_warn("failed to read 'umem_cpus'");
472 return (WALK_ERR);
473 }
474
475 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
476
477 ucw->ucw_cpus = umem_cpus;
478 ucw->ucw_current = 0;
479 ucw->ucw_max = umem_max_ncpus;
480
481 wsp->walk_data = ucw;
482 return (WALK_NEXT);
483 }
484
485 int
486 umem_cpu_walk_step(mdb_walk_state_t *wsp)
487 {
488 umem_cpu_t cpu;
489 umem_cpu_walk_state_t *ucw = wsp->walk_data;
490
491 uintptr_t caddr;
492
493 if (ucw->ucw_current >= ucw->ucw_max)
494 return (WALK_DONE);
495
496 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
497
498 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
499 mdb_warn("failed to read cpu %d", ucw->ucw_current);
500 return (WALK_ERR);
501 }
502
503 ucw->ucw_current++;
504
505 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
506 }
507
508 void
509 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
510 {
511 umem_cpu_walk_state_t *ucw = wsp->walk_data;
512
513 mdb_free(ucw, sizeof (*ucw));
514 }
515
516 int
517 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
518 {
519 if (wsp->walk_addr == 0) {
520 mdb_warn("umem_cpu_cache doesn't support global walks");
521 return (WALK_ERR);
522 }
523
524 if (mdb_layered_walk("umem_cpu", wsp) == -1) {
525 mdb_warn("couldn't walk 'umem_cpu'");
526 return (WALK_ERR);
527 }
528
529 wsp->walk_data = (void *)wsp->walk_addr;
530
531 return (WALK_NEXT);
532 }
533
534 int
535 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
536 {
537 uintptr_t caddr = (uintptr_t)wsp->walk_data;
538 const umem_cpu_t *cpu = wsp->walk_layer;
539 umem_cpu_cache_t cc;
540
541 caddr += cpu->cpu_cache_offset;
542
543 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
544 mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
545 return (WALK_ERR);
546 }
547
548 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
549 }
550
551 int
552 umem_slab_walk_init(mdb_walk_state_t *wsp)
553 {
554 uintptr_t caddr = wsp->walk_addr;
555 umem_cache_t c;
556
557 if (caddr == 0) {
558 mdb_warn("umem_slab doesn't support global walks\n");
559 return (WALK_ERR);
560 }
561
562 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
563 mdb_warn("couldn't read umem_cache at %p", caddr);
564 return (WALK_ERR);
565 }
566
567 wsp->walk_data =
568 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
569 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
570
571 return (WALK_NEXT);
572 }
573
574 int
575 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
576 {
577 uintptr_t caddr = wsp->walk_addr;
578 umem_cache_t c;
579
580 if (caddr == 0) {
581 mdb_warn("umem_slab_partial doesn't support global walks\n");
582 return (WALK_ERR);
583 }
584
585 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
586 mdb_warn("couldn't read umem_cache at %p", caddr);
587 return (WALK_ERR);
588 }
589
590 wsp->walk_data =
591 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
592 wsp->walk_addr = (uintptr_t)c.cache_freelist;
593
594 /*
595 * Some consumers (umem_walk_step(), in particular) require at
596 * least one callback if there are any buffers in the cache. So
597 * if there are *no* partial slabs, report the last full slab, if
598 * any.
599 *
600 * Yes, this is ugly, but it's cleaner than the other possibilities.
601 */
602 if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
603 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
604
605 return (WALK_NEXT);
606 }
607
608 int
609 umem_slab_walk_step(mdb_walk_state_t *wsp)
610 {
611 umem_slab_t s;
612 uintptr_t addr = wsp->walk_addr;
613 uintptr_t saddr = (uintptr_t)wsp->walk_data;
614 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
615
616 if (addr == saddr)
617 return (WALK_DONE);
618
619 if (mdb_vread(&s, sizeof (s), addr) == -1) {
620 mdb_warn("failed to read slab at %p", wsp->walk_addr);
621 return (WALK_ERR);
622 }
623
624 if ((uintptr_t)s.slab_cache != caddr) {
625 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
626 addr, caddr, s.slab_cache);
627 return (WALK_ERR);
628 }
629
630 wsp->walk_addr = (uintptr_t)s.slab_next;
631
632 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
633 }
634
635 int
636 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
637 {
638 umem_cache_t c;
639
640 if (!(flags & DCMD_ADDRSPEC)) {
641 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
642 mdb_warn("can't walk umem_cache");
643 return (DCMD_ERR);
644 }
645 return (DCMD_OK);
646 }
647
648 if (DCMD_HDRSPEC(flags))
649 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
650 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
651
652 if (mdb_vread(&c, sizeof (c), addr) == -1) {
653 mdb_warn("couldn't read umem_cache at %p", addr);
654 return (DCMD_ERR);
655 }
656
657 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
658 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
659
660 return (DCMD_OK);
661 }
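
/*
 * Example usage: with no address, ::umem_cache walks every cache and
 * prints one line per cache under the header above; with an explicit
 * cache address it prints just that cache:
 *
 *	> ::umem_cache
 *	> <cache addr>::umem_cache
 */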
662
663 static int
664 addrcmp(const void *lhs, const void *rhs)
665 {
666 uintptr_t p1 = *((uintptr_t *)lhs);
667 uintptr_t p2 = *((uintptr_t *)rhs);
668
669 if (p1 < p2)
670 return (-1);
671 if (p1 > p2)
672 return (1);
673 return (0);
674 }
675
676 static int
677 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
678 {
679 const umem_bufctl_audit_t *bcp1 = *lhs;
680 const umem_bufctl_audit_t *bcp2 = *rhs;
681
682 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
683 return (-1);
684
685 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
686 return (1);
687
688 return (0);
689 }
690
691 typedef struct umem_hash_walk {
692 uintptr_t *umhw_table;
693 size_t umhw_nelems;
694 size_t umhw_pos;
695 umem_bufctl_t umhw_cur;
696 } umem_hash_walk_t;
697
698 int
699 umem_hash_walk_init(mdb_walk_state_t *wsp)
700 {
701 umem_hash_walk_t *umhw;
702 uintptr_t *hash;
703 umem_cache_t c;
704 uintptr_t haddr, addr = wsp->walk_addr;
705 size_t nelems;
706 size_t hsize;
707
708 if (addr == 0) {
709 mdb_warn("umem_hash doesn't support global walks\n");
710 return (WALK_ERR);
711 }
712
713 if (mdb_vread(&c, sizeof (c), addr) == -1) {
714 mdb_warn("couldn't read cache at addr %p", addr);
715 return (WALK_ERR);
716 }
717
718 if (!(c.cache_flags & UMF_HASH)) {
719 mdb_warn("cache %p doesn't have a hash table\n", addr);
720 return (WALK_DONE); /* nothing to do */
721 }
722
723 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
724 umhw->umhw_cur.bc_next = NULL;
725 umhw->umhw_pos = 0;
726
727 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
728 hsize = nelems * sizeof (uintptr_t);
729 haddr = (uintptr_t)c.cache_hash_table;
730
731 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
732 if (mdb_vread(hash, hsize, haddr) == -1) {
733 mdb_warn("failed to read hash table at %p", haddr);
734 mdb_free(hash, hsize);
735 mdb_free(umhw, sizeof (umem_hash_walk_t));
736 return (WALK_ERR);
737 }
738
739 wsp->walk_data = umhw;
740
741 return (WALK_NEXT);
742 }
743
744 int
745 umem_hash_walk_step(mdb_walk_state_t *wsp)
746 {
747 umem_hash_walk_t *umhw = wsp->walk_data;
748 uintptr_t addr = 0;
749
750 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == 0) {
751 while (umhw->umhw_pos < umhw->umhw_nelems) {
752 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != 0)
753 break;
754 }
755 }
756 if (addr == 0)
757 return (WALK_DONE);
758
759 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
760 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
761 return (WALK_ERR);
762 }
763
764 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
765 }
766
767 void
768 umem_hash_walk_fini(mdb_walk_state_t *wsp)
769 {
770 umem_hash_walk_t *umhw = wsp->walk_data;
771
772 if (umhw == NULL)
773 return;
774
775 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
776 mdb_free(umhw, sizeof (umem_hash_walk_t));
777 }
778
779 /*
780 * Find the address of the bufctl structure for the address 'buf' in cache
781 * 'cp', which is at address caddr, and place it in *out.
782 */
783 static int
784 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
785 {
786 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
787 umem_bufctl_t *bcp;
788 umem_bufctl_t bc;
789
790 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
791 mdb_warn("unable to read hash bucket for %p in cache %p",
792 buf, caddr);
793 return (-1);
794 }
795
796 while (bcp != NULL) {
797 if (mdb_vread(&bc, sizeof (umem_bufctl_t),
798 (uintptr_t)bcp) == -1) {
799 mdb_warn("unable to read bufctl at %p", bcp);
800 return (-1);
801 }
802 if (bc.bc_addr == buf) {
803 *out = (uintptr_t)bcp;
804 return (0);
805 }
806 bcp = bc.bc_next;
807 }
808
809 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
810 return (-1);
811 }
812
813 int
814 umem_get_magsize(const umem_cache_t *cp)
815 {
816 uintptr_t addr = (uintptr_t)cp->cache_magtype;
817 GElf_Sym mt_sym;
818 umem_magtype_t mt;
819 int res;
820
821 /*
822 * if cpu 0 has a non-zero magsize, it must be correct. caches
823 * with UMF_NOMAGAZINE have disabled their magazine layers, so
824 * it is okay to return 0 for them.
825 */
826 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
827 (cp->cache_flags & UMF_NOMAGAZINE))
828 return (res);
829
830 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
831 mdb_warn("unable to read 'umem_magtype'");
832 } else if (addr < mt_sym.st_value ||
833 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
834 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
835 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
836 cp->cache_name, addr);
837 return (0);
838 }
839 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
840 mdb_warn("unable to read magtype at %a", addr);
841 return (0);
842 }
843 return (mt.mt_magsize);
844 }
845
846 /*ARGSUSED*/
847 static int
848 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
849 {
850 *est -= (sp->slab_chunks - sp->slab_refcnt);
851
852 return (WALK_NEXT);
853 }
854
855 /*
856 * Returns an upper bound on the number of allocated buffers in a given
857 * cache.
858 */
859 size_t
860 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
861 {
862 int magsize;
863 size_t cache_est;
864
865 cache_est = cp->cache_buftotal;
866
867 (void) mdb_pwalk("umem_slab_partial",
868 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
869
870 if ((magsize = umem_get_magsize(cp)) != 0) {
871 size_t mag_est = cp->cache_full.ml_total * magsize;
872
873 if (cache_est >= mag_est) {
874 cache_est -= mag_est;
875 } else {
876 mdb_warn("cache %p's magazine layer holds more buffers "
877 "than the slab layer.\n", addr);
878 }
879 }
880 return (cache_est);
881 }
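
/*
 * A worked example with made-up numbers: for a cache whose cache_buftotal
 * is 100, whose partial slabs hold 10 unallocated chunks, and whose depot
 * holds two full magazines of 15 rounds each, the estimate is
 * 100 - 10 - (2 * 15) = 60 allocated buffers.  Buffers can migrate
 * between layers while we read, so this remains an upper bound rather
 * than an exact count.
 */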
882
883 #define READMAG_ROUNDS(rounds) { \
884 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
885 mdb_warn("couldn't read magazine at %p", ump); \
886 goto fail; \
887 } \
888 for (i = 0; i < rounds; i++) { \
889 maglist[magcnt++] = mp->mag_round[i]; \
890 if (magcnt == magmax) { \
891 mdb_warn("%d magazines exceeds fudge factor\n", \
892 magcnt); \
893 goto fail; \
894 } \
895 } \
896 }
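
/*
 * READMAG_ROUNDS() is a macro rather than a function because it must
 * append to the maglist/magcnt locals of umem_read_magazines() and jump
 * to that function's fail: label when a read fails or magcnt reaches the
 * magmax fudge-factor bound.
 */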
897
898 static int
899 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
900 void ***maglistp, size_t *magcntp, size_t *magmaxp)
901 {
902 umem_magazine_t *ump, *mp;
903 void **maglist = NULL;
904 int i, cpu;
905 size_t magsize, magmax, magbsize;
906 size_t magcnt = 0;
907
908 /*
909 * Read the magtype out of the cache, after verifying the pointer's
910 * correctness.
911 */
912 magsize = umem_get_magsize(cp);
913 if (magsize == 0) {
914 *maglistp = NULL;
915 *magcntp = 0;
916 *magmaxp = 0;
917 return (0);
918 }
919
920 /*
921 * There are several places where we need to go buffer hunting:
922 * the per-CPU loaded magazine, the per-CPU spare full magazine,
923 * and the full magazine list in the depot.
924 *
925 * For an upper bound on the number of buffers in the magazine
926 * layer, we have the number of magazines on the cache_full
927 * list plus at most two magazines per CPU (the loaded and the
928 * spare). Toss in 100 magazines as a fudge factor in case this
929 * is live (the number "100" comes from the same fudge factor in
930 * crash(8)).
931 */
932 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
933 magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
934
935 if (magbsize >= PAGESIZE / 2) {
936 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
937 addr, magbsize);
938 return (-1);
939 }
940
941 maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
942 mp = mdb_alloc(magbsize, UM_SLEEP);
943 if (mp == NULL || maglist == NULL)
944 goto fail;
945
946 /*
947 * First up: the magazines in the depot (i.e. on the cache_full list).
948 */
949 for (ump = cp->cache_full.ml_list; ump != NULL; ) {
950 READMAG_ROUNDS(magsize);
951 ump = mp->mag_next;
952
953 if (ump == cp->cache_full.ml_list)
954 break; /* cache_full list loop detected */
955 }
956
957 dprintf(("cache_full list done\n"));
958
959 /*
960 * Now whip through the CPUs, snagging the loaded magazines
961 * and full spares.
962 */
963 for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
964 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
965
966 dprintf(("reading cpu cache %p\n",
967 (uintptr_t)ccp - (uintptr_t)cp + addr));
968
969 if (ccp->cc_rounds > 0 &&
970 (ump = ccp->cc_loaded) != NULL) {
971 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
972 READMAG_ROUNDS(ccp->cc_rounds);
973 }
974
975 if (ccp->cc_prounds > 0 &&
976 (ump = ccp->cc_ploaded) != NULL) {
977 dprintf(("reading %d previously loaded rounds\n",
978 ccp->cc_prounds));
979 READMAG_ROUNDS(ccp->cc_prounds);
980 }
981 }
982
983 dprintf(("magazine layer: %d buffers\n", magcnt));
984
985 mdb_free(mp, magbsize);
986
987 *maglistp = maglist;
988 *magcntp = magcnt;
989 *magmaxp = magmax;
990
991 return (0);
992
993 fail:
994 if (mp)
995 mdb_free(mp, magbsize);
996 if (maglist)
997 mdb_free(maglist, magmax * sizeof (void *));
998
999 return (-1);
1000 }
1001
1002 typedef struct umem_read_ptc_walk {
1003 void **urpw_buf;
1004 size_t urpw_cnt;
1005 size_t urpw_max;
1006 } umem_read_ptc_walk_t;
1007
1008 /*ARGSUSED*/
1009 static int
1010 umem_read_ptc_walk_buf(uintptr_t addr,
1011 const void *ignored, umem_read_ptc_walk_t *urpw)
1012 {
1013 if (urpw->urpw_cnt == urpw->urpw_max) {
1014 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1015 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1016
1017 if (nmax > 1) {
1018 size_t osize = urpw->urpw_max * sizeof (void *);
1019 bcopy(urpw->urpw_buf, new, osize);
1020 mdb_free(urpw->urpw_buf, osize);
1021 }
1022
1023 urpw->urpw_buf = new;
1024 urpw->urpw_max = nmax;
1025 }
1026
1027 urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1028
1029 return (WALK_NEXT);
1030 }
1031
1032 static int
1033 umem_read_ptc(umem_cache_t *cp,
1034 void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1035 {
1036 umem_read_ptc_walk_t urpw;
1037 char walk[60];
1038 int rval;
1039
1040 if (!(cp->cache_flags & UMF_PTC))
1041 return (0);
1042
1043 (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1044 cp->cache_bufsize);
1045
1046 urpw.urpw_buf = *buflistp;
1047 urpw.urpw_cnt = *bufcntp;
1048 urpw.urpw_max = *bufmaxp;
1049
1050 if ((rval = mdb_walk(walk,
1051 (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1052 mdb_warn("couldn't walk %s", walk);
1053 }
1054
1055 *buflistp = urpw.urpw_buf;
1056 *bufcntp = urpw.urpw_cnt;
1057 *bufmaxp = urpw.urpw_max;
1058
1059 return (rval);
1060 }
1061
1062 static int
1063 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1064 {
1065 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1066 }
1067
1068 static int
1069 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1070 {
1071 umem_bufctl_audit_t *b;
1072 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1073
1074 /*
1075 * if UMF_AUDIT is not set, we know that we're looking at a
1076 * umem_bufctl_t.
1077 */
1078 if (!(cp->cache_flags & UMF_AUDIT) ||
1079 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1080 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1081 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1082 mdb_warn("unable to read bufctl at %p", buf);
1083 return (WALK_ERR);
1084 }
1085 }
1086
1087 return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1088 }
1089
1090 typedef struct umem_walk {
1091 int umw_type;
1092
1093 uintptr_t umw_addr; /* cache address */
1094 umem_cache_t *umw_cp;
1095 size_t umw_csize;
1096
1097 /*
1098 * magazine layer
1099 */
1100 void **umw_maglist;
1101 size_t umw_max;
1102 size_t umw_count;
1103 size_t umw_pos;
1104
1105 /*
1106 * slab layer
1107 */
1108 char *umw_valid; /* to keep track of freed buffers */
1109 char *umw_ubase; /* buffer for slab data */
1110 } umem_walk_t;
1111
1112 static int
1113 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1114 {
1115 umem_walk_t *umw;
1116 int csize;
1117 umem_cache_t *cp;
1118 size_t vm_quantum;
1119
1120 size_t magmax, magcnt;
1121 void **maglist = NULL;
1122 uint_t chunksize = 1, slabsize = 1;
1123 int status = WALK_ERR;
1124 uintptr_t addr = wsp->walk_addr;
1125 const char *layered;
1126
1127 type &= ~UM_HASH;
1128
1129 if (addr == 0) {
1130 mdb_warn("umem walk doesn't support global walks\n");
1131 return (WALK_ERR);
1132 }
1133
1134 dprintf(("walking %p\n", addr));
1135
1136 /*
1137 * The number of "cpus" determines how large the cache is.
1138 */
1139 csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1140 cp = mdb_alloc(csize, UM_SLEEP);
1141
1142 if (mdb_vread(cp, csize, addr) == -1) {
1143 mdb_warn("couldn't read cache at addr %p", addr);
1144 goto out2;
1145 }
1146
1147 /*
1148 * It's easy for someone to hand us an invalid cache address.
1149 * Unfortunately, it is hard for this walker to survive an
1150 * invalid cache cleanly. So we make sure that:
1151 *
1152 * 1. the vmem arena for the cache is readable,
1153 * 2. the vmem arena's quantum is a power of 2,
1154 * 3. our slabsize is a multiple of the quantum, and
1155 * 4. our chunksize is >0 and less than our slabsize.
1156 */
1157 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1158 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1159 vm_quantum == 0 ||
1160 (vm_quantum & (vm_quantum - 1)) != 0 ||
1161 cp->cache_slabsize < vm_quantum ||
1162 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1163 cp->cache_chunksize == 0 ||
1164 cp->cache_chunksize > cp->cache_slabsize) {
1165 mdb_warn("%p is not a valid umem_cache_t\n", addr);
1166 goto out2;
1167 }
1168
1169 dprintf(("buf total is %d\n", cp->cache_buftotal));
1170
1171 if (cp->cache_buftotal == 0) {
1172 mdb_free(cp, csize);
1173 return (WALK_DONE);
1174 }
1175
1176 /*
1177 * If they ask for bufctls, but it's a small-slab cache,
1178 * there is nothing to report.
1179 */
1180 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1181 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1182 cp->cache_flags));
1183 mdb_free(cp, csize);
1184 return (WALK_DONE);
1185 }
1186
1187 /*
1188 * Read in the contents of the magazine layer
1189 */
1190 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1191 goto out2;
1192
1193 /*
1194 * Read in the contents of the per-thread caches, if any
1195 */
1196 if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1197 goto out2;
1198
1199 /*
1200 * We have all of the buffers from the magazines and from the
1201 * per-thread cache (if any); if we are walking allocated buffers,
1202 * sort them so we can bsearch them later.
1203 */
1204 if (type & UM_ALLOCATED)
1205 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1206
1207 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1208
1209 umw->umw_type = type;
1210 umw->umw_addr = addr;
1211 umw->umw_cp = cp;
1212 umw->umw_csize = csize;
1213 umw->umw_maglist = maglist;
1214 umw->umw_max = magmax;
1215 umw->umw_count = magcnt;
1216 umw->umw_pos = 0;
1217
1218 /*
1219 * When walking allocated buffers in a UMF_HASH cache, we walk the
1220 * hash table instead of the slab layer.
1221 */
1222 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1223 layered = "umem_hash";
1224
1225 umw->umw_type |= UM_HASH;
1226 } else {
1227 /*
1228 * If we are walking freed buffers, we only need the
1229 * magazine layer plus the partially allocated slabs.
1230 * To walk allocated buffers, we need all of the slabs.
1231 */
1232 if (type & UM_ALLOCATED)
1233 layered = "umem_slab";
1234 else
1235 layered = "umem_slab_partial";
1236
1237 /*
1238 * for small-slab caches, we read in the entire slab. For
1239 * freed buffers, we can just walk the freelist. For
1240 * allocated buffers, we use a 'valid' array to track
1241 * the freed buffers.
1242 */
1243 if (!(cp->cache_flags & UMF_HASH)) {
1244 chunksize = cp->cache_chunksize;
1245 slabsize = cp->cache_slabsize;
1246
1247 umw->umw_ubase = mdb_alloc(slabsize +
1248 sizeof (umem_bufctl_t), UM_SLEEP);
1249
1250 if (type & UM_ALLOCATED)
1251 umw->umw_valid =
1252 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1253 }
1254 }
1255
1256 status = WALK_NEXT;
1257
1258 if (mdb_layered_walk(layered, wsp) == -1) {
1259 mdb_warn("unable to start layered '%s' walk", layered);
1260 status = WALK_ERR;
1261 }
1262
1263 out1:
1264 if (status == WALK_ERR) {
1265 if (umw->umw_valid)
1266 mdb_free(umw->umw_valid, slabsize / chunksize);
1267
1268 if (umw->umw_ubase)
1269 mdb_free(umw->umw_ubase, slabsize +
1270 sizeof (umem_bufctl_t));
1271
1272 if (umw->umw_maglist)
1273 mdb_free(umw->umw_maglist, umw->umw_max *
1274 sizeof (uintptr_t));
1275
1276 mdb_free(umw, sizeof (umem_walk_t));
1277 wsp->walk_data = NULL;
1278 }
1279
1280 out2:
1281 if (status == WALK_ERR)
1282 mdb_free(cp, csize);
1283
1284 return (status);
1285 }
1286
1287 int
1288 umem_walk_step(mdb_walk_state_t *wsp)
1289 {
1290 umem_walk_t *umw = wsp->walk_data;
1291 int type = umw->umw_type;
1292 umem_cache_t *cp = umw->umw_cp;
1293
1294 void **maglist = umw->umw_maglist;
1295 int magcnt = umw->umw_count;
1296
1297 uintptr_t chunksize, slabsize;
1298 uintptr_t addr;
1299 const umem_slab_t *sp;
1300 const umem_bufctl_t *bcp;
1301 umem_bufctl_t bc;
1302
1303 int chunks;
1304 char *kbase;
1305 void *buf;
1306 int i, ret;
1307
1308 char *valid, *ubase;
1309
1310 /*
1311 * first, handle the 'umem_hash' layered walk case
1312 */
1313 if (type & UM_HASH) {
1314 /*
1315 * We have a buffer which has been allocated out of the
1316 * global layer. We need to make sure that it's not
1317 * actually sitting in a magazine before we report it as
1318 * an allocated buffer.
1319 */
1320 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1321
1322 if (magcnt > 0 &&
1323 bsearch(&buf, maglist, magcnt, sizeof (void *),
1324 addrcmp) != NULL)
1325 return (WALK_NEXT);
1326
1327 if (type & UM_BUFCTL)
1328 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1329
1330 return (umem_walk_callback(wsp, (uintptr_t)buf));
1331 }
1332
1333 ret = WALK_NEXT;
1334
1335 addr = umw->umw_addr;
1336
1337 /*
1338 * If we're walking freed buffers, report everything in the
1339 * magazine layer before processing the first slab.
1340 */
1341 if ((type & UM_FREE) && magcnt != 0) {
1342 umw->umw_count = 0; /* only do this once */
1343 for (i = 0; i < magcnt; i++) {
1344 buf = maglist[i];
1345
1346 if (type & UM_BUFCTL) {
1347 uintptr_t out;
1348
1349 if (cp->cache_flags & UMF_BUFTAG) {
1350 umem_buftag_t *btp;
1351 umem_buftag_t tag;
1352
1353 /* LINTED - alignment */
1354 btp = UMEM_BUFTAG(cp, buf);
1355 if (mdb_vread(&tag, sizeof (tag),
1356 (uintptr_t)btp) == -1) {
1357 mdb_warn("reading buftag for "
1358 "%p at %p", buf, btp);
1359 continue;
1360 }
1361 out = (uintptr_t)tag.bt_bufctl;
1362 } else {
1363 if (umem_hash_lookup(cp, addr, buf,
1364 &out) == -1)
1365 continue;
1366 }
1367 ret = bufctl_walk_callback(cp, wsp, out);
1368 } else {
1369 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1370 }
1371
1372 if (ret != WALK_NEXT)
1373 return (ret);
1374 }
1375 }
1376
1377 /*
1378 * Handle the buffers in the current slab
1379 */
1380 chunksize = cp->cache_chunksize;
1381 slabsize = cp->cache_slabsize;
1382
1383 sp = wsp->walk_layer;
1384 chunks = sp->slab_chunks;
1385 kbase = sp->slab_base;
1386
1387 dprintf(("kbase is %p\n", kbase));
1388
1389 if (!(cp->cache_flags & UMF_HASH)) {
1390 valid = umw->umw_valid;
1391 ubase = umw->umw_ubase;
1392
1393 if (mdb_vread(ubase, chunks * chunksize,
1394 (uintptr_t)kbase) == -1) {
1395 mdb_warn("failed to read slab contents at %p", kbase);
1396 return (WALK_ERR);
1397 }
1398
1399 /*
1400 * Set up the valid map as fully allocated -- we'll punch
1401 * out the freelist.
1402 */
1403 if (type & UM_ALLOCATED)
1404 (void) memset(valid, 1, chunks);
1405 } else {
1406 valid = NULL;
1407 ubase = NULL;
1408 }
1409
1410 /*
1411 * walk the slab's freelist
1412 */
1413 bcp = sp->slab_head;
1414
1415 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1416
1417 /*
1418 * since we could be in the middle of allocating a buffer,
1419 * our refcnt could be one higher than it ought to be. So we
1420 * check one further on the freelist than the count allows.
1421 */
1422 for (i = sp->slab_refcnt; i <= chunks; i++) {
1423 uint_t ndx;
1424
1425 dprintf(("bcp is %p\n", bcp));
1426
1427 if (bcp == NULL) {
1428 if (i == chunks)
1429 break;
1430 mdb_warn(
1431 "slab %p in cache %p freelist too short by %d\n",
1432 sp, addr, chunks - i);
1433 break;
1434 }
1435
1436 if (cp->cache_flags & UMF_HASH) {
1437 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1438 mdb_warn("failed to read bufctl ptr at %p",
1439 bcp);
1440 break;
1441 }
1442 buf = bc.bc_addr;
1443 } else {
1444 /*
1445 * Otherwise the buffer is (or should be) in the slab
1446 * that we've read in; determine its offset in the
1447 * slab, validate that it's not corrupt, and add to
1448 * our base address to find the umem_bufctl_t. (Note
1449 * that we don't need to add the size of the bufctl
1450 * to our offset calculation because of the slop that's
1451 * allocated for the buffer at ubase.)
1452 */
1453 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1454
1455 if (offs > chunks * chunksize) {
1456 mdb_warn("found corrupt bufctl ptr %p"
1457 " in slab %p in cache %p\n", bcp,
1458 wsp->walk_addr, addr);
1459 break;
1460 }
1461
1462 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1463 buf = UMEM_BUF(cp, bcp);
1464 }
1465
1466 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1467
1468 if (ndx > slabsize / cp->cache_bufsize) {
1469 /*
1470 * This is very wrong; we have managed to find
1471 * a buffer in the slab which shouldn't
1472 * actually be here. Emit a warning, and
1473 * try to continue.
1474 */
1475 mdb_warn("buf %p is out of range for "
1476 "slab %p, cache %p\n", buf, sp, addr);
1477 } else if (type & UM_ALLOCATED) {
1478 /*
1479 * we have found a buffer on the slab's freelist;
1480 * clear its entry
1481 */
1482 valid[ndx] = 0;
1483 } else {
1484 /*
1485 * Report this freed buffer
1486 */
1487 if (type & UM_BUFCTL) {
1488 ret = bufctl_walk_callback(cp, wsp,
1489 (uintptr_t)bcp);
1490 } else {
1491 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1492 }
1493 if (ret != WALK_NEXT)
1494 return (ret);
1495 }
1496
1497 bcp = bc.bc_next;
1498 }
1499
1500 if (bcp != NULL) {
1501 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1502 sp, addr, bcp));
1503 }
1504
1505 /*
1506 * If we are walking freed buffers, the loop above handled reporting
1507 * them.
1508 */
1509 if (type & UM_FREE)
1510 return (WALK_NEXT);
1511
1512 if (type & UM_BUFCTL) {
1513 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1514 "cache %p\n", addr);
1515 return (WALK_ERR);
1516 }
1517
1518 /*
1519 * Report allocated buffers, skipping buffers in the magazine layer.
1520 * We only get this far for small-slab caches.
1521 */
1522 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1523 buf = (char *)kbase + i * chunksize;
1524
1525 if (!valid[i])
1526 continue; /* on slab freelist */
1527
1528 if (magcnt > 0 &&
1529 bsearch(&buf, maglist, magcnt, sizeof (void *),
1530 addrcmp) != NULL)
1531 continue; /* in magazine layer */
1532
1533 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1534 }
1535 return (ret);
1536 }
1537
1538 void
1539 umem_walk_fini(mdb_walk_state_t *wsp)
1540 {
1541 umem_walk_t *umw = wsp->walk_data;
1542 uintptr_t chunksize;
1543 uintptr_t slabsize;
1544
1545 if (umw == NULL)
1546 return;
1547
1548 if (umw->umw_maglist != NULL)
1549 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1550
1551 chunksize = umw->umw_cp->cache_chunksize;
1552 slabsize = umw->umw_cp->cache_slabsize;
1553
1554 if (umw->umw_valid != NULL)
1555 mdb_free(umw->umw_valid, slabsize / chunksize);
1556 if (umw->umw_ubase != NULL)
1557 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1558
1559 mdb_free(umw->umw_cp, umw->umw_csize);
1560 mdb_free(umw, sizeof (umem_walk_t));
1561 }
1562
1563 /*ARGSUSED*/
1564 static int
1565 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1566 {
1567 /*
1568 * Buffers allocated from NOTOUCH caches can also show up as freed
1569 * memory in other caches. This can be a little confusing, so we
1570 * don't walk NOTOUCH caches when walking all caches (thereby ensuring
1571 * that "::walk umem" and "::walk freemem" yield disjoint output).
1572 */
1573 if (c->cache_cflags & UMC_NOTOUCH)
1574 return (WALK_NEXT);
1575
1576 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1577 wsp->walk_cbdata, addr) == -1)
1578 return (WALK_DONE);
1579
1580 return (WALK_NEXT);
1581 }
1582
1583 #define UMEM_WALK_ALL(name, wsp) { \
1584 wsp->walk_data = (name); \
1585 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \
1586 return (WALK_ERR); \
1587 return (WALK_DONE); \
1588 }
1589
1590 int
1591 umem_walk_init(mdb_walk_state_t *wsp)
1592 {
1593 if (wsp->walk_arg != NULL)
1594 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1595
1596 if (wsp->walk_addr == 0)
1597 UMEM_WALK_ALL("umem", wsp);
1598 return (umem_walk_init_common(wsp, UM_ALLOCATED));
1599 }
1600
1601 int
1602 bufctl_walk_init(mdb_walk_state_t *wsp)
1603 {
1604 if (wsp->walk_addr == 0)
1605 UMEM_WALK_ALL("bufctl", wsp);
1606 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1607 }
1608
1609 int
1610 freemem_walk_init(mdb_walk_state_t *wsp)
1611 {
1612 if (wsp->walk_addr == 0)
1613 UMEM_WALK_ALL("freemem", wsp);
1614 return (umem_walk_init_common(wsp, UM_FREE));
1615 }
1616
1617 int
1618 freectl_walk_init(mdb_walk_state_t *wsp)
1619 {
1620 if (wsp->walk_addr == 0)
1621 UMEM_WALK_ALL("freectl", wsp);
1622 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1623 }
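
/*
 * These four entry points provide the composite walkers: "umem" and
 * "bufctl" visit allocated buffers (as raw addresses and as bufctls,
 * respectively), while "freemem" and "freectl" visit free buffers.  With
 * no address they iterate over every cache (skipping UMC_NOTOUCH caches,
 * as noted above); given a cache address they are restricted to that
 * cache.  For example:
 *
 *	> ::walk umem
 *	> <cache addr>::walk freemem
 */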
1624
1625 typedef struct bufctl_history_walk {
1626 void *bhw_next;
1627 umem_cache_t *bhw_cache;
1628 umem_slab_t *bhw_slab;
1629 hrtime_t bhw_timestamp;
1630 } bufctl_history_walk_t;
1631
1632 int
1633 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1634 {
1635 bufctl_history_walk_t *bhw;
1636 umem_bufctl_audit_t bc;
1637 umem_bufctl_audit_t bcn;
1638
1639 if (wsp->walk_addr == 0) {
1640 mdb_warn("bufctl_history walk doesn't support global walks\n");
1641 return (WALK_ERR);
1642 }
1643
1644 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1645 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1646 return (WALK_ERR);
1647 }
1648
1649 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1650 bhw->bhw_timestamp = 0;
1651 bhw->bhw_cache = bc.bc_cache;
1652 bhw->bhw_slab = bc.bc_slab;
1653
1654 /*
1655 * sometimes the first log entry matches the base bufctl; in that
1656 * case, skip the base bufctl.
1657 */
1658 if (bc.bc_lastlog != NULL &&
1659 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1660 bc.bc_addr == bcn.bc_addr &&
1661 bc.bc_cache == bcn.bc_cache &&
1662 bc.bc_slab == bcn.bc_slab &&
1663 bc.bc_timestamp == bcn.bc_timestamp &&
1664 bc.bc_thread == bcn.bc_thread)
1665 bhw->bhw_next = bc.bc_lastlog;
1666 else
1667 bhw->bhw_next = (void *)wsp->walk_addr;
1668
1669 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1670 wsp->walk_data = bhw;
1671
1672 return (WALK_NEXT);
1673 }
1674
1675 int
1676 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1677 {
1678 bufctl_history_walk_t *bhw = wsp->walk_data;
1679 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1680 uintptr_t baseaddr = wsp->walk_addr;
1681 umem_bufctl_audit_t *b;
1682 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1683
1684 if (addr == 0)
1685 return (WALK_DONE);
1686
1687 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1688 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1689 return (WALK_ERR);
1690 }
1691
1692 /*
1693 * The bufctl is only valid if the address, cache, and slab are
1694 * correct. We also check that the timestamp is decreasing, to
1695 * prevent infinite loops.
1696 */
1697 if ((uintptr_t)b->bc_addr != baseaddr ||
1698 b->bc_cache != bhw->bhw_cache ||
1699 b->bc_slab != bhw->bhw_slab ||
1700 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1701 return (WALK_DONE);
1702
1703 bhw->bhw_next = b->bc_lastlog;
1704 bhw->bhw_timestamp = b->bc_timestamp;
1705
1706 return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1707 }
1708
1709 void
1710 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1711 {
1712 bufctl_history_walk_t *bhw = wsp->walk_data;
1713
1714 mdb_free(bhw, sizeof (*bhw));
1715 }
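
/*
 * The bufctl_history walker starts from a bufctl address (typically one
 * reported by ::walk bufctl or by ::whatis against an audited cache) and
 * follows bc_lastlog back through the transaction log, stopping when the
 * entries no longer match or the timestamps stop decreasing:
 *
 *	> <bufctl addr>::walk bufctl_history
 */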
1716
1717 typedef struct umem_log_walk {
1718 umem_bufctl_audit_t *ulw_base;
1719 umem_bufctl_audit_t **ulw_sorted;
1720 umem_log_header_t ulw_lh;
1721 size_t ulw_size;
1722 size_t ulw_maxndx;
1723 size_t ulw_ndx;
1724 } umem_log_walk_t;
1725
1726 int
1727 umem_log_walk_init(mdb_walk_state_t *wsp)
1728 {
1729 uintptr_t lp = wsp->walk_addr;
1730 umem_log_walk_t *ulw;
1731 umem_log_header_t *lhp;
1732 int maxndx, i, j, k;
1733
1734 /*
1735 * By default (global walk), walk the umem_transaction_log. Otherwise
1736 * read the log whose umem_log_header_t is stored at walk_addr.
1737 */
1738 if (lp == 0 && umem_readvar(&lp, "umem_transaction_log") == -1) {
1739 mdb_warn("failed to read 'umem_transaction_log'");
1740 return (WALK_ERR);
1741 }
1742
1743 if (lp == 0) {
1744 mdb_warn("log is disabled\n");
1745 return (WALK_ERR);
1746 }
1747
1748 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1749 lhp = &ulw->ulw_lh;
1750
1751 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1752 mdb_warn("failed to read log header at %p", lp);
1753 mdb_free(ulw, sizeof (umem_log_walk_t));
1754 return (WALK_ERR);
1755 }
1756
1757 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1758 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1759 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1760
1761 if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1762 (uintptr_t)lhp->lh_base) == -1) {
1763 mdb_warn("failed to read log at base %p", lhp->lh_base);
1764 mdb_free(ulw->ulw_base, ulw->ulw_size);
1765 mdb_free(ulw, sizeof (umem_log_walk_t));
1766 return (WALK_ERR);
1767 }
1768
1769 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1770 sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1771
1772 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1773 caddr_t chunk = (caddr_t)
1774 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1775
1776 for (j = 0; j < maxndx; j++) {
1777 /* LINTED align */
1778 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1779 chunk += UMEM_BUFCTL_AUDIT_SIZE;
1780 }
1781 }
1782
1783 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1784 (int(*)(const void *, const void *))bufctlcmp);
1785
1786 ulw->ulw_maxndx = k;
1787 wsp->walk_data = ulw;
1788
1789 return (WALK_NEXT);
1790 }
1791
1792 int
1793 umem_log_walk_step(mdb_walk_state_t *wsp)
1794 {
1795 umem_log_walk_t *ulw = wsp->walk_data;
1796 umem_bufctl_audit_t *bcp;
1797
1798 if (ulw->ulw_ndx == ulw->ulw_maxndx)
1799 return (WALK_DONE);
1800
1801 bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1802
1803 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1804 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1805 }
1806
1807 void
1808 umem_log_walk_fini(mdb_walk_state_t *wsp)
1809 {
1810 umem_log_walk_t *ulw = wsp->walk_data;
1811
1812 mdb_free(ulw->ulw_base, ulw->ulw_size);
1813 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1814 sizeof (umem_bufctl_audit_t *));
1815 mdb_free(ulw, sizeof (umem_log_walk_t));
1816 }
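
/*
 * Example: walking the transaction log (the default when no log header
 * address is given) visits the logged bufctls sorted newest-first by
 * timestamp:
 *
 *	> ::walk umem_log
 */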
1817
1818 typedef struct allocdby_bufctl {
1819 uintptr_t abb_addr;
1820 hrtime_t abb_ts;
1821 } allocdby_bufctl_t;
1822
1823 typedef struct allocdby_walk {
1824 const char *abw_walk;
1825 uintptr_t abw_thread;
1826 size_t abw_nbufs;
1827 size_t abw_size;
1828 allocdby_bufctl_t *abw_buf;
1829 size_t abw_ndx;
1830 } allocdby_walk_t;
1831
1832 int
1833 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1834 allocdby_walk_t *abw)
1835 {
1836 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1837 return (WALK_NEXT);
1838
1839 if (abw->abw_nbufs == abw->abw_size) {
1840 allocdby_bufctl_t *buf;
1841 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1842
1843 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1844
1845 bcopy(abw->abw_buf, buf, oldsize);
1846 mdb_free(abw->abw_buf, oldsize);
1847
1848 abw->abw_size <<= 1;
1849 abw->abw_buf = buf;
1850 }
1851
1852 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1853 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1854 abw->abw_nbufs++;
1855
1856 return (WALK_NEXT);
1857 }
1858
1859 /*ARGSUSED*/
1860 int
1861 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1862 {
1863 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1864 abw, addr) == -1) {
1865 mdb_warn("couldn't walk bufctl for cache %p", addr);
1866 return (WALK_DONE);
1867 }
1868
1869 return (WALK_NEXT);
1870 }
1871
1872 static int
1873 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1874 {
1875 if (lhs->abb_ts < rhs->abb_ts)
1876 return (1);
1877 if (lhs->abb_ts > rhs->abb_ts)
1878 return (-1);
1879 return (0);
1880 }
1881
1882 static int
1883 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1884 {
1885 allocdby_walk_t *abw;
1886
1887 if (wsp->walk_addr == 0) {
1888 mdb_warn("allocdby walk doesn't support global walks\n");
1889 return (WALK_ERR);
1890 }
1891
1892 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1893
1894 abw->abw_thread = wsp->walk_addr;
1895 abw->abw_walk = walk;
1896 abw->abw_size = 128; /* something reasonable */
1897 abw->abw_buf =
1898 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1899
1900 wsp->walk_data = abw;
1901
1902 if (mdb_walk("umem_cache",
1903 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1904 mdb_warn("couldn't walk umem_cache");
1905 allocdby_walk_fini(wsp);
1906 return (WALK_ERR);
1907 }
1908
1909 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1910 (int(*)(const void *, const void *))allocdby_cmp);
1911
1912 return (WALK_NEXT);
1913 }
1914
1915 int
1916 allocdby_walk_init(mdb_walk_state_t *wsp)
1917 {
1918 return (allocdby_walk_init_common(wsp, "bufctl"));
1919 }
1920
1921 int
1922 freedby_walk_init(mdb_walk_state_t *wsp)
1923 {
1924 return (allocdby_walk_init_common(wsp, "freectl"));
1925 }
1926
1927 int
1928 allocdby_walk_step(mdb_walk_state_t *wsp)
1929 {
1930 allocdby_walk_t *abw = wsp->walk_data;
1931 uintptr_t addr;
1932 umem_bufctl_audit_t *bcp;
1933 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1934
1935 if (abw->abw_ndx == abw->abw_nbufs)
1936 return (WALK_DONE);
1937
1938 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1939
1940 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1941 mdb_warn("couldn't read bufctl at %p", addr);
1942 return (WALK_DONE);
1943 }
1944
1945 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1946 }
1947
1948 void
1949 allocdby_walk_fini(mdb_walk_state_t *wsp)
1950 {
1951 allocdby_walk_t *abw = wsp->walk_data;
1952
1953 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1954 mdb_free(abw, sizeof (allocdby_walk_t));
1955 }
1956
1957 /*ARGSUSED*/
1958 int
1959 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1960 {
1961 char c[MDB_SYM_NAMLEN];
1962 GElf_Sym sym;
1963 int i;
1964
1965 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1966 for (i = 0; i < bcp->bc_depth; i++) {
1967 if (mdb_lookup_by_addr(bcp->bc_stack[i],
1968 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1969 continue;
1970 if (is_umem_sym(c, "umem_"))
1971 continue;
1972 mdb_printf("%s+0x%lx",
1973 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1974 break;
1975 }
1976 mdb_printf("\n");
1977
1978 return (WALK_NEXT);
1979 }
1980
1981 static int
1982 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1983 {
1984 if (!(flags & DCMD_ADDRSPEC))
1985 return (DCMD_USAGE);
1986
1987 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1988
1989 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1990 mdb_warn("can't walk '%s' for %p", w, addr);
1991 return (DCMD_ERR);
1992 }
1993
1994 return (DCMD_OK);
1995 }
1996
1997 /*ARGSUSED*/
1998 int
1999 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2000 {
2001 return (allocdby_common(addr, flags, "allocdby"));
2002 }
2003
2004 /*ARGSUSED*/
2005 int
2006 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2007 {
2008 return (allocdby_common(addr, flags, "freedby"));
2009 }
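
/*
 * ::allocdby and ::freedby match bufctls whose bc_thread equals the
 * value given as the address and list them most recent first, e.g.:
 *
 *	> <thread>::allocdby
 *	BUFCTL          TIMESTAMP CALLER
 */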
2010
2011 typedef struct whatis_info {
2012 mdb_whatis_t *wi_w;
2013 const umem_cache_t *wi_cache;
2014 const vmem_t *wi_vmem;
2015 vmem_t *wi_msb_arena;
2016 size_t wi_slab_size;
2017 int wi_slab_found;
2018 uint_t wi_freemem;
2019 } whatis_info_t;
2020
2021 /* call one of our dcmd functions with "-v" and the provided address */
2022 static void
2023 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2024 {
2025 mdb_arg_t a;
2026 a.a_type = MDB_TYPE_STRING;
2027 a.a_un.a_str = "-v";
2028
2029 mdb_printf(":\n");
2030 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2031 }
2032
2033 static void
2034 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2035 uintptr_t baddr)
2036 {
2037 mdb_whatis_t *w = wi->wi_w;
2038 const umem_cache_t *cp = wi->wi_cache;
2039 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2040
2041 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2042
2043 mdb_whatis_report_object(w, maddr, addr, "");
2044
2045 if (baddr != 0 && !call_printer)
2046 mdb_printf("bufctl %p ", baddr);
2047
2048 mdb_printf("%s from %s",
2049 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2050
2051 if (call_printer && baddr != 0) {
2052 whatis_call_printer(bufctl, baddr);
2053 return;
2054 }
2055 mdb_printf("\n");
2056 }
2057
2058 /*ARGSUSED*/
2059 static int
2060 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2061 {
2062 mdb_whatis_t *w = wi->wi_w;
2063
2064 uintptr_t cur;
2065 size_t size = wi->wi_cache->cache_bufsize;
2066
2067 while (mdb_whatis_match(w, addr, size, &cur))
2068 whatis_print_umem(wi, cur, addr, 0);
2069
2070 return (WHATIS_WALKRET(w));
2071 }
2072
2073 /*ARGSUSED*/
2074 static int
2075 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2076 {
2077 mdb_whatis_t *w = wi->wi_w;
2078
2079 uintptr_t cur;
2080 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2081 size_t size = wi->wi_cache->cache_bufsize;
2082
2083 while (mdb_whatis_match(w, addr, size, &cur))
2084 whatis_print_umem(wi, cur, addr, baddr);
2085
2086 return (WHATIS_WALKRET(w));
2087 }
2088
2089
2090 static int
2091 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2092 {
2093 mdb_whatis_t *w = wi->wi_w;
2094
2095 size_t size = vs->vs_end - vs->vs_start;
2096 uintptr_t cur;
2097
2098 /* We're not interested in anything but alloc and free segments */
2099 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2100 return (WALK_NEXT);
2101
2102 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2103 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2104
2105 /*
2106 		 * If we're not printing it separately, provide the vmem_seg
2107 * pointer if it has a stack trace.
2108 */
2109 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2110 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2111 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2112 mdb_printf("vmem_seg %p ", addr);
2113 }
2114
2115 mdb_printf("%s from %s vmem arena",
2116 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2117 wi->wi_vmem->vm_name);
2118
2119 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2120 whatis_call_printer(vmem_seg, addr);
2121 else
2122 mdb_printf("\n");
2123 }
2124
2125 return (WHATIS_WALKRET(w));
2126 }
2127
2128 static int
2129 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2130 {
2131 mdb_whatis_t *w = wi->wi_w;
2132 const char *nm = vmem->vm_name;
2133 wi->wi_vmem = vmem;
2134
2135 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2136 mdb_printf("Searching vmem arena %s...\n", nm);
2137
2138 if (mdb_pwalk("vmem_seg",
2139 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2140 		mdb_warn("can't walk vmem_seg for %p", addr);
2141 return (WALK_NEXT);
2142 }
2143
2144 return (WHATIS_WALKRET(w));
2145 }
2146
2147 /*ARGSUSED*/
2148 static int
2149 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2150 {
2151 mdb_whatis_t *w = wi->wi_w;
2152
2153 /* It must overlap with the slab data, or it's not interesting */
2154 if (mdb_whatis_overlaps(w,
2155 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2156 wi->wi_slab_found++;
2157 return (WALK_DONE);
2158 }
2159 return (WALK_NEXT);
2160 }
2161
2162 static int
2163 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2164 {
2165 mdb_whatis_t *w = wi->wi_w;
2166 char *walk, *freewalk;
2167 mdb_walk_cb_t func;
2168 int do_bufctl;
2169
2170 /* Override the '-b' flag as necessary */
2171 if (!(c->cache_flags & UMF_HASH))
2172 do_bufctl = FALSE; /* no bufctls to walk */
2173 else if (c->cache_flags & UMF_AUDIT)
2174 do_bufctl = TRUE; /* we always want debugging info */
2175 else
2176 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2177
2178 if (do_bufctl) {
2179 walk = "bufctl";
2180 freewalk = "freectl";
2181 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2182 } else {
2183 walk = "umem";
2184 freewalk = "freemem";
2185 func = (mdb_walk_cb_t)whatis_walk_umem;
2186 }
2187
2188 wi->wi_cache = c;
2189
2190 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2191 mdb_printf("Searching %s...\n", c->cache_name);
2192
2193 /*
2194 	 * If more than two buffers live on each slab, figure out if we're
2195 * interested in anything in any slab before doing the more expensive
2196 * umem/freemem (bufctl/freectl) walkers.
2197 */
2198 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2199 if (!(c->cache_flags & UMF_HASH))
2200 wi->wi_slab_size -= sizeof (umem_slab_t);
2201
2202 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2203 wi->wi_slab_found = 0;
2204 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2205 addr) == -1) {
2206 mdb_warn("can't find umem_slab walker");
2207 return (WALK_DONE);
2208 }
2209 if (wi->wi_slab_found == 0)
2210 return (WALK_NEXT);
2211 }
2212
2213 wi->wi_freemem = FALSE;
2214 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2215 mdb_warn("can't find %s walker", walk);
2216 return (WALK_DONE);
2217 }
2218
2219 if (mdb_whatis_done(w))
2220 return (WALK_DONE);
2221
2222 /*
2223 * We have searched for allocated memory; now search for freed memory.
2224 */
2225 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2226 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2227
2228 wi->wi_freemem = TRUE;
2229
2230 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2231 mdb_warn("can't find %s walker", freewalk);
2232 return (WALK_DONE);
2233 }
2234
2235 return (WHATIS_WALKRET(w));
2236 }
2237
2238 static int
2239 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2240 {
2241 if (c->cache_arena == wi->wi_msb_arena ||
2242 (c->cache_cflags & UMC_NOTOUCH))
2243 return (WALK_NEXT);
2244
2245 return (whatis_walk_cache(addr, c, wi));
2246 }
2247
2248 static int
2249 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2250 {
2251 if (c->cache_arena != wi->wi_msb_arena)
2252 return (WALK_NEXT);
2253
2254 return (whatis_walk_cache(addr, c, wi));
2255 }
2256
2257 static int
2258 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2259 {
2260 if (c->cache_arena == wi->wi_msb_arena ||
2261 !(c->cache_cflags & UMC_NOTOUCH))
2262 return (WALK_NEXT);
2263
2264 return (whatis_walk_cache(addr, c, wi));
2265 }
2266
2267 /*ARGSUSED*/
2268 static int
2269 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2270 {
2271 whatis_info_t wi;
2272
2273 bzero(&wi, sizeof (wi));
2274 wi.wi_w = w;
2275
2276 /* umem's metadata is allocated from the umem_internal_arena */
2277 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2278 mdb_warn("unable to readvar \"umem_internal_arena\"");
2279
2280 /*
2281 * We process umem caches in the following order:
2282 *
2283 * non-UMC_NOTOUCH, non-metadata (typically the most interesting)
2284 * metadata (can be huge with UMF_AUDIT)
2285 * UMC_NOTOUCH, non-metadata (see umem_walk_all())
2286 */
2287 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2288 &wi) == -1 ||
2289 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2290 &wi) == -1 ||
2291 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2292 &wi) == -1) {
2293 mdb_warn("couldn't find umem_cache walker");
2294 return (1);
2295 }
2296 return (0);
2297 }
2298
2299 /*ARGSUSED*/
2300 static int
2301 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2302 {
2303 whatis_info_t wi;
2304
2305 bzero(&wi, sizeof (wi));
2306 wi.wi_w = w;
2307
2308 if (mdb_walk("vmem_postfix",
2309 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2310 mdb_warn("couldn't find vmem_postfix walker");
2311 return (1);
2312 }
2313 return (0);
2314 }
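/*
 * Illustrative example (addresses and cache names are hypothetical): for a
 * pointer into a umem-managed buffer, the handlers above produce ::whatis
 * output along the lines of
 *
 *	> 81d4f58::whatis
 *	81d4f58 is 81d4f40+18, allocated from umem_alloc_160
 *
 * while addresses covered only by a vmem arena are reported as
 * "allocated from <arena> vmem arena".
 */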
2315
2316 int
2317 umem_init(void)
2318 {
2319 mdb_walker_t w = {
2320 "umem_cache", "walk list of umem caches", umem_cache_walk_init,
2321 umem_cache_walk_step, umem_cache_walk_fini
2322 };
2323
2324 if (mdb_add_walker(&w) == -1) {
2325 mdb_warn("failed to add umem_cache walker");
2326 return (-1);
2327 }
2328
2329 if (umem_update_variables() == -1)
2330 return (-1);
2331
2332 /* install a callback so that our variables are always up-to-date */
2333 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2334 umem_statechange_cb(NULL);
2335
2336 /*
2337 * Register our ::whatis callbacks.
2338 */
2339 mdb_whatis_register("umem", whatis_run_umem, NULL,
2340 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2341 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2342 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2343
2344 return (0);
2345 }
2346
2347 typedef struct umem_log_cpu {
2348 uintptr_t umc_low;
2349 uintptr_t umc_high;
2350 } umem_log_cpu_t;
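/*
 * umem_log_walk() below attributes each log entry to a CPU by checking which
 * CPU's [umc_low, umc_high) range contains the bufctl address; entries that
 * fall outside every range get a blank CPU column.
 */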
2351
2352 int
2353 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2354 {
2355 int i;
2356
2357 for (i = 0; i < umem_max_ncpus; i++) {
2358 if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2359 break;
2360 }
2361
2362 if (i == umem_max_ncpus)
2363 mdb_printf(" ");
2364 else
2365 mdb_printf("%3d", i);
2366
2367 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2368 b->bc_timestamp, b->bc_thread);
2369
2370 return (WALK_NEXT);
2371 }
2372
2373 /*ARGSUSED*/
2374 int
2375 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2376 {
2377 umem_log_header_t lh;
2378 umem_cpu_log_header_t clh;
2379 uintptr_t lhp, clhp;
2380 umem_log_cpu_t *umc;
2381 int i;
2382
2383 if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2384 mdb_warn("failed to read 'umem_transaction_log'");
2385 return (DCMD_ERR);
2386 }
2387
2388 if (lhp == 0) {
2389 mdb_warn("no umem transaction log\n");
2390 return (DCMD_ERR);
2391 }
2392
2393 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2394 mdb_warn("failed to read log header at %p", lhp);
2395 return (DCMD_ERR);
2396 }
2397
2398 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2399
2400 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2401 UM_SLEEP | UM_GC);
2402
2403 for (i = 0; i < umem_max_ncpus; i++) {
2404 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2405 mdb_warn("cannot read cpu %d's log header at %p",
2406 i, clhp);
2407 return (DCMD_ERR);
2408 }
2409
2410 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2411 (uintptr_t)lh.lh_base;
2412 umc[i].umc_high = (uintptr_t)clh.clh_current;
2413
2414 clhp += sizeof (umem_cpu_log_header_t);
2415 }
2416
2417 if (DCMD_HDRSPEC(flags)) {
2418 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2419 "BUFADDR", "TIMESTAMP", "THREAD");
2420 }
2421
2422 /*
2423 * If we have been passed an address, we'll just print out that
2424 * log entry.
2425 */
2426 if (flags & DCMD_ADDRSPEC) {
2427 umem_bufctl_audit_t *bp;
2428 UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2429
2430 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2431 mdb_warn("failed to read bufctl at %p", addr);
2432 return (DCMD_ERR);
2433 }
2434
2435 (void) umem_log_walk(addr, bp, umc);
2436
2437 return (DCMD_OK);
2438 }
2439
2440 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2441 mdb_warn("can't find umem log walker");
2442 return (DCMD_ERR);
2443 }
2444
2445 return (DCMD_OK);
2446 }
2447
2448 typedef struct bufctl_history_cb {
2449 int bhc_flags;
2450 int bhc_argc;
2451 const mdb_arg_t *bhc_argv;
2452 int bhc_ret;
2453 } bufctl_history_cb_t;
2454
2455 /*ARGSUSED*/
2456 static int
2457 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2458 {
2459 bufctl_history_cb_t *bhc = arg;
2460
2461 bhc->bhc_ret =
2462 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2463
2464 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2465
2466 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2467 }
2468
2469 void
2470 bufctl_help(void)
2471 {
2472 mdb_printf("%s\n",
2473 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n");
2474 mdb_dec_indent(2);
2475 mdb_printf("%<b>OPTIONS%</b>\n");
2476 mdb_inc_indent(2);
2477 mdb_printf("%s",
2478 " -v Display the full content of the bufctl, including its stack trace\n"
2479 " -h retrieve the bufctl's transaction history, if available\n"
2480 " -a addr\n"
2481 " filter out bufctls not involving the buffer at addr\n"
2482 " -c caller\n"
2483 " filter out bufctls without the function/PC in their stack trace\n"
2484 " -e earliest\n"
2485 " filter out bufctls timestamped before earliest\n"
2486 " -l latest\n"
2487 " filter out bufctls timestamped after latest\n"
2488 " -t thread\n"
2489 " filter out bufctls not involving thread\n");
2490 }
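/*
 * Example usage (illustrative addresses): show the audit records in a cache
 * that involve a particular buffer and passed through malloc():
 *
 *	> <cache addr>::walk bufctl | ::bufctl -a <buf addr> -c malloc
 */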
2491
2492 int
2493 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2494 {
2495 uint_t verbose = FALSE;
2496 uint_t history = FALSE;
2497 uint_t in_history = FALSE;
2498 uintptr_t caller = 0, thread = 0;
2499 uintptr_t laddr, haddr, baddr = 0;
2500 hrtime_t earliest = 0, latest = 0;
2501 int i, depth;
2502 char c[MDB_SYM_NAMLEN];
2503 GElf_Sym sym;
2504 umem_bufctl_audit_t *bcp;
2505 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2506
2507 if (mdb_getopts(argc, argv,
2508 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2509 'h', MDB_OPT_SETBITS, TRUE, &history,
2510 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2511 'c', MDB_OPT_UINTPTR, &caller,
2512 't', MDB_OPT_UINTPTR, &thread,
2513 'e', MDB_OPT_UINT64, &earliest,
2514 'l', MDB_OPT_UINT64, &latest,
2515 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2516 return (DCMD_USAGE);
2517
2518 if (!(flags & DCMD_ADDRSPEC))
2519 return (DCMD_USAGE);
2520
2521 if (in_history && !history)
2522 return (DCMD_USAGE);
2523
2524 if (history && !in_history) {
2525 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2526 UM_SLEEP | UM_GC);
2527 bufctl_history_cb_t bhc;
2528
2529 nargv[0].a_type = MDB_TYPE_STRING;
2530 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2531
2532 for (i = 0; i < argc; i++)
2533 nargv[i + 1] = argv[i];
2534
2535 /*
2536 * When in history mode, we treat each element as if it
2537 		 * were in a separate loop, so that the headers group
2538 * bufctls with similar histories.
2539 */
2540 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2541 bhc.bhc_argc = argc + 1;
2542 bhc.bhc_argv = nargv;
2543 bhc.bhc_ret = DCMD_OK;
2544
2545 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2546 addr) == -1) {
2547 mdb_warn("unable to walk bufctl_history");
2548 return (DCMD_ERR);
2549 }
2550
2551 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2552 mdb_printf("\n");
2553
2554 return (bhc.bhc_ret);
2555 }
2556
2557 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2558 if (verbose) {
2559 mdb_printf("%16s %16s %16s %16s\n"
2560 "%<u>%16s %16s %16s %16s%</u>\n",
2561 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2562 "", "CACHE", "LASTLOG", "CONTENTS");
2563 } else {
2564 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2565 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2566 }
2567 }
2568
2569 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2570 mdb_warn("couldn't read bufctl at %p", addr);
2571 return (DCMD_ERR);
2572 }
2573
2574 /*
2575 * Guard against bogus bc_depth in case the bufctl is corrupt or
2576 * the address does not really refer to a bufctl.
2577 */
2578 depth = MIN(bcp->bc_depth, umem_stack_depth);
2579
2580 if (caller != 0) {
2581 laddr = caller;
2582 haddr = caller + sizeof (caller);
2583
2584 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2585 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2586 /*
2587 * We were provided an exact symbol value; any
2588 * address in the function is valid.
2589 */
2590 laddr = (uintptr_t)sym.st_value;
2591 haddr = (uintptr_t)sym.st_value + sym.st_size;
2592 }
2593
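		/*
		 * Scan the saved stack for a PC in [laddr, haddr).  For
		 * example (illustrative): "-c malloc" names an exact symbol
		 * value, so any PC inside malloc() matches, while
		 * "-c malloc+0x2c" only matches a pointer-sized window at
		 * that offset.
		 */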
2594 for (i = 0; i < depth; i++)
2595 if (bcp->bc_stack[i] >= laddr &&
2596 bcp->bc_stack[i] < haddr)
2597 break;
2598
2599 if (i == depth)
2600 return (DCMD_OK);
2601 }
2602
2603 if (thread != 0 && (uintptr_t)bcp->bc_thread != thread)
2604 return (DCMD_OK);
2605
2606 if (earliest != 0 && bcp->bc_timestamp < earliest)
2607 return (DCMD_OK);
2608
2609 if (latest != 0 && bcp->bc_timestamp > latest)
2610 return (DCMD_OK);
2611
2612 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2613 return (DCMD_OK);
2614
2615 if (flags & DCMD_PIPE_OUT) {
2616 mdb_printf("%#r\n", addr);
2617 return (DCMD_OK);
2618 }
2619
2620 if (verbose) {
2621 mdb_printf(
2622 "%<b>%16p%</b> %16p %16llx %16d\n"
2623 "%16s %16p %16p %16p\n",
2624 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2625 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2626
2627 mdb_inc_indent(17);
2628 for (i = 0; i < depth; i++)
2629 mdb_printf("%a\n", bcp->bc_stack[i]);
2630 mdb_dec_indent(17);
2631 mdb_printf("\n");
2632 } else {
2633 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2634 bcp->bc_timestamp, bcp->bc_thread);
2635
2636 for (i = 0; i < depth; i++) {
2637 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2638 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2639 continue;
2640 if (is_umem_sym(c, "umem_"))
2641 continue;
2642 mdb_printf(" %a\n", bcp->bc_stack[i]);
2643 break;
2644 }
2645
2646 if (i >= depth)
2647 mdb_printf("\n");
2648 }
2649
2650 return (DCMD_OK);
2651 }
2652
2653 /*ARGSUSED*/
2654 int
2655 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2656 {
2657 mdb_arg_t a;
2658
2659 if (!(flags & DCMD_ADDRSPEC))
2660 return (DCMD_USAGE);
2661
2662 if (argc != 0)
2663 return (DCMD_USAGE);
2664
2665 a.a_type = MDB_TYPE_STRING;
2666 a.a_un.a_str = "-v";
2667
2668 return (bufctl(addr, flags, 1, &a));
2669 }
2670
2671 typedef struct umem_verify {
2672 uint64_t *umv_buf; /* buffer to read cache contents into */
2673 size_t umv_size; /* number of bytes in umv_buf */
2674 int umv_corruption; /* > 0 if corruption found. */
2675 	int umv_besilent;		/* if set, suppress per-buffer messages */
2676 struct umem_cache umv_cache; /* the cache we're operating on */
2677 } umem_verify_t;
2678
2679 /*
2680 * verify_pattern()
2681 * verify that buf is filled with the pattern pat.
2682 */
2683 static int64_t
2684 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2685 {
2686 /*LINTED*/
2687 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2688 uint64_t *buf;
2689
2690 for (buf = buf_arg; buf < bufend; buf++)
2691 if (*buf != pat)
2692 return ((uintptr_t)buf - (uintptr_t)buf_arg);
2693 return (-1);
2694 }
2695
2696 /*
2697 * verify_buftag()
2698 * verify that btp->bt_bxstat == (bcp ^ pat)
2699 */
2700 static int
2701 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2702 {
2703 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2704 }
2705
2706 /*
2707 * verify_free()
2708 * verify the integrity of a free block of memory by checking
2709 * that it is filled with 0xdeadbeef and that its buftag is sane.
2710 */
2711 /*ARGSUSED1*/
2712 static int
2713 verify_free(uintptr_t addr, const void *data, void *private)
2714 {
2715 umem_verify_t *umv = (umem_verify_t *)private;
2716 uint64_t *buf = umv->umv_buf; /* buf to validate */
2717 int64_t corrupt; /* corruption offset */
2718 umem_buftag_t *buftagp; /* ptr to buftag */
2719 umem_cache_t *cp = &umv->umv_cache;
2720 int besilent = umv->umv_besilent;
2721
2722 /*LINTED*/
2723 buftagp = UMEM_BUFTAG(cp, buf);
2724
2725 /*
2726 * Read the buffer to check.
2727 */
2728 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2729 if (!besilent)
2730 mdb_warn("couldn't read %p", addr);
2731 return (WALK_NEXT);
2732 }
2733
2734 if ((corrupt = verify_pattern(buf, cp->cache_verify,
2735 UMEM_FREE_PATTERN)) >= 0) {
2736 if (!besilent)
2737 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2738 addr, (uintptr_t)addr + corrupt);
2739 goto corrupt;
2740 }
2741
2742 if ((cp->cache_flags & UMF_HASH) &&
2743 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2744 if (!besilent)
2745 mdb_printf("buffer %p (free) seems to "
2746 "have a corrupt redzone pattern\n", addr);
2747 goto corrupt;
2748 }
2749
2750 /*
2751 * confirm bufctl pointer integrity.
2752 */
2753 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2754 if (!besilent)
2755 mdb_printf("buffer %p (free) has a corrupt "
2756 "buftag\n", addr);
2757 goto corrupt;
2758 }
2759
2760 return (WALK_NEXT);
2761 corrupt:
2762 umv->umv_corruption++;
2763 return (WALK_NEXT);
2764 }
2765
2766 /*
2767 * verify_alloc()
2768 * Verify that the buftag of an allocated buffer makes sense with respect
2769 * to the buffer.
2770 */
2771 /*ARGSUSED1*/
2772 static int
2773 verify_alloc(uintptr_t addr, const void *data, void *private)
2774 {
2775 umem_verify_t *umv = (umem_verify_t *)private;
2776 umem_cache_t *cp = &umv->umv_cache;
2777 uint64_t *buf = umv->umv_buf; /* buf to validate */
2778 /*LINTED*/
2779 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2780 uint32_t *ip = (uint32_t *)buftagp;
2781 uint8_t *bp = (uint8_t *)buf;
2782 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
2783 int besilent = umv->umv_besilent;
2784
2785 /*
2786 * Read the buffer to check.
2787 */
2788 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2789 if (!besilent)
2790 mdb_warn("couldn't read %p", addr);
2791 return (WALK_NEXT);
2792 }
2793
2794 /*
2795 * There are two cases to handle:
2796 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2797 * 0xfeedfacefeedface at the end of it
2798 * 2. If the buf was alloc'd using umem_alloc, it will have
2799 * 0xbb just past the end of the region in use. At the buftag,
2800 * it will have 0xfeedface (or, if the whole buffer is in use,
2801 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2802 * endianness), followed by 32 bits containing the offset of the
2803 * 0xbb byte in the buffer.
2804 *
2805 * Finally, the two 32-bit words that comprise the second half of the
2806 * buftag should xor to UMEM_BUFTAG_ALLOC
2807 */
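	/*
	 * For example (illustrative): a umem_alloc(10) buffer encodes 10 in
	 * ip[1], and bp[10], the byte just past the region in use, should
	 * still hold the 0xbb redzone byte.
	 */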
2808
2809 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2810 looks_ok = 1;
2811 else if (!UMEM_SIZE_VALID(ip[1]))
2812 size_ok = 0;
2813 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2814 looks_ok = 1;
2815 else
2816 size_ok = 0;
2817
2818 if (!size_ok) {
2819 if (!besilent)
2820 mdb_printf("buffer %p (allocated) has a corrupt "
2821 "redzone size encoding\n", addr);
2822 goto corrupt;
2823 }
2824
2825 if (!looks_ok) {
2826 if (!besilent)
2827 mdb_printf("buffer %p (allocated) has a corrupt "
2828 "redzone signature\n", addr);
2829 goto corrupt;
2830 }
2831
2832 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2833 if (!besilent)
2834 mdb_printf("buffer %p (allocated) has a "
2835 "corrupt buftag\n", addr);
2836 goto corrupt;
2837 }
2838
2839 return (WALK_NEXT);
2840 corrupt:
2841 umv->umv_corruption++;
2842 return (WALK_NEXT);
2843 }
2844
2845 /*ARGSUSED2*/
2846 int
2847 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2848 {
2849 if (flags & DCMD_ADDRSPEC) {
2850 int check_alloc = 0, check_free = 0;
2851 umem_verify_t umv;
2852
2853 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2854 addr) == -1) {
2855 mdb_warn("couldn't read umem_cache %p", addr);
2856 return (DCMD_ERR);
2857 }
2858
2859 umv.umv_size = umv.umv_cache.cache_buftag +
2860 sizeof (umem_buftag_t);
2861 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2862 umv.umv_corruption = 0;
2863
2864 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2865 check_alloc = 1;
2866 if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2867 check_free = 1;
2868 } else {
2869 if (!(flags & DCMD_LOOP)) {
2870 mdb_warn("cache %p (%s) does not have "
2871 "redzone checking enabled\n", addr,
2872 umv.umv_cache.cache_name);
2873 }
2874 return (DCMD_ERR);
2875 }
2876
2877 if (flags & DCMD_LOOP) {
2878 /*
2879 * table mode, don't print out every corrupt buffer
2880 */
2881 umv.umv_besilent = 1;
2882 } else {
2883 mdb_printf("Summary for cache '%s'\n",
2884 umv.umv_cache.cache_name);
2885 mdb_inc_indent(2);
2886 umv.umv_besilent = 0;
2887 }
2888
2889 if (check_alloc)
2890 (void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2891 if (check_free)
2892 (void) mdb_pwalk("freemem", verify_free, &umv, addr);
2893
2894 if (flags & DCMD_LOOP) {
2895 if (umv.umv_corruption == 0) {
2896 mdb_printf("%-*s %?p clean\n",
2897 UMEM_CACHE_NAMELEN,
2898 umv.umv_cache.cache_name, addr);
2899 } else {
2900 char *s = ""; /* optional s in "buffer[s]" */
2901 if (umv.umv_corruption > 1)
2902 s = "s";
2903
2904 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2905 UMEM_CACHE_NAMELEN,
2906 umv.umv_cache.cache_name, addr,
2907 umv.umv_corruption, s);
2908 }
2909 } else {
2910 /*
2911 * This is the more verbose mode, when the user has
2912 			 * typed addr::umem_verify. If the cache was clean,
2913 * nothing will have yet been printed. So say something.
2914 */
2915 if (umv.umv_corruption == 0)
2916 mdb_printf("clean\n");
2917
2918 mdb_dec_indent(2);
2919 }
2920 } else {
2921 /*
2922 * If the user didn't specify a cache to verify, we'll walk all
2923 * umem_cache's, specifying ourself as a callback for each...
2924 * this is the equivalent of '::walk umem_cache .::umem_verify'
2925 */
2926 		mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", UMEM_CACHE_NAMELEN,
2927 "Cache Name", "Addr", "Cache Integrity");
2928 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2929 }
2930
2931 return (DCMD_OK);
2932 }
2933
2934 typedef struct vmem_node {
2935 struct vmem_node *vn_next;
2936 struct vmem_node *vn_parent;
2937 struct vmem_node *vn_sibling;
2938 struct vmem_node *vn_children;
2939 uintptr_t vn_addr;
2940 int vn_marked;
2941 vmem_t vn_vmem;
2942 } vmem_node_t;
2943
2944 typedef struct vmem_walk {
2945 vmem_node_t *vw_root;
2946 vmem_node_t *vw_current;
2947 } vmem_walk_t;
2948
2949 int
2950 vmem_walk_init(mdb_walk_state_t *wsp)
2951 {
2952 uintptr_t vaddr, paddr;
2953 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2954 vmem_walk_t *vw;
2955
2956 if (umem_readvar(&vaddr, "vmem_list") == -1) {
2957 mdb_warn("couldn't read 'vmem_list'");
2958 return (WALK_ERR);
2959 }
2960
2961 while (vaddr != 0) {
2962 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2963 vp->vn_addr = vaddr;
2964 vp->vn_next = head;
2965 head = vp;
2966
2967 if (vaddr == wsp->walk_addr)
2968 current = vp;
2969
2970 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2971 mdb_warn("couldn't read vmem_t at %p", vaddr);
2972 goto err;
2973 }
2974
2975 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2976 }
2977
2978 for (vp = head; vp != NULL; vp = vp->vn_next) {
2979
2980 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) {
2981 vp->vn_sibling = root;
2982 root = vp;
2983 continue;
2984 }
2985
2986 for (parent = head; parent != NULL; parent = parent->vn_next) {
2987 if (parent->vn_addr != paddr)
2988 continue;
2989 vp->vn_sibling = parent->vn_children;
2990 parent->vn_children = vp;
2991 vp->vn_parent = parent;
2992 break;
2993 }
2994
2995 if (parent == NULL) {
2996 mdb_warn("couldn't find %p's parent (%p)\n",
2997 vp->vn_addr, paddr);
2998 goto err;
2999 }
3000 }
3001
3002 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3003 vw->vw_root = root;
3004
3005 if (current != NULL)
3006 vw->vw_current = current;
3007 else
3008 vw->vw_current = root;
3009
3010 wsp->walk_data = vw;
3011 return (WALK_NEXT);
3012 err:
3013 for (vp = head; head != NULL; vp = head) {
3014 head = vp->vn_next;
3015 mdb_free(vp, sizeof (vmem_node_t));
3016 }
3017
3018 return (WALK_ERR);
3019 }
3020
3021 int
3022 vmem_walk_step(mdb_walk_state_t *wsp)
3023 {
3024 vmem_walk_t *vw = wsp->walk_data;
3025 vmem_node_t *vp;
3026 int rval;
3027
3028 if ((vp = vw->vw_current) == NULL)
3029 return (WALK_DONE);
3030
3031 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3032
3033 if (vp->vn_children != NULL) {
3034 vw->vw_current = vp->vn_children;
3035 return (rval);
3036 }
3037
3038 do {
3039 vw->vw_current = vp->vn_sibling;
3040 vp = vp->vn_parent;
3041 } while (vw->vw_current == NULL && vp != NULL);
3042
3043 return (rval);
3044 }
3045
3046 /*
3047 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3048 * children are visited before their parent. We perform the postfix walk
3049 * iteratively (rather than recursively) to allow mdb to regain control
3050 * after each callback.
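 * For example (illustrative): if arena A has children B and C, and B has a
 * child D, then D is visited before B, and both B and C are visited before
 * A (e.g., D, B, C, A).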
3051 */
3052 int
3053 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3054 {
3055 vmem_walk_t *vw = wsp->walk_data;
3056 vmem_node_t *vp = vw->vw_current;
3057 int rval;
3058
3059 /*
3060 * If this node is marked, then we know that we have already visited
3061 * all of its children. If the node has any siblings, they need to
3062 * be visited next; otherwise, we need to visit the parent. Note
3063 * that vp->vn_marked will only be zero on the first invocation of
3064 * the step function.
3065 */
3066 if (vp->vn_marked) {
3067 if (vp->vn_sibling != NULL)
3068 vp = vp->vn_sibling;
3069 else if (vp->vn_parent != NULL)
3070 vp = vp->vn_parent;
3071 else {
3072 /*
3073 * We have neither a parent, nor a sibling, and we
3074 * have already been visited; we're done.
3075 */
3076 return (WALK_DONE);
3077 }
3078 }
3079
3080 /*
3081 * Before we visit this node, visit its children.
3082 */
3083 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3084 vp = vp->vn_children;
3085
3086 vp->vn_marked = 1;
3087 vw->vw_current = vp;
3088 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3089
3090 return (rval);
3091 }
3092
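/*
 * Tear down the vmem_node_t tree built by vmem_walk_init(): free each node's
 * children (recursively) before the node itself, and free the vmem_walk_t
 * once the last root-level node has been released.
 */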
3093 void
3094 vmem_walk_fini(mdb_walk_state_t *wsp)
3095 {
3096 vmem_walk_t *vw = wsp->walk_data;
3097 vmem_node_t *root = vw->vw_root;
3098 int done;
3099
3100 if (root == NULL)
3101 return;
3102
3103 if ((vw->vw_root = root->vn_children) != NULL)
3104 vmem_walk_fini(wsp);
3105
3106 vw->vw_root = root->vn_sibling;
3107 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3108 mdb_free(root, sizeof (vmem_node_t));
3109
3110 if (done) {
3111 mdb_free(vw, sizeof (vmem_walk_t));
3112 } else {
3113 vmem_walk_fini(wsp);
3114 }
3115 }
3116
3117 typedef struct vmem_seg_walk {
3118 uint8_t vsw_type;
3119 uintptr_t vsw_start;
3120 uintptr_t vsw_current;
3121 } vmem_seg_walk_t;
3122
3123 /*ARGSUSED*/
3124 int
3125 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3126 {
3127 vmem_seg_walk_t *vsw;
3128
3129 if (wsp->walk_addr == 0) {
3130 mdb_warn("vmem_%s does not support global walks\n", name);
3131 return (WALK_ERR);
3132 }
3133
3134 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3135
3136 vsw->vsw_type = type;
3137 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3138 vsw->vsw_current = vsw->vsw_start;
3139
3140 return (WALK_NEXT);
3141 }
3142
3143 /*
3144 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3145 */
3146 #define VMEM_NONE 0
3147
3148 int
3149 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3150 {
3151 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3152 }
3153
3154 int
3155 vmem_free_walk_init(mdb_walk_state_t *wsp)
3156 {
3157 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3158 }
3159
3160 int
3161 vmem_span_walk_init(mdb_walk_state_t *wsp)
3162 {
3163 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3164 }
3165
3166 int
3167 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3168 {
3169 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3170 }
3171
3172 int
3173 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3174 {
3175 vmem_seg_t seg;
3176 vmem_seg_walk_t *vsw = wsp->walk_data;
3177 uintptr_t addr = vsw->vsw_current;
3178 static size_t seg_size = 0;
3179 int rval;
3180
3181 if (!seg_size) {
3182 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3183 mdb_warn("failed to read 'vmem_seg_size'");
3184 seg_size = sizeof (vmem_seg_t);
3185 }
3186 }
3187
3188 if (seg_size < sizeof (seg))
3189 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3190
3191 if (mdb_vread(&seg, seg_size, addr) == -1) {
3192 mdb_warn("couldn't read vmem_seg at %p", addr);
3193 return (WALK_ERR);
3194 }
3195
3196 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3197 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3198 rval = WALK_NEXT;
3199 } else {
3200 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3201 }
3202
3203 if (vsw->vsw_current == vsw->vsw_start)
3204 return (WALK_DONE);
3205
3206 return (rval);
3207 }
3208
3209 void
3210 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3211 {
3212 vmem_seg_walk_t *vsw = wsp->walk_data;
3213
3214 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3215 }
3216
3217 #define VMEM_NAMEWIDTH 22
3218
3219 int
3220 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3221 {
3222 vmem_t v, parent;
3223 uintptr_t paddr;
3224 int ident = 0;
3225 char c[VMEM_NAMEWIDTH];
3226
3227 if (!(flags & DCMD_ADDRSPEC)) {
3228 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3229 mdb_warn("can't walk vmem");
3230 return (DCMD_ERR);
3231 }
3232 return (DCMD_OK);
3233 }
3234
3235 if (DCMD_HDRSPEC(flags))
3236 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3237 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3238 "TOTAL", "SUCCEED", "FAIL");
3239
3240 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3241 mdb_warn("couldn't read vmem at %p", addr);
3242 return (DCMD_ERR);
3243 }
3244
3245 for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) {
3246 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3247 mdb_warn("couldn't trace %p's ancestry", addr);
3248 ident = 0;
3249 break;
3250 }
3251 paddr = (uintptr_t)parent.vm_source;
3252 }
3253
3254 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3255
3256 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3257 addr, VMEM_NAMEWIDTH, c,
3258 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3259 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3260
3261 return (DCMD_OK);
3262 }
3263
3264 void
3265 vmem_seg_help(void)
3266 {
3267 mdb_printf("%s\n",
3268 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3269 "\n"
3270 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3271 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3272 "information.\n");
3273 mdb_dec_indent(2);
3274 mdb_printf("%<b>OPTIONS%</b>\n");
3275 mdb_inc_indent(2);
3276 mdb_printf("%s",
3277 " -v Display the full content of the vmem_seg, including its stack trace\n"
3278 " -s report the size of the segment, instead of the end address\n"
3279 " -c caller\n"
3280 " filter out segments without the function/PC in their stack trace\n"
3281 " -e earliest\n"
3282 " filter out segments timestamped before earliest\n"
3283 " -l latest\n"
3284 " filter out segments timestamped after latest\n"
3285 " -m minsize\n"
3286 "	    filter out segments smaller than minsize\n"
3287 " -M maxsize\n"
3288 "	    filter out segments larger than maxsize\n"
3289 " -t thread\n"
3290 " filter out segments not involving thread\n"
3291 " -T type\n"
3292 " filter out segments not of type 'type'\n"
3293 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3294 }
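/*
 * Example usage (illustrative): print the ALLOC segments of at least 4K in
 * an arena, including their stack traces:
 *
 *	> <arena addr>::walk vmem_seg | ::vmem_seg -v -T ALLOC -m 0x1000
 */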
3295
3296
3297 /*ARGSUSED*/
3298 int
3299 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3300 {
3301 vmem_seg_t vs;
3302 uintptr_t *stk = vs.vs_stack;
3303 uintptr_t sz;
3304 uint8_t t;
3305 const char *type = NULL;
3306 GElf_Sym sym;
3307 char c[MDB_SYM_NAMLEN];
3308 int no_debug;
3309 int i;
3310 int depth;
3311 uintptr_t laddr, haddr;
3312
3313 uintptr_t caller = 0, thread = 0;
3314 uintptr_t minsize = 0, maxsize = 0;
3315
3316 hrtime_t earliest = 0, latest = 0;
3317
3318 uint_t size = 0;
3319 uint_t verbose = 0;
3320
3321 if (!(flags & DCMD_ADDRSPEC))
3322 return (DCMD_USAGE);
3323
3324 if (mdb_getopts(argc, argv,
3325 'c', MDB_OPT_UINTPTR, &caller,
3326 'e', MDB_OPT_UINT64, &earliest,
3327 'l', MDB_OPT_UINT64, &latest,
3328 's', MDB_OPT_SETBITS, TRUE, &size,
3329 'm', MDB_OPT_UINTPTR, &minsize,
3330 'M', MDB_OPT_UINTPTR, &maxsize,
3331 't', MDB_OPT_UINTPTR, &thread,
3332 'T', MDB_OPT_STR, &type,
3333 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3334 NULL) != argc)
3335 return (DCMD_USAGE);
3336
3337 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3338 if (verbose) {
3339 mdb_printf("%16s %4s %16s %16s %16s\n"
3340 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3341 "ADDR", "TYPE", "START", "END", "SIZE",
3342 "", "", "THREAD", "TIMESTAMP", "");
3343 } else {
3344 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3345 "START", size? "SIZE" : "END", "WHO");
3346 }
3347 }
3348
3349 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3350 mdb_warn("couldn't read vmem_seg at %p", addr);
3351 return (DCMD_ERR);
3352 }
3353
3354 if (type != NULL) {
3355 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3356 t = VMEM_ALLOC;
3357 else if (strcmp(type, "FREE") == 0)
3358 t = VMEM_FREE;
3359 else if (strcmp(type, "SPAN") == 0)
3360 t = VMEM_SPAN;
3361 else if (strcmp(type, "ROTR") == 0 ||
3362 strcmp(type, "ROTOR") == 0)
3363 t = VMEM_ROTOR;
3364 else if (strcmp(type, "WLKR") == 0 ||
3365 strcmp(type, "WALKER") == 0)
3366 t = VMEM_WALKER;
3367 else {
3368 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3369 type);
3370 return (DCMD_ERR);
3371 }
3372
3373 if (vs.vs_type != t)
3374 return (DCMD_OK);
3375 }
3376
3377 sz = vs.vs_end - vs.vs_start;
3378
3379 if (minsize != 0 && sz < minsize)
3380 return (DCMD_OK);
3381
3382 if (maxsize != 0 && sz > maxsize)
3383 return (DCMD_OK);
3384
3385 t = vs.vs_type;
3386 depth = vs.vs_depth;
3387
3388 /*
3389 * debug info, when present, is only accurate for VMEM_ALLOC segments
3390 */
3391 no_debug = (t != VMEM_ALLOC) ||
3392 (depth == 0 || depth > VMEM_STACK_DEPTH);
3393
3394 if (no_debug) {
3395 if (caller != 0 || thread != 0 || earliest != 0 || latest != 0)
3396 return (DCMD_OK); /* not enough info */
3397 } else {
3398 if (caller != 0) {
3399 laddr = caller;
3400 haddr = caller + sizeof (caller);
3401
3402 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3403 sizeof (c), &sym) != -1 &&
3404 caller == (uintptr_t)sym.st_value) {
3405 /*
3406 * We were provided an exact symbol value; any
3407 * address in the function is valid.
3408 */
3409 laddr = (uintptr_t)sym.st_value;
3410 haddr = (uintptr_t)sym.st_value + sym.st_size;
3411 }
3412
3413 for (i = 0; i < depth; i++)
3414 if (vs.vs_stack[i] >= laddr &&
3415 vs.vs_stack[i] < haddr)
3416 break;
3417
3418 if (i == depth)
3419 return (DCMD_OK);
3420 }
3421
3422 if (thread != 0 && (uintptr_t)vs.vs_thread != thread)
3423 return (DCMD_OK);
3424
3425 if (earliest != 0 && vs.vs_timestamp < earliest)
3426 return (DCMD_OK);
3427
3428 if (latest != 0 && vs.vs_timestamp > latest)
3429 return (DCMD_OK);
3430 }
3431
3432 type = (t == VMEM_ALLOC ? "ALLC" :
3433 t == VMEM_FREE ? "FREE" :
3434 t == VMEM_SPAN ? "SPAN" :
3435 t == VMEM_ROTOR ? "ROTR" :
3436 t == VMEM_WALKER ? "WLKR" :
3437 "????");
3438
3439 if (flags & DCMD_PIPE_OUT) {
3440 mdb_printf("%#r\n", addr);
3441 return (DCMD_OK);
3442 }
3443
3444 if (verbose) {
3445 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3446 addr, type, vs.vs_start, vs.vs_end, sz);
3447
3448 if (no_debug)
3449 return (DCMD_OK);
3450
3451 mdb_printf("%16s %4s %16d %16llx\n",
3452 "", "", vs.vs_thread, vs.vs_timestamp);
3453
3454 mdb_inc_indent(17);
3455 for (i = 0; i < depth; i++) {
3456 mdb_printf("%a\n", stk[i]);
3457 }
3458 mdb_dec_indent(17);
3459 mdb_printf("\n");
3460 } else {
3461 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3462 vs.vs_start, size? sz : vs.vs_end);
3463
3464 if (no_debug) {
3465 mdb_printf("\n");
3466 return (DCMD_OK);
3467 }
3468
3469 for (i = 0; i < depth; i++) {
3470 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3471 c, sizeof (c), &sym) == -1)
3472 continue;
3473 if (is_umem_sym(c, "vmem_"))
3474 continue;
3475 break;
3476 }
3477 mdb_printf(" %a\n", stk[i]);
3478 }
3479 return (DCMD_OK);
3480 }
3481
3482 /*ARGSUSED*/
3483 static int
3484 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3485 {
3486 char name[UMEM_CACHE_NAMELEN + 1];
3487 hrtime_t delta;
3488 int i, depth;
3489
3490 if (bcp->bc_timestamp == 0)
3491 return (WALK_DONE);
3492
3493 if (*newest == 0)
3494 *newest = bcp->bc_timestamp;
3495
3496 delta = *newest - bcp->bc_timestamp;
3497 depth = MIN(bcp->bc_depth, umem_stack_depth);
3498
3499 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3500 &bcp->bc_cache->cache_name) <= 0)
3501 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3502
3503 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3504 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3505
3506 for (i = 0; i < depth; i++)
3507 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3508
3509 return (WALK_NEXT);
3510 }
3511
3512 int
3513 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3514 {
3515 const char *logname = "umem_transaction_log";
3516 hrtime_t newest = 0;
3517
3518 if ((flags & DCMD_ADDRSPEC) || argc > 1)
3519 return (DCMD_USAGE);
3520
3521 if (argc > 0) {
3522 if (argv->a_type != MDB_TYPE_STRING)
3523 return (DCMD_USAGE);
3524 if (strcmp(argv->a_un.a_str, "fail") == 0)
3525 logname = "umem_failure_log";
3526 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3527 logname = "umem_slab_log";
3528 else
3529 return (DCMD_USAGE);
3530 }
3531
3532 if (umem_readvar(&addr, logname) == -1) {
3533 		mdb_warn("failed to read %s log header pointer", logname);
3534 return (DCMD_ERR);
3535 }
3536
3537 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3538 mdb_warn("failed to walk umem log");
3539 return (DCMD_ERR);
3540 }
3541
3542 return (DCMD_OK);
3543 }
3544
3545 /*
3546 * As the final lure for die-hard crash(8) users, we provide ::umausers here.
3547 * The first piece is a structure which we use to accumulate umem_cache_t
3548 * addresses of interest. The umc_add is used as a callback for the umem_cache
3549 * walker; we either add all caches, or ones named explicitly as arguments.
3550 */
3551
3552 typedef struct umclist {
3553 const char *umc_name; /* Name to match (or NULL) */
3554 uintptr_t *umc_caches; /* List of umem_cache_t addrs */
3555 int umc_nelems; /* Num entries in umc_caches */
3556 int umc_size; /* Size of umc_caches array */
3557 } umclist_t;
3558
3559 static int
3560 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3561 {
3562 void *p;
3563 int s;
3564
3565 if (umc->umc_name == NULL ||
3566 strcmp(cp->cache_name, umc->umc_name) == 0) {
3567 /*
3568 * If we have a match, grow our array (if necessary), and then
3569 * add the virtual address of the matching cache to our list.
3570 */
3571 if (umc->umc_nelems >= umc->umc_size) {
3572 s = umc->umc_size ? umc->umc_size * 2 : 256;
3573 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3574
3575 bcopy(umc->umc_caches, p,
3576 sizeof (uintptr_t) * umc->umc_size);
3577
3578 umc->umc_caches = p;
3579 umc->umc_size = s;
3580 }
3581
3582 umc->umc_caches[umc->umc_nelems++] = addr;
3583 return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3584 }
3585
3586 return (WALK_NEXT);
3587 }
3588
3589 /*
3590 * The second piece of ::umausers is a hash table of allocations. Each
3591 * allocation owner is identified by its stack trace and data_size. We then
3592 * track the total bytes of all such allocations, and the number of allocations
3593 * to report at the end. Once we have a list of caches, we walk through the
3594 * allocated bufctls of each, and update our hash table accordingly.
3595 */
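/*
 * For example (illustrative): two 64-byte allocations made from the same
 * call path have the same data_size and stack trace, so they hash to a
 * single umowner_t reported with umo_num == 2.
 */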
3596
3597 typedef struct umowner {
3598 struct umowner *umo_head; /* First hash elt in bucket */
3599 struct umowner *umo_next; /* Next hash elt in chain */
3600 size_t umo_signature; /* Hash table signature */
3601 uint_t umo_num; /* Number of allocations */
3602 size_t umo_data_size; /* Size of each allocation */
3603 size_t umo_total_size; /* Total bytes of allocation */
3604 int umo_depth; /* Depth of stack trace */
3605 uintptr_t *umo_stack; /* Stack trace */
3606 } umowner_t;
3607
3608 typedef struct umusers {
3609 const umem_cache_t *umu_cache; /* Current umem cache */
3610 umowner_t *umu_hash; /* Hash table of owners */
3611 uintptr_t *umu_stacks; /* stacks for owners */
3612 int umu_nelems; /* Number of entries in use */
3613 int umu_size; /* Total number of entries */
3614 } umusers_t;
3615
3616 static void
3617 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3618 size_t size, size_t data_size)
3619 {
3620 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3621 size_t bucket, signature = data_size;
3622 umowner_t *umo, *umoend;
3623
3624 /*
3625 * If the hash table is full, double its size and rehash everything.
3626 */
3627 if (umu->umu_nelems >= umu->umu_size) {
3628 int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3629 size_t umowner_size = sizeof (umowner_t);
3630 size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3631 uintptr_t *new_stacks;
3632
3633 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3634 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3635
3636 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3637 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3638 umu->umu_hash = umo;
3639 umu->umu_stacks = new_stacks;
3640 umu->umu_size = s;
3641
3642 umoend = umu->umu_hash + umu->umu_size;
3643 for (umo = umu->umu_hash; umo < umoend; umo++) {
3644 umo->umo_head = NULL;
3645 umo->umo_stack = &umu->umu_stacks[
3646 umem_stack_depth * (umo - umu->umu_hash)];
3647 }
3648
3649 umoend = umu->umu_hash + umu->umu_nelems;
3650 for (umo = umu->umu_hash; umo < umoend; umo++) {
3651 bucket = umo->umo_signature & (umu->umu_size - 1);
3652 umo->umo_next = umu->umu_hash[bucket].umo_head;
3653 umu->umu_hash[bucket].umo_head = umo;
3654 }
3655 }
3656
3657 /*
3658 * Finish computing the hash signature from the stack trace, and then
3659 * see if the owner is in the hash table. If so, update our stats.
3660 */
3661 for (i = 0; i < depth; i++)
3662 signature += bcp->bc_stack[i];
3663
3664 bucket = signature & (umu->umu_size - 1);
3665
3666 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3667 if (umo->umo_signature == signature) {
3668 size_t difference = 0;
3669
3670 difference |= umo->umo_data_size - data_size;
3671 difference |= umo->umo_depth - depth;
3672
3673 for (i = 0; i < depth; i++) {
3674 difference |= umo->umo_stack[i] -
3675 bcp->bc_stack[i];
3676 }
3677
3678 if (difference == 0) {
3679 umo->umo_total_size += size;
3680 umo->umo_num++;
3681 return;
3682 }
3683 }
3684 }
3685
3686 /*
3687 * If the owner is not yet hashed, grab the next element and fill it
3688 * in based on the allocation information.
3689 */
3690 umo = &umu->umu_hash[umu->umu_nelems++];
3691 umo->umo_next = umu->umu_hash[bucket].umo_head;
3692 umu->umu_hash[bucket].umo_head = umo;
3693
3694 umo->umo_signature = signature;
3695 umo->umo_num = 1;
3696 umo->umo_data_size = data_size;
3697 umo->umo_total_size = size;
3698 umo->umo_depth = depth;
3699
3700 for (i = 0; i < depth; i++)
3701 umo->umo_stack[i] = bcp->bc_stack[i];
3702 }
3703
3704 /*
3705 * When ::umausers is invoked without the -f flag, we simply update our hash
3706 * table with the information from each allocated bufctl.
3707 */
3708 /*ARGSUSED*/
3709 static int
3710 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3711 {
3712 const umem_cache_t *cp = umu->umu_cache;
3713
3714 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3715 return (WALK_NEXT);
3716 }
3717
3718 /*
3719 * When ::umausers is invoked with the -f flag, we print out the information
3720 * for each bufctl as well as updating the hash table.
3721 */
3722 static int
3723 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3724 {
3725 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3726 const umem_cache_t *cp = umu->umu_cache;
3727
3728 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3729 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3730
3731 for (i = 0; i < depth; i++)
3732 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3733
3734 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3735 return (WALK_NEXT);
3736 }
3737
3738 /*
3739 * We sort our results by allocation size before printing them.
3740 */
3741 static int
3742 umownercmp(const void *lp, const void *rp)
3743 {
3744 const umowner_t *lhs = lp;
3745 const umowner_t *rhs = rp;
3746
3747 return (rhs->umo_total_size - lhs->umo_total_size);
3748 }
3749
3750 /*
3751 * The main engine of ::umausers is relatively straightforward: First we
3752 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3753 * iterate over the allocated bufctls of each cache in the list. Finally,
3754 * we sort and print our results.
3755 */
3756 /*ARGSUSED*/
3757 int
3758 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3759 {
3760 int mem_threshold = 8192; /* Minimum # bytes for printing */
3761 int cnt_threshold = 100; /* Minimum # blocks for printing */
3762 int audited_caches = 0; /* Number of UMF_AUDIT caches found */
3763 int do_all_caches = 1; /* Do all caches (no arguments) */
3764 int opt_e = FALSE; /* Include "small" users */
3765 int opt_f = FALSE; /* Print stack traces */
3766
3767 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3768 umowner_t *umo, *umoend;
3769 int i, oelems;
3770
3771 umclist_t umc;
3772 umusers_t umu;
3773
3774 if (flags & DCMD_ADDRSPEC)
3775 return (DCMD_USAGE);
3776
3777 bzero(&umc, sizeof (umc));
3778 bzero(&umu, sizeof (umu));
3779
3780 while ((i = mdb_getopts(argc, argv,
3781 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3782 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3783
3784 argv += i; /* skip past options we just processed */
3785 argc -= i; /* adjust argc */
3786
3787 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3788 return (DCMD_USAGE);
3789
3790 oelems = umc.umc_nelems;
3791 umc.umc_name = argv->a_un.a_str;
3792 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3793
3794 if (umc.umc_nelems == oelems) {
3795 mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3796 return (DCMD_ERR);
3797 }
3798
3799 do_all_caches = 0;
3800 argv++;
3801 argc--;
3802 }
3803
3804 if (opt_e)
3805 mem_threshold = cnt_threshold = 0;
3806
3807 if (opt_f)
3808 callback = (mdb_walk_cb_t)umause2;
3809
3810 if (do_all_caches) {
3811 umc.umc_name = NULL; /* match all cache names */
3812 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3813 }
3814
3815 for (i = 0; i < umc.umc_nelems; i++) {
3816 uintptr_t cp = umc.umc_caches[i];
3817 umem_cache_t c;
3818
3819 if (mdb_vread(&c, sizeof (c), cp) == -1) {
3820 mdb_warn("failed to read cache at %p", cp);
3821 continue;
3822 }
3823
3824 if (!(c.cache_flags & UMF_AUDIT)) {
3825 if (!do_all_caches) {
3826 mdb_warn("UMF_AUDIT is not enabled for %s\n",
3827 c.cache_name);
3828 }
3829 continue;
3830 }
3831
3832 umu.umu_cache = &c;
3833 (void) mdb_pwalk("bufctl", callback, &umu, cp);
3834 audited_caches++;
3835 }
3836
3837 if (audited_caches == 0 && do_all_caches) {
3838 mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3839 return (DCMD_ERR);
3840 }
3841
3842 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3843 umoend = umu.umu_hash + umu.umu_nelems;
3844
3845 for (umo = umu.umu_hash; umo < umoend; umo++) {
3846 if (umo->umo_total_size < mem_threshold &&
3847 umo->umo_num < cnt_threshold)
3848 continue;
3849 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3850 umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3851 for (i = 0; i < umo->umo_depth; i++)
3852 mdb_printf("\t %a\n", umo->umo_stack[i]);
3853 }
3854
3855 return (DCMD_OK);
3856 }
3857
3858 struct malloc_data {
3859 uint32_t malloc_size;
3860 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
3861 };
3862
3863 #ifdef _LP64
3864 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data))
3865 #else
3866 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data))
3867 #endif
3868
3869 typedef struct umem_malloc_info {
3870 size_t um_total; /* total allocated buffers */
3871 size_t um_malloc; /* malloc buffers */
3872 size_t um_malloc_size; /* sum of malloc buffer sizes */
3873 size_t um_malloc_overhead; /* sum of in-chunk overheads */
3874
3875 umem_cache_t *um_cp;
3876
3877 uint_t *um_bucket;
3878 } umem_malloc_info_t;
3879
3880 static void
3881 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3882 size_t maxbuckets, size_t minbucketsize, int geometric)
3883 {
3884 uint64_t um_malloc;
3885 int minb = -1;
3886 int maxb = -1;
3887 int buckets;
3888 int nbucks;
3889 int i;
3890 int b;
3891 const int *distarray;
3892
3893 minb = (int)minmalloc;
3894 maxb = (int)maxmalloc;
3895
3896 nbucks = buckets = maxb - minb + 1;
3897
3898 um_malloc = 0;
3899 for (b = minb; b <= maxb; b++)
3900 um_malloc += um_bucket[b];
3901
3902 if (maxbuckets != 0)
3903 buckets = MIN(buckets, maxbuckets);
3904
3905 if (minbucketsize > 1) {
3906 buckets = MIN(buckets, nbucks/minbucketsize);
3907 if (buckets == 0) {
3908 buckets = 1;
3909 minbucketsize = nbucks;
3910 }
3911 }
3912
3913 if (geometric)
3914 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3915 else
3916 distarray = dist_linear(buckets, minb, maxb);
3917
3918 dist_print_header("malloc size", 11, "count");
3919 for (i = 0; i < buckets; i++) {
3920 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3921 }
3922 mdb_printf("\n");
3923 }
3924
/*
 * A malloc()ed buffer looks like:
 *
 * <----------- mi.malloc_size --->
 * <----------- cp.cache_bufsize ------------------>
 * <----------- cp.cache_chunksize -------------------------------->
 * +-------+-----------------------+---------------+---------------+
 * |/tag///| mallocsz              |/round-off/////|/debug info////|
 * +-------+-----------------------+---------------+---------------+
 *         <-- usable space ------>
 *
 * mallocsz is the argument to malloc(3C).
 * mi.malloc_size is the actual size passed to umem_alloc(), which
 * is rounded up to the smallest available cache size, which is
 * cache_bufsize.  If there is debugging or alignment overhead in
 * the cache, that is reflected in a larger cache_chunksize.
 *
 * The tag at the beginning of the buffer is either 8 or 16 bytes,
 * depending upon the ISA's alignment requirements.  For 32-bit allocations,
 * it is always an 8-byte tag.  For 64-bit allocations larger than 8 bytes,
 * the tag has 8 bytes of padding before it.
 *
 * 32-bit buffers, and 64-bit buffers <= 8 bytes:
 *	+-------+-------+--------- ...
 *	|/size//|/stat//| mallocsz ...
 *	+-------+-------+--------- ...
 *			^
 *			pointer returned from malloc(3C)
 *
 * 64-bit buffers > 8 bytes:
 *	+---------------+-------+-------+--------- ...
 *	|/padding///////|/size//|/stat//| mallocsz ...
 *	+---------------+-------+-------+--------- ...
 *					^
 *					pointer returned from malloc(3C)
 *
 * The "size" field is "malloc_size", which is mallocsz plus the tag
 * (and padding) overhead.  The "stat" field is derived from malloc_size,
 * and functions as a validation that this buffer is actually from
 * malloc(3C).
 */
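/*
 * A worked example with assumed sizes (for illustration only): in a
 * 64-bit process, malloc(100) passes 100 + 16 = 116 bytes to umem_alloc(),
 * since buffers larger than 8 bytes carry 8 bytes of padding plus the
 * 8-byte tag.  If the smallest cache that fits is a hypothetical
 * umem_alloc_128, then cache_bufsize is 128, mallocsz is 100,
 * mi.malloc_size is 116, and um_umem_buffer_cb() below counts 16 bytes
 * of tag overhead plus (cache_chunksize - 116) bytes of round-off and
 * debug overhead for this buffer.
 */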
/*ARGSUSED*/
static int
um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
{
	struct malloc_data md;
	size_t m_addr = addr;
	size_t overhead = sizeof (md);
	size_t mallocsz;

	ump->um_total++;

#ifdef _LP64
	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
		m_addr += overhead;
		overhead += sizeof (md);
	}
#endif

	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
		mdb_warn("unable to read malloc header at %p", m_addr);
		return (WALK_NEXT);
	}

	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
	case MALLOC_MAGIC:
#ifdef _LP64
	case MALLOC_SECOND_MAGIC:
#endif
		mallocsz = md.malloc_size - overhead;

		ump->um_malloc++;
		ump->um_malloc_size += mallocsz;
		ump->um_malloc_overhead += overhead;

		/* include round-off and debug overhead */
		ump->um_malloc_overhead +=
		    ump->um_cp->cache_chunksize - md.malloc_size;

		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
			ump->um_bucket[mallocsz]++;

		break;
	default:
		break;
	}

	return (WALK_NEXT);
}

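/*
 * Read the target's umem_alloc_sizes[] array into garbage-collected
 * storage.  The array is 0-terminated; *out_num is the total number of
 * int slots read (including any trailing zero entries), so callers must
 * stop at the first zero.
 */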
int
get_umem_alloc_sizes(int **out, size_t *out_num)
{
	GElf_Sym sym;

	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
		mdb_warn("unable to look up umem_alloc_sizes");
		return (-1);
	}

	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
	*out_num = sym.st_size / sizeof (int);

	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
		*out = NULL;
		return (-1);
	}

	return (0);
}

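/*
 * Per-cache callback for the malloc() reports: only the umem_alloc_*
 * caches back malloc(3C), so other caches are skipped.  For each malloc
 * cache we walk its allocated buffers with the "umem" walker and let
 * um_umem_buffer_cb() classify them.
 */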
static int
um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
{
	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
		return (WALK_NEXT);

	ump->um_cp = cp;

	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
	    -1) {
		mdb_warn("can't walk 'umem' for cache %p", addr);
		return (WALK_ERR);
	}

	return (WALK_NEXT);
}

void
umem_malloc_dist_help(void)
{
	mdb_printf("%s\n",
	    "report distribution of outstanding malloc()s");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw data out, without binning\n"
"  -g    use geometric binning instead of linear binning\n");
}

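/*
 * Example invocation (illustrative; exact output depends on the target):
 *
 *	> ::umem_malloc_dist -g -b 16
 *
 * prints a histogram of outstanding malloc() request sizes across all
 * malloc caches, geometrically binned into at most 16 buckets.  With -d,
 * the raw per-size counts are dumped instead of being binned.
 */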
/*ARGSUSED*/
int
umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	umem_malloc_info_t mi;
	uint_t geometric = 0;
	uint_t dump = 0;
	size_t maxbuckets = 0;
	size_t minbucketsize = 0;

	size_t minalloc = 0;
	size_t maxalloc = UMI_MAX_BUCKET;

	if (flags & DCMD_ADDRSPEC)
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv,
	    'd', MDB_OPT_SETBITS, TRUE, &dump,
	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    NULL) != argc)
		return (DCMD_USAGE);

	bzero(&mi, sizeof (mi));
	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
	    UM_SLEEP | UM_GC);

	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
	    &mi) == -1) {
		mdb_warn("unable to walk 'umem_cache'");
		return (DCMD_ERR);
	}

	if (dump) {
		int i;
		for (i = minalloc; i <= maxalloc; i++)
			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);

		return (DCMD_OK);
	}

	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
	    maxbuckets, minbucketsize, geometric);

	return (DCMD_OK);
}

void
umem_malloc_info_help(void)
{
	mdb_printf("%s\n",
	    "report information about malloc()s by cache.");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -b maxbins\n"
"        Use at most maxbins bins for the data\n"
"  -B minbinsize\n"
"        Make the bins at least minbinsize bytes apart\n"
"  -d    dump the raw distribution data without binning\n"
#ifndef _KMDB
"  -g    use geometric binning instead of linear binning\n"
#endif
	    "");
}
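
/*
 * Example invocations (illustrative):
 *
 *	> ::umem_malloc_info
 *		print one summary line for every umem_alloc_* cache
 *	> <cache addr>::umem_malloc_info -g -b 20
 *		also print a geometric distribution of the malloc() sizes
 *		backed by that cache, in at most 20 bins
 */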

int
umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	umem_cache_t c;
	umem_malloc_info_t mi;

	int skip = 0;

	size_t maxmalloc;
	size_t overhead;
	size_t allocated;
	size_t avg_malloc;
	size_t overhead_pct;	/* 1000 * overhead_percent */

	uint_t verbose = 0;
	uint_t dump = 0;
	uint_t geometric = 0;
	size_t maxbuckets = 0;
	size_t minbucketsize = 0;

	int *alloc_sizes;
	int idx;
	size_t num;
	size_t minmalloc;

	if (mdb_getopts(argc, argv,
	    'd', MDB_OPT_SETBITS, TRUE, &dump,
	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    NULL) != argc)
		return (DCMD_USAGE);

	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
		verbose = 1;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
		    argc, argv) == -1) {
			mdb_warn("can't walk umem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("unable to read cache at %p", addr);
		return (DCMD_ERR);
	}

	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
		if (!(flags & DCMD_LOOP))
			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
			    "by malloc()\n", c.cache_name);
		skip = 1;
	}

	/*
	 * normally, print the header only the first time.  In verbose mode,
	 * print the header on every non-skipped buffer
	 */
	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
		    "CACHE", "BUFSZ", "MAXMAL",
		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");

	if (skip)
		return (DCMD_OK);

	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
#ifdef _LP64
	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
		maxmalloc -= sizeof (struct malloc_data);
#endif

	bzero(&mi, sizeof (mi));
	mi.um_cp = &c;
	if (verbose)
		mi.um_bucket =
		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
		    UM_SLEEP | UM_GC);

	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
	    -1) {
		mdb_warn("can't walk 'umem'");
		return (DCMD_ERR);
	}

	overhead = mi.um_malloc_overhead;
	allocated = mi.um_malloc_size;

	/* do integer round off for the average */
	if (mi.um_malloc != 0)
		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
	else
		avg_malloc = 0;

	/*
	 * include per-slab overhead
	 *
	 * Each slab in a given cache is the same size, and has the same
	 * number of chunks in it; we read in the first slab on the
	 * slab list to get the number of chunks for all slabs.  To
	 * compute the per-slab overhead, we just subtract the chunk usage
	 * from the slabsize:
	 *
	 * +------------+-------+-------+ ... --+-------+-------+-------+
	 * |////////////|       |       | ...   |       |///////|///////|
	 * |////color///| chunk | chunk | ...   | chunk |/color/|/slab//|
	 * |////////////|       |       | ...   |       |///////|///////|
	 * +------------+-------+-------+ ... --+-------+-------+-------+
	 * |            \_______chunksize * chunks_____/                |
	 * \__________________________slabsize__________________________/
	 *
	 * For UMF_HASH caches, there is an additional source of overhead;
	 * the external umem_slab_t and per-chunk bufctl structures.  We
	 * include those in our per-slab overhead.
	 *
	 * Once we have a number for the per-slab overhead, we estimate
	 * the actual overhead by treating the malloc()ed buffers as if
	 * they were densely packed:
	 *
	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
	 *
	 * carefully ordering the multiply before the divide, to avoid
	 * round-off error.
	 */
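	/*
	 * Worked example with assumed numbers: a 4096-byte slab holding
	 * 32 chunks of 120 bytes each leaves 4096 - (32 * 120) = 256
	 * bytes of per-slab waste (color and unused tail).  If 10 of the
	 * cache's buffers are malloc()s, the estimate below charges
	 * 256 * 10 / 32 = 80 additional bytes of overhead to them.
	 */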
	if (mi.um_malloc != 0) {
		umem_slab_t slab;
		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;

		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
			mdb_warn("unable to read slab at %p\n", saddr);
		} else {
			long chunks = slab.slab_chunks;
			if (chunks != 0 && c.cache_chunksize != 0 &&
			    chunks <= c.cache_slabsize / c.cache_chunksize) {
				uintmax_t perslab =
				    c.cache_slabsize -
				    (c.cache_chunksize * chunks);

				if (c.cache_flags & UMF_HASH) {
					perslab += sizeof (umem_slab_t) +
					    chunks *
					    ((c.cache_flags & UMF_AUDIT) ?
					    sizeof (umem_bufctl_audit_t) :
					    sizeof (umem_bufctl_t));
				}
				overhead +=
				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
			} else {
				mdb_warn("invalid #chunks (%d) in slab %p\n",
				    chunks, saddr);
			}
		}
	}

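	/*
	 * overhead_pct is a fixed-point value in tenths of a percent:
	 * for example (numbers assumed), 80 bytes of overhead against
	 * 1000 allocated bytes gives overhead_pct == 80, printed below
	 * as "8.0%".
	 */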
	if (allocated != 0)
		overhead_pct = (1000ULL * overhead) / allocated;
	else
		overhead_pct = 0;

	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
	    addr, c.cache_bufsize, maxmalloc,
	    mi.um_malloc, avg_malloc, allocated, overhead,
	    overhead_pct / 10, overhead_pct % 10);

	if (!verbose)
		return (DCMD_OK);

	if (!dump)
		mdb_printf("\n");

	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
		return (DCMD_ERR);

	for (idx = 0; idx < num; idx++) {
		if (alloc_sizes[idx] == c.cache_bufsize)
			break;
		if (alloc_sizes[idx] == 0) {
			idx = num;	/* 0-terminated array */
			break;
		}
	}
	if (idx == num) {
		mdb_warn(
		    "cache %p's size (%d) not in umem_alloc_sizes\n",
		    addr, c.cache_bufsize);
		return (DCMD_ERR);
	}

	minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
	if (minmalloc > 0) {
#ifdef _LP64
		if (minmalloc > UMEM_SECOND_ALIGN)
			minmalloc -= sizeof (struct malloc_data);
#endif
		minmalloc -= sizeof (struct malloc_data);
		minmalloc += 1;
	}

	if (dump) {
		for (idx = minmalloc; idx <= maxmalloc; idx++)
			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
		mdb_printf("\n");
	} else {
		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
		    maxbuckets, minbucketsize, geometric);
	}

	return (DCMD_OK);
}
