1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 2019 Joyent, Inc. 28 * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 29 */ 30 31 #include "umem.h" 32 33 #include <sys/vmem_impl_user.h> 34 #include <umem_impl.h> 35 36 #include <alloca.h> 37 #include <limits.h> 38 #include <mdb/mdb_whatis.h> 39 #include <thr_uberdata.h> 40 41 #include "misc.h" 42 #include "leaky.h" 43 #include "dist.h" 44 45 #include "umem_pagesize.h" 46 47 #define UM_ALLOCATED 0x1 48 #define UM_FREE 0x2 49 #define UM_BUFCTL 0x4 50 #define UM_HASH 0x8 51 52 int umem_ready; 53 54 static int umem_stack_depth_warned; 55 static uint32_t umem_max_ncpus; 56 uint32_t umem_stack_depth; 57 58 size_t umem_pagesize; 59 60 #define UMEM_READVAR(var) \ 61 (umem_readvar(&(var), #var) == -1 && \ 62 (mdb_warn("failed to read "#var), 1)) 63 64 int 65 umem_update_variables(void) 66 { 67 size_t pagesize; 68 69 /* 70 * Figure out which type of umem is being used; if it's not there 71 * yet, succeed quietly. 72 */ 73 if (umem_set_standalone() == -1) { 74 umem_ready = 0; 75 return (0); /* umem not there yet */ 76 } 77 78 /* 79 * Solaris 9 used a different name for umem_max_ncpus. It's 80 * cheap backwards compatibility to check for both names. 81 */ 82 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 && 83 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) { 84 mdb_warn("unable to read umem_max_ncpus or max_ncpus"); 85 return (-1); 86 } 87 if (UMEM_READVAR(umem_ready)) 88 return (-1); 89 if (UMEM_READVAR(umem_stack_depth)) 90 return (-1); 91 if (UMEM_READVAR(pagesize)) 92 return (-1); 93 94 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) { 95 if (umem_stack_depth_warned == 0) { 96 mdb_warn("umem_stack_depth corrupted (%d > %d)\n", 97 umem_stack_depth, UMEM_MAX_STACK_DEPTH); 98 umem_stack_depth_warned = 1; 99 } 100 umem_stack_depth = 0; 101 } 102 103 umem_pagesize = pagesize; 104 105 return (0); 106 } 107 108 static int 109 umem_ptc_walk_init(mdb_walk_state_t *wsp) 110 { 111 if (wsp->walk_addr == 0) { 112 if (mdb_layered_walk("ulwp", wsp) == -1) { 113 mdb_warn("couldn't walk 'ulwp'"); 114 return (WALK_ERR); 115 } 116 } 117 118 return (WALK_NEXT); 119 } 120 121 static int 122 umem_ptc_walk_step(mdb_walk_state_t *wsp) 123 { 124 uintptr_t this; 125 int rval; 126 127 if (wsp->walk_layer != NULL) { 128 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self + 129 (uintptr_t)wsp->walk_arg; 130 } else { 131 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg; 132 } 133 134 for (;;) { 135 if (mdb_vread(&this, sizeof (void *), this) == -1) { 136 mdb_warn("couldn't read ptc buffer at %p", this); 137 return (WALK_ERR); 138 } 139 140 if (this == 0) 141 break; 142 143 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata); 144 145 if (rval != WALK_NEXT) 146 return (rval); 147 } 148 149 return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE); 150 } 151 152 /*ARGSUSED*/ 153 static int 154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes) 155 { 156 mdb_walker_t w; 157 char descr[64]; 158 char name[64]; 159 int i; 160 161 (void) mdb_snprintf(descr, sizeof (descr), 162 "walk the %s cache", c->cache_name); 163 164 w.walk_name = c->cache_name; 165 w.walk_descr = descr; 166 w.walk_init = umem_walk_init; 167 w.walk_step = umem_walk_step; 168 w.walk_fini = umem_walk_fini; 169 w.walk_init_arg = (void *)addr; 170 171 if (mdb_add_walker(&w) == -1) 172 mdb_warn("failed to add %s walker", c->cache_name); 173 174 if (!(c->cache_flags & UMF_PTC)) 175 return (WALK_NEXT); 176 177 /* 178 * For the per-thread cache walker, the address is the offset in the 179 * tm_roots[] array of the ulwp_t. 180 */ 181 for (i = 0; sizes[i] != 0; i++) { 182 if (sizes[i] == c->cache_bufsize) 183 break; 184 } 185 186 if (sizes[i] == 0) { 187 mdb_warn("cache %s is cached per-thread, but could not find " 188 "size in umem_alloc_sizes\n", c->cache_name); 189 return (WALK_NEXT); 190 } 191 192 if (i >= NTMEMBASE) { 193 mdb_warn("index for %s (%d) exceeds root slots (%d)\n", 194 c->cache_name, i, NTMEMBASE); 195 return (WALK_NEXT); 196 } 197 198 (void) mdb_snprintf(name, sizeof (name), 199 "umem_ptc_%d", c->cache_bufsize); 200 (void) mdb_snprintf(descr, sizeof (descr), 201 "walk the per-thread cache for %s", c->cache_name); 202 203 w.walk_name = name; 204 w.walk_descr = descr; 205 w.walk_init = umem_ptc_walk_init; 206 w.walk_step = umem_ptc_walk_step; 207 w.walk_fini = NULL; 208 w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]); 209 210 if (mdb_add_walker(&w) == -1) 211 mdb_warn("failed to add %s walker", w.walk_name); 212 213 return (WALK_NEXT); 214 } 215 216 /*ARGSUSED*/ 217 static void 218 umem_statechange_cb(void *arg) 219 { 220 static int been_ready = 0; 221 GElf_Sym sym; 222 int *sizes; 223 224 #ifndef _KMDB 225 leaky_cleanup(1); /* state changes invalidate leaky state */ 226 #endif 227 228 if (umem_update_variables() == -1) 229 return; 230 231 if (been_ready) 232 return; 233 234 if (umem_ready != UMEM_READY) 235 return; 236 237 been_ready = 1; 238 239 /* 240 * In order to determine the tm_roots offset of any cache that is 241 * cached per-thread, we need to have the umem_alloc_sizes array. 242 * Read this, assuring that it is zero-terminated. 243 */ 244 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { 245 mdb_warn("unable to lookup 'umem_alloc_sizes'"); 246 return; 247 } 248 249 sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC); 250 251 if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) { 252 mdb_warn("couldn't read 'umem_alloc_sizes'"); 253 return; 254 } 255 256 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes); 257 } 258 259 int 260 umem_abort_messages(void) 261 { 262 char *umem_error_buffer; 263 uint_t umem_error_begin; 264 GElf_Sym sym; 265 size_t bufsize; 266 267 if (UMEM_READVAR(umem_error_begin)) 268 return (DCMD_ERR); 269 270 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) { 271 mdb_warn("unable to look up umem_error_buffer"); 272 return (DCMD_ERR); 273 } 274 275 bufsize = (size_t)sym.st_size; 276 277 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC); 278 279 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value) 280 != bufsize) { 281 mdb_warn("unable to read umem_error_buffer"); 282 return (DCMD_ERR); 283 } 284 /* put a zero after the end of the buffer to simplify printing */ 285 umem_error_buffer[bufsize] = 0; 286 287 if ((umem_error_begin % bufsize) == 0) 288 mdb_printf("%s\n", umem_error_buffer); 289 else { 290 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0; 291 mdb_printf("%s%s\n", 292 &umem_error_buffer[umem_error_begin % bufsize], 293 umem_error_buffer); 294 } 295 296 return (DCMD_OK); 297 } 298 299 static void 300 umem_log_status(const char *name, umem_log_header_t *val) 301 { 302 umem_log_header_t my_lh; 303 uintptr_t pos = (uintptr_t)val; 304 size_t size; 305 306 if (pos == 0) 307 return; 308 309 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) { 310 mdb_warn("\nunable to read umem_%s_log pointer %p", 311 name, pos); 312 return; 313 } 314 315 size = my_lh.lh_chunksize * my_lh.lh_nchunks; 316 317 if (size % (1024 * 1024) == 0) 318 mdb_printf("%s=%dm ", name, size / (1024 * 1024)); 319 else if (size % 1024 == 0) 320 mdb_printf("%s=%dk ", name, size / 1024); 321 else 322 mdb_printf("%s=%d ", name, size); 323 } 324 325 typedef struct umem_debug_flags { 326 const char *udf_name; 327 uint_t udf_flags; 328 uint_t udf_clear; /* if 0, uses udf_flags */ 329 } umem_debug_flags_t; 330 331 umem_debug_flags_t umem_status_flags[] = { 332 { "random", UMF_RANDOMIZE, UMF_RANDOM }, 333 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS }, 334 { "audit", UMF_AUDIT }, 335 { "guards", UMF_DEADBEEF | UMF_REDZONE }, 336 { "nosignal", UMF_CHECKSIGNAL }, 337 { "firewall", UMF_FIREWALL }, 338 { "lite", UMF_LITE }, 339 { "checknull", UMF_CHECKNULL }, 340 { NULL } 341 }; 342 343 /*ARGSUSED*/ 344 int 345 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 346 { 347 int umem_logging; 348 349 umem_log_header_t *umem_transaction_log; 350 umem_log_header_t *umem_content_log; 351 umem_log_header_t *umem_failure_log; 352 umem_log_header_t *umem_slab_log; 353 354 mdb_printf("Status:\t\t%s\n", 355 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" : 356 umem_ready == UMEM_READY_STARTUP ? "uninitialized" : 357 umem_ready == UMEM_READY_INITING ? "initialization in process" : 358 umem_ready == UMEM_READY ? "ready and active" : 359 umem_ready == 0 ? "not loaded into address space" : 360 "unknown (umem_ready invalid)"); 361 362 if (umem_ready == 0) 363 return (DCMD_OK); 364 365 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus); 366 367 if (UMEM_READVAR(umem_logging)) 368 goto err; 369 if (UMEM_READVAR(umem_transaction_log)) 370 goto err; 371 if (UMEM_READVAR(umem_content_log)) 372 goto err; 373 if (UMEM_READVAR(umem_failure_log)) 374 goto err; 375 if (UMEM_READVAR(umem_slab_log)) 376 goto err; 377 378 mdb_printf("Logs:\t\t"); 379 umem_log_status("transaction", umem_transaction_log); 380 umem_log_status("content", umem_content_log); 381 umem_log_status("fail", umem_failure_log); 382 umem_log_status("slab", umem_slab_log); 383 if (!umem_logging) 384 mdb_printf("(inactive)"); 385 mdb_printf("\n"); 386 387 mdb_printf("Message buffer:\n"); 388 return (umem_abort_messages()); 389 390 err: 391 mdb_printf("Message buffer:\n"); 392 (void) umem_abort_messages(); 393 return (DCMD_ERR); 394 } 395 396 typedef struct { 397 uintptr_t ucw_first; 398 uintptr_t ucw_current; 399 } umem_cache_walk_t; 400 401 int 402 umem_cache_walk_init(mdb_walk_state_t *wsp) 403 { 404 umem_cache_walk_t *ucw; 405 umem_cache_t c; 406 uintptr_t cp; 407 GElf_Sym sym; 408 409 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) { 410 mdb_warn("couldn't find umem_null_cache"); 411 return (WALK_ERR); 412 } 413 414 cp = (uintptr_t)sym.st_value; 415 416 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) { 417 mdb_warn("couldn't read cache at %p", cp); 418 return (WALK_ERR); 419 } 420 421 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP); 422 423 ucw->ucw_first = cp; 424 ucw->ucw_current = (uintptr_t)c.cache_next; 425 wsp->walk_data = ucw; 426 427 return (WALK_NEXT); 428 } 429 430 int 431 umem_cache_walk_step(mdb_walk_state_t *wsp) 432 { 433 umem_cache_walk_t *ucw = wsp->walk_data; 434 umem_cache_t c; 435 int status; 436 437 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) { 438 mdb_warn("couldn't read cache at %p", ucw->ucw_current); 439 return (WALK_DONE); 440 } 441 442 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata); 443 444 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first) 445 return (WALK_DONE); 446 447 return (status); 448 } 449 450 void 451 umem_cache_walk_fini(mdb_walk_state_t *wsp) 452 { 453 umem_cache_walk_t *ucw = wsp->walk_data; 454 mdb_free(ucw, sizeof (umem_cache_walk_t)); 455 } 456 457 typedef struct { 458 umem_cpu_t *ucw_cpus; 459 uint32_t ucw_current; 460 uint32_t ucw_max; 461 } umem_cpu_walk_state_t; 462 463 int 464 umem_cpu_walk_init(mdb_walk_state_t *wsp) 465 { 466 umem_cpu_t *umem_cpus; 467 468 umem_cpu_walk_state_t *ucw; 469 470 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) { 471 mdb_warn("failed to read 'umem_cpus'"); 472 return (WALK_ERR); 473 } 474 475 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP); 476 477 ucw->ucw_cpus = umem_cpus; 478 ucw->ucw_current = 0; 479 ucw->ucw_max = umem_max_ncpus; 480 481 wsp->walk_data = ucw; 482 return (WALK_NEXT); 483 } 484 485 int 486 umem_cpu_walk_step(mdb_walk_state_t *wsp) 487 { 488 umem_cpu_t cpu; 489 umem_cpu_walk_state_t *ucw = wsp->walk_data; 490 491 uintptr_t caddr; 492 493 if (ucw->ucw_current >= ucw->ucw_max) 494 return (WALK_DONE); 495 496 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]); 497 498 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) { 499 mdb_warn("failed to read cpu %d", ucw->ucw_current); 500 return (WALK_ERR); 501 } 502 503 ucw->ucw_current++; 504 505 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata)); 506 } 507 508 void 509 umem_cpu_walk_fini(mdb_walk_state_t *wsp) 510 { 511 umem_cpu_walk_state_t *ucw = wsp->walk_data; 512 513 mdb_free(ucw, sizeof (*ucw)); 514 } 515 516 int 517 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 518 { 519 if (wsp->walk_addr == 0) { 520 mdb_warn("umem_cpu_cache doesn't support global walks"); 521 return (WALK_ERR); 522 } 523 524 if (mdb_layered_walk("umem_cpu", wsp) == -1) { 525 mdb_warn("couldn't walk 'umem_cpu'"); 526 return (WALK_ERR); 527 } 528 529 wsp->walk_data = (void *)wsp->walk_addr; 530 531 return (WALK_NEXT); 532 } 533 534 int 535 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 536 { 537 uintptr_t caddr = (uintptr_t)wsp->walk_data; 538 const umem_cpu_t *cpu = wsp->walk_layer; 539 umem_cpu_cache_t cc; 540 541 caddr += cpu->cpu_cache_offset; 542 543 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) { 544 mdb_warn("couldn't read umem_cpu_cache at %p", caddr); 545 return (WALK_ERR); 546 } 547 548 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 549 } 550 551 int 552 umem_slab_walk_init(mdb_walk_state_t *wsp) 553 { 554 uintptr_t caddr = wsp->walk_addr; 555 umem_cache_t c; 556 557 if (caddr == 0) { 558 mdb_warn("umem_slab doesn't support global walks\n"); 559 return (WALK_ERR); 560 } 561 562 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 563 mdb_warn("couldn't read umem_cache at %p", caddr); 564 return (WALK_ERR); 565 } 566 567 wsp->walk_data = 568 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 569 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; 570 571 return (WALK_NEXT); 572 } 573 574 int 575 umem_slab_walk_partial_init(mdb_walk_state_t *wsp) 576 { 577 uintptr_t caddr = wsp->walk_addr; 578 umem_cache_t c; 579 580 if (caddr == 0) { 581 mdb_warn("umem_slab_partial doesn't support global walks\n"); 582 return (WALK_ERR); 583 } 584 585 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 586 mdb_warn("couldn't read umem_cache at %p", caddr); 587 return (WALK_ERR); 588 } 589 590 wsp->walk_data = 591 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 592 wsp->walk_addr = (uintptr_t)c.cache_freelist; 593 594 /* 595 * Some consumers (umem_walk_step(), in particular) require at 596 * least one callback if there are any buffers in the cache. So 597 * if there are *no* partial slabs, report the last full slab, if 598 * any. 599 * 600 * Yes, this is ugly, but it's cleaner than the other possibilities. 601 */ 602 if ((uintptr_t)wsp->walk_data == wsp->walk_addr) 603 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; 604 605 return (WALK_NEXT); 606 } 607 608 int 609 umem_slab_walk_step(mdb_walk_state_t *wsp) 610 { 611 umem_slab_t s; 612 uintptr_t addr = wsp->walk_addr; 613 uintptr_t saddr = (uintptr_t)wsp->walk_data; 614 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab); 615 616 if (addr == saddr) 617 return (WALK_DONE); 618 619 if (mdb_vread(&s, sizeof (s), addr) == -1) { 620 mdb_warn("failed to read slab at %p", wsp->walk_addr); 621 return (WALK_ERR); 622 } 623 624 if ((uintptr_t)s.slab_cache != caddr) { 625 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 626 addr, caddr, s.slab_cache); 627 return (WALK_ERR); 628 } 629 630 wsp->walk_addr = (uintptr_t)s.slab_next; 631 632 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); 633 } 634 635 int 636 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 637 { 638 umem_cache_t c; 639 640 if (!(flags & DCMD_ADDRSPEC)) { 641 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) { 642 mdb_warn("can't walk umem_cache"); 643 return (DCMD_ERR); 644 } 645 return (DCMD_OK); 646 } 647 648 if (DCMD_HDRSPEC(flags)) 649 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME", 650 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 651 652 if (mdb_vread(&c, sizeof (c), addr) == -1) { 653 mdb_warn("couldn't read umem_cache at %p", addr); 654 return (DCMD_ERR); 655 } 656 657 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name, 658 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 659 660 return (DCMD_OK); 661 } 662 663 static int 664 addrcmp(const void *lhs, const void *rhs) 665 { 666 uintptr_t p1 = *((uintptr_t *)lhs); 667 uintptr_t p2 = *((uintptr_t *)rhs); 668 669 if (p1 < p2) 670 return (-1); 671 if (p1 > p2) 672 return (1); 673 return (0); 674 } 675 676 static int 677 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs) 678 { 679 const umem_bufctl_audit_t *bcp1 = *lhs; 680 const umem_bufctl_audit_t *bcp2 = *rhs; 681 682 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 683 return (-1); 684 685 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 686 return (1); 687 688 return (0); 689 } 690 691 typedef struct umem_hash_walk { 692 uintptr_t *umhw_table; 693 size_t umhw_nelems; 694 size_t umhw_pos; 695 umem_bufctl_t umhw_cur; 696 } umem_hash_walk_t; 697 698 int 699 umem_hash_walk_init(mdb_walk_state_t *wsp) 700 { 701 umem_hash_walk_t *umhw; 702 uintptr_t *hash; 703 umem_cache_t c; 704 uintptr_t haddr, addr = wsp->walk_addr; 705 size_t nelems; 706 size_t hsize; 707 708 if (addr == 0) { 709 mdb_warn("umem_hash doesn't support global walks\n"); 710 return (WALK_ERR); 711 } 712 713 if (mdb_vread(&c, sizeof (c), addr) == -1) { 714 mdb_warn("couldn't read cache at addr %p", addr); 715 return (WALK_ERR); 716 } 717 718 if (!(c.cache_flags & UMF_HASH)) { 719 mdb_warn("cache %p doesn't have a hash table\n", addr); 720 return (WALK_DONE); /* nothing to do */ 721 } 722 723 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP); 724 umhw->umhw_cur.bc_next = NULL; 725 umhw->umhw_pos = 0; 726 727 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1; 728 hsize = nelems * sizeof (uintptr_t); 729 haddr = (uintptr_t)c.cache_hash_table; 730 731 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 732 if (mdb_vread(hash, hsize, haddr) == -1) { 733 mdb_warn("failed to read hash table at %p", haddr); 734 mdb_free(hash, hsize); 735 mdb_free(umhw, sizeof (umem_hash_walk_t)); 736 return (WALK_ERR); 737 } 738 739 wsp->walk_data = umhw; 740 741 return (WALK_NEXT); 742 } 743 744 int 745 umem_hash_walk_step(mdb_walk_state_t *wsp) 746 { 747 umem_hash_walk_t *umhw = wsp->walk_data; 748 uintptr_t addr = 0; 749 750 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == 0) { 751 while (umhw->umhw_pos < umhw->umhw_nelems) { 752 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != 0) 753 break; 754 } 755 } 756 if (addr == 0) 757 return (WALK_DONE); 758 759 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) { 760 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr); 761 return (WALK_ERR); 762 } 763 764 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata)); 765 } 766 767 void 768 umem_hash_walk_fini(mdb_walk_state_t *wsp) 769 { 770 umem_hash_walk_t *umhw = wsp->walk_data; 771 772 if (umhw == NULL) 773 return; 774 775 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t)); 776 mdb_free(umhw, sizeof (umem_hash_walk_t)); 777 } 778 779 /* 780 * Find the address of the bufctl structure for the address 'buf' in cache 781 * 'cp', which is at address caddr, and place it in *out. 782 */ 783 static int 784 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 785 { 786 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf); 787 umem_bufctl_t *bcp; 788 umem_bufctl_t bc; 789 790 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) { 791 mdb_warn("unable to read hash bucket for %p in cache %p", 792 buf, caddr); 793 return (-1); 794 } 795 796 while (bcp != NULL) { 797 if (mdb_vread(&bc, sizeof (umem_bufctl_t), 798 (uintptr_t)bcp) == -1) { 799 mdb_warn("unable to read bufctl at %p", bcp); 800 return (-1); 801 } 802 if (bc.bc_addr == buf) { 803 *out = (uintptr_t)bcp; 804 return (0); 805 } 806 bcp = bc.bc_next; 807 } 808 809 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 810 return (-1); 811 } 812 813 int 814 umem_get_magsize(const umem_cache_t *cp) 815 { 816 uintptr_t addr = (uintptr_t)cp->cache_magtype; 817 GElf_Sym mt_sym; 818 umem_magtype_t mt; 819 int res; 820 821 /* 822 * if cpu 0 has a non-zero magsize, it must be correct. caches 823 * with UMF_NOMAGAZINE have disabled their magazine layers, so 824 * it is okay to return 0 for them. 825 */ 826 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 827 (cp->cache_flags & UMF_NOMAGAZINE)) 828 return (res); 829 830 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) { 831 mdb_warn("unable to read 'umem_magtype'"); 832 } else if (addr < mt_sym.st_value || 833 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 834 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 835 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 836 cp->cache_name, addr); 837 return (0); 838 } 839 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 840 mdb_warn("unable to read magtype at %a", addr); 841 return (0); 842 } 843 return (mt.mt_magsize); 844 } 845 846 /*ARGSUSED*/ 847 static int 848 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est) 849 { 850 *est -= (sp->slab_chunks - sp->slab_refcnt); 851 852 return (WALK_NEXT); 853 } 854 855 /* 856 * Returns an upper bound on the number of allocated buffers in a given 857 * cache. 858 */ 859 size_t 860 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp) 861 { 862 int magsize; 863 size_t cache_est; 864 865 cache_est = cp->cache_buftotal; 866 867 (void) mdb_pwalk("umem_slab_partial", 868 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr); 869 870 if ((magsize = umem_get_magsize(cp)) != 0) { 871 size_t mag_est = cp->cache_full.ml_total * magsize; 872 873 if (cache_est >= mag_est) { 874 cache_est -= mag_est; 875 } else { 876 mdb_warn("cache %p's magazine layer holds more buffers " 877 "than the slab layer.\n", addr); 878 } 879 } 880 return (cache_est); 881 } 882 883 #define READMAG_ROUNDS(rounds) { \ 884 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \ 885 mdb_warn("couldn't read magazine at %p", ump); \ 886 goto fail; \ 887 } \ 888 for (i = 0; i < rounds; i++) { \ 889 maglist[magcnt++] = mp->mag_round[i]; \ 890 if (magcnt == magmax) { \ 891 mdb_warn("%d magazines exceeds fudge factor\n", \ 892 magcnt); \ 893 goto fail; \ 894 } \ 895 } \ 896 } 897 898 static int 899 umem_read_magazines(umem_cache_t *cp, uintptr_t addr, 900 void ***maglistp, size_t *magcntp, size_t *magmaxp) 901 { 902 umem_magazine_t *ump, *mp; 903 void **maglist = NULL; 904 int i, cpu; 905 size_t magsize, magmax, magbsize; 906 size_t magcnt = 0; 907 908 /* 909 * Read the magtype out of the cache, after verifying the pointer's 910 * correctness. 911 */ 912 magsize = umem_get_magsize(cp); 913 if (magsize == 0) { 914 *maglistp = NULL; 915 *magcntp = 0; 916 *magmaxp = 0; 917 return (0); 918 } 919 920 /* 921 * There are several places where we need to go buffer hunting: 922 * the per-CPU loaded magazine, the per-CPU spare full magazine, 923 * and the full magazine list in the depot. 924 * 925 * For an upper bound on the number of buffers in the magazine 926 * layer, we have the number of magazines on the cache_full 927 * list plus at most two magazines per CPU (the loaded and the 928 * spare). Toss in 100 magazines as a fudge factor in case this 929 * is live (the number "100" comes from the same fudge factor in 930 * crash(1M)). 931 */ 932 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize; 933 magbsize = offsetof(umem_magazine_t, mag_round[magsize]); 934 935 if (magbsize >= PAGESIZE / 2) { 936 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 937 addr, magbsize); 938 return (-1); 939 } 940 941 maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP); 942 mp = mdb_alloc(magbsize, UM_SLEEP); 943 if (mp == NULL || maglist == NULL) 944 goto fail; 945 946 /* 947 * First up: the magazines in the depot (i.e. on the cache_full list). 948 */ 949 for (ump = cp->cache_full.ml_list; ump != NULL; ) { 950 READMAG_ROUNDS(magsize); 951 ump = mp->mag_next; 952 953 if (ump == cp->cache_full.ml_list) 954 break; /* cache_full list loop detected */ 955 } 956 957 dprintf(("cache_full list done\n")); 958 959 /* 960 * Now whip through the CPUs, snagging the loaded magazines 961 * and full spares. 962 */ 963 for (cpu = 0; cpu < umem_max_ncpus; cpu++) { 964 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 965 966 dprintf(("reading cpu cache %p\n", 967 (uintptr_t)ccp - (uintptr_t)cp + addr)); 968 969 if (ccp->cc_rounds > 0 && 970 (ump = ccp->cc_loaded) != NULL) { 971 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 972 READMAG_ROUNDS(ccp->cc_rounds); 973 } 974 975 if (ccp->cc_prounds > 0 && 976 (ump = ccp->cc_ploaded) != NULL) { 977 dprintf(("reading %d previously loaded rounds\n", 978 ccp->cc_prounds)); 979 READMAG_ROUNDS(ccp->cc_prounds); 980 } 981 } 982 983 dprintf(("magazine layer: %d buffers\n", magcnt)); 984 985 mdb_free(mp, magbsize); 986 987 *maglistp = maglist; 988 *magcntp = magcnt; 989 *magmaxp = magmax; 990 991 return (0); 992 993 fail: 994 if (mp) 995 mdb_free(mp, magbsize); 996 if (maglist) 997 mdb_free(maglist, magmax * sizeof (void *)); 998 999 return (-1); 1000 } 1001 1002 typedef struct umem_read_ptc_walk { 1003 void **urpw_buf; 1004 size_t urpw_cnt; 1005 size_t urpw_max; 1006 } umem_read_ptc_walk_t; 1007 1008 /*ARGSUSED*/ 1009 static int 1010 umem_read_ptc_walk_buf(uintptr_t addr, 1011 const void *ignored, umem_read_ptc_walk_t *urpw) 1012 { 1013 if (urpw->urpw_cnt == urpw->urpw_max) { 1014 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1; 1015 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP); 1016 1017 if (nmax > 1) { 1018 size_t osize = urpw->urpw_max * sizeof (void *); 1019 bcopy(urpw->urpw_buf, new, osize); 1020 mdb_free(urpw->urpw_buf, osize); 1021 } 1022 1023 urpw->urpw_buf = new; 1024 urpw->urpw_max = nmax; 1025 } 1026 1027 urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr; 1028 1029 return (WALK_NEXT); 1030 } 1031 1032 static int 1033 umem_read_ptc(umem_cache_t *cp, 1034 void ***buflistp, size_t *bufcntp, size_t *bufmaxp) 1035 { 1036 umem_read_ptc_walk_t urpw; 1037 char walk[60]; 1038 int rval; 1039 1040 if (!(cp->cache_flags & UMF_PTC)) 1041 return (0); 1042 1043 (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d", 1044 cp->cache_bufsize); 1045 1046 urpw.urpw_buf = *buflistp; 1047 urpw.urpw_cnt = *bufcntp; 1048 urpw.urpw_max = *bufmaxp; 1049 1050 if ((rval = mdb_walk(walk, 1051 (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) { 1052 mdb_warn("couldn't walk %s", walk); 1053 } 1054 1055 *buflistp = urpw.urpw_buf; 1056 *bufcntp = urpw.urpw_cnt; 1057 *bufmaxp = urpw.urpw_max; 1058 1059 return (rval); 1060 } 1061 1062 static int 1063 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1064 { 1065 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1066 } 1067 1068 static int 1069 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1070 { 1071 umem_bufctl_audit_t *b; 1072 UMEM_LOCAL_BUFCTL_AUDIT(&b); 1073 1074 /* 1075 * if UMF_AUDIT is not set, we know that we're looking at a 1076 * umem_bufctl_t. 1077 */ 1078 if (!(cp->cache_flags & UMF_AUDIT) || 1079 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) { 1080 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE); 1081 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) { 1082 mdb_warn("unable to read bufctl at %p", buf); 1083 return (WALK_ERR); 1084 } 1085 } 1086 1087 return (wsp->walk_callback(buf, b, wsp->walk_cbdata)); 1088 } 1089 1090 typedef struct umem_walk { 1091 int umw_type; 1092 1093 uintptr_t umw_addr; /* cache address */ 1094 umem_cache_t *umw_cp; 1095 size_t umw_csize; 1096 1097 /* 1098 * magazine layer 1099 */ 1100 void **umw_maglist; 1101 size_t umw_max; 1102 size_t umw_count; 1103 size_t umw_pos; 1104 1105 /* 1106 * slab layer 1107 */ 1108 char *umw_valid; /* to keep track of freed buffers */ 1109 char *umw_ubase; /* buffer for slab data */ 1110 } umem_walk_t; 1111 1112 static int 1113 umem_walk_init_common(mdb_walk_state_t *wsp, int type) 1114 { 1115 umem_walk_t *umw; 1116 int csize; 1117 umem_cache_t *cp; 1118 size_t vm_quantum; 1119 1120 size_t magmax, magcnt; 1121 void **maglist = NULL; 1122 uint_t chunksize = 1, slabsize = 1; 1123 int status = WALK_ERR; 1124 uintptr_t addr = wsp->walk_addr; 1125 const char *layered; 1126 1127 type &= ~UM_HASH; 1128 1129 if (addr == 0) { 1130 mdb_warn("umem walk doesn't support global walks\n"); 1131 return (WALK_ERR); 1132 } 1133 1134 dprintf(("walking %p\n", addr)); 1135 1136 /* 1137 * The number of "cpus" determines how large the cache is. 1138 */ 1139 csize = UMEM_CACHE_SIZE(umem_max_ncpus); 1140 cp = mdb_alloc(csize, UM_SLEEP); 1141 1142 if (mdb_vread(cp, csize, addr) == -1) { 1143 mdb_warn("couldn't read cache at addr %p", addr); 1144 goto out2; 1145 } 1146 1147 /* 1148 * It's easy for someone to hand us an invalid cache address. 1149 * Unfortunately, it is hard for this walker to survive an 1150 * invalid cache cleanly. So we make sure that: 1151 * 1152 * 1. the vmem arena for the cache is readable, 1153 * 2. the vmem arena's quantum is a power of 2, 1154 * 3. our slabsize is a multiple of the quantum, and 1155 * 4. our chunksize is >0 and less than our slabsize. 1156 */ 1157 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1158 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1159 vm_quantum == 0 || 1160 (vm_quantum & (vm_quantum - 1)) != 0 || 1161 cp->cache_slabsize < vm_quantum || 1162 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1163 cp->cache_chunksize == 0 || 1164 cp->cache_chunksize > cp->cache_slabsize) { 1165 mdb_warn("%p is not a valid umem_cache_t\n", addr); 1166 goto out2; 1167 } 1168 1169 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1170 1171 if (cp->cache_buftotal == 0) { 1172 mdb_free(cp, csize); 1173 return (WALK_DONE); 1174 } 1175 1176 /* 1177 * If they ask for bufctls, but it's a small-slab cache, 1178 * there is nothing to report. 1179 */ 1180 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) { 1181 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n", 1182 cp->cache_flags)); 1183 mdb_free(cp, csize); 1184 return (WALK_DONE); 1185 } 1186 1187 /* 1188 * Read in the contents of the magazine layer 1189 */ 1190 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0) 1191 goto out2; 1192 1193 /* 1194 * Read in the contents of the per-thread caches, if any 1195 */ 1196 if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0) 1197 goto out2; 1198 1199 /* 1200 * We have all of the buffers from the magazines and from the 1201 * per-thread cache (if any); if we are walking allocated buffers, 1202 * sort them so we can bsearch them later. 1203 */ 1204 if (type & UM_ALLOCATED) 1205 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1206 1207 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP); 1208 1209 umw->umw_type = type; 1210 umw->umw_addr = addr; 1211 umw->umw_cp = cp; 1212 umw->umw_csize = csize; 1213 umw->umw_maglist = maglist; 1214 umw->umw_max = magmax; 1215 umw->umw_count = magcnt; 1216 umw->umw_pos = 0; 1217 1218 /* 1219 * When walking allocated buffers in a UMF_HASH cache, we walk the 1220 * hash table instead of the slab layer. 1221 */ 1222 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) { 1223 layered = "umem_hash"; 1224 1225 umw->umw_type |= UM_HASH; 1226 } else { 1227 /* 1228 * If we are walking freed buffers, we only need the 1229 * magazine layer plus the partially allocated slabs. 1230 * To walk allocated buffers, we need all of the slabs. 1231 */ 1232 if (type & UM_ALLOCATED) 1233 layered = "umem_slab"; 1234 else 1235 layered = "umem_slab_partial"; 1236 1237 /* 1238 * for small-slab caches, we read in the entire slab. For 1239 * freed buffers, we can just walk the freelist. For 1240 * allocated buffers, we use a 'valid' array to track 1241 * the freed buffers. 1242 */ 1243 if (!(cp->cache_flags & UMF_HASH)) { 1244 chunksize = cp->cache_chunksize; 1245 slabsize = cp->cache_slabsize; 1246 1247 umw->umw_ubase = mdb_alloc(slabsize + 1248 sizeof (umem_bufctl_t), UM_SLEEP); 1249 1250 if (type & UM_ALLOCATED) 1251 umw->umw_valid = 1252 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1253 } 1254 } 1255 1256 status = WALK_NEXT; 1257 1258 if (mdb_layered_walk(layered, wsp) == -1) { 1259 mdb_warn("unable to start layered '%s' walk", layered); 1260 status = WALK_ERR; 1261 } 1262 1263 out1: 1264 if (status == WALK_ERR) { 1265 if (umw->umw_valid) 1266 mdb_free(umw->umw_valid, slabsize / chunksize); 1267 1268 if (umw->umw_ubase) 1269 mdb_free(umw->umw_ubase, slabsize + 1270 sizeof (umem_bufctl_t)); 1271 1272 if (umw->umw_maglist) 1273 mdb_free(umw->umw_maglist, umw->umw_max * 1274 sizeof (uintptr_t)); 1275 1276 mdb_free(umw, sizeof (umem_walk_t)); 1277 wsp->walk_data = NULL; 1278 } 1279 1280 out2: 1281 if (status == WALK_ERR) 1282 mdb_free(cp, csize); 1283 1284 return (status); 1285 } 1286 1287 int 1288 umem_walk_step(mdb_walk_state_t *wsp) 1289 { 1290 umem_walk_t *umw = wsp->walk_data; 1291 int type = umw->umw_type; 1292 umem_cache_t *cp = umw->umw_cp; 1293 1294 void **maglist = umw->umw_maglist; 1295 int magcnt = umw->umw_count; 1296 1297 uintptr_t chunksize, slabsize; 1298 uintptr_t addr; 1299 const umem_slab_t *sp; 1300 const umem_bufctl_t *bcp; 1301 umem_bufctl_t bc; 1302 1303 int chunks; 1304 char *kbase; 1305 void *buf; 1306 int i, ret; 1307 1308 char *valid, *ubase; 1309 1310 /* 1311 * first, handle the 'umem_hash' layered walk case 1312 */ 1313 if (type & UM_HASH) { 1314 /* 1315 * We have a buffer which has been allocated out of the 1316 * global layer. We need to make sure that it's not 1317 * actually sitting in a magazine before we report it as 1318 * an allocated buffer. 1319 */ 1320 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr; 1321 1322 if (magcnt > 0 && 1323 bsearch(&buf, maglist, magcnt, sizeof (void *), 1324 addrcmp) != NULL) 1325 return (WALK_NEXT); 1326 1327 if (type & UM_BUFCTL) 1328 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1329 1330 return (umem_walk_callback(wsp, (uintptr_t)buf)); 1331 } 1332 1333 ret = WALK_NEXT; 1334 1335 addr = umw->umw_addr; 1336 1337 /* 1338 * If we're walking freed buffers, report everything in the 1339 * magazine layer before processing the first slab. 1340 */ 1341 if ((type & UM_FREE) && magcnt != 0) { 1342 umw->umw_count = 0; /* only do this once */ 1343 for (i = 0; i < magcnt; i++) { 1344 buf = maglist[i]; 1345 1346 if (type & UM_BUFCTL) { 1347 uintptr_t out; 1348 1349 if (cp->cache_flags & UMF_BUFTAG) { 1350 umem_buftag_t *btp; 1351 umem_buftag_t tag; 1352 1353 /* LINTED - alignment */ 1354 btp = UMEM_BUFTAG(cp, buf); 1355 if (mdb_vread(&tag, sizeof (tag), 1356 (uintptr_t)btp) == -1) { 1357 mdb_warn("reading buftag for " 1358 "%p at %p", buf, btp); 1359 continue; 1360 } 1361 out = (uintptr_t)tag.bt_bufctl; 1362 } else { 1363 if (umem_hash_lookup(cp, addr, buf, 1364 &out) == -1) 1365 continue; 1366 } 1367 ret = bufctl_walk_callback(cp, wsp, out); 1368 } else { 1369 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1370 } 1371 1372 if (ret != WALK_NEXT) 1373 return (ret); 1374 } 1375 } 1376 1377 /* 1378 * Handle the buffers in the current slab 1379 */ 1380 chunksize = cp->cache_chunksize; 1381 slabsize = cp->cache_slabsize; 1382 1383 sp = wsp->walk_layer; 1384 chunks = sp->slab_chunks; 1385 kbase = sp->slab_base; 1386 1387 dprintf(("kbase is %p\n", kbase)); 1388 1389 if (!(cp->cache_flags & UMF_HASH)) { 1390 valid = umw->umw_valid; 1391 ubase = umw->umw_ubase; 1392 1393 if (mdb_vread(ubase, chunks * chunksize, 1394 (uintptr_t)kbase) == -1) { 1395 mdb_warn("failed to read slab contents at %p", kbase); 1396 return (WALK_ERR); 1397 } 1398 1399 /* 1400 * Set up the valid map as fully allocated -- we'll punch 1401 * out the freelist. 1402 */ 1403 if (type & UM_ALLOCATED) 1404 (void) memset(valid, 1, chunks); 1405 } else { 1406 valid = NULL; 1407 ubase = NULL; 1408 } 1409 1410 /* 1411 * walk the slab's freelist 1412 */ 1413 bcp = sp->slab_head; 1414 1415 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1416 1417 /* 1418 * since we could be in the middle of allocating a buffer, 1419 * our refcnt could be one higher than it aught. So we 1420 * check one further on the freelist than the count allows. 1421 */ 1422 for (i = sp->slab_refcnt; i <= chunks; i++) { 1423 uint_t ndx; 1424 1425 dprintf(("bcp is %p\n", bcp)); 1426 1427 if (bcp == NULL) { 1428 if (i == chunks) 1429 break; 1430 mdb_warn( 1431 "slab %p in cache %p freelist too short by %d\n", 1432 sp, addr, chunks - i); 1433 break; 1434 } 1435 1436 if (cp->cache_flags & UMF_HASH) { 1437 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1438 mdb_warn("failed to read bufctl ptr at %p", 1439 bcp); 1440 break; 1441 } 1442 buf = bc.bc_addr; 1443 } else { 1444 /* 1445 * Otherwise the buffer is (or should be) in the slab 1446 * that we've read in; determine its offset in the 1447 * slab, validate that it's not corrupt, and add to 1448 * our base address to find the umem_bufctl_t. (Note 1449 * that we don't need to add the size of the bufctl 1450 * to our offset calculation because of the slop that's 1451 * allocated for the buffer at ubase.) 1452 */ 1453 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase; 1454 1455 if (offs > chunks * chunksize) { 1456 mdb_warn("found corrupt bufctl ptr %p" 1457 " in slab %p in cache %p\n", bcp, 1458 wsp->walk_addr, addr); 1459 break; 1460 } 1461 1462 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs)); 1463 buf = UMEM_BUF(cp, bcp); 1464 } 1465 1466 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1467 1468 if (ndx > slabsize / cp->cache_bufsize) { 1469 /* 1470 * This is very wrong; we have managed to find 1471 * a buffer in the slab which shouldn't 1472 * actually be here. Emit a warning, and 1473 * try to continue. 1474 */ 1475 mdb_warn("buf %p is out of range for " 1476 "slab %p, cache %p\n", buf, sp, addr); 1477 } else if (type & UM_ALLOCATED) { 1478 /* 1479 * we have found a buffer on the slab's freelist; 1480 * clear its entry 1481 */ 1482 valid[ndx] = 0; 1483 } else { 1484 /* 1485 * Report this freed buffer 1486 */ 1487 if (type & UM_BUFCTL) { 1488 ret = bufctl_walk_callback(cp, wsp, 1489 (uintptr_t)bcp); 1490 } else { 1491 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1492 } 1493 if (ret != WALK_NEXT) 1494 return (ret); 1495 } 1496 1497 bcp = bc.bc_next; 1498 } 1499 1500 if (bcp != NULL) { 1501 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1502 sp, addr, bcp)); 1503 } 1504 1505 /* 1506 * If we are walking freed buffers, the loop above handled reporting 1507 * them. 1508 */ 1509 if (type & UM_FREE) 1510 return (WALK_NEXT); 1511 1512 if (type & UM_BUFCTL) { 1513 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for " 1514 "cache %p\n", addr); 1515 return (WALK_ERR); 1516 } 1517 1518 /* 1519 * Report allocated buffers, skipping buffers in the magazine layer. 1520 * We only get this far for small-slab caches. 1521 */ 1522 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1523 buf = (char *)kbase + i * chunksize; 1524 1525 if (!valid[i]) 1526 continue; /* on slab freelist */ 1527 1528 if (magcnt > 0 && 1529 bsearch(&buf, maglist, magcnt, sizeof (void *), 1530 addrcmp) != NULL) 1531 continue; /* in magazine layer */ 1532 1533 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1534 } 1535 return (ret); 1536 } 1537 1538 void 1539 umem_walk_fini(mdb_walk_state_t *wsp) 1540 { 1541 umem_walk_t *umw = wsp->walk_data; 1542 uintptr_t chunksize; 1543 uintptr_t slabsize; 1544 1545 if (umw == NULL) 1546 return; 1547 1548 if (umw->umw_maglist != NULL) 1549 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *)); 1550 1551 chunksize = umw->umw_cp->cache_chunksize; 1552 slabsize = umw->umw_cp->cache_slabsize; 1553 1554 if (umw->umw_valid != NULL) 1555 mdb_free(umw->umw_valid, slabsize / chunksize); 1556 if (umw->umw_ubase != NULL) 1557 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t)); 1558 1559 mdb_free(umw->umw_cp, umw->umw_csize); 1560 mdb_free(umw, sizeof (umem_walk_t)); 1561 } 1562 1563 /*ARGSUSED*/ 1564 static int 1565 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp) 1566 { 1567 /* 1568 * Buffers allocated from NOTOUCH caches can also show up as freed 1569 * memory in other caches. This can be a little confusing, so we 1570 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1571 * that "::walk umem" and "::walk freemem" yield disjoint output). 1572 */ 1573 if (c->cache_cflags & UMC_NOTOUCH) 1574 return (WALK_NEXT); 1575 1576 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1577 wsp->walk_cbdata, addr) == -1) 1578 return (WALK_DONE); 1579 1580 return (WALK_NEXT); 1581 } 1582 1583 #define UMEM_WALK_ALL(name, wsp) { \ 1584 wsp->walk_data = (name); \ 1585 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \ 1586 return (WALK_ERR); \ 1587 return (WALK_DONE); \ 1588 } 1589 1590 int 1591 umem_walk_init(mdb_walk_state_t *wsp) 1592 { 1593 if (wsp->walk_arg != NULL) 1594 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1595 1596 if (wsp->walk_addr == 0) 1597 UMEM_WALK_ALL("umem", wsp); 1598 return (umem_walk_init_common(wsp, UM_ALLOCATED)); 1599 } 1600 1601 int 1602 bufctl_walk_init(mdb_walk_state_t *wsp) 1603 { 1604 if (wsp->walk_addr == 0) 1605 UMEM_WALK_ALL("bufctl", wsp); 1606 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL)); 1607 } 1608 1609 int 1610 freemem_walk_init(mdb_walk_state_t *wsp) 1611 { 1612 if (wsp->walk_addr == 0) 1613 UMEM_WALK_ALL("freemem", wsp); 1614 return (umem_walk_init_common(wsp, UM_FREE)); 1615 } 1616 1617 int 1618 freectl_walk_init(mdb_walk_state_t *wsp) 1619 { 1620 if (wsp->walk_addr == 0) 1621 UMEM_WALK_ALL("freectl", wsp); 1622 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL)); 1623 } 1624 1625 typedef struct bufctl_history_walk { 1626 void *bhw_next; 1627 umem_cache_t *bhw_cache; 1628 umem_slab_t *bhw_slab; 1629 hrtime_t bhw_timestamp; 1630 } bufctl_history_walk_t; 1631 1632 int 1633 bufctl_history_walk_init(mdb_walk_state_t *wsp) 1634 { 1635 bufctl_history_walk_t *bhw; 1636 umem_bufctl_audit_t bc; 1637 umem_bufctl_audit_t bcn; 1638 1639 if (wsp->walk_addr == 0) { 1640 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1641 return (WALK_ERR); 1642 } 1643 1644 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1645 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1646 return (WALK_ERR); 1647 } 1648 1649 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1650 bhw->bhw_timestamp = 0; 1651 bhw->bhw_cache = bc.bc_cache; 1652 bhw->bhw_slab = bc.bc_slab; 1653 1654 /* 1655 * sometimes the first log entry matches the base bufctl; in that 1656 * case, skip the base bufctl. 1657 */ 1658 if (bc.bc_lastlog != NULL && 1659 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1660 bc.bc_addr == bcn.bc_addr && 1661 bc.bc_cache == bcn.bc_cache && 1662 bc.bc_slab == bcn.bc_slab && 1663 bc.bc_timestamp == bcn.bc_timestamp && 1664 bc.bc_thread == bcn.bc_thread) 1665 bhw->bhw_next = bc.bc_lastlog; 1666 else 1667 bhw->bhw_next = (void *)wsp->walk_addr; 1668 1669 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1670 wsp->walk_data = bhw; 1671 1672 return (WALK_NEXT); 1673 } 1674 1675 int 1676 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1677 { 1678 bufctl_history_walk_t *bhw = wsp->walk_data; 1679 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1680 uintptr_t baseaddr = wsp->walk_addr; 1681 umem_bufctl_audit_t *b; 1682 UMEM_LOCAL_BUFCTL_AUDIT(&b); 1683 1684 if (addr == 0) 1685 return (WALK_DONE); 1686 1687 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1688 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1689 return (WALK_ERR); 1690 } 1691 1692 /* 1693 * The bufctl is only valid if the address, cache, and slab are 1694 * correct. We also check that the timestamp is decreasing, to 1695 * prevent infinite loops. 1696 */ 1697 if ((uintptr_t)b->bc_addr != baseaddr || 1698 b->bc_cache != bhw->bhw_cache || 1699 b->bc_slab != bhw->bhw_slab || 1700 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp)) 1701 return (WALK_DONE); 1702 1703 bhw->bhw_next = b->bc_lastlog; 1704 bhw->bhw_timestamp = b->bc_timestamp; 1705 1706 return (wsp->walk_callback(addr, b, wsp->walk_cbdata)); 1707 } 1708 1709 void 1710 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1711 { 1712 bufctl_history_walk_t *bhw = wsp->walk_data; 1713 1714 mdb_free(bhw, sizeof (*bhw)); 1715 } 1716 1717 typedef struct umem_log_walk { 1718 umem_bufctl_audit_t *ulw_base; 1719 umem_bufctl_audit_t **ulw_sorted; 1720 umem_log_header_t ulw_lh; 1721 size_t ulw_size; 1722 size_t ulw_maxndx; 1723 size_t ulw_ndx; 1724 } umem_log_walk_t; 1725 1726 int 1727 umem_log_walk_init(mdb_walk_state_t *wsp) 1728 { 1729 uintptr_t lp = wsp->walk_addr; 1730 umem_log_walk_t *ulw; 1731 umem_log_header_t *lhp; 1732 int maxndx, i, j, k; 1733 1734 /* 1735 * By default (global walk), walk the umem_transaction_log. Otherwise 1736 * read the log whose umem_log_header_t is stored at walk_addr. 1737 */ 1738 if (lp == 0 && umem_readvar(&lp, "umem_transaction_log") == -1) { 1739 mdb_warn("failed to read 'umem_transaction_log'"); 1740 return (WALK_ERR); 1741 } 1742 1743 if (lp == 0) { 1744 mdb_warn("log is disabled\n"); 1745 return (WALK_ERR); 1746 } 1747 1748 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP); 1749 lhp = &ulw->ulw_lh; 1750 1751 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) { 1752 mdb_warn("failed to read log header at %p", lp); 1753 mdb_free(ulw, sizeof (umem_log_walk_t)); 1754 return (WALK_ERR); 1755 } 1756 1757 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1758 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP); 1759 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1; 1760 1761 if (mdb_vread(ulw->ulw_base, ulw->ulw_size, 1762 (uintptr_t)lhp->lh_base) == -1) { 1763 mdb_warn("failed to read log at base %p", lhp->lh_base); 1764 mdb_free(ulw->ulw_base, ulw->ulw_size); 1765 mdb_free(ulw, sizeof (umem_log_walk_t)); 1766 return (WALK_ERR); 1767 } 1768 1769 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1770 sizeof (umem_bufctl_audit_t *), UM_SLEEP); 1771 1772 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1773 caddr_t chunk = (caddr_t) 1774 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize); 1775 1776 for (j = 0; j < maxndx; j++) { 1777 /* LINTED align */ 1778 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk; 1779 chunk += UMEM_BUFCTL_AUDIT_SIZE; 1780 } 1781 } 1782 1783 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *), 1784 (int(*)(const void *, const void *))bufctlcmp); 1785 1786 ulw->ulw_maxndx = k; 1787 wsp->walk_data = ulw; 1788 1789 return (WALK_NEXT); 1790 } 1791 1792 int 1793 umem_log_walk_step(mdb_walk_state_t *wsp) 1794 { 1795 umem_log_walk_t *ulw = wsp->walk_data; 1796 umem_bufctl_audit_t *bcp; 1797 1798 if (ulw->ulw_ndx == ulw->ulw_maxndx) 1799 return (WALK_DONE); 1800 1801 bcp = ulw->ulw_sorted[ulw->ulw_ndx++]; 1802 1803 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base + 1804 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata)); 1805 } 1806 1807 void 1808 umem_log_walk_fini(mdb_walk_state_t *wsp) 1809 { 1810 umem_log_walk_t *ulw = wsp->walk_data; 1811 1812 mdb_free(ulw->ulw_base, ulw->ulw_size); 1813 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx * 1814 sizeof (umem_bufctl_audit_t *)); 1815 mdb_free(ulw, sizeof (umem_log_walk_t)); 1816 } 1817 1818 typedef struct allocdby_bufctl { 1819 uintptr_t abb_addr; 1820 hrtime_t abb_ts; 1821 } allocdby_bufctl_t; 1822 1823 typedef struct allocdby_walk { 1824 const char *abw_walk; 1825 uintptr_t abw_thread; 1826 size_t abw_nbufs; 1827 size_t abw_size; 1828 allocdby_bufctl_t *abw_buf; 1829 size_t abw_ndx; 1830 } allocdby_walk_t; 1831 1832 int 1833 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp, 1834 allocdby_walk_t *abw) 1835 { 1836 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1837 return (WALK_NEXT); 1838 1839 if (abw->abw_nbufs == abw->abw_size) { 1840 allocdby_bufctl_t *buf; 1841 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1842 1843 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1844 1845 bcopy(abw->abw_buf, buf, oldsize); 1846 mdb_free(abw->abw_buf, oldsize); 1847 1848 abw->abw_size <<= 1; 1849 abw->abw_buf = buf; 1850 } 1851 1852 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1853 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1854 abw->abw_nbufs++; 1855 1856 return (WALK_NEXT); 1857 } 1858 1859 /*ARGSUSED*/ 1860 int 1861 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw) 1862 { 1863 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl, 1864 abw, addr) == -1) { 1865 mdb_warn("couldn't walk bufctl for cache %p", addr); 1866 return (WALK_DONE); 1867 } 1868 1869 return (WALK_NEXT); 1870 } 1871 1872 static int 1873 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1874 { 1875 if (lhs->abb_ts < rhs->abb_ts) 1876 return (1); 1877 if (lhs->abb_ts > rhs->abb_ts) 1878 return (-1); 1879 return (0); 1880 } 1881 1882 static int 1883 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1884 { 1885 allocdby_walk_t *abw; 1886 1887 if (wsp->walk_addr == 0) { 1888 mdb_warn("allocdby walk doesn't support global walks\n"); 1889 return (WALK_ERR); 1890 } 1891 1892 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1893 1894 abw->abw_thread = wsp->walk_addr; 1895 abw->abw_walk = walk; 1896 abw->abw_size = 128; /* something reasonable */ 1897 abw->abw_buf = 1898 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1899 1900 wsp->walk_data = abw; 1901 1902 if (mdb_walk("umem_cache", 1903 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1904 mdb_warn("couldn't walk umem_cache"); 1905 allocdby_walk_fini(wsp); 1906 return (WALK_ERR); 1907 } 1908 1909 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1910 (int(*)(const void *, const void *))allocdby_cmp); 1911 1912 return (WALK_NEXT); 1913 } 1914 1915 int 1916 allocdby_walk_init(mdb_walk_state_t *wsp) 1917 { 1918 return (allocdby_walk_init_common(wsp, "bufctl")); 1919 } 1920 1921 int 1922 freedby_walk_init(mdb_walk_state_t *wsp) 1923 { 1924 return (allocdby_walk_init_common(wsp, "freectl")); 1925 } 1926 1927 int 1928 allocdby_walk_step(mdb_walk_state_t *wsp) 1929 { 1930 allocdby_walk_t *abw = wsp->walk_data; 1931 uintptr_t addr; 1932 umem_bufctl_audit_t *bcp; 1933 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 1934 1935 if (abw->abw_ndx == abw->abw_nbufs) 1936 return (WALK_DONE); 1937 1938 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1939 1940 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1941 mdb_warn("couldn't read bufctl at %p", addr); 1942 return (WALK_DONE); 1943 } 1944 1945 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata)); 1946 } 1947 1948 void 1949 allocdby_walk_fini(mdb_walk_state_t *wsp) 1950 { 1951 allocdby_walk_t *abw = wsp->walk_data; 1952 1953 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1954 mdb_free(abw, sizeof (allocdby_walk_t)); 1955 } 1956 1957 /*ARGSUSED*/ 1958 int 1959 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored) 1960 { 1961 char c[MDB_SYM_NAMLEN]; 1962 GElf_Sym sym; 1963 int i; 1964 1965 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 1966 for (i = 0; i < bcp->bc_depth; i++) { 1967 if (mdb_lookup_by_addr(bcp->bc_stack[i], 1968 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 1969 continue; 1970 if (is_umem_sym(c, "umem_")) 1971 continue; 1972 mdb_printf("%s+0x%lx", 1973 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 1974 break; 1975 } 1976 mdb_printf("\n"); 1977 1978 return (WALK_NEXT); 1979 } 1980 1981 static int 1982 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 1983 { 1984 if (!(flags & DCMD_ADDRSPEC)) 1985 return (DCMD_USAGE); 1986 1987 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 1988 1989 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 1990 mdb_warn("can't walk '%s' for %p", w, addr); 1991 return (DCMD_ERR); 1992 } 1993 1994 return (DCMD_OK); 1995 } 1996 1997 /*ARGSUSED*/ 1998 int 1999 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2000 { 2001 return (allocdby_common(addr, flags, "allocdby")); 2002 } 2003 2004 /*ARGSUSED*/ 2005 int 2006 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2007 { 2008 return (allocdby_common(addr, flags, "freedby")); 2009 } 2010 2011 typedef struct whatis_info { 2012 mdb_whatis_t *wi_w; 2013 const umem_cache_t *wi_cache; 2014 const vmem_t *wi_vmem; 2015 vmem_t *wi_msb_arena; 2016 size_t wi_slab_size; 2017 int wi_slab_found; 2018 uint_t wi_freemem; 2019 } whatis_info_t; 2020 2021 /* call one of our dcmd functions with "-v" and the provided address */ 2022 static void 2023 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr) 2024 { 2025 mdb_arg_t a; 2026 a.a_type = MDB_TYPE_STRING; 2027 a.a_un.a_str = "-v"; 2028 2029 mdb_printf(":\n"); 2030 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a); 2031 } 2032 2033 static void 2034 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, 2035 uintptr_t baddr) 2036 { 2037 mdb_whatis_t *w = wi->wi_w; 2038 const umem_cache_t *cp = wi->wi_cache; 2039 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); 2040 2041 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT)); 2042 2043 mdb_whatis_report_object(w, maddr, addr, ""); 2044 2045 if (baddr != 0 && !call_printer) 2046 mdb_printf("bufctl %p ", baddr); 2047 2048 mdb_printf("%s from %s", 2049 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name); 2050 2051 if (call_printer && baddr != 0) { 2052 whatis_call_printer(bufctl, baddr); 2053 return; 2054 } 2055 mdb_printf("\n"); 2056 } 2057 2058 /*ARGSUSED*/ 2059 static int 2060 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi) 2061 { 2062 mdb_whatis_t *w = wi->wi_w; 2063 2064 uintptr_t cur; 2065 size_t size = wi->wi_cache->cache_bufsize; 2066 2067 while (mdb_whatis_match(w, addr, size, &cur)) 2068 whatis_print_umem(wi, cur, addr, 0); 2069 2070 return (WHATIS_WALKRET(w)); 2071 } 2072 2073 /*ARGSUSED*/ 2074 static int 2075 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi) 2076 { 2077 mdb_whatis_t *w = wi->wi_w; 2078 2079 uintptr_t cur; 2080 uintptr_t addr = (uintptr_t)bcp->bc_addr; 2081 size_t size = wi->wi_cache->cache_bufsize; 2082 2083 while (mdb_whatis_match(w, addr, size, &cur)) 2084 whatis_print_umem(wi, cur, addr, baddr); 2085 2086 return (WHATIS_WALKRET(w)); 2087 } 2088 2089 2090 static int 2091 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) 2092 { 2093 mdb_whatis_t *w = wi->wi_w; 2094 2095 size_t size = vs->vs_end - vs->vs_start; 2096 uintptr_t cur; 2097 2098 /* We're not interested in anything but alloc and free segments */ 2099 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) 2100 return (WALK_NEXT); 2101 2102 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { 2103 mdb_whatis_report_object(w, cur, vs->vs_start, ""); 2104 2105 /* 2106 * If we're not printing it seperately, provide the vmem_seg 2107 * pointer if it has a stack trace. 2108 */ 2109 if ((mdb_whatis_flags(w) & WHATIS_QUIET) && 2110 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 || 2111 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 2112 mdb_printf("vmem_seg %p ", addr); 2113 } 2114 2115 mdb_printf("%s from %s vmem arena", 2116 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed", 2117 wi->wi_vmem->vm_name); 2118 2119 if (!(mdb_whatis_flags(w) & WHATIS_QUIET)) 2120 whatis_call_printer(vmem_seg, addr); 2121 else 2122 mdb_printf("\n"); 2123 } 2124 2125 return (WHATIS_WALKRET(w)); 2126 } 2127 2128 static int 2129 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) 2130 { 2131 mdb_whatis_t *w = wi->wi_w; 2132 const char *nm = vmem->vm_name; 2133 wi->wi_vmem = vmem; 2134 2135 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2136 mdb_printf("Searching vmem arena %s...\n", nm); 2137 2138 if (mdb_pwalk("vmem_seg", 2139 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { 2140 mdb_warn("can't walk vmem seg for %p", addr); 2141 return (WALK_NEXT); 2142 } 2143 2144 return (WHATIS_WALKRET(w)); 2145 } 2146 2147 /*ARGSUSED*/ 2148 static int 2149 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi) 2150 { 2151 mdb_whatis_t *w = wi->wi_w; 2152 2153 /* It must overlap with the slab data, or it's not interesting */ 2154 if (mdb_whatis_overlaps(w, 2155 (uintptr_t)sp->slab_base, wi->wi_slab_size)) { 2156 wi->wi_slab_found++; 2157 return (WALK_DONE); 2158 } 2159 return (WALK_NEXT); 2160 } 2161 2162 static int 2163 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2164 { 2165 mdb_whatis_t *w = wi->wi_w; 2166 char *walk, *freewalk; 2167 mdb_walk_cb_t func; 2168 int do_bufctl; 2169 2170 /* Override the '-b' flag as necessary */ 2171 if (!(c->cache_flags & UMF_HASH)) 2172 do_bufctl = FALSE; /* no bufctls to walk */ 2173 else if (c->cache_flags & UMF_AUDIT) 2174 do_bufctl = TRUE; /* we always want debugging info */ 2175 else 2176 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); 2177 2178 if (do_bufctl) { 2179 walk = "bufctl"; 2180 freewalk = "freectl"; 2181 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2182 } else { 2183 walk = "umem"; 2184 freewalk = "freemem"; 2185 func = (mdb_walk_cb_t)whatis_walk_umem; 2186 } 2187 2188 wi->wi_cache = c; 2189 2190 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2191 mdb_printf("Searching %s...\n", c->cache_name); 2192 2193 /* 2194 * If more then two buffers live on each slab, figure out if we're 2195 * interested in anything in any slab before doing the more expensive 2196 * umem/freemem (bufctl/freectl) walkers. 2197 */ 2198 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; 2199 if (!(c->cache_flags & UMF_HASH)) 2200 wi->wi_slab_size -= sizeof (umem_slab_t); 2201 2202 if ((wi->wi_slab_size / c->cache_chunksize) > 2) { 2203 wi->wi_slab_found = 0; 2204 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, 2205 addr) == -1) { 2206 mdb_warn("can't find umem_slab walker"); 2207 return (WALK_DONE); 2208 } 2209 if (wi->wi_slab_found == 0) 2210 return (WALK_NEXT); 2211 } 2212 2213 wi->wi_freemem = FALSE; 2214 if (mdb_pwalk(walk, func, wi, addr) == -1) { 2215 mdb_warn("can't find %s walker", walk); 2216 return (WALK_DONE); 2217 } 2218 2219 if (mdb_whatis_done(w)) 2220 return (WALK_DONE); 2221 2222 /* 2223 * We have searched for allocated memory; now search for freed memory. 2224 */ 2225 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2226 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2227 2228 wi->wi_freemem = TRUE; 2229 2230 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2231 mdb_warn("can't find %s walker", freewalk); 2232 return (WALK_DONE); 2233 } 2234 2235 return (WHATIS_WALKRET(w)); 2236 } 2237 2238 static int 2239 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2240 { 2241 if (c->cache_arena == wi->wi_msb_arena || 2242 (c->cache_cflags & UMC_NOTOUCH)) 2243 return (WALK_NEXT); 2244 2245 return (whatis_walk_cache(addr, c, wi)); 2246 } 2247 2248 static int 2249 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2250 { 2251 if (c->cache_arena != wi->wi_msb_arena) 2252 return (WALK_NEXT); 2253 2254 return (whatis_walk_cache(addr, c, wi)); 2255 } 2256 2257 static int 2258 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2259 { 2260 if (c->cache_arena == wi->wi_msb_arena || 2261 !(c->cache_cflags & UMC_NOTOUCH)) 2262 return (WALK_NEXT); 2263 2264 return (whatis_walk_cache(addr, c, wi)); 2265 } 2266 2267 /*ARGSUSED*/ 2268 static int 2269 whatis_run_umem(mdb_whatis_t *w, void *ignored) 2270 { 2271 whatis_info_t wi; 2272 2273 bzero(&wi, sizeof (wi)); 2274 wi.wi_w = w; 2275 2276 /* umem's metadata is allocated from the umem_internal_arena */ 2277 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1) 2278 mdb_warn("unable to readvar \"umem_internal_arena\""); 2279 2280 /* 2281 * We process umem caches in the following order: 2282 * 2283 * non-UMC_NOTOUCH, non-metadata (typically the most interesting) 2284 * metadata (can be huge with UMF_AUDIT) 2285 * UMC_NOTOUCH, non-metadata (see umem_walk_all()) 2286 */ 2287 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2288 &wi) == -1 || 2289 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2290 &wi) == -1 || 2291 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2292 &wi) == -1) { 2293 mdb_warn("couldn't find umem_cache walker"); 2294 return (1); 2295 } 2296 return (0); 2297 } 2298 2299 /*ARGSUSED*/ 2300 static int 2301 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2302 { 2303 whatis_info_t wi; 2304 2305 bzero(&wi, sizeof (wi)); 2306 wi.wi_w = w; 2307 2308 if (mdb_walk("vmem_postfix", 2309 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2310 mdb_warn("couldn't find vmem_postfix walker"); 2311 return (1); 2312 } 2313 return (0); 2314 } 2315 2316 int 2317 umem_init(void) 2318 { 2319 mdb_walker_t w = { 2320 "umem_cache", "walk list of umem caches", umem_cache_walk_init, 2321 umem_cache_walk_step, umem_cache_walk_fini 2322 }; 2323 2324 if (mdb_add_walker(&w) == -1) { 2325 mdb_warn("failed to add umem_cache walker"); 2326 return (-1); 2327 } 2328 2329 if (umem_update_variables() == -1) 2330 return (-1); 2331 2332 /* install a callback so that our variables are always up-to-date */ 2333 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL); 2334 umem_statechange_cb(NULL); 2335 2336 /* 2337 * Register our ::whatis callbacks. 2338 */ 2339 mdb_whatis_register("umem", whatis_run_umem, NULL, 2340 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); 2341 mdb_whatis_register("vmem", whatis_run_vmem, NULL, 2342 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); 2343 2344 return (0); 2345 } 2346 2347 typedef struct umem_log_cpu { 2348 uintptr_t umc_low; 2349 uintptr_t umc_high; 2350 } umem_log_cpu_t; 2351 2352 int 2353 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc) 2354 { 2355 int i; 2356 2357 for (i = 0; i < umem_max_ncpus; i++) { 2358 if (addr >= umc[i].umc_low && addr < umc[i].umc_high) 2359 break; 2360 } 2361 2362 if (i == umem_max_ncpus) 2363 mdb_printf(" "); 2364 else 2365 mdb_printf("%3d", i); 2366 2367 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2368 b->bc_timestamp, b->bc_thread); 2369 2370 return (WALK_NEXT); 2371 } 2372 2373 /*ARGSUSED*/ 2374 int 2375 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2376 { 2377 umem_log_header_t lh; 2378 umem_cpu_log_header_t clh; 2379 uintptr_t lhp, clhp; 2380 umem_log_cpu_t *umc; 2381 int i; 2382 2383 if (umem_readvar(&lhp, "umem_transaction_log") == -1) { 2384 mdb_warn("failed to read 'umem_transaction_log'"); 2385 return (DCMD_ERR); 2386 } 2387 2388 if (lhp == 0) { 2389 mdb_warn("no umem transaction log\n"); 2390 return (DCMD_ERR); 2391 } 2392 2393 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) { 2394 mdb_warn("failed to read log header at %p", lhp); 2395 return (DCMD_ERR); 2396 } 2397 2398 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2399 2400 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus, 2401 UM_SLEEP | UM_GC); 2402 2403 for (i = 0; i < umem_max_ncpus; i++) { 2404 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2405 mdb_warn("cannot read cpu %d's log header at %p", 2406 i, clhp); 2407 return (DCMD_ERR); 2408 } 2409 2410 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize + 2411 (uintptr_t)lh.lh_base; 2412 umc[i].umc_high = (uintptr_t)clh.clh_current; 2413 2414 clhp += sizeof (umem_cpu_log_header_t); 2415 } 2416 2417 if (DCMD_HDRSPEC(flags)) { 2418 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", 2419 "BUFADDR", "TIMESTAMP", "THREAD"); 2420 } 2421 2422 /* 2423 * If we have been passed an address, we'll just print out that 2424 * log entry. 2425 */ 2426 if (flags & DCMD_ADDRSPEC) { 2427 umem_bufctl_audit_t *bp; 2428 UMEM_LOCAL_BUFCTL_AUDIT(&bp); 2429 2430 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2431 mdb_warn("failed to read bufctl at %p", addr); 2432 return (DCMD_ERR); 2433 } 2434 2435 (void) umem_log_walk(addr, bp, umc); 2436 2437 return (DCMD_OK); 2438 } 2439 2440 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) { 2441 mdb_warn("can't find umem log walker"); 2442 return (DCMD_ERR); 2443 } 2444 2445 return (DCMD_OK); 2446 } 2447 2448 typedef struct bufctl_history_cb { 2449 int bhc_flags; 2450 int bhc_argc; 2451 const mdb_arg_t *bhc_argv; 2452 int bhc_ret; 2453 } bufctl_history_cb_t; 2454 2455 /*ARGSUSED*/ 2456 static int 2457 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2458 { 2459 bufctl_history_cb_t *bhc = arg; 2460 2461 bhc->bhc_ret = 2462 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2463 2464 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2465 2466 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2467 } 2468 2469 void 2470 bufctl_help(void) 2471 { 2472 mdb_printf("%s\n", 2473 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n"); 2474 mdb_dec_indent(2); 2475 mdb_printf("%<b>OPTIONS%</b>\n"); 2476 mdb_inc_indent(2); 2477 mdb_printf("%s", 2478 " -v Display the full content of the bufctl, including its stack trace\n" 2479 " -h retrieve the bufctl's transaction history, if available\n" 2480 " -a addr\n" 2481 " filter out bufctls not involving the buffer at addr\n" 2482 " -c caller\n" 2483 " filter out bufctls without the function/PC in their stack trace\n" 2484 " -e earliest\n" 2485 " filter out bufctls timestamped before earliest\n" 2486 " -l latest\n" 2487 " filter out bufctls timestamped after latest\n" 2488 " -t thread\n" 2489 " filter out bufctls not involving thread\n"); 2490 } 2491 2492 int 2493 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2494 { 2495 uint_t verbose = FALSE; 2496 uint_t history = FALSE; 2497 uint_t in_history = FALSE; 2498 uintptr_t caller = 0, thread = 0; 2499 uintptr_t laddr, haddr, baddr = 0; 2500 hrtime_t earliest = 0, latest = 0; 2501 int i, depth; 2502 char c[MDB_SYM_NAMLEN]; 2503 GElf_Sym sym; 2504 umem_bufctl_audit_t *bcp; 2505 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 2506 2507 if (mdb_getopts(argc, argv, 2508 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2509 'h', MDB_OPT_SETBITS, TRUE, &history, 2510 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2511 'c', MDB_OPT_UINTPTR, &caller, 2512 't', MDB_OPT_UINTPTR, &thread, 2513 'e', MDB_OPT_UINT64, &earliest, 2514 'l', MDB_OPT_UINT64, &latest, 2515 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2516 return (DCMD_USAGE); 2517 2518 if (!(flags & DCMD_ADDRSPEC)) 2519 return (DCMD_USAGE); 2520 2521 if (in_history && !history) 2522 return (DCMD_USAGE); 2523 2524 if (history && !in_history) { 2525 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2526 UM_SLEEP | UM_GC); 2527 bufctl_history_cb_t bhc; 2528 2529 nargv[0].a_type = MDB_TYPE_STRING; 2530 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2531 2532 for (i = 0; i < argc; i++) 2533 nargv[i + 1] = argv[i]; 2534 2535 /* 2536 * When in history mode, we treat each element as if it 2537 * were in a seperate loop, so that the headers group 2538 * bufctls with similar histories. 2539 */ 2540 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2541 bhc.bhc_argc = argc + 1; 2542 bhc.bhc_argv = nargv; 2543 bhc.bhc_ret = DCMD_OK; 2544 2545 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2546 addr) == -1) { 2547 mdb_warn("unable to walk bufctl_history"); 2548 return (DCMD_ERR); 2549 } 2550 2551 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2552 mdb_printf("\n"); 2553 2554 return (bhc.bhc_ret); 2555 } 2556 2557 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2558 if (verbose) { 2559 mdb_printf("%16s %16s %16s %16s\n" 2560 "%<u>%16s %16s %16s %16s%</u>\n", 2561 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2562 "", "CACHE", "LASTLOG", "CONTENTS"); 2563 } else { 2564 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n", 2565 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER"); 2566 } 2567 } 2568 2569 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2570 mdb_warn("couldn't read bufctl at %p", addr); 2571 return (DCMD_ERR); 2572 } 2573 2574 /* 2575 * Guard against bogus bc_depth in case the bufctl is corrupt or 2576 * the address does not really refer to a bufctl. 2577 */ 2578 depth = MIN(bcp->bc_depth, umem_stack_depth); 2579 2580 if (caller != 0) { 2581 laddr = caller; 2582 haddr = caller + sizeof (caller); 2583 2584 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2585 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2586 /* 2587 * We were provided an exact symbol value; any 2588 * address in the function is valid. 2589 */ 2590 laddr = (uintptr_t)sym.st_value; 2591 haddr = (uintptr_t)sym.st_value + sym.st_size; 2592 } 2593 2594 for (i = 0; i < depth; i++) 2595 if (bcp->bc_stack[i] >= laddr && 2596 bcp->bc_stack[i] < haddr) 2597 break; 2598 2599 if (i == depth) 2600 return (DCMD_OK); 2601 } 2602 2603 if (thread != 0 && (uintptr_t)bcp->bc_thread != thread) 2604 return (DCMD_OK); 2605 2606 if (earliest != 0 && bcp->bc_timestamp < earliest) 2607 return (DCMD_OK); 2608 2609 if (latest != 0 && bcp->bc_timestamp > latest) 2610 return (DCMD_OK); 2611 2612 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr) 2613 return (DCMD_OK); 2614 2615 if (flags & DCMD_PIPE_OUT) { 2616 mdb_printf("%#r\n", addr); 2617 return (DCMD_OK); 2618 } 2619 2620 if (verbose) { 2621 mdb_printf( 2622 "%<b>%16p%</b> %16p %16llx %16d\n" 2623 "%16s %16p %16p %16p\n", 2624 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread, 2625 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents); 2626 2627 mdb_inc_indent(17); 2628 for (i = 0; i < depth; i++) 2629 mdb_printf("%a\n", bcp->bc_stack[i]); 2630 mdb_dec_indent(17); 2631 mdb_printf("\n"); 2632 } else { 2633 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr, 2634 bcp->bc_timestamp, bcp->bc_thread); 2635 2636 for (i = 0; i < depth; i++) { 2637 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2638 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2639 continue; 2640 if (is_umem_sym(c, "umem_")) 2641 continue; 2642 mdb_printf(" %a\n", bcp->bc_stack[i]); 2643 break; 2644 } 2645 2646 if (i >= depth) 2647 mdb_printf("\n"); 2648 } 2649 2650 return (DCMD_OK); 2651 } 2652 2653 /*ARGSUSED*/ 2654 int 2655 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2656 { 2657 mdb_arg_t a; 2658 2659 if (!(flags & DCMD_ADDRSPEC)) 2660 return (DCMD_USAGE); 2661 2662 if (argc != 0) 2663 return (DCMD_USAGE); 2664 2665 a.a_type = MDB_TYPE_STRING; 2666 a.a_un.a_str = "-v"; 2667 2668 return (bufctl(addr, flags, 1, &a)); 2669 } 2670 2671 typedef struct umem_verify { 2672 uint64_t *umv_buf; /* buffer to read cache contents into */ 2673 size_t umv_size; /* number of bytes in umv_buf */ 2674 int umv_corruption; /* > 0 if corruption found. */ 2675 int umv_besilent; /* report actual corruption sites */ 2676 struct umem_cache umv_cache; /* the cache we're operating on */ 2677 } umem_verify_t; 2678 2679 /* 2680 * verify_pattern() 2681 * verify that buf is filled with the pattern pat. 2682 */ 2683 static int64_t 2684 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2685 { 2686 /*LINTED*/ 2687 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 2688 uint64_t *buf; 2689 2690 for (buf = buf_arg; buf < bufend; buf++) 2691 if (*buf != pat) 2692 return ((uintptr_t)buf - (uintptr_t)buf_arg); 2693 return (-1); 2694 } 2695 2696 /* 2697 * verify_buftag() 2698 * verify that btp->bt_bxstat == (bcp ^ pat) 2699 */ 2700 static int 2701 verify_buftag(umem_buftag_t *btp, uintptr_t pat) 2702 { 2703 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 2704 } 2705 2706 /* 2707 * verify_free() 2708 * verify the integrity of a free block of memory by checking 2709 * that it is filled with 0xdeadbeef and that its buftag is sane. 2710 */ 2711 /*ARGSUSED1*/ 2712 static int 2713 verify_free(uintptr_t addr, const void *data, void *private) 2714 { 2715 umem_verify_t *umv = (umem_verify_t *)private; 2716 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2717 int64_t corrupt; /* corruption offset */ 2718 umem_buftag_t *buftagp; /* ptr to buftag */ 2719 umem_cache_t *cp = &umv->umv_cache; 2720 int besilent = umv->umv_besilent; 2721 2722 /*LINTED*/ 2723 buftagp = UMEM_BUFTAG(cp, buf); 2724 2725 /* 2726 * Read the buffer to check. 2727 */ 2728 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2729 if (!besilent) 2730 mdb_warn("couldn't read %p", addr); 2731 return (WALK_NEXT); 2732 } 2733 2734 if ((corrupt = verify_pattern(buf, cp->cache_verify, 2735 UMEM_FREE_PATTERN)) >= 0) { 2736 if (!besilent) 2737 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 2738 addr, (uintptr_t)addr + corrupt); 2739 goto corrupt; 2740 } 2741 2742 if ((cp->cache_flags & UMF_HASH) && 2743 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) { 2744 if (!besilent) 2745 mdb_printf("buffer %p (free) seems to " 2746 "have a corrupt redzone pattern\n", addr); 2747 goto corrupt; 2748 } 2749 2750 /* 2751 * confirm bufctl pointer integrity. 2752 */ 2753 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) { 2754 if (!besilent) 2755 mdb_printf("buffer %p (free) has a corrupt " 2756 "buftag\n", addr); 2757 goto corrupt; 2758 } 2759 2760 return (WALK_NEXT); 2761 corrupt: 2762 umv->umv_corruption++; 2763 return (WALK_NEXT); 2764 } 2765 2766 /* 2767 * verify_alloc() 2768 * Verify that the buftag of an allocated buffer makes sense with respect 2769 * to the buffer. 2770 */ 2771 /*ARGSUSED1*/ 2772 static int 2773 verify_alloc(uintptr_t addr, const void *data, void *private) 2774 { 2775 umem_verify_t *umv = (umem_verify_t *)private; 2776 umem_cache_t *cp = &umv->umv_cache; 2777 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2778 /*LINTED*/ 2779 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf); 2780 uint32_t *ip = (uint32_t *)buftagp; 2781 uint8_t *bp = (uint8_t *)buf; 2782 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 2783 int besilent = umv->umv_besilent; 2784 2785 /* 2786 * Read the buffer to check. 2787 */ 2788 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2789 if (!besilent) 2790 mdb_warn("couldn't read %p", addr); 2791 return (WALK_NEXT); 2792 } 2793 2794 /* 2795 * There are two cases to handle: 2796 * 1. If the buf was alloc'd using umem_cache_alloc, it will have 2797 * 0xfeedfacefeedface at the end of it 2798 * 2. If the buf was alloc'd using umem_alloc, it will have 2799 * 0xbb just past the end of the region in use. At the buftag, 2800 * it will have 0xfeedface (or, if the whole buffer is in use, 2801 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 2802 * endianness), followed by 32 bits containing the offset of the 2803 * 0xbb byte in the buffer. 2804 * 2805 * Finally, the two 32-bit words that comprise the second half of the 2806 * buftag should xor to UMEM_BUFTAG_ALLOC 2807 */ 2808 2809 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN) 2810 looks_ok = 1; 2811 else if (!UMEM_SIZE_VALID(ip[1])) 2812 size_ok = 0; 2813 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE) 2814 looks_ok = 1; 2815 else 2816 size_ok = 0; 2817 2818 if (!size_ok) { 2819 if (!besilent) 2820 mdb_printf("buffer %p (allocated) has a corrupt " 2821 "redzone size encoding\n", addr); 2822 goto corrupt; 2823 } 2824 2825 if (!looks_ok) { 2826 if (!besilent) 2827 mdb_printf("buffer %p (allocated) has a corrupt " 2828 "redzone signature\n", addr); 2829 goto corrupt; 2830 } 2831 2832 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) { 2833 if (!besilent) 2834 mdb_printf("buffer %p (allocated) has a " 2835 "corrupt buftag\n", addr); 2836 goto corrupt; 2837 } 2838 2839 return (WALK_NEXT); 2840 corrupt: 2841 umv->umv_corruption++; 2842 return (WALK_NEXT); 2843 } 2844 2845 /*ARGSUSED2*/ 2846 int 2847 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2848 { 2849 if (flags & DCMD_ADDRSPEC) { 2850 int check_alloc = 0, check_free = 0; 2851 umem_verify_t umv; 2852 2853 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache), 2854 addr) == -1) { 2855 mdb_warn("couldn't read umem_cache %p", addr); 2856 return (DCMD_ERR); 2857 } 2858 2859 umv.umv_size = umv.umv_cache.cache_buftag + 2860 sizeof (umem_buftag_t); 2861 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC); 2862 umv.umv_corruption = 0; 2863 2864 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) { 2865 check_alloc = 1; 2866 if (umv.umv_cache.cache_flags & UMF_DEADBEEF) 2867 check_free = 1; 2868 } else { 2869 if (!(flags & DCMD_LOOP)) { 2870 mdb_warn("cache %p (%s) does not have " 2871 "redzone checking enabled\n", addr, 2872 umv.umv_cache.cache_name); 2873 } 2874 return (DCMD_ERR); 2875 } 2876 2877 if (flags & DCMD_LOOP) { 2878 /* 2879 * table mode, don't print out every corrupt buffer 2880 */ 2881 umv.umv_besilent = 1; 2882 } else { 2883 mdb_printf("Summary for cache '%s'\n", 2884 umv.umv_cache.cache_name); 2885 mdb_inc_indent(2); 2886 umv.umv_besilent = 0; 2887 } 2888 2889 if (check_alloc) 2890 (void) mdb_pwalk("umem", verify_alloc, &umv, addr); 2891 if (check_free) 2892 (void) mdb_pwalk("freemem", verify_free, &umv, addr); 2893 2894 if (flags & DCMD_LOOP) { 2895 if (umv.umv_corruption == 0) { 2896 mdb_printf("%-*s %?p clean\n", 2897 UMEM_CACHE_NAMELEN, 2898 umv.umv_cache.cache_name, addr); 2899 } else { 2900 char *s = ""; /* optional s in "buffer[s]" */ 2901 if (umv.umv_corruption > 1) 2902 s = "s"; 2903 2904 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 2905 UMEM_CACHE_NAMELEN, 2906 umv.umv_cache.cache_name, addr, 2907 umv.umv_corruption, s); 2908 } 2909 } else { 2910 /* 2911 * This is the more verbose mode, when the user has 2912 * type addr::umem_verify. If the cache was clean, 2913 * nothing will have yet been printed. So say something. 2914 */ 2915 if (umv.umv_corruption == 0) 2916 mdb_printf("clean\n"); 2917 2918 mdb_dec_indent(2); 2919 } 2920 } else { 2921 /* 2922 * If the user didn't specify a cache to verify, we'll walk all 2923 * umem_cache's, specifying ourself as a callback for each... 2924 * this is the equivalent of '::walk umem_cache .::umem_verify' 2925 */ 2926 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN, 2927 "Cache Name", "Addr", "Cache Integrity"); 2928 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL)); 2929 } 2930 2931 return (DCMD_OK); 2932 } 2933 2934 typedef struct vmem_node { 2935 struct vmem_node *vn_next; 2936 struct vmem_node *vn_parent; 2937 struct vmem_node *vn_sibling; 2938 struct vmem_node *vn_children; 2939 uintptr_t vn_addr; 2940 int vn_marked; 2941 vmem_t vn_vmem; 2942 } vmem_node_t; 2943 2944 typedef struct vmem_walk { 2945 vmem_node_t *vw_root; 2946 vmem_node_t *vw_current; 2947 } vmem_walk_t; 2948 2949 int 2950 vmem_walk_init(mdb_walk_state_t *wsp) 2951 { 2952 uintptr_t vaddr, paddr; 2953 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 2954 vmem_walk_t *vw; 2955 2956 if (umem_readvar(&vaddr, "vmem_list") == -1) { 2957 mdb_warn("couldn't read 'vmem_list'"); 2958 return (WALK_ERR); 2959 } 2960 2961 while (vaddr != 0) { 2962 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 2963 vp->vn_addr = vaddr; 2964 vp->vn_next = head; 2965 head = vp; 2966 2967 if (vaddr == wsp->walk_addr) 2968 current = vp; 2969 2970 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 2971 mdb_warn("couldn't read vmem_t at %p", vaddr); 2972 goto err; 2973 } 2974 2975 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 2976 } 2977 2978 for (vp = head; vp != NULL; vp = vp->vn_next) { 2979 2980 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) { 2981 vp->vn_sibling = root; 2982 root = vp; 2983 continue; 2984 } 2985 2986 for (parent = head; parent != NULL; parent = parent->vn_next) { 2987 if (parent->vn_addr != paddr) 2988 continue; 2989 vp->vn_sibling = parent->vn_children; 2990 parent->vn_children = vp; 2991 vp->vn_parent = parent; 2992 break; 2993 } 2994 2995 if (parent == NULL) { 2996 mdb_warn("couldn't find %p's parent (%p)\n", 2997 vp->vn_addr, paddr); 2998 goto err; 2999 } 3000 } 3001 3002 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3003 vw->vw_root = root; 3004 3005 if (current != NULL) 3006 vw->vw_current = current; 3007 else 3008 vw->vw_current = root; 3009 3010 wsp->walk_data = vw; 3011 return (WALK_NEXT); 3012 err: 3013 for (vp = head; head != NULL; vp = head) { 3014 head = vp->vn_next; 3015 mdb_free(vp, sizeof (vmem_node_t)); 3016 } 3017 3018 return (WALK_ERR); 3019 } 3020 3021 int 3022 vmem_walk_step(mdb_walk_state_t *wsp) 3023 { 3024 vmem_walk_t *vw = wsp->walk_data; 3025 vmem_node_t *vp; 3026 int rval; 3027 3028 if ((vp = vw->vw_current) == NULL) 3029 return (WALK_DONE); 3030 3031 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3032 3033 if (vp->vn_children != NULL) { 3034 vw->vw_current = vp->vn_children; 3035 return (rval); 3036 } 3037 3038 do { 3039 vw->vw_current = vp->vn_sibling; 3040 vp = vp->vn_parent; 3041 } while (vw->vw_current == NULL && vp != NULL); 3042 3043 return (rval); 3044 } 3045 3046 /* 3047 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3048 * children are visited before their parent. We perform the postfix walk 3049 * iteratively (rather than recursively) to allow mdb to regain control 3050 * after each callback. 3051 */ 3052 int 3053 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3054 { 3055 vmem_walk_t *vw = wsp->walk_data; 3056 vmem_node_t *vp = vw->vw_current; 3057 int rval; 3058 3059 /* 3060 * If this node is marked, then we know that we have already visited 3061 * all of its children. If the node has any siblings, they need to 3062 * be visited next; otherwise, we need to visit the parent. Note 3063 * that vp->vn_marked will only be zero on the first invocation of 3064 * the step function. 3065 */ 3066 if (vp->vn_marked) { 3067 if (vp->vn_sibling != NULL) 3068 vp = vp->vn_sibling; 3069 else if (vp->vn_parent != NULL) 3070 vp = vp->vn_parent; 3071 else { 3072 /* 3073 * We have neither a parent, nor a sibling, and we 3074 * have already been visited; we're done. 3075 */ 3076 return (WALK_DONE); 3077 } 3078 } 3079 3080 /* 3081 * Before we visit this node, visit its children. 3082 */ 3083 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3084 vp = vp->vn_children; 3085 3086 vp->vn_marked = 1; 3087 vw->vw_current = vp; 3088 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3089 3090 return (rval); 3091 } 3092 3093 void 3094 vmem_walk_fini(mdb_walk_state_t *wsp) 3095 { 3096 vmem_walk_t *vw = wsp->walk_data; 3097 vmem_node_t *root = vw->vw_root; 3098 int done; 3099 3100 if (root == NULL) 3101 return; 3102 3103 if ((vw->vw_root = root->vn_children) != NULL) 3104 vmem_walk_fini(wsp); 3105 3106 vw->vw_root = root->vn_sibling; 3107 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3108 mdb_free(root, sizeof (vmem_node_t)); 3109 3110 if (done) { 3111 mdb_free(vw, sizeof (vmem_walk_t)); 3112 } else { 3113 vmem_walk_fini(wsp); 3114 } 3115 } 3116 3117 typedef struct vmem_seg_walk { 3118 uint8_t vsw_type; 3119 uintptr_t vsw_start; 3120 uintptr_t vsw_current; 3121 } vmem_seg_walk_t; 3122 3123 /*ARGSUSED*/ 3124 int 3125 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3126 { 3127 vmem_seg_walk_t *vsw; 3128 3129 if (wsp->walk_addr == 0) { 3130 mdb_warn("vmem_%s does not support global walks\n", name); 3131 return (WALK_ERR); 3132 } 3133 3134 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3135 3136 vsw->vsw_type = type; 3137 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0); 3138 vsw->vsw_current = vsw->vsw_start; 3139 3140 return (WALK_NEXT); 3141 } 3142 3143 /* 3144 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 3145 */ 3146 #define VMEM_NONE 0 3147 3148 int 3149 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3150 { 3151 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3152 } 3153 3154 int 3155 vmem_free_walk_init(mdb_walk_state_t *wsp) 3156 { 3157 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3158 } 3159 3160 int 3161 vmem_span_walk_init(mdb_walk_state_t *wsp) 3162 { 3163 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3164 } 3165 3166 int 3167 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3168 { 3169 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3170 } 3171 3172 int 3173 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3174 { 3175 vmem_seg_t seg; 3176 vmem_seg_walk_t *vsw = wsp->walk_data; 3177 uintptr_t addr = vsw->vsw_current; 3178 static size_t seg_size = 0; 3179 int rval; 3180 3181 if (!seg_size) { 3182 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) { 3183 mdb_warn("failed to read 'vmem_seg_size'"); 3184 seg_size = sizeof (vmem_seg_t); 3185 } 3186 } 3187 3188 if (seg_size < sizeof (seg)) 3189 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3190 3191 if (mdb_vread(&seg, seg_size, addr) == -1) { 3192 mdb_warn("couldn't read vmem_seg at %p", addr); 3193 return (WALK_ERR); 3194 } 3195 3196 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3197 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3198 rval = WALK_NEXT; 3199 } else { 3200 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3201 } 3202 3203 if (vsw->vsw_current == vsw->vsw_start) 3204 return (WALK_DONE); 3205 3206 return (rval); 3207 } 3208 3209 void 3210 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3211 { 3212 vmem_seg_walk_t *vsw = wsp->walk_data; 3213 3214 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3215 } 3216 3217 #define VMEM_NAMEWIDTH 22 3218 3219 int 3220 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3221 { 3222 vmem_t v, parent; 3223 uintptr_t paddr; 3224 int ident = 0; 3225 char c[VMEM_NAMEWIDTH]; 3226 3227 if (!(flags & DCMD_ADDRSPEC)) { 3228 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3229 mdb_warn("can't walk vmem"); 3230 return (DCMD_ERR); 3231 } 3232 return (DCMD_OK); 3233 } 3234 3235 if (DCMD_HDRSPEC(flags)) 3236 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3237 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3238 "TOTAL", "SUCCEED", "FAIL"); 3239 3240 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3241 mdb_warn("couldn't read vmem at %p", addr); 3242 return (DCMD_ERR); 3243 } 3244 3245 for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) { 3246 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3247 mdb_warn("couldn't trace %p's ancestry", addr); 3248 ident = 0; 3249 break; 3250 } 3251 paddr = (uintptr_t)parent.vm_source; 3252 } 3253 3254 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3255 3256 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3257 addr, VMEM_NAMEWIDTH, c, 3258 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total, 3259 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail); 3260 3261 return (DCMD_OK); 3262 } 3263 3264 void 3265 vmem_seg_help(void) 3266 { 3267 mdb_printf("%s\n", 3268 "Display the contents of vmem_seg_ts, with optional filtering.\n" 3269 "\n" 3270 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3271 "representing a single chunk of data. Only ALLOC segments have debugging\n" 3272 "information.\n"); 3273 mdb_dec_indent(2); 3274 mdb_printf("%<b>OPTIONS%</b>\n"); 3275 mdb_inc_indent(2); 3276 mdb_printf("%s", 3277 " -v Display the full content of the vmem_seg, including its stack trace\n" 3278 " -s report the size of the segment, instead of the end address\n" 3279 " -c caller\n" 3280 " filter out segments without the function/PC in their stack trace\n" 3281 " -e earliest\n" 3282 " filter out segments timestamped before earliest\n" 3283 " -l latest\n" 3284 " filter out segments timestamped after latest\n" 3285 " -m minsize\n" 3286 " filer out segments smaller than minsize\n" 3287 " -M maxsize\n" 3288 " filer out segments larger than maxsize\n" 3289 " -t thread\n" 3290 " filter out segments not involving thread\n" 3291 " -T type\n" 3292 " filter out segments not of type 'type'\n" 3293 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3294 } 3295 3296 3297 /*ARGSUSED*/ 3298 int 3299 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3300 { 3301 vmem_seg_t vs; 3302 uintptr_t *stk = vs.vs_stack; 3303 uintptr_t sz; 3304 uint8_t t; 3305 const char *type = NULL; 3306 GElf_Sym sym; 3307 char c[MDB_SYM_NAMLEN]; 3308 int no_debug; 3309 int i; 3310 int depth; 3311 uintptr_t laddr, haddr; 3312 3313 uintptr_t caller = 0, thread = 0; 3314 uintptr_t minsize = 0, maxsize = 0; 3315 3316 hrtime_t earliest = 0, latest = 0; 3317 3318 uint_t size = 0; 3319 uint_t verbose = 0; 3320 3321 if (!(flags & DCMD_ADDRSPEC)) 3322 return (DCMD_USAGE); 3323 3324 if (mdb_getopts(argc, argv, 3325 'c', MDB_OPT_UINTPTR, &caller, 3326 'e', MDB_OPT_UINT64, &earliest, 3327 'l', MDB_OPT_UINT64, &latest, 3328 's', MDB_OPT_SETBITS, TRUE, &size, 3329 'm', MDB_OPT_UINTPTR, &minsize, 3330 'M', MDB_OPT_UINTPTR, &maxsize, 3331 't', MDB_OPT_UINTPTR, &thread, 3332 'T', MDB_OPT_STR, &type, 3333 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3334 NULL) != argc) 3335 return (DCMD_USAGE); 3336 3337 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3338 if (verbose) { 3339 mdb_printf("%16s %4s %16s %16s %16s\n" 3340 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3341 "ADDR", "TYPE", "START", "END", "SIZE", 3342 "", "", "THREAD", "TIMESTAMP", ""); 3343 } else { 3344 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3345 "START", size? "SIZE" : "END", "WHO"); 3346 } 3347 } 3348 3349 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3350 mdb_warn("couldn't read vmem_seg at %p", addr); 3351 return (DCMD_ERR); 3352 } 3353 3354 if (type != NULL) { 3355 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3356 t = VMEM_ALLOC; 3357 else if (strcmp(type, "FREE") == 0) 3358 t = VMEM_FREE; 3359 else if (strcmp(type, "SPAN") == 0) 3360 t = VMEM_SPAN; 3361 else if (strcmp(type, "ROTR") == 0 || 3362 strcmp(type, "ROTOR") == 0) 3363 t = VMEM_ROTOR; 3364 else if (strcmp(type, "WLKR") == 0 || 3365 strcmp(type, "WALKER") == 0) 3366 t = VMEM_WALKER; 3367 else { 3368 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3369 type); 3370 return (DCMD_ERR); 3371 } 3372 3373 if (vs.vs_type != t) 3374 return (DCMD_OK); 3375 } 3376 3377 sz = vs.vs_end - vs.vs_start; 3378 3379 if (minsize != 0 && sz < minsize) 3380 return (DCMD_OK); 3381 3382 if (maxsize != 0 && sz > maxsize) 3383 return (DCMD_OK); 3384 3385 t = vs.vs_type; 3386 depth = vs.vs_depth; 3387 3388 /* 3389 * debug info, when present, is only accurate for VMEM_ALLOC segments 3390 */ 3391 no_debug = (t != VMEM_ALLOC) || 3392 (depth == 0 || depth > VMEM_STACK_DEPTH); 3393 3394 if (no_debug) { 3395 if (caller != 0 || thread != 0 || earliest != 0 || latest != 0) 3396 return (DCMD_OK); /* not enough info */ 3397 } else { 3398 if (caller != 0) { 3399 laddr = caller; 3400 haddr = caller + sizeof (caller); 3401 3402 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3403 sizeof (c), &sym) != -1 && 3404 caller == (uintptr_t)sym.st_value) { 3405 /* 3406 * We were provided an exact symbol value; any 3407 * address in the function is valid. 3408 */ 3409 laddr = (uintptr_t)sym.st_value; 3410 haddr = (uintptr_t)sym.st_value + sym.st_size; 3411 } 3412 3413 for (i = 0; i < depth; i++) 3414 if (vs.vs_stack[i] >= laddr && 3415 vs.vs_stack[i] < haddr) 3416 break; 3417 3418 if (i == depth) 3419 return (DCMD_OK); 3420 } 3421 3422 if (thread != 0 && (uintptr_t)vs.vs_thread != thread) 3423 return (DCMD_OK); 3424 3425 if (earliest != 0 && vs.vs_timestamp < earliest) 3426 return (DCMD_OK); 3427 3428 if (latest != 0 && vs.vs_timestamp > latest) 3429 return (DCMD_OK); 3430 } 3431 3432 type = (t == VMEM_ALLOC ? "ALLC" : 3433 t == VMEM_FREE ? "FREE" : 3434 t == VMEM_SPAN ? "SPAN" : 3435 t == VMEM_ROTOR ? "ROTR" : 3436 t == VMEM_WALKER ? "WLKR" : 3437 "????"); 3438 3439 if (flags & DCMD_PIPE_OUT) { 3440 mdb_printf("%#r\n", addr); 3441 return (DCMD_OK); 3442 } 3443 3444 if (verbose) { 3445 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3446 addr, type, vs.vs_start, vs.vs_end, sz); 3447 3448 if (no_debug) 3449 return (DCMD_OK); 3450 3451 mdb_printf("%16s %4s %16d %16llx\n", 3452 "", "", vs.vs_thread, vs.vs_timestamp); 3453 3454 mdb_inc_indent(17); 3455 for (i = 0; i < depth; i++) { 3456 mdb_printf("%a\n", stk[i]); 3457 } 3458 mdb_dec_indent(17); 3459 mdb_printf("\n"); 3460 } else { 3461 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3462 vs.vs_start, size? sz : vs.vs_end); 3463 3464 if (no_debug) { 3465 mdb_printf("\n"); 3466 return (DCMD_OK); 3467 } 3468 3469 for (i = 0; i < depth; i++) { 3470 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3471 c, sizeof (c), &sym) == -1) 3472 continue; 3473 if (is_umem_sym(c, "vmem_")) 3474 continue; 3475 break; 3476 } 3477 mdb_printf(" %a\n", stk[i]); 3478 } 3479 return (DCMD_OK); 3480 } 3481 3482 /*ARGSUSED*/ 3483 static int 3484 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest) 3485 { 3486 char name[UMEM_CACHE_NAMELEN + 1]; 3487 hrtime_t delta; 3488 int i, depth; 3489 3490 if (bcp->bc_timestamp == 0) 3491 return (WALK_DONE); 3492 3493 if (*newest == 0) 3494 *newest = bcp->bc_timestamp; 3495 3496 delta = *newest - bcp->bc_timestamp; 3497 depth = MIN(bcp->bc_depth, umem_stack_depth); 3498 3499 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3500 &bcp->bc_cache->cache_name) <= 0) 3501 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3502 3503 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3504 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3505 3506 for (i = 0; i < depth; i++) 3507 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3508 3509 return (WALK_NEXT); 3510 } 3511 3512 int 3513 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3514 { 3515 const char *logname = "umem_transaction_log"; 3516 hrtime_t newest = 0; 3517 3518 if ((flags & DCMD_ADDRSPEC) || argc > 1) 3519 return (DCMD_USAGE); 3520 3521 if (argc > 0) { 3522 if (argv->a_type != MDB_TYPE_STRING) 3523 return (DCMD_USAGE); 3524 if (strcmp(argv->a_un.a_str, "fail") == 0) 3525 logname = "umem_failure_log"; 3526 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3527 logname = "umem_slab_log"; 3528 else 3529 return (DCMD_USAGE); 3530 } 3531 3532 if (umem_readvar(&addr, logname) == -1) { 3533 mdb_warn("failed to read %s log header pointer"); 3534 return (DCMD_ERR); 3535 } 3536 3537 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) { 3538 mdb_warn("failed to walk umem log"); 3539 return (DCMD_ERR); 3540 } 3541 3542 return (DCMD_OK); 3543 } 3544 3545 /* 3546 * As the final lure for die-hard crash(1M) users, we provide ::umausers here. 3547 * The first piece is a structure which we use to accumulate umem_cache_t 3548 * addresses of interest. The umc_add is used as a callback for the umem_cache 3549 * walker; we either add all caches, or ones named explicitly as arguments. 3550 */ 3551 3552 typedef struct umclist { 3553 const char *umc_name; /* Name to match (or NULL) */ 3554 uintptr_t *umc_caches; /* List of umem_cache_t addrs */ 3555 int umc_nelems; /* Num entries in umc_caches */ 3556 int umc_size; /* Size of umc_caches array */ 3557 } umclist_t; 3558 3559 static int 3560 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc) 3561 { 3562 void *p; 3563 int s; 3564 3565 if (umc->umc_name == NULL || 3566 strcmp(cp->cache_name, umc->umc_name) == 0) { 3567 /* 3568 * If we have a match, grow our array (if necessary), and then 3569 * add the virtual address of the matching cache to our list. 3570 */ 3571 if (umc->umc_nelems >= umc->umc_size) { 3572 s = umc->umc_size ? umc->umc_size * 2 : 256; 3573 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3574 3575 bcopy(umc->umc_caches, p, 3576 sizeof (uintptr_t) * umc->umc_size); 3577 3578 umc->umc_caches = p; 3579 umc->umc_size = s; 3580 } 3581 3582 umc->umc_caches[umc->umc_nelems++] = addr; 3583 return (umc->umc_name ? WALK_DONE : WALK_NEXT); 3584 } 3585 3586 return (WALK_NEXT); 3587 } 3588 3589 /* 3590 * The second piece of ::umausers is a hash table of allocations. Each 3591 * allocation owner is identified by its stack trace and data_size. We then 3592 * track the total bytes of all such allocations, and the number of allocations 3593 * to report at the end. Once we have a list of caches, we walk through the 3594 * allocated bufctls of each, and update our hash table accordingly. 3595 */ 3596 3597 typedef struct umowner { 3598 struct umowner *umo_head; /* First hash elt in bucket */ 3599 struct umowner *umo_next; /* Next hash elt in chain */ 3600 size_t umo_signature; /* Hash table signature */ 3601 uint_t umo_num; /* Number of allocations */ 3602 size_t umo_data_size; /* Size of each allocation */ 3603 size_t umo_total_size; /* Total bytes of allocation */ 3604 int umo_depth; /* Depth of stack trace */ 3605 uintptr_t *umo_stack; /* Stack trace */ 3606 } umowner_t; 3607 3608 typedef struct umusers { 3609 const umem_cache_t *umu_cache; /* Current umem cache */ 3610 umowner_t *umu_hash; /* Hash table of owners */ 3611 uintptr_t *umu_stacks; /* stacks for owners */ 3612 int umu_nelems; /* Number of entries in use */ 3613 int umu_size; /* Total number of entries */ 3614 } umusers_t; 3615 3616 static void 3617 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp, 3618 size_t size, size_t data_size) 3619 { 3620 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3621 size_t bucket, signature = data_size; 3622 umowner_t *umo, *umoend; 3623 3624 /* 3625 * If the hash table is full, double its size and rehash everything. 3626 */ 3627 if (umu->umu_nelems >= umu->umu_size) { 3628 int s = umu->umu_size ? umu->umu_size * 2 : 1024; 3629 size_t umowner_size = sizeof (umowner_t); 3630 size_t trace_size = umem_stack_depth * sizeof (uintptr_t); 3631 uintptr_t *new_stacks; 3632 3633 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC); 3634 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC); 3635 3636 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size); 3637 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size); 3638 umu->umu_hash = umo; 3639 umu->umu_stacks = new_stacks; 3640 umu->umu_size = s; 3641 3642 umoend = umu->umu_hash + umu->umu_size; 3643 for (umo = umu->umu_hash; umo < umoend; umo++) { 3644 umo->umo_head = NULL; 3645 umo->umo_stack = &umu->umu_stacks[ 3646 umem_stack_depth * (umo - umu->umu_hash)]; 3647 } 3648 3649 umoend = umu->umu_hash + umu->umu_nelems; 3650 for (umo = umu->umu_hash; umo < umoend; umo++) { 3651 bucket = umo->umo_signature & (umu->umu_size - 1); 3652 umo->umo_next = umu->umu_hash[bucket].umo_head; 3653 umu->umu_hash[bucket].umo_head = umo; 3654 } 3655 } 3656 3657 /* 3658 * Finish computing the hash signature from the stack trace, and then 3659 * see if the owner is in the hash table. If so, update our stats. 3660 */ 3661 for (i = 0; i < depth; i++) 3662 signature += bcp->bc_stack[i]; 3663 3664 bucket = signature & (umu->umu_size - 1); 3665 3666 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) { 3667 if (umo->umo_signature == signature) { 3668 size_t difference = 0; 3669 3670 difference |= umo->umo_data_size - data_size; 3671 difference |= umo->umo_depth - depth; 3672 3673 for (i = 0; i < depth; i++) { 3674 difference |= umo->umo_stack[i] - 3675 bcp->bc_stack[i]; 3676 } 3677 3678 if (difference == 0) { 3679 umo->umo_total_size += size; 3680 umo->umo_num++; 3681 return; 3682 } 3683 } 3684 } 3685 3686 /* 3687 * If the owner is not yet hashed, grab the next element and fill it 3688 * in based on the allocation information. 3689 */ 3690 umo = &umu->umu_hash[umu->umu_nelems++]; 3691 umo->umo_next = umu->umu_hash[bucket].umo_head; 3692 umu->umu_hash[bucket].umo_head = umo; 3693 3694 umo->umo_signature = signature; 3695 umo->umo_num = 1; 3696 umo->umo_data_size = data_size; 3697 umo->umo_total_size = size; 3698 umo->umo_depth = depth; 3699 3700 for (i = 0; i < depth; i++) 3701 umo->umo_stack[i] = bcp->bc_stack[i]; 3702 } 3703 3704 /* 3705 * When ::umausers is invoked without the -f flag, we simply update our hash 3706 * table with the information from each allocated bufctl. 3707 */ 3708 /*ARGSUSED*/ 3709 static int 3710 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3711 { 3712 const umem_cache_t *cp = umu->umu_cache; 3713 3714 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3715 return (WALK_NEXT); 3716 } 3717 3718 /* 3719 * When ::umausers is invoked with the -f flag, we print out the information 3720 * for each bufctl as well as updating the hash table. 3721 */ 3722 static int 3723 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3724 { 3725 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3726 const umem_cache_t *cp = umu->umu_cache; 3727 3728 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 3729 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 3730 3731 for (i = 0; i < depth; i++) 3732 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3733 3734 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3735 return (WALK_NEXT); 3736 } 3737 3738 /* 3739 * We sort our results by allocation size before printing them. 3740 */ 3741 static int 3742 umownercmp(const void *lp, const void *rp) 3743 { 3744 const umowner_t *lhs = lp; 3745 const umowner_t *rhs = rp; 3746 3747 return (rhs->umo_total_size - lhs->umo_total_size); 3748 } 3749 3750 /* 3751 * The main engine of ::umausers is relatively straightforward: First we 3752 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we 3753 * iterate over the allocated bufctls of each cache in the list. Finally, 3754 * we sort and print our results. 3755 */ 3756 /*ARGSUSED*/ 3757 int 3758 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3759 { 3760 int mem_threshold = 8192; /* Minimum # bytes for printing */ 3761 int cnt_threshold = 100; /* Minimum # blocks for printing */ 3762 int audited_caches = 0; /* Number of UMF_AUDIT caches found */ 3763 int do_all_caches = 1; /* Do all caches (no arguments) */ 3764 int opt_e = FALSE; /* Include "small" users */ 3765 int opt_f = FALSE; /* Print stack traces */ 3766 3767 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1; 3768 umowner_t *umo, *umoend; 3769 int i, oelems; 3770 3771 umclist_t umc; 3772 umusers_t umu; 3773 3774 if (flags & DCMD_ADDRSPEC) 3775 return (DCMD_USAGE); 3776 3777 bzero(&umc, sizeof (umc)); 3778 bzero(&umu, sizeof (umu)); 3779 3780 while ((i = mdb_getopts(argc, argv, 3781 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 3782 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 3783 3784 argv += i; /* skip past options we just processed */ 3785 argc -= i; /* adjust argc */ 3786 3787 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 3788 return (DCMD_USAGE); 3789 3790 oelems = umc.umc_nelems; 3791 umc.umc_name = argv->a_un.a_str; 3792 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3793 3794 if (umc.umc_nelems == oelems) { 3795 mdb_warn("unknown umem cache: %s\n", umc.umc_name); 3796 return (DCMD_ERR); 3797 } 3798 3799 do_all_caches = 0; 3800 argv++; 3801 argc--; 3802 } 3803 3804 if (opt_e) 3805 mem_threshold = cnt_threshold = 0; 3806 3807 if (opt_f) 3808 callback = (mdb_walk_cb_t)umause2; 3809 3810 if (do_all_caches) { 3811 umc.umc_name = NULL; /* match all cache names */ 3812 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3813 } 3814 3815 for (i = 0; i < umc.umc_nelems; i++) { 3816 uintptr_t cp = umc.umc_caches[i]; 3817 umem_cache_t c; 3818 3819 if (mdb_vread(&c, sizeof (c), cp) == -1) { 3820 mdb_warn("failed to read cache at %p", cp); 3821 continue; 3822 } 3823 3824 if (!(c.cache_flags & UMF_AUDIT)) { 3825 if (!do_all_caches) { 3826 mdb_warn("UMF_AUDIT is not enabled for %s\n", 3827 c.cache_name); 3828 } 3829 continue; 3830 } 3831 3832 umu.umu_cache = &c; 3833 (void) mdb_pwalk("bufctl", callback, &umu, cp); 3834 audited_caches++; 3835 } 3836 3837 if (audited_caches == 0 && do_all_caches) { 3838 mdb_warn("UMF_AUDIT is not enabled for any caches\n"); 3839 return (DCMD_ERR); 3840 } 3841 3842 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp); 3843 umoend = umu.umu_hash + umu.umu_nelems; 3844 3845 for (umo = umu.umu_hash; umo < umoend; umo++) { 3846 if (umo->umo_total_size < mem_threshold && 3847 umo->umo_num < cnt_threshold) 3848 continue; 3849 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 3850 umo->umo_total_size, umo->umo_num, umo->umo_data_size); 3851 for (i = 0; i < umo->umo_depth; i++) 3852 mdb_printf("\t %a\n", umo->umo_stack[i]); 3853 } 3854 3855 return (DCMD_OK); 3856 } 3857 3858 struct malloc_data { 3859 uint32_t malloc_size; 3860 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */ 3861 }; 3862 3863 #ifdef _LP64 3864 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data)) 3865 #else 3866 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data)) 3867 #endif 3868 3869 typedef struct umem_malloc_info { 3870 size_t um_total; /* total allocated buffers */ 3871 size_t um_malloc; /* malloc buffers */ 3872 size_t um_malloc_size; /* sum of malloc buffer sizes */ 3873 size_t um_malloc_overhead; /* sum of in-chunk overheads */ 3874 3875 umem_cache_t *um_cp; 3876 3877 uint_t *um_bucket; 3878 } umem_malloc_info_t; 3879 3880 static void 3881 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc, 3882 size_t maxbuckets, size_t minbucketsize, int geometric) 3883 { 3884 uint64_t um_malloc; 3885 int minb = -1; 3886 int maxb = -1; 3887 int buckets; 3888 int nbucks; 3889 int i; 3890 int b; 3891 const int *distarray; 3892 3893 minb = (int)minmalloc; 3894 maxb = (int)maxmalloc; 3895 3896 nbucks = buckets = maxb - minb + 1; 3897 3898 um_malloc = 0; 3899 for (b = minb; b <= maxb; b++) 3900 um_malloc += um_bucket[b]; 3901 3902 if (maxbuckets != 0) 3903 buckets = MIN(buckets, maxbuckets); 3904 3905 if (minbucketsize > 1) { 3906 buckets = MIN(buckets, nbucks/minbucketsize); 3907 if (buckets == 0) { 3908 buckets = 1; 3909 minbucketsize = nbucks; 3910 } 3911 } 3912 3913 if (geometric) 3914 distarray = dist_geometric(buckets, minb, maxb, minbucketsize); 3915 else 3916 distarray = dist_linear(buckets, minb, maxb); 3917 3918 dist_print_header("malloc size", 11, "count"); 3919 for (i = 0; i < buckets; i++) { 3920 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11); 3921 } 3922 mdb_printf("\n"); 3923 } 3924 3925 /* 3926 * A malloc()ed buffer looks like: 3927 * 3928 * <----------- mi.malloc_size ---> 3929 * <----------- cp.cache_bufsize ------------------> 3930 * <----------- cp.cache_chunksize --------------------------------> 3931 * +-------+-----------------------+---------------+---------------+ 3932 * |/tag///| mallocsz |/round-off/////|/debug info////| 3933 * +-------+---------------------------------------+---------------+ 3934 * <-- usable space ------> 3935 * 3936 * mallocsz is the argument to malloc(3C). 3937 * mi.malloc_size is the actual size passed to umem_alloc(), which 3938 * is rounded up to the smallest available cache size, which is 3939 * cache_bufsize. If there is debugging or alignment overhead in 3940 * the cache, that is reflected in a larger cache_chunksize. 3941 * 3942 * The tag at the beginning of the buffer is either 8-bytes or 16-bytes, 3943 * depending upon the ISA's alignment requirements. For 32-bit allocations, 3944 * it is always a 8-byte tag. For 64-bit allocations larger than 8 bytes, 3945 * the tag has 8 bytes of padding before it. 3946 * 3947 * 32-byte, 64-byte buffers <= 8 bytes: 3948 * +-------+-------+--------- ... 3949 * |/size//|/stat//| mallocsz ... 3950 * +-------+-------+--------- ... 3951 * ^ 3952 * pointer returned from malloc(3C) 3953 * 3954 * 64-byte buffers > 8 bytes: 3955 * +---------------+-------+-------+--------- ... 3956 * |/padding///////|/size//|/stat//| mallocsz ... 3957 * +---------------+-------+-------+--------- ... 3958 * ^ 3959 * pointer returned from malloc(3C) 3960 * 3961 * The "size" field is "malloc_size", which is mallocsz + the padding. 3962 * The "stat" field is derived from malloc_size, and functions as a 3963 * validation that this buffer is actually from malloc(3C). 3964 */ 3965 /*ARGSUSED*/ 3966 static int 3967 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump) 3968 { 3969 struct malloc_data md; 3970 size_t m_addr = addr; 3971 size_t overhead = sizeof (md); 3972 size_t mallocsz; 3973 3974 ump->um_total++; 3975 3976 #ifdef _LP64 3977 if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) { 3978 m_addr += overhead; 3979 overhead += sizeof (md); 3980 } 3981 #endif 3982 3983 if (mdb_vread(&md, sizeof (md), m_addr) == -1) { 3984 mdb_warn("unable to read malloc header at %p", m_addr); 3985 return (WALK_NEXT); 3986 } 3987 3988 switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) { 3989 case MALLOC_MAGIC: 3990 #ifdef _LP64 3991 case MALLOC_SECOND_MAGIC: 3992 #endif 3993 mallocsz = md.malloc_size - overhead; 3994 3995 ump->um_malloc++; 3996 ump->um_malloc_size += mallocsz; 3997 ump->um_malloc_overhead += overhead; 3998 3999 /* include round-off and debug overhead */ 4000 ump->um_malloc_overhead += 4001 ump->um_cp->cache_chunksize - md.malloc_size; 4002 4003 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET) 4004 ump->um_bucket[mallocsz]++; 4005 4006 break; 4007 default: 4008 break; 4009 } 4010 4011 return (WALK_NEXT); 4012 } 4013 4014 int 4015 get_umem_alloc_sizes(int **out, size_t *out_num) 4016 { 4017 GElf_Sym sym; 4018 4019 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { 4020 mdb_warn("unable to look up umem_alloc_sizes"); 4021 return (-1); 4022 } 4023 4024 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC); 4025 *out_num = sym.st_size / sizeof (int); 4026 4027 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) { 4028 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value); 4029 *out = NULL; 4030 return (-1); 4031 } 4032 4033 return (0); 4034 } 4035 4036 4037 static int 4038 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump) 4039 { 4040 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) 4041 return (WALK_NEXT); 4042 4043 ump->um_cp = cp; 4044 4045 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) == 4046 -1) { 4047 mdb_warn("can't walk 'umem' for cache %p", addr); 4048 return (WALK_ERR); 4049 } 4050 4051 return (WALK_NEXT); 4052 } 4053 4054 void 4055 umem_malloc_dist_help(void) 4056 { 4057 mdb_printf("%s\n", 4058 "report distribution of outstanding malloc()s"); 4059 mdb_dec_indent(2); 4060 mdb_printf("%<b>OPTIONS%</b>\n"); 4061 mdb_inc_indent(2); 4062 mdb_printf("%s", 4063 " -b maxbins\n" 4064 " Use at most maxbins bins for the data\n" 4065 " -B minbinsize\n" 4066 " Make the bins at least minbinsize bytes apart\n" 4067 " -d dump the raw data out, without binning\n" 4068 " -g use geometric binning instead of linear binning\n"); 4069 } 4070 4071 /*ARGSUSED*/ 4072 int 4073 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4074 { 4075 umem_malloc_info_t mi; 4076 uint_t geometric = 0; 4077 uint_t dump = 0; 4078 size_t maxbuckets = 0; 4079 size_t minbucketsize = 0; 4080 4081 size_t minalloc = 0; 4082 size_t maxalloc = UMI_MAX_BUCKET; 4083 4084 if (flags & DCMD_ADDRSPEC) 4085 return (DCMD_USAGE); 4086 4087 if (mdb_getopts(argc, argv, 4088 'd', MDB_OPT_SETBITS, TRUE, &dump, 4089 'g', MDB_OPT_SETBITS, TRUE, &geometric, 4090 'b', MDB_OPT_UINTPTR, &maxbuckets, 4091 'B', MDB_OPT_UINTPTR, &minbucketsize, 4092 NULL) != argc) 4093 return (DCMD_USAGE); 4094 4095 bzero(&mi, sizeof (mi)); 4096 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4097 UM_SLEEP | UM_GC); 4098 4099 if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb, 4100 &mi) == -1) { 4101 mdb_warn("unable to walk 'umem_cache'"); 4102 return (DCMD_ERR); 4103 } 4104 4105 if (dump) { 4106 int i; 4107 for (i = minalloc; i <= maxalloc; i++) 4108 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]); 4109 4110 return (DCMD_OK); 4111 } 4112 4113 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc, 4114 maxbuckets, minbucketsize, geometric); 4115 4116 return (DCMD_OK); 4117 } 4118 4119 void 4120 umem_malloc_info_help(void) 4121 { 4122 mdb_printf("%s\n", 4123 "report information about malloc()s by cache. "); 4124 mdb_dec_indent(2); 4125 mdb_printf("%<b>OPTIONS%</b>\n"); 4126 mdb_inc_indent(2); 4127 mdb_printf("%s", 4128 " -b maxbins\n" 4129 " Use at most maxbins bins for the data\n" 4130 " -B minbinsize\n" 4131 " Make the bins at least minbinsize bytes apart\n" 4132 " -d dump the raw distribution data without binning\n" 4133 #ifndef _KMDB 4134 " -g use geometric binning instead of linear binning\n" 4135 #endif 4136 ""); 4137 } 4138 int 4139 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4140 { 4141 umem_cache_t c; 4142 umem_malloc_info_t mi; 4143 4144 int skip = 0; 4145 4146 size_t maxmalloc; 4147 size_t overhead; 4148 size_t allocated; 4149 size_t avg_malloc; 4150 size_t overhead_pct; /* 1000 * overhead_percent */ 4151 4152 uint_t verbose = 0; 4153 uint_t dump = 0; 4154 uint_t geometric = 0; 4155 size_t maxbuckets = 0; 4156 size_t minbucketsize = 0; 4157 4158 int *alloc_sizes; 4159 int idx; 4160 size_t num; 4161 size_t minmalloc; 4162 4163 if (mdb_getopts(argc, argv, 4164 'd', MDB_OPT_SETBITS, TRUE, &dump, 4165 'g', MDB_OPT_SETBITS, TRUE, &geometric, 4166 'b', MDB_OPT_UINTPTR, &maxbuckets, 4167 'B', MDB_OPT_UINTPTR, &minbucketsize, 4168 NULL) != argc) 4169 return (DCMD_USAGE); 4170 4171 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0)) 4172 verbose = 1; 4173 4174 if (!(flags & DCMD_ADDRSPEC)) { 4175 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info", 4176 argc, argv) == -1) { 4177 mdb_warn("can't walk umem_cache"); 4178 return (DCMD_ERR); 4179 } 4180 return (DCMD_OK); 4181 } 4182 4183 if (!mdb_vread(&c, sizeof (c), addr)) { 4184 mdb_warn("unable to read cache at %p", addr); 4185 return (DCMD_ERR); 4186 } 4187 4188 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) { 4189 if (!(flags & DCMD_LOOP)) 4190 mdb_warn("umem_malloc_info: cache \"%s\" is not used " 4191 "by malloc()\n", c.cache_name); 4192 skip = 1; 4193 } 4194 4195 /* 4196 * normally, print the header only the first time. In verbose mode, 4197 * print the header on every non-skipped buffer 4198 */ 4199 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip)) 4200 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n", 4201 "CACHE", "BUFSZ", "MAXMAL", 4202 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER"); 4203 4204 if (skip) 4205 return (DCMD_OK); 4206 4207 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data); 4208 #ifdef _LP64 4209 if (c.cache_bufsize > UMEM_SECOND_ALIGN) 4210 maxmalloc -= sizeof (struct malloc_data); 4211 #endif 4212 4213 bzero(&mi, sizeof (mi)); 4214 mi.um_cp = &c; 4215 if (verbose) 4216 mi.um_bucket = 4217 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4218 UM_SLEEP | UM_GC); 4219 4220 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) == 4221 -1) { 4222 mdb_warn("can't walk 'umem'"); 4223 return (DCMD_ERR); 4224 } 4225 4226 overhead = mi.um_malloc_overhead; 4227 allocated = mi.um_malloc_size; 4228 4229 /* do integer round off for the average */ 4230 if (mi.um_malloc != 0) 4231 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc; 4232 else 4233 avg_malloc = 0; 4234 4235 /* 4236 * include per-slab overhead 4237 * 4238 * Each slab in a given cache is the same size, and has the same 4239 * number of chunks in it; we read in the first slab on the 4240 * slab list to get the number of chunks for all slabs. To 4241 * compute the per-slab overhead, we just subtract the chunk usage 4242 * from the slabsize: 4243 * 4244 * +------------+-------+-------+ ... --+-------+-------+-------+ 4245 * |////////////| | | ... | |///////|///////| 4246 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//| 4247 * |////////////| | | ... | |///////|///////| 4248 * +------------+-------+-------+ ... --+-------+-------+-------+ 4249 * | \_______chunksize * chunks_____/ | 4250 * \__________________________slabsize__________________________/ 4251 * 4252 * For UMF_HASH caches, there is an additional source of overhead; 4253 * the external umem_slab_t and per-chunk bufctl structures. We 4254 * include those in our per-slab overhead. 4255 * 4256 * Once we have a number for the per-slab overhead, we estimate 4257 * the actual overhead by treating the malloc()ed buffers as if 4258 * they were densely packed: 4259 * 4260 * additional overhead = (# mallocs) * (per-slab) / (chunks); 4261 * 4262 * carefully ordering the multiply before the divide, to avoid 4263 * round-off error. 4264 */ 4265 if (mi.um_malloc != 0) { 4266 umem_slab_t slab; 4267 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next; 4268 4269 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) { 4270 mdb_warn("unable to read slab at %p\n", saddr); 4271 } else { 4272 long chunks = slab.slab_chunks; 4273 if (chunks != 0 && c.cache_chunksize != 0 && 4274 chunks <= c.cache_slabsize / c.cache_chunksize) { 4275 uintmax_t perslab = 4276 c.cache_slabsize - 4277 (c.cache_chunksize * chunks); 4278 4279 if (c.cache_flags & UMF_HASH) { 4280 perslab += sizeof (umem_slab_t) + 4281 chunks * 4282 ((c.cache_flags & UMF_AUDIT) ? 4283 sizeof (umem_bufctl_audit_t) : 4284 sizeof (umem_bufctl_t)); 4285 } 4286 overhead += 4287 (perslab * (uintmax_t)mi.um_malloc)/chunks; 4288 } else { 4289 mdb_warn("invalid #chunks (%d) in slab %p\n", 4290 chunks, saddr); 4291 } 4292 } 4293 } 4294 4295 if (allocated != 0) 4296 overhead_pct = (1000ULL * overhead) / allocated; 4297 else 4298 overhead_pct = 0; 4299 4300 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n", 4301 addr, c.cache_bufsize, maxmalloc, 4302 mi.um_malloc, avg_malloc, allocated, overhead, 4303 overhead_pct / 10, overhead_pct % 10); 4304 4305 if (!verbose) 4306 return (DCMD_OK); 4307 4308 if (!dump) 4309 mdb_printf("\n"); 4310 4311 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1) 4312 return (DCMD_ERR); 4313 4314 for (idx = 0; idx < num; idx++) { 4315 if (alloc_sizes[idx] == c.cache_bufsize) 4316 break; 4317 if (alloc_sizes[idx] == 0) { 4318 idx = num; /* 0-terminated array */ 4319 break; 4320 } 4321 } 4322 if (idx == num) { 4323 mdb_warn( 4324 "cache %p's size (%d) not in umem_alloc_sizes\n", 4325 addr, c.cache_bufsize); 4326 return (DCMD_ERR); 4327 } 4328 4329 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1]; 4330 if (minmalloc > 0) { 4331 #ifdef _LP64 4332 if (minmalloc > UMEM_SECOND_ALIGN) 4333 minmalloc -= sizeof (struct malloc_data); 4334 #endif 4335 minmalloc -= sizeof (struct malloc_data); 4336 minmalloc += 1; 4337 } 4338 4339 if (dump) { 4340 for (idx = minmalloc; idx <= maxmalloc; idx++) 4341 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]); 4342 mdb_printf("\n"); 4343 } else { 4344 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc, 4345 maxbuckets, minbucketsize, geometric); 4346 } 4347 4348 return (DCMD_OK); 4349 } 4350