1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 2012 Joyent, Inc. All rights reserved. 28 * Copyright (c) 2013 by Delphix. All rights reserved. 29 */ 30 31 #include "umem.h" 32 33 #include <sys/vmem_impl_user.h> 34 #include <umem_impl.h> 35 36 #include <alloca.h> 37 #include <limits.h> 38 #include <mdb/mdb_whatis.h> 39 #include <thr_uberdata.h> 40 41 #include "misc.h" 42 #include "leaky.h" 43 #include "dist.h" 44 45 #include "umem_pagesize.h" 46 47 #define UM_ALLOCATED 0x1 48 #define UM_FREE 0x2 49 #define UM_BUFCTL 0x4 50 #define UM_HASH 0x8 51 52 int umem_ready; 53 54 static int umem_stack_depth_warned; 55 static uint32_t umem_max_ncpus; 56 uint32_t umem_stack_depth; 57 58 size_t umem_pagesize; 59 60 #define UMEM_READVAR(var) \ 61 (umem_readvar(&(var), #var) == -1 && \ 62 (mdb_warn("failed to read "#var), 1)) 63 64 int 65 umem_update_variables(void) 66 { 67 size_t pagesize; 68 69 /* 70 * Figure out which type of umem is being used; if it's not there 71 * yet, succeed quietly. 72 */ 73 if (umem_set_standalone() == -1) { 74 umem_ready = 0; 75 return (0); /* umem not there yet */ 76 } 77 78 /* 79 * Solaris 9 used a different name for umem_max_ncpus. It's 80 * cheap backwards compatibility to check for both names. 
81 */ 82 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 && 83 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) { 84 mdb_warn("unable to read umem_max_ncpus or max_ncpus"); 85 return (-1); 86 } 87 if (UMEM_READVAR(umem_ready)) 88 return (-1); 89 if (UMEM_READVAR(umem_stack_depth)) 90 return (-1); 91 if (UMEM_READVAR(pagesize)) 92 return (-1); 93 94 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) { 95 if (umem_stack_depth_warned == 0) { 96 mdb_warn("umem_stack_depth corrupted (%d > %d)\n", 97 umem_stack_depth, UMEM_MAX_STACK_DEPTH); 98 umem_stack_depth_warned = 1; 99 } 100 umem_stack_depth = 0; 101 } 102 103 umem_pagesize = pagesize; 104 105 return (0); 106 } 107 108 static int 109 umem_ptc_walk_init(mdb_walk_state_t *wsp) 110 { 111 if (wsp->walk_addr == NULL) { 112 if (mdb_layered_walk("ulwp", wsp) == -1) { 113 mdb_warn("couldn't walk 'ulwp'"); 114 return (WALK_ERR); 115 } 116 } 117 118 return (WALK_NEXT); 119 } 120 121 static int 122 umem_ptc_walk_step(mdb_walk_state_t *wsp) 123 { 124 uintptr_t this; 125 int rval; 126 127 if (wsp->walk_layer != NULL) { 128 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self + 129 (uintptr_t)wsp->walk_arg; 130 } else { 131 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg; 132 } 133 134 for (;;) { 135 if (mdb_vread(&this, sizeof (void *), this) == -1) { 136 mdb_warn("couldn't read ptc buffer at %p", this); 137 return (WALK_ERR); 138 } 139 140 if (this == NULL) 141 break; 142 143 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata); 144 145 if (rval != WALK_NEXT) 146 return (rval); 147 } 148 149 return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE); 150 } 151 152 /*ARGSUSED*/ 153 static int 154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes) 155 { 156 mdb_walker_t w; 157 char descr[64]; 158 char name[64]; 159 int i; 160 161 (void) mdb_snprintf(descr, sizeof (descr), 162 "walk the %s cache", c->cache_name); 163 164 w.walk_name = c->cache_name; 165 w.walk_descr = descr; 166 w.walk_init = umem_walk_init; 167 w.walk_step = umem_walk_step; 168 w.walk_fini = umem_walk_fini; 169 w.walk_init_arg = (void *)addr; 170 171 if (mdb_add_walker(&w) == -1) 172 mdb_warn("failed to add %s walker", c->cache_name); 173 174 if (!(c->cache_flags & UMF_PTC)) 175 return (WALK_NEXT); 176 177 /* 178 * For the per-thread cache walker, the address is the offset in the 179 * tm_roots[] array of the ulwp_t. 
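	 * The matching walker is registered below under the name
	 * "umem_ptc_<bufsize>" (for example, "umem_ptc_32" for the 32-byte
	 * cache), built with mdb_snprintf() from cache_bufsize.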
180 */ 181 for (i = 0; sizes[i] != 0; i++) { 182 if (sizes[i] == c->cache_bufsize) 183 break; 184 } 185 186 if (sizes[i] == 0) { 187 mdb_warn("cache %s is cached per-thread, but could not find " 188 "size in umem_alloc_sizes\n", c->cache_name); 189 return (WALK_NEXT); 190 } 191 192 if (i >= NTMEMBASE) { 193 mdb_warn("index for %s (%d) exceeds root slots (%d)\n", 194 c->cache_name, i, NTMEMBASE); 195 return (WALK_NEXT); 196 } 197 198 (void) mdb_snprintf(name, sizeof (name), 199 "umem_ptc_%d", c->cache_bufsize); 200 (void) mdb_snprintf(descr, sizeof (descr), 201 "walk the per-thread cache for %s", c->cache_name); 202 203 w.walk_name = name; 204 w.walk_descr = descr; 205 w.walk_init = umem_ptc_walk_init; 206 w.walk_step = umem_ptc_walk_step; 207 w.walk_fini = NULL; 208 w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]); 209 210 if (mdb_add_walker(&w) == -1) 211 mdb_warn("failed to add %s walker", w.walk_name); 212 213 return (WALK_NEXT); 214 } 215 216 /*ARGSUSED*/ 217 static void 218 umem_statechange_cb(void *arg) 219 { 220 static int been_ready = 0; 221 GElf_Sym sym; 222 int *sizes; 223 224 #ifndef _KMDB 225 leaky_cleanup(1); /* state changes invalidate leaky state */ 226 #endif 227 228 if (umem_update_variables() == -1) 229 return; 230 231 if (been_ready) 232 return; 233 234 if (umem_ready != UMEM_READY) 235 return; 236 237 been_ready = 1; 238 239 /* 240 * In order to determine the tm_roots offset of any cache that is 241 * cached per-thread, we need to have the umem_alloc_sizes array. 242 * Read this, assuring that it is zero-terminated. 243 */ 244 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { 245 mdb_warn("unable to lookup 'umem_alloc_sizes'"); 246 return; 247 } 248 249 sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC); 250 251 if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) { 252 mdb_warn("couldn't read 'umem_alloc_sizes'"); 253 return; 254 } 255 256 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes); 257 } 258 259 int 260 umem_abort_messages(void) 261 { 262 char *umem_error_buffer; 263 uint_t umem_error_begin; 264 GElf_Sym sym; 265 size_t bufsize; 266 267 if (UMEM_READVAR(umem_error_begin)) 268 return (DCMD_ERR); 269 270 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) { 271 mdb_warn("unable to look up umem_error_buffer"); 272 return (DCMD_ERR); 273 } 274 275 bufsize = (size_t)sym.st_size; 276 277 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC); 278 279 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value) 280 != bufsize) { 281 mdb_warn("unable to read umem_error_buffer"); 282 return (DCMD_ERR); 283 } 284 /* put a zero after the end of the buffer to simplify printing */ 285 umem_error_buffer[bufsize] = 0; 286 287 if ((umem_error_begin % bufsize) == 0) 288 mdb_printf("%s\n", umem_error_buffer); 289 else { 290 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0; 291 mdb_printf("%s%s\n", 292 &umem_error_buffer[umem_error_begin % bufsize], 293 umem_error_buffer); 294 } 295 296 return (DCMD_OK); 297 } 298 299 static void 300 umem_log_status(const char *name, umem_log_header_t *val) 301 { 302 umem_log_header_t my_lh; 303 uintptr_t pos = (uintptr_t)val; 304 size_t size; 305 306 if (pos == NULL) 307 return; 308 309 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) { 310 mdb_warn("\nunable to read umem_%s_log pointer %p", 311 name, pos); 312 return; 313 } 314 315 size = my_lh.lh_chunksize * my_lh.lh_nchunks; 316 317 if (size % (1024 * 1024) == 0) 318 mdb_printf("%s=%dm 
", name, size / (1024 * 1024)); 319 else if (size % 1024 == 0) 320 mdb_printf("%s=%dk ", name, size / 1024); 321 else 322 mdb_printf("%s=%d ", name, size); 323 } 324 325 typedef struct umem_debug_flags { 326 const char *udf_name; 327 uint_t udf_flags; 328 uint_t udf_clear; /* if 0, uses udf_flags */ 329 } umem_debug_flags_t; 330 331 umem_debug_flags_t umem_status_flags[] = { 332 { "random", UMF_RANDOMIZE, UMF_RANDOM }, 333 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS }, 334 { "audit", UMF_AUDIT }, 335 { "guards", UMF_DEADBEEF | UMF_REDZONE }, 336 { "nosignal", UMF_CHECKSIGNAL }, 337 { "firewall", UMF_FIREWALL }, 338 { "lite", UMF_LITE }, 339 { NULL } 340 }; 341 342 /*ARGSUSED*/ 343 int 344 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 345 { 346 int umem_logging; 347 348 umem_log_header_t *umem_transaction_log; 349 umem_log_header_t *umem_content_log; 350 umem_log_header_t *umem_failure_log; 351 umem_log_header_t *umem_slab_log; 352 353 mdb_printf("Status:\t\t%s\n", 354 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" : 355 umem_ready == UMEM_READY_STARTUP ? "uninitialized" : 356 umem_ready == UMEM_READY_INITING ? "initialization in process" : 357 umem_ready == UMEM_READY ? "ready and active" : 358 umem_ready == 0 ? "not loaded into address space" : 359 "unknown (umem_ready invalid)"); 360 361 if (umem_ready == 0) 362 return (DCMD_OK); 363 364 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus); 365 366 if (UMEM_READVAR(umem_logging)) 367 goto err; 368 if (UMEM_READVAR(umem_transaction_log)) 369 goto err; 370 if (UMEM_READVAR(umem_content_log)) 371 goto err; 372 if (UMEM_READVAR(umem_failure_log)) 373 goto err; 374 if (UMEM_READVAR(umem_slab_log)) 375 goto err; 376 377 mdb_printf("Logs:\t\t"); 378 umem_log_status("transaction", umem_transaction_log); 379 umem_log_status("content", umem_content_log); 380 umem_log_status("fail", umem_failure_log); 381 umem_log_status("slab", umem_slab_log); 382 if (!umem_logging) 383 mdb_printf("(inactive)"); 384 mdb_printf("\n"); 385 386 mdb_printf("Message buffer:\n"); 387 return (umem_abort_messages()); 388 389 err: 390 mdb_printf("Message buffer:\n"); 391 (void) umem_abort_messages(); 392 return (DCMD_ERR); 393 } 394 395 typedef struct { 396 uintptr_t ucw_first; 397 uintptr_t ucw_current; 398 } umem_cache_walk_t; 399 400 int 401 umem_cache_walk_init(mdb_walk_state_t *wsp) 402 { 403 umem_cache_walk_t *ucw; 404 umem_cache_t c; 405 uintptr_t cp; 406 GElf_Sym sym; 407 408 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) { 409 mdb_warn("couldn't find umem_null_cache"); 410 return (WALK_ERR); 411 } 412 413 cp = (uintptr_t)sym.st_value; 414 415 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) { 416 mdb_warn("couldn't read cache at %p", cp); 417 return (WALK_ERR); 418 } 419 420 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP); 421 422 ucw->ucw_first = cp; 423 ucw->ucw_current = (uintptr_t)c.cache_next; 424 wsp->walk_data = ucw; 425 426 return (WALK_NEXT); 427 } 428 429 int 430 umem_cache_walk_step(mdb_walk_state_t *wsp) 431 { 432 umem_cache_walk_t *ucw = wsp->walk_data; 433 umem_cache_t c; 434 int status; 435 436 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) { 437 mdb_warn("couldn't read cache at %p", ucw->ucw_current); 438 return (WALK_DONE); 439 } 440 441 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata); 442 443 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first) 444 return (WALK_DONE); 445 446 return (status); 447 } 448 449 void 450 
umem_cache_walk_fini(mdb_walk_state_t *wsp) 451 { 452 umem_cache_walk_t *ucw = wsp->walk_data; 453 mdb_free(ucw, sizeof (umem_cache_walk_t)); 454 } 455 456 typedef struct { 457 umem_cpu_t *ucw_cpus; 458 uint32_t ucw_current; 459 uint32_t ucw_max; 460 } umem_cpu_walk_state_t; 461 462 int 463 umem_cpu_walk_init(mdb_walk_state_t *wsp) 464 { 465 umem_cpu_t *umem_cpus; 466 467 umem_cpu_walk_state_t *ucw; 468 469 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) { 470 mdb_warn("failed to read 'umem_cpus'"); 471 return (WALK_ERR); 472 } 473 474 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP); 475 476 ucw->ucw_cpus = umem_cpus; 477 ucw->ucw_current = 0; 478 ucw->ucw_max = umem_max_ncpus; 479 480 wsp->walk_data = ucw; 481 return (WALK_NEXT); 482 } 483 484 int 485 umem_cpu_walk_step(mdb_walk_state_t *wsp) 486 { 487 umem_cpu_t cpu; 488 umem_cpu_walk_state_t *ucw = wsp->walk_data; 489 490 uintptr_t caddr; 491 492 if (ucw->ucw_current >= ucw->ucw_max) 493 return (WALK_DONE); 494 495 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]); 496 497 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) { 498 mdb_warn("failed to read cpu %d", ucw->ucw_current); 499 return (WALK_ERR); 500 } 501 502 ucw->ucw_current++; 503 504 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata)); 505 } 506 507 void 508 umem_cpu_walk_fini(mdb_walk_state_t *wsp) 509 { 510 umem_cpu_walk_state_t *ucw = wsp->walk_data; 511 512 mdb_free(ucw, sizeof (*ucw)); 513 } 514 515 int 516 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 517 { 518 if (wsp->walk_addr == NULL) { 519 mdb_warn("umem_cpu_cache doesn't support global walks"); 520 return (WALK_ERR); 521 } 522 523 if (mdb_layered_walk("umem_cpu", wsp) == -1) { 524 mdb_warn("couldn't walk 'umem_cpu'"); 525 return (WALK_ERR); 526 } 527 528 wsp->walk_data = (void *)wsp->walk_addr; 529 530 return (WALK_NEXT); 531 } 532 533 int 534 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 535 { 536 uintptr_t caddr = (uintptr_t)wsp->walk_data; 537 const umem_cpu_t *cpu = wsp->walk_layer; 538 umem_cpu_cache_t cc; 539 540 caddr += cpu->cpu_cache_offset; 541 542 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) { 543 mdb_warn("couldn't read umem_cpu_cache at %p", caddr); 544 return (WALK_ERR); 545 } 546 547 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 548 } 549 550 int 551 umem_slab_walk_init(mdb_walk_state_t *wsp) 552 { 553 uintptr_t caddr = wsp->walk_addr; 554 umem_cache_t c; 555 556 if (caddr == NULL) { 557 mdb_warn("umem_slab doesn't support global walks\n"); 558 return (WALK_ERR); 559 } 560 561 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 562 mdb_warn("couldn't read umem_cache at %p", caddr); 563 return (WALK_ERR); 564 } 565 566 wsp->walk_data = 567 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 568 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; 569 570 return (WALK_NEXT); 571 } 572 573 int 574 umem_slab_walk_partial_init(mdb_walk_state_t *wsp) 575 { 576 uintptr_t caddr = wsp->walk_addr; 577 umem_cache_t c; 578 579 if (caddr == NULL) { 580 mdb_warn("umem_slab_partial doesn't support global walks\n"); 581 return (WALK_ERR); 582 } 583 584 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 585 mdb_warn("couldn't read umem_cache at %p", caddr); 586 return (WALK_ERR); 587 } 588 589 wsp->walk_data = 590 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 591 wsp->walk_addr = (uintptr_t)c.cache_freelist; 592 593 /* 594 * Some consumers (umem_walk_step(), in particular) require at 595 * least one callback if there are any buffers in the cache. 
So 596 * if there are *no* partial slabs, report the last full slab, if 597 * any. 598 * 599 * Yes, this is ugly, but it's cleaner than the other possibilities. 600 */ 601 if ((uintptr_t)wsp->walk_data == wsp->walk_addr) 602 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; 603 604 return (WALK_NEXT); 605 } 606 607 int 608 umem_slab_walk_step(mdb_walk_state_t *wsp) 609 { 610 umem_slab_t s; 611 uintptr_t addr = wsp->walk_addr; 612 uintptr_t saddr = (uintptr_t)wsp->walk_data; 613 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab); 614 615 if (addr == saddr) 616 return (WALK_DONE); 617 618 if (mdb_vread(&s, sizeof (s), addr) == -1) { 619 mdb_warn("failed to read slab at %p", wsp->walk_addr); 620 return (WALK_ERR); 621 } 622 623 if ((uintptr_t)s.slab_cache != caddr) { 624 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 625 addr, caddr, s.slab_cache); 626 return (WALK_ERR); 627 } 628 629 wsp->walk_addr = (uintptr_t)s.slab_next; 630 631 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); 632 } 633 634 int 635 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 636 { 637 umem_cache_t c; 638 639 if (!(flags & DCMD_ADDRSPEC)) { 640 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) { 641 mdb_warn("can't walk umem_cache"); 642 return (DCMD_ERR); 643 } 644 return (DCMD_OK); 645 } 646 647 if (DCMD_HDRSPEC(flags)) 648 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME", 649 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 650 651 if (mdb_vread(&c, sizeof (c), addr) == -1) { 652 mdb_warn("couldn't read umem_cache at %p", addr); 653 return (DCMD_ERR); 654 } 655 656 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name, 657 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 658 659 return (DCMD_OK); 660 } 661 662 static int 663 addrcmp(const void *lhs, const void *rhs) 664 { 665 uintptr_t p1 = *((uintptr_t *)lhs); 666 uintptr_t p2 = *((uintptr_t *)rhs); 667 668 if (p1 < p2) 669 return (-1); 670 if (p1 > p2) 671 return (1); 672 return (0); 673 } 674 675 static int 676 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs) 677 { 678 const umem_bufctl_audit_t *bcp1 = *lhs; 679 const umem_bufctl_audit_t *bcp2 = *rhs; 680 681 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 682 return (-1); 683 684 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 685 return (1); 686 687 return (0); 688 } 689 690 typedef struct umem_hash_walk { 691 uintptr_t *umhw_table; 692 size_t umhw_nelems; 693 size_t umhw_pos; 694 umem_bufctl_t umhw_cur; 695 } umem_hash_walk_t; 696 697 int 698 umem_hash_walk_init(mdb_walk_state_t *wsp) 699 { 700 umem_hash_walk_t *umhw; 701 uintptr_t *hash; 702 umem_cache_t c; 703 uintptr_t haddr, addr = wsp->walk_addr; 704 size_t nelems; 705 size_t hsize; 706 707 if (addr == NULL) { 708 mdb_warn("umem_hash doesn't support global walks\n"); 709 return (WALK_ERR); 710 } 711 712 if (mdb_vread(&c, sizeof (c), addr) == -1) { 713 mdb_warn("couldn't read cache at addr %p", addr); 714 return (WALK_ERR); 715 } 716 717 if (!(c.cache_flags & UMF_HASH)) { 718 mdb_warn("cache %p doesn't have a hash table\n", addr); 719 return (WALK_DONE); /* nothing to do */ 720 } 721 722 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP); 723 umhw->umhw_cur.bc_next = NULL; 724 umhw->umhw_pos = 0; 725 726 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1; 727 hsize = nelems * sizeof (uintptr_t); 728 haddr = (uintptr_t)c.cache_hash_table; 729 730 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 731 if (mdb_vread(hash, hsize, 
haddr) == -1) { 732 mdb_warn("failed to read hash table at %p", haddr); 733 mdb_free(hash, hsize); 734 mdb_free(umhw, sizeof (umem_hash_walk_t)); 735 return (WALK_ERR); 736 } 737 738 wsp->walk_data = umhw; 739 740 return (WALK_NEXT); 741 } 742 743 int 744 umem_hash_walk_step(mdb_walk_state_t *wsp) 745 { 746 umem_hash_walk_t *umhw = wsp->walk_data; 747 uintptr_t addr = NULL; 748 749 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) { 750 while (umhw->umhw_pos < umhw->umhw_nelems) { 751 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL) 752 break; 753 } 754 } 755 if (addr == NULL) 756 return (WALK_DONE); 757 758 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) { 759 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr); 760 return (WALK_ERR); 761 } 762 763 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata)); 764 } 765 766 void 767 umem_hash_walk_fini(mdb_walk_state_t *wsp) 768 { 769 umem_hash_walk_t *umhw = wsp->walk_data; 770 771 if (umhw == NULL) 772 return; 773 774 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t)); 775 mdb_free(umhw, sizeof (umem_hash_walk_t)); 776 } 777 778 /* 779 * Find the address of the bufctl structure for the address 'buf' in cache 780 * 'cp', which is at address caddr, and place it in *out. 781 */ 782 static int 783 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 784 { 785 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf); 786 umem_bufctl_t *bcp; 787 umem_bufctl_t bc; 788 789 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) { 790 mdb_warn("unable to read hash bucket for %p in cache %p", 791 buf, caddr); 792 return (-1); 793 } 794 795 while (bcp != NULL) { 796 if (mdb_vread(&bc, sizeof (umem_bufctl_t), 797 (uintptr_t)bcp) == -1) { 798 mdb_warn("unable to read bufctl at %p", bcp); 799 return (-1); 800 } 801 if (bc.bc_addr == buf) { 802 *out = (uintptr_t)bcp; 803 return (0); 804 } 805 bcp = bc.bc_next; 806 } 807 808 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 809 return (-1); 810 } 811 812 int 813 umem_get_magsize(const umem_cache_t *cp) 814 { 815 uintptr_t addr = (uintptr_t)cp->cache_magtype; 816 GElf_Sym mt_sym; 817 umem_magtype_t mt; 818 int res; 819 820 /* 821 * if cpu 0 has a non-zero magsize, it must be correct. caches 822 * with UMF_NOMAGAZINE have disabled their magazine layers, so 823 * it is okay to return 0 for them. 824 */ 825 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 826 (cp->cache_flags & UMF_NOMAGAZINE)) 827 return (res); 828 829 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) { 830 mdb_warn("unable to read 'umem_magtype'"); 831 } else if (addr < mt_sym.st_value || 832 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 833 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 834 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 835 cp->cache_name, addr); 836 return (0); 837 } 838 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 839 mdb_warn("unable to read magtype at %a", addr); 840 return (0); 841 } 842 return (mt.mt_magsize); 843 } 844 845 /*ARGSUSED*/ 846 static int 847 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est) 848 { 849 *est -= (sp->slab_chunks - sp->slab_refcnt); 850 851 return (WALK_NEXT); 852 } 853 854 /* 855 * Returns an upper bound on the number of allocated buffers in a given 856 * cache. 
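 * The estimate starts from cache_buftotal, subtracts the free buffers
 * sitting on partially allocated slabs, and then subtracts the buffers
 * held in full magazines in the depot (cache_full.ml_total * magsize).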
857 */ 858 size_t 859 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp) 860 { 861 int magsize; 862 size_t cache_est; 863 864 cache_est = cp->cache_buftotal; 865 866 (void) mdb_pwalk("umem_slab_partial", 867 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr); 868 869 if ((magsize = umem_get_magsize(cp)) != 0) { 870 size_t mag_est = cp->cache_full.ml_total * magsize; 871 872 if (cache_est >= mag_est) { 873 cache_est -= mag_est; 874 } else { 875 mdb_warn("cache %p's magazine layer holds more buffers " 876 "than the slab layer.\n", addr); 877 } 878 } 879 return (cache_est); 880 } 881 882 #define READMAG_ROUNDS(rounds) { \ 883 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \ 884 mdb_warn("couldn't read magazine at %p", ump); \ 885 goto fail; \ 886 } \ 887 for (i = 0; i < rounds; i++) { \ 888 maglist[magcnt++] = mp->mag_round[i]; \ 889 if (magcnt == magmax) { \ 890 mdb_warn("%d magazines exceeds fudge factor\n", \ 891 magcnt); \ 892 goto fail; \ 893 } \ 894 } \ 895 } 896 897 static int 898 umem_read_magazines(umem_cache_t *cp, uintptr_t addr, 899 void ***maglistp, size_t *magcntp, size_t *magmaxp) 900 { 901 umem_magazine_t *ump, *mp; 902 void **maglist = NULL; 903 int i, cpu; 904 size_t magsize, magmax, magbsize; 905 size_t magcnt = 0; 906 907 /* 908 * Read the magtype out of the cache, after verifying the pointer's 909 * correctness. 910 */ 911 magsize = umem_get_magsize(cp); 912 if (magsize == 0) { 913 *maglistp = NULL; 914 *magcntp = 0; 915 *magmaxp = 0; 916 return (0); 917 } 918 919 /* 920 * There are several places where we need to go buffer hunting: 921 * the per-CPU loaded magazine, the per-CPU spare full magazine, 922 * and the full magazine list in the depot. 923 * 924 * For an upper bound on the number of buffers in the magazine 925 * layer, we have the number of magazines on the cache_full 926 * list plus at most two magazines per CPU (the loaded and the 927 * spare). Toss in 100 magazines as a fudge factor in case this 928 * is live (the number "100" comes from the same fudge factor in 929 * crash(1M)). 930 */ 931 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize; 932 magbsize = offsetof(umem_magazine_t, mag_round[magsize]); 933 934 if (magbsize >= PAGESIZE / 2) { 935 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 936 addr, magbsize); 937 return (-1); 938 } 939 940 maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP); 941 mp = mdb_alloc(magbsize, UM_SLEEP); 942 if (mp == NULL || maglist == NULL) 943 goto fail; 944 945 /* 946 * First up: the magazines in the depot (i.e. on the cache_full list). 947 */ 948 for (ump = cp->cache_full.ml_list; ump != NULL; ) { 949 READMAG_ROUNDS(magsize); 950 ump = mp->mag_next; 951 952 if (ump == cp->cache_full.ml_list) 953 break; /* cache_full list loop detected */ 954 } 955 956 dprintf(("cache_full list done\n")); 957 958 /* 959 * Now whip through the CPUs, snagging the loaded magazines 960 * and full spares. 
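	 * Only magazines that currently hold rounds are read: cc_rounds
	 * and cc_prounds give the number of rounds in the loaded and
	 * previously loaded (spare) magazines, respectively.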
961 */ 962 for (cpu = 0; cpu < umem_max_ncpus; cpu++) { 963 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 964 965 dprintf(("reading cpu cache %p\n", 966 (uintptr_t)ccp - (uintptr_t)cp + addr)); 967 968 if (ccp->cc_rounds > 0 && 969 (ump = ccp->cc_loaded) != NULL) { 970 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 971 READMAG_ROUNDS(ccp->cc_rounds); 972 } 973 974 if (ccp->cc_prounds > 0 && 975 (ump = ccp->cc_ploaded) != NULL) { 976 dprintf(("reading %d previously loaded rounds\n", 977 ccp->cc_prounds)); 978 READMAG_ROUNDS(ccp->cc_prounds); 979 } 980 } 981 982 dprintf(("magazine layer: %d buffers\n", magcnt)); 983 984 mdb_free(mp, magbsize); 985 986 *maglistp = maglist; 987 *magcntp = magcnt; 988 *magmaxp = magmax; 989 990 return (0); 991 992 fail: 993 if (mp) 994 mdb_free(mp, magbsize); 995 if (maglist) 996 mdb_free(maglist, magmax * sizeof (void *)); 997 998 return (-1); 999 } 1000 1001 typedef struct umem_read_ptc_walk { 1002 void **urpw_buf; 1003 size_t urpw_cnt; 1004 size_t urpw_max; 1005 } umem_read_ptc_walk_t; 1006 1007 /*ARGSUSED*/ 1008 static int 1009 umem_read_ptc_walk_buf(uintptr_t addr, 1010 const void *ignored, umem_read_ptc_walk_t *urpw) 1011 { 1012 if (urpw->urpw_cnt == urpw->urpw_max) { 1013 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1; 1014 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP); 1015 1016 if (nmax > 1) { 1017 size_t osize = urpw->urpw_max * sizeof (void *); 1018 bcopy(urpw->urpw_buf, new, osize); 1019 mdb_free(urpw->urpw_buf, osize); 1020 } 1021 1022 urpw->urpw_buf = new; 1023 urpw->urpw_max = nmax; 1024 } 1025 1026 urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr; 1027 1028 return (WALK_NEXT); 1029 } 1030 1031 static int 1032 umem_read_ptc(umem_cache_t *cp, 1033 void ***buflistp, size_t *bufcntp, size_t *bufmaxp) 1034 { 1035 umem_read_ptc_walk_t urpw; 1036 char walk[60]; 1037 int rval; 1038 1039 if (!(cp->cache_flags & UMF_PTC)) 1040 return (0); 1041 1042 (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d", 1043 cp->cache_bufsize); 1044 1045 urpw.urpw_buf = *buflistp; 1046 urpw.urpw_cnt = *bufcntp; 1047 urpw.urpw_max = *bufmaxp; 1048 1049 if ((rval = mdb_walk(walk, 1050 (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) { 1051 mdb_warn("couldn't walk %s", walk); 1052 } 1053 1054 *buflistp = urpw.urpw_buf; 1055 *bufcntp = urpw.urpw_cnt; 1056 *bufmaxp = urpw.urpw_max; 1057 1058 return (rval); 1059 } 1060 1061 static int 1062 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1063 { 1064 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1065 } 1066 1067 static int 1068 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1069 { 1070 umem_bufctl_audit_t *b; 1071 UMEM_LOCAL_BUFCTL_AUDIT(&b); 1072 1073 /* 1074 * if UMF_AUDIT is not set, we know that we're looking at a 1075 * umem_bufctl_t. 
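	 * We also fall back to the plain umem_bufctl_t read below if the
	 * audit-sized mdb_vread() fails for any reason.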
1076 */ 1077 if (!(cp->cache_flags & UMF_AUDIT) || 1078 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) { 1079 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE); 1080 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) { 1081 mdb_warn("unable to read bufctl at %p", buf); 1082 return (WALK_ERR); 1083 } 1084 } 1085 1086 return (wsp->walk_callback(buf, b, wsp->walk_cbdata)); 1087 } 1088 1089 typedef struct umem_walk { 1090 int umw_type; 1091 1092 uintptr_t umw_addr; /* cache address */ 1093 umem_cache_t *umw_cp; 1094 size_t umw_csize; 1095 1096 /* 1097 * magazine layer 1098 */ 1099 void **umw_maglist; 1100 size_t umw_max; 1101 size_t umw_count; 1102 size_t umw_pos; 1103 1104 /* 1105 * slab layer 1106 */ 1107 char *umw_valid; /* to keep track of freed buffers */ 1108 char *umw_ubase; /* buffer for slab data */ 1109 } umem_walk_t; 1110 1111 static int 1112 umem_walk_init_common(mdb_walk_state_t *wsp, int type) 1113 { 1114 umem_walk_t *umw; 1115 int csize; 1116 umem_cache_t *cp; 1117 size_t vm_quantum; 1118 1119 size_t magmax, magcnt; 1120 void **maglist = NULL; 1121 uint_t chunksize, slabsize; 1122 int status = WALK_ERR; 1123 uintptr_t addr = wsp->walk_addr; 1124 const char *layered; 1125 1126 type &= ~UM_HASH; 1127 1128 if (addr == NULL) { 1129 mdb_warn("umem walk doesn't support global walks\n"); 1130 return (WALK_ERR); 1131 } 1132 1133 dprintf(("walking %p\n", addr)); 1134 1135 /* 1136 * The number of "cpus" determines how large the cache is. 1137 */ 1138 csize = UMEM_CACHE_SIZE(umem_max_ncpus); 1139 cp = mdb_alloc(csize, UM_SLEEP); 1140 1141 if (mdb_vread(cp, csize, addr) == -1) { 1142 mdb_warn("couldn't read cache at addr %p", addr); 1143 goto out2; 1144 } 1145 1146 /* 1147 * It's easy for someone to hand us an invalid cache address. 1148 * Unfortunately, it is hard for this walker to survive an 1149 * invalid cache cleanly. So we make sure that: 1150 * 1151 * 1. the vmem arena for the cache is readable, 1152 * 2. the vmem arena's quantum is a power of 2, 1153 * 3. our slabsize is a multiple of the quantum, and 1154 * 4. our chunksize is >0 and less than our slabsize. 1155 */ 1156 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1157 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1158 vm_quantum == 0 || 1159 (vm_quantum & (vm_quantum - 1)) != 0 || 1160 cp->cache_slabsize < vm_quantum || 1161 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1162 cp->cache_chunksize == 0 || 1163 cp->cache_chunksize > cp->cache_slabsize) { 1164 mdb_warn("%p is not a valid umem_cache_t\n", addr); 1165 goto out2; 1166 } 1167 1168 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1169 1170 if (cp->cache_buftotal == 0) { 1171 mdb_free(cp, csize); 1172 return (WALK_DONE); 1173 } 1174 1175 /* 1176 * If they ask for bufctls, but it's a small-slab cache, 1177 * there is nothing to report. 1178 */ 1179 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) { 1180 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n", 1181 cp->cache_flags)); 1182 mdb_free(cp, csize); 1183 return (WALK_DONE); 1184 } 1185 1186 /* 1187 * Read in the contents of the magazine layer 1188 */ 1189 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0) 1190 goto out2; 1191 1192 /* 1193 * Read in the contents of the per-thread caches, if any 1194 */ 1195 if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0) 1196 goto out2; 1197 1198 /* 1199 * We have all of the buffers from the magazines and from the 1200 * per-thread cache (if any); if we are walking allocated buffers, 1201 * sort them so we can bsearch them later. 
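	 * (umem_walk_step() bsearch()es this sorted list so that buffers
	 * which are free in the magazine layer are not reported as
	 * allocated.)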
1202 */ 1203 if (type & UM_ALLOCATED) 1204 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1205 1206 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP); 1207 1208 umw->umw_type = type; 1209 umw->umw_addr = addr; 1210 umw->umw_cp = cp; 1211 umw->umw_csize = csize; 1212 umw->umw_maglist = maglist; 1213 umw->umw_max = magmax; 1214 umw->umw_count = magcnt; 1215 umw->umw_pos = 0; 1216 1217 /* 1218 * When walking allocated buffers in a UMF_HASH cache, we walk the 1219 * hash table instead of the slab layer. 1220 */ 1221 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) { 1222 layered = "umem_hash"; 1223 1224 umw->umw_type |= UM_HASH; 1225 } else { 1226 /* 1227 * If we are walking freed buffers, we only need the 1228 * magazine layer plus the partially allocated slabs. 1229 * To walk allocated buffers, we need all of the slabs. 1230 */ 1231 if (type & UM_ALLOCATED) 1232 layered = "umem_slab"; 1233 else 1234 layered = "umem_slab_partial"; 1235 1236 /* 1237 * for small-slab caches, we read in the entire slab. For 1238 * freed buffers, we can just walk the freelist. For 1239 * allocated buffers, we use a 'valid' array to track 1240 * the freed buffers. 1241 */ 1242 if (!(cp->cache_flags & UMF_HASH)) { 1243 chunksize = cp->cache_chunksize; 1244 slabsize = cp->cache_slabsize; 1245 1246 umw->umw_ubase = mdb_alloc(slabsize + 1247 sizeof (umem_bufctl_t), UM_SLEEP); 1248 1249 if (type & UM_ALLOCATED) 1250 umw->umw_valid = 1251 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1252 } 1253 } 1254 1255 status = WALK_NEXT; 1256 1257 if (mdb_layered_walk(layered, wsp) == -1) { 1258 mdb_warn("unable to start layered '%s' walk", layered); 1259 status = WALK_ERR; 1260 } 1261 1262 out1: 1263 if (status == WALK_ERR) { 1264 if (umw->umw_valid) 1265 mdb_free(umw->umw_valid, slabsize / chunksize); 1266 1267 if (umw->umw_ubase) 1268 mdb_free(umw->umw_ubase, slabsize + 1269 sizeof (umem_bufctl_t)); 1270 1271 if (umw->umw_maglist) 1272 mdb_free(umw->umw_maglist, umw->umw_max * 1273 sizeof (uintptr_t)); 1274 1275 mdb_free(umw, sizeof (umem_walk_t)); 1276 wsp->walk_data = NULL; 1277 } 1278 1279 out2: 1280 if (status == WALK_ERR) 1281 mdb_free(cp, csize); 1282 1283 return (status); 1284 } 1285 1286 int 1287 umem_walk_step(mdb_walk_state_t *wsp) 1288 { 1289 umem_walk_t *umw = wsp->walk_data; 1290 int type = umw->umw_type; 1291 umem_cache_t *cp = umw->umw_cp; 1292 1293 void **maglist = umw->umw_maglist; 1294 int magcnt = umw->umw_count; 1295 1296 uintptr_t chunksize, slabsize; 1297 uintptr_t addr; 1298 const umem_slab_t *sp; 1299 const umem_bufctl_t *bcp; 1300 umem_bufctl_t bc; 1301 1302 int chunks; 1303 char *kbase; 1304 void *buf; 1305 int i, ret; 1306 1307 char *valid, *ubase; 1308 1309 /* 1310 * first, handle the 'umem_hash' layered walk case 1311 */ 1312 if (type & UM_HASH) { 1313 /* 1314 * We have a buffer which has been allocated out of the 1315 * global layer. We need to make sure that it's not 1316 * actually sitting in a magazine before we report it as 1317 * an allocated buffer. 
		 */
		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & UM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (umem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = umw->umw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & UM_FREE) && magcnt != 0) {
		umw->umw_count = 0;	/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & UM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & UMF_BUFTAG) {
					umem_buftag_t *btp;
					umem_buftag_t tag;

					/* LINTED - alignment */
					btp = UMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (umem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & UMF_HASH)) {
		valid = umw->umw_valid;
		ubase = umw->umw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & UM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * Since we could be in the middle of allocating a buffer, our
	 * refcnt could be one higher than it ought to be. So we check
	 * one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & UMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is (or should be) in the slab
			 * that we've read in; determine its offset in the
			 * slab, validate that it's not corrupt, and add to
			 * our base address to find the umem_bufctl_t. (Note
			 * that we don't need to add the size of the bufctl
			 * to our offset calculation because of the slop that's
			 * allocated for the buffer at ubase.)
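			 * (umw_ubase was allocated as slabsize +
			 * sizeof (umem_bufctl_t) in umem_walk_init_common(),
			 * so dereferencing ubase + offs stays in bounds even
			 * for the last chunk.)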
1451 */ 1452 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase; 1453 1454 if (offs > chunks * chunksize) { 1455 mdb_warn("found corrupt bufctl ptr %p" 1456 " in slab %p in cache %p\n", bcp, 1457 wsp->walk_addr, addr); 1458 break; 1459 } 1460 1461 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs)); 1462 buf = UMEM_BUF(cp, bcp); 1463 } 1464 1465 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1466 1467 if (ndx > slabsize / cp->cache_bufsize) { 1468 /* 1469 * This is very wrong; we have managed to find 1470 * a buffer in the slab which shouldn't 1471 * actually be here. Emit a warning, and 1472 * try to continue. 1473 */ 1474 mdb_warn("buf %p is out of range for " 1475 "slab %p, cache %p\n", buf, sp, addr); 1476 } else if (type & UM_ALLOCATED) { 1477 /* 1478 * we have found a buffer on the slab's freelist; 1479 * clear its entry 1480 */ 1481 valid[ndx] = 0; 1482 } else { 1483 /* 1484 * Report this freed buffer 1485 */ 1486 if (type & UM_BUFCTL) { 1487 ret = bufctl_walk_callback(cp, wsp, 1488 (uintptr_t)bcp); 1489 } else { 1490 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1491 } 1492 if (ret != WALK_NEXT) 1493 return (ret); 1494 } 1495 1496 bcp = bc.bc_next; 1497 } 1498 1499 if (bcp != NULL) { 1500 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1501 sp, addr, bcp)); 1502 } 1503 1504 /* 1505 * If we are walking freed buffers, the loop above handled reporting 1506 * them. 1507 */ 1508 if (type & UM_FREE) 1509 return (WALK_NEXT); 1510 1511 if (type & UM_BUFCTL) { 1512 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for " 1513 "cache %p\n", addr); 1514 return (WALK_ERR); 1515 } 1516 1517 /* 1518 * Report allocated buffers, skipping buffers in the magazine layer. 1519 * We only get this far for small-slab caches. 1520 */ 1521 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1522 buf = (char *)kbase + i * chunksize; 1523 1524 if (!valid[i]) 1525 continue; /* on slab freelist */ 1526 1527 if (magcnt > 0 && 1528 bsearch(&buf, maglist, magcnt, sizeof (void *), 1529 addrcmp) != NULL) 1530 continue; /* in magazine layer */ 1531 1532 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1533 } 1534 return (ret); 1535 } 1536 1537 void 1538 umem_walk_fini(mdb_walk_state_t *wsp) 1539 { 1540 umem_walk_t *umw = wsp->walk_data; 1541 uintptr_t chunksize; 1542 uintptr_t slabsize; 1543 1544 if (umw == NULL) 1545 return; 1546 1547 if (umw->umw_maglist != NULL) 1548 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *)); 1549 1550 chunksize = umw->umw_cp->cache_chunksize; 1551 slabsize = umw->umw_cp->cache_slabsize; 1552 1553 if (umw->umw_valid != NULL) 1554 mdb_free(umw->umw_valid, slabsize / chunksize); 1555 if (umw->umw_ubase != NULL) 1556 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t)); 1557 1558 mdb_free(umw->umw_cp, umw->umw_csize); 1559 mdb_free(umw, sizeof (umem_walk_t)); 1560 } 1561 1562 /*ARGSUSED*/ 1563 static int 1564 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp) 1565 { 1566 /* 1567 * Buffers allocated from NOTOUCH caches can also show up as freed 1568 * memory in other caches. This can be a little confusing, so we 1569 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1570 * that "::walk umem" and "::walk freemem" yield disjoint output). 
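	 * A UMC_NOTOUCH cache can still be walked on its own by specifying
	 * its address explicitly (e.g. "addr::walk umem").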
1571 */ 1572 if (c->cache_cflags & UMC_NOTOUCH) 1573 return (WALK_NEXT); 1574 1575 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1576 wsp->walk_cbdata, addr) == -1) 1577 return (WALK_DONE); 1578 1579 return (WALK_NEXT); 1580 } 1581 1582 #define UMEM_WALK_ALL(name, wsp) { \ 1583 wsp->walk_data = (name); \ 1584 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \ 1585 return (WALK_ERR); \ 1586 return (WALK_DONE); \ 1587 } 1588 1589 int 1590 umem_walk_init(mdb_walk_state_t *wsp) 1591 { 1592 if (wsp->walk_arg != NULL) 1593 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1594 1595 if (wsp->walk_addr == NULL) 1596 UMEM_WALK_ALL("umem", wsp); 1597 return (umem_walk_init_common(wsp, UM_ALLOCATED)); 1598 } 1599 1600 int 1601 bufctl_walk_init(mdb_walk_state_t *wsp) 1602 { 1603 if (wsp->walk_addr == NULL) 1604 UMEM_WALK_ALL("bufctl", wsp); 1605 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL)); 1606 } 1607 1608 int 1609 freemem_walk_init(mdb_walk_state_t *wsp) 1610 { 1611 if (wsp->walk_addr == NULL) 1612 UMEM_WALK_ALL("freemem", wsp); 1613 return (umem_walk_init_common(wsp, UM_FREE)); 1614 } 1615 1616 int 1617 freectl_walk_init(mdb_walk_state_t *wsp) 1618 { 1619 if (wsp->walk_addr == NULL) 1620 UMEM_WALK_ALL("freectl", wsp); 1621 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL)); 1622 } 1623 1624 typedef struct bufctl_history_walk { 1625 void *bhw_next; 1626 umem_cache_t *bhw_cache; 1627 umem_slab_t *bhw_slab; 1628 hrtime_t bhw_timestamp; 1629 } bufctl_history_walk_t; 1630 1631 int 1632 bufctl_history_walk_init(mdb_walk_state_t *wsp) 1633 { 1634 bufctl_history_walk_t *bhw; 1635 umem_bufctl_audit_t bc; 1636 umem_bufctl_audit_t bcn; 1637 1638 if (wsp->walk_addr == NULL) { 1639 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1640 return (WALK_ERR); 1641 } 1642 1643 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1644 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1645 return (WALK_ERR); 1646 } 1647 1648 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1649 bhw->bhw_timestamp = 0; 1650 bhw->bhw_cache = bc.bc_cache; 1651 bhw->bhw_slab = bc.bc_slab; 1652 1653 /* 1654 * sometimes the first log entry matches the base bufctl; in that 1655 * case, skip the base bufctl. 1656 */ 1657 if (bc.bc_lastlog != NULL && 1658 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1659 bc.bc_addr == bcn.bc_addr && 1660 bc.bc_cache == bcn.bc_cache && 1661 bc.bc_slab == bcn.bc_slab && 1662 bc.bc_timestamp == bcn.bc_timestamp && 1663 bc.bc_thread == bcn.bc_thread) 1664 bhw->bhw_next = bc.bc_lastlog; 1665 else 1666 bhw->bhw_next = (void *)wsp->walk_addr; 1667 1668 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1669 wsp->walk_data = bhw; 1670 1671 return (WALK_NEXT); 1672 } 1673 1674 int 1675 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1676 { 1677 bufctl_history_walk_t *bhw = wsp->walk_data; 1678 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1679 uintptr_t baseaddr = wsp->walk_addr; 1680 umem_bufctl_audit_t *b; 1681 UMEM_LOCAL_BUFCTL_AUDIT(&b); 1682 1683 if (addr == NULL) 1684 return (WALK_DONE); 1685 1686 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1687 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1688 return (WALK_ERR); 1689 } 1690 1691 /* 1692 * The bufctl is only valid if the address, cache, and slab are 1693 * correct. We also check that the timestamp is decreasing, to 1694 * prevent infinite loops. 
1695 */ 1696 if ((uintptr_t)b->bc_addr != baseaddr || 1697 b->bc_cache != bhw->bhw_cache || 1698 b->bc_slab != bhw->bhw_slab || 1699 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp)) 1700 return (WALK_DONE); 1701 1702 bhw->bhw_next = b->bc_lastlog; 1703 bhw->bhw_timestamp = b->bc_timestamp; 1704 1705 return (wsp->walk_callback(addr, b, wsp->walk_cbdata)); 1706 } 1707 1708 void 1709 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1710 { 1711 bufctl_history_walk_t *bhw = wsp->walk_data; 1712 1713 mdb_free(bhw, sizeof (*bhw)); 1714 } 1715 1716 typedef struct umem_log_walk { 1717 umem_bufctl_audit_t *ulw_base; 1718 umem_bufctl_audit_t **ulw_sorted; 1719 umem_log_header_t ulw_lh; 1720 size_t ulw_size; 1721 size_t ulw_maxndx; 1722 size_t ulw_ndx; 1723 } umem_log_walk_t; 1724 1725 int 1726 umem_log_walk_init(mdb_walk_state_t *wsp) 1727 { 1728 uintptr_t lp = wsp->walk_addr; 1729 umem_log_walk_t *ulw; 1730 umem_log_header_t *lhp; 1731 int maxndx, i, j, k; 1732 1733 /* 1734 * By default (global walk), walk the umem_transaction_log. Otherwise 1735 * read the log whose umem_log_header_t is stored at walk_addr. 1736 */ 1737 if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) { 1738 mdb_warn("failed to read 'umem_transaction_log'"); 1739 return (WALK_ERR); 1740 } 1741 1742 if (lp == NULL) { 1743 mdb_warn("log is disabled\n"); 1744 return (WALK_ERR); 1745 } 1746 1747 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP); 1748 lhp = &ulw->ulw_lh; 1749 1750 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) { 1751 mdb_warn("failed to read log header at %p", lp); 1752 mdb_free(ulw, sizeof (umem_log_walk_t)); 1753 return (WALK_ERR); 1754 } 1755 1756 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1757 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP); 1758 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1; 1759 1760 if (mdb_vread(ulw->ulw_base, ulw->ulw_size, 1761 (uintptr_t)lhp->lh_base) == -1) { 1762 mdb_warn("failed to read log at base %p", lhp->lh_base); 1763 mdb_free(ulw->ulw_base, ulw->ulw_size); 1764 mdb_free(ulw, sizeof (umem_log_walk_t)); 1765 return (WALK_ERR); 1766 } 1767 1768 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1769 sizeof (umem_bufctl_audit_t *), UM_SLEEP); 1770 1771 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1772 caddr_t chunk = (caddr_t) 1773 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize); 1774 1775 for (j = 0; j < maxndx; j++) { 1776 /* LINTED align */ 1777 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk; 1778 chunk += UMEM_BUFCTL_AUDIT_SIZE; 1779 } 1780 } 1781 1782 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *), 1783 (int(*)(const void *, const void *))bufctlcmp); 1784 1785 ulw->ulw_maxndx = k; 1786 wsp->walk_data = ulw; 1787 1788 return (WALK_NEXT); 1789 } 1790 1791 int 1792 umem_log_walk_step(mdb_walk_state_t *wsp) 1793 { 1794 umem_log_walk_t *ulw = wsp->walk_data; 1795 umem_bufctl_audit_t *bcp; 1796 1797 if (ulw->ulw_ndx == ulw->ulw_maxndx) 1798 return (WALK_DONE); 1799 1800 bcp = ulw->ulw_sorted[ulw->ulw_ndx++]; 1801 1802 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base + 1803 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata)); 1804 } 1805 1806 void 1807 umem_log_walk_fini(mdb_walk_state_t *wsp) 1808 { 1809 umem_log_walk_t *ulw = wsp->walk_data; 1810 1811 mdb_free(ulw->ulw_base, ulw->ulw_size); 1812 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx * 1813 sizeof (umem_bufctl_audit_t *)); 1814 mdb_free(ulw, sizeof (umem_log_walk_t)); 1815 } 1816 1817 typedef struct allocdby_bufctl 
{ 1818 uintptr_t abb_addr; 1819 hrtime_t abb_ts; 1820 } allocdby_bufctl_t; 1821 1822 typedef struct allocdby_walk { 1823 const char *abw_walk; 1824 uintptr_t abw_thread; 1825 size_t abw_nbufs; 1826 size_t abw_size; 1827 allocdby_bufctl_t *abw_buf; 1828 size_t abw_ndx; 1829 } allocdby_walk_t; 1830 1831 int 1832 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp, 1833 allocdby_walk_t *abw) 1834 { 1835 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1836 return (WALK_NEXT); 1837 1838 if (abw->abw_nbufs == abw->abw_size) { 1839 allocdby_bufctl_t *buf; 1840 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1841 1842 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1843 1844 bcopy(abw->abw_buf, buf, oldsize); 1845 mdb_free(abw->abw_buf, oldsize); 1846 1847 abw->abw_size <<= 1; 1848 abw->abw_buf = buf; 1849 } 1850 1851 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1852 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1853 abw->abw_nbufs++; 1854 1855 return (WALK_NEXT); 1856 } 1857 1858 /*ARGSUSED*/ 1859 int 1860 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw) 1861 { 1862 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl, 1863 abw, addr) == -1) { 1864 mdb_warn("couldn't walk bufctl for cache %p", addr); 1865 return (WALK_DONE); 1866 } 1867 1868 return (WALK_NEXT); 1869 } 1870 1871 static int 1872 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1873 { 1874 if (lhs->abb_ts < rhs->abb_ts) 1875 return (1); 1876 if (lhs->abb_ts > rhs->abb_ts) 1877 return (-1); 1878 return (0); 1879 } 1880 1881 static int 1882 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1883 { 1884 allocdby_walk_t *abw; 1885 1886 if (wsp->walk_addr == NULL) { 1887 mdb_warn("allocdby walk doesn't support global walks\n"); 1888 return (WALK_ERR); 1889 } 1890 1891 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1892 1893 abw->abw_thread = wsp->walk_addr; 1894 abw->abw_walk = walk; 1895 abw->abw_size = 128; /* something reasonable */ 1896 abw->abw_buf = 1897 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1898 1899 wsp->walk_data = abw; 1900 1901 if (mdb_walk("umem_cache", 1902 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1903 mdb_warn("couldn't walk umem_cache"); 1904 allocdby_walk_fini(wsp); 1905 return (WALK_ERR); 1906 } 1907 1908 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1909 (int(*)(const void *, const void *))allocdby_cmp); 1910 1911 return (WALK_NEXT); 1912 } 1913 1914 int 1915 allocdby_walk_init(mdb_walk_state_t *wsp) 1916 { 1917 return (allocdby_walk_init_common(wsp, "bufctl")); 1918 } 1919 1920 int 1921 freedby_walk_init(mdb_walk_state_t *wsp) 1922 { 1923 return (allocdby_walk_init_common(wsp, "freectl")); 1924 } 1925 1926 int 1927 allocdby_walk_step(mdb_walk_state_t *wsp) 1928 { 1929 allocdby_walk_t *abw = wsp->walk_data; 1930 uintptr_t addr; 1931 umem_bufctl_audit_t *bcp; 1932 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 1933 1934 if (abw->abw_ndx == abw->abw_nbufs) 1935 return (WALK_DONE); 1936 1937 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1938 1939 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1940 mdb_warn("couldn't read bufctl at %p", addr); 1941 return (WALK_DONE); 1942 } 1943 1944 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata)); 1945 } 1946 1947 void 1948 allocdby_walk_fini(mdb_walk_state_t *wsp) 1949 { 1950 allocdby_walk_t *abw = wsp->walk_data; 1951 1952 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1953 
mdb_free(abw, sizeof (allocdby_walk_t)); 1954 } 1955 1956 /*ARGSUSED*/ 1957 int 1958 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored) 1959 { 1960 char c[MDB_SYM_NAMLEN]; 1961 GElf_Sym sym; 1962 int i; 1963 1964 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 1965 for (i = 0; i < bcp->bc_depth; i++) { 1966 if (mdb_lookup_by_addr(bcp->bc_stack[i], 1967 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 1968 continue; 1969 if (is_umem_sym(c, "umem_")) 1970 continue; 1971 mdb_printf("%s+0x%lx", 1972 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 1973 break; 1974 } 1975 mdb_printf("\n"); 1976 1977 return (WALK_NEXT); 1978 } 1979 1980 static int 1981 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 1982 { 1983 if (!(flags & DCMD_ADDRSPEC)) 1984 return (DCMD_USAGE); 1985 1986 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 1987 1988 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 1989 mdb_warn("can't walk '%s' for %p", w, addr); 1990 return (DCMD_ERR); 1991 } 1992 1993 return (DCMD_OK); 1994 } 1995 1996 /*ARGSUSED*/ 1997 int 1998 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1999 { 2000 return (allocdby_common(addr, flags, "allocdby")); 2001 } 2002 2003 /*ARGSUSED*/ 2004 int 2005 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2006 { 2007 return (allocdby_common(addr, flags, "freedby")); 2008 } 2009 2010 typedef struct whatis_info { 2011 mdb_whatis_t *wi_w; 2012 const umem_cache_t *wi_cache; 2013 const vmem_t *wi_vmem; 2014 vmem_t *wi_msb_arena; 2015 size_t wi_slab_size; 2016 int wi_slab_found; 2017 uint_t wi_freemem; 2018 } whatis_info_t; 2019 2020 /* call one of our dcmd functions with "-v" and the provided address */ 2021 static void 2022 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr) 2023 { 2024 mdb_arg_t a; 2025 a.a_type = MDB_TYPE_STRING; 2026 a.a_un.a_str = "-v"; 2027 2028 mdb_printf(":\n"); 2029 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a); 2030 } 2031 2032 static void 2033 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, 2034 uintptr_t baddr) 2035 { 2036 mdb_whatis_t *w = wi->wi_w; 2037 const umem_cache_t *cp = wi->wi_cache; 2038 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); 2039 2040 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT)); 2041 2042 mdb_whatis_report_object(w, maddr, addr, ""); 2043 2044 if (baddr != 0 && !call_printer) 2045 mdb_printf("bufctl %p ", baddr); 2046 2047 mdb_printf("%s from %s", 2048 (wi->wi_freemem == FALSE) ? 
"allocated" : "freed", cp->cache_name); 2049 2050 if (call_printer && baddr != 0) { 2051 whatis_call_printer(bufctl, baddr); 2052 return; 2053 } 2054 mdb_printf("\n"); 2055 } 2056 2057 /*ARGSUSED*/ 2058 static int 2059 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi) 2060 { 2061 mdb_whatis_t *w = wi->wi_w; 2062 2063 uintptr_t cur; 2064 size_t size = wi->wi_cache->cache_bufsize; 2065 2066 while (mdb_whatis_match(w, addr, size, &cur)) 2067 whatis_print_umem(wi, cur, addr, NULL); 2068 2069 return (WHATIS_WALKRET(w)); 2070 } 2071 2072 /*ARGSUSED*/ 2073 static int 2074 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi) 2075 { 2076 mdb_whatis_t *w = wi->wi_w; 2077 2078 uintptr_t cur; 2079 uintptr_t addr = (uintptr_t)bcp->bc_addr; 2080 size_t size = wi->wi_cache->cache_bufsize; 2081 2082 while (mdb_whatis_match(w, addr, size, &cur)) 2083 whatis_print_umem(wi, cur, addr, baddr); 2084 2085 return (WHATIS_WALKRET(w)); 2086 } 2087 2088 2089 static int 2090 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) 2091 { 2092 mdb_whatis_t *w = wi->wi_w; 2093 2094 size_t size = vs->vs_end - vs->vs_start; 2095 uintptr_t cur; 2096 2097 /* We're not interested in anything but alloc and free segments */ 2098 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) 2099 return (WALK_NEXT); 2100 2101 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { 2102 mdb_whatis_report_object(w, cur, vs->vs_start, ""); 2103 2104 /* 2105 * If we're not printing it seperately, provide the vmem_seg 2106 * pointer if it has a stack trace. 2107 */ 2108 if ((mdb_whatis_flags(w) & WHATIS_QUIET) && 2109 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 || 2110 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 2111 mdb_printf("vmem_seg %p ", addr); 2112 } 2113 2114 mdb_printf("%s from %s vmem arena", 2115 (vs->vs_type == VMEM_ALLOC) ? 
"allocated" : "freed", 2116 wi->wi_vmem->vm_name); 2117 2118 if (!mdb_whatis_flags(w) & WHATIS_QUIET) 2119 whatis_call_printer(vmem_seg, addr); 2120 else 2121 mdb_printf("\n"); 2122 } 2123 2124 return (WHATIS_WALKRET(w)); 2125 } 2126 2127 static int 2128 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) 2129 { 2130 mdb_whatis_t *w = wi->wi_w; 2131 const char *nm = vmem->vm_name; 2132 wi->wi_vmem = vmem; 2133 2134 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2135 mdb_printf("Searching vmem arena %s...\n", nm); 2136 2137 if (mdb_pwalk("vmem_seg", 2138 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { 2139 mdb_warn("can't walk vmem seg for %p", addr); 2140 return (WALK_NEXT); 2141 } 2142 2143 return (WHATIS_WALKRET(w)); 2144 } 2145 2146 /*ARGSUSED*/ 2147 static int 2148 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi) 2149 { 2150 mdb_whatis_t *w = wi->wi_w; 2151 2152 /* It must overlap with the slab data, or it's not interesting */ 2153 if (mdb_whatis_overlaps(w, 2154 (uintptr_t)sp->slab_base, wi->wi_slab_size)) { 2155 wi->wi_slab_found++; 2156 return (WALK_DONE); 2157 } 2158 return (WALK_NEXT); 2159 } 2160 2161 static int 2162 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2163 { 2164 mdb_whatis_t *w = wi->wi_w; 2165 char *walk, *freewalk; 2166 mdb_walk_cb_t func; 2167 int do_bufctl; 2168 2169 /* Override the '-b' flag as necessary */ 2170 if (!(c->cache_flags & UMF_HASH)) 2171 do_bufctl = FALSE; /* no bufctls to walk */ 2172 else if (c->cache_flags & UMF_AUDIT) 2173 do_bufctl = TRUE; /* we always want debugging info */ 2174 else 2175 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); 2176 2177 if (do_bufctl) { 2178 walk = "bufctl"; 2179 freewalk = "freectl"; 2180 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2181 } else { 2182 walk = "umem"; 2183 freewalk = "freemem"; 2184 func = (mdb_walk_cb_t)whatis_walk_umem; 2185 } 2186 2187 wi->wi_cache = c; 2188 2189 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2190 mdb_printf("Searching %s...\n", c->cache_name); 2191 2192 /* 2193 * If more then two buffers live on each slab, figure out if we're 2194 * interested in anything in any slab before doing the more expensive 2195 * umem/freemem (bufctl/freectl) walkers. 2196 */ 2197 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; 2198 if (!(c->cache_flags & UMF_HASH)) 2199 wi->wi_slab_size -= sizeof (umem_slab_t); 2200 2201 if ((wi->wi_slab_size / c->cache_chunksize) > 2) { 2202 wi->wi_slab_found = 0; 2203 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, 2204 addr) == -1) { 2205 mdb_warn("can't find umem_slab walker"); 2206 return (WALK_DONE); 2207 } 2208 if (wi->wi_slab_found == 0) 2209 return (WALK_NEXT); 2210 } 2211 2212 wi->wi_freemem = FALSE; 2213 if (mdb_pwalk(walk, func, wi, addr) == -1) { 2214 mdb_warn("can't find %s walker", walk); 2215 return (WALK_DONE); 2216 } 2217 2218 if (mdb_whatis_done(w)) 2219 return (WALK_DONE); 2220 2221 /* 2222 * We have searched for allocated memory; now search for freed memory. 
2223 */ 2224 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2225 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2226 2227 wi->wi_freemem = TRUE; 2228 2229 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2230 mdb_warn("can't find %s walker", freewalk); 2231 return (WALK_DONE); 2232 } 2233 2234 return (WHATIS_WALKRET(w)); 2235 } 2236 2237 static int 2238 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2239 { 2240 if (c->cache_arena == wi->wi_msb_arena || 2241 (c->cache_cflags & UMC_NOTOUCH)) 2242 return (WALK_NEXT); 2243 2244 return (whatis_walk_cache(addr, c, wi)); 2245 } 2246 2247 static int 2248 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2249 { 2250 if (c->cache_arena != wi->wi_msb_arena) 2251 return (WALK_NEXT); 2252 2253 return (whatis_walk_cache(addr, c, wi)); 2254 } 2255 2256 static int 2257 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2258 { 2259 if (c->cache_arena == wi->wi_msb_arena || 2260 !(c->cache_cflags & UMC_NOTOUCH)) 2261 return (WALK_NEXT); 2262 2263 return (whatis_walk_cache(addr, c, wi)); 2264 } 2265 2266 /*ARGSUSED*/ 2267 static int 2268 whatis_run_umem(mdb_whatis_t *w, void *ignored) 2269 { 2270 whatis_info_t wi; 2271 2272 bzero(&wi, sizeof (wi)); 2273 wi.wi_w = w; 2274 2275 /* umem's metadata is allocated from the umem_internal_arena */ 2276 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1) 2277 mdb_warn("unable to readvar \"umem_internal_arena\""); 2278 2279 /* 2280 * We process umem caches in the following order: 2281 * 2282 * non-UMC_NOTOUCH, non-metadata (typically the most interesting) 2283 * metadata (can be huge with UMF_AUDIT) 2284 * UMC_NOTOUCH, non-metadata (see umem_walk_all()) 2285 */ 2286 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2287 &wi) == -1 || 2288 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2289 &wi) == -1 || 2290 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2291 &wi) == -1) { 2292 mdb_warn("couldn't find umem_cache walker"); 2293 return (1); 2294 } 2295 return (0); 2296 } 2297 2298 /*ARGSUSED*/ 2299 static int 2300 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2301 { 2302 whatis_info_t wi; 2303 2304 bzero(&wi, sizeof (wi)); 2305 wi.wi_w = w; 2306 2307 if (mdb_walk("vmem_postfix", 2308 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2309 mdb_warn("couldn't find vmem_postfix walker"); 2310 return (1); 2311 } 2312 return (0); 2313 } 2314 2315 int 2316 umem_init(void) 2317 { 2318 mdb_walker_t w = { 2319 "umem_cache", "walk list of umem caches", umem_cache_walk_init, 2320 umem_cache_walk_step, umem_cache_walk_fini 2321 }; 2322 2323 if (mdb_add_walker(&w) == -1) { 2324 mdb_warn("failed to add umem_cache walker"); 2325 return (-1); 2326 } 2327 2328 if (umem_update_variables() == -1) 2329 return (-1); 2330 2331 /* install a callback so that our variables are always up-to-date */ 2332 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL); 2333 umem_statechange_cb(NULL); 2334 2335 /* 2336 * Register our ::whatis callbacks. 
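 * Once these callbacks are in place, the generic ::whatis dcmd can
 * attribute an arbitrary address either to a umem cache buffer (via
 * whatis_run_umem) or to a vmem segment (via whatis_run_vmem); both are
 * registered at WHATIS_PRIO_ALLOCATOR priority below.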
2337 */ 2338 mdb_whatis_register("umem", whatis_run_umem, NULL, 2339 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); 2340 mdb_whatis_register("vmem", whatis_run_vmem, NULL, 2341 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); 2342 2343 return (0); 2344 } 2345 2346 typedef struct umem_log_cpu { 2347 uintptr_t umc_low; 2348 uintptr_t umc_high; 2349 } umem_log_cpu_t; 2350 2351 int 2352 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc) 2353 { 2354 int i; 2355 2356 for (i = 0; i < umem_max_ncpus; i++) { 2357 if (addr >= umc[i].umc_low && addr < umc[i].umc_high) 2358 break; 2359 } 2360 2361 if (i == umem_max_ncpus) 2362 mdb_printf(" "); 2363 else 2364 mdb_printf("%3d", i); 2365 2366 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2367 b->bc_timestamp, b->bc_thread); 2368 2369 return (WALK_NEXT); 2370 } 2371 2372 /*ARGSUSED*/ 2373 int 2374 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2375 { 2376 umem_log_header_t lh; 2377 umem_cpu_log_header_t clh; 2378 uintptr_t lhp, clhp; 2379 umem_log_cpu_t *umc; 2380 int i; 2381 2382 if (umem_readvar(&lhp, "umem_transaction_log") == -1) { 2383 mdb_warn("failed to read 'umem_transaction_log'"); 2384 return (DCMD_ERR); 2385 } 2386 2387 if (lhp == NULL) { 2388 mdb_warn("no umem transaction log\n"); 2389 return (DCMD_ERR); 2390 } 2391 2392 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) { 2393 mdb_warn("failed to read log header at %p", lhp); 2394 return (DCMD_ERR); 2395 } 2396 2397 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2398 2399 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus, 2400 UM_SLEEP | UM_GC); 2401 2402 for (i = 0; i < umem_max_ncpus; i++) { 2403 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2404 mdb_warn("cannot read cpu %d's log header at %p", 2405 i, clhp); 2406 return (DCMD_ERR); 2407 } 2408 2409 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize + 2410 (uintptr_t)lh.lh_base; 2411 umc[i].umc_high = (uintptr_t)clh.clh_current; 2412 2413 clhp += sizeof (umem_cpu_log_header_t); 2414 } 2415 2416 if (DCMD_HDRSPEC(flags)) { 2417 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", 2418 "BUFADDR", "TIMESTAMP", "THREAD"); 2419 } 2420 2421 /* 2422 * If we have been passed an address, we'll just print out that 2423 * log entry. 2424 */ 2425 if (flags & DCMD_ADDRSPEC) { 2426 umem_bufctl_audit_t *bp; 2427 UMEM_LOCAL_BUFCTL_AUDIT(&bp); 2428 2429 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2430 mdb_warn("failed to read bufctl at %p", addr); 2431 return (DCMD_ERR); 2432 } 2433 2434 (void) umem_log_walk(addr, bp, umc); 2435 2436 return (DCMD_OK); 2437 } 2438 2439 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) { 2440 mdb_warn("can't find umem log walker"); 2441 return (DCMD_ERR); 2442 } 2443 2444 return (DCMD_OK); 2445 } 2446 2447 typedef struct bufctl_history_cb { 2448 int bhc_flags; 2449 int bhc_argc; 2450 const mdb_arg_t *bhc_argv; 2451 int bhc_ret; 2452 } bufctl_history_cb_t; 2453 2454 /*ARGSUSED*/ 2455 static int 2456 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2457 { 2458 bufctl_history_cb_t *bhc = arg; 2459 2460 bhc->bhc_ret = 2461 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2462 2463 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2464 2465 return ((bhc->bhc_ret == DCMD_OK)? 
WALK_NEXT : WALK_DONE); 2466 } 2467 2468 void 2469 bufctl_help(void) 2470 { 2471 mdb_printf("%s\n", 2472 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n"); 2473 mdb_dec_indent(2); 2474 mdb_printf("%<b>OPTIONS%</b>\n"); 2475 mdb_inc_indent(2); 2476 mdb_printf("%s", 2477 " -v Display the full content of the bufctl, including its stack trace\n" 2478 " -h retrieve the bufctl's transaction history, if available\n" 2479 " -a addr\n" 2480 " filter out bufctls not involving the buffer at addr\n" 2481 " -c caller\n" 2482 " filter out bufctls without the function/PC in their stack trace\n" 2483 " -e earliest\n" 2484 " filter out bufctls timestamped before earliest\n" 2485 " -l latest\n" 2486 " filter out bufctls timestamped after latest\n" 2487 " -t thread\n" 2488 " filter out bufctls not involving thread\n"); 2489 } 2490 2491 int 2492 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2493 { 2494 uint_t verbose = FALSE; 2495 uint_t history = FALSE; 2496 uint_t in_history = FALSE; 2497 uintptr_t caller = NULL, thread = NULL; 2498 uintptr_t laddr, haddr, baddr = NULL; 2499 hrtime_t earliest = 0, latest = 0; 2500 int i, depth; 2501 char c[MDB_SYM_NAMLEN]; 2502 GElf_Sym sym; 2503 umem_bufctl_audit_t *bcp; 2504 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 2505 2506 if (mdb_getopts(argc, argv, 2507 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2508 'h', MDB_OPT_SETBITS, TRUE, &history, 2509 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2510 'c', MDB_OPT_UINTPTR, &caller, 2511 't', MDB_OPT_UINTPTR, &thread, 2512 'e', MDB_OPT_UINT64, &earliest, 2513 'l', MDB_OPT_UINT64, &latest, 2514 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2515 return (DCMD_USAGE); 2516 2517 if (!(flags & DCMD_ADDRSPEC)) 2518 return (DCMD_USAGE); 2519 2520 if (in_history && !history) 2521 return (DCMD_USAGE); 2522 2523 if (history && !in_history) { 2524 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2525 UM_SLEEP | UM_GC); 2526 bufctl_history_cb_t bhc; 2527 2528 nargv[0].a_type = MDB_TYPE_STRING; 2529 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2530 2531 for (i = 0; i < argc; i++) 2532 nargv[i + 1] = argv[i]; 2533 2534 /* 2535 * When in history mode, we treat each element as if it 2536 * were in a separate loop, so that the headers group 2537 * bufctls with similar histories. 2538 */ 2539 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2540 bhc.bhc_argc = argc + 1; 2541 bhc.bhc_argv = nargv; 2542 bhc.bhc_ret = DCMD_OK; 2543 2544 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2545 addr) == -1) { 2546 mdb_warn("unable to walk bufctl_history"); 2547 return (DCMD_ERR); 2548 } 2549 2550 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2551 mdb_printf("\n"); 2552 2553 return (bhc.bhc_ret); 2554 } 2555 2556 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2557 if (verbose) { 2558 mdb_printf("%16s %16s %16s %16s\n" 2559 "%<u>%16s %16s %16s %16s%</u>\n", 2560 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2561 "", "CACHE", "LASTLOG", "CONTENTS"); 2562 } else { 2563 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n", 2564 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER"); 2565 } 2566 } 2567 2568 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2569 mdb_warn("couldn't read bufctl at %p", addr); 2570 return (DCMD_ERR); 2571 } 2572 2573 /* 2574 * Guard against bogus bc_depth in case the bufctl is corrupt or 2575 * the address does not really refer to a bufctl.
2576 */ 2577 depth = MIN(bcp->bc_depth, umem_stack_depth); 2578 2579 if (caller != NULL) { 2580 laddr = caller; 2581 haddr = caller + sizeof (caller); 2582 2583 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2584 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2585 /* 2586 * We were provided an exact symbol value; any 2587 * address in the function is valid. 2588 */ 2589 laddr = (uintptr_t)sym.st_value; 2590 haddr = (uintptr_t)sym.st_value + sym.st_size; 2591 } 2592 2593 for (i = 0; i < depth; i++) 2594 if (bcp->bc_stack[i] >= laddr && 2595 bcp->bc_stack[i] < haddr) 2596 break; 2597 2598 if (i == depth) 2599 return (DCMD_OK); 2600 } 2601 2602 if (thread != NULL && (uintptr_t)bcp->bc_thread != thread) 2603 return (DCMD_OK); 2604 2605 if (earliest != 0 && bcp->bc_timestamp < earliest) 2606 return (DCMD_OK); 2607 2608 if (latest != 0 && bcp->bc_timestamp > latest) 2609 return (DCMD_OK); 2610 2611 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr) 2612 return (DCMD_OK); 2613 2614 if (flags & DCMD_PIPE_OUT) { 2615 mdb_printf("%#r\n", addr); 2616 return (DCMD_OK); 2617 } 2618 2619 if (verbose) { 2620 mdb_printf( 2621 "%<b>%16p%</b> %16p %16llx %16d\n" 2622 "%16s %16p %16p %16p\n", 2623 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread, 2624 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents); 2625 2626 mdb_inc_indent(17); 2627 for (i = 0; i < depth; i++) 2628 mdb_printf("%a\n", bcp->bc_stack[i]); 2629 mdb_dec_indent(17); 2630 mdb_printf("\n"); 2631 } else { 2632 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr, 2633 bcp->bc_timestamp, bcp->bc_thread); 2634 2635 for (i = 0; i < depth; i++) { 2636 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2637 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2638 continue; 2639 if (is_umem_sym(c, "umem_")) 2640 continue; 2641 mdb_printf(" %a\n", bcp->bc_stack[i]); 2642 break; 2643 } 2644 2645 if (i >= depth) 2646 mdb_printf("\n"); 2647 } 2648 2649 return (DCMD_OK); 2650 } 2651 2652 /*ARGSUSED*/ 2653 int 2654 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2655 { 2656 mdb_arg_t a; 2657 2658 if (!(flags & DCMD_ADDRSPEC)) 2659 return (DCMD_USAGE); 2660 2661 if (argc != 0) 2662 return (DCMD_USAGE); 2663 2664 a.a_type = MDB_TYPE_STRING; 2665 a.a_un.a_str = "-v"; 2666 2667 return (bufctl(addr, flags, 1, &a)); 2668 } 2669 2670 typedef struct umem_verify { 2671 uint64_t *umv_buf; /* buffer to read cache contents into */ 2672 size_t umv_size; /* number of bytes in umv_buf */ 2673 int umv_corruption; /* > 0 if corruption found. */ 2674 int umv_besilent; /* report actual corruption sites */ 2675 struct umem_cache umv_cache; /* the cache we're operating on */ 2676 } umem_verify_t; 2677 2678 /* 2679 * verify_pattern() 2680 * verify that buf is filled with the pattern pat. 2681 */ 2682 static int64_t 2683 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2684 { 2685 /*LINTED*/ 2686 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 2687 uint64_t *buf; 2688 2689 for (buf = buf_arg; buf < bufend; buf++) 2690 if (*buf != pat) 2691 return ((uintptr_t)buf - (uintptr_t)buf_arg); 2692 return (-1); 2693 } 2694 2695 /* 2696 * verify_buftag() 2697 * verify that btp->bt_bxstat == (bcp ^ pat) 2698 */ 2699 static int 2700 verify_buftag(umem_buftag_t *btp, uintptr_t pat) 2701 { 2702 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 2703 } 2704 2705 /* 2706 * verify_free() 2707 * verify the integrity of a free block of memory by checking 2708 * that it is filled with 0xdeadbeef and that its buftag is sane. 
2709 */ 2710 /*ARGSUSED1*/ 2711 static int 2712 verify_free(uintptr_t addr, const void *data, void *private) 2713 { 2714 umem_verify_t *umv = (umem_verify_t *)private; 2715 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2716 int64_t corrupt; /* corruption offset */ 2717 umem_buftag_t *buftagp; /* ptr to buftag */ 2718 umem_cache_t *cp = &umv->umv_cache; 2719 int besilent = umv->umv_besilent; 2720 2721 /*LINTED*/ 2722 buftagp = UMEM_BUFTAG(cp, buf); 2723 2724 /* 2725 * Read the buffer to check. 2726 */ 2727 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2728 if (!besilent) 2729 mdb_warn("couldn't read %p", addr); 2730 return (WALK_NEXT); 2731 } 2732 2733 if ((corrupt = verify_pattern(buf, cp->cache_verify, 2734 UMEM_FREE_PATTERN)) >= 0) { 2735 if (!besilent) 2736 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 2737 addr, (uintptr_t)addr + corrupt); 2738 goto corrupt; 2739 } 2740 2741 if ((cp->cache_flags & UMF_HASH) && 2742 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) { 2743 if (!besilent) 2744 mdb_printf("buffer %p (free) seems to " 2745 "have a corrupt redzone pattern\n", addr); 2746 goto corrupt; 2747 } 2748 2749 /* 2750 * confirm bufctl pointer integrity. 2751 */ 2752 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) { 2753 if (!besilent) 2754 mdb_printf("buffer %p (free) has a corrupt " 2755 "buftag\n", addr); 2756 goto corrupt; 2757 } 2758 2759 return (WALK_NEXT); 2760 corrupt: 2761 umv->umv_corruption++; 2762 return (WALK_NEXT); 2763 } 2764 2765 /* 2766 * verify_alloc() 2767 * Verify that the buftag of an allocated buffer makes sense with respect 2768 * to the buffer. 2769 */ 2770 /*ARGSUSED1*/ 2771 static int 2772 verify_alloc(uintptr_t addr, const void *data, void *private) 2773 { 2774 umem_verify_t *umv = (umem_verify_t *)private; 2775 umem_cache_t *cp = &umv->umv_cache; 2776 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2777 /*LINTED*/ 2778 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf); 2779 uint32_t *ip = (uint32_t *)buftagp; 2780 uint8_t *bp = (uint8_t *)buf; 2781 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 2782 int besilent = umv->umv_besilent; 2783 2784 /* 2785 * Read the buffer to check. 2786 */ 2787 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2788 if (!besilent) 2789 mdb_warn("couldn't read %p", addr); 2790 return (WALK_NEXT); 2791 } 2792 2793 /* 2794 * There are two cases to handle: 2795 * 1. If the buf was alloc'd using umem_cache_alloc, it will have 2796 * 0xfeedfacefeedface at the end of it 2797 * 2. If the buf was alloc'd using umem_alloc, it will have 2798 * 0xbb just past the end of the region in use. At the buftag, 2799 * it will have 0xfeedface (or, if the whole buffer is in use, 2800 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 2801 * endianness), followed by 32 bits containing the offset of the 2802 * 0xbb byte in the buffer. 
2803 * 2804 * Finally, the two 32-bit words that comprise the second half of the 2805 * buftag should xor to UMEM_BUFTAG_ALLOC 2806 */ 2807 2808 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN) 2809 looks_ok = 1; 2810 else if (!UMEM_SIZE_VALID(ip[1])) 2811 size_ok = 0; 2812 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE) 2813 looks_ok = 1; 2814 else 2815 size_ok = 0; 2816 2817 if (!size_ok) { 2818 if (!besilent) 2819 mdb_printf("buffer %p (allocated) has a corrupt " 2820 "redzone size encoding\n", addr); 2821 goto corrupt; 2822 } 2823 2824 if (!looks_ok) { 2825 if (!besilent) 2826 mdb_printf("buffer %p (allocated) has a corrupt " 2827 "redzone signature\n", addr); 2828 goto corrupt; 2829 } 2830 2831 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) { 2832 if (!besilent) 2833 mdb_printf("buffer %p (allocated) has a " 2834 "corrupt buftag\n", addr); 2835 goto corrupt; 2836 } 2837 2838 return (WALK_NEXT); 2839 corrupt: 2840 umv->umv_corruption++; 2841 return (WALK_NEXT); 2842 } 2843 2844 /*ARGSUSED2*/ 2845 int 2846 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2847 { 2848 if (flags & DCMD_ADDRSPEC) { 2849 int check_alloc = 0, check_free = 0; 2850 umem_verify_t umv; 2851 2852 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache), 2853 addr) == -1) { 2854 mdb_warn("couldn't read umem_cache %p", addr); 2855 return (DCMD_ERR); 2856 } 2857 2858 umv.umv_size = umv.umv_cache.cache_buftag + 2859 sizeof (umem_buftag_t); 2860 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC); 2861 umv.umv_corruption = 0; 2862 2863 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) { 2864 check_alloc = 1; 2865 if (umv.umv_cache.cache_flags & UMF_DEADBEEF) 2866 check_free = 1; 2867 } else { 2868 if (!(flags & DCMD_LOOP)) { 2869 mdb_warn("cache %p (%s) does not have " 2870 "redzone checking enabled\n", addr, 2871 umv.umv_cache.cache_name); 2872 } 2873 return (DCMD_ERR); 2874 } 2875 2876 if (flags & DCMD_LOOP) { 2877 /* 2878 * table mode, don't print out every corrupt buffer 2879 */ 2880 umv.umv_besilent = 1; 2881 } else { 2882 mdb_printf("Summary for cache '%s'\n", 2883 umv.umv_cache.cache_name); 2884 mdb_inc_indent(2); 2885 umv.umv_besilent = 0; 2886 } 2887 2888 if (check_alloc) 2889 (void) mdb_pwalk("umem", verify_alloc, &umv, addr); 2890 if (check_free) 2891 (void) mdb_pwalk("freemem", verify_free, &umv, addr); 2892 2893 if (flags & DCMD_LOOP) { 2894 if (umv.umv_corruption == 0) { 2895 mdb_printf("%-*s %?p clean\n", 2896 UMEM_CACHE_NAMELEN, 2897 umv.umv_cache.cache_name, addr); 2898 } else { 2899 char *s = ""; /* optional s in "buffer[s]" */ 2900 if (umv.umv_corruption > 1) 2901 s = "s"; 2902 2903 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 2904 UMEM_CACHE_NAMELEN, 2905 umv.umv_cache.cache_name, addr, 2906 umv.umv_corruption, s); 2907 } 2908 } else { 2909 /* 2910 * This is the more verbose mode, when the user has 2911 * type addr::umem_verify. If the cache was clean, 2912 * nothing will have yet been printed. So say something. 2913 */ 2914 if (umv.umv_corruption == 0) 2915 mdb_printf("clean\n"); 2916 2917 mdb_dec_indent(2); 2918 } 2919 } else { 2920 /* 2921 * If the user didn't specify a cache to verify, we'll walk all 2922 * umem_cache's, specifying ourself as a callback for each... 
2923 * this is the equivalent of '::walk umem_cache .::umem_verify' 2924 */ 2925 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN, 2926 "Cache Name", "Addr", "Cache Integrity"); 2927 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL)); 2928 } 2929 2930 return (DCMD_OK); 2931 } 2932 2933 typedef struct vmem_node { 2934 struct vmem_node *vn_next; 2935 struct vmem_node *vn_parent; 2936 struct vmem_node *vn_sibling; 2937 struct vmem_node *vn_children; 2938 uintptr_t vn_addr; 2939 int vn_marked; 2940 vmem_t vn_vmem; 2941 } vmem_node_t; 2942 2943 typedef struct vmem_walk { 2944 vmem_node_t *vw_root; 2945 vmem_node_t *vw_current; 2946 } vmem_walk_t; 2947 2948 int 2949 vmem_walk_init(mdb_walk_state_t *wsp) 2950 { 2951 uintptr_t vaddr, paddr; 2952 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 2953 vmem_walk_t *vw; 2954 2955 if (umem_readvar(&vaddr, "vmem_list") == -1) { 2956 mdb_warn("couldn't read 'vmem_list'"); 2957 return (WALK_ERR); 2958 } 2959 2960 while (vaddr != NULL) { 2961 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 2962 vp->vn_addr = vaddr; 2963 vp->vn_next = head; 2964 head = vp; 2965 2966 if (vaddr == wsp->walk_addr) 2967 current = vp; 2968 2969 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 2970 mdb_warn("couldn't read vmem_t at %p", vaddr); 2971 goto err; 2972 } 2973 2974 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 2975 } 2976 2977 for (vp = head; vp != NULL; vp = vp->vn_next) { 2978 2979 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 2980 vp->vn_sibling = root; 2981 root = vp; 2982 continue; 2983 } 2984 2985 for (parent = head; parent != NULL; parent = parent->vn_next) { 2986 if (parent->vn_addr != paddr) 2987 continue; 2988 vp->vn_sibling = parent->vn_children; 2989 parent->vn_children = vp; 2990 vp->vn_parent = parent; 2991 break; 2992 } 2993 2994 if (parent == NULL) { 2995 mdb_warn("couldn't find %p's parent (%p)\n", 2996 vp->vn_addr, paddr); 2997 goto err; 2998 } 2999 } 3000 3001 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3002 vw->vw_root = root; 3003 3004 if (current != NULL) 3005 vw->vw_current = current; 3006 else 3007 vw->vw_current = root; 3008 3009 wsp->walk_data = vw; 3010 return (WALK_NEXT); 3011 err: 3012 for (vp = head; head != NULL; vp = head) { 3013 head = vp->vn_next; 3014 mdb_free(vp, sizeof (vmem_node_t)); 3015 } 3016 3017 return (WALK_ERR); 3018 } 3019 3020 int 3021 vmem_walk_step(mdb_walk_state_t *wsp) 3022 { 3023 vmem_walk_t *vw = wsp->walk_data; 3024 vmem_node_t *vp; 3025 int rval; 3026 3027 if ((vp = vw->vw_current) == NULL) 3028 return (WALK_DONE); 3029 3030 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3031 3032 if (vp->vn_children != NULL) { 3033 vw->vw_current = vp->vn_children; 3034 return (rval); 3035 } 3036 3037 do { 3038 vw->vw_current = vp->vn_sibling; 3039 vp = vp->vn_parent; 3040 } while (vw->vw_current == NULL && vp != NULL); 3041 3042 return (rval); 3043 } 3044 3045 /* 3046 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3047 * children are visited before their parent. We perform the postfix walk 3048 * iteratively (rather than recursively) to allow mdb to regain control 3049 * after each callback. 3050 */ 3051 int 3052 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3053 { 3054 vmem_walk_t *vw = wsp->walk_data; 3055 vmem_node_t *vp = vw->vw_current; 3056 int rval; 3057 3058 /* 3059 * If this node is marked, then we know that we have already visited 3060 * all of its children. 
If the node has any siblings, they need to 3061 * be visited next; otherwise, we need to visit the parent. Note 3062 * that vp->vn_marked will only be zero on the first invocation of 3063 * the step function. 3064 */ 3065 if (vp->vn_marked) { 3066 if (vp->vn_sibling != NULL) 3067 vp = vp->vn_sibling; 3068 else if (vp->vn_parent != NULL) 3069 vp = vp->vn_parent; 3070 else { 3071 /* 3072 * We have neither a parent, nor a sibling, and we 3073 * have already been visited; we're done. 3074 */ 3075 return (WALK_DONE); 3076 } 3077 } 3078 3079 /* 3080 * Before we visit this node, visit its children. 3081 */ 3082 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3083 vp = vp->vn_children; 3084 3085 vp->vn_marked = 1; 3086 vw->vw_current = vp; 3087 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3088 3089 return (rval); 3090 } 3091 3092 void 3093 vmem_walk_fini(mdb_walk_state_t *wsp) 3094 { 3095 vmem_walk_t *vw = wsp->walk_data; 3096 vmem_node_t *root = vw->vw_root; 3097 int done; 3098 3099 if (root == NULL) 3100 return; 3101 3102 if ((vw->vw_root = root->vn_children) != NULL) 3103 vmem_walk_fini(wsp); 3104 3105 vw->vw_root = root->vn_sibling; 3106 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3107 mdb_free(root, sizeof (vmem_node_t)); 3108 3109 if (done) { 3110 mdb_free(vw, sizeof (vmem_walk_t)); 3111 } else { 3112 vmem_walk_fini(wsp); 3113 } 3114 } 3115 3116 typedef struct vmem_seg_walk { 3117 uint8_t vsw_type; 3118 uintptr_t vsw_start; 3119 uintptr_t vsw_current; 3120 } vmem_seg_walk_t; 3121 3122 /*ARGSUSED*/ 3123 int 3124 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3125 { 3126 vmem_seg_walk_t *vsw; 3127 3128 if (wsp->walk_addr == NULL) { 3129 mdb_warn("vmem_%s does not support global walks\n", name); 3130 return (WALK_ERR); 3131 } 3132 3133 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3134 3135 vsw->vsw_type = type; 3136 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0); 3137 vsw->vsw_current = vsw->vsw_start; 3138 3139 return (WALK_NEXT); 3140 } 3141 3142 /* 3143 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
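 * VMEM_NONE therefore serves as a wildcard: vmem_seg_walk_step() only
 * filters on vsw_type when it is non-zero, so the plain "vmem_seg" walk
 * visits segments of every type.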
3144 */ 3145 #define VMEM_NONE 0 3146 3147 int 3148 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3149 { 3150 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3151 } 3152 3153 int 3154 vmem_free_walk_init(mdb_walk_state_t *wsp) 3155 { 3156 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3157 } 3158 3159 int 3160 vmem_span_walk_init(mdb_walk_state_t *wsp) 3161 { 3162 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3163 } 3164 3165 int 3166 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3167 { 3168 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3169 } 3170 3171 int 3172 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3173 { 3174 vmem_seg_t seg; 3175 vmem_seg_walk_t *vsw = wsp->walk_data; 3176 uintptr_t addr = vsw->vsw_current; 3177 static size_t seg_size = 0; 3178 int rval; 3179 3180 if (!seg_size) { 3181 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) { 3182 mdb_warn("failed to read 'vmem_seg_size'"); 3183 seg_size = sizeof (vmem_seg_t); 3184 } 3185 } 3186 3187 if (seg_size < sizeof (seg)) 3188 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3189 3190 if (mdb_vread(&seg, seg_size, addr) == -1) { 3191 mdb_warn("couldn't read vmem_seg at %p", addr); 3192 return (WALK_ERR); 3193 } 3194 3195 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3196 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3197 rval = WALK_NEXT; 3198 } else { 3199 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3200 } 3201 3202 if (vsw->vsw_current == vsw->vsw_start) 3203 return (WALK_DONE); 3204 3205 return (rval); 3206 } 3207 3208 void 3209 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3210 { 3211 vmem_seg_walk_t *vsw = wsp->walk_data; 3212 3213 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3214 } 3215 3216 #define VMEM_NAMEWIDTH 22 3217 3218 int 3219 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3220 { 3221 vmem_t v, parent; 3222 uintptr_t paddr; 3223 int ident = 0; 3224 char c[VMEM_NAMEWIDTH]; 3225 3226 if (!(flags & DCMD_ADDRSPEC)) { 3227 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3228 mdb_warn("can't walk vmem"); 3229 return (DCMD_ERR); 3230 } 3231 return (DCMD_OK); 3232 } 3233 3234 if (DCMD_HDRSPEC(flags)) 3235 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3236 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3237 "TOTAL", "SUCCEED", "FAIL"); 3238 3239 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3240 mdb_warn("couldn't read vmem at %p", addr); 3241 return (DCMD_ERR); 3242 } 3243 3244 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3245 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3246 mdb_warn("couldn't trace %p's ancestry", addr); 3247 ident = 0; 3248 break; 3249 } 3250 paddr = (uintptr_t)parent.vm_source; 3251 } 3252 3253 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3254 3255 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3256 addr, VMEM_NAMEWIDTH, c, 3257 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total, 3258 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail); 3259 3260 return (DCMD_OK); 3261 } 3262 3263 void 3264 vmem_seg_help(void) 3265 { 3266 mdb_printf("%s\n", 3267 "Display the contents of vmem_seg_ts, with optional filtering.\n" 3268 "\n" 3269 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3270 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3271 "information.\n"); 3272 mdb_dec_indent(2); 3273 mdb_printf("%<b>OPTIONS%</b>\n"); 3274 mdb_inc_indent(2); 3275 mdb_printf("%s", 3276 " -v Display the full content of the vmem_seg, including its stack trace\n" 3277 " -s report the size of the segment, instead of the end address\n" 3278 " -c caller\n" 3279 " filter out segments without the function/PC in their stack trace\n" 3280 " -e earliest\n" 3281 " filter out segments timestamped before earliest\n" 3282 " -l latest\n" 3283 " filter out segments timestamped after latest\n" 3284 " -m minsize\n" 3285 " filter out segments smaller than minsize\n" 3286 " -M maxsize\n" 3287 " filter out segments larger than maxsize\n" 3288 " -t thread\n" 3289 " filter out segments not involving thread\n" 3290 " -T type\n" 3291 " filter out segments not of type 'type'\n" 3292 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3293 } 3294 3295 3296 /*ARGSUSED*/ 3297 int 3298 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3299 { 3300 vmem_seg_t vs; 3301 uintptr_t *stk = vs.vs_stack; 3302 uintptr_t sz; 3303 uint8_t t; 3304 const char *type = NULL; 3305 GElf_Sym sym; 3306 char c[MDB_SYM_NAMLEN]; 3307 int no_debug; 3308 int i; 3309 int depth; 3310 uintptr_t laddr, haddr; 3311 3312 uintptr_t caller = NULL, thread = NULL; 3313 uintptr_t minsize = 0, maxsize = 0; 3314 3315 hrtime_t earliest = 0, latest = 0; 3316 3317 uint_t size = 0; 3318 uint_t verbose = 0; 3319 3320 if (!(flags & DCMD_ADDRSPEC)) 3321 return (DCMD_USAGE); 3322 3323 if (mdb_getopts(argc, argv, 3324 'c', MDB_OPT_UINTPTR, &caller, 3325 'e', MDB_OPT_UINT64, &earliest, 3326 'l', MDB_OPT_UINT64, &latest, 3327 's', MDB_OPT_SETBITS, TRUE, &size, 3328 'm', MDB_OPT_UINTPTR, &minsize, 3329 'M', MDB_OPT_UINTPTR, &maxsize, 3330 't', MDB_OPT_UINTPTR, &thread, 3331 'T', MDB_OPT_STR, &type, 3332 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3333 NULL) != argc) 3334 return (DCMD_USAGE); 3335 3336 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3337 if (verbose) { 3338 mdb_printf("%16s %4s %16s %16s %16s\n" 3339 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3340 "ADDR", "TYPE", "START", "END", "SIZE", 3341 "", "", "THREAD", "TIMESTAMP", ""); 3342 } else { 3343 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3344 "START", size?
"SIZE" : "END", "WHO"); 3345 } 3346 } 3347 3348 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3349 mdb_warn("couldn't read vmem_seg at %p", addr); 3350 return (DCMD_ERR); 3351 } 3352 3353 if (type != NULL) { 3354 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3355 t = VMEM_ALLOC; 3356 else if (strcmp(type, "FREE") == 0) 3357 t = VMEM_FREE; 3358 else if (strcmp(type, "SPAN") == 0) 3359 t = VMEM_SPAN; 3360 else if (strcmp(type, "ROTR") == 0 || 3361 strcmp(type, "ROTOR") == 0) 3362 t = VMEM_ROTOR; 3363 else if (strcmp(type, "WLKR") == 0 || 3364 strcmp(type, "WALKER") == 0) 3365 t = VMEM_WALKER; 3366 else { 3367 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3368 type); 3369 return (DCMD_ERR); 3370 } 3371 3372 if (vs.vs_type != t) 3373 return (DCMD_OK); 3374 } 3375 3376 sz = vs.vs_end - vs.vs_start; 3377 3378 if (minsize != 0 && sz < minsize) 3379 return (DCMD_OK); 3380 3381 if (maxsize != 0 && sz > maxsize) 3382 return (DCMD_OK); 3383 3384 t = vs.vs_type; 3385 depth = vs.vs_depth; 3386 3387 /* 3388 * debug info, when present, is only accurate for VMEM_ALLOC segments 3389 */ 3390 no_debug = (t != VMEM_ALLOC) || 3391 (depth == 0 || depth > VMEM_STACK_DEPTH); 3392 3393 if (no_debug) { 3394 if (caller != NULL || thread != NULL || earliest != 0 || 3395 latest != 0) 3396 return (DCMD_OK); /* not enough info */ 3397 } else { 3398 if (caller != NULL) { 3399 laddr = caller; 3400 haddr = caller + sizeof (caller); 3401 3402 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3403 sizeof (c), &sym) != -1 && 3404 caller == (uintptr_t)sym.st_value) { 3405 /* 3406 * We were provided an exact symbol value; any 3407 * address in the function is valid. 3408 */ 3409 laddr = (uintptr_t)sym.st_value; 3410 haddr = (uintptr_t)sym.st_value + sym.st_size; 3411 } 3412 3413 for (i = 0; i < depth; i++) 3414 if (vs.vs_stack[i] >= laddr && 3415 vs.vs_stack[i] < haddr) 3416 break; 3417 3418 if (i == depth) 3419 return (DCMD_OK); 3420 } 3421 3422 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3423 return (DCMD_OK); 3424 3425 if (earliest != 0 && vs.vs_timestamp < earliest) 3426 return (DCMD_OK); 3427 3428 if (latest != 0 && vs.vs_timestamp > latest) 3429 return (DCMD_OK); 3430 } 3431 3432 type = (t == VMEM_ALLOC ? "ALLC" : 3433 t == VMEM_FREE ? "FREE" : 3434 t == VMEM_SPAN ? "SPAN" : 3435 t == VMEM_ROTOR ? "ROTR" : 3436 t == VMEM_WALKER ? "WLKR" : 3437 "????"); 3438 3439 if (flags & DCMD_PIPE_OUT) { 3440 mdb_printf("%#r\n", addr); 3441 return (DCMD_OK); 3442 } 3443 3444 if (verbose) { 3445 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3446 addr, type, vs.vs_start, vs.vs_end, sz); 3447 3448 if (no_debug) 3449 return (DCMD_OK); 3450 3451 mdb_printf("%16s %4s %16d %16llx\n", 3452 "", "", vs.vs_thread, vs.vs_timestamp); 3453 3454 mdb_inc_indent(17); 3455 for (i = 0; i < depth; i++) { 3456 mdb_printf("%a\n", stk[i]); 3457 } 3458 mdb_dec_indent(17); 3459 mdb_printf("\n"); 3460 } else { 3461 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3462 vs.vs_start, size? 
sz : vs.vs_end); 3463 3464 if (no_debug) { 3465 mdb_printf("\n"); 3466 return (DCMD_OK); 3467 } 3468 3469 for (i = 0; i < depth; i++) { 3470 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3471 c, sizeof (c), &sym) == -1) 3472 continue; 3473 if (is_umem_sym(c, "vmem_")) 3474 continue; 3475 break; 3476 } 3477 mdb_printf(" %a\n", stk[i]); 3478 } 3479 return (DCMD_OK); 3480 } 3481 3482 /*ARGSUSED*/ 3483 static int 3484 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest) 3485 { 3486 char name[UMEM_CACHE_NAMELEN + 1]; 3487 hrtime_t delta; 3488 int i, depth; 3489 3490 if (bcp->bc_timestamp == 0) 3491 return (WALK_DONE); 3492 3493 if (*newest == 0) 3494 *newest = bcp->bc_timestamp; 3495 3496 delta = *newest - bcp->bc_timestamp; 3497 depth = MIN(bcp->bc_depth, umem_stack_depth); 3498 3499 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3500 &bcp->bc_cache->cache_name) <= 0) 3501 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3502 3503 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3504 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3505 3506 for (i = 0; i < depth; i++) 3507 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3508 3509 return (WALK_NEXT); 3510 } 3511 3512 int 3513 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3514 { 3515 const char *logname = "umem_transaction_log"; 3516 hrtime_t newest = 0; 3517 3518 if ((flags & DCMD_ADDRSPEC) || argc > 1) 3519 return (DCMD_USAGE); 3520 3521 if (argc > 0) { 3522 if (argv->a_type != MDB_TYPE_STRING) 3523 return (DCMD_USAGE); 3524 if (strcmp(argv->a_un.a_str, "fail") == 0) 3525 logname = "umem_failure_log"; 3526 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3527 logname = "umem_slab_log"; 3528 else 3529 return (DCMD_USAGE); 3530 } 3531 3532 if (umem_readvar(&addr, logname) == -1) { 3533 mdb_warn("failed to read %s log header pointer", logname); 3534 return (DCMD_ERR); 3535 } 3536 3537 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) { 3538 mdb_warn("failed to walk umem log"); 3539 return (DCMD_ERR); 3540 } 3541 3542 return (DCMD_OK); 3543 } 3544 3545 /* 3546 * As the final lure for die-hard crash(1M) users, we provide ::umausers here. 3547 * The first piece is a structure which we use to accumulate umem_cache_t 3548 * addresses of interest. The umc_add is used as a callback for the umem_cache 3549 * walker; we either add all caches, or ones named explicitly as arguments. 3550 */ 3551 3552 typedef struct umclist { 3553 const char *umc_name; /* Name to match (or NULL) */ 3554 uintptr_t *umc_caches; /* List of umem_cache_t addrs */ 3555 int umc_nelems; /* Num entries in umc_caches */ 3556 int umc_size; /* Size of umc_caches array */ 3557 } umclist_t; 3558 3559 static int 3560 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc) 3561 { 3562 void *p; 3563 int s; 3564 3565 if (umc->umc_name == NULL || 3566 strcmp(cp->cache_name, umc->umc_name) == 0) { 3567 /* 3568 * If we have a match, grow our array (if necessary), and then 3569 * add the virtual address of the matching cache to our list. 3570 */ 3571 if (umc->umc_nelems >= umc->umc_size) { 3572 s = umc->umc_size ? umc->umc_size * 2 : 256; 3573 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3574 3575 bcopy(umc->umc_caches, p, 3576 sizeof (uintptr_t) * umc->umc_size); 3577 3578 umc->umc_caches = p; 3579 umc->umc_size = s; 3580 } 3581 3582 umc->umc_caches[umc->umc_nelems++] = addr; 3583 return (umc->umc_name ?
WALK_DONE : WALK_NEXT); 3584 } 3585 3586 return (WALK_NEXT); 3587 } 3588 3589 /* 3590 * The second piece of ::umausers is a hash table of allocations. Each 3591 * allocation owner is identified by its stack trace and data_size. We then 3592 * track the total bytes of all such allocations, and the number of allocations 3593 * to report at the end. Once we have a list of caches, we walk through the 3594 * allocated bufctls of each, and update our hash table accordingly. 3595 */ 3596 3597 typedef struct umowner { 3598 struct umowner *umo_head; /* First hash elt in bucket */ 3599 struct umowner *umo_next; /* Next hash elt in chain */ 3600 size_t umo_signature; /* Hash table signature */ 3601 uint_t umo_num; /* Number of allocations */ 3602 size_t umo_data_size; /* Size of each allocation */ 3603 size_t umo_total_size; /* Total bytes of allocation */ 3604 int umo_depth; /* Depth of stack trace */ 3605 uintptr_t *umo_stack; /* Stack trace */ 3606 } umowner_t; 3607 3608 typedef struct umusers { 3609 const umem_cache_t *umu_cache; /* Current umem cache */ 3610 umowner_t *umu_hash; /* Hash table of owners */ 3611 uintptr_t *umu_stacks; /* stacks for owners */ 3612 int umu_nelems; /* Number of entries in use */ 3613 int umu_size; /* Total number of entries */ 3614 } umusers_t; 3615 3616 static void 3617 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp, 3618 size_t size, size_t data_size) 3619 { 3620 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3621 size_t bucket, signature = data_size; 3622 umowner_t *umo, *umoend; 3623 3624 /* 3625 * If the hash table is full, double its size and rehash everything. 3626 */ 3627 if (umu->umu_nelems >= umu->umu_size) { 3628 int s = umu->umu_size ? umu->umu_size * 2 : 1024; 3629 size_t umowner_size = sizeof (umowner_t); 3630 size_t trace_size = umem_stack_depth * sizeof (uintptr_t); 3631 uintptr_t *new_stacks; 3632 3633 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC); 3634 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC); 3635 3636 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size); 3637 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size); 3638 umu->umu_hash = umo; 3639 umu->umu_stacks = new_stacks; 3640 umu->umu_size = s; 3641 3642 umoend = umu->umu_hash + umu->umu_size; 3643 for (umo = umu->umu_hash; umo < umoend; umo++) { 3644 umo->umo_head = NULL; 3645 umo->umo_stack = &umu->umu_stacks[ 3646 umem_stack_depth * (umo - umu->umu_hash)]; 3647 } 3648 3649 umoend = umu->umu_hash + umu->umu_nelems; 3650 for (umo = umu->umu_hash; umo < umoend; umo++) { 3651 bucket = umo->umo_signature & (umu->umu_size - 1); 3652 umo->umo_next = umu->umu_hash[bucket].umo_head; 3653 umu->umu_hash[bucket].umo_head = umo; 3654 } 3655 } 3656 3657 /* 3658 * Finish computing the hash signature from the stack trace, and then 3659 * see if the owner is in the hash table. If so, update our stats. 
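	 * Since the signature is only a sum of the data size and stack PCs,
	 * two distinct owners can collide; the 'difference' computation
	 * below re-checks size, depth, and every frame before merging.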
3660 */ 3661 for (i = 0; i < depth; i++) 3662 signature += bcp->bc_stack[i]; 3663 3664 bucket = signature & (umu->umu_size - 1); 3665 3666 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) { 3667 if (umo->umo_signature == signature) { 3668 size_t difference = 0; 3669 3670 difference |= umo->umo_data_size - data_size; 3671 difference |= umo->umo_depth - depth; 3672 3673 for (i = 0; i < depth; i++) { 3674 difference |= umo->umo_stack[i] - 3675 bcp->bc_stack[i]; 3676 } 3677 3678 if (difference == 0) { 3679 umo->umo_total_size += size; 3680 umo->umo_num++; 3681 return; 3682 } 3683 } 3684 } 3685 3686 /* 3687 * If the owner is not yet hashed, grab the next element and fill it 3688 * in based on the allocation information. 3689 */ 3690 umo = &umu->umu_hash[umu->umu_nelems++]; 3691 umo->umo_next = umu->umu_hash[bucket].umo_head; 3692 umu->umu_hash[bucket].umo_head = umo; 3693 3694 umo->umo_signature = signature; 3695 umo->umo_num = 1; 3696 umo->umo_data_size = data_size; 3697 umo->umo_total_size = size; 3698 umo->umo_depth = depth; 3699 3700 for (i = 0; i < depth; i++) 3701 umo->umo_stack[i] = bcp->bc_stack[i]; 3702 } 3703 3704 /* 3705 * When ::umausers is invoked without the -f flag, we simply update our hash 3706 * table with the information from each allocated bufctl. 3707 */ 3708 /*ARGSUSED*/ 3709 static int 3710 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3711 { 3712 const umem_cache_t *cp = umu->umu_cache; 3713 3714 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3715 return (WALK_NEXT); 3716 } 3717 3718 /* 3719 * When ::umausers is invoked with the -f flag, we print out the information 3720 * for each bufctl as well as updating the hash table. 3721 */ 3722 static int 3723 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3724 { 3725 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3726 const umem_cache_t *cp = umu->umu_cache; 3727 3728 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 3729 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 3730 3731 for (i = 0; i < depth; i++) 3732 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3733 3734 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3735 return (WALK_NEXT); 3736 } 3737 3738 /* 3739 * We sort our results by allocation size before printing them. 3740 */ 3741 static int 3742 umownercmp(const void *lp, const void *rp) 3743 { 3744 const umowner_t *lhs = lp; 3745 const umowner_t *rhs = rp; 3746 3747 return (rhs->umo_total_size - lhs->umo_total_size); 3748 } 3749 3750 /* 3751 * The main engine of ::umausers is relatively straightforward: First we 3752 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we 3753 * iterate over the allocated bufctls of each cache in the list. Finally, 3754 * we sort and print our results. 
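 *
 * For example (cache name illustrative), "::umausers -f umem_alloc_256"
 * restricts the report to a single cache and prints each bufctl's stack
 * as it is tallied; with no cache arguments, every UMF_AUDIT cache is
 * examined.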
3755 */ 3756 /*ARGSUSED*/ 3757 int 3758 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3759 { 3760 int mem_threshold = 8192; /* Minimum # bytes for printing */ 3761 int cnt_threshold = 100; /* Minimum # blocks for printing */ 3762 int audited_caches = 0; /* Number of UMF_AUDIT caches found */ 3763 int do_all_caches = 1; /* Do all caches (no arguments) */ 3764 int opt_e = FALSE; /* Include "small" users */ 3765 int opt_f = FALSE; /* Print stack traces */ 3766 3767 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1; 3768 umowner_t *umo, *umoend; 3769 int i, oelems; 3770 3771 umclist_t umc; 3772 umusers_t umu; 3773 3774 if (flags & DCMD_ADDRSPEC) 3775 return (DCMD_USAGE); 3776 3777 bzero(&umc, sizeof (umc)); 3778 bzero(&umu, sizeof (umu)); 3779 3780 while ((i = mdb_getopts(argc, argv, 3781 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 3782 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 3783 3784 argv += i; /* skip past options we just processed */ 3785 argc -= i; /* adjust argc */ 3786 3787 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 3788 return (DCMD_USAGE); 3789 3790 oelems = umc.umc_nelems; 3791 umc.umc_name = argv->a_un.a_str; 3792 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3793 3794 if (umc.umc_nelems == oelems) { 3795 mdb_warn("unknown umem cache: %s\n", umc.umc_name); 3796 return (DCMD_ERR); 3797 } 3798 3799 do_all_caches = 0; 3800 argv++; 3801 argc--; 3802 } 3803 3804 if (opt_e) 3805 mem_threshold = cnt_threshold = 0; 3806 3807 if (opt_f) 3808 callback = (mdb_walk_cb_t)umause2; 3809 3810 if (do_all_caches) { 3811 umc.umc_name = NULL; /* match all cache names */ 3812 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3813 } 3814 3815 for (i = 0; i < umc.umc_nelems; i++) { 3816 uintptr_t cp = umc.umc_caches[i]; 3817 umem_cache_t c; 3818 3819 if (mdb_vread(&c, sizeof (c), cp) == -1) { 3820 mdb_warn("failed to read cache at %p", cp); 3821 continue; 3822 } 3823 3824 if (!(c.cache_flags & UMF_AUDIT)) { 3825 if (!do_all_caches) { 3826 mdb_warn("UMF_AUDIT is not enabled for %s\n", 3827 c.cache_name); 3828 } 3829 continue; 3830 } 3831 3832 umu.umu_cache = &c; 3833 (void) mdb_pwalk("bufctl", callback, &umu, cp); 3834 audited_caches++; 3835 } 3836 3837 if (audited_caches == 0 && do_all_caches) { 3838 mdb_warn("UMF_AUDIT is not enabled for any caches\n"); 3839 return (DCMD_ERR); 3840 } 3841 3842 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp); 3843 umoend = umu.umu_hash + umu.umu_nelems; 3844 3845 for (umo = umu.umu_hash; umo < umoend; umo++) { 3846 if (umo->umo_total_size < mem_threshold && 3847 umo->umo_num < cnt_threshold) 3848 continue; 3849 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 3850 umo->umo_total_size, umo->umo_num, umo->umo_data_size); 3851 for (i = 0; i < umo->umo_depth; i++) 3852 mdb_printf("\t %a\n", umo->umo_stack[i]); 3853 } 3854 3855 return (DCMD_OK); 3856 } 3857 3858 struct malloc_data { 3859 uint32_t malloc_size; 3860 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */ 3861 }; 3862 3863 #ifdef _LP64 3864 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data)) 3865 #else 3866 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data)) 3867 #endif 3868 3869 typedef struct umem_malloc_info { 3870 size_t um_total; /* total allocated buffers */ 3871 size_t um_malloc; /* malloc buffers */ 3872 size_t um_malloc_size; /* sum of malloc buffer sizes */ 3873 size_t um_malloc_overhead; /* sum of in-chunk overheads */ 3874 3875 umem_cache_t 
*um_cp; 3876 3877 uint_t *um_bucket; 3878 } umem_malloc_info_t; 3879 3880 static void 3881 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc, 3882 size_t maxbuckets, size_t minbucketsize, int geometric) 3883 { 3884 uint64_t um_malloc; 3885 int minb = -1; 3886 int maxb = -1; 3887 int buckets; 3888 int nbucks; 3889 int i; 3890 int b; 3891 const int *distarray; 3892 3893 minb = (int)minmalloc; 3894 maxb = (int)maxmalloc; 3895 3896 nbucks = buckets = maxb - minb + 1; 3897 3898 um_malloc = 0; 3899 for (b = minb; b <= maxb; b++) 3900 um_malloc += um_bucket[b]; 3901 3902 if (maxbuckets != 0) 3903 buckets = MIN(buckets, maxbuckets); 3904 3905 if (minbucketsize > 1) { 3906 buckets = MIN(buckets, nbucks/minbucketsize); 3907 if (buckets == 0) { 3908 buckets = 1; 3909 minbucketsize = nbucks; 3910 } 3911 } 3912 3913 if (geometric) 3914 distarray = dist_geometric(buckets, minb, maxb, minbucketsize); 3915 else 3916 distarray = dist_linear(buckets, minb, maxb); 3917 3918 dist_print_header("malloc size", 11, "count"); 3919 for (i = 0; i < buckets; i++) { 3920 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11); 3921 } 3922 mdb_printf("\n"); 3923 } 3924 3925 /* 3926 * A malloc()ed buffer looks like: 3927 * 3928 * <----------- mi.malloc_size ---> 3929 * <----------- cp.cache_bufsize ------------------> 3930 * <----------- cp.cache_chunksize --------------------------------> 3931 * +-------+-----------------------+---------------+---------------+ 3932 * |/tag///| mallocsz |/round-off/////|/debug info////| 3933 * +-------+---------------------------------------+---------------+ 3934 * <-- usable space ------> 3935 * 3936 * mallocsz is the argument to malloc(3C). 3937 * mi.malloc_size is the actual size passed to umem_alloc(), which 3938 * is rounded up to the smallest available cache size, which is 3939 * cache_bufsize. If there is debugging or alignment overhead in 3940 * the cache, that is reflected in a larger cache_chunksize. 3941 * 3942 * The tag at the beginning of the buffer is either 8-bytes or 16-bytes, 3943 * depending upon the ISA's alignment requirements. For 32-bit allocations, 3944 * it is always a 8-byte tag. For 64-bit allocations larger than 8 bytes, 3945 * the tag has 8 bytes of padding before it. 3946 * 3947 * 32-byte, 64-byte buffers <= 8 bytes: 3948 * +-------+-------+--------- ... 3949 * |/size//|/stat//| mallocsz ... 3950 * +-------+-------+--------- ... 3951 * ^ 3952 * pointer returned from malloc(3C) 3953 * 3954 * 64-byte buffers > 8 bytes: 3955 * +---------------+-------+-------+--------- ... 3956 * |/padding///////|/size//|/stat//| mallocsz ... 3957 * +---------------+-------+-------+--------- ... 3958 * ^ 3959 * pointer returned from malloc(3C) 3960 * 3961 * The "size" field is "malloc_size", which is mallocsz + the padding. 3962 * The "stat" field is derived from malloc_size, and functions as a 3963 * validation that this buffer is actually from malloc(3C). 
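 *
 * As a concrete illustration (the usual 32-bit case, with its 8-byte tag):
 * malloc(24) stores malloc_size = 32 (24 bytes of mallocsz plus the tag)
 * and is satisfied exactly by the umem_alloc_32 cache, while malloc(20)
 * uses the same cache and leaves 4 bytes of round-off.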
3964 */ 3965 /*ARGSUSED*/ 3966 static int 3967 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump) 3968 { 3969 struct malloc_data md; 3970 size_t m_addr = addr; 3971 size_t overhead = sizeof (md); 3972 size_t mallocsz; 3973 3974 ump->um_total++; 3975 3976 #ifdef _LP64 3977 if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) { 3978 m_addr += overhead; 3979 overhead += sizeof (md); 3980 } 3981 #endif 3982 3983 if (mdb_vread(&md, sizeof (md), m_addr) == -1) { 3984 mdb_warn("unable to read malloc header at %p", m_addr); 3985 return (WALK_NEXT); 3986 } 3987 3988 switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) { 3989 case MALLOC_MAGIC: 3990 #ifdef _LP64 3991 case MALLOC_SECOND_MAGIC: 3992 #endif 3993 mallocsz = md.malloc_size - overhead; 3994 3995 ump->um_malloc++; 3996 ump->um_malloc_size += mallocsz; 3997 ump->um_malloc_overhead += overhead; 3998 3999 /* include round-off and debug overhead */ 4000 ump->um_malloc_overhead += 4001 ump->um_cp->cache_chunksize - md.malloc_size; 4002 4003 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET) 4004 ump->um_bucket[mallocsz]++; 4005 4006 break; 4007 default: 4008 break; 4009 } 4010 4011 return (WALK_NEXT); 4012 } 4013 4014 int 4015 get_umem_alloc_sizes(int **out, size_t *out_num) 4016 { 4017 GElf_Sym sym; 4018 4019 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { 4020 mdb_warn("unable to look up umem_alloc_sizes"); 4021 return (-1); 4022 } 4023 4024 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC); 4025 *out_num = sym.st_size / sizeof (int); 4026 4027 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) { 4028 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value); 4029 *out = NULL; 4030 return (-1); 4031 } 4032 4033 return (0); 4034 } 4035 4036 4037 static int 4038 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump) 4039 { 4040 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) 4041 return (WALK_NEXT); 4042 4043 ump->um_cp = cp; 4044 4045 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) == 4046 -1) { 4047 mdb_warn("can't walk 'umem' for cache %p", addr); 4048 return (WALK_ERR); 4049 } 4050 4051 return (WALK_NEXT); 4052 } 4053 4054 void 4055 umem_malloc_dist_help(void) 4056 { 4057 mdb_printf("%s\n", 4058 "report distribution of outstanding malloc()s"); 4059 mdb_dec_indent(2); 4060 mdb_printf("%<b>OPTIONS%</b>\n"); 4061 mdb_inc_indent(2); 4062 mdb_printf("%s", 4063 " -b maxbins\n" 4064 " Use at most maxbins bins for the data\n" 4065 " -B minbinsize\n" 4066 " Make the bins at least minbinsize bytes apart\n" 4067 " -d dump the raw data out, without binning\n" 4068 " -g use geometric binning instead of linear binning\n"); 4069 } 4070 4071 /*ARGSUSED*/ 4072 int 4073 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4074 { 4075 umem_malloc_info_t mi; 4076 uint_t geometric = 0; 4077 uint_t dump = 0; 4078 size_t maxbuckets = 0; 4079 size_t minbucketsize = 0; 4080 4081 size_t minalloc = 0; 4082 size_t maxalloc = UMI_MAX_BUCKET; 4083 4084 if (flags & DCMD_ADDRSPEC) 4085 return (DCMD_USAGE); 4086 4087 if (mdb_getopts(argc, argv, 4088 'd', MDB_OPT_SETBITS, TRUE, &dump, 4089 'g', MDB_OPT_SETBITS, TRUE, &geometric, 4090 'b', MDB_OPT_UINTPTR, &maxbuckets, 4091 'B', MDB_OPT_UINTPTR, &minbucketsize, 4092 0) != argc) 4093 return (DCMD_USAGE); 4094 4095 bzero(&mi, sizeof (mi)); 4096 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4097 UM_SLEEP | UM_GC); 4098 4099 if (mdb_walk("umem_cache", 
(mdb_walk_cb_t)um_umem_cache_cb, 4100 &mi) == -1) { 4101 mdb_warn("unable to walk 'umem_cache'"); 4102 return (DCMD_ERR); 4103 } 4104 4105 if (dump) { 4106 int i; 4107 for (i = minalloc; i <= maxalloc; i++) 4108 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]); 4109 4110 return (DCMD_OK); 4111 } 4112 4113 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc, 4114 maxbuckets, minbucketsize, geometric); 4115 4116 return (DCMD_OK); 4117 } 4118 4119 void 4120 umem_malloc_info_help(void) 4121 { 4122 mdb_printf("%s\n", 4123 "report information about malloc()s by cache. "); 4124 mdb_dec_indent(2); 4125 mdb_printf("%<b>OPTIONS%</b>\n"); 4126 mdb_inc_indent(2); 4127 mdb_printf("%s", 4128 " -b maxbins\n" 4129 " Use at most maxbins bins for the data\n" 4130 " -B minbinsize\n" 4131 " Make the bins at least minbinsize bytes apart\n" 4132 " -d dump the raw distribution data without binning\n" 4133 #ifndef _KMDB 4134 " -g use geometric binning instead of linear binning\n" 4135 #endif 4136 ""); 4137 } 4138 int 4139 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4140 { 4141 umem_cache_t c; 4142 umem_malloc_info_t mi; 4143 4144 int skip = 0; 4145 4146 size_t maxmalloc; 4147 size_t overhead; 4148 size_t allocated; 4149 size_t avg_malloc; 4150 size_t overhead_pct; /* 1000 * overhead_percent */ 4151 4152 uint_t verbose = 0; 4153 uint_t dump = 0; 4154 uint_t geometric = 0; 4155 size_t maxbuckets = 0; 4156 size_t minbucketsize = 0; 4157 4158 int *alloc_sizes; 4159 int idx; 4160 size_t num; 4161 size_t minmalloc; 4162 4163 if (mdb_getopts(argc, argv, 4164 'd', MDB_OPT_SETBITS, TRUE, &dump, 4165 'g', MDB_OPT_SETBITS, TRUE, &geometric, 4166 'b', MDB_OPT_UINTPTR, &maxbuckets, 4167 'B', MDB_OPT_UINTPTR, &minbucketsize, 4168 0) != argc) 4169 return (DCMD_USAGE); 4170 4171 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0)) 4172 verbose = 1; 4173 4174 if (!(flags & DCMD_ADDRSPEC)) { 4175 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info", 4176 argc, argv) == -1) { 4177 mdb_warn("can't walk umem_cache"); 4178 return (DCMD_ERR); 4179 } 4180 return (DCMD_OK); 4181 } 4182 4183 if (mdb_vread(&c, sizeof (c), addr) == -1) { 4184 mdb_warn("unable to read cache at %p", addr); 4185 return (DCMD_ERR); 4186 } 4187 4188 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) { 4189 if (!(flags & DCMD_LOOP)) 4190 mdb_warn("umem_malloc_info: cache \"%s\" is not used " 4191 "by malloc()\n", c.cache_name); 4192 skip = 1; 4193 } 4194 4195 /* 4196 * normally, print the header only the first time.
In verbose mode, 4197 * print the header on every non-skipped buffer 4198 */ 4199 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip)) 4200 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n", 4201 "CACHE", "BUFSZ", "MAXMAL", 4202 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER"); 4203 4204 if (skip) 4205 return (DCMD_OK); 4206 4207 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data); 4208 #ifdef _LP64 4209 if (c.cache_bufsize > UMEM_SECOND_ALIGN) 4210 maxmalloc -= sizeof (struct malloc_data); 4211 #endif 4212 4213 bzero(&mi, sizeof (mi)); 4214 mi.um_cp = &c; 4215 if (verbose) 4216 mi.um_bucket = 4217 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4218 UM_SLEEP | UM_GC); 4219 4220 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) == 4221 -1) { 4222 mdb_warn("can't walk 'umem'"); 4223 return (DCMD_ERR); 4224 } 4225 4226 overhead = mi.um_malloc_overhead; 4227 allocated = mi.um_malloc_size; 4228 4229 /* do integer round off for the average */ 4230 if (mi.um_malloc != 0) 4231 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc; 4232 else 4233 avg_malloc = 0; 4234 4235 /* 4236 * include per-slab overhead 4237 * 4238 * Each slab in a given cache is the same size, and has the same 4239 * number of chunks in it; we read in the first slab on the 4240 * slab list to get the number of chunks for all slabs. To 4241 * compute the per-slab overhead, we just subtract the chunk usage 4242 * from the slabsize: 4243 * 4244 * +------------+-------+-------+ ... --+-------+-------+-------+ 4245 * |////////////| | | ... | |///////|///////| 4246 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//| 4247 * |////////////| | | ... | |///////|///////| 4248 * +------------+-------+-------+ ... --+-------+-------+-------+ 4249 * | \_______chunksize * chunks_____/ | 4250 * \__________________________slabsize__________________________/ 4251 * 4252 * For UMF_HASH caches, there is an additional source of overhead; 4253 * the external umem_slab_t and per-chunk bufctl structures. We 4254 * include those in our per-slab overhead. 4255 * 4256 * Once we have a number for the per-slab overhead, we estimate 4257 * the actual overhead by treating the malloc()ed buffers as if 4258 * they were densely packed: 4259 * 4260 * additional overhead = (# mallocs) * (per-slab) / (chunks); 4261 * 4262 * carefully ordering the multiply before the divide, to avoid 4263 * round-off error. 4264 */ 4265 if (mi.um_malloc != 0) { 4266 umem_slab_t slab; 4267 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next; 4268 4269 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) { 4270 mdb_warn("unable to read slab at %p\n", saddr); 4271 } else { 4272 long chunks = slab.slab_chunks; 4273 if (chunks != 0 && c.cache_chunksize != 0 && 4274 chunks <= c.cache_slabsize / c.cache_chunksize) { 4275 uintmax_t perslab = 4276 c.cache_slabsize - 4277 (c.cache_chunksize * chunks); 4278 4279 if (c.cache_flags & UMF_HASH) { 4280 perslab += sizeof (umem_slab_t) + 4281 chunks * 4282 ((c.cache_flags & UMF_AUDIT) ? 
4283 sizeof (umem_bufctl_audit_t) : 4284 sizeof (umem_bufctl_t)); 4285 } 4286 overhead += 4287 (perslab * (uintmax_t)mi.um_malloc)/chunks; 4288 } else { 4289 mdb_warn("invalid #chunks (%d) in slab %p\n", 4290 chunks, saddr); 4291 } 4292 } 4293 } 4294 4295 if (allocated != 0) 4296 overhead_pct = (1000ULL * overhead) / allocated; 4297 else 4298 overhead_pct = 0; 4299 4300 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n", 4301 addr, c.cache_bufsize, maxmalloc, 4302 mi.um_malloc, avg_malloc, allocated, overhead, 4303 overhead_pct / 10, overhead_pct % 10); 4304 4305 if (!verbose) 4306 return (DCMD_OK); 4307 4308 if (!dump) 4309 mdb_printf("\n"); 4310 4311 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1) 4312 return (DCMD_ERR); 4313 4314 for (idx = 0; idx < num; idx++) { 4315 if (alloc_sizes[idx] == c.cache_bufsize) 4316 break; 4317 if (alloc_sizes[idx] == 0) { 4318 idx = num; /* 0-terminated array */ 4319 break; 4320 } 4321 } 4322 if (idx == num) { 4323 mdb_warn( 4324 "cache %p's size (%d) not in umem_alloc_sizes\n", 4325 addr, c.cache_bufsize); 4326 return (DCMD_ERR); 4327 } 4328 4329 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1]; 4330 if (minmalloc > 0) { 4331 #ifdef _LP64 4332 if (minmalloc > UMEM_SECOND_ALIGN) 4333 minmalloc -= sizeof (struct malloc_data); 4334 #endif 4335 minmalloc -= sizeof (struct malloc_data); 4336 minmalloc += 1; 4337 } 4338 4339 if (dump) { 4340 for (idx = minmalloc; idx <= maxmalloc; idx++) 4341 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]); 4342 mdb_printf("\n"); 4343 } else { 4344 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc, 4345 maxbuckets, minbucketsize, geometric); 4346 } 4347 4348 return (DCMD_OK); 4349 } 4350
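/*
 * Example usage (illustrative; the dcmd names mirror the functions above):
 * "::umem_malloc_dist -g -b 16" summarizes every outstanding malloc()
 * across the umem_alloc_* caches in at most 16 geometric bins, while
 * "addr::umem_malloc_info" reports per-cache malloc counts, average size,
 * and estimated overhead for the cache at addr; with no address it loops
 * over all caches via the umem_cache walker.
 */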