/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "umem.h"

#include <sys/vmem_impl_user.h>
#include <umem_impl.h>

#include <alloca.h>
#include <limits.h>

#include "misc.h"
#include "leaky.h"

#include "umem_pagesize.h"

#define	UM_ALLOCATED		0x1
#define	UM_FREE			0x2
#define	UM_BUFCTL		0x4
#define	UM_HASH			0x8

int umem_ready;

static int umem_stack_depth_warned;
static uint32_t umem_max_ncpus;
uint32_t umem_stack_depth;

size_t umem_pagesize;

#define	UMEM_READVAR(var)				\
	(umem_readvar(&(var), #var) == -1 &&		\
	    (mdb_warn("failed to read "#var), 1))

int
umem_update_variables(void)
{
	size_t pagesize;

	/*
	 * Figure out which type of umem is being used; if it's not there
	 * yet, succeed quietly.
	 */
	if (umem_set_standalone() == -1) {
		umem_ready = 0;
		return (0);		/* umem not there yet */
	}

	/*
	 * Solaris 9 used a different name for umem_max_ncpus.  It's
	 * cheap backwards compatibility to check for both names.
75 */ 76 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 && 77 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) { 78 mdb_warn("unable to read umem_max_ncpus or max_ncpus"); 79 return (-1); 80 } 81 if (UMEM_READVAR(umem_ready)) 82 return (-1); 83 if (UMEM_READVAR(umem_stack_depth)) 84 return (-1); 85 if (UMEM_READVAR(pagesize)) 86 return (-1); 87 88 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) { 89 if (umem_stack_depth_warned == 0) { 90 mdb_warn("umem_stack_depth corrupted (%d > %d)\n", 91 umem_stack_depth, UMEM_MAX_STACK_DEPTH); 92 umem_stack_depth_warned = 1; 93 } 94 umem_stack_depth = 0; 95 } 96 97 umem_pagesize = pagesize; 98 99 return (0); 100 } 101 102 /*ARGSUSED*/ 103 static int 104 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored) 105 { 106 mdb_walker_t w; 107 char descr[64]; 108 109 (void) mdb_snprintf(descr, sizeof (descr), 110 "walk the %s cache", c->cache_name); 111 112 w.walk_name = c->cache_name; 113 w.walk_descr = descr; 114 w.walk_init = umem_walk_init; 115 w.walk_step = umem_walk_step; 116 w.walk_fini = umem_walk_fini; 117 w.walk_init_arg = (void *)addr; 118 119 if (mdb_add_walker(&w) == -1) 120 mdb_warn("failed to add %s walker", c->cache_name); 121 122 return (WALK_NEXT); 123 } 124 125 /*ARGSUSED*/ 126 static void 127 umem_statechange_cb(void *arg) 128 { 129 static int been_ready = 0; 130 131 #ifndef _KMDB 132 leaky_cleanup(1); /* state changes invalidate leaky state */ 133 #endif 134 135 if (umem_update_variables() == -1) 136 return; 137 138 if (been_ready) 139 return; 140 141 if (umem_ready != UMEM_READY) 142 return; 143 144 been_ready = 1; 145 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL); 146 } 147 148 int 149 umem_init(void) 150 { 151 mdb_walker_t w = { 152 "umem_cache", "walk list of umem caches", umem_cache_walk_init, 153 umem_cache_walk_step, umem_cache_walk_fini 154 }; 155 156 if (mdb_add_walker(&w) == -1) { 157 mdb_warn("failed to add umem_cache walker"); 158 return (-1); 159 } 160 161 if (umem_update_variables() == -1) 162 return (-1); 163 164 /* install a callback so that our variables are always up-to-date */ 165 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL); 166 umem_statechange_cb(NULL); 167 168 return (0); 169 } 170 171 int 172 umem_abort_messages(void) 173 { 174 char *umem_error_buffer; 175 uint_t umem_error_begin; 176 GElf_Sym sym; 177 size_t bufsize; 178 179 if (UMEM_READVAR(umem_error_begin)) 180 return (DCMD_ERR); 181 182 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) { 183 mdb_warn("unable to look up umem_error_buffer"); 184 return (DCMD_ERR); 185 } 186 187 bufsize = (size_t)sym.st_size; 188 189 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC); 190 191 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value) 192 != bufsize) { 193 mdb_warn("unable to read umem_error_buffer"); 194 return (DCMD_ERR); 195 } 196 /* put a zero after the end of the buffer to simplify printing */ 197 umem_error_buffer[bufsize] = 0; 198 199 if ((umem_error_begin % bufsize) == 0) 200 mdb_printf("%s\n", umem_error_buffer); 201 else { 202 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0; 203 mdb_printf("%s%s\n", 204 &umem_error_buffer[umem_error_begin % bufsize], 205 umem_error_buffer); 206 } 207 208 return (DCMD_OK); 209 } 210 211 static void 212 umem_log_status(const char *name, umem_log_header_t *val) 213 { 214 umem_log_header_t my_lh; 215 uintptr_t pos = (uintptr_t)val; 216 size_t size; 217 218 if (pos == NULL) 219 return; 220 221 if (mdb_vread(&my_lh, 
sizeof (umem_log_header_t), pos) == -1) { 222 mdb_warn("\nunable to read umem_%s_log pointer %p", 223 name, pos); 224 return; 225 } 226 227 size = my_lh.lh_chunksize * my_lh.lh_nchunks; 228 229 if (size % (1024 * 1024) == 0) 230 mdb_printf("%s=%dm ", name, size / (1024 * 1024)); 231 else if (size % 1024 == 0) 232 mdb_printf("%s=%dk ", name, size / 1024); 233 else 234 mdb_printf("%s=%d ", name, size); 235 } 236 237 typedef struct umem_debug_flags { 238 const char *udf_name; 239 uint_t udf_flags; 240 uint_t udf_clear; /* if 0, uses udf_flags */ 241 } umem_debug_flags_t; 242 243 umem_debug_flags_t umem_status_flags[] = { 244 { "random", UMF_RANDOMIZE, UMF_RANDOM }, 245 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS }, 246 { "audit", UMF_AUDIT }, 247 { "guards", UMF_DEADBEEF | UMF_REDZONE }, 248 { "nosignal", UMF_CHECKSIGNAL }, 249 { "firewall", UMF_FIREWALL }, 250 { "lite", UMF_LITE }, 251 { NULL } 252 }; 253 254 /*ARGSUSED*/ 255 int 256 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 257 { 258 int umem_logging; 259 260 umem_log_header_t *umem_transaction_log; 261 umem_log_header_t *umem_content_log; 262 umem_log_header_t *umem_failure_log; 263 umem_log_header_t *umem_slab_log; 264 265 mdb_printf("Status:\t\t%s\n", 266 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" : 267 umem_ready == UMEM_READY_STARTUP ? "uninitialized" : 268 umem_ready == UMEM_READY_INITING ? "initialization in process" : 269 umem_ready == UMEM_READY ? "ready and active" : 270 umem_ready == 0 ? "not loaded into address space" : 271 "unknown (umem_ready invalid)"); 272 273 if (umem_ready == 0) 274 return (DCMD_OK); 275 276 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus); 277 278 if (UMEM_READVAR(umem_logging)) 279 goto err; 280 if (UMEM_READVAR(umem_transaction_log)) 281 goto err; 282 if (UMEM_READVAR(umem_content_log)) 283 goto err; 284 if (UMEM_READVAR(umem_failure_log)) 285 goto err; 286 if (UMEM_READVAR(umem_slab_log)) 287 goto err; 288 289 mdb_printf("Logs:\t\t"); 290 umem_log_status("transaction", umem_transaction_log); 291 umem_log_status("content", umem_content_log); 292 umem_log_status("fail", umem_failure_log); 293 umem_log_status("slab", umem_slab_log); 294 if (!umem_logging) 295 mdb_printf("(inactive)"); 296 mdb_printf("\n"); 297 298 mdb_printf("Message buffer:\n"); 299 return (umem_abort_messages()); 300 301 err: 302 mdb_printf("Message buffer:\n"); 303 (void) umem_abort_messages(); 304 return (DCMD_ERR); 305 } 306 307 typedef struct { 308 uintptr_t ucw_first; 309 uintptr_t ucw_current; 310 } umem_cache_walk_t; 311 312 int 313 umem_cache_walk_init(mdb_walk_state_t *wsp) 314 { 315 umem_cache_walk_t *ucw; 316 umem_cache_t c; 317 uintptr_t cp; 318 GElf_Sym sym; 319 320 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) { 321 mdb_warn("couldn't find umem_null_cache"); 322 return (WALK_ERR); 323 } 324 325 cp = (uintptr_t)sym.st_value; 326 327 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) { 328 mdb_warn("couldn't read cache at %p", cp); 329 return (WALK_ERR); 330 } 331 332 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP); 333 334 ucw->ucw_first = cp; 335 ucw->ucw_current = (uintptr_t)c.cache_next; 336 wsp->walk_data = ucw; 337 338 return (WALK_NEXT); 339 } 340 341 int 342 umem_cache_walk_step(mdb_walk_state_t *wsp) 343 { 344 umem_cache_walk_t *ucw = wsp->walk_data; 345 umem_cache_t c; 346 int status; 347 348 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) { 349 mdb_warn("couldn't read cache at %p", ucw->ucw_current); 350 
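		/*
		 * The cache list is circular, anchored at umem_null_cache;
		 * if the current element can't be read there is nothing
		 * further we can usefully visit, so end the walk instead
		 * of failing it.
		 */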
return (WALK_DONE); 351 } 352 353 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata); 354 355 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first) 356 return (WALK_DONE); 357 358 return (status); 359 } 360 361 void 362 umem_cache_walk_fini(mdb_walk_state_t *wsp) 363 { 364 umem_cache_walk_t *ucw = wsp->walk_data; 365 mdb_free(ucw, sizeof (umem_cache_walk_t)); 366 } 367 368 typedef struct { 369 umem_cpu_t *ucw_cpus; 370 uint32_t ucw_current; 371 uint32_t ucw_max; 372 } umem_cpu_walk_state_t; 373 374 int 375 umem_cpu_walk_init(mdb_walk_state_t *wsp) 376 { 377 umem_cpu_t *umem_cpus; 378 379 umem_cpu_walk_state_t *ucw; 380 381 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) { 382 mdb_warn("failed to read 'umem_cpus'"); 383 return (WALK_ERR); 384 } 385 386 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP); 387 388 ucw->ucw_cpus = umem_cpus; 389 ucw->ucw_current = 0; 390 ucw->ucw_max = umem_max_ncpus; 391 392 wsp->walk_data = ucw; 393 return (WALK_NEXT); 394 } 395 396 int 397 umem_cpu_walk_step(mdb_walk_state_t *wsp) 398 { 399 umem_cpu_t cpu; 400 umem_cpu_walk_state_t *ucw = wsp->walk_data; 401 402 uintptr_t caddr; 403 404 if (ucw->ucw_current >= ucw->ucw_max) 405 return (WALK_DONE); 406 407 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]); 408 409 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) { 410 mdb_warn("failed to read cpu %d", ucw->ucw_current); 411 return (WALK_ERR); 412 } 413 414 ucw->ucw_current++; 415 416 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata)); 417 } 418 419 void 420 umem_cpu_walk_fini(mdb_walk_state_t *wsp) 421 { 422 umem_cpu_walk_state_t *ucw = wsp->walk_data; 423 424 mdb_free(ucw, sizeof (*ucw)); 425 } 426 427 int 428 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 429 { 430 if (wsp->walk_addr == NULL) { 431 mdb_warn("umem_cpu_cache doesn't support global walks"); 432 return (WALK_ERR); 433 } 434 435 if (mdb_layered_walk("umem_cpu", wsp) == -1) { 436 mdb_warn("couldn't walk 'umem_cpu'"); 437 return (WALK_ERR); 438 } 439 440 wsp->walk_data = (void *)wsp->walk_addr; 441 442 return (WALK_NEXT); 443 } 444 445 int 446 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 447 { 448 uintptr_t caddr = (uintptr_t)wsp->walk_data; 449 const umem_cpu_t *cpu = wsp->walk_layer; 450 umem_cpu_cache_t cc; 451 452 caddr += cpu->cpu_cache_offset; 453 454 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) { 455 mdb_warn("couldn't read umem_cpu_cache at %p", caddr); 456 return (WALK_ERR); 457 } 458 459 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 460 } 461 462 int 463 umem_slab_walk_init(mdb_walk_state_t *wsp) 464 { 465 uintptr_t caddr = wsp->walk_addr; 466 umem_cache_t c; 467 468 if (caddr == NULL) { 469 mdb_warn("umem_slab doesn't support global walks\n"); 470 return (WALK_ERR); 471 } 472 473 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 474 mdb_warn("couldn't read umem_cache at %p", caddr); 475 return (WALK_ERR); 476 } 477 478 wsp->walk_data = 479 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 480 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; 481 482 return (WALK_NEXT); 483 } 484 485 int 486 umem_slab_walk_partial_init(mdb_walk_state_t *wsp) 487 { 488 uintptr_t caddr = wsp->walk_addr; 489 umem_cache_t c; 490 491 if (caddr == NULL) { 492 mdb_warn("umem_slab_partial doesn't support global walks\n"); 493 return (WALK_ERR); 494 } 495 496 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 497 mdb_warn("couldn't read umem_cache at %p", caddr); 498 return (WALK_ERR); 499 } 500 501 wsp->walk_data = 502 (void *)(caddr + 
offsetof(umem_cache_t, cache_nullslab)); 503 wsp->walk_addr = (uintptr_t)c.cache_freelist; 504 505 /* 506 * Some consumers (umem_walk_step(), in particular) require at 507 * least one callback if there are any buffers in the cache. So 508 * if there are *no* partial slabs, report the last full slab, if 509 * any. 510 * 511 * Yes, this is ugly, but it's cleaner than the other possibilities. 512 */ 513 if ((uintptr_t)wsp->walk_data == wsp->walk_addr) 514 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; 515 516 return (WALK_NEXT); 517 } 518 519 int 520 umem_slab_walk_step(mdb_walk_state_t *wsp) 521 { 522 umem_slab_t s; 523 uintptr_t addr = wsp->walk_addr; 524 uintptr_t saddr = (uintptr_t)wsp->walk_data; 525 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab); 526 527 if (addr == saddr) 528 return (WALK_DONE); 529 530 if (mdb_vread(&s, sizeof (s), addr) == -1) { 531 mdb_warn("failed to read slab at %p", wsp->walk_addr); 532 return (WALK_ERR); 533 } 534 535 if ((uintptr_t)s.slab_cache != caddr) { 536 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 537 addr, caddr, s.slab_cache); 538 return (WALK_ERR); 539 } 540 541 wsp->walk_addr = (uintptr_t)s.slab_next; 542 543 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); 544 } 545 546 int 547 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 548 { 549 umem_cache_t c; 550 551 if (!(flags & DCMD_ADDRSPEC)) { 552 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) { 553 mdb_warn("can't walk umem_cache"); 554 return (DCMD_ERR); 555 } 556 return (DCMD_OK); 557 } 558 559 if (DCMD_HDRSPEC(flags)) 560 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME", 561 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 562 563 if (mdb_vread(&c, sizeof (c), addr) == -1) { 564 mdb_warn("couldn't read umem_cache at %p", addr); 565 return (DCMD_ERR); 566 } 567 568 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name, 569 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 570 571 return (DCMD_OK); 572 } 573 574 static int 575 addrcmp(const void *lhs, const void *rhs) 576 { 577 uintptr_t p1 = *((uintptr_t *)lhs); 578 uintptr_t p2 = *((uintptr_t *)rhs); 579 580 if (p1 < p2) 581 return (-1); 582 if (p1 > p2) 583 return (1); 584 return (0); 585 } 586 587 static int 588 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs) 589 { 590 const umem_bufctl_audit_t *bcp1 = *lhs; 591 const umem_bufctl_audit_t *bcp2 = *rhs; 592 593 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 594 return (-1); 595 596 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 597 return (1); 598 599 return (0); 600 } 601 602 typedef struct umem_hash_walk { 603 uintptr_t *umhw_table; 604 size_t umhw_nelems; 605 size_t umhw_pos; 606 umem_bufctl_t umhw_cur; 607 } umem_hash_walk_t; 608 609 int 610 umem_hash_walk_init(mdb_walk_state_t *wsp) 611 { 612 umem_hash_walk_t *umhw; 613 uintptr_t *hash; 614 umem_cache_t c; 615 uintptr_t haddr, addr = wsp->walk_addr; 616 size_t nelems; 617 size_t hsize; 618 619 if (addr == NULL) { 620 mdb_warn("umem_hash doesn't support global walks\n"); 621 return (WALK_ERR); 622 } 623 624 if (mdb_vread(&c, sizeof (c), addr) == -1) { 625 mdb_warn("couldn't read cache at addr %p", addr); 626 return (WALK_ERR); 627 } 628 629 if (!(c.cache_flags & UMF_HASH)) { 630 mdb_warn("cache %p doesn't have a hash table\n", addr); 631 return (WALK_DONE); /* nothing to do */ 632 } 633 634 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP); 635 umhw->umhw_cur.bc_next = NULL; 636 umhw->umhw_pos = 0; 637 
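	/*
	 * Snapshot the cache's hash table into the debugger's address
	 * space up front; each walk step can then pick the next bucket
	 * locally, reading only the individual bufctls from the target.
	 */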
638 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1; 639 hsize = nelems * sizeof (uintptr_t); 640 haddr = (uintptr_t)c.cache_hash_table; 641 642 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 643 if (mdb_vread(hash, hsize, haddr) == -1) { 644 mdb_warn("failed to read hash table at %p", haddr); 645 mdb_free(hash, hsize); 646 mdb_free(umhw, sizeof (umem_hash_walk_t)); 647 return (WALK_ERR); 648 } 649 650 wsp->walk_data = umhw; 651 652 return (WALK_NEXT); 653 } 654 655 int 656 umem_hash_walk_step(mdb_walk_state_t *wsp) 657 { 658 umem_hash_walk_t *umhw = wsp->walk_data; 659 uintptr_t addr = NULL; 660 661 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) { 662 while (umhw->umhw_pos < umhw->umhw_nelems) { 663 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL) 664 break; 665 } 666 } 667 if (addr == NULL) 668 return (WALK_DONE); 669 670 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) { 671 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr); 672 return (WALK_ERR); 673 } 674 675 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata)); 676 } 677 678 void 679 umem_hash_walk_fini(mdb_walk_state_t *wsp) 680 { 681 umem_hash_walk_t *umhw = wsp->walk_data; 682 683 if (umhw == NULL) 684 return; 685 686 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t)); 687 mdb_free(umhw, sizeof (umem_hash_walk_t)); 688 } 689 690 /* 691 * Find the address of the bufctl structure for the address 'buf' in cache 692 * 'cp', which is at address caddr, and place it in *out. 693 */ 694 static int 695 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 696 { 697 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf); 698 umem_bufctl_t *bcp; 699 umem_bufctl_t bc; 700 701 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) { 702 mdb_warn("unable to read hash bucket for %p in cache %p", 703 buf, caddr); 704 return (-1); 705 } 706 707 while (bcp != NULL) { 708 if (mdb_vread(&bc, sizeof (umem_bufctl_t), 709 (uintptr_t)bcp) == -1) { 710 mdb_warn("unable to read bufctl at %p", bcp); 711 return (-1); 712 } 713 if (bc.bc_addr == buf) { 714 *out = (uintptr_t)bcp; 715 return (0); 716 } 717 bcp = bc.bc_next; 718 } 719 720 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 721 return (-1); 722 } 723 724 int 725 umem_get_magsize(const umem_cache_t *cp) 726 { 727 uintptr_t addr = (uintptr_t)cp->cache_magtype; 728 GElf_Sym mt_sym; 729 umem_magtype_t mt; 730 int res; 731 732 /* 733 * if cpu 0 has a non-zero magsize, it must be correct. caches 734 * with UMF_NOMAGAZINE have disabled their magazine layers, so 735 * it is okay to return 0 for them. 
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & UMF_NOMAGAZINE))
		return (res);

	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'umem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("umem_slab_partial",
	    (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);

	if ((magsize = umem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}

#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}

int
umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	umem_magazine_t *ump, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = umem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
		READMAG_ROUNDS(magsize);
		ump = mp->mag_next;

		if (ump == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (ump = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (ump = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	umem_bufctl_audit_t *b;
	UMEM_LOCAL_BUFCTL_AUDIT(&b);

	/*
	 * if UMF_AUDIT is not set, we know that we're looking at a
	 * umem_bufctl_t.
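	 *
	 * (A plain umem_bufctl_t matches the leading fields of the audit
	 * version, so the callback below still sees valid next/addr/slab
	 * fields and zeroes in the audit-only fields.)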
930 */ 931 if (!(cp->cache_flags & UMF_AUDIT) || 932 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) { 933 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE); 934 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) { 935 mdb_warn("unable to read bufctl at %p", buf); 936 return (WALK_ERR); 937 } 938 } 939 940 return (wsp->walk_callback(buf, b, wsp->walk_cbdata)); 941 } 942 943 typedef struct umem_walk { 944 int umw_type; 945 946 int umw_addr; /* cache address */ 947 umem_cache_t *umw_cp; 948 size_t umw_csize; 949 950 /* 951 * magazine layer 952 */ 953 void **umw_maglist; 954 size_t umw_max; 955 size_t umw_count; 956 size_t umw_pos; 957 958 /* 959 * slab layer 960 */ 961 char *umw_valid; /* to keep track of freed buffers */ 962 char *umw_ubase; /* buffer for slab data */ 963 } umem_walk_t; 964 965 static int 966 umem_walk_init_common(mdb_walk_state_t *wsp, int type) 967 { 968 umem_walk_t *umw; 969 int csize; 970 umem_cache_t *cp; 971 size_t vm_quantum; 972 973 size_t magmax, magcnt; 974 void **maglist = NULL; 975 uint_t chunksize, slabsize; 976 int status = WALK_ERR; 977 uintptr_t addr = wsp->walk_addr; 978 const char *layered; 979 980 type &= ~UM_HASH; 981 982 if (addr == NULL) { 983 mdb_warn("umem walk doesn't support global walks\n"); 984 return (WALK_ERR); 985 } 986 987 dprintf(("walking %p\n", addr)); 988 989 /* 990 * The number of "cpus" determines how large the cache is. 991 */ 992 csize = UMEM_CACHE_SIZE(umem_max_ncpus); 993 cp = mdb_alloc(csize, UM_SLEEP); 994 995 if (mdb_vread(cp, csize, addr) == -1) { 996 mdb_warn("couldn't read cache at addr %p", addr); 997 goto out2; 998 } 999 1000 /* 1001 * It's easy for someone to hand us an invalid cache address. 1002 * Unfortunately, it is hard for this walker to survive an 1003 * invalid cache cleanly. So we make sure that: 1004 * 1005 * 1. the vmem arena for the cache is readable, 1006 * 2. the vmem arena's quantum is a power of 2, 1007 * 3. our slabsize is a multiple of the quantum, and 1008 * 4. our chunksize is >0 and less than our slabsize. 1009 */ 1010 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1011 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1012 vm_quantum == 0 || 1013 (vm_quantum & (vm_quantum - 1)) != 0 || 1014 cp->cache_slabsize < vm_quantum || 1015 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1016 cp->cache_chunksize == 0 || 1017 cp->cache_chunksize > cp->cache_slabsize) { 1018 mdb_warn("%p is not a valid umem_cache_t\n", addr); 1019 goto out2; 1020 } 1021 1022 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1023 1024 if (cp->cache_buftotal == 0) { 1025 mdb_free(cp, csize); 1026 return (WALK_DONE); 1027 } 1028 1029 /* 1030 * If they ask for bufctls, but it's a small-slab cache, 1031 * there is nothing to report. 1032 */ 1033 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) { 1034 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n", 1035 cp->cache_flags)); 1036 mdb_free(cp, csize); 1037 return (WALK_DONE); 1038 } 1039 1040 /* 1041 * Read in the contents of the magazine layer 1042 */ 1043 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax, 1044 UM_SLEEP) == WALK_ERR) 1045 goto out2; 1046 1047 /* 1048 * We have all of the buffers from the magazines; if we are walking 1049 * allocated buffers, sort them so we can bsearch them later. 
1050 */ 1051 if (type & UM_ALLOCATED) 1052 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1053 1054 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP); 1055 1056 umw->umw_type = type; 1057 umw->umw_addr = addr; 1058 umw->umw_cp = cp; 1059 umw->umw_csize = csize; 1060 umw->umw_maglist = maglist; 1061 umw->umw_max = magmax; 1062 umw->umw_count = magcnt; 1063 umw->umw_pos = 0; 1064 1065 /* 1066 * When walking allocated buffers in a UMF_HASH cache, we walk the 1067 * hash table instead of the slab layer. 1068 */ 1069 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) { 1070 layered = "umem_hash"; 1071 1072 umw->umw_type |= UM_HASH; 1073 } else { 1074 /* 1075 * If we are walking freed buffers, we only need the 1076 * magazine layer plus the partially allocated slabs. 1077 * To walk allocated buffers, we need all of the slabs. 1078 */ 1079 if (type & UM_ALLOCATED) 1080 layered = "umem_slab"; 1081 else 1082 layered = "umem_slab_partial"; 1083 1084 /* 1085 * for small-slab caches, we read in the entire slab. For 1086 * freed buffers, we can just walk the freelist. For 1087 * allocated buffers, we use a 'valid' array to track 1088 * the freed buffers. 1089 */ 1090 if (!(cp->cache_flags & UMF_HASH)) { 1091 chunksize = cp->cache_chunksize; 1092 slabsize = cp->cache_slabsize; 1093 1094 umw->umw_ubase = mdb_alloc(slabsize + 1095 sizeof (umem_bufctl_t), UM_SLEEP); 1096 1097 if (type & UM_ALLOCATED) 1098 umw->umw_valid = 1099 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1100 } 1101 } 1102 1103 status = WALK_NEXT; 1104 1105 if (mdb_layered_walk(layered, wsp) == -1) { 1106 mdb_warn("unable to start layered '%s' walk", layered); 1107 status = WALK_ERR; 1108 } 1109 1110 out1: 1111 if (status == WALK_ERR) { 1112 if (umw->umw_valid) 1113 mdb_free(umw->umw_valid, slabsize / chunksize); 1114 1115 if (umw->umw_ubase) 1116 mdb_free(umw->umw_ubase, slabsize + 1117 sizeof (umem_bufctl_t)); 1118 1119 if (umw->umw_maglist) 1120 mdb_free(umw->umw_maglist, umw->umw_max * 1121 sizeof (uintptr_t)); 1122 1123 mdb_free(umw, sizeof (umem_walk_t)); 1124 wsp->walk_data = NULL; 1125 } 1126 1127 out2: 1128 if (status == WALK_ERR) 1129 mdb_free(cp, csize); 1130 1131 return (status); 1132 } 1133 1134 int 1135 umem_walk_step(mdb_walk_state_t *wsp) 1136 { 1137 umem_walk_t *umw = wsp->walk_data; 1138 int type = umw->umw_type; 1139 umem_cache_t *cp = umw->umw_cp; 1140 1141 void **maglist = umw->umw_maglist; 1142 int magcnt = umw->umw_count; 1143 1144 uintptr_t chunksize, slabsize; 1145 uintptr_t addr; 1146 const umem_slab_t *sp; 1147 const umem_bufctl_t *bcp; 1148 umem_bufctl_t bc; 1149 1150 int chunks; 1151 char *kbase; 1152 void *buf; 1153 int i, ret; 1154 1155 char *valid, *ubase; 1156 1157 /* 1158 * first, handle the 'umem_hash' layered walk case 1159 */ 1160 if (type & UM_HASH) { 1161 /* 1162 * We have a buffer which has been allocated out of the 1163 * global layer. We need to make sure that it's not 1164 * actually sitting in a magazine before we report it as 1165 * an allocated buffer. 
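		 * (The magazine list was sorted with addrcmp() in
		 * umem_walk_init_common(), so the bsearch() below is valid.)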
		 */
		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & UM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (umem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = umw->umw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & UM_FREE) && magcnt != 0) {
		umw->umw_count = 0;	/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & UM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & UMF_BUFTAG) {
					umem_buftag_t *btp;
					umem_buftag_t tag;

					/* LINTED - alignment */
					btp = UMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (umem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & UMF_HASH)) {
		valid = umw->umw_valid;
		ubase = umw->umw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & UM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & UMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in; we just need to determine
			 * its offset in the slab to find the
			 * umem_bufctl_t.
			 */
			bc = *((umem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = UMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & UM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & UM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & UM_FREE)
		return (WALK_NEXT);

	if (type & UM_BUFCTL) {
		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = umem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
umem_walk_fini(mdb_walk_state_t *wsp)
{
	umem_walk_t *umw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (umw == NULL)
		return;

	if (umw->umw_maglist != NULL)
		mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));

	chunksize = umw->umw_cp->cache_chunksize;
	slabsize = umw->umw_cp->cache_slabsize;

	if (umw->umw_valid != NULL)
		mdb_free(umw->umw_valid, slabsize / chunksize);
	if (umw->umw_ubase != NULL)
		mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));

	mdb_free(umw->umw_cp, umw->umw_csize);
	mdb_free(umw, sizeof (umem_walk_t));
}

/*ARGSUSED*/
static int
umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk umem" and "::walk freemem" yield disjoint output).
1410 */ 1411 if (c->cache_cflags & UMC_NOTOUCH) 1412 return (WALK_NEXT); 1413 1414 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1415 wsp->walk_cbdata, addr) == -1) 1416 return (WALK_DONE); 1417 1418 return (WALK_NEXT); 1419 } 1420 1421 #define UMEM_WALK_ALL(name, wsp) { \ 1422 wsp->walk_data = (name); \ 1423 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \ 1424 return (WALK_ERR); \ 1425 return (WALK_DONE); \ 1426 } 1427 1428 int 1429 umem_walk_init(mdb_walk_state_t *wsp) 1430 { 1431 if (wsp->walk_arg != NULL) 1432 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1433 1434 if (wsp->walk_addr == NULL) 1435 UMEM_WALK_ALL("umem", wsp); 1436 return (umem_walk_init_common(wsp, UM_ALLOCATED)); 1437 } 1438 1439 int 1440 bufctl_walk_init(mdb_walk_state_t *wsp) 1441 { 1442 if (wsp->walk_addr == NULL) 1443 UMEM_WALK_ALL("bufctl", wsp); 1444 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL)); 1445 } 1446 1447 int 1448 freemem_walk_init(mdb_walk_state_t *wsp) 1449 { 1450 if (wsp->walk_addr == NULL) 1451 UMEM_WALK_ALL("freemem", wsp); 1452 return (umem_walk_init_common(wsp, UM_FREE)); 1453 } 1454 1455 int 1456 freectl_walk_init(mdb_walk_state_t *wsp) 1457 { 1458 if (wsp->walk_addr == NULL) 1459 UMEM_WALK_ALL("freectl", wsp); 1460 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL)); 1461 } 1462 1463 typedef struct bufctl_history_walk { 1464 void *bhw_next; 1465 umem_cache_t *bhw_cache; 1466 umem_slab_t *bhw_slab; 1467 hrtime_t bhw_timestamp; 1468 } bufctl_history_walk_t; 1469 1470 int 1471 bufctl_history_walk_init(mdb_walk_state_t *wsp) 1472 { 1473 bufctl_history_walk_t *bhw; 1474 umem_bufctl_audit_t bc; 1475 umem_bufctl_audit_t bcn; 1476 1477 if (wsp->walk_addr == NULL) { 1478 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1479 return (WALK_ERR); 1480 } 1481 1482 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1483 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1484 return (WALK_ERR); 1485 } 1486 1487 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1488 bhw->bhw_timestamp = 0; 1489 bhw->bhw_cache = bc.bc_cache; 1490 bhw->bhw_slab = bc.bc_slab; 1491 1492 /* 1493 * sometimes the first log entry matches the base bufctl; in that 1494 * case, skip the base bufctl. 1495 */ 1496 if (bc.bc_lastlog != NULL && 1497 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1498 bc.bc_addr == bcn.bc_addr && 1499 bc.bc_cache == bcn.bc_cache && 1500 bc.bc_slab == bcn.bc_slab && 1501 bc.bc_timestamp == bcn.bc_timestamp && 1502 bc.bc_thread == bcn.bc_thread) 1503 bhw->bhw_next = bc.bc_lastlog; 1504 else 1505 bhw->bhw_next = (void *)wsp->walk_addr; 1506 1507 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1508 wsp->walk_data = bhw; 1509 1510 return (WALK_NEXT); 1511 } 1512 1513 int 1514 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1515 { 1516 bufctl_history_walk_t *bhw = wsp->walk_data; 1517 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1518 uintptr_t baseaddr = wsp->walk_addr; 1519 umem_bufctl_audit_t *b; 1520 UMEM_LOCAL_BUFCTL_AUDIT(&b); 1521 1522 if (addr == NULL) 1523 return (WALK_DONE); 1524 1525 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1526 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1527 return (WALK_ERR); 1528 } 1529 1530 /* 1531 * The bufctl is only valid if the address, cache, and slab are 1532 * correct. We also check that the timestamp is decreasing, to 1533 * prevent infinite loops. 
1534 */ 1535 if ((uintptr_t)b->bc_addr != baseaddr || 1536 b->bc_cache != bhw->bhw_cache || 1537 b->bc_slab != bhw->bhw_slab || 1538 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp)) 1539 return (WALK_DONE); 1540 1541 bhw->bhw_next = b->bc_lastlog; 1542 bhw->bhw_timestamp = b->bc_timestamp; 1543 1544 return (wsp->walk_callback(addr, b, wsp->walk_cbdata)); 1545 } 1546 1547 void 1548 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1549 { 1550 bufctl_history_walk_t *bhw = wsp->walk_data; 1551 1552 mdb_free(bhw, sizeof (*bhw)); 1553 } 1554 1555 typedef struct umem_log_walk { 1556 umem_bufctl_audit_t *ulw_base; 1557 umem_bufctl_audit_t **ulw_sorted; 1558 umem_log_header_t ulw_lh; 1559 size_t ulw_size; 1560 size_t ulw_maxndx; 1561 size_t ulw_ndx; 1562 } umem_log_walk_t; 1563 1564 int 1565 umem_log_walk_init(mdb_walk_state_t *wsp) 1566 { 1567 uintptr_t lp = wsp->walk_addr; 1568 umem_log_walk_t *ulw; 1569 umem_log_header_t *lhp; 1570 int maxndx, i, j, k; 1571 1572 /* 1573 * By default (global walk), walk the umem_transaction_log. Otherwise 1574 * read the log whose umem_log_header_t is stored at walk_addr. 1575 */ 1576 if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) { 1577 mdb_warn("failed to read 'umem_transaction_log'"); 1578 return (WALK_ERR); 1579 } 1580 1581 if (lp == NULL) { 1582 mdb_warn("log is disabled\n"); 1583 return (WALK_ERR); 1584 } 1585 1586 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP); 1587 lhp = &ulw->ulw_lh; 1588 1589 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) { 1590 mdb_warn("failed to read log header at %p", lp); 1591 mdb_free(ulw, sizeof (umem_log_walk_t)); 1592 return (WALK_ERR); 1593 } 1594 1595 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1596 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP); 1597 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1; 1598 1599 if (mdb_vread(ulw->ulw_base, ulw->ulw_size, 1600 (uintptr_t)lhp->lh_base) == -1) { 1601 mdb_warn("failed to read log at base %p", lhp->lh_base); 1602 mdb_free(ulw->ulw_base, ulw->ulw_size); 1603 mdb_free(ulw, sizeof (umem_log_walk_t)); 1604 return (WALK_ERR); 1605 } 1606 1607 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1608 sizeof (umem_bufctl_audit_t *), UM_SLEEP); 1609 1610 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1611 caddr_t chunk = (caddr_t) 1612 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize); 1613 1614 for (j = 0; j < maxndx; j++) { 1615 /* LINTED align */ 1616 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk; 1617 chunk += UMEM_BUFCTL_AUDIT_SIZE; 1618 } 1619 } 1620 1621 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *), 1622 (int(*)(const void *, const void *))bufctlcmp); 1623 1624 ulw->ulw_maxndx = k; 1625 wsp->walk_data = ulw; 1626 1627 return (WALK_NEXT); 1628 } 1629 1630 int 1631 umem_log_walk_step(mdb_walk_state_t *wsp) 1632 { 1633 umem_log_walk_t *ulw = wsp->walk_data; 1634 umem_bufctl_audit_t *bcp; 1635 1636 if (ulw->ulw_ndx == ulw->ulw_maxndx) 1637 return (WALK_DONE); 1638 1639 bcp = ulw->ulw_sorted[ulw->ulw_ndx++]; 1640 1641 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base + 1642 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata)); 1643 } 1644 1645 void 1646 umem_log_walk_fini(mdb_walk_state_t *wsp) 1647 { 1648 umem_log_walk_t *ulw = wsp->walk_data; 1649 1650 mdb_free(ulw->ulw_base, ulw->ulw_size); 1651 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx * 1652 sizeof (umem_bufctl_audit_t *)); 1653 mdb_free(ulw, sizeof (umem_log_walk_t)); 1654 } 1655 1656 typedef struct allocdby_bufctl 
{ 1657 uintptr_t abb_addr; 1658 hrtime_t abb_ts; 1659 } allocdby_bufctl_t; 1660 1661 typedef struct allocdby_walk { 1662 const char *abw_walk; 1663 uintptr_t abw_thread; 1664 size_t abw_nbufs; 1665 size_t abw_size; 1666 allocdby_bufctl_t *abw_buf; 1667 size_t abw_ndx; 1668 } allocdby_walk_t; 1669 1670 int 1671 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp, 1672 allocdby_walk_t *abw) 1673 { 1674 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1675 return (WALK_NEXT); 1676 1677 if (abw->abw_nbufs == abw->abw_size) { 1678 allocdby_bufctl_t *buf; 1679 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1680 1681 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1682 1683 bcopy(abw->abw_buf, buf, oldsize); 1684 mdb_free(abw->abw_buf, oldsize); 1685 1686 abw->abw_size <<= 1; 1687 abw->abw_buf = buf; 1688 } 1689 1690 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1691 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1692 abw->abw_nbufs++; 1693 1694 return (WALK_NEXT); 1695 } 1696 1697 /*ARGSUSED*/ 1698 int 1699 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw) 1700 { 1701 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl, 1702 abw, addr) == -1) { 1703 mdb_warn("couldn't walk bufctl for cache %p", addr); 1704 return (WALK_DONE); 1705 } 1706 1707 return (WALK_NEXT); 1708 } 1709 1710 static int 1711 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1712 { 1713 if (lhs->abb_ts < rhs->abb_ts) 1714 return (1); 1715 if (lhs->abb_ts > rhs->abb_ts) 1716 return (-1); 1717 return (0); 1718 } 1719 1720 static int 1721 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1722 { 1723 allocdby_walk_t *abw; 1724 1725 if (wsp->walk_addr == NULL) { 1726 mdb_warn("allocdby walk doesn't support global walks\n"); 1727 return (WALK_ERR); 1728 } 1729 1730 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1731 1732 abw->abw_thread = wsp->walk_addr; 1733 abw->abw_walk = walk; 1734 abw->abw_size = 128; /* something reasonable */ 1735 abw->abw_buf = 1736 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1737 1738 wsp->walk_data = abw; 1739 1740 if (mdb_walk("umem_cache", 1741 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1742 mdb_warn("couldn't walk umem_cache"); 1743 allocdby_walk_fini(wsp); 1744 return (WALK_ERR); 1745 } 1746 1747 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1748 (int(*)(const void *, const void *))allocdby_cmp); 1749 1750 return (WALK_NEXT); 1751 } 1752 1753 int 1754 allocdby_walk_init(mdb_walk_state_t *wsp) 1755 { 1756 return (allocdby_walk_init_common(wsp, "bufctl")); 1757 } 1758 1759 int 1760 freedby_walk_init(mdb_walk_state_t *wsp) 1761 { 1762 return (allocdby_walk_init_common(wsp, "freectl")); 1763 } 1764 1765 int 1766 allocdby_walk_step(mdb_walk_state_t *wsp) 1767 { 1768 allocdby_walk_t *abw = wsp->walk_data; 1769 uintptr_t addr; 1770 umem_bufctl_audit_t *bcp; 1771 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 1772 1773 if (abw->abw_ndx == abw->abw_nbufs) 1774 return (WALK_DONE); 1775 1776 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1777 1778 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1779 mdb_warn("couldn't read bufctl at %p", addr); 1780 return (WALK_DONE); 1781 } 1782 1783 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata)); 1784 } 1785 1786 void 1787 allocdby_walk_fini(mdb_walk_state_t *wsp) 1788 { 1789 allocdby_walk_t *abw = wsp->walk_data; 1790 1791 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1792 
mdb_free(abw, sizeof (allocdby_walk_t)); 1793 } 1794 1795 /*ARGSUSED*/ 1796 int 1797 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored) 1798 { 1799 char c[MDB_SYM_NAMLEN]; 1800 GElf_Sym sym; 1801 int i; 1802 1803 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 1804 for (i = 0; i < bcp->bc_depth; i++) { 1805 if (mdb_lookup_by_addr(bcp->bc_stack[i], 1806 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 1807 continue; 1808 if (is_umem_sym(c, "umem_")) 1809 continue; 1810 mdb_printf("%s+0x%lx", 1811 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 1812 break; 1813 } 1814 mdb_printf("\n"); 1815 1816 return (WALK_NEXT); 1817 } 1818 1819 static int 1820 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 1821 { 1822 if (!(flags & DCMD_ADDRSPEC)) 1823 return (DCMD_USAGE); 1824 1825 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 1826 1827 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 1828 mdb_warn("can't walk '%s' for %p", w, addr); 1829 return (DCMD_ERR); 1830 } 1831 1832 return (DCMD_OK); 1833 } 1834 1835 /*ARGSUSED*/ 1836 int 1837 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1838 { 1839 return (allocdby_common(addr, flags, "allocdby")); 1840 } 1841 1842 /*ARGSUSED*/ 1843 int 1844 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1845 { 1846 return (allocdby_common(addr, flags, "freedby")); 1847 } 1848 1849 typedef struct whatis { 1850 uintptr_t w_addr; 1851 const umem_cache_t *w_cache; 1852 const vmem_t *w_vmem; 1853 int w_found; 1854 uint_t w_verbose; 1855 uint_t w_freemem; 1856 uint_t w_all; 1857 uint_t w_bufctl; 1858 } whatis_t; 1859 1860 static void 1861 whatis_print_umem(uintptr_t addr, uintptr_t baddr, whatis_t *w) 1862 { 1863 /* LINTED pointer cast may result in improper alignment */ 1864 uintptr_t btaddr = (uintptr_t)UMEM_BUFTAG(w->w_cache, addr); 1865 intptr_t stat; 1866 1867 if (w->w_cache->cache_flags & UMF_REDZONE) { 1868 umem_buftag_t bt; 1869 1870 if (mdb_vread(&bt, sizeof (bt), btaddr) == -1) 1871 goto done; 1872 1873 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 1874 1875 if (stat != UMEM_BUFTAG_ALLOC && stat != UMEM_BUFTAG_FREE) 1876 goto done; 1877 1878 /* 1879 * provide the bufctl ptr if it has useful information 1880 */ 1881 if (baddr == 0 && (w->w_cache->cache_flags & UMF_AUDIT)) 1882 baddr = (uintptr_t)bt.bt_bufctl; 1883 } 1884 1885 done: 1886 if (baddr == 0) 1887 mdb_printf("%p is %p+%p, %s from %s\n", 1888 w->w_addr, addr, w->w_addr - addr, 1889 w->w_freemem == FALSE ? "allocated" : "freed", 1890 w->w_cache->cache_name); 1891 else 1892 mdb_printf("%p is %p+%p, bufctl %p %s from %s\n", 1893 w->w_addr, addr, w->w_addr - addr, baddr, 1894 w->w_freemem == FALSE ? "allocated" : "freed", 1895 w->w_cache->cache_name); 1896 } 1897 1898 /*ARGSUSED*/ 1899 static int 1900 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_t *w) 1901 { 1902 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 1903 return (WALK_NEXT); 1904 1905 whatis_print_umem(addr, 0, w); 1906 w->w_found++; 1907 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1908 } 1909 1910 static int 1911 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 1912 { 1913 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 1914 return (WALK_NEXT); 1915 1916 mdb_printf("%p is %p+%p ", w->w_addr, 1917 vs->vs_start, w->w_addr - vs->vs_start); 1918 1919 /* 1920 * Always provide the vmem_seg pointer if it has a stack trace. 
1921 */ 1922 if (w->w_bufctl == TRUE || 1923 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) { 1924 mdb_printf("(vmem_seg %p) ", addr); 1925 } 1926 1927 mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ? 1928 "freed " : "", w->w_vmem->vm_name); 1929 1930 w->w_found++; 1931 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1932 } 1933 1934 static int 1935 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 1936 { 1937 const char *nm = vmem->vm_name; 1938 w->w_vmem = vmem; 1939 w->w_freemem = FALSE; 1940 1941 if (w->w_verbose) 1942 mdb_printf("Searching vmem arena %s...\n", nm); 1943 1944 if (mdb_pwalk("vmem_alloc", 1945 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 1946 mdb_warn("can't walk vmem seg for %p", addr); 1947 return (WALK_NEXT); 1948 } 1949 1950 if (w->w_found && w->w_all == FALSE) 1951 return (WALK_DONE); 1952 1953 if (w->w_verbose) 1954 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 1955 1956 w->w_freemem = TRUE; 1957 1958 if (mdb_pwalk("vmem_free", 1959 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 1960 mdb_warn("can't walk vmem seg for %p", addr); 1961 return (WALK_NEXT); 1962 } 1963 1964 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 1965 } 1966 1967 /*ARGSUSED*/ 1968 static int 1969 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_t *w) 1970 { 1971 uintptr_t addr; 1972 1973 if (bcp == NULL) 1974 return (WALK_NEXT); 1975 1976 addr = (uintptr_t)bcp->bc_addr; 1977 1978 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 1979 return (WALK_NEXT); 1980 1981 whatis_print_umem(addr, baddr, w); 1982 w->w_found++; 1983 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1984 } 1985 1986 static int 1987 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_t *w) 1988 { 1989 char *walk, *freewalk; 1990 mdb_walk_cb_t func; 1991 1992 if (w->w_bufctl == FALSE) { 1993 walk = "umem"; 1994 freewalk = "freemem"; 1995 func = (mdb_walk_cb_t)whatis_walk_umem; 1996 } else { 1997 walk = "bufctl"; 1998 freewalk = "freectl"; 1999 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2000 } 2001 2002 if (w->w_verbose) 2003 mdb_printf("Searching %s...\n", c->cache_name); 2004 2005 w->w_cache = c; 2006 w->w_freemem = FALSE; 2007 2008 if (mdb_pwalk(walk, func, w, addr) == -1) { 2009 mdb_warn("can't find %s walker", walk); 2010 return (WALK_DONE); 2011 } 2012 2013 if (w->w_found && w->w_all == FALSE) 2014 return (WALK_DONE); 2015 2016 /* 2017 * We have searched for allocated memory; now search for freed memory. 2018 */ 2019 if (w->w_verbose) 2020 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2021 2022 w->w_freemem = TRUE; 2023 2024 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 2025 mdb_warn("can't find %s walker", freewalk); 2026 return (WALK_DONE); 2027 } 2028 2029 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 2030 } 2031 2032 static int 2033 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_t *w) 2034 { 2035 if (c->cache_cflags & UMC_NOTOUCH) 2036 return (WALK_NEXT); 2037 2038 return (whatis_walk_cache(addr, c, w)); 2039 } 2040 2041 static int 2042 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_t *w) 2043 { 2044 if (!(c->cache_cflags & UMC_NOTOUCH)) 2045 return (WALK_NEXT); 2046 2047 return (whatis_walk_cache(addr, c, w)); 2048 } 2049 2050 int 2051 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2052 { 2053 whatis_t w; 2054 2055 if (!(flags & DCMD_ADDRSPEC)) 2056 return (DCMD_USAGE); 2057 2058 w.w_verbose = FALSE; 2059 w.w_bufctl = FALSE; 2060 w.w_all = FALSE; 2061 2062 if (mdb_getopts(argc, argv, 2063 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2064 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2065 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc) 2066 return (DCMD_USAGE); 2067 2068 w.w_addr = addr; 2069 w.w_found = 0; 2070 2071 /* 2072 * Mappings and threads should eventually be added here. 2073 */ 2074 if (mdb_walk("umem_cache", 2075 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2076 mdb_warn("couldn't find umem_cache walker"); 2077 return (DCMD_ERR); 2078 } 2079 2080 if (w.w_found && w.w_all == FALSE) 2081 return (DCMD_OK); 2082 2083 if (mdb_walk("umem_cache", 2084 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2085 mdb_warn("couldn't find umem_cache walker"); 2086 return (DCMD_ERR); 2087 } 2088 2089 if (w.w_found && w.w_all == FALSE) 2090 return (DCMD_OK); 2091 2092 if (mdb_walk("vmem_postfix", 2093 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2094 mdb_warn("couldn't find vmem_postfix walker"); 2095 return (DCMD_ERR); 2096 } 2097 2098 if (w.w_found == 0) 2099 mdb_printf("%p is unknown\n", addr); 2100 2101 return (DCMD_OK); 2102 } 2103 2104 typedef struct umem_log_cpu { 2105 uintptr_t umc_low; 2106 uintptr_t umc_high; 2107 } umem_log_cpu_t; 2108 2109 int 2110 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc) 2111 { 2112 int i; 2113 2114 for (i = 0; i < umem_max_ncpus; i++) { 2115 if (addr >= umc[i].umc_low && addr < umc[i].umc_high) 2116 break; 2117 } 2118 2119 if (i == umem_max_ncpus) 2120 mdb_printf(" "); 2121 else 2122 mdb_printf("%3d", i); 2123 2124 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2125 b->bc_timestamp, b->bc_thread); 2126 2127 return (WALK_NEXT); 2128 } 2129 2130 /*ARGSUSED*/ 2131 int 2132 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2133 { 2134 umem_log_header_t lh; 2135 umem_cpu_log_header_t clh; 2136 uintptr_t lhp, clhp; 2137 umem_log_cpu_t *umc; 2138 int i; 2139 2140 if (umem_readvar(&lhp, "umem_transaction_log") == -1) { 2141 mdb_warn("failed to read 'umem_transaction_log'"); 2142 return (DCMD_ERR); 2143 } 2144 2145 if (lhp == NULL) { 2146 mdb_warn("no umem transaction log\n"); 2147 return (DCMD_ERR); 2148 } 2149 2150 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) { 2151 mdb_warn("failed to read log header at %p", lhp); 2152 return (DCMD_ERR); 2153 } 2154 2155 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2156 2157 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus, 2158 UM_SLEEP | UM_GC); 2159 2160 for (i = 0; i < umem_max_ncpus; i++) { 2161 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2162 mdb_warn("cannot read cpu %d's log header at %p", 2163 i, clhp); 2164 return (DCMD_ERR); 2165 } 2166 2167 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize + 2168 (uintptr_t)lh.lh_base; 2169 
umc[i].umc_high = (uintptr_t)clh.clh_current; 2170 2171 clhp += sizeof (umem_cpu_log_header_t); 2172 } 2173 2174 if (DCMD_HDRSPEC(flags)) { 2175 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", 2176 "BUFADDR", "TIMESTAMP", "THREAD"); 2177 } 2178 2179 /* 2180 * If we have been passed an address, we'll just print out that 2181 * log entry. 2182 */ 2183 if (flags & DCMD_ADDRSPEC) { 2184 umem_bufctl_audit_t *bp; 2185 UMEM_LOCAL_BUFCTL_AUDIT(&bp); 2186 2187 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2188 mdb_warn("failed to read bufctl at %p", addr); 2189 return (DCMD_ERR); 2190 } 2191 2192 (void) umem_log_walk(addr, bp, umc); 2193 2194 return (DCMD_OK); 2195 } 2196 2197 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) { 2198 mdb_warn("can't find umem log walker"); 2199 return (DCMD_ERR); 2200 } 2201 2202 return (DCMD_OK); 2203 } 2204 2205 typedef struct bufctl_history_cb { 2206 int bhc_flags; 2207 int bhc_argc; 2208 const mdb_arg_t *bhc_argv; 2209 int bhc_ret; 2210 } bufctl_history_cb_t; 2211 2212 /*ARGSUSED*/ 2213 static int 2214 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2215 { 2216 bufctl_history_cb_t *bhc = arg; 2217 2218 bhc->bhc_ret = 2219 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2220 2221 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2222 2223 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2224 } 2225 2226 void 2227 bufctl_help(void) 2228 { 2229 mdb_printf("%s\n", 2230 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n"); 2231 mdb_dec_indent(2); 2232 mdb_printf("%<b>OPTIONS%</b>\n"); 2233 mdb_inc_indent(2); 2234 mdb_printf("%s", 2235 " -v Display the full content of the bufctl, including its stack trace\n" 2236 " -h retrieve the bufctl's transaction history, if available\n" 2237 " -a addr\n" 2238 " filter out bufctls not involving the buffer at addr\n" 2239 " -c caller\n" 2240 " filter out bufctls without the function/PC in their stack trace\n" 2241 " -e earliest\n" 2242 " filter out bufctls timestamped before earliest\n" 2243 " -l latest\n" 2244 " filter out bufctls timestamped after latest\n" 2245 " -t thread\n" 2246 " filter out bufctls not involving thread\n"); 2247 } 2248 2249 int 2250 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2251 { 2252 uint_t verbose = FALSE; 2253 uint_t history = FALSE; 2254 uint_t in_history = FALSE; 2255 uintptr_t caller = NULL, thread = NULL; 2256 uintptr_t laddr, haddr, baddr = NULL; 2257 hrtime_t earliest = 0, latest = 0; 2258 int i, depth; 2259 char c[MDB_SYM_NAMLEN]; 2260 GElf_Sym sym; 2261 umem_bufctl_audit_t *bcp; 2262 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 2263 2264 if (mdb_getopts(argc, argv, 2265 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2266 'h', MDB_OPT_SETBITS, TRUE, &history, 2267 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2268 'c', MDB_OPT_UINTPTR, &caller, 2269 't', MDB_OPT_UINTPTR, &thread, 2270 'e', MDB_OPT_UINT64, &earliest, 2271 'l', MDB_OPT_UINT64, &latest, 2272 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2273 return (DCMD_USAGE); 2274 2275 if (!(flags & DCMD_ADDRSPEC)) 2276 return (DCMD_USAGE); 2277 2278 if (in_history && !history) 2279 return (DCMD_USAGE); 2280 2281 if (history && !in_history) { 2282 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2283 UM_SLEEP | UM_GC); 2284 bufctl_history_cb_t bhc; 2285 2286 nargv[0].a_type = MDB_TYPE_STRING; 2287 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2288 2289 for (i = 0; i < argc; i++) 2290 nargv[i + 1] = argv[i]; 2291 2292 /* 2293 * When 
in history mode, we treat each element as if it 2294 * were in a seperate loop, so that the headers group 2295 * bufctls with similar histories. 2296 */ 2297 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2298 bhc.bhc_argc = argc + 1; 2299 bhc.bhc_argv = nargv; 2300 bhc.bhc_ret = DCMD_OK; 2301 2302 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2303 addr) == -1) { 2304 mdb_warn("unable to walk bufctl_history"); 2305 return (DCMD_ERR); 2306 } 2307 2308 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2309 mdb_printf("\n"); 2310 2311 return (bhc.bhc_ret); 2312 } 2313 2314 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2315 if (verbose) { 2316 mdb_printf("%16s %16s %16s %16s\n" 2317 "%<u>%16s %16s %16s %16s%</u>\n", 2318 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2319 "", "CACHE", "LASTLOG", "CONTENTS"); 2320 } else { 2321 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n", 2322 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER"); 2323 } 2324 } 2325 2326 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2327 mdb_warn("couldn't read bufctl at %p", addr); 2328 return (DCMD_ERR); 2329 } 2330 2331 /* 2332 * Guard against bogus bc_depth in case the bufctl is corrupt or 2333 * the address does not really refer to a bufctl. 2334 */ 2335 depth = MIN(bcp->bc_depth, umem_stack_depth); 2336 2337 if (caller != NULL) { 2338 laddr = caller; 2339 haddr = caller + sizeof (caller); 2340 2341 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2342 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2343 /* 2344 * We were provided an exact symbol value; any 2345 * address in the function is valid. 2346 */ 2347 laddr = (uintptr_t)sym.st_value; 2348 haddr = (uintptr_t)sym.st_value + sym.st_size; 2349 } 2350 2351 for (i = 0; i < depth; i++) 2352 if (bcp->bc_stack[i] >= laddr && 2353 bcp->bc_stack[i] < haddr) 2354 break; 2355 2356 if (i == depth) 2357 return (DCMD_OK); 2358 } 2359 2360 if (thread != NULL && (uintptr_t)bcp->bc_thread != thread) 2361 return (DCMD_OK); 2362 2363 if (earliest != 0 && bcp->bc_timestamp < earliest) 2364 return (DCMD_OK); 2365 2366 if (latest != 0 && bcp->bc_timestamp > latest) 2367 return (DCMD_OK); 2368 2369 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr) 2370 return (DCMD_OK); 2371 2372 if (flags & DCMD_PIPE_OUT) { 2373 mdb_printf("%#r\n", addr); 2374 return (DCMD_OK); 2375 } 2376 2377 if (verbose) { 2378 mdb_printf( 2379 "%<b>%16p%</b> %16p %16llx %16d\n" 2380 "%16s %16p %16p %16p\n", 2381 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread, 2382 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents); 2383 2384 mdb_inc_indent(17); 2385 for (i = 0; i < depth; i++) 2386 mdb_printf("%a\n", bcp->bc_stack[i]); 2387 mdb_dec_indent(17); 2388 mdb_printf("\n"); 2389 } else { 2390 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr, 2391 bcp->bc_timestamp, bcp->bc_thread); 2392 2393 for (i = 0; i < depth; i++) { 2394 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2395 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2396 continue; 2397 if (is_umem_sym(c, "umem_")) 2398 continue; 2399 mdb_printf(" %a\n", bcp->bc_stack[i]); 2400 break; 2401 } 2402 2403 if (i >= depth) 2404 mdb_printf("\n"); 2405 } 2406 2407 return (DCMD_OK); 2408 } 2409 2410 /*ARGSUSED*/ 2411 int 2412 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2413 { 2414 mdb_arg_t a; 2415 2416 if (!(flags & DCMD_ADDRSPEC)) 2417 return (DCMD_USAGE); 2418 2419 if (argc != 0) 2420 return (DCMD_USAGE); 2421 2422 a.a_type = MDB_TYPE_STRING; 2423 a.a_un.a_str = "-v"; 2424 2425 
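	/*
	 * ::bufctl_audit is just shorthand for "addr::bufctl -v"; hand the
	 * synthesized -v argument straight to the ::bufctl dcmd.
	 */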
return (bufctl(addr, flags, 1, &a)); 2426 } 2427 2428 typedef struct umem_verify { 2429 uint64_t *umv_buf; /* buffer to read cache contents into */ 2430 size_t umv_size; /* number of bytes in umv_buf */ 2431 int umv_corruption; /* > 0 if corruption found. */ 2432 int umv_besilent; /* report actual corruption sites */ 2433 struct umem_cache umv_cache; /* the cache we're operating on */ 2434 } umem_verify_t; 2435 2436 /* 2437 * verify_pattern() 2438 * verify that buf is filled with the pattern pat. 2439 */ 2440 static int64_t 2441 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2442 { 2443 /*LINTED*/ 2444 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 2445 uint64_t *buf; 2446 2447 for (buf = buf_arg; buf < bufend; buf++) 2448 if (*buf != pat) 2449 return ((uintptr_t)buf - (uintptr_t)buf_arg); 2450 return (-1); 2451 } 2452 2453 /* 2454 * verify_buftag() 2455 * verify that btp->bt_bxstat == (bcp ^ pat) 2456 */ 2457 static int 2458 verify_buftag(umem_buftag_t *btp, uintptr_t pat) 2459 { 2460 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 2461 } 2462 2463 /* 2464 * verify_free() 2465 * verify the integrity of a free block of memory by checking 2466 * that it is filled with 0xdeadbeef and that its buftag is sane. 2467 */ 2468 /*ARGSUSED1*/ 2469 static int 2470 verify_free(uintptr_t addr, const void *data, void *private) 2471 { 2472 umem_verify_t *umv = (umem_verify_t *)private; 2473 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2474 int64_t corrupt; /* corruption offset */ 2475 umem_buftag_t *buftagp; /* ptr to buftag */ 2476 umem_cache_t *cp = &umv->umv_cache; 2477 int besilent = umv->umv_besilent; 2478 2479 /*LINTED*/ 2480 buftagp = UMEM_BUFTAG(cp, buf); 2481 2482 /* 2483 * Read the buffer to check. 2484 */ 2485 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2486 if (!besilent) 2487 mdb_warn("couldn't read %p", addr); 2488 return (WALK_NEXT); 2489 } 2490 2491 if ((corrupt = verify_pattern(buf, cp->cache_verify, 2492 UMEM_FREE_PATTERN)) >= 0) { 2493 if (!besilent) 2494 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 2495 addr, (uintptr_t)addr + corrupt); 2496 goto corrupt; 2497 } 2498 2499 if ((cp->cache_flags & UMF_HASH) && 2500 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) { 2501 if (!besilent) 2502 mdb_printf("buffer %p (free) seems to " 2503 "have a corrupt redzone pattern\n", addr); 2504 goto corrupt; 2505 } 2506 2507 /* 2508 * confirm bufctl pointer integrity. 2509 */ 2510 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) { 2511 if (!besilent) 2512 mdb_printf("buffer %p (free) has a corrupt " 2513 "buftag\n", addr); 2514 goto corrupt; 2515 } 2516 2517 return (WALK_NEXT); 2518 corrupt: 2519 umv->umv_corruption++; 2520 return (WALK_NEXT); 2521 } 2522 2523 /* 2524 * verify_alloc() 2525 * Verify that the buftag of an allocated buffer makes sense with respect 2526 * to the buffer. 2527 */ 2528 /*ARGSUSED1*/ 2529 static int 2530 verify_alloc(uintptr_t addr, const void *data, void *private) 2531 { 2532 umem_verify_t *umv = (umem_verify_t *)private; 2533 umem_cache_t *cp = &umv->umv_cache; 2534 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2535 /*LINTED*/ 2536 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf); 2537 uint32_t *ip = (uint32_t *)buftagp; 2538 uint8_t *bp = (uint8_t *)buf; 2539 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 2540 int besilent = umv->umv_besilent; 2541 2542 /* 2543 * Read the buffer to check. 
2544 */ 2545 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2546 if (!besilent) 2547 mdb_warn("couldn't read %p", addr); 2548 return (WALK_NEXT); 2549 } 2550 2551 /* 2552 * There are two cases to handle: 2553 * 1. If the buf was alloc'd using umem_cache_alloc, it will have 2554 * 0xfeedfacefeedface at the end of it 2555 * 2. If the buf was alloc'd using umem_alloc, it will have 2556 * 0xbb just past the end of the region in use. At the buftag, 2557 * it will have 0xfeedface (or, if the whole buffer is in use, 2558 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 2559 * endianness), followed by 32 bits containing the offset of the 2560 * 0xbb byte in the buffer. 2561 * 2562 * Finally, the two 32-bit words that comprise the second half of the 2563 * buftag should xor to UMEM_BUFTAG_ALLOC 2564 */ 2565 2566 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN) 2567 looks_ok = 1; 2568 else if (!UMEM_SIZE_VALID(ip[1])) 2569 size_ok = 0; 2570 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE) 2571 looks_ok = 1; 2572 else 2573 size_ok = 0; 2574 2575 if (!size_ok) { 2576 if (!besilent) 2577 mdb_printf("buffer %p (allocated) has a corrupt " 2578 "redzone size encoding\n", addr); 2579 goto corrupt; 2580 } 2581 2582 if (!looks_ok) { 2583 if (!besilent) 2584 mdb_printf("buffer %p (allocated) has a corrupt " 2585 "redzone signature\n", addr); 2586 goto corrupt; 2587 } 2588 2589 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) { 2590 if (!besilent) 2591 mdb_printf("buffer %p (allocated) has a " 2592 "corrupt buftag\n", addr); 2593 goto corrupt; 2594 } 2595 2596 return (WALK_NEXT); 2597 corrupt: 2598 umv->umv_corruption++; 2599 return (WALK_NEXT); 2600 } 2601 2602 /*ARGSUSED2*/ 2603 int 2604 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2605 { 2606 if (flags & DCMD_ADDRSPEC) { 2607 int check_alloc = 0, check_free = 0; 2608 umem_verify_t umv; 2609 2610 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache), 2611 addr) == -1) { 2612 mdb_warn("couldn't read umem_cache %p", addr); 2613 return (DCMD_ERR); 2614 } 2615 2616 umv.umv_size = umv.umv_cache.cache_buftag + 2617 sizeof (umem_buftag_t); 2618 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC); 2619 umv.umv_corruption = 0; 2620 2621 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) { 2622 check_alloc = 1; 2623 if (umv.umv_cache.cache_flags & UMF_DEADBEEF) 2624 check_free = 1; 2625 } else { 2626 if (!(flags & DCMD_LOOP)) { 2627 mdb_warn("cache %p (%s) does not have " 2628 "redzone checking enabled\n", addr, 2629 umv.umv_cache.cache_name); 2630 } 2631 return (DCMD_ERR); 2632 } 2633 2634 if (flags & DCMD_LOOP) { 2635 /* 2636 * table mode, don't print out every corrupt buffer 2637 */ 2638 umv.umv_besilent = 1; 2639 } else { 2640 mdb_printf("Summary for cache '%s'\n", 2641 umv.umv_cache.cache_name); 2642 mdb_inc_indent(2); 2643 umv.umv_besilent = 0; 2644 } 2645 2646 if (check_alloc) 2647 (void) mdb_pwalk("umem", verify_alloc, &umv, addr); 2648 if (check_free) 2649 (void) mdb_pwalk("freemem", verify_free, &umv, addr); 2650 2651 if (flags & DCMD_LOOP) { 2652 if (umv.umv_corruption == 0) { 2653 mdb_printf("%-*s %?p clean\n", 2654 UMEM_CACHE_NAMELEN, 2655 umv.umv_cache.cache_name, addr); 2656 } else { 2657 char *s = ""; /* optional s in "buffer[s]" */ 2658 if (umv.umv_corruption > 1) 2659 s = "s"; 2660 2661 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 2662 UMEM_CACHE_NAMELEN, 2663 umv.umv_cache.cache_name, addr, 2664 umv.umv_corruption, s); 2665 } 2666 } else { 2667 /* 2668 * This is the more verbose mode, when 
the user has 2669 * type addr::umem_verify. If the cache was clean, 2670 * nothing will have yet been printed. So say something. 2671 */ 2672 if (umv.umv_corruption == 0) 2673 mdb_printf("clean\n"); 2674 2675 mdb_dec_indent(2); 2676 } 2677 } else { 2678 /* 2679 * If the user didn't specify a cache to verify, we'll walk all 2680 * umem_cache's, specifying ourself as a callback for each... 2681 * this is the equivalent of '::walk umem_cache .::umem_verify' 2682 */ 2683 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN, 2684 "Cache Name", "Addr", "Cache Integrity"); 2685 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL)); 2686 } 2687 2688 return (DCMD_OK); 2689 } 2690 2691 typedef struct vmem_node { 2692 struct vmem_node *vn_next; 2693 struct vmem_node *vn_parent; 2694 struct vmem_node *vn_sibling; 2695 struct vmem_node *vn_children; 2696 uintptr_t vn_addr; 2697 int vn_marked; 2698 vmem_t vn_vmem; 2699 } vmem_node_t; 2700 2701 typedef struct vmem_walk { 2702 vmem_node_t *vw_root; 2703 vmem_node_t *vw_current; 2704 } vmem_walk_t; 2705 2706 int 2707 vmem_walk_init(mdb_walk_state_t *wsp) 2708 { 2709 uintptr_t vaddr, paddr; 2710 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 2711 vmem_walk_t *vw; 2712 2713 if (umem_readvar(&vaddr, "vmem_list") == -1) { 2714 mdb_warn("couldn't read 'vmem_list'"); 2715 return (WALK_ERR); 2716 } 2717 2718 while (vaddr != NULL) { 2719 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 2720 vp->vn_addr = vaddr; 2721 vp->vn_next = head; 2722 head = vp; 2723 2724 if (vaddr == wsp->walk_addr) 2725 current = vp; 2726 2727 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 2728 mdb_warn("couldn't read vmem_t at %p", vaddr); 2729 goto err; 2730 } 2731 2732 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 2733 } 2734 2735 for (vp = head; vp != NULL; vp = vp->vn_next) { 2736 2737 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 2738 vp->vn_sibling = root; 2739 root = vp; 2740 continue; 2741 } 2742 2743 for (parent = head; parent != NULL; parent = parent->vn_next) { 2744 if (parent->vn_addr != paddr) 2745 continue; 2746 vp->vn_sibling = parent->vn_children; 2747 parent->vn_children = vp; 2748 vp->vn_parent = parent; 2749 break; 2750 } 2751 2752 if (parent == NULL) { 2753 mdb_warn("couldn't find %p's parent (%p)\n", 2754 vp->vn_addr, paddr); 2755 goto err; 2756 } 2757 } 2758 2759 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 2760 vw->vw_root = root; 2761 2762 if (current != NULL) 2763 vw->vw_current = current; 2764 else 2765 vw->vw_current = root; 2766 2767 wsp->walk_data = vw; 2768 return (WALK_NEXT); 2769 err: 2770 for (vp = head; head != NULL; vp = head) { 2771 head = vp->vn_next; 2772 mdb_free(vp, sizeof (vmem_node_t)); 2773 } 2774 2775 return (WALK_ERR); 2776 } 2777 2778 int 2779 vmem_walk_step(mdb_walk_state_t *wsp) 2780 { 2781 vmem_walk_t *vw = wsp->walk_data; 2782 vmem_node_t *vp; 2783 int rval; 2784 2785 if ((vp = vw->vw_current) == NULL) 2786 return (WALK_DONE); 2787 2788 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 2789 2790 if (vp->vn_children != NULL) { 2791 vw->vw_current = vp->vn_children; 2792 return (rval); 2793 } 2794 2795 do { 2796 vw->vw_current = vp->vn_sibling; 2797 vp = vp->vn_parent; 2798 } while (vw->vw_current == NULL && vp != NULL); 2799 2800 return (rval); 2801 } 2802 2803 /* 2804 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 2805 * children are visited before their parent. 
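 * For example, if arena B's vm_source is arena A, the walk reports B (and
 * any of B's descendants) before it reports A.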
We perform the postfix walk 2806 * iteratively (rather than recursively) to allow mdb to regain control 2807 * after each callback. 2808 */ 2809 int 2810 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 2811 { 2812 vmem_walk_t *vw = wsp->walk_data; 2813 vmem_node_t *vp = vw->vw_current; 2814 int rval; 2815 2816 /* 2817 * If this node is marked, then we know that we have already visited 2818 * all of its children. If the node has any siblings, they need to 2819 * be visited next; otherwise, we need to visit the parent. Note 2820 * that vp->vn_marked will only be zero on the first invocation of 2821 * the step function. 2822 */ 2823 if (vp->vn_marked) { 2824 if (vp->vn_sibling != NULL) 2825 vp = vp->vn_sibling; 2826 else if (vp->vn_parent != NULL) 2827 vp = vp->vn_parent; 2828 else { 2829 /* 2830 * We have neither a parent, nor a sibling, and we 2831 * have already been visited; we're done. 2832 */ 2833 return (WALK_DONE); 2834 } 2835 } 2836 2837 /* 2838 * Before we visit this node, visit its children. 2839 */ 2840 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 2841 vp = vp->vn_children; 2842 2843 vp->vn_marked = 1; 2844 vw->vw_current = vp; 2845 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 2846 2847 return (rval); 2848 } 2849 2850 void 2851 vmem_walk_fini(mdb_walk_state_t *wsp) 2852 { 2853 vmem_walk_t *vw = wsp->walk_data; 2854 vmem_node_t *root = vw->vw_root; 2855 int done; 2856 2857 if (root == NULL) 2858 return; 2859 2860 if ((vw->vw_root = root->vn_children) != NULL) 2861 vmem_walk_fini(wsp); 2862 2863 vw->vw_root = root->vn_sibling; 2864 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 2865 mdb_free(root, sizeof (vmem_node_t)); 2866 2867 if (done) { 2868 mdb_free(vw, sizeof (vmem_walk_t)); 2869 } else { 2870 vmem_walk_fini(wsp); 2871 } 2872 } 2873 2874 typedef struct vmem_seg_walk { 2875 uint8_t vsw_type; 2876 uintptr_t vsw_start; 2877 uintptr_t vsw_current; 2878 } vmem_seg_walk_t; 2879 2880 /*ARGSUSED*/ 2881 int 2882 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 2883 { 2884 vmem_seg_walk_t *vsw; 2885 2886 if (wsp->walk_addr == NULL) { 2887 mdb_warn("vmem_%s does not support global walks\n", name); 2888 return (WALK_ERR); 2889 } 2890 2891 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 2892 2893 vsw->vsw_type = type; 2894 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0); 2895 vsw->vsw_current = vsw->vsw_start; 2896 2897 return (WALK_NEXT); 2898 } 2899 2900 /* 2901 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
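 * We exploit that here: vmem_seg_walk_step() treats VMEM_NONE as a wildcard,
 * so the plain "vmem_seg" walker reports segments of every type, while the
 * typed walkers (alloc, free, span) filter on vs_type.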
2902 */ 2903 #define VMEM_NONE 0 2904 2905 int 2906 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 2907 { 2908 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 2909 } 2910 2911 int 2912 vmem_free_walk_init(mdb_walk_state_t *wsp) 2913 { 2914 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 2915 } 2916 2917 int 2918 vmem_span_walk_init(mdb_walk_state_t *wsp) 2919 { 2920 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 2921 } 2922 2923 int 2924 vmem_seg_walk_init(mdb_walk_state_t *wsp) 2925 { 2926 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 2927 } 2928 2929 int 2930 vmem_seg_walk_step(mdb_walk_state_t *wsp) 2931 { 2932 vmem_seg_t seg; 2933 vmem_seg_walk_t *vsw = wsp->walk_data; 2934 uintptr_t addr = vsw->vsw_current; 2935 static size_t seg_size = 0; 2936 int rval; 2937 2938 if (!seg_size) { 2939 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) { 2940 mdb_warn("failed to read 'vmem_seg_size'"); 2941 seg_size = sizeof (vmem_seg_t); 2942 } 2943 } 2944 2945 if (seg_size < sizeof (seg)) 2946 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 2947 2948 if (mdb_vread(&seg, seg_size, addr) == -1) { 2949 mdb_warn("couldn't read vmem_seg at %p", addr); 2950 return (WALK_ERR); 2951 } 2952 2953 vsw->vsw_current = (uintptr_t)seg.vs_anext; 2954 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 2955 rval = WALK_NEXT; 2956 } else { 2957 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 2958 } 2959 2960 if (vsw->vsw_current == vsw->vsw_start) 2961 return (WALK_DONE); 2962 2963 return (rval); 2964 } 2965 2966 void 2967 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 2968 { 2969 vmem_seg_walk_t *vsw = wsp->walk_data; 2970 2971 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 2972 } 2973 2974 #define VMEM_NAMEWIDTH 22 2975 2976 int 2977 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2978 { 2979 vmem_t v, parent; 2980 uintptr_t paddr; 2981 int ident = 0; 2982 char c[VMEM_NAMEWIDTH]; 2983 2984 if (!(flags & DCMD_ADDRSPEC)) { 2985 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 2986 mdb_warn("can't walk vmem"); 2987 return (DCMD_ERR); 2988 } 2989 return (DCMD_OK); 2990 } 2991 2992 if (DCMD_HDRSPEC(flags)) 2993 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 2994 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 2995 "TOTAL", "SUCCEED", "FAIL"); 2996 2997 if (mdb_vread(&v, sizeof (v), addr) == -1) { 2998 mdb_warn("couldn't read vmem at %p", addr); 2999 return (DCMD_ERR); 3000 } 3001 3002 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3003 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3004 mdb_warn("couldn't trace %p's ancestry", addr); 3005 ident = 0; 3006 break; 3007 } 3008 paddr = (uintptr_t)parent.vm_source; 3009 } 3010 3011 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3012 3013 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3014 addr, VMEM_NAMEWIDTH, c, 3015 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total, 3016 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail); 3017 3018 return (DCMD_OK); 3019 } 3020 3021 void 3022 vmem_seg_help(void) 3023 { 3024 mdb_printf("%s\n", 3025 "Display the contents of vmem_seg_ts, with optional filtering.\n" 3026 "\n" 3027 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3028 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3029 "information.\n"); 3030 mdb_dec_indent(2); 3031 mdb_printf("%<b>OPTIONS%</b>\n"); 3032 mdb_inc_indent(2); 3033 mdb_printf("%s", 3034 " -v Display the full content of the vmem_seg, including its stack trace\n" 3035 " -s report the size of the segment, instead of the end address\n" 3036 " -c caller\n" 3037 " filter out segments without the function/PC in their stack trace\n" 3038 " -e earliest\n" 3039 " filter out segments timestamped before earliest\n" 3040 " -l latest\n" 3041 " filter out segments timestamped after latest\n" 3042 " -m minsize\n" 3043 " filer out segments smaller than minsize\n" 3044 " -M maxsize\n" 3045 " filer out segments larger than maxsize\n" 3046 " -t thread\n" 3047 " filter out segments not involving thread\n" 3048 " -T type\n" 3049 " filter out segments not of type 'type'\n" 3050 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3051 } 3052 3053 3054 /*ARGSUSED*/ 3055 int 3056 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3057 { 3058 vmem_seg_t vs; 3059 uintptr_t *stk = vs.vs_stack; 3060 uintptr_t sz; 3061 uint8_t t; 3062 const char *type = NULL; 3063 GElf_Sym sym; 3064 char c[MDB_SYM_NAMLEN]; 3065 int no_debug; 3066 int i; 3067 int depth; 3068 uintptr_t laddr, haddr; 3069 3070 uintptr_t caller = NULL, thread = NULL; 3071 uintptr_t minsize = 0, maxsize = 0; 3072 3073 hrtime_t earliest = 0, latest = 0; 3074 3075 uint_t size = 0; 3076 uint_t verbose = 0; 3077 3078 if (!(flags & DCMD_ADDRSPEC)) 3079 return (DCMD_USAGE); 3080 3081 if (mdb_getopts(argc, argv, 3082 'c', MDB_OPT_UINTPTR, &caller, 3083 'e', MDB_OPT_UINT64, &earliest, 3084 'l', MDB_OPT_UINT64, &latest, 3085 's', MDB_OPT_SETBITS, TRUE, &size, 3086 'm', MDB_OPT_UINTPTR, &minsize, 3087 'M', MDB_OPT_UINTPTR, &maxsize, 3088 't', MDB_OPT_UINTPTR, &thread, 3089 'T', MDB_OPT_STR, &type, 3090 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3091 NULL) != argc) 3092 return (DCMD_USAGE); 3093 3094 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3095 if (verbose) { 3096 mdb_printf("%16s %4s %16s %16s %16s\n" 3097 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3098 "ADDR", "TYPE", "START", "END", "SIZE", 3099 "", "", "THREAD", "TIMESTAMP", ""); 3100 } else { 3101 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3102 "START", size? 
"SIZE" : "END", "WHO"); 3103 } 3104 } 3105 3106 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3107 mdb_warn("couldn't read vmem_seg at %p", addr); 3108 return (DCMD_ERR); 3109 } 3110 3111 if (type != NULL) { 3112 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3113 t = VMEM_ALLOC; 3114 else if (strcmp(type, "FREE") == 0) 3115 t = VMEM_FREE; 3116 else if (strcmp(type, "SPAN") == 0) 3117 t = VMEM_SPAN; 3118 else if (strcmp(type, "ROTR") == 0 || 3119 strcmp(type, "ROTOR") == 0) 3120 t = VMEM_ROTOR; 3121 else if (strcmp(type, "WLKR") == 0 || 3122 strcmp(type, "WALKER") == 0) 3123 t = VMEM_WALKER; 3124 else { 3125 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3126 type); 3127 return (DCMD_ERR); 3128 } 3129 3130 if (vs.vs_type != t) 3131 return (DCMD_OK); 3132 } 3133 3134 sz = vs.vs_end - vs.vs_start; 3135 3136 if (minsize != 0 && sz < minsize) 3137 return (DCMD_OK); 3138 3139 if (maxsize != 0 && sz > maxsize) 3140 return (DCMD_OK); 3141 3142 t = vs.vs_type; 3143 depth = vs.vs_depth; 3144 3145 /* 3146 * debug info, when present, is only accurate for VMEM_ALLOC segments 3147 */ 3148 no_debug = (t != VMEM_ALLOC) || 3149 (depth == 0 || depth > VMEM_STACK_DEPTH); 3150 3151 if (no_debug) { 3152 if (caller != NULL || thread != NULL || earliest != 0 || 3153 latest != 0) 3154 return (DCMD_OK); /* not enough info */ 3155 } else { 3156 if (caller != NULL) { 3157 laddr = caller; 3158 haddr = caller + sizeof (caller); 3159 3160 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3161 sizeof (c), &sym) != -1 && 3162 caller == (uintptr_t)sym.st_value) { 3163 /* 3164 * We were provided an exact symbol value; any 3165 * address in the function is valid. 3166 */ 3167 laddr = (uintptr_t)sym.st_value; 3168 haddr = (uintptr_t)sym.st_value + sym.st_size; 3169 } 3170 3171 for (i = 0; i < depth; i++) 3172 if (vs.vs_stack[i] >= laddr && 3173 vs.vs_stack[i] < haddr) 3174 break; 3175 3176 if (i == depth) 3177 return (DCMD_OK); 3178 } 3179 3180 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3181 return (DCMD_OK); 3182 3183 if (earliest != 0 && vs.vs_timestamp < earliest) 3184 return (DCMD_OK); 3185 3186 if (latest != 0 && vs.vs_timestamp > latest) 3187 return (DCMD_OK); 3188 } 3189 3190 type = (t == VMEM_ALLOC ? "ALLC" : 3191 t == VMEM_FREE ? "FREE" : 3192 t == VMEM_SPAN ? "SPAN" : 3193 t == VMEM_ROTOR ? "ROTR" : 3194 t == VMEM_WALKER ? "WLKR" : 3195 "????"); 3196 3197 if (flags & DCMD_PIPE_OUT) { 3198 mdb_printf("%#r\n", addr); 3199 return (DCMD_OK); 3200 } 3201 3202 if (verbose) { 3203 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3204 addr, type, vs.vs_start, vs.vs_end, sz); 3205 3206 if (no_debug) 3207 return (DCMD_OK); 3208 3209 mdb_printf("%16s %4s %16d %16llx\n", 3210 "", "", vs.vs_thread, vs.vs_timestamp); 3211 3212 mdb_inc_indent(17); 3213 for (i = 0; i < depth; i++) { 3214 mdb_printf("%a\n", stk[i]); 3215 } 3216 mdb_dec_indent(17); 3217 mdb_printf("\n"); 3218 } else { 3219 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3220 vs.vs_start, size? 
sz : vs.vs_end); 3221 3222 if (no_debug) { 3223 mdb_printf("\n"); 3224 return (DCMD_OK); 3225 } 3226 3227 for (i = 0; i < depth; i++) { 3228 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3229 c, sizeof (c), &sym) == -1) 3230 continue; 3231 if (is_umem_sym(c, "vmem_")) 3232 continue; 3233 break; 3234 } 3235 mdb_printf(" %a\n", stk[i]); 3236 } 3237 return (DCMD_OK); 3238 } 3239 3240 /*ARGSUSED*/ 3241 static int 3242 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest) 3243 { 3244 char name[UMEM_CACHE_NAMELEN + 1]; 3245 hrtime_t delta; 3246 int i, depth; 3247 3248 if (bcp->bc_timestamp == 0) 3249 return (WALK_DONE); 3250 3251 if (*newest == 0) 3252 *newest = bcp->bc_timestamp; 3253 3254 delta = *newest - bcp->bc_timestamp; 3255 depth = MIN(bcp->bc_depth, umem_stack_depth); 3256 3257 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3258 &bcp->bc_cache->cache_name) <= 0) 3259 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3260 3261 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3262 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3263 3264 for (i = 0; i < depth; i++) 3265 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3266 3267 return (WALK_NEXT); 3268 } 3269 3270 int 3271 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3272 { 3273 const char *logname = "umem_transaction_log"; 3274 hrtime_t newest = 0; 3275 3276 if ((flags & DCMD_ADDRSPEC) || argc > 1) 3277 return (DCMD_USAGE); 3278 3279 if (argc > 0) { 3280 if (argv->a_type != MDB_TYPE_STRING) 3281 return (DCMD_USAGE); 3282 if (strcmp(argv->a_un.a_str, "fail") == 0) 3283 logname = "umem_failure_log"; 3284 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3285 logname = "umem_slab_log"; 3286 else 3287 return (DCMD_USAGE); 3288 } 3289 3290 if (umem_readvar(&addr, logname) == -1) { 3291 mdb_warn("failed to read %s log header pointer", logname); 3292 return (DCMD_ERR); 3293 } 3294 3295 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) { 3296 mdb_warn("failed to walk umem log"); 3297 return (DCMD_ERR); 3298 } 3299 3300 return (DCMD_OK); 3301 } 3302 3303 /* 3304 * As the final lure for die-hard crash(1M) users, we provide ::umausers here. 3305 * The first piece is a structure which we use to accumulate umem_cache_t 3306 * addresses of interest. The umc_add is used as a callback for the umem_cache 3307 * walker; we either add all caches, or ones named explicitly as arguments. 3308 */ 3309 3310 typedef struct umclist { 3311 const char *umc_name; /* Name to match (or NULL) */ 3312 uintptr_t *umc_caches; /* List of umem_cache_t addrs */ 3313 int umc_nelems; /* Num entries in umc_caches */ 3314 int umc_size; /* Size of umc_caches array */ 3315 } umclist_t; 3316 3317 static int 3318 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc) 3319 { 3320 void *p; 3321 int s; 3322 3323 if (umc->umc_name == NULL || 3324 strcmp(cp->cache_name, umc->umc_name) == 0) { 3325 /* 3326 * If we have a match, grow our array (if necessary), and then 3327 * add the virtual address of the matching cache to our list. 3328 */ 3329 if (umc->umc_nelems >= umc->umc_size) { 3330 s = umc->umc_size ? umc->umc_size * 2 : 256; 3331 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3332 3333 bcopy(umc->umc_caches, p, 3334 sizeof (uintptr_t) * umc->umc_size); 3335 3336 umc->umc_caches = p; 3337 umc->umc_size = s; 3338 } 3339 3340 umc->umc_caches[umc->umc_nelems++] = addr; 3341 return (umc->umc_name ?
WALK_DONE : WALK_NEXT); 3342 } 3343 3344 return (WALK_NEXT); 3345 } 3346 3347 /* 3348 * The second piece of ::umausers is a hash table of allocations. Each 3349 * allocation owner is identified by its stack trace and data_size. We then 3350 * track the total bytes of all such allocations, and the number of allocations 3351 * to report at the end. Once we have a list of caches, we walk through the 3352 * allocated bufctls of each, and update our hash table accordingly. 3353 */ 3354 3355 typedef struct umowner { 3356 struct umowner *umo_head; /* First hash elt in bucket */ 3357 struct umowner *umo_next; /* Next hash elt in chain */ 3358 size_t umo_signature; /* Hash table signature */ 3359 uint_t umo_num; /* Number of allocations */ 3360 size_t umo_data_size; /* Size of each allocation */ 3361 size_t umo_total_size; /* Total bytes of allocation */ 3362 int umo_depth; /* Depth of stack trace */ 3363 uintptr_t *umo_stack; /* Stack trace */ 3364 } umowner_t; 3365 3366 typedef struct umusers { 3367 const umem_cache_t *umu_cache; /* Current umem cache */ 3368 umowner_t *umu_hash; /* Hash table of owners */ 3369 uintptr_t *umu_stacks; /* stacks for owners */ 3370 int umu_nelems; /* Number of entries in use */ 3371 int umu_size; /* Total number of entries */ 3372 } umusers_t; 3373 3374 static void 3375 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp, 3376 size_t size, size_t data_size) 3377 { 3378 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3379 size_t bucket, signature = data_size; 3380 umowner_t *umo, *umoend; 3381 3382 /* 3383 * If the hash table is full, double its size and rehash everything. 3384 */ 3385 if (umu->umu_nelems >= umu->umu_size) { 3386 int s = umu->umu_size ? umu->umu_size * 2 : 1024; 3387 size_t umowner_size = sizeof (umowner_t); 3388 size_t trace_size = umem_stack_depth * sizeof (uintptr_t); 3389 uintptr_t *new_stacks; 3390 3391 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC); 3392 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC); 3393 3394 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size); 3395 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size); 3396 umu->umu_hash = umo; 3397 umu->umu_stacks = new_stacks; 3398 umu->umu_size = s; 3399 3400 umoend = umu->umu_hash + umu->umu_size; 3401 for (umo = umu->umu_hash; umo < umoend; umo++) { 3402 umo->umo_head = NULL; 3403 umo->umo_stack = &umu->umu_stacks[ 3404 umem_stack_depth * (umo - umu->umu_hash)]; 3405 } 3406 3407 umoend = umu->umu_hash + umu->umu_nelems; 3408 for (umo = umu->umu_hash; umo < umoend; umo++) { 3409 bucket = umo->umo_signature & (umu->umu_size - 1); 3410 umo->umo_next = umu->umu_hash[bucket].umo_head; 3411 umu->umu_hash[bucket].umo_head = umo; 3412 } 3413 } 3414 3415 /* 3416 * Finish computing the hash signature from the stack trace, and then 3417 * see if the owner is in the hash table. If so, update our stats. 
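 * The signature is only data_size plus the sum of the stack PCs, so distinct
 * owners can collide; we therefore merge entries only when the size, depth,
 * and every stack frame match exactly.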
3418 */ 3419 for (i = 0; i < depth; i++) 3420 signature += bcp->bc_stack[i]; 3421 3422 bucket = signature & (umu->umu_size - 1); 3423 3424 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) { 3425 if (umo->umo_signature == signature) { 3426 size_t difference = 0; 3427 3428 difference |= umo->umo_data_size - data_size; 3429 difference |= umo->umo_depth - depth; 3430 3431 for (i = 0; i < depth; i++) { 3432 difference |= umo->umo_stack[i] - 3433 bcp->bc_stack[i]; 3434 } 3435 3436 if (difference == 0) { 3437 umo->umo_total_size += size; 3438 umo->umo_num++; 3439 return; 3440 } 3441 } 3442 } 3443 3444 /* 3445 * If the owner is not yet hashed, grab the next element and fill it 3446 * in based on the allocation information. 3447 */ 3448 umo = &umu->umu_hash[umu->umu_nelems++]; 3449 umo->umo_next = umu->umu_hash[bucket].umo_head; 3450 umu->umu_hash[bucket].umo_head = umo; 3451 3452 umo->umo_signature = signature; 3453 umo->umo_num = 1; 3454 umo->umo_data_size = data_size; 3455 umo->umo_total_size = size; 3456 umo->umo_depth = depth; 3457 3458 for (i = 0; i < depth; i++) 3459 umo->umo_stack[i] = bcp->bc_stack[i]; 3460 } 3461 3462 /* 3463 * When ::umausers is invoked without the -f flag, we simply update our hash 3464 * table with the information from each allocated bufctl. 3465 */ 3466 /*ARGSUSED*/ 3467 static int 3468 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3469 { 3470 const umem_cache_t *cp = umu->umu_cache; 3471 3472 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3473 return (WALK_NEXT); 3474 } 3475 3476 /* 3477 * When ::umausers is invoked with the -f flag, we print out the information 3478 * for each bufctl as well as updating the hash table. 3479 */ 3480 static int 3481 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3482 { 3483 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3484 const umem_cache_t *cp = umu->umu_cache; 3485 3486 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 3487 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 3488 3489 for (i = 0; i < depth; i++) 3490 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3491 3492 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3493 return (WALK_NEXT); 3494 } 3495 3496 /* 3497 * We sort our results by allocation size before printing them. 3498 */ 3499 static int 3500 umownercmp(const void *lp, const void *rp) 3501 { 3502 const umowner_t *lhs = lp; 3503 const umowner_t *rhs = rp; 3504 3505 return (rhs->umo_total_size - lhs->umo_total_size); 3506 } 3507 3508 /* 3509 * The main engine of ::umausers is relatively straightforward: First we 3510 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we 3511 * iterate over the allocated bufctls of each cache in the list. Finally, 3512 * we sort and print our results. 
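 * For example, "::umausers -e -f umem_alloc_128" (assuming a cache by that
 * name exists in the target) reports every owner in that cache, including
 * small ones, and prints each allocated bufctl's stack trace as it is
 * encountered.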
3513 */ 3514 /*ARGSUSED*/ 3515 int 3516 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3517 { 3518 int mem_threshold = 8192; /* Minimum # bytes for printing */ 3519 int cnt_threshold = 100; /* Minimum # blocks for printing */ 3520 int audited_caches = 0; /* Number of UMF_AUDIT caches found */ 3521 int do_all_caches = 1; /* Do all caches (no arguments) */ 3522 int opt_e = FALSE; /* Include "small" users */ 3523 int opt_f = FALSE; /* Print stack traces */ 3524 3525 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1; 3526 umowner_t *umo, *umoend; 3527 int i, oelems; 3528 3529 umclist_t umc; 3530 umusers_t umu; 3531 3532 if (flags & DCMD_ADDRSPEC) 3533 return (DCMD_USAGE); 3534 3535 bzero(&umc, sizeof (umc)); 3536 bzero(&umu, sizeof (umu)); 3537 3538 while ((i = mdb_getopts(argc, argv, 3539 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 3540 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 3541 3542 argv += i; /* skip past options we just processed */ 3543 argc -= i; /* adjust argc */ 3544 3545 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 3546 return (DCMD_USAGE); 3547 3548 oelems = umc.umc_nelems; 3549 umc.umc_name = argv->a_un.a_str; 3550 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3551 3552 if (umc.umc_nelems == oelems) { 3553 mdb_warn("unknown umem cache: %s\n", umc.umc_name); 3554 return (DCMD_ERR); 3555 } 3556 3557 do_all_caches = 0; 3558 argv++; 3559 argc--; 3560 } 3561 3562 if (opt_e) 3563 mem_threshold = cnt_threshold = 0; 3564 3565 if (opt_f) 3566 callback = (mdb_walk_cb_t)umause2; 3567 3568 if (do_all_caches) { 3569 umc.umc_name = NULL; /* match all cache names */ 3570 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3571 } 3572 3573 for (i = 0; i < umc.umc_nelems; i++) { 3574 uintptr_t cp = umc.umc_caches[i]; 3575 umem_cache_t c; 3576 3577 if (mdb_vread(&c, sizeof (c), cp) == -1) { 3578 mdb_warn("failed to read cache at %p", cp); 3579 continue; 3580 } 3581 3582 if (!(c.cache_flags & UMF_AUDIT)) { 3583 if (!do_all_caches) { 3584 mdb_warn("UMF_AUDIT is not enabled for %s\n", 3585 c.cache_name); 3586 } 3587 continue; 3588 } 3589 3590 umu.umu_cache = &c; 3591 (void) mdb_pwalk("bufctl", callback, &umu, cp); 3592 audited_caches++; 3593 } 3594 3595 if (audited_caches == 0 && do_all_caches) { 3596 mdb_warn("UMF_AUDIT is not enabled for any caches\n"); 3597 return (DCMD_ERR); 3598 } 3599 3600 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp); 3601 umoend = umu.umu_hash + umu.umu_nelems; 3602 3603 for (umo = umu.umu_hash; umo < umoend; umo++) { 3604 if (umo->umo_total_size < mem_threshold && 3605 umo->umo_num < cnt_threshold) 3606 continue; 3607 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 3608 umo->umo_total_size, umo->umo_num, umo->umo_data_size); 3609 for (i = 0; i < umo->umo_depth; i++) 3610 mdb_printf("\t %a\n", umo->umo_stack[i]); 3611 } 3612 3613 return (DCMD_OK); 3614 } 3615 3616 struct malloc_data { 3617 uint32_t malloc_size; 3618 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */ 3619 }; 3620 3621 #ifdef _LP64 3622 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data)) 3623 #else 3624 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data)) 3625 #endif 3626 3627 typedef struct umem_malloc_info { 3628 size_t um_total; /* total allocated buffers */ 3629 size_t um_malloc; /* malloc buffers */ 3630 size_t um_malloc_size; /* sum of malloc buffer sizes */ 3631 size_t um_malloc_overhead; /* sum of in-chunk overheads */ 3632 3633 umem_cache_t 
*um_cp; 3634 3635 uint_t *um_bucket; 3636 } umem_malloc_info_t; 3637 3638 static const int * 3639 dist_linear(int buckets, int beg, int end) 3640 { 3641 int *out = mdb_alloc((buckets + 1) * sizeof (*out), UM_SLEEP | UM_GC); 3642 int pos; 3643 int dist = end - beg + 1; 3644 3645 for (pos = 0; pos < buckets; pos++) 3646 out[pos] = beg + (pos * dist)/buckets; 3647 out[buckets] = end + 1; 3648 3649 return (out); 3650 } 3651 3652 /* 3653 * We want the bins to be a constant ratio: 3654 * 3655 * b_0 = beg; 3656 * b_idx = b_{idx-1} * r; 3657 * b_buckets = end + 1; 3658 * 3659 * That is: 3660 * 3661 * buckets 3662 * beg * r = end 3663 * 3664 * Which reduces to: 3665 * 3666 * buckets ___________________ 3667 * r = -------/ ((end + 1) / beg) 3668 * 3669 * log ((end + 1) / beg) 3670 * log r = --------------------- 3671 * buckets 3672 * 3673 * (log ((end + 1) / beg)) / buckets 3674 * r = e 3675 */ 3676 static const int * 3677 dist_geometric(int buckets, int beg, int end, int minbucketsize) 3678 { 3679 #ifdef _KMDB 3680 return (dist_linear(buckets, beg, end)); 3681 #else 3682 int *out = mdb_alloc((buckets + 1) * sizeof (*out), UM_SLEEP | UM_GC); 3683 3684 extern double log(double); 3685 extern double exp(double); 3686 3687 double r; 3688 double b; 3689 int idx = 0; 3690 int last; 3691 int begzero; 3692 3693 if (minbucketsize == 0) 3694 minbucketsize = 1; 3695 3696 if (buckets == 1) { 3697 out[0] = beg; 3698 out[1] = end + 1; 3699 return (out); 3700 } 3701 3702 begzero = (beg == 0); 3703 if (begzero) 3704 beg = 1; 3705 3706 r = exp(log((double)(end + 1) / beg) / buckets); 3707 3708 /* 3709 * We've now computed r, using the previously derived formula. We 3710 * now need to generate the array of bucket bounds. There are 3711 * two major variables: 3712 * 3713 * b holds b_idx, the current index, as a double. 3714 * last holds the integer which goes into out[idx] 3715 * 3716 * Our job is to transform the smooth function b_idx, defined 3717 * above, into integer-sized buckets, with a specified minimum 3718 * bucket size. Since b_idx is an exponentially growing function, 3719 * any inadequate buckets must be at the beginning. To deal 3720 * with this, we make buckets of minimum size until b catches up 3721 * with last. 3722 * 3723 * A final wrinkle is that beg *can* be zero. We compute r and b 3724 * as if beg was 1, then start last as 0. This can lead to a bit 3725 * of oddness around the 0 bucket, but it's mostly reasonable. 
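 * For example, with beg = 8, end = 128, buckets = 4 and a minimum bucket
 * size of 1, r is roughly the fourth root of (129 / 8), or about 2.0,
 * giving bucket bounds of approximately 8, 16, 32, 64, with 129 as the
 * terminating bound.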
3726 */ 3727 3728 b = last = beg; 3729 if (begzero) 3730 last = 0; 3731 3732 for (idx = 0; idx < buckets; idx++) { 3733 int next; 3734 3735 out[idx] = last; 3736 3737 b *= r; 3738 next = (int)b; 3739 3740 if (next > last + minbucketsize - 1) 3741 last = next; 3742 else 3743 last += minbucketsize; 3744 } 3745 out[buckets] = end + 1; 3746 3747 return (out); 3748 #endif 3749 } 3750 3751 #define NCHARS 50 3752 static void 3753 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc, 3754 size_t maxbuckets, size_t minbucketsize, int geometric) 3755 { 3756 size_t um_malloc; 3757 int minb = -1; 3758 int maxb = -1; 3759 int buckets; 3760 int nbucks; 3761 int i; 3762 int n; 3763 int b; 3764 const char *dist = " Distribution "; 3765 char dashes[NCHARS + 1]; 3766 const int *distarray; 3767 3768 minb = (int)minmalloc; 3769 maxb = (int)maxmalloc; 3770 3771 nbucks = buckets = maxb - minb + 1; 3772 3773 um_malloc = 0; 3774 for (b = minb; b <= maxb; b++) 3775 um_malloc += um_bucket[b]; 3776 if (um_malloc == 0) 3777 um_malloc = 1; /* avoid divide-by-zero */ 3778 3779 if (maxbuckets != 0) 3780 buckets = MIN(buckets, maxbuckets); 3781 3782 if (minbucketsize > 1) { 3783 buckets = MIN(buckets, nbucks/minbucketsize); 3784 if (buckets == 0) { 3785 buckets = 1; 3786 minbucketsize = nbucks; 3787 } 3788 } 3789 3790 3791 if (geometric) 3792 distarray = dist_geometric(buckets, minb, maxb, minbucketsize); 3793 else 3794 distarray = dist_linear(buckets, minb, maxb); 3795 3796 n = (NCHARS - strlen(dist)) / 2; 3797 (void) memset(dashes, '-', n); 3798 dashes[n] = 0; 3799 3800 mdb_printf("%11s %s%s%s %s\n", 3801 "malloc size", dashes, dist, dashes, "count"); 3802 3803 for (i = 0; i < buckets; i++) { 3804 int bb = distarray[i]; 3805 int be = distarray[i+1] - 1; 3806 uint64_t amount = 0; 3807 3808 int nats; 3809 char ats[NCHARS + 1], spaces[NCHARS + 1]; 3810 char range[40]; 3811 3812 for (b = bb; b <= be; b++) 3813 amount += um_bucket[b]; 3814 3815 nats = (NCHARS * amount)/um_malloc; 3816 (void) memset(ats, '@', nats); 3817 ats[nats] = 0; 3818 (void) memset(spaces, ' ', NCHARS - nats); 3819 spaces[NCHARS - nats] = 0; 3820 3821 if (bb == be) 3822 mdb_snprintf(range, sizeof (range), "%d", bb); 3823 else 3824 mdb_snprintf(range, sizeof (range), "%d-%d", bb, be); 3825 mdb_printf("%11s |%s%s %lld\n", range, ats, spaces, amount); 3826 } 3827 mdb_printf("\n"); 3828 } 3829 #undef NCHARS 3830 3831 /* 3832 * A malloc()ed buffer looks like: 3833 * 3834 * <----------- mi.malloc_size ---> 3835 * <----------- cp.cache_bufsize ------------------> 3836 * <----------- cp.cache_chunksize --------------------------------> 3837 * +-------+-----------------------+---------------+---------------+ 3838 * |/tag///| mallocsz |/round-off/////|/debug info////| 3839 * +-------+---------------------------------------+---------------+ 3840 * <-- usable space ------> 3841 * 3842 * mallocsz is the argument to malloc(3C). 3843 * mi.malloc_size is the actual size passed to umem_alloc(), which 3844 * is rounded up to the smallest available cache size, which is 3845 * cache_bufsize. If there is debugging or alignment overhead in 3846 * the cache, that is reflected in a larger cache_chunksize. 3847 * 3848 * The tag at the beginning of the buffer is either 8-bytes or 16-bytes, 3849 * depending upon the ISA's alignment requirements. For 32-bit allocations, 3850 * it is always a 8-byte tag. For 64-bit allocations larger than 8 bytes, 3851 * the tag has 8 bytes of padding before it. 
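 * For example (assuming the default umem_alloc_sizes[] table), a 64-bit
 * malloc(100) carries an 8-byte tag plus 8 bytes of padding, so its
 * malloc_size is 116 and the buffer comes from the umem_alloc_128 cache.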
3852 * 3853 * 32-byte, 64-byte buffers <= 8 bytes: 3854 * +-------+-------+--------- ... 3855 * |/size//|/stat//| mallocsz ... 3856 * +-------+-------+--------- ... 3857 * ^ 3858 * pointer returned from malloc(3C) 3859 * 3860 * 64-byte buffers > 8 bytes: 3861 * +---------------+-------+-------+--------- ... 3862 * |/padding///////|/size//|/stat//| mallocsz ... 3863 * +---------------+-------+-------+--------- ... 3864 * ^ 3865 * pointer returned from malloc(3C) 3866 * 3867 * The "size" field is "malloc_size", which is mallocsz + the padding. 3868 * The "stat" field is derived from malloc_size, and functions as a 3869 * validation that this buffer is actually from malloc(3C). 3870 */ 3871 /*ARGSUSED*/ 3872 static int 3873 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump) 3874 { 3875 struct malloc_data md; 3876 size_t m_addr = addr; 3877 size_t overhead = sizeof (md); 3878 size_t mallocsz; 3879 3880 ump->um_total++; 3881 3882 #ifdef _LP64 3883 if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) { 3884 m_addr += overhead; 3885 overhead += sizeof (md); 3886 } 3887 #endif 3888 3889 if (mdb_vread(&md, sizeof (md), m_addr) == -1) { 3890 mdb_warn("unable to read malloc header at %p", m_addr); 3891 return (WALK_NEXT); 3892 } 3893 3894 switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) { 3895 case MALLOC_MAGIC: 3896 #ifdef _LP64 3897 case MALLOC_SECOND_MAGIC: 3898 #endif 3899 mallocsz = md.malloc_size - overhead; 3900 3901 ump->um_malloc++; 3902 ump->um_malloc_size += mallocsz; 3903 ump->um_malloc_overhead += overhead; 3904 3905 /* include round-off and debug overhead */ 3906 ump->um_malloc_overhead += 3907 ump->um_cp->cache_chunksize - md.malloc_size; 3908 3909 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET) 3910 ump->um_bucket[mallocsz]++; 3911 3912 break; 3913 default: 3914 break; 3915 } 3916 3917 return (WALK_NEXT); 3918 } 3919 3920 int 3921 get_umem_alloc_sizes(int **out, size_t *out_num) 3922 { 3923 GElf_Sym sym; 3924 3925 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { 3926 mdb_warn("unable to look up umem_alloc_sizes"); 3927 return (-1); 3928 } 3929 3930 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC); 3931 *out_num = sym.st_size / sizeof (int); 3932 3933 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) { 3934 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value); 3935 *out = NULL; 3936 return (-1); 3937 } 3938 3939 return (0); 3940 } 3941 3942 3943 static int 3944 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump) 3945 { 3946 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) 3947 return (WALK_NEXT); 3948 3949 ump->um_cp = cp; 3950 3951 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) == 3952 -1) { 3953 mdb_warn("can't walk 'umem' for cache %p", addr); 3954 return (WALK_ERR); 3955 } 3956 3957 return (WALK_NEXT); 3958 } 3959 3960 void 3961 umem_malloc_dist_help(void) 3962 { 3963 mdb_printf("%s\n", 3964 "report distribution of outstanding malloc()s"); 3965 mdb_dec_indent(2); 3966 mdb_printf("%<b>OPTIONS%</b>\n"); 3967 mdb_inc_indent(2); 3968 mdb_printf("%s", 3969 " -b maxbins\n" 3970 " Use at most maxbins bins for the data\n" 3971 " -B minbinsize\n" 3972 " Make the bins at least minbinsize bytes apart\n" 3973 " -d dump the raw data out, without binning\n" 3974 " -g use geometric binning instead of linear binning\n"); 3975 } 3976 3977 /*ARGSUSED*/ 3978 int 3979 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3980 { 3981 
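	/*
	 * Gather a single malloc(3C) size distribution across every
	 * umem_alloc_<n> cache in the target, then either dump the raw
	 * per-size counts (-d) or print a binned histogram.
	 */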
umem_malloc_info_t mi; 3982 uint_t geometric = 0; 3983 uint_t dump = 0; 3984 size_t maxbuckets = 0; 3985 size_t minbucketsize = 0; 3986 3987 size_t minalloc = 0; 3988 size_t maxalloc = UMI_MAX_BUCKET; 3989 3990 if (flags & DCMD_ADDRSPEC) 3991 return (DCMD_USAGE); 3992 3993 if (mdb_getopts(argc, argv, 3994 'd', MDB_OPT_SETBITS, TRUE, &dump, 3995 'g', MDB_OPT_SETBITS, TRUE, &geometric, 3996 'b', MDB_OPT_UINTPTR, &maxbuckets, 3997 'B', MDB_OPT_UINTPTR, &minbucketsize, 3998 0) != argc) 3999 return (DCMD_USAGE); 4000 4001 bzero(&mi, sizeof (mi)); 4002 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4003 UM_SLEEP | UM_GC); 4004 4005 if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb, 4006 &mi) == -1) { 4007 mdb_warn("unable to walk 'umem_cache'"); 4008 return (DCMD_ERR); 4009 } 4010 4011 if (dump) { 4012 int i; 4013 for (i = minalloc; i <= maxalloc; i++) 4014 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]); 4015 4016 return (DCMD_OK); 4017 } 4018 4019 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc, 4020 maxbuckets, minbucketsize, geometric); 4021 4022 return (DCMD_OK); 4023 } 4024 4025 void 4026 umem_malloc_info_help(void) 4027 { 4028 mdb_printf("%s\n", 4029 "report information about malloc()s by cache. "); 4030 mdb_dec_indent(2); 4031 mdb_printf("%<b>OPTIONS%</b>\n"); 4032 mdb_inc_indent(2); 4033 mdb_printf("%s", 4034 " -b maxbins\n" 4035 " Use at most maxbins bins for the data\n" 4036 " -B minbinsize\n" 4037 " Make the bins at least minbinsize bytes apart\n" 4038 " -d dump the raw distribution data without binning\n" 4039 #ifndef _KMDB 4040 " -g use geometric binning instead of linear binning\n" 4041 #endif 4042 ""); 4043 } 4044 int 4045 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4046 { 4047 umem_cache_t c; 4048 umem_malloc_info_t mi; 4049 4050 int skip = 0; 4051 4052 size_t maxmalloc; 4053 size_t overhead; 4054 size_t allocated; 4055 size_t avg_malloc; 4056 size_t overhead_pct; /* 1000 * overhead_percent */ 4057 4058 uint_t verbose = 0; 4059 uint_t dump = 0; 4060 uint_t geometric = 0; 4061 size_t maxbuckets = 0; 4062 size_t minbucketsize = 0; 4063 4064 int *alloc_sizes; 4065 int idx; 4066 size_t num; 4067 size_t minmalloc; 4068 4069 if (mdb_getopts(argc, argv, 4070 'd', MDB_OPT_SETBITS, TRUE, &dump, 4071 'g', MDB_OPT_SETBITS, TRUE, &geometric, 4072 'b', MDB_OPT_UINTPTR, &maxbuckets, 4073 'B', MDB_OPT_UINTPTR, &minbucketsize, 4074 0) != argc) 4075 return (DCMD_USAGE); 4076 4077 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0)) 4078 verbose = 1; 4079 4080 if (!(flags & DCMD_ADDRSPEC)) { 4081 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info", 4082 argc, argv) == -1) { 4083 mdb_warn("can't walk umem_cache"); 4084 return (DCMD_ERR); 4085 } 4086 return (DCMD_OK); 4087 } 4088 4089 if (!mdb_vread(&c, sizeof (c), addr)) { 4090 mdb_warn("unable to read cache at %p", addr); 4091 return (DCMD_ERR); 4092 } 4093 4094 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) { 4095 if (!(flags & DCMD_LOOP)) 4096 mdb_warn("umem_malloc_info: cache \"%s\" is not used " 4097 "by malloc()\n", c.cache_name); 4098 skip = 1; 4099 } 4100 4101 /* 4102 * normally, print the header only the first time. 
In verbose mode, 4103 * print the header on every non-skipped buffer 4104 */ 4105 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip)) 4106 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n", 4107 "CACHE", "BUFSZ", "MAXMAL", 4108 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER"); 4109 4110 if (skip) 4111 return (DCMD_OK); 4112 4113 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data); 4114 #ifdef _LP64 4115 if (c.cache_bufsize > UMEM_SECOND_ALIGN) 4116 maxmalloc -= sizeof (struct malloc_data); 4117 #endif 4118 4119 bzero(&mi, sizeof (mi)); 4120 mi.um_cp = &c; 4121 if (verbose) 4122 mi.um_bucket = 4123 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4124 UM_SLEEP | UM_GC); 4125 4126 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) == 4127 -1) { 4128 mdb_warn("can't walk 'umem'"); 4129 return (DCMD_ERR); 4130 } 4131 4132 overhead = mi.um_malloc_overhead; 4133 allocated = mi.um_malloc_size; 4134 4135 /* do integer round off for the average */ 4136 if (mi.um_malloc != 0) 4137 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc; 4138 else 4139 avg_malloc = 0; 4140 4141 /* 4142 * include per-slab overhead 4143 * 4144 * Each slab in a given cache is the same size, and has the same 4145 * number of chunks in it; we read in the first slab on the 4146 * slab list to get the number of chunks for all slabs. To 4147 * compute the per-slab overhead, we just subtract the chunk usage 4148 * from the slabsize: 4149 * 4150 * +------------+-------+-------+ ... --+-------+-------+-------+ 4151 * |////////////| | | ... | |///////|///////| 4152 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//| 4153 * |////////////| | | ... | |///////|///////| 4154 * +------------+-------+-------+ ... --+-------+-------+-------+ 4155 * | \_______chunksize * chunks_____/ | 4156 * \__________________________slabsize__________________________/ 4157 * 4158 * For UMF_HASH caches, there is an additional source of overhead; 4159 * the external umem_slab_t and per-chunk bufctl structures. We 4160 * include those in our per-slab overhead. 4161 * 4162 * Once we have a number for the per-slab overhead, we estimate 4163 * the actual overhead by treating the malloc()ed buffers as if 4164 * they were densely packed: 4165 * 4166 * additional overhead = (# mallocs) * (per-slab) / (chunks); 4167 * 4168 * carefully ordering the multiply before the divide, to avoid 4169 * round-off error. 4170 */ 4171 if (mi.um_malloc != 0) { 4172 umem_slab_t slab; 4173 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next; 4174 4175 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) { 4176 mdb_warn("unable to read slab at %p\n", saddr); 4177 } else { 4178 long chunks = slab.slab_chunks; 4179 if (chunks != 0 && c.cache_chunksize != 0 && 4180 chunks <= c.cache_slabsize / c.cache_chunksize) { 4181 uintmax_t perslab = 4182 c.cache_slabsize - 4183 (c.cache_chunksize * chunks); 4184 4185 if (c.cache_flags & UMF_HASH) { 4186 perslab += sizeof (umem_slab_t) + 4187 chunks * 4188 ((c.cache_flags & UMF_AUDIT) ? 
4189 sizeof (umem_bufctl_audit_t) : 4190 sizeof (umem_bufctl_t)); 4191 } 4192 overhead += 4193 (perslab * (uintmax_t)mi.um_malloc)/chunks; 4194 } else { 4195 mdb_warn("invalid #chunks (%d) in slab %p\n", 4196 chunks, saddr); 4197 } 4198 } 4199 } 4200 4201 if (allocated != 0) 4202 overhead_pct = (1000ULL * overhead) / allocated; 4203 else 4204 overhead_pct = 0; 4205 4206 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n", 4207 addr, c.cache_bufsize, maxmalloc, 4208 mi.um_malloc, avg_malloc, allocated, overhead, 4209 overhead_pct / 10, overhead_pct % 10); 4210 4211 if (!verbose) 4212 return (DCMD_OK); 4213 4214 if (!dump) 4215 mdb_printf("\n"); 4216 4217 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1) 4218 return (DCMD_ERR); 4219 4220 for (idx = 0; idx < num; idx++) { 4221 if (alloc_sizes[idx] == c.cache_bufsize) 4222 break; 4223 if (alloc_sizes[idx] == 0) { 4224 idx = num; /* 0-terminated array */ 4225 break; 4226 } 4227 } 4228 if (idx == num) { 4229 mdb_warn( 4230 "cache %p's size (%d) not in umem_alloc_sizes\n", 4231 addr, c.cache_bufsize); 4232 return (DCMD_ERR); 4233 } 4234 4235 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1]; 4236 if (minmalloc > 0) { 4237 #ifdef _LP64 4238 if (minmalloc > UMEM_SECOND_ALIGN) 4239 minmalloc -= sizeof (struct malloc_data); 4240 #endif 4241 minmalloc -= sizeof (struct malloc_data); 4242 minmalloc += 1; 4243 } 4244 4245 if (dump) { 4246 for (idx = minmalloc; idx <= maxmalloc; idx++) 4247 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]); 4248 mdb_printf("\n"); 4249 } else { 4250 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc, 4251 maxbuckets, minbucketsize, geometric); 4252 } 4253 4254 return (DCMD_OK); 4255 } 4256