1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 2011 Joyent, Inc. All rights reserved. 28 * Copyright (c) 2013 by Delphix. All rights reserved. 29 */ 30 31 #include "umem.h" 32 33 #include <sys/vmem_impl_user.h> 34 #include <umem_impl.h> 35 36 #include <alloca.h> 37 #include <limits.h> 38 #include <mdb/mdb_whatis.h> 39 40 #include "misc.h" 41 #include "leaky.h" 42 #include "dist.h" 43 44 #include "umem_pagesize.h" 45 46 #define UM_ALLOCATED 0x1 47 #define UM_FREE 0x2 48 #define UM_BUFCTL 0x4 49 #define UM_HASH 0x8 50 51 int umem_ready; 52 53 static int umem_stack_depth_warned; 54 static uint32_t umem_max_ncpus; 55 uint32_t umem_stack_depth; 56 57 size_t umem_pagesize; 58 59 #define UMEM_READVAR(var) \ 60 (umem_readvar(&(var), #var) == -1 && \ 61 (mdb_warn("failed to read "#var), 1)) 62 63 int 64 umem_update_variables(void) 65 { 66 size_t pagesize; 67 68 /* 69 * Figure out which type of umem is being used; if it's not there 70 * yet, succeed quietly. 71 */ 72 if (umem_set_standalone() == -1) { 73 umem_ready = 0; 74 return (0); /* umem not there yet */ 75 } 76 77 /* 78 * Solaris 9 used a different name for umem_max_ncpus. It's 79 * cheap backwards compatibility to check for both names. 
80 */ 81 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 && 82 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) { 83 mdb_warn("unable to read umem_max_ncpus or max_ncpus"); 84 return (-1); 85 } 86 if (UMEM_READVAR(umem_ready)) 87 return (-1); 88 if (UMEM_READVAR(umem_stack_depth)) 89 return (-1); 90 if (UMEM_READVAR(pagesize)) 91 return (-1); 92 93 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) { 94 if (umem_stack_depth_warned == 0) { 95 mdb_warn("umem_stack_depth corrupted (%d > %d)\n", 96 umem_stack_depth, UMEM_MAX_STACK_DEPTH); 97 umem_stack_depth_warned = 1; 98 } 99 umem_stack_depth = 0; 100 } 101 102 umem_pagesize = pagesize; 103 104 return (0); 105 } 106 107 /*ARGSUSED*/ 108 static int 109 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored) 110 { 111 mdb_walker_t w; 112 char descr[64]; 113 114 (void) mdb_snprintf(descr, sizeof (descr), 115 "walk the %s cache", c->cache_name); 116 117 w.walk_name = c->cache_name; 118 w.walk_descr = descr; 119 w.walk_init = umem_walk_init; 120 w.walk_step = umem_walk_step; 121 w.walk_fini = umem_walk_fini; 122 w.walk_init_arg = (void *)addr; 123 124 if (mdb_add_walker(&w) == -1) 125 mdb_warn("failed to add %s walker", c->cache_name); 126 127 return (WALK_NEXT); 128 } 129 130 /*ARGSUSED*/ 131 static void 132 umem_statechange_cb(void *arg) 133 { 134 static int been_ready = 0; 135 136 #ifndef _KMDB 137 leaky_cleanup(1); /* state changes invalidate leaky state */ 138 #endif 139 140 if (umem_update_variables() == -1) 141 return; 142 143 if (been_ready) 144 return; 145 146 if (umem_ready != UMEM_READY) 147 return; 148 149 been_ready = 1; 150 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL); 151 } 152 153 int 154 umem_abort_messages(void) 155 { 156 char *umem_error_buffer; 157 uint_t umem_error_begin; 158 GElf_Sym sym; 159 size_t bufsize; 160 161 if (UMEM_READVAR(umem_error_begin)) 162 return (DCMD_ERR); 163 164 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) { 165 mdb_warn("unable to look up umem_error_buffer"); 166 return (DCMD_ERR); 167 } 168 169 bufsize = (size_t)sym.st_size; 170 171 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC); 172 173 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value) 174 != bufsize) { 175 mdb_warn("unable to read umem_error_buffer"); 176 return (DCMD_ERR); 177 } 178 /* put a zero after the end of the buffer to simplify printing */ 179 umem_error_buffer[bufsize] = 0; 180 181 if ((umem_error_begin % bufsize) == 0) 182 mdb_printf("%s\n", umem_error_buffer); 183 else { 184 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0; 185 mdb_printf("%s%s\n", 186 &umem_error_buffer[umem_error_begin % bufsize], 187 umem_error_buffer); 188 } 189 190 return (DCMD_OK); 191 } 192 193 static void 194 umem_log_status(const char *name, umem_log_header_t *val) 195 { 196 umem_log_header_t my_lh; 197 uintptr_t pos = (uintptr_t)val; 198 size_t size; 199 200 if (pos == NULL) 201 return; 202 203 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) { 204 mdb_warn("\nunable to read umem_%s_log pointer %p", 205 name, pos); 206 return; 207 } 208 209 size = my_lh.lh_chunksize * my_lh.lh_nchunks; 210 211 if (size % (1024 * 1024) == 0) 212 mdb_printf("%s=%dm ", name, size / (1024 * 1024)); 213 else if (size % 1024 == 0) 214 mdb_printf("%s=%dk ", name, size / 1024); 215 else 216 mdb_printf("%s=%d ", name, size); 217 } 218 219 typedef struct umem_debug_flags { 220 const char *udf_name; 221 uint_t udf_flags; 222 uint_t udf_clear; /* if 0, uses udf_flags */ 223 } 
umem_debug_flags_t; 224 225 umem_debug_flags_t umem_status_flags[] = { 226 { "random", UMF_RANDOMIZE, UMF_RANDOM }, 227 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS }, 228 { "audit", UMF_AUDIT }, 229 { "guards", UMF_DEADBEEF | UMF_REDZONE }, 230 { "nosignal", UMF_CHECKSIGNAL }, 231 { "firewall", UMF_FIREWALL }, 232 { "lite", UMF_LITE }, 233 { NULL } 234 }; 235 236 /*ARGSUSED*/ 237 int 238 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 239 { 240 int umem_logging; 241 242 umem_log_header_t *umem_transaction_log; 243 umem_log_header_t *umem_content_log; 244 umem_log_header_t *umem_failure_log; 245 umem_log_header_t *umem_slab_log; 246 247 mdb_printf("Status:\t\t%s\n", 248 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" : 249 umem_ready == UMEM_READY_STARTUP ? "uninitialized" : 250 umem_ready == UMEM_READY_INITING ? "initialization in process" : 251 umem_ready == UMEM_READY ? "ready and active" : 252 umem_ready == 0 ? "not loaded into address space" : 253 "unknown (umem_ready invalid)"); 254 255 if (umem_ready == 0) 256 return (DCMD_OK); 257 258 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus); 259 260 if (UMEM_READVAR(umem_logging)) 261 goto err; 262 if (UMEM_READVAR(umem_transaction_log)) 263 goto err; 264 if (UMEM_READVAR(umem_content_log)) 265 goto err; 266 if (UMEM_READVAR(umem_failure_log)) 267 goto err; 268 if (UMEM_READVAR(umem_slab_log)) 269 goto err; 270 271 mdb_printf("Logs:\t\t"); 272 umem_log_status("transaction", umem_transaction_log); 273 umem_log_status("content", umem_content_log); 274 umem_log_status("fail", umem_failure_log); 275 umem_log_status("slab", umem_slab_log); 276 if (!umem_logging) 277 mdb_printf("(inactive)"); 278 mdb_printf("\n"); 279 280 mdb_printf("Message buffer:\n"); 281 return (umem_abort_messages()); 282 283 err: 284 mdb_printf("Message buffer:\n"); 285 (void) umem_abort_messages(); 286 return (DCMD_ERR); 287 } 288 289 typedef struct { 290 uintptr_t ucw_first; 291 uintptr_t ucw_current; 292 } umem_cache_walk_t; 293 294 int 295 umem_cache_walk_init(mdb_walk_state_t *wsp) 296 { 297 umem_cache_walk_t *ucw; 298 umem_cache_t c; 299 uintptr_t cp; 300 GElf_Sym sym; 301 302 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) { 303 mdb_warn("couldn't find umem_null_cache"); 304 return (WALK_ERR); 305 } 306 307 cp = (uintptr_t)sym.st_value; 308 309 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) { 310 mdb_warn("couldn't read cache at %p", cp); 311 return (WALK_ERR); 312 } 313 314 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP); 315 316 ucw->ucw_first = cp; 317 ucw->ucw_current = (uintptr_t)c.cache_next; 318 wsp->walk_data = ucw; 319 320 return (WALK_NEXT); 321 } 322 323 int 324 umem_cache_walk_step(mdb_walk_state_t *wsp) 325 { 326 umem_cache_walk_t *ucw = wsp->walk_data; 327 umem_cache_t c; 328 int status; 329 330 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) { 331 mdb_warn("couldn't read cache at %p", ucw->ucw_current); 332 return (WALK_DONE); 333 } 334 335 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata); 336 337 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first) 338 return (WALK_DONE); 339 340 return (status); 341 } 342 343 void 344 umem_cache_walk_fini(mdb_walk_state_t *wsp) 345 { 346 umem_cache_walk_t *ucw = wsp->walk_data; 347 mdb_free(ucw, sizeof (umem_cache_walk_t)); 348 } 349 350 typedef struct { 351 umem_cpu_t *ucw_cpus; 352 uint32_t ucw_current; 353 uint32_t ucw_max; 354 } umem_cpu_walk_state_t; 355 356 int 357 
umem_cpu_walk_init(mdb_walk_state_t *wsp) 358 { 359 umem_cpu_t *umem_cpus; 360 361 umem_cpu_walk_state_t *ucw; 362 363 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) { 364 mdb_warn("failed to read 'umem_cpus'"); 365 return (WALK_ERR); 366 } 367 368 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP); 369 370 ucw->ucw_cpus = umem_cpus; 371 ucw->ucw_current = 0; 372 ucw->ucw_max = umem_max_ncpus; 373 374 wsp->walk_data = ucw; 375 return (WALK_NEXT); 376 } 377 378 int 379 umem_cpu_walk_step(mdb_walk_state_t *wsp) 380 { 381 umem_cpu_t cpu; 382 umem_cpu_walk_state_t *ucw = wsp->walk_data; 383 384 uintptr_t caddr; 385 386 if (ucw->ucw_current >= ucw->ucw_max) 387 return (WALK_DONE); 388 389 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]); 390 391 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) { 392 mdb_warn("failed to read cpu %d", ucw->ucw_current); 393 return (WALK_ERR); 394 } 395 396 ucw->ucw_current++; 397 398 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata)); 399 } 400 401 void 402 umem_cpu_walk_fini(mdb_walk_state_t *wsp) 403 { 404 umem_cpu_walk_state_t *ucw = wsp->walk_data; 405 406 mdb_free(ucw, sizeof (*ucw)); 407 } 408 409 int 410 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 411 { 412 if (wsp->walk_addr == NULL) { 413 mdb_warn("umem_cpu_cache doesn't support global walks"); 414 return (WALK_ERR); 415 } 416 417 if (mdb_layered_walk("umem_cpu", wsp) == -1) { 418 mdb_warn("couldn't walk 'umem_cpu'"); 419 return (WALK_ERR); 420 } 421 422 wsp->walk_data = (void *)wsp->walk_addr; 423 424 return (WALK_NEXT); 425 } 426 427 int 428 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 429 { 430 uintptr_t caddr = (uintptr_t)wsp->walk_data; 431 const umem_cpu_t *cpu = wsp->walk_layer; 432 umem_cpu_cache_t cc; 433 434 caddr += cpu->cpu_cache_offset; 435 436 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) { 437 mdb_warn("couldn't read umem_cpu_cache at %p", caddr); 438 return (WALK_ERR); 439 } 440 441 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 442 } 443 444 int 445 umem_slab_walk_init(mdb_walk_state_t *wsp) 446 { 447 uintptr_t caddr = wsp->walk_addr; 448 umem_cache_t c; 449 450 if (caddr == NULL) { 451 mdb_warn("umem_slab doesn't support global walks\n"); 452 return (WALK_ERR); 453 } 454 455 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 456 mdb_warn("couldn't read umem_cache at %p", caddr); 457 return (WALK_ERR); 458 } 459 460 wsp->walk_data = 461 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 462 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; 463 464 return (WALK_NEXT); 465 } 466 467 int 468 umem_slab_walk_partial_init(mdb_walk_state_t *wsp) 469 { 470 uintptr_t caddr = wsp->walk_addr; 471 umem_cache_t c; 472 473 if (caddr == NULL) { 474 mdb_warn("umem_slab_partial doesn't support global walks\n"); 475 return (WALK_ERR); 476 } 477 478 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 479 mdb_warn("couldn't read umem_cache at %p", caddr); 480 return (WALK_ERR); 481 } 482 483 wsp->walk_data = 484 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); 485 wsp->walk_addr = (uintptr_t)c.cache_freelist; 486 487 /* 488 * Some consumers (umem_walk_step(), in particular) require at 489 * least one callback if there are any buffers in the cache. So 490 * if there are *no* partial slabs, report the last full slab, if 491 * any. 492 * 493 * Yes, this is ugly, but it's cleaner than the other possibilities. 
494 */ 495 if ((uintptr_t)wsp->walk_data == wsp->walk_addr) 496 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; 497 498 return (WALK_NEXT); 499 } 500 501 int 502 umem_slab_walk_step(mdb_walk_state_t *wsp) 503 { 504 umem_slab_t s; 505 uintptr_t addr = wsp->walk_addr; 506 uintptr_t saddr = (uintptr_t)wsp->walk_data; 507 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab); 508 509 if (addr == saddr) 510 return (WALK_DONE); 511 512 if (mdb_vread(&s, sizeof (s), addr) == -1) { 513 mdb_warn("failed to read slab at %p", wsp->walk_addr); 514 return (WALK_ERR); 515 } 516 517 if ((uintptr_t)s.slab_cache != caddr) { 518 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 519 addr, caddr, s.slab_cache); 520 return (WALK_ERR); 521 } 522 523 wsp->walk_addr = (uintptr_t)s.slab_next; 524 525 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); 526 } 527 528 int 529 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 530 { 531 umem_cache_t c; 532 533 if (!(flags & DCMD_ADDRSPEC)) { 534 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) { 535 mdb_warn("can't walk umem_cache"); 536 return (DCMD_ERR); 537 } 538 return (DCMD_OK); 539 } 540 541 if (DCMD_HDRSPEC(flags)) 542 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME", 543 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 544 545 if (mdb_vread(&c, sizeof (c), addr) == -1) { 546 mdb_warn("couldn't read umem_cache at %p", addr); 547 return (DCMD_ERR); 548 } 549 550 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name, 551 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 552 553 return (DCMD_OK); 554 } 555 556 static int 557 addrcmp(const void *lhs, const void *rhs) 558 { 559 uintptr_t p1 = *((uintptr_t *)lhs); 560 uintptr_t p2 = *((uintptr_t *)rhs); 561 562 if (p1 < p2) 563 return (-1); 564 if (p1 > p2) 565 return (1); 566 return (0); 567 } 568 569 static int 570 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs) 571 { 572 const umem_bufctl_audit_t *bcp1 = *lhs; 573 const umem_bufctl_audit_t *bcp2 = *rhs; 574 575 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 576 return (-1); 577 578 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 579 return (1); 580 581 return (0); 582 } 583 584 typedef struct umem_hash_walk { 585 uintptr_t *umhw_table; 586 size_t umhw_nelems; 587 size_t umhw_pos; 588 umem_bufctl_t umhw_cur; 589 } umem_hash_walk_t; 590 591 int 592 umem_hash_walk_init(mdb_walk_state_t *wsp) 593 { 594 umem_hash_walk_t *umhw; 595 uintptr_t *hash; 596 umem_cache_t c; 597 uintptr_t haddr, addr = wsp->walk_addr; 598 size_t nelems; 599 size_t hsize; 600 601 if (addr == NULL) { 602 mdb_warn("umem_hash doesn't support global walks\n"); 603 return (WALK_ERR); 604 } 605 606 if (mdb_vread(&c, sizeof (c), addr) == -1) { 607 mdb_warn("couldn't read cache at addr %p", addr); 608 return (WALK_ERR); 609 } 610 611 if (!(c.cache_flags & UMF_HASH)) { 612 mdb_warn("cache %p doesn't have a hash table\n", addr); 613 return (WALK_DONE); /* nothing to do */ 614 } 615 616 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP); 617 umhw->umhw_cur.bc_next = NULL; 618 umhw->umhw_pos = 0; 619 620 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1; 621 hsize = nelems * sizeof (uintptr_t); 622 haddr = (uintptr_t)c.cache_hash_table; 623 624 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 625 if (mdb_vread(hash, hsize, haddr) == -1) { 626 mdb_warn("failed to read hash table at %p", haddr); 627 mdb_free(hash, hsize); 628 mdb_free(umhw, sizeof (umem_hash_walk_t)); 629 return 
(WALK_ERR); 630 } 631 632 wsp->walk_data = umhw; 633 634 return (WALK_NEXT); 635 } 636 637 int 638 umem_hash_walk_step(mdb_walk_state_t *wsp) 639 { 640 umem_hash_walk_t *umhw = wsp->walk_data; 641 uintptr_t addr = NULL; 642 643 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) { 644 while (umhw->umhw_pos < umhw->umhw_nelems) { 645 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL) 646 break; 647 } 648 } 649 if (addr == NULL) 650 return (WALK_DONE); 651 652 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) { 653 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr); 654 return (WALK_ERR); 655 } 656 657 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata)); 658 } 659 660 void 661 umem_hash_walk_fini(mdb_walk_state_t *wsp) 662 { 663 umem_hash_walk_t *umhw = wsp->walk_data; 664 665 if (umhw == NULL) 666 return; 667 668 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t)); 669 mdb_free(umhw, sizeof (umem_hash_walk_t)); 670 } 671 672 /* 673 * Find the address of the bufctl structure for the address 'buf' in cache 674 * 'cp', which is at address caddr, and place it in *out. 675 */ 676 static int 677 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 678 { 679 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf); 680 umem_bufctl_t *bcp; 681 umem_bufctl_t bc; 682 683 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) { 684 mdb_warn("unable to read hash bucket for %p in cache %p", 685 buf, caddr); 686 return (-1); 687 } 688 689 while (bcp != NULL) { 690 if (mdb_vread(&bc, sizeof (umem_bufctl_t), 691 (uintptr_t)bcp) == -1) { 692 mdb_warn("unable to read bufctl at %p", bcp); 693 return (-1); 694 } 695 if (bc.bc_addr == buf) { 696 *out = (uintptr_t)bcp; 697 return (0); 698 } 699 bcp = bc.bc_next; 700 } 701 702 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 703 return (-1); 704 } 705 706 int 707 umem_get_magsize(const umem_cache_t *cp) 708 { 709 uintptr_t addr = (uintptr_t)cp->cache_magtype; 710 GElf_Sym mt_sym; 711 umem_magtype_t mt; 712 int res; 713 714 /* 715 * if cpu 0 has a non-zero magsize, it must be correct. caches 716 * with UMF_NOMAGAZINE have disabled their magazine layers, so 717 * it is okay to return 0 for them. 718 */ 719 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 720 (cp->cache_flags & UMF_NOMAGAZINE)) 721 return (res); 722 723 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) { 724 mdb_warn("unable to read 'umem_magtype'"); 725 } else if (addr < mt_sym.st_value || 726 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 727 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 728 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 729 cp->cache_name, addr); 730 return (0); 731 } 732 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 733 mdb_warn("unable to read magtype at %a", addr); 734 return (0); 735 } 736 return (mt.mt_magsize); 737 } 738 739 /*ARGSUSED*/ 740 static int 741 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est) 742 { 743 *est -= (sp->slab_chunks - sp->slab_refcnt); 744 745 return (WALK_NEXT); 746 } 747 748 /* 749 * Returns an upper bound on the number of allocated buffers in a given 750 * cache. 
751 */ 752 size_t 753 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp) 754 { 755 int magsize; 756 size_t cache_est; 757 758 cache_est = cp->cache_buftotal; 759 760 (void) mdb_pwalk("umem_slab_partial", 761 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr); 762 763 if ((magsize = umem_get_magsize(cp)) != 0) { 764 size_t mag_est = cp->cache_full.ml_total * magsize; 765 766 if (cache_est >= mag_est) { 767 cache_est -= mag_est; 768 } else { 769 mdb_warn("cache %p's magazine layer holds more buffers " 770 "than the slab layer.\n", addr); 771 } 772 } 773 return (cache_est); 774 } 775 776 #define READMAG_ROUNDS(rounds) { \ 777 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \ 778 mdb_warn("couldn't read magazine at %p", ump); \ 779 goto fail; \ 780 } \ 781 for (i = 0; i < rounds; i++) { \ 782 maglist[magcnt++] = mp->mag_round[i]; \ 783 if (magcnt == magmax) { \ 784 mdb_warn("%d magazines exceeds fudge factor\n", \ 785 magcnt); \ 786 goto fail; \ 787 } \ 788 } \ 789 } 790 791 int 792 umem_read_magazines(umem_cache_t *cp, uintptr_t addr, 793 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 794 { 795 umem_magazine_t *ump, *mp; 796 void **maglist = NULL; 797 int i, cpu; 798 size_t magsize, magmax, magbsize; 799 size_t magcnt = 0; 800 801 /* 802 * Read the magtype out of the cache, after verifying the pointer's 803 * correctness. 804 */ 805 magsize = umem_get_magsize(cp); 806 if (magsize == 0) { 807 *maglistp = NULL; 808 *magcntp = 0; 809 *magmaxp = 0; 810 return (WALK_NEXT); 811 } 812 813 /* 814 * There are several places where we need to go buffer hunting: 815 * the per-CPU loaded magazine, the per-CPU spare full magazine, 816 * and the full magazine list in the depot. 817 * 818 * For an upper bound on the number of buffers in the magazine 819 * layer, we have the number of magazines on the cache_full 820 * list plus at most two magazines per CPU (the loaded and the 821 * spare). Toss in 100 magazines as a fudge factor in case this 822 * is live (the number "100" comes from the same fudge factor in 823 * crash(1M)). 824 */ 825 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize; 826 magbsize = offsetof(umem_magazine_t, mag_round[magsize]); 827 828 if (magbsize >= PAGESIZE / 2) { 829 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 830 addr, magbsize); 831 return (WALK_ERR); 832 } 833 834 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 835 mp = mdb_alloc(magbsize, alloc_flags); 836 if (mp == NULL || maglist == NULL) 837 goto fail; 838 839 /* 840 * First up: the magazines in the depot (i.e. on the cache_full list). 841 */ 842 for (ump = cp->cache_full.ml_list; ump != NULL; ) { 843 READMAG_ROUNDS(magsize); 844 ump = mp->mag_next; 845 846 if (ump == cp->cache_full.ml_list) 847 break; /* cache_full list loop detected */ 848 } 849 850 dprintf(("cache_full list done\n")); 851 852 /* 853 * Now whip through the CPUs, snagging the loaded magazines 854 * and full spares. 
855 */ 856 for (cpu = 0; cpu < umem_max_ncpus; cpu++) { 857 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 858 859 dprintf(("reading cpu cache %p\n", 860 (uintptr_t)ccp - (uintptr_t)cp + addr)); 861 862 if (ccp->cc_rounds > 0 && 863 (ump = ccp->cc_loaded) != NULL) { 864 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 865 READMAG_ROUNDS(ccp->cc_rounds); 866 } 867 868 if (ccp->cc_prounds > 0 && 869 (ump = ccp->cc_ploaded) != NULL) { 870 dprintf(("reading %d previously loaded rounds\n", 871 ccp->cc_prounds)); 872 READMAG_ROUNDS(ccp->cc_prounds); 873 } 874 } 875 876 dprintf(("magazine layer: %d buffers\n", magcnt)); 877 878 if (!(alloc_flags & UM_GC)) 879 mdb_free(mp, magbsize); 880 881 *maglistp = maglist; 882 *magcntp = magcnt; 883 *magmaxp = magmax; 884 885 return (WALK_NEXT); 886 887 fail: 888 if (!(alloc_flags & UM_GC)) { 889 if (mp) 890 mdb_free(mp, magbsize); 891 if (maglist) 892 mdb_free(maglist, magmax * sizeof (void *)); 893 } 894 return (WALK_ERR); 895 } 896 897 static int 898 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 899 { 900 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 901 } 902 903 static int 904 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 905 { 906 umem_bufctl_audit_t *b; 907 UMEM_LOCAL_BUFCTL_AUDIT(&b); 908 909 /* 910 * if UMF_AUDIT is not set, we know that we're looking at a 911 * umem_bufctl_t. 912 */ 913 if (!(cp->cache_flags & UMF_AUDIT) || 914 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) { 915 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE); 916 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) { 917 mdb_warn("unable to read bufctl at %p", buf); 918 return (WALK_ERR); 919 } 920 } 921 922 return (wsp->walk_callback(buf, b, wsp->walk_cbdata)); 923 } 924 925 typedef struct umem_walk { 926 int umw_type; 927 928 uintptr_t umw_addr; /* cache address */ 929 umem_cache_t *umw_cp; 930 size_t umw_csize; 931 932 /* 933 * magazine layer 934 */ 935 void **umw_maglist; 936 size_t umw_max; 937 size_t umw_count; 938 size_t umw_pos; 939 940 /* 941 * slab layer 942 */ 943 char *umw_valid; /* to keep track of freed buffers */ 944 char *umw_ubase; /* buffer for slab data */ 945 } umem_walk_t; 946 947 static int 948 umem_walk_init_common(mdb_walk_state_t *wsp, int type) 949 { 950 umem_walk_t *umw; 951 int csize; 952 umem_cache_t *cp; 953 size_t vm_quantum; 954 955 size_t magmax, magcnt; 956 void **maglist = NULL; 957 uint_t chunksize, slabsize; 958 int status = WALK_ERR; 959 uintptr_t addr = wsp->walk_addr; 960 const char *layered; 961 962 type &= ~UM_HASH; 963 964 if (addr == NULL) { 965 mdb_warn("umem walk doesn't support global walks\n"); 966 return (WALK_ERR); 967 } 968 969 dprintf(("walking %p\n", addr)); 970 971 /* 972 * The number of "cpus" determines how large the cache is. 973 */ 974 csize = UMEM_CACHE_SIZE(umem_max_ncpus); 975 cp = mdb_alloc(csize, UM_SLEEP); 976 977 if (mdb_vread(cp, csize, addr) == -1) { 978 mdb_warn("couldn't read cache at addr %p", addr); 979 goto out2; 980 } 981 982 /* 983 * It's easy for someone to hand us an invalid cache address. 984 * Unfortunately, it is hard for this walker to survive an 985 * invalid cache cleanly. So we make sure that: 986 * 987 * 1. the vmem arena for the cache is readable, 988 * 2. the vmem arena's quantum is a power of 2, 989 * 3. our slabsize is a multiple of the quantum, and 990 * 4. our chunksize is >0 and less than our slabsize. 
991 */ 992 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 993 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 994 vm_quantum == 0 || 995 (vm_quantum & (vm_quantum - 1)) != 0 || 996 cp->cache_slabsize < vm_quantum || 997 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 998 cp->cache_chunksize == 0 || 999 cp->cache_chunksize > cp->cache_slabsize) { 1000 mdb_warn("%p is not a valid umem_cache_t\n", addr); 1001 goto out2; 1002 } 1003 1004 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1005 1006 if (cp->cache_buftotal == 0) { 1007 mdb_free(cp, csize); 1008 return (WALK_DONE); 1009 } 1010 1011 /* 1012 * If they ask for bufctls, but it's a small-slab cache, 1013 * there is nothing to report. 1014 */ 1015 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) { 1016 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n", 1017 cp->cache_flags)); 1018 mdb_free(cp, csize); 1019 return (WALK_DONE); 1020 } 1021 1022 /* 1023 * Read in the contents of the magazine layer 1024 */ 1025 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax, 1026 UM_SLEEP) == WALK_ERR) 1027 goto out2; 1028 1029 /* 1030 * We have all of the buffers from the magazines; if we are walking 1031 * allocated buffers, sort them so we can bsearch them later. 1032 */ 1033 if (type & UM_ALLOCATED) 1034 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1035 1036 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP); 1037 1038 umw->umw_type = type; 1039 umw->umw_addr = addr; 1040 umw->umw_cp = cp; 1041 umw->umw_csize = csize; 1042 umw->umw_maglist = maglist; 1043 umw->umw_max = magmax; 1044 umw->umw_count = magcnt; 1045 umw->umw_pos = 0; 1046 1047 /* 1048 * When walking allocated buffers in a UMF_HASH cache, we walk the 1049 * hash table instead of the slab layer. 1050 */ 1051 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) { 1052 layered = "umem_hash"; 1053 1054 umw->umw_type |= UM_HASH; 1055 } else { 1056 /* 1057 * If we are walking freed buffers, we only need the 1058 * magazine layer plus the partially allocated slabs. 1059 * To walk allocated buffers, we need all of the slabs. 1060 */ 1061 if (type & UM_ALLOCATED) 1062 layered = "umem_slab"; 1063 else 1064 layered = "umem_slab_partial"; 1065 1066 /* 1067 * for small-slab caches, we read in the entire slab. For 1068 * freed buffers, we can just walk the freelist. For 1069 * allocated buffers, we use a 'valid' array to track 1070 * the freed buffers. 
1071 */ 1072 if (!(cp->cache_flags & UMF_HASH)) { 1073 chunksize = cp->cache_chunksize; 1074 slabsize = cp->cache_slabsize; 1075 1076 umw->umw_ubase = mdb_alloc(slabsize + 1077 sizeof (umem_bufctl_t), UM_SLEEP); 1078 1079 if (type & UM_ALLOCATED) 1080 umw->umw_valid = 1081 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1082 } 1083 } 1084 1085 status = WALK_NEXT; 1086 1087 if (mdb_layered_walk(layered, wsp) == -1) { 1088 mdb_warn("unable to start layered '%s' walk", layered); 1089 status = WALK_ERR; 1090 } 1091 1092 out1: 1093 if (status == WALK_ERR) { 1094 if (umw->umw_valid) 1095 mdb_free(umw->umw_valid, slabsize / chunksize); 1096 1097 if (umw->umw_ubase) 1098 mdb_free(umw->umw_ubase, slabsize + 1099 sizeof (umem_bufctl_t)); 1100 1101 if (umw->umw_maglist) 1102 mdb_free(umw->umw_maglist, umw->umw_max * 1103 sizeof (uintptr_t)); 1104 1105 mdb_free(umw, sizeof (umem_walk_t)); 1106 wsp->walk_data = NULL; 1107 } 1108 1109 out2: 1110 if (status == WALK_ERR) 1111 mdb_free(cp, csize); 1112 1113 return (status); 1114 } 1115 1116 int 1117 umem_walk_step(mdb_walk_state_t *wsp) 1118 { 1119 umem_walk_t *umw = wsp->walk_data; 1120 int type = umw->umw_type; 1121 umem_cache_t *cp = umw->umw_cp; 1122 1123 void **maglist = umw->umw_maglist; 1124 int magcnt = umw->umw_count; 1125 1126 uintptr_t chunksize, slabsize; 1127 uintptr_t addr; 1128 const umem_slab_t *sp; 1129 const umem_bufctl_t *bcp; 1130 umem_bufctl_t bc; 1131 1132 int chunks; 1133 char *kbase; 1134 void *buf; 1135 int i, ret; 1136 1137 char *valid, *ubase; 1138 1139 /* 1140 * first, handle the 'umem_hash' layered walk case 1141 */ 1142 if (type & UM_HASH) { 1143 /* 1144 * We have a buffer which has been allocated out of the 1145 * global layer. We need to make sure that it's not 1146 * actually sitting in a magazine before we report it as 1147 * an allocated buffer. 1148 */ 1149 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr; 1150 1151 if (magcnt > 0 && 1152 bsearch(&buf, maglist, magcnt, sizeof (void *), 1153 addrcmp) != NULL) 1154 return (WALK_NEXT); 1155 1156 if (type & UM_BUFCTL) 1157 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1158 1159 return (umem_walk_callback(wsp, (uintptr_t)buf)); 1160 } 1161 1162 ret = WALK_NEXT; 1163 1164 addr = umw->umw_addr; 1165 1166 /* 1167 * If we're walking freed buffers, report everything in the 1168 * magazine layer before processing the first slab. 
1169 */ 1170 if ((type & UM_FREE) && magcnt != 0) { 1171 umw->umw_count = 0; /* only do this once */ 1172 for (i = 0; i < magcnt; i++) { 1173 buf = maglist[i]; 1174 1175 if (type & UM_BUFCTL) { 1176 uintptr_t out; 1177 1178 if (cp->cache_flags & UMF_BUFTAG) { 1179 umem_buftag_t *btp; 1180 umem_buftag_t tag; 1181 1182 /* LINTED - alignment */ 1183 btp = UMEM_BUFTAG(cp, buf); 1184 if (mdb_vread(&tag, sizeof (tag), 1185 (uintptr_t)btp) == -1) { 1186 mdb_warn("reading buftag for " 1187 "%p at %p", buf, btp); 1188 continue; 1189 } 1190 out = (uintptr_t)tag.bt_bufctl; 1191 } else { 1192 if (umem_hash_lookup(cp, addr, buf, 1193 &out) == -1) 1194 continue; 1195 } 1196 ret = bufctl_walk_callback(cp, wsp, out); 1197 } else { 1198 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1199 } 1200 1201 if (ret != WALK_NEXT) 1202 return (ret); 1203 } 1204 } 1205 1206 /* 1207 * Handle the buffers in the current slab 1208 */ 1209 chunksize = cp->cache_chunksize; 1210 slabsize = cp->cache_slabsize; 1211 1212 sp = wsp->walk_layer; 1213 chunks = sp->slab_chunks; 1214 kbase = sp->slab_base; 1215 1216 dprintf(("kbase is %p\n", kbase)); 1217 1218 if (!(cp->cache_flags & UMF_HASH)) { 1219 valid = umw->umw_valid; 1220 ubase = umw->umw_ubase; 1221 1222 if (mdb_vread(ubase, chunks * chunksize, 1223 (uintptr_t)kbase) == -1) { 1224 mdb_warn("failed to read slab contents at %p", kbase); 1225 return (WALK_ERR); 1226 } 1227 1228 /* 1229 * Set up the valid map as fully allocated -- we'll punch 1230 * out the freelist. 1231 */ 1232 if (type & UM_ALLOCATED) 1233 (void) memset(valid, 1, chunks); 1234 } else { 1235 valid = NULL; 1236 ubase = NULL; 1237 } 1238 1239 /* 1240 * walk the slab's freelist 1241 */ 1242 bcp = sp->slab_head; 1243 1244 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1245 1246 /* 1247 * since we could be in the middle of allocating a buffer, 1248 * our refcnt could be one higher than it aught. So we 1249 * check one further on the freelist than the count allows. 1250 */ 1251 for (i = sp->slab_refcnt; i <= chunks; i++) { 1252 uint_t ndx; 1253 1254 dprintf(("bcp is %p\n", bcp)); 1255 1256 if (bcp == NULL) { 1257 if (i == chunks) 1258 break; 1259 mdb_warn( 1260 "slab %p in cache %p freelist too short by %d\n", 1261 sp, addr, chunks - i); 1262 break; 1263 } 1264 1265 if (cp->cache_flags & UMF_HASH) { 1266 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1267 mdb_warn("failed to read bufctl ptr at %p", 1268 bcp); 1269 break; 1270 } 1271 buf = bc.bc_addr; 1272 } else { 1273 /* 1274 * Otherwise the buffer is (or should be) in the slab 1275 * that we've read in; determine its offset in the 1276 * slab, validate that it's not corrupt, and add to 1277 * our base address to find the umem_bufctl_t. (Note 1278 * that we don't need to add the size of the bufctl 1279 * to our offset calculation because of the slop that's 1280 * allocated for the buffer at ubase.) 1281 */ 1282 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase; 1283 1284 if (offs > chunks * chunksize) { 1285 mdb_warn("found corrupt bufctl ptr %p" 1286 " in slab %p in cache %p\n", bcp, 1287 wsp->walk_addr, addr); 1288 break; 1289 } 1290 1291 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs)); 1292 buf = UMEM_BUF(cp, bcp); 1293 } 1294 1295 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1296 1297 if (ndx > slabsize / cp->cache_bufsize) { 1298 /* 1299 * This is very wrong; we have managed to find 1300 * a buffer in the slab which shouldn't 1301 * actually be here. Emit a warning, and 1302 * try to continue. 
1303 */ 1304 mdb_warn("buf %p is out of range for " 1305 "slab %p, cache %p\n", buf, sp, addr); 1306 } else if (type & UM_ALLOCATED) { 1307 /* 1308 * we have found a buffer on the slab's freelist; 1309 * clear its entry 1310 */ 1311 valid[ndx] = 0; 1312 } else { 1313 /* 1314 * Report this freed buffer 1315 */ 1316 if (type & UM_BUFCTL) { 1317 ret = bufctl_walk_callback(cp, wsp, 1318 (uintptr_t)bcp); 1319 } else { 1320 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1321 } 1322 if (ret != WALK_NEXT) 1323 return (ret); 1324 } 1325 1326 bcp = bc.bc_next; 1327 } 1328 1329 if (bcp != NULL) { 1330 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1331 sp, addr, bcp)); 1332 } 1333 1334 /* 1335 * If we are walking freed buffers, the loop above handled reporting 1336 * them. 1337 */ 1338 if (type & UM_FREE) 1339 return (WALK_NEXT); 1340 1341 if (type & UM_BUFCTL) { 1342 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for " 1343 "cache %p\n", addr); 1344 return (WALK_ERR); 1345 } 1346 1347 /* 1348 * Report allocated buffers, skipping buffers in the magazine layer. 1349 * We only get this far for small-slab caches. 1350 */ 1351 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1352 buf = (char *)kbase + i * chunksize; 1353 1354 if (!valid[i]) 1355 continue; /* on slab freelist */ 1356 1357 if (magcnt > 0 && 1358 bsearch(&buf, maglist, magcnt, sizeof (void *), 1359 addrcmp) != NULL) 1360 continue; /* in magazine layer */ 1361 1362 ret = umem_walk_callback(wsp, (uintptr_t)buf); 1363 } 1364 return (ret); 1365 } 1366 1367 void 1368 umem_walk_fini(mdb_walk_state_t *wsp) 1369 { 1370 umem_walk_t *umw = wsp->walk_data; 1371 uintptr_t chunksize; 1372 uintptr_t slabsize; 1373 1374 if (umw == NULL) 1375 return; 1376 1377 if (umw->umw_maglist != NULL) 1378 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *)); 1379 1380 chunksize = umw->umw_cp->cache_chunksize; 1381 slabsize = umw->umw_cp->cache_slabsize; 1382 1383 if (umw->umw_valid != NULL) 1384 mdb_free(umw->umw_valid, slabsize / chunksize); 1385 if (umw->umw_ubase != NULL) 1386 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t)); 1387 1388 mdb_free(umw->umw_cp, umw->umw_csize); 1389 mdb_free(umw, sizeof (umem_walk_t)); 1390 } 1391 1392 /*ARGSUSED*/ 1393 static int 1394 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp) 1395 { 1396 /* 1397 * Buffers allocated from NOTOUCH caches can also show up as freed 1398 * memory in other caches. This can be a little confusing, so we 1399 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1400 * that "::walk umem" and "::walk freemem" yield disjoint output). 
1401 */ 1402 if (c->cache_cflags & UMC_NOTOUCH) 1403 return (WALK_NEXT); 1404 1405 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1406 wsp->walk_cbdata, addr) == -1) 1407 return (WALK_DONE); 1408 1409 return (WALK_NEXT); 1410 } 1411 1412 #define UMEM_WALK_ALL(name, wsp) { \ 1413 wsp->walk_data = (name); \ 1414 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \ 1415 return (WALK_ERR); \ 1416 return (WALK_DONE); \ 1417 } 1418 1419 int 1420 umem_walk_init(mdb_walk_state_t *wsp) 1421 { 1422 if (wsp->walk_arg != NULL) 1423 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1424 1425 if (wsp->walk_addr == NULL) 1426 UMEM_WALK_ALL("umem", wsp); 1427 return (umem_walk_init_common(wsp, UM_ALLOCATED)); 1428 } 1429 1430 int 1431 bufctl_walk_init(mdb_walk_state_t *wsp) 1432 { 1433 if (wsp->walk_addr == NULL) 1434 UMEM_WALK_ALL("bufctl", wsp); 1435 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL)); 1436 } 1437 1438 int 1439 freemem_walk_init(mdb_walk_state_t *wsp) 1440 { 1441 if (wsp->walk_addr == NULL) 1442 UMEM_WALK_ALL("freemem", wsp); 1443 return (umem_walk_init_common(wsp, UM_FREE)); 1444 } 1445 1446 int 1447 freectl_walk_init(mdb_walk_state_t *wsp) 1448 { 1449 if (wsp->walk_addr == NULL) 1450 UMEM_WALK_ALL("freectl", wsp); 1451 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL)); 1452 } 1453 1454 typedef struct bufctl_history_walk { 1455 void *bhw_next; 1456 umem_cache_t *bhw_cache; 1457 umem_slab_t *bhw_slab; 1458 hrtime_t bhw_timestamp; 1459 } bufctl_history_walk_t; 1460 1461 int 1462 bufctl_history_walk_init(mdb_walk_state_t *wsp) 1463 { 1464 bufctl_history_walk_t *bhw; 1465 umem_bufctl_audit_t bc; 1466 umem_bufctl_audit_t bcn; 1467 1468 if (wsp->walk_addr == NULL) { 1469 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1470 return (WALK_ERR); 1471 } 1472 1473 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1474 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1475 return (WALK_ERR); 1476 } 1477 1478 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1479 bhw->bhw_timestamp = 0; 1480 bhw->bhw_cache = bc.bc_cache; 1481 bhw->bhw_slab = bc.bc_slab; 1482 1483 /* 1484 * sometimes the first log entry matches the base bufctl; in that 1485 * case, skip the base bufctl. 1486 */ 1487 if (bc.bc_lastlog != NULL && 1488 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1489 bc.bc_addr == bcn.bc_addr && 1490 bc.bc_cache == bcn.bc_cache && 1491 bc.bc_slab == bcn.bc_slab && 1492 bc.bc_timestamp == bcn.bc_timestamp && 1493 bc.bc_thread == bcn.bc_thread) 1494 bhw->bhw_next = bc.bc_lastlog; 1495 else 1496 bhw->bhw_next = (void *)wsp->walk_addr; 1497 1498 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1499 wsp->walk_data = bhw; 1500 1501 return (WALK_NEXT); 1502 } 1503 1504 int 1505 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1506 { 1507 bufctl_history_walk_t *bhw = wsp->walk_data; 1508 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1509 uintptr_t baseaddr = wsp->walk_addr; 1510 umem_bufctl_audit_t *b; 1511 UMEM_LOCAL_BUFCTL_AUDIT(&b); 1512 1513 if (addr == NULL) 1514 return (WALK_DONE); 1515 1516 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1517 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1518 return (WALK_ERR); 1519 } 1520 1521 /* 1522 * The bufctl is only valid if the address, cache, and slab are 1523 * correct. We also check that the timestamp is decreasing, to 1524 * prevent infinite loops. 
1525 */ 1526 if ((uintptr_t)b->bc_addr != baseaddr || 1527 b->bc_cache != bhw->bhw_cache || 1528 b->bc_slab != bhw->bhw_slab || 1529 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp)) 1530 return (WALK_DONE); 1531 1532 bhw->bhw_next = b->bc_lastlog; 1533 bhw->bhw_timestamp = b->bc_timestamp; 1534 1535 return (wsp->walk_callback(addr, b, wsp->walk_cbdata)); 1536 } 1537 1538 void 1539 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1540 { 1541 bufctl_history_walk_t *bhw = wsp->walk_data; 1542 1543 mdb_free(bhw, sizeof (*bhw)); 1544 } 1545 1546 typedef struct umem_log_walk { 1547 umem_bufctl_audit_t *ulw_base; 1548 umem_bufctl_audit_t **ulw_sorted; 1549 umem_log_header_t ulw_lh; 1550 size_t ulw_size; 1551 size_t ulw_maxndx; 1552 size_t ulw_ndx; 1553 } umem_log_walk_t; 1554 1555 int 1556 umem_log_walk_init(mdb_walk_state_t *wsp) 1557 { 1558 uintptr_t lp = wsp->walk_addr; 1559 umem_log_walk_t *ulw; 1560 umem_log_header_t *lhp; 1561 int maxndx, i, j, k; 1562 1563 /* 1564 * By default (global walk), walk the umem_transaction_log. Otherwise 1565 * read the log whose umem_log_header_t is stored at walk_addr. 1566 */ 1567 if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) { 1568 mdb_warn("failed to read 'umem_transaction_log'"); 1569 return (WALK_ERR); 1570 } 1571 1572 if (lp == NULL) { 1573 mdb_warn("log is disabled\n"); 1574 return (WALK_ERR); 1575 } 1576 1577 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP); 1578 lhp = &ulw->ulw_lh; 1579 1580 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) { 1581 mdb_warn("failed to read log header at %p", lp); 1582 mdb_free(ulw, sizeof (umem_log_walk_t)); 1583 return (WALK_ERR); 1584 } 1585 1586 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1587 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP); 1588 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1; 1589 1590 if (mdb_vread(ulw->ulw_base, ulw->ulw_size, 1591 (uintptr_t)lhp->lh_base) == -1) { 1592 mdb_warn("failed to read log at base %p", lhp->lh_base); 1593 mdb_free(ulw->ulw_base, ulw->ulw_size); 1594 mdb_free(ulw, sizeof (umem_log_walk_t)); 1595 return (WALK_ERR); 1596 } 1597 1598 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1599 sizeof (umem_bufctl_audit_t *), UM_SLEEP); 1600 1601 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1602 caddr_t chunk = (caddr_t) 1603 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize); 1604 1605 for (j = 0; j < maxndx; j++) { 1606 /* LINTED align */ 1607 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk; 1608 chunk += UMEM_BUFCTL_AUDIT_SIZE; 1609 } 1610 } 1611 1612 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *), 1613 (int(*)(const void *, const void *))bufctlcmp); 1614 1615 ulw->ulw_maxndx = k; 1616 wsp->walk_data = ulw; 1617 1618 return (WALK_NEXT); 1619 } 1620 1621 int 1622 umem_log_walk_step(mdb_walk_state_t *wsp) 1623 { 1624 umem_log_walk_t *ulw = wsp->walk_data; 1625 umem_bufctl_audit_t *bcp; 1626 1627 if (ulw->ulw_ndx == ulw->ulw_maxndx) 1628 return (WALK_DONE); 1629 1630 bcp = ulw->ulw_sorted[ulw->ulw_ndx++]; 1631 1632 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base + 1633 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata)); 1634 } 1635 1636 void 1637 umem_log_walk_fini(mdb_walk_state_t *wsp) 1638 { 1639 umem_log_walk_t *ulw = wsp->walk_data; 1640 1641 mdb_free(ulw->ulw_base, ulw->ulw_size); 1642 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx * 1643 sizeof (umem_bufctl_audit_t *)); 1644 mdb_free(ulw, sizeof (umem_log_walk_t)); 1645 } 1646 1647 typedef struct allocdby_bufctl 
{ 1648 uintptr_t abb_addr; 1649 hrtime_t abb_ts; 1650 } allocdby_bufctl_t; 1651 1652 typedef struct allocdby_walk { 1653 const char *abw_walk; 1654 uintptr_t abw_thread; 1655 size_t abw_nbufs; 1656 size_t abw_size; 1657 allocdby_bufctl_t *abw_buf; 1658 size_t abw_ndx; 1659 } allocdby_walk_t; 1660 1661 int 1662 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp, 1663 allocdby_walk_t *abw) 1664 { 1665 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1666 return (WALK_NEXT); 1667 1668 if (abw->abw_nbufs == abw->abw_size) { 1669 allocdby_bufctl_t *buf; 1670 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1671 1672 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1673 1674 bcopy(abw->abw_buf, buf, oldsize); 1675 mdb_free(abw->abw_buf, oldsize); 1676 1677 abw->abw_size <<= 1; 1678 abw->abw_buf = buf; 1679 } 1680 1681 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1682 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1683 abw->abw_nbufs++; 1684 1685 return (WALK_NEXT); 1686 } 1687 1688 /*ARGSUSED*/ 1689 int 1690 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw) 1691 { 1692 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl, 1693 abw, addr) == -1) { 1694 mdb_warn("couldn't walk bufctl for cache %p", addr); 1695 return (WALK_DONE); 1696 } 1697 1698 return (WALK_NEXT); 1699 } 1700 1701 static int 1702 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1703 { 1704 if (lhs->abb_ts < rhs->abb_ts) 1705 return (1); 1706 if (lhs->abb_ts > rhs->abb_ts) 1707 return (-1); 1708 return (0); 1709 } 1710 1711 static int 1712 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1713 { 1714 allocdby_walk_t *abw; 1715 1716 if (wsp->walk_addr == NULL) { 1717 mdb_warn("allocdby walk doesn't support global walks\n"); 1718 return (WALK_ERR); 1719 } 1720 1721 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1722 1723 abw->abw_thread = wsp->walk_addr; 1724 abw->abw_walk = walk; 1725 abw->abw_size = 128; /* something reasonable */ 1726 abw->abw_buf = 1727 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1728 1729 wsp->walk_data = abw; 1730 1731 if (mdb_walk("umem_cache", 1732 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1733 mdb_warn("couldn't walk umem_cache"); 1734 allocdby_walk_fini(wsp); 1735 return (WALK_ERR); 1736 } 1737 1738 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1739 (int(*)(const void *, const void *))allocdby_cmp); 1740 1741 return (WALK_NEXT); 1742 } 1743 1744 int 1745 allocdby_walk_init(mdb_walk_state_t *wsp) 1746 { 1747 return (allocdby_walk_init_common(wsp, "bufctl")); 1748 } 1749 1750 int 1751 freedby_walk_init(mdb_walk_state_t *wsp) 1752 { 1753 return (allocdby_walk_init_common(wsp, "freectl")); 1754 } 1755 1756 int 1757 allocdby_walk_step(mdb_walk_state_t *wsp) 1758 { 1759 allocdby_walk_t *abw = wsp->walk_data; 1760 uintptr_t addr; 1761 umem_bufctl_audit_t *bcp; 1762 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 1763 1764 if (abw->abw_ndx == abw->abw_nbufs) 1765 return (WALK_DONE); 1766 1767 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1768 1769 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 1770 mdb_warn("couldn't read bufctl at %p", addr); 1771 return (WALK_DONE); 1772 } 1773 1774 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata)); 1775 } 1776 1777 void 1778 allocdby_walk_fini(mdb_walk_state_t *wsp) 1779 { 1780 allocdby_walk_t *abw = wsp->walk_data; 1781 1782 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1783 
mdb_free(abw, sizeof (allocdby_walk_t)); 1784 } 1785 1786 /*ARGSUSED*/ 1787 int 1788 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored) 1789 { 1790 char c[MDB_SYM_NAMLEN]; 1791 GElf_Sym sym; 1792 int i; 1793 1794 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 1795 for (i = 0; i < bcp->bc_depth; i++) { 1796 if (mdb_lookup_by_addr(bcp->bc_stack[i], 1797 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 1798 continue; 1799 if (is_umem_sym(c, "umem_")) 1800 continue; 1801 mdb_printf("%s+0x%lx", 1802 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 1803 break; 1804 } 1805 mdb_printf("\n"); 1806 1807 return (WALK_NEXT); 1808 } 1809 1810 static int 1811 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 1812 { 1813 if (!(flags & DCMD_ADDRSPEC)) 1814 return (DCMD_USAGE); 1815 1816 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 1817 1818 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 1819 mdb_warn("can't walk '%s' for %p", w, addr); 1820 return (DCMD_ERR); 1821 } 1822 1823 return (DCMD_OK); 1824 } 1825 1826 /*ARGSUSED*/ 1827 int 1828 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1829 { 1830 return (allocdby_common(addr, flags, "allocdby")); 1831 } 1832 1833 /*ARGSUSED*/ 1834 int 1835 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1836 { 1837 return (allocdby_common(addr, flags, "freedby")); 1838 } 1839 1840 typedef struct whatis_info { 1841 mdb_whatis_t *wi_w; 1842 const umem_cache_t *wi_cache; 1843 const vmem_t *wi_vmem; 1844 vmem_t *wi_msb_arena; 1845 size_t wi_slab_size; 1846 int wi_slab_found; 1847 uint_t wi_freemem; 1848 } whatis_info_t; 1849 1850 /* call one of our dcmd functions with "-v" and the provided address */ 1851 static void 1852 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr) 1853 { 1854 mdb_arg_t a; 1855 a.a_type = MDB_TYPE_STRING; 1856 a.a_un.a_str = "-v"; 1857 1858 mdb_printf(":\n"); 1859 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a); 1860 } 1861 1862 static void 1863 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, 1864 uintptr_t baddr) 1865 { 1866 mdb_whatis_t *w = wi->wi_w; 1867 const umem_cache_t *cp = wi->wi_cache; 1868 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); 1869 1870 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT)); 1871 1872 mdb_whatis_report_object(w, maddr, addr, ""); 1873 1874 if (baddr != 0 && !call_printer) 1875 mdb_printf("bufctl %p ", baddr); 1876 1877 mdb_printf("%s from %s", 1878 (wi->wi_freemem == FALSE) ? 
"allocated" : "freed", cp->cache_name); 1879 1880 if (call_printer && baddr != 0) { 1881 whatis_call_printer(bufctl, baddr); 1882 return; 1883 } 1884 mdb_printf("\n"); 1885 } 1886 1887 /*ARGSUSED*/ 1888 static int 1889 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi) 1890 { 1891 mdb_whatis_t *w = wi->wi_w; 1892 1893 uintptr_t cur; 1894 size_t size = wi->wi_cache->cache_bufsize; 1895 1896 while (mdb_whatis_match(w, addr, size, &cur)) 1897 whatis_print_umem(wi, cur, addr, NULL); 1898 1899 return (WHATIS_WALKRET(w)); 1900 } 1901 1902 /*ARGSUSED*/ 1903 static int 1904 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi) 1905 { 1906 mdb_whatis_t *w = wi->wi_w; 1907 1908 uintptr_t cur; 1909 uintptr_t addr = (uintptr_t)bcp->bc_addr; 1910 size_t size = wi->wi_cache->cache_bufsize; 1911 1912 while (mdb_whatis_match(w, addr, size, &cur)) 1913 whatis_print_umem(wi, cur, addr, baddr); 1914 1915 return (WHATIS_WALKRET(w)); 1916 } 1917 1918 1919 static int 1920 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) 1921 { 1922 mdb_whatis_t *w = wi->wi_w; 1923 1924 size_t size = vs->vs_end - vs->vs_start; 1925 uintptr_t cur; 1926 1927 /* We're not interested in anything but alloc and free segments */ 1928 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) 1929 return (WALK_NEXT); 1930 1931 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { 1932 mdb_whatis_report_object(w, cur, vs->vs_start, ""); 1933 1934 /* 1935 * If we're not printing it seperately, provide the vmem_seg 1936 * pointer if it has a stack trace. 1937 */ 1938 if ((mdb_whatis_flags(w) & WHATIS_QUIET) && 1939 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 || 1940 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 1941 mdb_printf("vmem_seg %p ", addr); 1942 } 1943 1944 mdb_printf("%s from %s vmem arena", 1945 (vs->vs_type == VMEM_ALLOC) ? 
"allocated" : "freed", 1946 wi->wi_vmem->vm_name); 1947 1948 if (!mdb_whatis_flags(w) & WHATIS_QUIET) 1949 whatis_call_printer(vmem_seg, addr); 1950 else 1951 mdb_printf("\n"); 1952 } 1953 1954 return (WHATIS_WALKRET(w)); 1955 } 1956 1957 static int 1958 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) 1959 { 1960 mdb_whatis_t *w = wi->wi_w; 1961 const char *nm = vmem->vm_name; 1962 wi->wi_vmem = vmem; 1963 1964 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 1965 mdb_printf("Searching vmem arena %s...\n", nm); 1966 1967 if (mdb_pwalk("vmem_seg", 1968 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { 1969 mdb_warn("can't walk vmem seg for %p", addr); 1970 return (WALK_NEXT); 1971 } 1972 1973 return (WHATIS_WALKRET(w)); 1974 } 1975 1976 /*ARGSUSED*/ 1977 static int 1978 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi) 1979 { 1980 mdb_whatis_t *w = wi->wi_w; 1981 1982 /* It must overlap with the slab data, or it's not interesting */ 1983 if (mdb_whatis_overlaps(w, 1984 (uintptr_t)sp->slab_base, wi->wi_slab_size)) { 1985 wi->wi_slab_found++; 1986 return (WALK_DONE); 1987 } 1988 return (WALK_NEXT); 1989 } 1990 1991 static int 1992 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 1993 { 1994 mdb_whatis_t *w = wi->wi_w; 1995 char *walk, *freewalk; 1996 mdb_walk_cb_t func; 1997 int do_bufctl; 1998 1999 /* Override the '-b' flag as necessary */ 2000 if (!(c->cache_flags & UMF_HASH)) 2001 do_bufctl = FALSE; /* no bufctls to walk */ 2002 else if (c->cache_flags & UMF_AUDIT) 2003 do_bufctl = TRUE; /* we always want debugging info */ 2004 else 2005 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); 2006 2007 if (do_bufctl) { 2008 walk = "bufctl"; 2009 freewalk = "freectl"; 2010 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2011 } else { 2012 walk = "umem"; 2013 freewalk = "freemem"; 2014 func = (mdb_walk_cb_t)whatis_walk_umem; 2015 } 2016 2017 wi->wi_cache = c; 2018 2019 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2020 mdb_printf("Searching %s...\n", c->cache_name); 2021 2022 /* 2023 * If more then two buffers live on each slab, figure out if we're 2024 * interested in anything in any slab before doing the more expensive 2025 * umem/freemem (bufctl/freectl) walkers. 2026 */ 2027 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; 2028 if (!(c->cache_flags & UMF_HASH)) 2029 wi->wi_slab_size -= sizeof (umem_slab_t); 2030 2031 if ((wi->wi_slab_size / c->cache_chunksize) > 2) { 2032 wi->wi_slab_found = 0; 2033 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, 2034 addr) == -1) { 2035 mdb_warn("can't find umem_slab walker"); 2036 return (WALK_DONE); 2037 } 2038 if (wi->wi_slab_found == 0) 2039 return (WALK_NEXT); 2040 } 2041 2042 wi->wi_freemem = FALSE; 2043 if (mdb_pwalk(walk, func, wi, addr) == -1) { 2044 mdb_warn("can't find %s walker", walk); 2045 return (WALK_DONE); 2046 } 2047 2048 if (mdb_whatis_done(w)) 2049 return (WALK_DONE); 2050 2051 /* 2052 * We have searched for allocated memory; now search for freed memory. 
2053 */ 2054 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2055 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2056 2057 wi->wi_freemem = TRUE; 2058 2059 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2060 mdb_warn("can't find %s walker", freewalk); 2061 return (WALK_DONE); 2062 } 2063 2064 return (WHATIS_WALKRET(w)); 2065 } 2066 2067 static int 2068 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2069 { 2070 if (c->cache_arena == wi->wi_msb_arena || 2071 (c->cache_cflags & UMC_NOTOUCH)) 2072 return (WALK_NEXT); 2073 2074 return (whatis_walk_cache(addr, c, wi)); 2075 } 2076 2077 static int 2078 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2079 { 2080 if (c->cache_arena != wi->wi_msb_arena) 2081 return (WALK_NEXT); 2082 2083 return (whatis_walk_cache(addr, c, wi)); 2084 } 2085 2086 static int 2087 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) 2088 { 2089 if (c->cache_arena == wi->wi_msb_arena || 2090 !(c->cache_cflags & UMC_NOTOUCH)) 2091 return (WALK_NEXT); 2092 2093 return (whatis_walk_cache(addr, c, wi)); 2094 } 2095 2096 /*ARGSUSED*/ 2097 static int 2098 whatis_run_umem(mdb_whatis_t *w, void *ignored) 2099 { 2100 whatis_info_t wi; 2101 2102 bzero(&wi, sizeof (wi)); 2103 wi.wi_w = w; 2104 2105 /* umem's metadata is allocated from the umem_internal_arena */ 2106 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1) 2107 mdb_warn("unable to readvar \"umem_internal_arena\""); 2108 2109 /* 2110 * We process umem caches in the following order: 2111 * 2112 * non-UMC_NOTOUCH, non-metadata (typically the most interesting) 2113 * metadata (can be huge with UMF_AUDIT) 2114 * UMC_NOTOUCH, non-metadata (see umem_walk_all()) 2115 */ 2116 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2117 &wi) == -1 || 2118 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2119 &wi) == -1 || 2120 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2121 &wi) == -1) { 2122 mdb_warn("couldn't find umem_cache walker"); 2123 return (1); 2124 } 2125 return (0); 2126 } 2127 2128 /*ARGSUSED*/ 2129 static int 2130 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2131 { 2132 whatis_info_t wi; 2133 2134 bzero(&wi, sizeof (wi)); 2135 wi.wi_w = w; 2136 2137 if (mdb_walk("vmem_postfix", 2138 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2139 mdb_warn("couldn't find vmem_postfix walker"); 2140 return (1); 2141 } 2142 return (0); 2143 } 2144 2145 int 2146 umem_init(void) 2147 { 2148 mdb_walker_t w = { 2149 "umem_cache", "walk list of umem caches", umem_cache_walk_init, 2150 umem_cache_walk_step, umem_cache_walk_fini 2151 }; 2152 2153 if (mdb_add_walker(&w) == -1) { 2154 mdb_warn("failed to add umem_cache walker"); 2155 return (-1); 2156 } 2157 2158 if (umem_update_variables() == -1) 2159 return (-1); 2160 2161 /* install a callback so that our variables are always up-to-date */ 2162 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL); 2163 umem_statechange_cb(NULL); 2164 2165 /* 2166 * Register our ::whatis callbacks. 
2167 */ 2168 mdb_whatis_register("umem", whatis_run_umem, NULL, 2169 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); 2170 mdb_whatis_register("vmem", whatis_run_vmem, NULL, 2171 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); 2172 2173 return (0); 2174 } 2175 2176 typedef struct umem_log_cpu { 2177 uintptr_t umc_low; 2178 uintptr_t umc_high; 2179 } umem_log_cpu_t; 2180 2181 int 2182 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc) 2183 { 2184 int i; 2185 2186 for (i = 0; i < umem_max_ncpus; i++) { 2187 if (addr >= umc[i].umc_low && addr < umc[i].umc_high) 2188 break; 2189 } 2190 2191 if (i == umem_max_ncpus) 2192 mdb_printf(" "); 2193 else 2194 mdb_printf("%3d", i); 2195 2196 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2197 b->bc_timestamp, b->bc_thread); 2198 2199 return (WALK_NEXT); 2200 } 2201 2202 /*ARGSUSED*/ 2203 int 2204 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2205 { 2206 umem_log_header_t lh; 2207 umem_cpu_log_header_t clh; 2208 uintptr_t lhp, clhp; 2209 umem_log_cpu_t *umc; 2210 int i; 2211 2212 if (umem_readvar(&lhp, "umem_transaction_log") == -1) { 2213 mdb_warn("failed to read 'umem_transaction_log'"); 2214 return (DCMD_ERR); 2215 } 2216 2217 if (lhp == NULL) { 2218 mdb_warn("no umem transaction log\n"); 2219 return (DCMD_ERR); 2220 } 2221 2222 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) { 2223 mdb_warn("failed to read log header at %p", lhp); 2224 return (DCMD_ERR); 2225 } 2226 2227 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2228 2229 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus, 2230 UM_SLEEP | UM_GC); 2231 2232 for (i = 0; i < umem_max_ncpus; i++) { 2233 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2234 mdb_warn("cannot read cpu %d's log header at %p", 2235 i, clhp); 2236 return (DCMD_ERR); 2237 } 2238 2239 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize + 2240 (uintptr_t)lh.lh_base; 2241 umc[i].umc_high = (uintptr_t)clh.clh_current; 2242 2243 clhp += sizeof (umem_cpu_log_header_t); 2244 } 2245 2246 if (DCMD_HDRSPEC(flags)) { 2247 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", 2248 "BUFADDR", "TIMESTAMP", "THREAD"); 2249 } 2250 2251 /* 2252 * If we have been passed an address, we'll just print out that 2253 * log entry. 2254 */ 2255 if (flags & DCMD_ADDRSPEC) { 2256 umem_bufctl_audit_t *bp; 2257 UMEM_LOCAL_BUFCTL_AUDIT(&bp); 2258 2259 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2260 mdb_warn("failed to read bufctl at %p", addr); 2261 return (DCMD_ERR); 2262 } 2263 2264 (void) umem_log_walk(addr, bp, umc); 2265 2266 return (DCMD_OK); 2267 } 2268 2269 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) { 2270 mdb_warn("can't find umem log walker"); 2271 return (DCMD_ERR); 2272 } 2273 2274 return (DCMD_OK); 2275 } 2276 2277 typedef struct bufctl_history_cb { 2278 int bhc_flags; 2279 int bhc_argc; 2280 const mdb_arg_t *bhc_argv; 2281 int bhc_ret; 2282 } bufctl_history_cb_t; 2283 2284 /*ARGSUSED*/ 2285 static int 2286 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2287 { 2288 bufctl_history_cb_t *bhc = arg; 2289 2290 bhc->bhc_ret = 2291 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2292 2293 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2294 2295 return ((bhc->bhc_ret == DCMD_OK)? 
WALK_NEXT : WALK_DONE); 2296 } 2297 2298 void 2299 bufctl_help(void) 2300 { 2301 mdb_printf("%s\n", 2302 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n"); 2303 mdb_dec_indent(2); 2304 mdb_printf("%<b>OPTIONS%</b>\n"); 2305 mdb_inc_indent(2); 2306 mdb_printf("%s", 2307 " -v Display the full content of the bufctl, including its stack trace\n" 2308 " -h retrieve the bufctl's transaction history, if available\n" 2309 " -a addr\n" 2310 " filter out bufctls not involving the buffer at addr\n" 2311 " -c caller\n" 2312 " filter out bufctls without the function/PC in their stack trace\n" 2313 " -e earliest\n" 2314 " filter out bufctls timestamped before earliest\n" 2315 " -l latest\n" 2316 " filter out bufctls timestamped after latest\n" 2317 " -t thread\n" 2318 " filter out bufctls not involving thread\n"); 2319 } 2320 2321 int 2322 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2323 { 2324 uint_t verbose = FALSE; 2325 uint_t history = FALSE; 2326 uint_t in_history = FALSE; 2327 uintptr_t caller = NULL, thread = NULL; 2328 uintptr_t laddr, haddr, baddr = NULL; 2329 hrtime_t earliest = 0, latest = 0; 2330 int i, depth; 2331 char c[MDB_SYM_NAMLEN]; 2332 GElf_Sym sym; 2333 umem_bufctl_audit_t *bcp; 2334 UMEM_LOCAL_BUFCTL_AUDIT(&bcp); 2335 2336 if (mdb_getopts(argc, argv, 2337 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2338 'h', MDB_OPT_SETBITS, TRUE, &history, 2339 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2340 'c', MDB_OPT_UINTPTR, &caller, 2341 't', MDB_OPT_UINTPTR, &thread, 2342 'e', MDB_OPT_UINT64, &earliest, 2343 'l', MDB_OPT_UINT64, &latest, 2344 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2345 return (DCMD_USAGE); 2346 2347 if (!(flags & DCMD_ADDRSPEC)) 2348 return (DCMD_USAGE); 2349 2350 if (in_history && !history) 2351 return (DCMD_USAGE); 2352 2353 if (history && !in_history) { 2354 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2355 UM_SLEEP | UM_GC); 2356 bufctl_history_cb_t bhc; 2357 2358 nargv[0].a_type = MDB_TYPE_STRING; 2359 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2360 2361 for (i = 0; i < argc; i++) 2362 nargv[i + 1] = argv[i]; 2363 2364 /* 2365 * When in history mode, we treat each element as if it 2366 * were in a seperate loop, so that the headers group 2367 * bufctls with similar histories. 2368 */ 2369 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2370 bhc.bhc_argc = argc + 1; 2371 bhc.bhc_argv = nargv; 2372 bhc.bhc_ret = DCMD_OK; 2373 2374 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2375 addr) == -1) { 2376 mdb_warn("unable to walk bufctl_history"); 2377 return (DCMD_ERR); 2378 } 2379 2380 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2381 mdb_printf("\n"); 2382 2383 return (bhc.bhc_ret); 2384 } 2385 2386 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2387 if (verbose) { 2388 mdb_printf("%16s %16s %16s %16s\n" 2389 "%<u>%16s %16s %16s %16s%</u>\n", 2390 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2391 "", "CACHE", "LASTLOG", "CONTENTS"); 2392 } else { 2393 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n", 2394 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER"); 2395 } 2396 } 2397 2398 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { 2399 mdb_warn("couldn't read bufctl at %p", addr); 2400 return (DCMD_ERR); 2401 } 2402 2403 /* 2404 * Guard against bogus bc_depth in case the bufctl is corrupt or 2405 * the address does not really refer to a bufctl. 
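 * For example, with a (hypothetical) umem_stack_depth of 15, a wild
 * bc_depth of 200 read from a non-bufctl address is clamped to 15, so the
 * stack loops below never index past the locally allocated bc_stack[].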
2406 */ 2407 depth = MIN(bcp->bc_depth, umem_stack_depth); 2408 2409 if (caller != NULL) { 2410 laddr = caller; 2411 haddr = caller + sizeof (caller); 2412 2413 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2414 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2415 /* 2416 * We were provided an exact symbol value; any 2417 * address in the function is valid. 2418 */ 2419 laddr = (uintptr_t)sym.st_value; 2420 haddr = (uintptr_t)sym.st_value + sym.st_size; 2421 } 2422 2423 for (i = 0; i < depth; i++) 2424 if (bcp->bc_stack[i] >= laddr && 2425 bcp->bc_stack[i] < haddr) 2426 break; 2427 2428 if (i == depth) 2429 return (DCMD_OK); 2430 } 2431 2432 if (thread != NULL && (uintptr_t)bcp->bc_thread != thread) 2433 return (DCMD_OK); 2434 2435 if (earliest != 0 && bcp->bc_timestamp < earliest) 2436 return (DCMD_OK); 2437 2438 if (latest != 0 && bcp->bc_timestamp > latest) 2439 return (DCMD_OK); 2440 2441 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr) 2442 return (DCMD_OK); 2443 2444 if (flags & DCMD_PIPE_OUT) { 2445 mdb_printf("%#r\n", addr); 2446 return (DCMD_OK); 2447 } 2448 2449 if (verbose) { 2450 mdb_printf( 2451 "%<b>%16p%</b> %16p %16llx %16d\n" 2452 "%16s %16p %16p %16p\n", 2453 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread, 2454 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents); 2455 2456 mdb_inc_indent(17); 2457 for (i = 0; i < depth; i++) 2458 mdb_printf("%a\n", bcp->bc_stack[i]); 2459 mdb_dec_indent(17); 2460 mdb_printf("\n"); 2461 } else { 2462 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr, 2463 bcp->bc_timestamp, bcp->bc_thread); 2464 2465 for (i = 0; i < depth; i++) { 2466 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2467 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2468 continue; 2469 if (is_umem_sym(c, "umem_")) 2470 continue; 2471 mdb_printf(" %a\n", bcp->bc_stack[i]); 2472 break; 2473 } 2474 2475 if (i >= depth) 2476 mdb_printf("\n"); 2477 } 2478 2479 return (DCMD_OK); 2480 } 2481 2482 /*ARGSUSED*/ 2483 int 2484 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2485 { 2486 mdb_arg_t a; 2487 2488 if (!(flags & DCMD_ADDRSPEC)) 2489 return (DCMD_USAGE); 2490 2491 if (argc != 0) 2492 return (DCMD_USAGE); 2493 2494 a.a_type = MDB_TYPE_STRING; 2495 a.a_un.a_str = "-v"; 2496 2497 return (bufctl(addr, flags, 1, &a)); 2498 } 2499 2500 typedef struct umem_verify { 2501 uint64_t *umv_buf; /* buffer to read cache contents into */ 2502 size_t umv_size; /* number of bytes in umv_buf */ 2503 int umv_corruption; /* > 0 if corruption found. */ 2504 int umv_besilent; /* report actual corruption sites */ 2505 struct umem_cache umv_cache; /* the cache we're operating on */ 2506 } umem_verify_t; 2507 2508 /* 2509 * verify_pattern() 2510 * verify that buf is filled with the pattern pat. 2511 */ 2512 static int64_t 2513 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2514 { 2515 /*LINTED*/ 2516 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 2517 uint64_t *buf; 2518 2519 for (buf = buf_arg; buf < bufend; buf++) 2520 if (*buf != pat) 2521 return ((uintptr_t)buf - (uintptr_t)buf_arg); 2522 return (-1); 2523 } 2524 2525 /* 2526 * verify_buftag() 2527 * verify that btp->bt_bxstat == (bcp ^ pat) 2528 */ 2529 static int 2530 verify_buftag(umem_buftag_t *btp, uintptr_t pat) 2531 { 2532 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 2533 } 2534 2535 /* 2536 * verify_free() 2537 * verify the integrity of a free block of memory by checking 2538 * that it is filled with 0xdeadbeef and that its buftag is sane. 
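 * "Sane" covers the checks below, in addition to the 0xdeadbeef fill: for
 * UMF_HASH caches the buftag redzone must still be UMEM_REDZONE_PATTERN,
 * and verify_buftag() must see bt_bxstat == bt_bufctl ^ UMEM_BUFTAG_FREE.
 * As a worked example with hypothetical values, a free buffer whose
 * bt_bufctl is 0x08090a00 must carry bt_bxstat == (0x08090a00 ^
 * UMEM_BUFTAG_FREE); anything else increments umv_corruption.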
2539 */ 2540 /*ARGSUSED1*/ 2541 static int 2542 verify_free(uintptr_t addr, const void *data, void *private) 2543 { 2544 umem_verify_t *umv = (umem_verify_t *)private; 2545 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2546 int64_t corrupt; /* corruption offset */ 2547 umem_buftag_t *buftagp; /* ptr to buftag */ 2548 umem_cache_t *cp = &umv->umv_cache; 2549 int besilent = umv->umv_besilent; 2550 2551 /*LINTED*/ 2552 buftagp = UMEM_BUFTAG(cp, buf); 2553 2554 /* 2555 * Read the buffer to check. 2556 */ 2557 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2558 if (!besilent) 2559 mdb_warn("couldn't read %p", addr); 2560 return (WALK_NEXT); 2561 } 2562 2563 if ((corrupt = verify_pattern(buf, cp->cache_verify, 2564 UMEM_FREE_PATTERN)) >= 0) { 2565 if (!besilent) 2566 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 2567 addr, (uintptr_t)addr + corrupt); 2568 goto corrupt; 2569 } 2570 2571 if ((cp->cache_flags & UMF_HASH) && 2572 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) { 2573 if (!besilent) 2574 mdb_printf("buffer %p (free) seems to " 2575 "have a corrupt redzone pattern\n", addr); 2576 goto corrupt; 2577 } 2578 2579 /* 2580 * confirm bufctl pointer integrity. 2581 */ 2582 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) { 2583 if (!besilent) 2584 mdb_printf("buffer %p (free) has a corrupt " 2585 "buftag\n", addr); 2586 goto corrupt; 2587 } 2588 2589 return (WALK_NEXT); 2590 corrupt: 2591 umv->umv_corruption++; 2592 return (WALK_NEXT); 2593 } 2594 2595 /* 2596 * verify_alloc() 2597 * Verify that the buftag of an allocated buffer makes sense with respect 2598 * to the buffer. 2599 */ 2600 /*ARGSUSED1*/ 2601 static int 2602 verify_alloc(uintptr_t addr, const void *data, void *private) 2603 { 2604 umem_verify_t *umv = (umem_verify_t *)private; 2605 umem_cache_t *cp = &umv->umv_cache; 2606 uint64_t *buf = umv->umv_buf; /* buf to validate */ 2607 /*LINTED*/ 2608 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf); 2609 uint32_t *ip = (uint32_t *)buftagp; 2610 uint8_t *bp = (uint8_t *)buf; 2611 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 2612 int besilent = umv->umv_besilent; 2613 2614 /* 2615 * Read the buffer to check. 2616 */ 2617 if (mdb_vread(buf, umv->umv_size, addr) == -1) { 2618 if (!besilent) 2619 mdb_warn("couldn't read %p", addr); 2620 return (WALK_NEXT); 2621 } 2622 2623 /* 2624 * There are two cases to handle: 2625 * 1. If the buf was alloc'd using umem_cache_alloc, it will have 2626 * 0xfeedfacefeedface at the end of it 2627 * 2. If the buf was alloc'd using umem_alloc, it will have 2628 * 0xbb just past the end of the region in use. At the buftag, 2629 * it will have 0xfeedface (or, if the whole buffer is in use, 2630 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 2631 * endianness), followed by 32 bits containing the offset of the 2632 * 0xbb byte in the buffer. 
2633 * 2634 * Finally, the two 32-bit words that comprise the second half of the 2635 * buftag should xor to UMEM_BUFTAG_ALLOC 2636 */ 2637 2638 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN) 2639 looks_ok = 1; 2640 else if (!UMEM_SIZE_VALID(ip[1])) 2641 size_ok = 0; 2642 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE) 2643 looks_ok = 1; 2644 else 2645 size_ok = 0; 2646 2647 if (!size_ok) { 2648 if (!besilent) 2649 mdb_printf("buffer %p (allocated) has a corrupt " 2650 "redzone size encoding\n", addr); 2651 goto corrupt; 2652 } 2653 2654 if (!looks_ok) { 2655 if (!besilent) 2656 mdb_printf("buffer %p (allocated) has a corrupt " 2657 "redzone signature\n", addr); 2658 goto corrupt; 2659 } 2660 2661 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) { 2662 if (!besilent) 2663 mdb_printf("buffer %p (allocated) has a " 2664 "corrupt buftag\n", addr); 2665 goto corrupt; 2666 } 2667 2668 return (WALK_NEXT); 2669 corrupt: 2670 umv->umv_corruption++; 2671 return (WALK_NEXT); 2672 } 2673 2674 /*ARGSUSED2*/ 2675 int 2676 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2677 { 2678 if (flags & DCMD_ADDRSPEC) { 2679 int check_alloc = 0, check_free = 0; 2680 umem_verify_t umv; 2681 2682 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache), 2683 addr) == -1) { 2684 mdb_warn("couldn't read umem_cache %p", addr); 2685 return (DCMD_ERR); 2686 } 2687 2688 umv.umv_size = umv.umv_cache.cache_buftag + 2689 sizeof (umem_buftag_t); 2690 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC); 2691 umv.umv_corruption = 0; 2692 2693 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) { 2694 check_alloc = 1; 2695 if (umv.umv_cache.cache_flags & UMF_DEADBEEF) 2696 check_free = 1; 2697 } else { 2698 if (!(flags & DCMD_LOOP)) { 2699 mdb_warn("cache %p (%s) does not have " 2700 "redzone checking enabled\n", addr, 2701 umv.umv_cache.cache_name); 2702 } 2703 return (DCMD_ERR); 2704 } 2705 2706 if (flags & DCMD_LOOP) { 2707 /* 2708 * table mode, don't print out every corrupt buffer 2709 */ 2710 umv.umv_besilent = 1; 2711 } else { 2712 mdb_printf("Summary for cache '%s'\n", 2713 umv.umv_cache.cache_name); 2714 mdb_inc_indent(2); 2715 umv.umv_besilent = 0; 2716 } 2717 2718 if (check_alloc) 2719 (void) mdb_pwalk("umem", verify_alloc, &umv, addr); 2720 if (check_free) 2721 (void) mdb_pwalk("freemem", verify_free, &umv, addr); 2722 2723 if (flags & DCMD_LOOP) { 2724 if (umv.umv_corruption == 0) { 2725 mdb_printf("%-*s %?p clean\n", 2726 UMEM_CACHE_NAMELEN, 2727 umv.umv_cache.cache_name, addr); 2728 } else { 2729 char *s = ""; /* optional s in "buffer[s]" */ 2730 if (umv.umv_corruption > 1) 2731 s = "s"; 2732 2733 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 2734 UMEM_CACHE_NAMELEN, 2735 umv.umv_cache.cache_name, addr, 2736 umv.umv_corruption, s); 2737 } 2738 } else { 2739 /* 2740 * This is the more verbose mode, when the user has 2741 * type addr::umem_verify. If the cache was clean, 2742 * nothing will have yet been printed. So say something. 2743 */ 2744 if (umv.umv_corruption == 0) 2745 mdb_printf("clean\n"); 2746 2747 mdb_dec_indent(2); 2748 } 2749 } else { 2750 /* 2751 * If the user didn't specify a cache to verify, we'll walk all 2752 * umem_cache's, specifying ourself as a callback for each... 
2753 * this is the equivalent of '::walk umem_cache .::umem_verify' 2754 */ 2755 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN, 2756 "Cache Name", "Addr", "Cache Integrity"); 2757 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL)); 2758 } 2759 2760 return (DCMD_OK); 2761 } 2762 2763 typedef struct vmem_node { 2764 struct vmem_node *vn_next; 2765 struct vmem_node *vn_parent; 2766 struct vmem_node *vn_sibling; 2767 struct vmem_node *vn_children; 2768 uintptr_t vn_addr; 2769 int vn_marked; 2770 vmem_t vn_vmem; 2771 } vmem_node_t; 2772 2773 typedef struct vmem_walk { 2774 vmem_node_t *vw_root; 2775 vmem_node_t *vw_current; 2776 } vmem_walk_t; 2777 2778 int 2779 vmem_walk_init(mdb_walk_state_t *wsp) 2780 { 2781 uintptr_t vaddr, paddr; 2782 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 2783 vmem_walk_t *vw; 2784 2785 if (umem_readvar(&vaddr, "vmem_list") == -1) { 2786 mdb_warn("couldn't read 'vmem_list'"); 2787 return (WALK_ERR); 2788 } 2789 2790 while (vaddr != NULL) { 2791 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 2792 vp->vn_addr = vaddr; 2793 vp->vn_next = head; 2794 head = vp; 2795 2796 if (vaddr == wsp->walk_addr) 2797 current = vp; 2798 2799 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 2800 mdb_warn("couldn't read vmem_t at %p", vaddr); 2801 goto err; 2802 } 2803 2804 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 2805 } 2806 2807 for (vp = head; vp != NULL; vp = vp->vn_next) { 2808 2809 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 2810 vp->vn_sibling = root; 2811 root = vp; 2812 continue; 2813 } 2814 2815 for (parent = head; parent != NULL; parent = parent->vn_next) { 2816 if (parent->vn_addr != paddr) 2817 continue; 2818 vp->vn_sibling = parent->vn_children; 2819 parent->vn_children = vp; 2820 vp->vn_parent = parent; 2821 break; 2822 } 2823 2824 if (parent == NULL) { 2825 mdb_warn("couldn't find %p's parent (%p)\n", 2826 vp->vn_addr, paddr); 2827 goto err; 2828 } 2829 } 2830 2831 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 2832 vw->vw_root = root; 2833 2834 if (current != NULL) 2835 vw->vw_current = current; 2836 else 2837 vw->vw_current = root; 2838 2839 wsp->walk_data = vw; 2840 return (WALK_NEXT); 2841 err: 2842 for (vp = head; head != NULL; vp = head) { 2843 head = vp->vn_next; 2844 mdb_free(vp, sizeof (vmem_node_t)); 2845 } 2846 2847 return (WALK_ERR); 2848 } 2849 2850 int 2851 vmem_walk_step(mdb_walk_state_t *wsp) 2852 { 2853 vmem_walk_t *vw = wsp->walk_data; 2854 vmem_node_t *vp; 2855 int rval; 2856 2857 if ((vp = vw->vw_current) == NULL) 2858 return (WALK_DONE); 2859 2860 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 2861 2862 if (vp->vn_children != NULL) { 2863 vw->vw_current = vp->vn_children; 2864 return (rval); 2865 } 2866 2867 do { 2868 vw->vw_current = vp->vn_sibling; 2869 vp = vp->vn_parent; 2870 } while (vw->vw_current == NULL && vp != NULL); 2871 2872 return (rval); 2873 } 2874 2875 /* 2876 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 2877 * children are visited before their parent. We perform the postfix walk 2878 * iteratively (rather than recursively) to allow mdb to regain control 2879 * after each callback. 2880 */ 2881 int 2882 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 2883 { 2884 vmem_walk_t *vw = wsp->walk_data; 2885 vmem_node_t *vp = vw->vw_current; 2886 int rval; 2887 2888 /* 2889 * If this node is marked, then we know that we have already visited 2890 * all of its children. 
If the node has any siblings, they need to 2891 * be visited next; otherwise, we need to visit the parent. Note 2892 * that vp->vn_marked will only be zero on the first invocation of 2893 * the step function. 2894 */ 2895 if (vp->vn_marked) { 2896 if (vp->vn_sibling != NULL) 2897 vp = vp->vn_sibling; 2898 else if (vp->vn_parent != NULL) 2899 vp = vp->vn_parent; 2900 else { 2901 /* 2902 * We have neither a parent, nor a sibling, and we 2903 * have already been visited; we're done. 2904 */ 2905 return (WALK_DONE); 2906 } 2907 } 2908 2909 /* 2910 * Before we visit this node, visit its children. 2911 */ 2912 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 2913 vp = vp->vn_children; 2914 2915 vp->vn_marked = 1; 2916 vw->vw_current = vp; 2917 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 2918 2919 return (rval); 2920 } 2921 2922 void 2923 vmem_walk_fini(mdb_walk_state_t *wsp) 2924 { 2925 vmem_walk_t *vw = wsp->walk_data; 2926 vmem_node_t *root = vw->vw_root; 2927 int done; 2928 2929 if (root == NULL) 2930 return; 2931 2932 if ((vw->vw_root = root->vn_children) != NULL) 2933 vmem_walk_fini(wsp); 2934 2935 vw->vw_root = root->vn_sibling; 2936 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 2937 mdb_free(root, sizeof (vmem_node_t)); 2938 2939 if (done) { 2940 mdb_free(vw, sizeof (vmem_walk_t)); 2941 } else { 2942 vmem_walk_fini(wsp); 2943 } 2944 } 2945 2946 typedef struct vmem_seg_walk { 2947 uint8_t vsw_type; 2948 uintptr_t vsw_start; 2949 uintptr_t vsw_current; 2950 } vmem_seg_walk_t; 2951 2952 /*ARGSUSED*/ 2953 int 2954 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 2955 { 2956 vmem_seg_walk_t *vsw; 2957 2958 if (wsp->walk_addr == NULL) { 2959 mdb_warn("vmem_%s does not support global walks\n", name); 2960 return (WALK_ERR); 2961 } 2962 2963 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 2964 2965 vsw->vsw_type = type; 2966 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0); 2967 vsw->vsw_current = vsw->vsw_start; 2968 2969 return (WALK_NEXT); 2970 } 2971 2972 /* 2973 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
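 * vmem_seg_walk_common_init() relies on this below: VMEM_NONE acts as a
 * wildcard, so the plain "vmem_seg" walk visits every segment, while the
 * vmem_alloc, vmem_free, and vmem_span walks pass a real type and filter
 * on seg.vs_type in vmem_seg_walk_step().  A hypothetical pipeline:
 *
 *	> <arena addr>::walk vmem_alloc | ::vmem_seg -s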
2974 */ 2975 #define VMEM_NONE 0 2976 2977 int 2978 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 2979 { 2980 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 2981 } 2982 2983 int 2984 vmem_free_walk_init(mdb_walk_state_t *wsp) 2985 { 2986 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 2987 } 2988 2989 int 2990 vmem_span_walk_init(mdb_walk_state_t *wsp) 2991 { 2992 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 2993 } 2994 2995 int 2996 vmem_seg_walk_init(mdb_walk_state_t *wsp) 2997 { 2998 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 2999 } 3000 3001 int 3002 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3003 { 3004 vmem_seg_t seg; 3005 vmem_seg_walk_t *vsw = wsp->walk_data; 3006 uintptr_t addr = vsw->vsw_current; 3007 static size_t seg_size = 0; 3008 int rval; 3009 3010 if (!seg_size) { 3011 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) { 3012 mdb_warn("failed to read 'vmem_seg_size'"); 3013 seg_size = sizeof (vmem_seg_t); 3014 } 3015 } 3016 3017 if (seg_size < sizeof (seg)) 3018 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3019 3020 if (mdb_vread(&seg, seg_size, addr) == -1) { 3021 mdb_warn("couldn't read vmem_seg at %p", addr); 3022 return (WALK_ERR); 3023 } 3024 3025 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3026 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3027 rval = WALK_NEXT; 3028 } else { 3029 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3030 } 3031 3032 if (vsw->vsw_current == vsw->vsw_start) 3033 return (WALK_DONE); 3034 3035 return (rval); 3036 } 3037 3038 void 3039 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3040 { 3041 vmem_seg_walk_t *vsw = wsp->walk_data; 3042 3043 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3044 } 3045 3046 #define VMEM_NAMEWIDTH 22 3047 3048 int 3049 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3050 { 3051 vmem_t v, parent; 3052 uintptr_t paddr; 3053 int ident = 0; 3054 char c[VMEM_NAMEWIDTH]; 3055 3056 if (!(flags & DCMD_ADDRSPEC)) { 3057 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3058 mdb_warn("can't walk vmem"); 3059 return (DCMD_ERR); 3060 } 3061 return (DCMD_OK); 3062 } 3063 3064 if (DCMD_HDRSPEC(flags)) 3065 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3066 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3067 "TOTAL", "SUCCEED", "FAIL"); 3068 3069 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3070 mdb_warn("couldn't read vmem at %p", addr); 3071 return (DCMD_ERR); 3072 } 3073 3074 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3075 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3076 mdb_warn("couldn't trace %p's ancestry", addr); 3077 ident = 0; 3078 break; 3079 } 3080 paddr = (uintptr_t)parent.vm_source; 3081 } 3082 3083 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3084 3085 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3086 addr, VMEM_NAMEWIDTH, c, 3087 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total, 3088 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail); 3089 3090 return (DCMD_OK); 3091 } 3092 3093 void 3094 vmem_seg_help(void) 3095 { 3096 mdb_printf("%s\n", 3097 "Display the contents of vmem_seg_ts, with optional filtering.\n" 3098 "\n" 3099 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3100 "representing a single chunk of data. 
Only ALLOC segments have debugging\n"
3101 "information.\n");
3102 mdb_dec_indent(2);
3103 mdb_printf("%<b>OPTIONS%</b>\n");
3104 mdb_inc_indent(2);
3105 mdb_printf("%s",
3106 " -v Display the full content of the vmem_seg, including its stack trace\n"
3107 " -s report the size of the segment, instead of the end address\n"
3108 " -c caller\n"
3109 " filter out segments without the function/PC in their stack trace\n"
3110 " -e earliest\n"
3111 " filter out segments timestamped before earliest\n"
3112 " -l latest\n"
3113 " filter out segments timestamped after latest\n"
3114 " -m minsize\n"
3115 " filter out segments smaller than minsize\n"
3116 " -M maxsize\n"
3117 " filter out segments larger than maxsize\n"
3118 " -t thread\n"
3119 " filter out segments not involving thread\n"
3120 " -T type\n"
3121 " filter out segments not of type 'type'\n"
3122 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3123 }
3124
3125
3126 /*ARGSUSED*/
3127 int
3128 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3129 {
3130 vmem_seg_t vs;
3131 uintptr_t *stk = vs.vs_stack;
3132 uintptr_t sz;
3133 uint8_t t;
3134 const char *type = NULL;
3135 GElf_Sym sym;
3136 char c[MDB_SYM_NAMLEN];
3137 int no_debug;
3138 int i;
3139 int depth;
3140 uintptr_t laddr, haddr;
3141
3142 uintptr_t caller = NULL, thread = NULL;
3143 uintptr_t minsize = 0, maxsize = 0;
3144
3145 hrtime_t earliest = 0, latest = 0;
3146
3147 uint_t size = 0;
3148 uint_t verbose = 0;
3149
3150 if (!(flags & DCMD_ADDRSPEC))
3151 return (DCMD_USAGE);
3152
3153 if (mdb_getopts(argc, argv,
3154 'c', MDB_OPT_UINTPTR, &caller,
3155 'e', MDB_OPT_UINT64, &earliest,
3156 'l', MDB_OPT_UINT64, &latest,
3157 's', MDB_OPT_SETBITS, TRUE, &size,
3158 'm', MDB_OPT_UINTPTR, &minsize,
3159 'M', MDB_OPT_UINTPTR, &maxsize,
3160 't', MDB_OPT_UINTPTR, &thread,
3161 'T', MDB_OPT_STR, &type,
3162 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3163 NULL) != argc)
3164 return (DCMD_USAGE);
3165
3166 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3167 if (verbose) {
3168 mdb_printf("%16s %4s %16s %16s %16s\n"
3169 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3170 "ADDR", "TYPE", "START", "END", "SIZE",
3171 "", "", "THREAD", "TIMESTAMP", "");
3172 } else {
3173 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3174 "START", size?
"SIZE" : "END", "WHO"); 3175 } 3176 } 3177 3178 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3179 mdb_warn("couldn't read vmem_seg at %p", addr); 3180 return (DCMD_ERR); 3181 } 3182 3183 if (type != NULL) { 3184 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3185 t = VMEM_ALLOC; 3186 else if (strcmp(type, "FREE") == 0) 3187 t = VMEM_FREE; 3188 else if (strcmp(type, "SPAN") == 0) 3189 t = VMEM_SPAN; 3190 else if (strcmp(type, "ROTR") == 0 || 3191 strcmp(type, "ROTOR") == 0) 3192 t = VMEM_ROTOR; 3193 else if (strcmp(type, "WLKR") == 0 || 3194 strcmp(type, "WALKER") == 0) 3195 t = VMEM_WALKER; 3196 else { 3197 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3198 type); 3199 return (DCMD_ERR); 3200 } 3201 3202 if (vs.vs_type != t) 3203 return (DCMD_OK); 3204 } 3205 3206 sz = vs.vs_end - vs.vs_start; 3207 3208 if (minsize != 0 && sz < minsize) 3209 return (DCMD_OK); 3210 3211 if (maxsize != 0 && sz > maxsize) 3212 return (DCMD_OK); 3213 3214 t = vs.vs_type; 3215 depth = vs.vs_depth; 3216 3217 /* 3218 * debug info, when present, is only accurate for VMEM_ALLOC segments 3219 */ 3220 no_debug = (t != VMEM_ALLOC) || 3221 (depth == 0 || depth > VMEM_STACK_DEPTH); 3222 3223 if (no_debug) { 3224 if (caller != NULL || thread != NULL || earliest != 0 || 3225 latest != 0) 3226 return (DCMD_OK); /* not enough info */ 3227 } else { 3228 if (caller != NULL) { 3229 laddr = caller; 3230 haddr = caller + sizeof (caller); 3231 3232 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3233 sizeof (c), &sym) != -1 && 3234 caller == (uintptr_t)sym.st_value) { 3235 /* 3236 * We were provided an exact symbol value; any 3237 * address in the function is valid. 3238 */ 3239 laddr = (uintptr_t)sym.st_value; 3240 haddr = (uintptr_t)sym.st_value + sym.st_size; 3241 } 3242 3243 for (i = 0; i < depth; i++) 3244 if (vs.vs_stack[i] >= laddr && 3245 vs.vs_stack[i] < haddr) 3246 break; 3247 3248 if (i == depth) 3249 return (DCMD_OK); 3250 } 3251 3252 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3253 return (DCMD_OK); 3254 3255 if (earliest != 0 && vs.vs_timestamp < earliest) 3256 return (DCMD_OK); 3257 3258 if (latest != 0 && vs.vs_timestamp > latest) 3259 return (DCMD_OK); 3260 } 3261 3262 type = (t == VMEM_ALLOC ? "ALLC" : 3263 t == VMEM_FREE ? "FREE" : 3264 t == VMEM_SPAN ? "SPAN" : 3265 t == VMEM_ROTOR ? "ROTR" : 3266 t == VMEM_WALKER ? "WLKR" : 3267 "????"); 3268 3269 if (flags & DCMD_PIPE_OUT) { 3270 mdb_printf("%#r\n", addr); 3271 return (DCMD_OK); 3272 } 3273 3274 if (verbose) { 3275 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3276 addr, type, vs.vs_start, vs.vs_end, sz); 3277 3278 if (no_debug) 3279 return (DCMD_OK); 3280 3281 mdb_printf("%16s %4s %16d %16llx\n", 3282 "", "", vs.vs_thread, vs.vs_timestamp); 3283 3284 mdb_inc_indent(17); 3285 for (i = 0; i < depth; i++) { 3286 mdb_printf("%a\n", stk[i]); 3287 } 3288 mdb_dec_indent(17); 3289 mdb_printf("\n"); 3290 } else { 3291 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3292 vs.vs_start, size? 
sz : vs.vs_end);
3293
3294 if (no_debug) {
3295 mdb_printf("\n");
3296 return (DCMD_OK);
3297 }
3298
3299 for (i = 0; i < depth; i++) {
3300 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3301 c, sizeof (c), &sym) == -1)
3302 continue;
3303 if (is_umem_sym(c, "vmem_"))
3304 continue;
3305 break;
3306 }
3307 mdb_printf(" %a\n", stk[i]);
3308 }
3309 return (DCMD_OK);
3310 }
3311
3312 /*ARGSUSED*/
3313 static int
3314 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3315 {
3316 char name[UMEM_CACHE_NAMELEN + 1];
3317 hrtime_t delta;
3318 int i, depth;
3319
3320 if (bcp->bc_timestamp == 0)
3321 return (WALK_DONE);
3322
3323 if (*newest == 0)
3324 *newest = bcp->bc_timestamp;
3325
3326 delta = *newest - bcp->bc_timestamp;
3327 depth = MIN(bcp->bc_depth, umem_stack_depth);
3328
3329 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3330 &bcp->bc_cache->cache_name) <= 0)
3331 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3332
3333 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3334 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3335
3336 for (i = 0; i < depth; i++)
3337 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3338
3339 return (WALK_NEXT);
3340 }
3341
3342 int
3343 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3344 {
3345 const char *logname = "umem_transaction_log";
3346 hrtime_t newest = 0;
3347
3348 if ((flags & DCMD_ADDRSPEC) || argc > 1)
3349 return (DCMD_USAGE);
3350
3351 if (argc > 0) {
3352 if (argv->a_type != MDB_TYPE_STRING)
3353 return (DCMD_USAGE);
3354 if (strcmp(argv->a_un.a_str, "fail") == 0)
3355 logname = "umem_failure_log";
3356 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3357 logname = "umem_slab_log";
3358 else
3359 return (DCMD_USAGE);
3360 }
3361
3362 if (umem_readvar(&addr, logname) == -1) {
3363 mdb_warn("failed to read %s log header pointer", logname);
3364 return (DCMD_ERR);
3365 }
3366
3367 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3368 mdb_warn("failed to walk umem log");
3369 return (DCMD_ERR);
3370 }
3371
3372 return (DCMD_OK);
3373 }
3374
3375 /*
3376 * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3377 * The first piece is a structure which we use to accumulate umem_cache_t
3378 * addresses of interest. The umc_add is used as a callback for the umem_cache
3379 * walker; we either add all caches, or ones named explicitly as arguments.
3380 */
3381
3382 typedef struct umclist {
3383 const char *umc_name; /* Name to match (or NULL) */
3384 uintptr_t *umc_caches; /* List of umem_cache_t addrs */
3385 int umc_nelems; /* Num entries in umc_caches */
3386 int umc_size; /* Size of umc_caches array */
3387 } umclist_t;
3388
3389 static int
3390 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3391 {
3392 void *p;
3393 int s;
3394
3395 if (umc->umc_name == NULL ||
3396 strcmp(cp->cache_name, umc->umc_name) == 0) {
3397 /*
3398 * If we have a match, grow our array (if necessary), and then
3399 * add the virtual address of the matching cache to our list.
3400 */
3401 if (umc->umc_nelems >= umc->umc_size) {
3402 s = umc->umc_size ? umc->umc_size * 2 : 256;
3403 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3404
3405 bcopy(umc->umc_caches, p,
3406 sizeof (uintptr_t) * umc->umc_size);
3407
3408 umc->umc_caches = p;
3409 umc->umc_size = s;
3410 }
3411
3412 umc->umc_caches[umc->umc_nelems++] = addr;
3413 return (umc->umc_name ?
WALK_DONE : WALK_NEXT); 3414 } 3415 3416 return (WALK_NEXT); 3417 } 3418 3419 /* 3420 * The second piece of ::umausers is a hash table of allocations. Each 3421 * allocation owner is identified by its stack trace and data_size. We then 3422 * track the total bytes of all such allocations, and the number of allocations 3423 * to report at the end. Once we have a list of caches, we walk through the 3424 * allocated bufctls of each, and update our hash table accordingly. 3425 */ 3426 3427 typedef struct umowner { 3428 struct umowner *umo_head; /* First hash elt in bucket */ 3429 struct umowner *umo_next; /* Next hash elt in chain */ 3430 size_t umo_signature; /* Hash table signature */ 3431 uint_t umo_num; /* Number of allocations */ 3432 size_t umo_data_size; /* Size of each allocation */ 3433 size_t umo_total_size; /* Total bytes of allocation */ 3434 int umo_depth; /* Depth of stack trace */ 3435 uintptr_t *umo_stack; /* Stack trace */ 3436 } umowner_t; 3437 3438 typedef struct umusers { 3439 const umem_cache_t *umu_cache; /* Current umem cache */ 3440 umowner_t *umu_hash; /* Hash table of owners */ 3441 uintptr_t *umu_stacks; /* stacks for owners */ 3442 int umu_nelems; /* Number of entries in use */ 3443 int umu_size; /* Total number of entries */ 3444 } umusers_t; 3445 3446 static void 3447 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp, 3448 size_t size, size_t data_size) 3449 { 3450 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3451 size_t bucket, signature = data_size; 3452 umowner_t *umo, *umoend; 3453 3454 /* 3455 * If the hash table is full, double its size and rehash everything. 3456 */ 3457 if (umu->umu_nelems >= umu->umu_size) { 3458 int s = umu->umu_size ? umu->umu_size * 2 : 1024; 3459 size_t umowner_size = sizeof (umowner_t); 3460 size_t trace_size = umem_stack_depth * sizeof (uintptr_t); 3461 uintptr_t *new_stacks; 3462 3463 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC); 3464 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC); 3465 3466 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size); 3467 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size); 3468 umu->umu_hash = umo; 3469 umu->umu_stacks = new_stacks; 3470 umu->umu_size = s; 3471 3472 umoend = umu->umu_hash + umu->umu_size; 3473 for (umo = umu->umu_hash; umo < umoend; umo++) { 3474 umo->umo_head = NULL; 3475 umo->umo_stack = &umu->umu_stacks[ 3476 umem_stack_depth * (umo - umu->umu_hash)]; 3477 } 3478 3479 umoend = umu->umu_hash + umu->umu_nelems; 3480 for (umo = umu->umu_hash; umo < umoend; umo++) { 3481 bucket = umo->umo_signature & (umu->umu_size - 1); 3482 umo->umo_next = umu->umu_hash[bucket].umo_head; 3483 umu->umu_hash[bucket].umo_head = umo; 3484 } 3485 } 3486 3487 /* 3488 * Finish computing the hash signature from the stack trace, and then 3489 * see if the owner is in the hash table. If so, update our stats. 
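 * As a small worked example with hypothetical values: data_size 64 and a
 * two-frame stack of 0x0807f000 and 0x0807f250 yield
 * signature = 64 + 0x0807f000 + 0x0807f250, and the owner lands in bucket
 * (signature & (umu_size - 1)) -- which is why umu_size is always kept a
 * power of two (1024, 2048, ...).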
3490 */ 3491 for (i = 0; i < depth; i++) 3492 signature += bcp->bc_stack[i]; 3493 3494 bucket = signature & (umu->umu_size - 1); 3495 3496 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) { 3497 if (umo->umo_signature == signature) { 3498 size_t difference = 0; 3499 3500 difference |= umo->umo_data_size - data_size; 3501 difference |= umo->umo_depth - depth; 3502 3503 for (i = 0; i < depth; i++) { 3504 difference |= umo->umo_stack[i] - 3505 bcp->bc_stack[i]; 3506 } 3507 3508 if (difference == 0) { 3509 umo->umo_total_size += size; 3510 umo->umo_num++; 3511 return; 3512 } 3513 } 3514 } 3515 3516 /* 3517 * If the owner is not yet hashed, grab the next element and fill it 3518 * in based on the allocation information. 3519 */ 3520 umo = &umu->umu_hash[umu->umu_nelems++]; 3521 umo->umo_next = umu->umu_hash[bucket].umo_head; 3522 umu->umu_hash[bucket].umo_head = umo; 3523 3524 umo->umo_signature = signature; 3525 umo->umo_num = 1; 3526 umo->umo_data_size = data_size; 3527 umo->umo_total_size = size; 3528 umo->umo_depth = depth; 3529 3530 for (i = 0; i < depth; i++) 3531 umo->umo_stack[i] = bcp->bc_stack[i]; 3532 } 3533 3534 /* 3535 * When ::umausers is invoked without the -f flag, we simply update our hash 3536 * table with the information from each allocated bufctl. 3537 */ 3538 /*ARGSUSED*/ 3539 static int 3540 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3541 { 3542 const umem_cache_t *cp = umu->umu_cache; 3543 3544 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3545 return (WALK_NEXT); 3546 } 3547 3548 /* 3549 * When ::umausers is invoked with the -f flag, we print out the information 3550 * for each bufctl as well as updating the hash table. 3551 */ 3552 static int 3553 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) 3554 { 3555 int i, depth = MIN(bcp->bc_depth, umem_stack_depth); 3556 const umem_cache_t *cp = umu->umu_cache; 3557 3558 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 3559 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 3560 3561 for (i = 0; i < depth; i++) 3562 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3563 3564 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3565 return (WALK_NEXT); 3566 } 3567 3568 /* 3569 * We sort our results by allocation size before printing them. 3570 */ 3571 static int 3572 umownercmp(const void *lp, const void *rp) 3573 { 3574 const umowner_t *lhs = lp; 3575 const umowner_t *rhs = rp; 3576 3577 return (rhs->umo_total_size - lhs->umo_total_size); 3578 } 3579 3580 /* 3581 * The main engine of ::umausers is relatively straightforward: First we 3582 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we 3583 * iterate over the allocated bufctls of each cache in the list. Finally, 3584 * we sort and print our results. 
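 * Hypothetical invocations (the cache name is only an example):
 *
 *	> ::umausers
 *		all UMF_AUDIT caches, default reporting thresholds
 *	> ::umausers -e
 *		include "small" users as well
 *	> ::umausers -f umem_alloc_256
 *		also print each bufctl for one specific cache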
3585 */ 3586 /*ARGSUSED*/ 3587 int 3588 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3589 { 3590 int mem_threshold = 8192; /* Minimum # bytes for printing */ 3591 int cnt_threshold = 100; /* Minimum # blocks for printing */ 3592 int audited_caches = 0; /* Number of UMF_AUDIT caches found */ 3593 int do_all_caches = 1; /* Do all caches (no arguments) */ 3594 int opt_e = FALSE; /* Include "small" users */ 3595 int opt_f = FALSE; /* Print stack traces */ 3596 3597 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1; 3598 umowner_t *umo, *umoend; 3599 int i, oelems; 3600 3601 umclist_t umc; 3602 umusers_t umu; 3603 3604 if (flags & DCMD_ADDRSPEC) 3605 return (DCMD_USAGE); 3606 3607 bzero(&umc, sizeof (umc)); 3608 bzero(&umu, sizeof (umu)); 3609 3610 while ((i = mdb_getopts(argc, argv, 3611 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 3612 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 3613 3614 argv += i; /* skip past options we just processed */ 3615 argc -= i; /* adjust argc */ 3616 3617 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 3618 return (DCMD_USAGE); 3619 3620 oelems = umc.umc_nelems; 3621 umc.umc_name = argv->a_un.a_str; 3622 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3623 3624 if (umc.umc_nelems == oelems) { 3625 mdb_warn("unknown umem cache: %s\n", umc.umc_name); 3626 return (DCMD_ERR); 3627 } 3628 3629 do_all_caches = 0; 3630 argv++; 3631 argc--; 3632 } 3633 3634 if (opt_e) 3635 mem_threshold = cnt_threshold = 0; 3636 3637 if (opt_f) 3638 callback = (mdb_walk_cb_t)umause2; 3639 3640 if (do_all_caches) { 3641 umc.umc_name = NULL; /* match all cache names */ 3642 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); 3643 } 3644 3645 for (i = 0; i < umc.umc_nelems; i++) { 3646 uintptr_t cp = umc.umc_caches[i]; 3647 umem_cache_t c; 3648 3649 if (mdb_vread(&c, sizeof (c), cp) == -1) { 3650 mdb_warn("failed to read cache at %p", cp); 3651 continue; 3652 } 3653 3654 if (!(c.cache_flags & UMF_AUDIT)) { 3655 if (!do_all_caches) { 3656 mdb_warn("UMF_AUDIT is not enabled for %s\n", 3657 c.cache_name); 3658 } 3659 continue; 3660 } 3661 3662 umu.umu_cache = &c; 3663 (void) mdb_pwalk("bufctl", callback, &umu, cp); 3664 audited_caches++; 3665 } 3666 3667 if (audited_caches == 0 && do_all_caches) { 3668 mdb_warn("UMF_AUDIT is not enabled for any caches\n"); 3669 return (DCMD_ERR); 3670 } 3671 3672 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp); 3673 umoend = umu.umu_hash + umu.umu_nelems; 3674 3675 for (umo = umu.umu_hash; umo < umoend; umo++) { 3676 if (umo->umo_total_size < mem_threshold && 3677 umo->umo_num < cnt_threshold) 3678 continue; 3679 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 3680 umo->umo_total_size, umo->umo_num, umo->umo_data_size); 3681 for (i = 0; i < umo->umo_depth; i++) 3682 mdb_printf("\t %a\n", umo->umo_stack[i]); 3683 } 3684 3685 return (DCMD_OK); 3686 } 3687 3688 struct malloc_data { 3689 uint32_t malloc_size; 3690 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */ 3691 }; 3692 3693 #ifdef _LP64 3694 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data)) 3695 #else 3696 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data)) 3697 #endif 3698 3699 typedef struct umem_malloc_info { 3700 size_t um_total; /* total allocated buffers */ 3701 size_t um_malloc; /* malloc buffers */ 3702 size_t um_malloc_size; /* sum of malloc buffer sizes */ 3703 size_t um_malloc_overhead; /* sum of in-chunk overheads */ 3704 3705 umem_cache_t 
*um_cp;
3706
3707 uint_t *um_bucket;
3708 } umem_malloc_info_t;
3709
3710 static void
3711 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3712 size_t maxbuckets, size_t minbucketsize, int geometric)
3713 {
3714 uint64_t um_malloc;
3715 int minb = -1;
3716 int maxb = -1;
3717 int buckets;
3718 int nbucks;
3719 int i;
3720 int b;
3721 const int *distarray;
3722
3723 minb = (int)minmalloc;
3724 maxb = (int)maxmalloc;
3725
3726 nbucks = buckets = maxb - minb + 1;
3727
3728 um_malloc = 0;
3729 for (b = minb; b <= maxb; b++)
3730 um_malloc += um_bucket[b];
3731
3732 if (maxbuckets != 0)
3733 buckets = MIN(buckets, maxbuckets);
3734
3735 if (minbucketsize > 1) {
3736 buckets = MIN(buckets, nbucks/minbucketsize);
3737 if (buckets == 0) {
3738 buckets = 1;
3739 minbucketsize = nbucks;
3740 }
3741 }
3742
3743 if (geometric)
3744 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3745 else
3746 distarray = dist_linear(buckets, minb, maxb);
3747
3748 dist_print_header("malloc size", 11, "count");
3749 for (i = 0; i < buckets; i++) {
3750 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3751 }
3752 mdb_printf("\n");
3753 }
3754
3755 /*
3756 * A malloc()ed buffer looks like:
3757 *
3758 * <----------- mi.malloc_size --->
3759 * <----------- cp.cache_bufsize ------------------>
3760 * <----------- cp.cache_chunksize -------------------------------->
3761 * +-------+-----------------------+---------------+---------------+
3762 * |/tag///| mallocsz |/round-off/////|/debug info////|
3763 * +-------+---------------------------------------+---------------+
3764 * <-- usable space ------>
3765 *
3766 * mallocsz is the argument to malloc(3C).
3767 * mi.malloc_size is the actual size passed to umem_alloc(), which
3768 * is rounded up to the smallest available cache size, which is
3769 * cache_bufsize. If there is debugging or alignment overhead in
3770 * the cache, that is reflected in a larger cache_chunksize.
3771 *
3772 * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3773 * depending upon the ISA's alignment requirements. For 32-bit allocations,
3774 * it is always an 8-byte tag. For 64-bit allocations larger than 8 bytes,
3775 * the tag has 8 bytes of padding before it.
3776 *
3777 * 32-bit allocations, and 64-bit allocations <= 8 bytes:
3778 * +-------+-------+--------- ...
3779 * |/size//|/stat//| mallocsz ...
3780 * +-------+-------+--------- ...
3781 * ^
3782 * pointer returned from malloc(3C)
3783 *
3784 * 64-bit allocations > 8 bytes:
3785 * +---------------+-------+-------+--------- ...
3786 * |/padding///////|/size//|/stat//| mallocsz ...
3787 * +---------------+-------+-------+--------- ...
3788 * ^
3789 * pointer returned from malloc(3C)
3790 *
3791 * The "size" field is "malloc_size", which is mallocsz + the padding.
3792 * The "stat" field is derived from malloc_size, and functions as a
3793 * validation that this buffer is actually from malloc(3C).
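 * A worked example with hypothetical numbers, for the plain 32-bit layout:
 * malloc(100) needs 100 bytes plus the 8-byte tag, so malloc_size is 108
 * and (with the default umem_alloc_sizes table) the buffer comes from the
 * umem_alloc_112 cache; malloc_stat is UMEM_MALLOC_ENCODE(MALLOC_MAGIC, 108),
 * and um_umem_buffer_cb() below decodes it and recovers
 * mallocsz = 108 - 8 = 100.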
3794 */ 3795 /*ARGSUSED*/ 3796 static int 3797 um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump) 3798 { 3799 struct malloc_data md; 3800 size_t m_addr = addr; 3801 size_t overhead = sizeof (md); 3802 size_t mallocsz; 3803 3804 ump->um_total++; 3805 3806 #ifdef _LP64 3807 if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) { 3808 m_addr += overhead; 3809 overhead += sizeof (md); 3810 } 3811 #endif 3812 3813 if (mdb_vread(&md, sizeof (md), m_addr) == -1) { 3814 mdb_warn("unable to read malloc header at %p", m_addr); 3815 return (WALK_NEXT); 3816 } 3817 3818 switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) { 3819 case MALLOC_MAGIC: 3820 #ifdef _LP64 3821 case MALLOC_SECOND_MAGIC: 3822 #endif 3823 mallocsz = md.malloc_size - overhead; 3824 3825 ump->um_malloc++; 3826 ump->um_malloc_size += mallocsz; 3827 ump->um_malloc_overhead += overhead; 3828 3829 /* include round-off and debug overhead */ 3830 ump->um_malloc_overhead += 3831 ump->um_cp->cache_chunksize - md.malloc_size; 3832 3833 if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET) 3834 ump->um_bucket[mallocsz]++; 3835 3836 break; 3837 default: 3838 break; 3839 } 3840 3841 return (WALK_NEXT); 3842 } 3843 3844 int 3845 get_umem_alloc_sizes(int **out, size_t *out_num) 3846 { 3847 GElf_Sym sym; 3848 3849 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { 3850 mdb_warn("unable to look up umem_alloc_sizes"); 3851 return (-1); 3852 } 3853 3854 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC); 3855 *out_num = sym.st_size / sizeof (int); 3856 3857 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) { 3858 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value); 3859 *out = NULL; 3860 return (-1); 3861 } 3862 3863 return (0); 3864 } 3865 3866 3867 static int 3868 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump) 3869 { 3870 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) 3871 return (WALK_NEXT); 3872 3873 ump->um_cp = cp; 3874 3875 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) == 3876 -1) { 3877 mdb_warn("can't walk 'umem' for cache %p", addr); 3878 return (WALK_ERR); 3879 } 3880 3881 return (WALK_NEXT); 3882 } 3883 3884 void 3885 umem_malloc_dist_help(void) 3886 { 3887 mdb_printf("%s\n", 3888 "report distribution of outstanding malloc()s"); 3889 mdb_dec_indent(2); 3890 mdb_printf("%<b>OPTIONS%</b>\n"); 3891 mdb_inc_indent(2); 3892 mdb_printf("%s", 3893 " -b maxbins\n" 3894 " Use at most maxbins bins for the data\n" 3895 " -B minbinsize\n" 3896 " Make the bins at least minbinsize bytes apart\n" 3897 " -d dump the raw data out, without binning\n" 3898 " -g use geometric binning instead of linear binning\n"); 3899 } 3900 3901 /*ARGSUSED*/ 3902 int 3903 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3904 { 3905 umem_malloc_info_t mi; 3906 uint_t geometric = 0; 3907 uint_t dump = 0; 3908 size_t maxbuckets = 0; 3909 size_t minbucketsize = 0; 3910 3911 size_t minalloc = 0; 3912 size_t maxalloc = UMI_MAX_BUCKET; 3913 3914 if (flags & DCMD_ADDRSPEC) 3915 return (DCMD_USAGE); 3916 3917 if (mdb_getopts(argc, argv, 3918 'd', MDB_OPT_SETBITS, TRUE, &dump, 3919 'g', MDB_OPT_SETBITS, TRUE, &geometric, 3920 'b', MDB_OPT_UINTPTR, &maxbuckets, 3921 'B', MDB_OPT_UINTPTR, &minbucketsize, 3922 0) != argc) 3923 return (DCMD_USAGE); 3924 3925 bzero(&mi, sizeof (mi)); 3926 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 3927 UM_SLEEP | UM_GC); 3928 3929 if (mdb_walk("umem_cache", 
(mdb_walk_cb_t)um_umem_cache_cb,
3930 &mi) == -1) {
3931 mdb_warn("unable to walk 'umem_cache'");
3932 return (DCMD_ERR);
3933 }
3934
3935 if (dump) {
3936 int i;
3937 for (i = minalloc; i <= maxalloc; i++)
3938 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
3939
3940 return (DCMD_OK);
3941 }
3942
3943 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
3944 maxbuckets, minbucketsize, geometric);
3945
3946 return (DCMD_OK);
3947 }
3948
3949 void
3950 umem_malloc_info_help(void)
3951 {
3952 mdb_printf("%s\n",
3953 "report information about malloc()s by cache. ");
3954 mdb_dec_indent(2);
3955 mdb_printf("%<b>OPTIONS%</b>\n");
3956 mdb_inc_indent(2);
3957 mdb_printf("%s",
3958 " -b maxbins\n"
3959 " Use at most maxbins bins for the data\n"
3960 " -B minbinsize\n"
3961 " Make the bins at least minbinsize bytes apart\n"
3962 " -d dump the raw distribution data without binning\n"
3963 #ifndef _KMDB
3964 " -g use geometric binning instead of linear binning\n"
3965 #endif
3966 "");
3967 }
3968 int
3969 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3970 {
3971 umem_cache_t c;
3972 umem_malloc_info_t mi;
3973
3974 int skip = 0;
3975
3976 size_t maxmalloc;
3977 size_t overhead;
3978 size_t allocated;
3979 size_t avg_malloc;
3980 size_t overhead_pct; /* 1000 * overhead_percent */
3981
3982 uint_t verbose = 0;
3983 uint_t dump = 0;
3984 uint_t geometric = 0;
3985 size_t maxbuckets = 0;
3986 size_t minbucketsize = 0;
3987
3988 int *alloc_sizes;
3989 int idx;
3990 size_t num;
3991 size_t minmalloc;
3992
3993 if (mdb_getopts(argc, argv,
3994 'd', MDB_OPT_SETBITS, TRUE, &dump,
3995 'g', MDB_OPT_SETBITS, TRUE, &geometric,
3996 'b', MDB_OPT_UINTPTR, &maxbuckets,
3997 'B', MDB_OPT_UINTPTR, &minbucketsize,
3998 0) != argc)
3999 return (DCMD_USAGE);
4000
4001 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
4002 verbose = 1;
4003
4004 if (!(flags & DCMD_ADDRSPEC)) {
4005 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
4006 argc, argv) == -1) {
4007 mdb_warn("can't walk umem_cache");
4008 return (DCMD_ERR);
4009 }
4010 return (DCMD_OK);
4011 }
4012
4013 if (mdb_vread(&c, sizeof (c), addr) == -1) {
4014 mdb_warn("unable to read cache at %p", addr);
4015 return (DCMD_ERR);
4016 }
4017
4018 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4019 if (!(flags & DCMD_LOOP))
4020 mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4021 "by malloc()\n", c.cache_name);
4022 skip = 1;
4023 }
4024
4025 /*
4026 * normally, print the header only the first time.
In verbose mode, 4027 * print the header on every non-skipped buffer 4028 */ 4029 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip)) 4030 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n", 4031 "CACHE", "BUFSZ", "MAXMAL", 4032 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER"); 4033 4034 if (skip) 4035 return (DCMD_OK); 4036 4037 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data); 4038 #ifdef _LP64 4039 if (c.cache_bufsize > UMEM_SECOND_ALIGN) 4040 maxmalloc -= sizeof (struct malloc_data); 4041 #endif 4042 4043 bzero(&mi, sizeof (mi)); 4044 mi.um_cp = &c; 4045 if (verbose) 4046 mi.um_bucket = 4047 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), 4048 UM_SLEEP | UM_GC); 4049 4050 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) == 4051 -1) { 4052 mdb_warn("can't walk 'umem'"); 4053 return (DCMD_ERR); 4054 } 4055 4056 overhead = mi.um_malloc_overhead; 4057 allocated = mi.um_malloc_size; 4058 4059 /* do integer round off for the average */ 4060 if (mi.um_malloc != 0) 4061 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc; 4062 else 4063 avg_malloc = 0; 4064 4065 /* 4066 * include per-slab overhead 4067 * 4068 * Each slab in a given cache is the same size, and has the same 4069 * number of chunks in it; we read in the first slab on the 4070 * slab list to get the number of chunks for all slabs. To 4071 * compute the per-slab overhead, we just subtract the chunk usage 4072 * from the slabsize: 4073 * 4074 * +------------+-------+-------+ ... --+-------+-------+-------+ 4075 * |////////////| | | ... | |///////|///////| 4076 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//| 4077 * |////////////| | | ... | |///////|///////| 4078 * +------------+-------+-------+ ... --+-------+-------+-------+ 4079 * | \_______chunksize * chunks_____/ | 4080 * \__________________________slabsize__________________________/ 4081 * 4082 * For UMF_HASH caches, there is an additional source of overhead; 4083 * the external umem_slab_t and per-chunk bufctl structures. We 4084 * include those in our per-slab overhead. 4085 * 4086 * Once we have a number for the per-slab overhead, we estimate 4087 * the actual overhead by treating the malloc()ed buffers as if 4088 * they were densely packed: 4089 * 4090 * additional overhead = (# mallocs) * (per-slab) / (chunks); 4091 * 4092 * carefully ordering the multiply before the divide, to avoid 4093 * round-off error. 4094 */ 4095 if (mi.um_malloc != 0) { 4096 umem_slab_t slab; 4097 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next; 4098 4099 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) { 4100 mdb_warn("unable to read slab at %p\n", saddr); 4101 } else { 4102 long chunks = slab.slab_chunks; 4103 if (chunks != 0 && c.cache_chunksize != 0 && 4104 chunks <= c.cache_slabsize / c.cache_chunksize) { 4105 uintmax_t perslab = 4106 c.cache_slabsize - 4107 (c.cache_chunksize * chunks); 4108 4109 if (c.cache_flags & UMF_HASH) { 4110 perslab += sizeof (umem_slab_t) + 4111 chunks * 4112 ((c.cache_flags & UMF_AUDIT) ? 
4113 sizeof (umem_bufctl_audit_t) : 4114 sizeof (umem_bufctl_t)); 4115 } 4116 overhead += 4117 (perslab * (uintmax_t)mi.um_malloc)/chunks; 4118 } else { 4119 mdb_warn("invalid #chunks (%d) in slab %p\n", 4120 chunks, saddr); 4121 } 4122 } 4123 } 4124 4125 if (allocated != 0) 4126 overhead_pct = (1000ULL * overhead) / allocated; 4127 else 4128 overhead_pct = 0; 4129 4130 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n", 4131 addr, c.cache_bufsize, maxmalloc, 4132 mi.um_malloc, avg_malloc, allocated, overhead, 4133 overhead_pct / 10, overhead_pct % 10); 4134 4135 if (!verbose) 4136 return (DCMD_OK); 4137 4138 if (!dump) 4139 mdb_printf("\n"); 4140 4141 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1) 4142 return (DCMD_ERR); 4143 4144 for (idx = 0; idx < num; idx++) { 4145 if (alloc_sizes[idx] == c.cache_bufsize) 4146 break; 4147 if (alloc_sizes[idx] == 0) { 4148 idx = num; /* 0-terminated array */ 4149 break; 4150 } 4151 } 4152 if (idx == num) { 4153 mdb_warn( 4154 "cache %p's size (%d) not in umem_alloc_sizes\n", 4155 addr, c.cache_bufsize); 4156 return (DCMD_ERR); 4157 } 4158 4159 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1]; 4160 if (minmalloc > 0) { 4161 #ifdef _LP64 4162 if (minmalloc > UMEM_SECOND_ALIGN) 4163 minmalloc -= sizeof (struct malloc_data); 4164 #endif 4165 minmalloc -= sizeof (struct malloc_data); 4166 minmalloc += 1; 4167 } 4168 4169 if (dump) { 4170 for (idx = minmalloc; idx <= maxmalloc; idx++) 4171 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]); 4172 mdb_printf("\n"); 4173 } else { 4174 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc, 4175 maxbuckets, minbucketsize, geometric); 4176 } 4177 4178 return (DCMD_OK); 4179 } 4180
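
/*
 * A short usage sketch for the malloc()-reporting dcmds above (all
 * invocations hypothetical):
 *
 *	> ::umem_malloc_dist -g
 *		geometric distribution of all outstanding malloc() sizes
 *	> ::walk umem_cache | ::umem_malloc_info
 *		per-cache summary, equivalent to ::umem_malloc_info with no
 *		address
 *	> <cache addr>::umem_malloc_info -d
 *		raw per-size counts for a single umem_alloc_<n> cache
 */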