1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <mdb/mdb_param.h> 27 #include <mdb/mdb_modapi.h> 28 #include <mdb/mdb_ctf.h> 29 #include <sys/cpuvar.h> 30 #include <sys/kmem_impl.h> 31 #include <sys/vmem_impl.h> 32 #include <sys/machelf.h> 33 #include <sys/modctl.h> 34 #include <sys/kobj.h> 35 #include <sys/panic.h> 36 #include <sys/stack.h> 37 #include <sys/sysmacros.h> 38 #include <vm/page.h> 39 40 #include "avl.h" 41 #include "combined.h" 42 #include "dist.h" 43 #include "kmem.h" 44 #include "list.h" 45 46 #define dprintf(x) if (mdb_debug_level) { \ 47 mdb_printf("kmem debug: "); \ 48 /*CSTYLED*/\ 49 mdb_printf x ;\ 50 } 51 52 #define KM_ALLOCATED 0x01 53 #define KM_FREE 0x02 54 #define KM_BUFCTL 0x04 55 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */ 56 #define KM_HASH 0x10 57 58 static int mdb_debug_level = 0; 59 60 /*ARGSUSED*/ 61 static int 62 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored) 63 { 64 mdb_walker_t w; 65 char descr[64]; 66 67 (void) mdb_snprintf(descr, sizeof (descr), 68 "walk the %s cache", c->cache_name); 69 70 w.walk_name = c->cache_name; 71 w.walk_descr = descr; 72 w.walk_init = kmem_walk_init; 73 w.walk_step = kmem_walk_step; 74 w.walk_fini = kmem_walk_fini; 75 w.walk_init_arg = (void *)addr; 76 77 if (mdb_add_walker(&w) == -1) 78 mdb_warn("failed to add %s walker", c->cache_name); 79 80 return (WALK_NEXT); 81 } 82 83 /*ARGSUSED*/ 84 int 85 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 86 { 87 mdb_debug_level ^= 1; 88 89 mdb_printf("kmem: debugging is now %s\n", 90 mdb_debug_level ? 
"on" : "off"); 91 92 return (DCMD_OK); 93 } 94 95 int 96 kmem_cache_walk_init(mdb_walk_state_t *wsp) 97 { 98 GElf_Sym sym; 99 100 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) { 101 mdb_warn("couldn't find kmem_caches"); 102 return (WALK_ERR); 103 } 104 105 wsp->walk_addr = (uintptr_t)sym.st_value; 106 107 return (list_walk_init_named(wsp, "cache list", "cache")); 108 } 109 110 int 111 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 112 { 113 if (wsp->walk_addr == NULL) { 114 mdb_warn("kmem_cpu_cache doesn't support global walks"); 115 return (WALK_ERR); 116 } 117 118 if (mdb_layered_walk("cpu", wsp) == -1) { 119 mdb_warn("couldn't walk 'cpu'"); 120 return (WALK_ERR); 121 } 122 123 wsp->walk_data = (void *)wsp->walk_addr; 124 125 return (WALK_NEXT); 126 } 127 128 int 129 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 130 { 131 uintptr_t caddr = (uintptr_t)wsp->walk_data; 132 const cpu_t *cpu = wsp->walk_layer; 133 kmem_cpu_cache_t cc; 134 135 caddr += cpu->cpu_cache_offset; 136 137 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 138 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 139 return (WALK_ERR); 140 } 141 142 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 143 } 144 145 static int 146 kmem_slab_check(void *p, uintptr_t saddr, void *arg) 147 { 148 kmem_slab_t *sp = p; 149 uintptr_t caddr = (uintptr_t)arg; 150 if ((uintptr_t)sp->slab_cache != caddr) { 151 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 152 saddr, caddr, sp->slab_cache); 153 return (-1); 154 } 155 156 return (0); 157 } 158 159 static int 160 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg) 161 { 162 kmem_slab_t *sp = p; 163 164 int rc = kmem_slab_check(p, saddr, arg); 165 if (rc != 0) { 166 return (rc); 167 } 168 169 if (!KMEM_SLAB_IS_PARTIAL(sp)) { 170 mdb_warn("slab %p is not a partial slab\n", saddr); 171 return (-1); 172 } 173 174 return (0); 175 } 176 177 static int 178 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg) 179 { 180 kmem_slab_t *sp = p; 181 182 int rc = kmem_slab_check(p, saddr, arg); 183 if (rc != 0) { 184 return (rc); 185 } 186 187 if (!KMEM_SLAB_IS_ALL_USED(sp)) { 188 mdb_warn("slab %p is not completely allocated\n", saddr); 189 return (-1); 190 } 191 192 return (0); 193 } 194 195 typedef struct { 196 uintptr_t kns_cache_addr; 197 int kns_nslabs; 198 } kmem_nth_slab_t; 199 200 static int 201 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg) 202 { 203 kmem_nth_slab_t *chkp = arg; 204 205 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr); 206 if (rc != 0) { 207 return (rc); 208 } 209 210 return (chkp->kns_nslabs-- == 0 ? 
1 : 0); 211 } 212 213 static int 214 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp) 215 { 216 uintptr_t caddr = wsp->walk_addr; 217 218 wsp->walk_addr = (uintptr_t)(caddr + 219 offsetof(kmem_cache_t, cache_complete_slabs)); 220 221 return (list_walk_init_checked(wsp, "slab list", "slab", 222 kmem_complete_slab_check, (void *)caddr)); 223 } 224 225 static int 226 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp) 227 { 228 uintptr_t caddr = wsp->walk_addr; 229 230 wsp->walk_addr = (uintptr_t)(caddr + 231 offsetof(kmem_cache_t, cache_partial_slabs)); 232 233 return (avl_walk_init_checked(wsp, "slab list", "slab", 234 kmem_partial_slab_check, (void *)caddr)); 235 } 236 237 int 238 kmem_slab_walk_init(mdb_walk_state_t *wsp) 239 { 240 uintptr_t caddr = wsp->walk_addr; 241 242 if (caddr == NULL) { 243 mdb_warn("kmem_slab doesn't support global walks\n"); 244 return (WALK_ERR); 245 } 246 247 combined_walk_init(wsp); 248 combined_walk_add(wsp, 249 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini); 250 combined_walk_add(wsp, 251 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini); 252 253 return (WALK_NEXT); 254 } 255 256 static int 257 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp) 258 { 259 uintptr_t caddr = wsp->walk_addr; 260 kmem_nth_slab_t *chk; 261 262 chk = mdb_alloc(sizeof (kmem_nth_slab_t), 263 UM_SLEEP | UM_GC); 264 chk->kns_cache_addr = caddr; 265 chk->kns_nslabs = 1; 266 wsp->walk_addr = (uintptr_t)(caddr + 267 offsetof(kmem_cache_t, cache_complete_slabs)); 268 269 return (list_walk_init_checked(wsp, "slab list", "slab", 270 kmem_nth_slab_check, chk)); 271 } 272 273 int 274 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 275 { 276 uintptr_t caddr = wsp->walk_addr; 277 kmem_cache_t c; 278 279 if (caddr == NULL) { 280 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 281 return (WALK_ERR); 282 } 283 284 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 285 mdb_warn("couldn't read kmem_cache at %p", caddr); 286 return (WALK_ERR); 287 } 288 289 combined_walk_init(wsp); 290 291 /* 292 * Some consumers (umem_walk_step(), in particular) require at 293 * least one callback if there are any buffers in the cache. So 294 * if there are *no* partial slabs, report the first full slab, if 295 * any. 296 * 297 * Yes, this is ugly, but it's cleaner than the other possibilities. 
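 * For example, a cache whose buffers are all allocated has
 * cache_partial_slabs.avl_numnodes == 0 even though cache_buftotal is
 * nonzero; reporting the first complete slab gives such a consumer the
 * one callback it depends on.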
298 */ 299 if (c.cache_partial_slabs.avl_numnodes == 0) { 300 combined_walk_add(wsp, kmem_first_complete_slab_walk_init, 301 list_walk_step, list_walk_fini); 302 } else { 303 combined_walk_add(wsp, kmem_partial_slab_walk_init, 304 avl_walk_step, avl_walk_fini); 305 } 306 307 return (WALK_NEXT); 308 } 309 310 int 311 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 312 { 313 kmem_cache_t c; 314 const char *filter = NULL; 315 316 if (mdb_getopts(ac, argv, 317 'n', MDB_OPT_STR, &filter, 318 NULL) != ac) { 319 return (DCMD_USAGE); 320 } 321 322 if (!(flags & DCMD_ADDRSPEC)) { 323 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 324 mdb_warn("can't walk kmem_cache"); 325 return (DCMD_ERR); 326 } 327 return (DCMD_OK); 328 } 329 330 if (DCMD_HDRSPEC(flags)) 331 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 332 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 333 334 if (mdb_vread(&c, sizeof (c), addr) == -1) { 335 mdb_warn("couldn't read kmem_cache at %p", addr); 336 return (DCMD_ERR); 337 } 338 339 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) 340 return (DCMD_OK); 341 342 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 343 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 344 345 return (DCMD_OK); 346 } 347 348 void 349 kmem_cache_help(void) 350 { 351 mdb_printf("%s", "Print kernel memory caches.\n\n"); 352 mdb_dec_indent(2); 353 mdb_printf("%<b>OPTIONS%</b>\n"); 354 mdb_inc_indent(2); 355 mdb_printf("%s", 356 " -n name\n" 357 " name of kmem cache (or matching partial name)\n" 358 "\n" 359 "Column\tDescription\n" 360 "\n" 361 "ADDR\t\taddress of kmem cache\n" 362 "NAME\t\tname of kmem cache\n" 363 "FLAG\t\tvarious cache state flags\n" 364 "CFLAG\t\tcache creation flags\n" 365 "BUFSIZE\tobject size in bytes\n" 366 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n"); 367 } 368 369 #define LABEL_WIDTH 11 370 static void 371 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 372 size_t maxbuckets, size_t minbucketsize) 373 { 374 uint64_t total; 375 int buckets; 376 int i; 377 const int *distarray; 378 int complete[2]; 379 380 buckets = buffers_per_slab; 381 382 total = 0; 383 for (i = 0; i <= buffers_per_slab; i++) 384 total += ks_bucket[i]; 385 386 if (maxbuckets > 1) 387 buckets = MIN(buckets, maxbuckets); 388 389 if (minbucketsize > 1) { 390 /* 391 * minbucketsize does not apply to the first bucket reserved 392 * for completely allocated slabs 393 */ 394 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 395 minbucketsize)); 396 if ((buckets < 2) && (buffers_per_slab > 1)) { 397 buckets = 2; 398 minbucketsize = (buffers_per_slab - 1); 399 } 400 } 401 402 /* 403 * The first printed bucket is reserved for completely allocated slabs. 404 * Passing (buckets - 1) excludes that bucket from the generated 405 * distribution, since we're handling it as a special case. 406 */ 407 complete[0] = buffers_per_slab; 408 complete[1] = buffers_per_slab + 1; 409 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1); 410 411 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated"); 412 dist_print_header("Buffers", LABEL_WIDTH, "Slabs"); 413 414 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH); 415 /* 416 * Print bucket ranges in descending order after the first bucket for 417 * completely allocated slabs, so a person can see immediately whether 418 * or not there is fragmentation without having to scan possibly 419 * multiple screens of output. 
Starting at (buckets - 2) excludes the 420 * extra terminating bucket. 421 */ 422 for (i = buckets - 2; i >= 0; i--) { 423 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH); 424 } 425 mdb_printf("\n"); 426 } 427 #undef LABEL_WIDTH 428 429 /*ARGSUSED*/ 430 static int 431 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab) 432 { 433 *is_slab = B_TRUE; 434 return (WALK_DONE); 435 } 436 437 /*ARGSUSED*/ 438 static int 439 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp, 440 boolean_t *is_slab) 441 { 442 /* 443 * The "kmem_partial_slab" walker reports the first full slab if there 444 * are no partial slabs (for the sake of consumers that require at least 445 * one callback if there are any buffers in the cache). 446 */ 447 *is_slab = KMEM_SLAB_IS_PARTIAL(sp); 448 return (WALK_DONE); 449 } 450 451 typedef struct kmem_slab_usage { 452 int ksu_refcnt; /* count of allocated buffers on slab */ 453 boolean_t ksu_nomove; /* slab marked non-reclaimable */ 454 } kmem_slab_usage_t; 455 456 typedef struct kmem_slab_stats { 457 const kmem_cache_t *ks_cp; 458 int ks_slabs; /* slabs in cache */ 459 int ks_partial_slabs; /* partially allocated slabs in cache */ 460 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 461 int ks_max_buffers_per_slab; /* max buffers per slab */ 462 int ks_usage_len; /* ks_usage array length */ 463 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 464 uint_t *ks_bucket; /* slab usage distribution */ 465 } kmem_slab_stats_t; 466 467 /*ARGSUSED*/ 468 static int 469 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp, 470 kmem_slab_stats_t *ks) 471 { 472 kmem_slab_usage_t *ksu; 473 long unused; 474 475 ks->ks_slabs++; 476 ks->ks_bucket[sp->slab_refcnt]++; 477 478 unused = (sp->slab_chunks - sp->slab_refcnt); 479 if (unused == 0) { 480 return (WALK_NEXT); 481 } 482 483 ks->ks_partial_slabs++; 484 ks->ks_unused_buffers += unused; 485 486 if (ks->ks_partial_slabs > ks->ks_usage_len) { 487 kmem_slab_usage_t *usage; 488 int len = ks->ks_usage_len; 489 490 len = (len == 0 ? 
16 : len * 2); 491 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 492 if (ks->ks_usage != NULL) { 493 bcopy(ks->ks_usage, usage, 494 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 495 mdb_free(ks->ks_usage, 496 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 497 } 498 ks->ks_usage = usage; 499 ks->ks_usage_len = len; 500 } 501 502 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 503 ksu->ksu_refcnt = sp->slab_refcnt; 504 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); 505 return (WALK_NEXT); 506 } 507 508 static void 509 kmem_slabs_header() 510 { 511 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 512 "", "", "Partial", "", "Unused", ""); 513 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 514 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 515 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 516 "-------------------------", "--------", "--------", "---------", 517 "---------", "------"); 518 } 519 520 int 521 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 522 { 523 kmem_cache_t c; 524 kmem_slab_stats_t stats; 525 mdb_walk_cb_t cb; 526 int pct; 527 int tenths_pct; 528 size_t maxbuckets = 1; 529 size_t minbucketsize = 0; 530 const char *filter = NULL; 531 const char *name = NULL; 532 uint_t opt_v = FALSE; 533 boolean_t buckets = B_FALSE; 534 boolean_t skip = B_FALSE; 535 536 if (mdb_getopts(argc, argv, 537 'B', MDB_OPT_UINTPTR, &minbucketsize, 538 'b', MDB_OPT_UINTPTR, &maxbuckets, 539 'n', MDB_OPT_STR, &filter, 540 'N', MDB_OPT_STR, &name, 541 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 542 NULL) != argc) { 543 return (DCMD_USAGE); 544 } 545 546 if ((maxbuckets != 1) || (minbucketsize != 0)) { 547 buckets = B_TRUE; 548 } 549 550 if (!(flags & DCMD_ADDRSPEC)) { 551 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 552 argv) == -1) { 553 mdb_warn("can't walk kmem_cache"); 554 return (DCMD_ERR); 555 } 556 return (DCMD_OK); 557 } 558 559 if (mdb_vread(&c, sizeof (c), addr) == -1) { 560 mdb_warn("couldn't read kmem_cache at %p", addr); 561 return (DCMD_ERR); 562 } 563 564 if (name == NULL) { 565 skip = ((filter != NULL) && 566 (strstr(c.cache_name, filter) == NULL)); 567 } else if (filter == NULL) { 568 skip = (strcmp(c.cache_name, name) != 0); 569 } else { 570 /* match either -n or -N */ 571 skip = ((strcmp(c.cache_name, name) != 0) && 572 (strstr(c.cache_name, filter) == NULL)); 573 } 574 575 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) { 576 kmem_slabs_header(); 577 } else if ((opt_v || buckets) && !skip) { 578 if (DCMD_HDRSPEC(flags)) { 579 kmem_slabs_header(); 580 } else { 581 boolean_t is_slab = B_FALSE; 582 const char *walker_name; 583 if (opt_v) { 584 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 585 walker_name = "kmem_slab_partial"; 586 } else { 587 cb = (mdb_walk_cb_t)kmem_first_slab; 588 walker_name = "kmem_slab"; 589 } 590 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 591 if (is_slab) { 592 kmem_slabs_header(); 593 } 594 } 595 } 596 597 if (skip) { 598 return (DCMD_OK); 599 } 600 601 bzero(&stats, sizeof (kmem_slab_stats_t)); 602 stats.ks_cp = &c; 603 stats.ks_max_buffers_per_slab = c.cache_maxchunks; 604 /* +1 to include a zero bucket */ 605 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) * 606 sizeof (*stats.ks_bucket), UM_SLEEP); 607 cb = (mdb_walk_cb_t)kmem_slablist_stat; 608 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 609 610 if (c.cache_buftotal == 0) { 611 pct = 0; 612 tenths_pct = 0; 613 } else { 614 uint64_t n = stats.ks_unused_buffers * 10000; 615 pct = (int)(n / c.cache_buftotal); 616 tenths_pct = pct 
- ((pct / 100) * 100); 617 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 618 if (tenths_pct == 10) { 619 pct += 100; 620 tenths_pct = 0; 621 } 622 } 623 624 pct /= 100; 625 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 626 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 627 stats.ks_unused_buffers, pct, tenths_pct); 628 629 if (maxbuckets == 0) { 630 maxbuckets = stats.ks_max_buffers_per_slab; 631 } 632 633 if (((maxbuckets > 1) || (minbucketsize > 0)) && 634 (stats.ks_slabs > 0)) { 635 mdb_printf("\n"); 636 kmem_slabs_print_dist(stats.ks_bucket, 637 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize); 638 } 639 640 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) * 641 sizeof (*stats.ks_bucket)); 642 643 if (!opt_v) { 644 return (DCMD_OK); 645 } 646 647 if (opt_v && (stats.ks_partial_slabs > 0)) { 648 int i; 649 kmem_slab_usage_t *ksu; 650 651 mdb_printf(" %d complete, %d partial", 652 (stats.ks_slabs - stats.ks_partial_slabs), 653 stats.ks_partial_slabs); 654 if (stats.ks_partial_slabs > 0) { 655 mdb_printf(" (%d):", stats.ks_max_buffers_per_slab); 656 } 657 for (i = 0; i < stats.ks_partial_slabs; i++) { 658 ksu = &stats.ks_usage[i]; 659 if (ksu->ksu_nomove) { 660 const char *symbol = "*"; 661 mdb_printf(" %d%s", ksu->ksu_refcnt, symbol); 662 } else { 663 mdb_printf(" %d", ksu->ksu_refcnt); 664 } 665 } 666 mdb_printf("\n\n"); 667 } 668 669 if (stats.ks_usage_len > 0) { 670 mdb_free(stats.ks_usage, 671 stats.ks_usage_len * sizeof (kmem_slab_usage_t)); 672 } 673 674 return (DCMD_OK); 675 } 676 677 void 678 kmem_slabs_help(void) 679 { 680 mdb_printf("%s", 681 "Display slab usage per kmem cache.\n\n"); 682 mdb_dec_indent(2); 683 mdb_printf("%<b>OPTIONS%</b>\n"); 684 mdb_inc_indent(2); 685 mdb_printf("%s", 686 " -n name\n" 687 " name of kmem cache (or matching partial name)\n" 688 " -N name\n" 689 " exact name of kmem cache\n" 690 " -b maxbins\n" 691 " Print a distribution of allocated buffers per slab using at\n" 692 " most maxbins bins. The first bin is reserved for completely\n" 693 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n" 694 " effect as specifying the maximum allocated buffers per slab\n" 695 " or setting minbinsize to 1 (-B 1).\n" 696 " -B minbinsize\n" 697 " Print a distribution of allocated buffers per slab, making\n" 698 " all bins (except the first, reserved for completely allocated\n" 699 " slabs) at least minbinsize buffers apart.\n" 700 " -v verbose output: List the allocated buffer count of each partial\n" 701 " slab on the free list in order from front to back to show how\n" 702 " closely the slabs are ordered by usage. For example\n" 703 "\n" 704 " 10 complete, 3 partial (8): 7 3 1\n" 705 "\n" 706 " means there are thirteen slabs with eight buffers each, including\n" 707 " three partially allocated slabs with less than all eight buffers\n" 708 " allocated.\n" 709 "\n" 710 " Buffer allocations are always from the front of the partial slab\n" 711 " list. When a buffer is freed from a completely used slab, that\n" 712 " slab is added to the front of the partial slab list. Assuming\n" 713 " that all buffers are equally likely to be freed soon, the\n" 714 " desired order of partial slabs is most-used at the front of the\n" 715 " list and least-used at the back (as in the example above).\n" 716 " However, if a slab contains an allocated buffer that will not\n" 717 " soon be freed, it would be better for that slab to be at the\n" 718 " front where all of its buffers can be allocated. 
Taking a slab\n" 719 " off the partial slab list (either with all buffers freed or all\n" 720 " buffers allocated) reduces cache fragmentation.\n" 721 "\n" 722 " A slab's allocated buffer count representing a partial slab (9 in\n" 723 " the example below) may be marked as follows:\n" 724 "\n" 725 " 9* An asterisk indicates that kmem has marked the slab non-\n" 726 " reclaimable because the kmem client refused to move one of the\n" 727 " slab's buffers. Since kmem does not expect to completely free the\n" 728 " slab, it moves it to the front of the list in the hope of\n" 729 " completely allocating it instead. A slab marked with an asterisk\n" 730 " stays marked for as long as it remains on the partial slab list.\n" 731 "\n" 732 "Column\t\tDescription\n" 733 "\n" 734 "Cache Name\t\tname of kmem cache\n" 735 "Slabs\t\t\ttotal slab count\n" 736 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 737 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 738 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 739 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 740 "\t\t\t for accounting structures (debug mode), slab\n" 741 "\t\t\t coloring (incremental small offsets to stagger\n" 742 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 743 } 744 745 static int 746 addrcmp(const void *lhs, const void *rhs) 747 { 748 uintptr_t p1 = *((uintptr_t *)lhs); 749 uintptr_t p2 = *((uintptr_t *)rhs); 750 751 if (p1 < p2) 752 return (-1); 753 if (p1 > p2) 754 return (1); 755 return (0); 756 } 757 758 static int 759 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 760 { 761 const kmem_bufctl_audit_t *bcp1 = *lhs; 762 const kmem_bufctl_audit_t *bcp2 = *rhs; 763 764 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 765 return (-1); 766 767 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 768 return (1); 769 770 return (0); 771 } 772 773 typedef struct kmem_hash_walk { 774 uintptr_t *kmhw_table; 775 size_t kmhw_nelems; 776 size_t kmhw_pos; 777 kmem_bufctl_t kmhw_cur; 778 } kmem_hash_walk_t; 779 780 int 781 kmem_hash_walk_init(mdb_walk_state_t *wsp) 782 { 783 kmem_hash_walk_t *kmhw; 784 uintptr_t *hash; 785 kmem_cache_t c; 786 uintptr_t haddr, addr = wsp->walk_addr; 787 size_t nelems; 788 size_t hsize; 789 790 if (addr == NULL) { 791 mdb_warn("kmem_hash doesn't support global walks\n"); 792 return (WALK_ERR); 793 } 794 795 if (mdb_vread(&c, sizeof (c), addr) == -1) { 796 mdb_warn("couldn't read cache at addr %p", addr); 797 return (WALK_ERR); 798 } 799 800 if (!(c.cache_flags & KMF_HASH)) { 801 mdb_warn("cache %p doesn't have a hash table\n", addr); 802 return (WALK_DONE); /* nothing to do */ 803 } 804 805 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 806 kmhw->kmhw_cur.bc_next = NULL; 807 kmhw->kmhw_pos = 0; 808 809 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 810 hsize = nelems * sizeof (uintptr_t); 811 haddr = (uintptr_t)c.cache_hash_table; 812 813 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 814 if (mdb_vread(hash, hsize, haddr) == -1) { 815 mdb_warn("failed to read hash table at %p", haddr); 816 mdb_free(hash, hsize); 817 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 818 return (WALK_ERR); 819 } 820 821 wsp->walk_data = kmhw; 822 823 return (WALK_NEXT); 824 } 825 826 int 827 kmem_hash_walk_step(mdb_walk_state_t *wsp) 828 { 829 kmem_hash_walk_t *kmhw = wsp->walk_data; 830 uintptr_t addr = NULL; 831 832 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 833 while (kmhw->kmhw_pos < 
kmhw->kmhw_nelems) { 834 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 835 break; 836 } 837 } 838 if (addr == NULL) 839 return (WALK_DONE); 840 841 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 842 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 843 return (WALK_ERR); 844 } 845 846 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 847 } 848 849 void 850 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 851 { 852 kmem_hash_walk_t *kmhw = wsp->walk_data; 853 854 if (kmhw == NULL) 855 return; 856 857 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 858 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 859 } 860 861 /* 862 * Find the address of the bufctl structure for the address 'buf' in cache 863 * 'cp', which is at address caddr, and place it in *out. 864 */ 865 static int 866 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 867 { 868 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 869 kmem_bufctl_t *bcp; 870 kmem_bufctl_t bc; 871 872 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 873 mdb_warn("unable to read hash bucket for %p in cache %p", 874 buf, caddr); 875 return (-1); 876 } 877 878 while (bcp != NULL) { 879 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 880 (uintptr_t)bcp) == -1) { 881 mdb_warn("unable to read bufctl at %p", bcp); 882 return (-1); 883 } 884 if (bc.bc_addr == buf) { 885 *out = (uintptr_t)bcp; 886 return (0); 887 } 888 bcp = bc.bc_next; 889 } 890 891 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 892 return (-1); 893 } 894 895 int 896 kmem_get_magsize(const kmem_cache_t *cp) 897 { 898 uintptr_t addr = (uintptr_t)cp->cache_magtype; 899 GElf_Sym mt_sym; 900 kmem_magtype_t mt; 901 int res; 902 903 /* 904 * if cpu 0 has a non-zero magsize, it must be correct. caches 905 * with KMF_NOMAGAZINE have disabled their magazine layers, so 906 * it is okay to return 0 for them. 907 */ 908 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 909 (cp->cache_flags & KMF_NOMAGAZINE)) 910 return (res); 911 912 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 913 mdb_warn("unable to read 'kmem_magtype'"); 914 } else if (addr < mt_sym.st_value || 915 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 916 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 917 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 918 cp->cache_name, addr); 919 return (0); 920 } 921 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 922 mdb_warn("unable to read magtype at %a", addr); 923 return (0); 924 } 925 return (mt.mt_magsize); 926 } 927 928 /*ARGSUSED*/ 929 static int 930 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 931 { 932 *est -= (sp->slab_chunks - sp->slab_refcnt); 933 934 return (WALK_NEXT); 935 } 936 937 /* 938 * Returns an upper bound on the number of allocated buffers in a given 939 * cache. 
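 * The estimate starts from cache_buftotal, subtracts the unused buffers
 * on each partial slab (via the "kmem_slab_partial" walker), and then
 * subtracts the contents of the depot's full magazines
 * (cache_full.ml_total * magsize).  Purely as an illustration: with
 * cache_buftotal == 1000, 40 unused buffers spread across the partial
 * slabs, and 3 full magazines of 15 rounds each, the upper bound would
 * be 1000 - 40 - (3 * 15) = 915 allocated buffers.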
940 */ 941 size_t 942 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 943 { 944 int magsize; 945 size_t cache_est; 946 947 cache_est = cp->cache_buftotal; 948 949 (void) mdb_pwalk("kmem_slab_partial", 950 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 951 952 if ((magsize = kmem_get_magsize(cp)) != 0) { 953 size_t mag_est = cp->cache_full.ml_total * magsize; 954 955 if (cache_est >= mag_est) { 956 cache_est -= mag_est; 957 } else { 958 mdb_warn("cache %p's magazine layer holds more buffers " 959 "than the slab layer.\n", addr); 960 } 961 } 962 return (cache_est); 963 } 964 965 #define READMAG_ROUNDS(rounds) { \ 966 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 967 mdb_warn("couldn't read magazine at %p", kmp); \ 968 goto fail; \ 969 } \ 970 for (i = 0; i < rounds; i++) { \ 971 maglist[magcnt++] = mp->mag_round[i]; \ 972 if (magcnt == magmax) { \ 973 mdb_warn("%d magazines exceeds fudge factor\n", \ 974 magcnt); \ 975 goto fail; \ 976 } \ 977 } \ 978 } 979 980 int 981 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 982 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 983 { 984 kmem_magazine_t *kmp, *mp; 985 void **maglist = NULL; 986 int i, cpu; 987 size_t magsize, magmax, magbsize; 988 size_t magcnt = 0; 989 990 /* 991 * Read the magtype out of the cache, after verifying the pointer's 992 * correctness. 993 */ 994 magsize = kmem_get_magsize(cp); 995 if (magsize == 0) { 996 *maglistp = NULL; 997 *magcntp = 0; 998 *magmaxp = 0; 999 return (WALK_NEXT); 1000 } 1001 1002 /* 1003 * There are several places where we need to go buffer hunting: 1004 * the per-CPU loaded magazine, the per-CPU spare full magazine, 1005 * and the full magazine list in the depot. 1006 * 1007 * For an upper bound on the number of buffers in the magazine 1008 * layer, we have the number of magazines on the cache_full 1009 * list plus at most two magazines per CPU (the loaded and the 1010 * spare). Toss in 100 magazines as a fudge factor in case this 1011 * is live (the number "100" comes from the same fudge factor in 1012 * crash(1M)). 1013 */ 1014 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 1015 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 1016 1017 if (magbsize >= PAGESIZE / 2) { 1018 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 1019 addr, magbsize); 1020 return (WALK_ERR); 1021 } 1022 1023 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 1024 mp = mdb_alloc(magbsize, alloc_flags); 1025 if (mp == NULL || maglist == NULL) 1026 goto fail; 1027 1028 /* 1029 * First up: the magazines in the depot (i.e. on the cache_full list). 1030 */ 1031 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 1032 READMAG_ROUNDS(magsize); 1033 kmp = mp->mag_next; 1034 1035 if (kmp == cp->cache_full.ml_list) 1036 break; /* cache_full list loop detected */ 1037 } 1038 1039 dprintf(("cache_full list done\n")); 1040 1041 /* 1042 * Now whip through the CPUs, snagging the loaded magazines 1043 * and full spares. 
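 * Each kmem_cpu_cache_t can contribute up to two magazines: the loaded
 * magazine (cc_loaded, with cc_rounds valid rounds) and the previously
 * loaded spare (cc_ploaded, with cc_prounds rounds); both are read
 * below when present.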
1044 */ 1045 for (cpu = 0; cpu < ncpus; cpu++) { 1046 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 1047 1048 dprintf(("reading cpu cache %p\n", 1049 (uintptr_t)ccp - (uintptr_t)cp + addr)); 1050 1051 if (ccp->cc_rounds > 0 && 1052 (kmp = ccp->cc_loaded) != NULL) { 1053 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 1054 READMAG_ROUNDS(ccp->cc_rounds); 1055 } 1056 1057 if (ccp->cc_prounds > 0 && 1058 (kmp = ccp->cc_ploaded) != NULL) { 1059 dprintf(("reading %d previously loaded rounds\n", 1060 ccp->cc_prounds)); 1061 READMAG_ROUNDS(ccp->cc_prounds); 1062 } 1063 } 1064 1065 dprintf(("magazine layer: %d buffers\n", magcnt)); 1066 1067 if (!(alloc_flags & UM_GC)) 1068 mdb_free(mp, magbsize); 1069 1070 *maglistp = maglist; 1071 *magcntp = magcnt; 1072 *magmaxp = magmax; 1073 1074 return (WALK_NEXT); 1075 1076 fail: 1077 if (!(alloc_flags & UM_GC)) { 1078 if (mp) 1079 mdb_free(mp, magbsize); 1080 if (maglist) 1081 mdb_free(maglist, magmax * sizeof (void *)); 1082 } 1083 return (WALK_ERR); 1084 } 1085 1086 static int 1087 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1088 { 1089 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1090 } 1091 1092 static int 1093 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1094 { 1095 kmem_bufctl_audit_t b; 1096 1097 /* 1098 * if KMF_AUDIT is not set, we know that we're looking at a 1099 * kmem_bufctl_t. 1100 */ 1101 if (!(cp->cache_flags & KMF_AUDIT) || 1102 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1103 (void) memset(&b, 0, sizeof (b)); 1104 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1105 mdb_warn("unable to read bufctl at %p", buf); 1106 return (WALK_ERR); 1107 } 1108 } 1109 1110 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1111 } 1112 1113 typedef struct kmem_walk { 1114 int kmw_type; 1115 1116 int kmw_addr; /* cache address */ 1117 kmem_cache_t *kmw_cp; 1118 size_t kmw_csize; 1119 1120 /* 1121 * magazine layer 1122 */ 1123 void **kmw_maglist; 1124 size_t kmw_max; 1125 size_t kmw_count; 1126 size_t kmw_pos; 1127 1128 /* 1129 * slab layer 1130 */ 1131 char *kmw_valid; /* to keep track of freed buffers */ 1132 char *kmw_ubase; /* buffer for slab data */ 1133 } kmem_walk_t; 1134 1135 static int 1136 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1137 { 1138 kmem_walk_t *kmw; 1139 int ncpus, csize; 1140 kmem_cache_t *cp; 1141 size_t vm_quantum; 1142 1143 size_t magmax, magcnt; 1144 void **maglist = NULL; 1145 uint_t chunksize, slabsize; 1146 int status = WALK_ERR; 1147 uintptr_t addr = wsp->walk_addr; 1148 const char *layered; 1149 1150 type &= ~KM_HASH; 1151 1152 if (addr == NULL) { 1153 mdb_warn("kmem walk doesn't support global walks\n"); 1154 return (WALK_ERR); 1155 } 1156 1157 dprintf(("walking %p\n", addr)); 1158 1159 /* 1160 * First we need to figure out how many CPUs are configured in the 1161 * system to know how much to slurp out. 1162 */ 1163 mdb_readvar(&ncpus, "max_ncpus"); 1164 1165 csize = KMEM_CACHE_SIZE(ncpus); 1166 cp = mdb_alloc(csize, UM_SLEEP); 1167 1168 if (mdb_vread(cp, csize, addr) == -1) { 1169 mdb_warn("couldn't read cache at addr %p", addr); 1170 goto out2; 1171 } 1172 1173 /* 1174 * It's easy for someone to hand us an invalid cache address. 1175 * Unfortunately, it is hard for this walker to survive an 1176 * invalid cache cleanly. So we make sure that: 1177 * 1178 * 1. the vmem arena for the cache is readable, 1179 * 2. the vmem arena's quantum is a power of 2, 1180 * 3. our slabsize is a multiple of the quantum, and 1181 * 4. 
our chunksize is >0 and less than our slabsize. 1182 */ 1183 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1184 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1185 vm_quantum == 0 || 1186 (vm_quantum & (vm_quantum - 1)) != 0 || 1187 cp->cache_slabsize < vm_quantum || 1188 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1189 cp->cache_chunksize == 0 || 1190 cp->cache_chunksize > cp->cache_slabsize) { 1191 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1192 goto out2; 1193 } 1194 1195 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1196 1197 if (cp->cache_buftotal == 0) { 1198 mdb_free(cp, csize); 1199 return (WALK_DONE); 1200 } 1201 1202 /* 1203 * If they ask for bufctls, but it's a small-slab cache, 1204 * there is nothing to report. 1205 */ 1206 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1207 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1208 cp->cache_flags)); 1209 mdb_free(cp, csize); 1210 return (WALK_DONE); 1211 } 1212 1213 /* 1214 * If they want constructed buffers, but there's no constructor or 1215 * the cache has DEADBEEF checking enabled, there is nothing to report. 1216 */ 1217 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1218 cp->cache_constructor == NULL || 1219 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1220 mdb_free(cp, csize); 1221 return (WALK_DONE); 1222 } 1223 1224 /* 1225 * Read in the contents of the magazine layer 1226 */ 1227 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1228 &magmax, UM_SLEEP) == WALK_ERR) 1229 goto out2; 1230 1231 /* 1232 * We have all of the buffers from the magazines; if we are walking 1233 * allocated buffers, sort them so we can bsearch them later. 1234 */ 1235 if (type & KM_ALLOCATED) 1236 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1237 1238 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1239 1240 kmw->kmw_type = type; 1241 kmw->kmw_addr = addr; 1242 kmw->kmw_cp = cp; 1243 kmw->kmw_csize = csize; 1244 kmw->kmw_maglist = maglist; 1245 kmw->kmw_max = magmax; 1246 kmw->kmw_count = magcnt; 1247 kmw->kmw_pos = 0; 1248 1249 /* 1250 * When walking allocated buffers in a KMF_HASH cache, we walk the 1251 * hash table instead of the slab layer. 1252 */ 1253 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1254 layered = "kmem_hash"; 1255 1256 kmw->kmw_type |= KM_HASH; 1257 } else { 1258 /* 1259 * If we are walking freed buffers, we only need the 1260 * magazine layer plus the partially allocated slabs. 1261 * To walk allocated buffers, we need all of the slabs. 1262 */ 1263 if (type & KM_ALLOCATED) 1264 layered = "kmem_slab"; 1265 else 1266 layered = "kmem_slab_partial"; 1267 1268 /* 1269 * for small-slab caches, we read in the entire slab. For 1270 * freed buffers, we can just walk the freelist. For 1271 * allocated buffers, we use a 'valid' array to track 1272 * the freed buffers. 
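 * The 'valid' array holds one byte per chunk; kmem_walk_step() starts
 * it out as all ones (allocated) and clears the entry for each buffer
 * it finds on the slab's freelist, so whatever remains set is reported
 * as allocated.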
1273 */ 1274 if (!(cp->cache_flags & KMF_HASH)) { 1275 chunksize = cp->cache_chunksize; 1276 slabsize = cp->cache_slabsize; 1277 1278 kmw->kmw_ubase = mdb_alloc(slabsize + 1279 sizeof (kmem_bufctl_t), UM_SLEEP); 1280 1281 if (type & KM_ALLOCATED) 1282 kmw->kmw_valid = 1283 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1284 } 1285 } 1286 1287 status = WALK_NEXT; 1288 1289 if (mdb_layered_walk(layered, wsp) == -1) { 1290 mdb_warn("unable to start layered '%s' walk", layered); 1291 status = WALK_ERR; 1292 } 1293 1294 out1: 1295 if (status == WALK_ERR) { 1296 if (kmw->kmw_valid) 1297 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1298 1299 if (kmw->kmw_ubase) 1300 mdb_free(kmw->kmw_ubase, slabsize + 1301 sizeof (kmem_bufctl_t)); 1302 1303 if (kmw->kmw_maglist) 1304 mdb_free(kmw->kmw_maglist, 1305 kmw->kmw_max * sizeof (uintptr_t)); 1306 1307 mdb_free(kmw, sizeof (kmem_walk_t)); 1308 wsp->walk_data = NULL; 1309 } 1310 1311 out2: 1312 if (status == WALK_ERR) 1313 mdb_free(cp, csize); 1314 1315 return (status); 1316 } 1317 1318 int 1319 kmem_walk_step(mdb_walk_state_t *wsp) 1320 { 1321 kmem_walk_t *kmw = wsp->walk_data; 1322 int type = kmw->kmw_type; 1323 kmem_cache_t *cp = kmw->kmw_cp; 1324 1325 void **maglist = kmw->kmw_maglist; 1326 int magcnt = kmw->kmw_count; 1327 1328 uintptr_t chunksize, slabsize; 1329 uintptr_t addr; 1330 const kmem_slab_t *sp; 1331 const kmem_bufctl_t *bcp; 1332 kmem_bufctl_t bc; 1333 1334 int chunks; 1335 char *kbase; 1336 void *buf; 1337 int i, ret; 1338 1339 char *valid, *ubase; 1340 1341 /* 1342 * first, handle the 'kmem_hash' layered walk case 1343 */ 1344 if (type & KM_HASH) { 1345 /* 1346 * We have a buffer which has been allocated out of the 1347 * global layer. We need to make sure that it's not 1348 * actually sitting in a magazine before we report it as 1349 * an allocated buffer. 1350 */ 1351 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1352 1353 if (magcnt > 0 && 1354 bsearch(&buf, maglist, magcnt, sizeof (void *), 1355 addrcmp) != NULL) 1356 return (WALK_NEXT); 1357 1358 if (type & KM_BUFCTL) 1359 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1360 1361 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1362 } 1363 1364 ret = WALK_NEXT; 1365 1366 addr = kmw->kmw_addr; 1367 1368 /* 1369 * If we're walking freed buffers, report everything in the 1370 * magazine layer before processing the first slab. 1371 */ 1372 if ((type & KM_FREE) && magcnt != 0) { 1373 kmw->kmw_count = 0; /* only do this once */ 1374 for (i = 0; i < magcnt; i++) { 1375 buf = maglist[i]; 1376 1377 if (type & KM_BUFCTL) { 1378 uintptr_t out; 1379 1380 if (cp->cache_flags & KMF_BUFTAG) { 1381 kmem_buftag_t *btp; 1382 kmem_buftag_t tag; 1383 1384 /* LINTED - alignment */ 1385 btp = KMEM_BUFTAG(cp, buf); 1386 if (mdb_vread(&tag, sizeof (tag), 1387 (uintptr_t)btp) == -1) { 1388 mdb_warn("reading buftag for " 1389 "%p at %p", buf, btp); 1390 continue; 1391 } 1392 out = (uintptr_t)tag.bt_bufctl; 1393 } else { 1394 if (kmem_hash_lookup(cp, addr, buf, 1395 &out) == -1) 1396 continue; 1397 } 1398 ret = bufctl_walk_callback(cp, wsp, out); 1399 } else { 1400 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1401 } 1402 1403 if (ret != WALK_NEXT) 1404 return (ret); 1405 } 1406 } 1407 1408 /* 1409 * If they want constructed buffers, we're finished, since the 1410 * magazine layer holds them all. 
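 * (Free buffers that have been returned to the slab layer are no
 * longer in their constructed state, so only the magazine-held free
 * buffers reported above can satisfy a constructed-buffer walk.)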
1411 */ 1412 if (type & KM_CONSTRUCTED) 1413 return (WALK_DONE); 1414 1415 /* 1416 * Handle the buffers in the current slab 1417 */ 1418 chunksize = cp->cache_chunksize; 1419 slabsize = cp->cache_slabsize; 1420 1421 sp = wsp->walk_layer; 1422 chunks = sp->slab_chunks; 1423 kbase = sp->slab_base; 1424 1425 dprintf(("kbase is %p\n", kbase)); 1426 1427 if (!(cp->cache_flags & KMF_HASH)) { 1428 valid = kmw->kmw_valid; 1429 ubase = kmw->kmw_ubase; 1430 1431 if (mdb_vread(ubase, chunks * chunksize, 1432 (uintptr_t)kbase) == -1) { 1433 mdb_warn("failed to read slab contents at %p", kbase); 1434 return (WALK_ERR); 1435 } 1436 1437 /* 1438 * Set up the valid map as fully allocated -- we'll punch 1439 * out the freelist. 1440 */ 1441 if (type & KM_ALLOCATED) 1442 (void) memset(valid, 1, chunks); 1443 } else { 1444 valid = NULL; 1445 ubase = NULL; 1446 } 1447 1448 /* 1449 * walk the slab's freelist 1450 */ 1451 bcp = sp->slab_head; 1452 1453 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1454 1455 /* 1456 * since we could be in the middle of allocating a buffer, 1457 * our refcnt could be one higher than it aught. So we 1458 * check one further on the freelist than the count allows. 1459 */ 1460 for (i = sp->slab_refcnt; i <= chunks; i++) { 1461 uint_t ndx; 1462 1463 dprintf(("bcp is %p\n", bcp)); 1464 1465 if (bcp == NULL) { 1466 if (i == chunks) 1467 break; 1468 mdb_warn( 1469 "slab %p in cache %p freelist too short by %d\n", 1470 sp, addr, chunks - i); 1471 break; 1472 } 1473 1474 if (cp->cache_flags & KMF_HASH) { 1475 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1476 mdb_warn("failed to read bufctl ptr at %p", 1477 bcp); 1478 break; 1479 } 1480 buf = bc.bc_addr; 1481 } else { 1482 /* 1483 * Otherwise the buffer is in the slab which 1484 * we've read in; we just need to determine 1485 * its offset in the slab to find the 1486 * kmem_bufctl_t. 1487 */ 1488 bc = *((kmem_bufctl_t *) 1489 ((uintptr_t)bcp - (uintptr_t)kbase + 1490 (uintptr_t)ubase)); 1491 1492 buf = KMEM_BUF(cp, bcp); 1493 } 1494 1495 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1496 1497 if (ndx > slabsize / cp->cache_bufsize) { 1498 /* 1499 * This is very wrong; we have managed to find 1500 * a buffer in the slab which shouldn't 1501 * actually be here. Emit a warning, and 1502 * try to continue. 1503 */ 1504 mdb_warn("buf %p is out of range for " 1505 "slab %p, cache %p\n", buf, sp, addr); 1506 } else if (type & KM_ALLOCATED) { 1507 /* 1508 * we have found a buffer on the slab's freelist; 1509 * clear its entry 1510 */ 1511 valid[ndx] = 0; 1512 } else { 1513 /* 1514 * Report this freed buffer 1515 */ 1516 if (type & KM_BUFCTL) { 1517 ret = bufctl_walk_callback(cp, wsp, 1518 (uintptr_t)bcp); 1519 } else { 1520 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1521 } 1522 if (ret != WALK_NEXT) 1523 return (ret); 1524 } 1525 1526 bcp = bc.bc_next; 1527 } 1528 1529 if (bcp != NULL) { 1530 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1531 sp, addr, bcp)); 1532 } 1533 1534 /* 1535 * If we are walking freed buffers, the loop above handled reporting 1536 * them. 1537 */ 1538 if (type & KM_FREE) 1539 return (WALK_NEXT); 1540 1541 if (type & KM_BUFCTL) { 1542 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1543 "cache %p\n", addr); 1544 return (WALK_ERR); 1545 } 1546 1547 /* 1548 * Report allocated buffers, skipping buffers in the magazine layer. 1549 * We only get this far for small-slab caches. 
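 * The magazine list was sorted with addrcmp() in kmem_walk_init_common()
 * precisely so that the bsearch() below can cheaply test each remaining
 * chunk for membership in the magazine layer.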
1550 */ 1551 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1552 buf = (char *)kbase + i * chunksize; 1553 1554 if (!valid[i]) 1555 continue; /* on slab freelist */ 1556 1557 if (magcnt > 0 && 1558 bsearch(&buf, maglist, magcnt, sizeof (void *), 1559 addrcmp) != NULL) 1560 continue; /* in magazine layer */ 1561 1562 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1563 } 1564 return (ret); 1565 } 1566 1567 void 1568 kmem_walk_fini(mdb_walk_state_t *wsp) 1569 { 1570 kmem_walk_t *kmw = wsp->walk_data; 1571 uintptr_t chunksize; 1572 uintptr_t slabsize; 1573 1574 if (kmw == NULL) 1575 return; 1576 1577 if (kmw->kmw_maglist != NULL) 1578 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1579 1580 chunksize = kmw->kmw_cp->cache_chunksize; 1581 slabsize = kmw->kmw_cp->cache_slabsize; 1582 1583 if (kmw->kmw_valid != NULL) 1584 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1585 if (kmw->kmw_ubase != NULL) 1586 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1587 1588 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1589 mdb_free(kmw, sizeof (kmem_walk_t)); 1590 } 1591 1592 /*ARGSUSED*/ 1593 static int 1594 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1595 { 1596 /* 1597 * Buffers allocated from NOTOUCH caches can also show up as freed 1598 * memory in other caches. This can be a little confusing, so we 1599 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1600 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1601 */ 1602 if (c->cache_cflags & KMC_NOTOUCH) 1603 return (WALK_NEXT); 1604 1605 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1606 wsp->walk_cbdata, addr) == -1) 1607 return (WALK_DONE); 1608 1609 return (WALK_NEXT); 1610 } 1611 1612 #define KMEM_WALK_ALL(name, wsp) { \ 1613 wsp->walk_data = (name); \ 1614 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1615 return (WALK_ERR); \ 1616 return (WALK_DONE); \ 1617 } 1618 1619 int 1620 kmem_walk_init(mdb_walk_state_t *wsp) 1621 { 1622 if (wsp->walk_arg != NULL) 1623 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1624 1625 if (wsp->walk_addr == NULL) 1626 KMEM_WALK_ALL("kmem", wsp); 1627 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1628 } 1629 1630 int 1631 bufctl_walk_init(mdb_walk_state_t *wsp) 1632 { 1633 if (wsp->walk_addr == NULL) 1634 KMEM_WALK_ALL("bufctl", wsp); 1635 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1636 } 1637 1638 int 1639 freemem_walk_init(mdb_walk_state_t *wsp) 1640 { 1641 if (wsp->walk_addr == NULL) 1642 KMEM_WALK_ALL("freemem", wsp); 1643 return (kmem_walk_init_common(wsp, KM_FREE)); 1644 } 1645 1646 int 1647 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1648 { 1649 if (wsp->walk_addr == NULL) 1650 KMEM_WALK_ALL("freemem_constructed", wsp); 1651 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1652 } 1653 1654 int 1655 freectl_walk_init(mdb_walk_state_t *wsp) 1656 { 1657 if (wsp->walk_addr == NULL) 1658 KMEM_WALK_ALL("freectl", wsp); 1659 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1660 } 1661 1662 int 1663 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1664 { 1665 if (wsp->walk_addr == NULL) 1666 KMEM_WALK_ALL("freectl_constructed", wsp); 1667 return (kmem_walk_init_common(wsp, 1668 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1669 } 1670 1671 typedef struct bufctl_history_walk { 1672 void *bhw_next; 1673 kmem_cache_t *bhw_cache; 1674 kmem_slab_t *bhw_slab; 1675 hrtime_t bhw_timestamp; 1676 } bufctl_history_walk_t; 1677 1678 int 1679 
bufctl_history_walk_init(mdb_walk_state_t *wsp) 1680 { 1681 bufctl_history_walk_t *bhw; 1682 kmem_bufctl_audit_t bc; 1683 kmem_bufctl_audit_t bcn; 1684 1685 if (wsp->walk_addr == NULL) { 1686 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1687 return (WALK_ERR); 1688 } 1689 1690 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1691 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1692 return (WALK_ERR); 1693 } 1694 1695 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1696 bhw->bhw_timestamp = 0; 1697 bhw->bhw_cache = bc.bc_cache; 1698 bhw->bhw_slab = bc.bc_slab; 1699 1700 /* 1701 * sometimes the first log entry matches the base bufctl; in that 1702 * case, skip the base bufctl. 1703 */ 1704 if (bc.bc_lastlog != NULL && 1705 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1706 bc.bc_addr == bcn.bc_addr && 1707 bc.bc_cache == bcn.bc_cache && 1708 bc.bc_slab == bcn.bc_slab && 1709 bc.bc_timestamp == bcn.bc_timestamp && 1710 bc.bc_thread == bcn.bc_thread) 1711 bhw->bhw_next = bc.bc_lastlog; 1712 else 1713 bhw->bhw_next = (void *)wsp->walk_addr; 1714 1715 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1716 wsp->walk_data = bhw; 1717 1718 return (WALK_NEXT); 1719 } 1720 1721 int 1722 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1723 { 1724 bufctl_history_walk_t *bhw = wsp->walk_data; 1725 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1726 uintptr_t baseaddr = wsp->walk_addr; 1727 kmem_bufctl_audit_t bc; 1728 1729 if (addr == NULL) 1730 return (WALK_DONE); 1731 1732 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1733 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1734 return (WALK_ERR); 1735 } 1736 1737 /* 1738 * The bufctl is only valid if the address, cache, and slab are 1739 * correct. We also check that the timestamp is decreasing, to 1740 * prevent infinite loops. 1741 */ 1742 if ((uintptr_t)bc.bc_addr != baseaddr || 1743 bc.bc_cache != bhw->bhw_cache || 1744 bc.bc_slab != bhw->bhw_slab || 1745 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1746 return (WALK_DONE); 1747 1748 bhw->bhw_next = bc.bc_lastlog; 1749 bhw->bhw_timestamp = bc.bc_timestamp; 1750 1751 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1752 } 1753 1754 void 1755 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1756 { 1757 bufctl_history_walk_t *bhw = wsp->walk_data; 1758 1759 mdb_free(bhw, sizeof (*bhw)); 1760 } 1761 1762 typedef struct kmem_log_walk { 1763 kmem_bufctl_audit_t *klw_base; 1764 kmem_bufctl_audit_t **klw_sorted; 1765 kmem_log_header_t klw_lh; 1766 size_t klw_size; 1767 size_t klw_maxndx; 1768 size_t klw_ndx; 1769 } kmem_log_walk_t; 1770 1771 int 1772 kmem_log_walk_init(mdb_walk_state_t *wsp) 1773 { 1774 uintptr_t lp = wsp->walk_addr; 1775 kmem_log_walk_t *klw; 1776 kmem_log_header_t *lhp; 1777 int maxndx, i, j, k; 1778 1779 /* 1780 * By default (global walk), walk the kmem_transaction_log. Otherwise 1781 * read the log whose kmem_log_header_t is stored at walk_addr. 
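 * For example (assuming this walker is registered under the name
 * "kmem_log", as in the stock kmem module):
 *
 *     > ::walk kmem_log            - walk kmem_transaction_log
 *     > addr::walk kmem_log        - walk the log header at addr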
1782 */ 1783 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1784 mdb_warn("failed to read 'kmem_transaction_log'"); 1785 return (WALK_ERR); 1786 } 1787 1788 if (lp == NULL) { 1789 mdb_warn("log is disabled\n"); 1790 return (WALK_ERR); 1791 } 1792 1793 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1794 lhp = &klw->klw_lh; 1795 1796 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1797 mdb_warn("failed to read log header at %p", lp); 1798 mdb_free(klw, sizeof (kmem_log_walk_t)); 1799 return (WALK_ERR); 1800 } 1801 1802 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1803 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1804 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1805 1806 if (mdb_vread(klw->klw_base, klw->klw_size, 1807 (uintptr_t)lhp->lh_base) == -1) { 1808 mdb_warn("failed to read log at base %p", lhp->lh_base); 1809 mdb_free(klw->klw_base, klw->klw_size); 1810 mdb_free(klw, sizeof (kmem_log_walk_t)); 1811 return (WALK_ERR); 1812 } 1813 1814 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1815 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1816 1817 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1818 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1819 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1820 1821 for (j = 0; j < maxndx; j++) 1822 klw->klw_sorted[k++] = &chunk[j]; 1823 } 1824 1825 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1826 (int(*)(const void *, const void *))bufctlcmp); 1827 1828 klw->klw_maxndx = k; 1829 wsp->walk_data = klw; 1830 1831 return (WALK_NEXT); 1832 } 1833 1834 int 1835 kmem_log_walk_step(mdb_walk_state_t *wsp) 1836 { 1837 kmem_log_walk_t *klw = wsp->walk_data; 1838 kmem_bufctl_audit_t *bcp; 1839 1840 if (klw->klw_ndx == klw->klw_maxndx) 1841 return (WALK_DONE); 1842 1843 bcp = klw->klw_sorted[klw->klw_ndx++]; 1844 1845 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1846 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1847 } 1848 1849 void 1850 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1851 { 1852 kmem_log_walk_t *klw = wsp->walk_data; 1853 1854 mdb_free(klw->klw_base, klw->klw_size); 1855 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1856 sizeof (kmem_bufctl_audit_t *)); 1857 mdb_free(klw, sizeof (kmem_log_walk_t)); 1858 } 1859 1860 typedef struct allocdby_bufctl { 1861 uintptr_t abb_addr; 1862 hrtime_t abb_ts; 1863 } allocdby_bufctl_t; 1864 1865 typedef struct allocdby_walk { 1866 const char *abw_walk; 1867 uintptr_t abw_thread; 1868 size_t abw_nbufs; 1869 size_t abw_size; 1870 allocdby_bufctl_t *abw_buf; 1871 size_t abw_ndx; 1872 } allocdby_walk_t; 1873 1874 int 1875 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1876 allocdby_walk_t *abw) 1877 { 1878 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1879 return (WALK_NEXT); 1880 1881 if (abw->abw_nbufs == abw->abw_size) { 1882 allocdby_bufctl_t *buf; 1883 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1884 1885 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1886 1887 bcopy(abw->abw_buf, buf, oldsize); 1888 mdb_free(abw->abw_buf, oldsize); 1889 1890 abw->abw_size <<= 1; 1891 abw->abw_buf = buf; 1892 } 1893 1894 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1895 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1896 abw->abw_nbufs++; 1897 1898 return (WALK_NEXT); 1899 } 1900 1901 /*ARGSUSED*/ 1902 int 1903 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1904 { 1905 if (mdb_pwalk(abw->abw_walk, 
(mdb_walk_cb_t)allocdby_walk_bufctl, 1906 abw, addr) == -1) { 1907 mdb_warn("couldn't walk bufctl for cache %p", addr); 1908 return (WALK_DONE); 1909 } 1910 1911 return (WALK_NEXT); 1912 } 1913 1914 static int 1915 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1916 { 1917 if (lhs->abb_ts < rhs->abb_ts) 1918 return (1); 1919 if (lhs->abb_ts > rhs->abb_ts) 1920 return (-1); 1921 return (0); 1922 } 1923 1924 static int 1925 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1926 { 1927 allocdby_walk_t *abw; 1928 1929 if (wsp->walk_addr == NULL) { 1930 mdb_warn("allocdby walk doesn't support global walks\n"); 1931 return (WALK_ERR); 1932 } 1933 1934 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1935 1936 abw->abw_thread = wsp->walk_addr; 1937 abw->abw_walk = walk; 1938 abw->abw_size = 128; /* something reasonable */ 1939 abw->abw_buf = 1940 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1941 1942 wsp->walk_data = abw; 1943 1944 if (mdb_walk("kmem_cache", 1945 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1946 mdb_warn("couldn't walk kmem_cache"); 1947 allocdby_walk_fini(wsp); 1948 return (WALK_ERR); 1949 } 1950 1951 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1952 (int(*)(const void *, const void *))allocdby_cmp); 1953 1954 return (WALK_NEXT); 1955 } 1956 1957 int 1958 allocdby_walk_init(mdb_walk_state_t *wsp) 1959 { 1960 return (allocdby_walk_init_common(wsp, "bufctl")); 1961 } 1962 1963 int 1964 freedby_walk_init(mdb_walk_state_t *wsp) 1965 { 1966 return (allocdby_walk_init_common(wsp, "freectl")); 1967 } 1968 1969 int 1970 allocdby_walk_step(mdb_walk_state_t *wsp) 1971 { 1972 allocdby_walk_t *abw = wsp->walk_data; 1973 kmem_bufctl_audit_t bc; 1974 uintptr_t addr; 1975 1976 if (abw->abw_ndx == abw->abw_nbufs) 1977 return (WALK_DONE); 1978 1979 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1980 1981 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1982 mdb_warn("couldn't read bufctl at %p", addr); 1983 return (WALK_DONE); 1984 } 1985 1986 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1987 } 1988 1989 void 1990 allocdby_walk_fini(mdb_walk_state_t *wsp) 1991 { 1992 allocdby_walk_t *abw = wsp->walk_data; 1993 1994 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1995 mdb_free(abw, sizeof (allocdby_walk_t)); 1996 } 1997 1998 /*ARGSUSED*/ 1999 int 2000 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 2001 { 2002 char c[MDB_SYM_NAMLEN]; 2003 GElf_Sym sym; 2004 int i; 2005 2006 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 2007 for (i = 0; i < bcp->bc_depth; i++) { 2008 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2009 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2010 continue; 2011 if (strncmp(c, "kmem_", 5) == 0) 2012 continue; 2013 mdb_printf("%s+0x%lx", 2014 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 2015 break; 2016 } 2017 mdb_printf("\n"); 2018 2019 return (WALK_NEXT); 2020 } 2021 2022 static int 2023 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 2024 { 2025 if (!(flags & DCMD_ADDRSPEC)) 2026 return (DCMD_USAGE); 2027 2028 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 2029 2030 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 2031 mdb_warn("can't walk '%s' for %p", w, addr); 2032 return (DCMD_ERR); 2033 } 2034 2035 return (DCMD_OK); 2036 } 2037 2038 /*ARGSUSED*/ 2039 int 2040 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2041 { 2042 return (allocdby_common(addr, 
flags, "allocdby")); 2043 } 2044 2045 /*ARGSUSED*/ 2046 int 2047 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2048 { 2049 return (allocdby_common(addr, flags, "freedby")); 2050 } 2051 2052 /* 2053 * Return a string describing the address in relation to the given thread's 2054 * stack. 2055 * 2056 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 2057 * 2058 * - If the address is above the stack pointer, return an empty string 2059 * signifying that the address is active. 2060 * 2061 * - If the address is below the stack pointer, and the thread is not on proc, 2062 * return " (below sp)". 2063 * 2064 * - If the address is below the stack pointer, and the thread is on proc, 2065 * return " (possibly below sp)". Depending on context, we may or may not 2066 * have an accurate t_sp. 2067 */ 2068 static const char * 2069 stack_active(const kthread_t *t, uintptr_t addr) 2070 { 2071 uintptr_t panicstk; 2072 GElf_Sym sym; 2073 2074 if (t->t_state == TS_FREE) 2075 return (" (inactive interrupt thread)"); 2076 2077 /* 2078 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 2079 * no longer relates to the thread's real stack. 2080 */ 2081 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 2082 panicstk = (uintptr_t)sym.st_value; 2083 2084 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 2085 return (""); 2086 } 2087 2088 if (addr >= t->t_sp + STACK_BIAS) 2089 return (""); 2090 2091 if (t->t_state == TS_ONPROC) 2092 return (" (possibly below sp)"); 2093 2094 return (" (below sp)"); 2095 } 2096 2097 typedef struct whatis { 2098 uintptr_t w_addr; 2099 const kmem_cache_t *w_cache; 2100 const vmem_t *w_vmem; 2101 size_t w_slab_align; 2102 int w_slab_found; 2103 int w_found; 2104 int w_kmem_lite_count; 2105 uint_t w_verbose; 2106 uint_t w_freemem; 2107 uint_t w_all; 2108 uint_t w_bufctl; 2109 uint_t w_idspace; 2110 } whatis_t; 2111 2112 static void 2113 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w) 2114 { 2115 /* LINTED pointer cast may result in improper alignment */ 2116 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr); 2117 intptr_t stat; 2118 int count = 0; 2119 int i; 2120 pc_t callers[16]; 2121 2122 if (w->w_cache->cache_flags & KMF_REDZONE) { 2123 kmem_buftag_t bt; 2124 2125 if (mdb_vread(&bt, sizeof (bt), btaddr) == -1) 2126 goto done; 2127 2128 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 2129 2130 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) 2131 goto done; 2132 2133 /* 2134 * provide the bufctl ptr if it has useful information 2135 */ 2136 if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT)) 2137 baddr = (uintptr_t)bt.bt_bufctl; 2138 2139 if (w->w_cache->cache_flags & KMF_LITE) { 2140 count = w->w_kmem_lite_count; 2141 2142 if (count * sizeof (pc_t) > sizeof (callers)) 2143 count = 0; 2144 2145 if (count > 0 && 2146 mdb_vread(callers, count * sizeof (pc_t), 2147 btaddr + 2148 offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2149 count = 0; 2150 2151 /* 2152 * skip unused callers 2153 */ 2154 while (count > 0 && callers[count - 1] == 2155 (pc_t)KMEM_UNINITIALIZED_PATTERN) 2156 count--; 2157 } 2158 } 2159 2160 done: 2161 if (baddr == 0) 2162 mdb_printf("%p is %p+%p, %s from %s\n", 2163 w->w_addr, addr, w->w_addr - addr, 2164 w->w_freemem == FALSE ? "allocated" : "freed", 2165 w->w_cache->cache_name); 2166 else 2167 mdb_printf("%p is %p+%p, bufctl %p %s from %s\n", 2168 w->w_addr, addr, w->w_addr - addr, baddr, 2169 w->w_freemem == FALSE ? 
"allocated" : "freed", 2170 w->w_cache->cache_name); 2171 2172 if (count > 0) { 2173 mdb_inc_indent(8); 2174 mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"", 2175 callers[0], (count != 1)? ", ":"\n"); 2176 for (i = 1; i < count; i++) 2177 mdb_printf("%a%s", callers[i], 2178 (i + 1 < count)? ", ":"\n"); 2179 mdb_dec_indent(8); 2180 } 2181 } 2182 2183 /*ARGSUSED*/ 2184 static int 2185 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w) 2186 { 2187 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2188 return (WALK_NEXT); 2189 2190 whatis_print_kmem(addr, 0, w); 2191 w->w_found++; 2192 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2193 } 2194 2195 static int 2196 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 2197 { 2198 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 2199 return (WALK_NEXT); 2200 2201 mdb_printf("%p is %p+%p ", w->w_addr, 2202 vs->vs_start, w->w_addr - vs->vs_start); 2203 2204 /* 2205 * Always provide the vmem_seg pointer if it has a stack trace. 2206 */ 2207 if (w->w_bufctl == TRUE || 2208 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) { 2209 mdb_printf("(vmem_seg %p) ", addr); 2210 } 2211 2212 mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ? 2213 "freed " : "", w->w_vmem->vm_name); 2214 2215 w->w_found++; 2216 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2217 } 2218 2219 static int 2220 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 2221 { 2222 const char *nm = vmem->vm_name; 2223 w->w_vmem = vmem; 2224 w->w_freemem = FALSE; 2225 2226 if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2227 return (WALK_NEXT); 2228 2229 if (w->w_verbose) 2230 mdb_printf("Searching vmem arena %s...\n", nm); 2231 2232 if (mdb_pwalk("vmem_alloc", 2233 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2234 mdb_warn("can't walk vmem seg for %p", addr); 2235 return (WALK_NEXT); 2236 } 2237 2238 if (w->w_found && w->w_all == FALSE) 2239 return (WALK_DONE); 2240 2241 if (w->w_verbose) 2242 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 2243 2244 w->w_freemem = TRUE; 2245 2246 if (mdb_pwalk("vmem_free", 2247 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2248 mdb_warn("can't walk vmem seg for %p", addr); 2249 return (WALK_NEXT); 2250 } 2251 2252 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 2253 } 2254 2255 /*ARGSUSED*/ 2256 static int 2257 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w) 2258 { 2259 uintptr_t addr; 2260 2261 if (bcp == NULL) 2262 return (WALK_NEXT); 2263 2264 addr = (uintptr_t)bcp->bc_addr; 2265 2266 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2267 return (WALK_NEXT); 2268 2269 whatis_print_kmem(addr, baddr, w); 2270 w->w_found++; 2271 return (w->w_all == TRUE ? 
WALK_NEXT : WALK_DONE); 2272 } 2273 2274 /*ARGSUSED*/ 2275 static int 2276 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w) 2277 { 2278 uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align); 2279 2280 if ((w->w_addr - base) >= w->w_cache->cache_slabsize) 2281 return (WALK_NEXT); 2282 2283 w->w_slab_found++; 2284 return (WALK_DONE); 2285 } 2286 2287 static int 2288 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2289 { 2290 char *walk, *freewalk; 2291 mdb_walk_cb_t func; 2292 vmem_t *vmp = c->cache_arena; 2293 2294 if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2295 return (WALK_NEXT); 2296 2297 if (w->w_bufctl == FALSE) { 2298 walk = "kmem"; 2299 freewalk = "freemem"; 2300 func = (mdb_walk_cb_t)whatis_walk_kmem; 2301 } else { 2302 walk = "bufctl"; 2303 freewalk = "freectl"; 2304 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2305 } 2306 2307 w->w_cache = c; 2308 2309 if (w->w_verbose) 2310 mdb_printf("Searching %s's slabs...\n", c->cache_name); 2311 2312 /* 2313 * Verify that the address is in one of the cache's slabs. If not, 2314 * we can skip the more expensive walkers. (this is purely a 2315 * heuristic -- as long as there are no false-negatives, we'll be fine) 2316 * 2317 * We try to get the cache's arena's quantum, since to accurately 2318 * get the base of a slab, you have to align it to the quantum. If 2319 * it doesn't look sensible, we fall back to not aligning. 2320 */ 2321 if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align), 2322 (uintptr_t)&vmp->vm_quantum) == -1) { 2323 mdb_warn("unable to read %p->cache_arena->vm_quantum", c); 2324 w->w_slab_align = 1; 2325 } 2326 2327 if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 || 2328 (w->w_slab_align & (w->w_slab_align - 1))) { 2329 mdb_warn("%p's arena has invalid quantum (0x%p)\n", c, 2330 w->w_slab_align); 2331 w->w_slab_align = 1; 2332 } 2333 2334 w->w_slab_found = 0; 2335 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w, 2336 addr) == -1) { 2337 mdb_warn("can't find kmem_slab walker"); 2338 return (WALK_DONE); 2339 } 2340 if (w->w_slab_found == 0) 2341 return (WALK_NEXT); 2342 2343 if (c->cache_flags & KMF_LITE) { 2344 if (mdb_readvar(&w->w_kmem_lite_count, 2345 "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16) 2346 w->w_kmem_lite_count = 0; 2347 } 2348 2349 if (w->w_verbose) 2350 mdb_printf("Searching %s...\n", c->cache_name); 2351 2352 w->w_freemem = FALSE; 2353 2354 if (mdb_pwalk(walk, func, w, addr) == -1) { 2355 mdb_warn("can't find %s walker", walk); 2356 return (WALK_DONE); 2357 } 2358 2359 if (w->w_found && w->w_all == FALSE) 2360 return (WALK_DONE); 2361 2362 /* 2363 * We have searched for allocated memory; now search for freed memory. 2364 */ 2365 if (w->w_verbose) 2366 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2367 2368 w->w_freemem = TRUE; 2369 2370 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 2371 mdb_warn("can't find %s walker", freewalk); 2372 return (WALK_DONE); 2373 } 2374 2375 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 2376 } 2377 2378 static int 2379 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2380 { 2381 if (c->cache_cflags & KMC_NOTOUCH) 2382 return (WALK_NEXT); 2383 2384 return (whatis_walk_cache(addr, c, w)); 2385 } 2386 2387 static int 2388 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2389 { 2390 if (!(c->cache_cflags & KMC_NOTOUCH)) 2391 return (WALK_NEXT); 2392 2393 return (whatis_walk_cache(addr, c, w)); 2394 } 2395 2396 static int 2397 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w) 2398 { 2399 /* 2400 * Often, one calls ::whatis on an address from a thread structure. 2401 * We use this opportunity to short circuit this case... 2402 */ 2403 if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) { 2404 mdb_printf("%p is %p+%p, allocated as a thread structure\n", 2405 w->w_addr, addr, w->w_addr - addr); 2406 w->w_found++; 2407 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2408 } 2409 2410 if (w->w_addr < (uintptr_t)t->t_stkbase || 2411 w->w_addr > (uintptr_t)t->t_stk) 2412 return (WALK_NEXT); 2413 2414 if (t->t_stkbase == NULL) 2415 return (WALK_NEXT); 2416 2417 mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr, 2418 stack_active(t, w->w_addr)); 2419 2420 w->w_found++; 2421 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2422 } 2423 2424 static int 2425 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w) 2426 { 2427 struct module mod; 2428 char name[MODMAXNAMELEN], *where; 2429 char c[MDB_SYM_NAMLEN]; 2430 Shdr shdr; 2431 GElf_Sym sym; 2432 2433 if (m->mod_mp == NULL) 2434 return (WALK_NEXT); 2435 2436 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2437 mdb_warn("couldn't read modctl %p's module", addr); 2438 return (WALK_NEXT); 2439 } 2440 2441 if (w->w_addr >= (uintptr_t)mod.text && 2442 w->w_addr < (uintptr_t)mod.text + mod.text_size) { 2443 where = "text segment"; 2444 goto found; 2445 } 2446 2447 if (w->w_addr >= (uintptr_t)mod.data && 2448 w->w_addr < (uintptr_t)mod.data + mod.data_size) { 2449 where = "data segment"; 2450 goto found; 2451 } 2452 2453 if (w->w_addr >= (uintptr_t)mod.bss && 2454 w->w_addr < (uintptr_t)mod.bss + mod.bss_size) { 2455 where = "bss"; 2456 goto found; 2457 } 2458 2459 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2460 mdb_warn("couldn't read symbol header for %p's module", addr); 2461 return (WALK_NEXT); 2462 } 2463 2464 if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr < 2465 (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) { 2466 where = "symtab"; 2467 goto found; 2468 } 2469 2470 if (w->w_addr >= (uintptr_t)mod.symspace && 2471 w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) { 2472 where = "symspace"; 2473 goto found; 2474 } 2475 2476 return (WALK_NEXT); 2477 2478 found: 2479 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2480 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2481 2482 mdb_printf("%p is ", w->w_addr); 2483 2484 /* 2485 * If we found this address in a module, then there's a chance that 2486 * it's actually a named symbol. Try the symbol lookup. 
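 * If the lookup succeeds and w_addr lies within the symbol's bounds, the
 * answer is printed as symbol+offset ahead of "in <module>'s <segment>";
 * otherwise only the module and segment are reported.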
2487 */ 2488 if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c), 2489 &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value && 2490 w->w_addr < (uintptr_t)sym.st_value + sym.st_size) { 2491 mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value); 2492 } 2493 2494 mdb_printf("in %s's %s\n", name, where); 2495 2496 w->w_found++; 2497 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2498 } 2499 2500 /*ARGSUSED*/ 2501 static int 2502 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w) 2503 { 2504 static int machsize = 0; 2505 mdb_ctf_id_t id; 2506 2507 if (machsize == 0) { 2508 if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0) 2509 machsize = mdb_ctf_type_size(id); 2510 else { 2511 mdb_warn("could not get size of page_t"); 2512 machsize = sizeof (page_t); 2513 } 2514 } 2515 2516 if (w->w_addr < addr || w->w_addr >= addr + machsize) 2517 return (WALK_NEXT); 2518 2519 mdb_printf("%p is %p+%p, allocated as a page structure\n", 2520 w->w_addr, addr, w->w_addr - addr); 2521 2522 w->w_found++; 2523 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2524 } 2525 2526 int 2527 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2528 { 2529 whatis_t w; 2530 2531 if (!(flags & DCMD_ADDRSPEC)) 2532 return (DCMD_USAGE); 2533 2534 w.w_verbose = FALSE; 2535 w.w_bufctl = FALSE; 2536 w.w_all = FALSE; 2537 w.w_idspace = FALSE; 2538 2539 if (mdb_getopts(argc, argv, 2540 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2541 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2542 'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace, 2543 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc) 2544 return (DCMD_USAGE); 2545 2546 w.w_addr = addr; 2547 w.w_found = 0; 2548 2549 if (w.w_verbose) 2550 mdb_printf("Searching modules...\n"); 2551 2552 if (!w.w_idspace) { 2553 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w) 2554 == -1) { 2555 mdb_warn("couldn't find modctl walker"); 2556 return (DCMD_ERR); 2557 } 2558 2559 if (w.w_found && w.w_all == FALSE) 2560 return (DCMD_OK); 2561 2562 /* 2563 * Now search all thread stacks. Yes, this is a little weak; we 2564 * can save a lot of work by first checking to see if the 2565 * address is in segkp vs. segkmem. But hey, computers are 2566 * fast. 
2567 */ 2568 if (w.w_verbose) 2569 mdb_printf("Searching threads...\n"); 2570 2571 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w) 2572 == -1) { 2573 mdb_warn("couldn't find thread walker"); 2574 return (DCMD_ERR); 2575 } 2576 2577 if (w.w_found && w.w_all == FALSE) 2578 return (DCMD_OK); 2579 2580 if (w.w_verbose) 2581 mdb_printf("Searching page structures...\n"); 2582 2583 if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w) 2584 == -1) { 2585 mdb_warn("couldn't find page walker"); 2586 return (DCMD_ERR); 2587 } 2588 2589 if (w.w_found && w.w_all == FALSE) 2590 return (DCMD_OK); 2591 } 2592 2593 if (mdb_walk("kmem_cache", 2594 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2595 mdb_warn("couldn't find kmem_cache walker"); 2596 return (DCMD_ERR); 2597 } 2598 2599 if (w.w_found && w.w_all == FALSE) 2600 return (DCMD_OK); 2601 2602 if (mdb_walk("kmem_cache", 2603 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2604 mdb_warn("couldn't find kmem_cache walker"); 2605 return (DCMD_ERR); 2606 } 2607 2608 if (w.w_found && w.w_all == FALSE) 2609 return (DCMD_OK); 2610 2611 if (mdb_walk("vmem_postfix", 2612 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2613 mdb_warn("couldn't find vmem_postfix walker"); 2614 return (DCMD_ERR); 2615 } 2616 2617 if (w.w_found == 0) 2618 mdb_printf("%p is unknown\n", addr); 2619 2620 return (DCMD_OK); 2621 } 2622 2623 void 2624 whatis_help(void) 2625 { 2626 mdb_printf( 2627 "Given a virtual address, attempt to determine where it came\n" 2628 "from.\n" 2629 "\n" 2630 "\t-v\tVerbose output; display caches/arenas/etc as they are\n" 2631 "\t\tsearched\n" 2632 "\t-a\tFind all possible sources. Default behavior is to stop at\n" 2633 "\t\tthe first (most specific) source.\n" 2634 "\t-i\tSearch only identifier arenas and caches. By default\n" 2635 "\t\tthese are ignored.\n" 2636 "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n" 2637 "\t\trespectively. 
Warning: if the buffer exists, but does not\n" 2638 "\t\thave a bufctl, it will not be reported.\n"); 2639 } 2640 2641 typedef struct kmem_log_cpu { 2642 uintptr_t kmc_low; 2643 uintptr_t kmc_high; 2644 } kmem_log_cpu_t; 2645 2646 typedef struct kmem_log_data { 2647 uintptr_t kmd_addr; 2648 kmem_log_cpu_t *kmd_cpu; 2649 } kmem_log_data_t; 2650 2651 int 2652 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2653 kmem_log_data_t *kmd) 2654 { 2655 int i; 2656 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2657 size_t bufsize; 2658 2659 for (i = 0; i < NCPU; i++) { 2660 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2661 break; 2662 } 2663 2664 if (kmd->kmd_addr) { 2665 if (b->bc_cache == NULL) 2666 return (WALK_NEXT); 2667 2668 if (mdb_vread(&bufsize, sizeof (bufsize), 2669 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2670 mdb_warn( 2671 "failed to read cache_bufsize for cache at %p", 2672 b->bc_cache); 2673 return (WALK_ERR); 2674 } 2675 2676 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2677 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2678 return (WALK_NEXT); 2679 } 2680 2681 if (i == NCPU) 2682 mdb_printf(" "); 2683 else 2684 mdb_printf("%3d", i); 2685 2686 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2687 b->bc_timestamp, b->bc_thread); 2688 2689 return (WALK_NEXT); 2690 } 2691 2692 /*ARGSUSED*/ 2693 int 2694 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2695 { 2696 kmem_log_header_t lh; 2697 kmem_cpu_log_header_t clh; 2698 uintptr_t lhp, clhp; 2699 int ncpus; 2700 uintptr_t *cpu; 2701 GElf_Sym sym; 2702 kmem_log_cpu_t *kmc; 2703 int i; 2704 kmem_log_data_t kmd; 2705 uint_t opt_b = FALSE; 2706 2707 if (mdb_getopts(argc, argv, 2708 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2709 return (DCMD_USAGE); 2710 2711 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2712 mdb_warn("failed to read 'kmem_transaction_log'"); 2713 return (DCMD_ERR); 2714 } 2715 2716 if (lhp == NULL) { 2717 mdb_warn("no kmem transaction log\n"); 2718 return (DCMD_ERR); 2719 } 2720 2721 mdb_readvar(&ncpus, "ncpus"); 2722 2723 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2724 mdb_warn("failed to read log header at %p", lhp); 2725 return (DCMD_ERR); 2726 } 2727 2728 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2729 2730 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2731 2732 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2733 mdb_warn("couldn't find 'cpu' array"); 2734 return (DCMD_ERR); 2735 } 2736 2737 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2738 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2739 NCPU * sizeof (uintptr_t), sym.st_size); 2740 return (DCMD_ERR); 2741 } 2742 2743 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2744 mdb_warn("failed to read cpu array at %p", sym.st_value); 2745 return (DCMD_ERR); 2746 } 2747 2748 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2749 kmd.kmd_addr = NULL; 2750 kmd.kmd_cpu = kmc; 2751 2752 for (i = 0; i < NCPU; i++) { 2753 2754 if (cpu[i] == NULL) 2755 continue; 2756 2757 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2758 mdb_warn("cannot read cpu %d's log header at %p", 2759 i, clhp); 2760 return (DCMD_ERR); 2761 } 2762 2763 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2764 (uintptr_t)lh.lh_base; 2765 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2766 2767 clhp += sizeof (kmem_cpu_log_header_t); 2768 } 2769 2770 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2771 "TIMESTAMP", "THREAD"); 
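/*
 * Note that each log entry is attributed to a CPU by checking which CPU's
 * [kmc_low, kmc_high) chunk (computed above) contains the bufctl address;
 * entries that fall outside every CPU's current chunk print a blank CPU
 * column.
 */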
2772
2773 /*
2774 * If we have been passed an address, print out only log entries
2775 * corresponding to that address. If opt_b is specified, then interpret
2776 * the address as a bufctl.
2777 */
2778 if (flags & DCMD_ADDRSPEC) {
2779 kmem_bufctl_audit_t b;
2780
2781 if (opt_b) {
2782 kmd.kmd_addr = addr;
2783 } else {
2784 if (mdb_vread(&b,
2785 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2786 mdb_warn("failed to read bufctl at %p", addr);
2787 return (DCMD_ERR);
2788 }
2789
2790 (void) kmem_log_walk(addr, &b, &kmd);
2791
2792 return (DCMD_OK);
2793 }
2794 }
2795
2796 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2797 mdb_warn("can't find kmem log walker");
2798 return (DCMD_ERR);
2799 }
2800
2801 return (DCMD_OK);
2802 }
2803
2804 typedef struct bufctl_history_cb {
2805 int bhc_flags;
2806 int bhc_argc;
2807 const mdb_arg_t *bhc_argv;
2808 int bhc_ret;
2809 } bufctl_history_cb_t;
2810
2811 /*ARGSUSED*/
2812 static int
2813 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2814 {
2815 bufctl_history_cb_t *bhc = arg;
2816
2817 bhc->bhc_ret =
2818 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2819
2820 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2821
2822 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2823 }
2824
2825 void
2826 bufctl_help(void)
2827 {
2828 mdb_printf("%s",
2829 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2830 mdb_dec_indent(2);
2831 mdb_printf("%<b>OPTIONS%</b>\n");
2832 mdb_inc_indent(2);
2833 mdb_printf("%s",
2834 " -v Display the full content of the bufctl, including its stack trace\n"
2835 " -h retrieve the bufctl's transaction history, if available\n"
2836 " -a addr\n"
2837 " filter out bufctls not involving the buffer at addr\n"
2838 " -c caller\n"
2839 " filter out bufctls without the function/PC in their stack trace\n"
2840 " -e earliest\n"
2841 " filter out bufctls timestamped before earliest\n"
2842 " -l latest\n"
2843 " filter out bufctls timestamped after latest\n"
2844 " -t thread\n"
2845 " filter out bufctls not involving thread\n");
2846 }
2847
2848 int
2849 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2850 {
2851 kmem_bufctl_audit_t bc;
2852 uint_t verbose = FALSE;
2853 uint_t history = FALSE;
2854 uint_t in_history = FALSE;
2855 uintptr_t caller = NULL, thread = NULL;
2856 uintptr_t laddr, haddr, baddr = NULL;
2857 hrtime_t earliest = 0, latest = 0;
2858 int i, depth;
2859 char c[MDB_SYM_NAMLEN];
2860 GElf_Sym sym;
2861
2862 if (mdb_getopts(argc, argv,
2863 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2864 'h', MDB_OPT_SETBITS, TRUE, &history,
2865 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2866 'c', MDB_OPT_UINTPTR, &caller,
2867 't', MDB_OPT_UINTPTR, &thread,
2868 'e', MDB_OPT_UINT64, &earliest,
2869 'l', MDB_OPT_UINT64, &latest,
2870 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2871 return (DCMD_USAGE);
2872
2873 if (!(flags & DCMD_ADDRSPEC))
2874 return (DCMD_USAGE);
2875
2876 if (in_history && !history)
2877 return (DCMD_USAGE);
2878
2879 if (history && !in_history) {
2880 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2881 UM_SLEEP | UM_GC);
2882 bufctl_history_cb_t bhc;
2883
2884 nargv[0].a_type = MDB_TYPE_STRING;
2885 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2886
2887 for (i = 0; i < argc; i++)
2888 nargv[i + 1] = argv[i];
2889
2890 /*
2891 * When in history mode, we treat each element as if it
2892 * were in a separate loop, so that the headers group
2893 * bufctls with similar histories.
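 * Conceptually this is like walking "bufctl_history" from addr and handing
 * each element to ::bufctl with the caller's arguments (a sketch only, not
 * an exact pipeline): the "-H" argument we prepend above keeps the recursive
 * invocation from walking the history again, and clearing DCMD_LOOPFIRST
 * after the first element lets a single header cover each history.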
2894 */ 2895 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2896 bhc.bhc_argc = argc + 1; 2897 bhc.bhc_argv = nargv; 2898 bhc.bhc_ret = DCMD_OK; 2899 2900 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2901 addr) == -1) { 2902 mdb_warn("unable to walk bufctl_history"); 2903 return (DCMD_ERR); 2904 } 2905 2906 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2907 mdb_printf("\n"); 2908 2909 return (bhc.bhc_ret); 2910 } 2911 2912 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2913 if (verbose) { 2914 mdb_printf("%16s %16s %16s %16s\n" 2915 "%<u>%16s %16s %16s %16s%</u>\n", 2916 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2917 "", "CACHE", "LASTLOG", "CONTENTS"); 2918 } else { 2919 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2920 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2921 } 2922 } 2923 2924 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2925 mdb_warn("couldn't read bufctl at %p", addr); 2926 return (DCMD_ERR); 2927 } 2928 2929 /* 2930 * Guard against bogus bc_depth in case the bufctl is corrupt or 2931 * the address does not really refer to a bufctl. 2932 */ 2933 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2934 2935 if (caller != NULL) { 2936 laddr = caller; 2937 haddr = caller + sizeof (caller); 2938 2939 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2940 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2941 /* 2942 * We were provided an exact symbol value; any 2943 * address in the function is valid. 2944 */ 2945 laddr = (uintptr_t)sym.st_value; 2946 haddr = (uintptr_t)sym.st_value + sym.st_size; 2947 } 2948 2949 for (i = 0; i < depth; i++) 2950 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2951 break; 2952 2953 if (i == depth) 2954 return (DCMD_OK); 2955 } 2956 2957 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2958 return (DCMD_OK); 2959 2960 if (earliest != 0 && bc.bc_timestamp < earliest) 2961 return (DCMD_OK); 2962 2963 if (latest != 0 && bc.bc_timestamp > latest) 2964 return (DCMD_OK); 2965 2966 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2967 return (DCMD_OK); 2968 2969 if (flags & DCMD_PIPE_OUT) { 2970 mdb_printf("%#lr\n", addr); 2971 return (DCMD_OK); 2972 } 2973 2974 if (verbose) { 2975 mdb_printf( 2976 "%<b>%16p%</b> %16p %16llx %16p\n" 2977 "%16s %16p %16p %16p\n", 2978 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2979 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2980 2981 mdb_inc_indent(17); 2982 for (i = 0; i < depth; i++) 2983 mdb_printf("%a\n", bc.bc_stack[i]); 2984 mdb_dec_indent(17); 2985 mdb_printf("\n"); 2986 } else { 2987 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2988 bc.bc_timestamp, bc.bc_thread); 2989 2990 for (i = 0; i < depth; i++) { 2991 if (mdb_lookup_by_addr(bc.bc_stack[i], 2992 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2993 continue; 2994 if (strncmp(c, "kmem_", 5) == 0) 2995 continue; 2996 mdb_printf(" %a\n", bc.bc_stack[i]); 2997 break; 2998 } 2999 3000 if (i >= depth) 3001 mdb_printf("\n"); 3002 } 3003 3004 return (DCMD_OK); 3005 } 3006 3007 typedef struct kmem_verify { 3008 uint64_t *kmv_buf; /* buffer to read cache contents into */ 3009 size_t kmv_size; /* number of bytes in kmv_buf */ 3010 int kmv_corruption; /* > 0 if corruption found. */ 3011 int kmv_besilent; /* report actual corruption sites */ 3012 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3013 } kmem_verify_t; 3014 3015 /* 3016 * verify_pattern() 3017 * verify that buf is filled with the pattern pat. 
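 *	Returns the byte offset of the first 64-bit word that does not match
 *	pat, or -1 if the entire buffer matches. Callers use it roughly as
 *	follows (illustrative sketch only, mirroring verify_free() below):
 *
 *		if ((corrupt = verify_pattern(buf, cp->cache_verify,
 *		    KMEM_FREE_PATTERN)) >= 0)
 *			mdb_printf("first bad word at %p\n", addr + corrupt);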
3018 */ 3019 static int64_t 3020 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3021 { 3022 /*LINTED*/ 3023 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3024 uint64_t *buf; 3025 3026 for (buf = buf_arg; buf < bufend; buf++) 3027 if (*buf != pat) 3028 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3029 return (-1); 3030 } 3031 3032 /* 3033 * verify_buftag() 3034 * verify that btp->bt_bxstat == (bcp ^ pat) 3035 */ 3036 static int 3037 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3038 { 3039 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3040 } 3041 3042 /* 3043 * verify_free() 3044 * verify the integrity of a free block of memory by checking 3045 * that it is filled with 0xdeadbeef and that its buftag is sane. 3046 */ 3047 /*ARGSUSED1*/ 3048 static int 3049 verify_free(uintptr_t addr, const void *data, void *private) 3050 { 3051 kmem_verify_t *kmv = (kmem_verify_t *)private; 3052 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3053 int64_t corrupt; /* corruption offset */ 3054 kmem_buftag_t *buftagp; /* ptr to buftag */ 3055 kmem_cache_t *cp = &kmv->kmv_cache; 3056 int besilent = kmv->kmv_besilent; 3057 3058 /*LINTED*/ 3059 buftagp = KMEM_BUFTAG(cp, buf); 3060 3061 /* 3062 * Read the buffer to check. 3063 */ 3064 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3065 if (!besilent) 3066 mdb_warn("couldn't read %p", addr); 3067 return (WALK_NEXT); 3068 } 3069 3070 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3071 KMEM_FREE_PATTERN)) >= 0) { 3072 if (!besilent) 3073 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3074 addr, (uintptr_t)addr + corrupt); 3075 goto corrupt; 3076 } 3077 /* 3078 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3079 * the first bytes of the buffer, hence we cannot check for red 3080 * zone corruption. 3081 */ 3082 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3083 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3084 if (!besilent) 3085 mdb_printf("buffer %p (free) seems to " 3086 "have a corrupt redzone pattern\n", addr); 3087 goto corrupt; 3088 } 3089 3090 /* 3091 * confirm bufctl pointer integrity. 3092 */ 3093 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3094 if (!besilent) 3095 mdb_printf("buffer %p (free) has a corrupt " 3096 "buftag\n", addr); 3097 goto corrupt; 3098 } 3099 3100 return (WALK_NEXT); 3101 corrupt: 3102 kmv->kmv_corruption++; 3103 return (WALK_NEXT); 3104 } 3105 3106 /* 3107 * verify_alloc() 3108 * Verify that the buftag of an allocated buffer makes sense with respect 3109 * to the buffer. 3110 */ 3111 /*ARGSUSED1*/ 3112 static int 3113 verify_alloc(uintptr_t addr, const void *data, void *private) 3114 { 3115 kmem_verify_t *kmv = (kmem_verify_t *)private; 3116 kmem_cache_t *cp = &kmv->kmv_cache; 3117 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3118 /*LINTED*/ 3119 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3120 uint32_t *ip = (uint32_t *)buftagp; 3121 uint8_t *bp = (uint8_t *)buf; 3122 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3123 int besilent = kmv->kmv_besilent; 3124 3125 /* 3126 * Read the buffer to check. 3127 */ 3128 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3129 if (!besilent) 3130 mdb_warn("couldn't read %p", addr); 3131 return (WALK_NEXT); 3132 } 3133 3134 /* 3135 * There are two cases to handle: 3136 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3137 * 0xfeedfacefeedface at the end of it 3138 * 2. 
If the buf was alloc'd using kmem_alloc, it will have 3139 * 0xbb just past the end of the region in use. At the buftag, 3140 * it will have 0xfeedface (or, if the whole buffer is in use, 3141 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3142 * endianness), followed by 32 bits containing the offset of the 3143 * 0xbb byte in the buffer. 3144 * 3145 * Finally, the two 32-bit words that comprise the second half of the 3146 * buftag should xor to KMEM_BUFTAG_ALLOC 3147 */ 3148 3149 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3150 looks_ok = 1; 3151 else if (!KMEM_SIZE_VALID(ip[1])) 3152 size_ok = 0; 3153 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3154 looks_ok = 1; 3155 else 3156 size_ok = 0; 3157 3158 if (!size_ok) { 3159 if (!besilent) 3160 mdb_printf("buffer %p (allocated) has a corrupt " 3161 "redzone size encoding\n", addr); 3162 goto corrupt; 3163 } 3164 3165 if (!looks_ok) { 3166 if (!besilent) 3167 mdb_printf("buffer %p (allocated) has a corrupt " 3168 "redzone signature\n", addr); 3169 goto corrupt; 3170 } 3171 3172 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3173 if (!besilent) 3174 mdb_printf("buffer %p (allocated) has a " 3175 "corrupt buftag\n", addr); 3176 goto corrupt; 3177 } 3178 3179 return (WALK_NEXT); 3180 corrupt: 3181 kmv->kmv_corruption++; 3182 return (WALK_NEXT); 3183 } 3184 3185 /*ARGSUSED2*/ 3186 int 3187 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3188 { 3189 if (flags & DCMD_ADDRSPEC) { 3190 int check_alloc = 0, check_free = 0; 3191 kmem_verify_t kmv; 3192 3193 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3194 addr) == -1) { 3195 mdb_warn("couldn't read kmem_cache %p", addr); 3196 return (DCMD_ERR); 3197 } 3198 3199 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3200 sizeof (kmem_buftag_t); 3201 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3202 kmv.kmv_corruption = 0; 3203 3204 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3205 check_alloc = 1; 3206 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3207 check_free = 1; 3208 } else { 3209 if (!(flags & DCMD_LOOP)) { 3210 mdb_warn("cache %p (%s) does not have " 3211 "redzone checking enabled\n", addr, 3212 kmv.kmv_cache.cache_name); 3213 } 3214 return (DCMD_ERR); 3215 } 3216 3217 if (flags & DCMD_LOOP) { 3218 /* 3219 * table mode, don't print out every corrupt buffer 3220 */ 3221 kmv.kmv_besilent = 1; 3222 } else { 3223 mdb_printf("Summary for cache '%s'\n", 3224 kmv.kmv_cache.cache_name); 3225 mdb_inc_indent(2); 3226 kmv.kmv_besilent = 0; 3227 } 3228 3229 if (check_alloc) 3230 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3231 if (check_free) 3232 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3233 3234 if (flags & DCMD_LOOP) { 3235 if (kmv.kmv_corruption == 0) { 3236 mdb_printf("%-*s %?p clean\n", 3237 KMEM_CACHE_NAMELEN, 3238 kmv.kmv_cache.cache_name, addr); 3239 } else { 3240 char *s = ""; /* optional s in "buffer[s]" */ 3241 if (kmv.kmv_corruption > 1) 3242 s = "s"; 3243 3244 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3245 KMEM_CACHE_NAMELEN, 3246 kmv.kmv_cache.cache_name, addr, 3247 kmv.kmv_corruption, s); 3248 } 3249 } else { 3250 /* 3251 * This is the more verbose mode, when the user has 3252 * type addr::kmem_verify. If the cache was clean, 3253 * nothing will have yet been printed. So say something. 
3254 */ 3255 if (kmv.kmv_corruption == 0) 3256 mdb_printf("clean\n"); 3257 3258 mdb_dec_indent(2); 3259 } 3260 } else { 3261 /* 3262 * If the user didn't specify a cache to verify, we'll walk all 3263 * kmem_cache's, specifying ourself as a callback for each... 3264 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3265 */ 3266 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3267 "Cache Name", "Addr", "Cache Integrity"); 3268 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3269 } 3270 3271 return (DCMD_OK); 3272 } 3273 3274 typedef struct vmem_node { 3275 struct vmem_node *vn_next; 3276 struct vmem_node *vn_parent; 3277 struct vmem_node *vn_sibling; 3278 struct vmem_node *vn_children; 3279 uintptr_t vn_addr; 3280 int vn_marked; 3281 vmem_t vn_vmem; 3282 } vmem_node_t; 3283 3284 typedef struct vmem_walk { 3285 vmem_node_t *vw_root; 3286 vmem_node_t *vw_current; 3287 } vmem_walk_t; 3288 3289 int 3290 vmem_walk_init(mdb_walk_state_t *wsp) 3291 { 3292 uintptr_t vaddr, paddr; 3293 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3294 vmem_walk_t *vw; 3295 3296 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3297 mdb_warn("couldn't read 'vmem_list'"); 3298 return (WALK_ERR); 3299 } 3300 3301 while (vaddr != NULL) { 3302 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3303 vp->vn_addr = vaddr; 3304 vp->vn_next = head; 3305 head = vp; 3306 3307 if (vaddr == wsp->walk_addr) 3308 current = vp; 3309 3310 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3311 mdb_warn("couldn't read vmem_t at %p", vaddr); 3312 goto err; 3313 } 3314 3315 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3316 } 3317 3318 for (vp = head; vp != NULL; vp = vp->vn_next) { 3319 3320 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3321 vp->vn_sibling = root; 3322 root = vp; 3323 continue; 3324 } 3325 3326 for (parent = head; parent != NULL; parent = parent->vn_next) { 3327 if (parent->vn_addr != paddr) 3328 continue; 3329 vp->vn_sibling = parent->vn_children; 3330 parent->vn_children = vp; 3331 vp->vn_parent = parent; 3332 break; 3333 } 3334 3335 if (parent == NULL) { 3336 mdb_warn("couldn't find %p's parent (%p)\n", 3337 vp->vn_addr, paddr); 3338 goto err; 3339 } 3340 } 3341 3342 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3343 vw->vw_root = root; 3344 3345 if (current != NULL) 3346 vw->vw_current = current; 3347 else 3348 vw->vw_current = root; 3349 3350 wsp->walk_data = vw; 3351 return (WALK_NEXT); 3352 err: 3353 for (vp = head; head != NULL; vp = head) { 3354 head = vp->vn_next; 3355 mdb_free(vp, sizeof (vmem_node_t)); 3356 } 3357 3358 return (WALK_ERR); 3359 } 3360 3361 int 3362 vmem_walk_step(mdb_walk_state_t *wsp) 3363 { 3364 vmem_walk_t *vw = wsp->walk_data; 3365 vmem_node_t *vp; 3366 int rval; 3367 3368 if ((vp = vw->vw_current) == NULL) 3369 return (WALK_DONE); 3370 3371 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3372 3373 if (vp->vn_children != NULL) { 3374 vw->vw_current = vp->vn_children; 3375 return (rval); 3376 } 3377 3378 do { 3379 vw->vw_current = vp->vn_sibling; 3380 vp = vp->vn_parent; 3381 } while (vw->vw_current == NULL && vp != NULL); 3382 3383 return (rval); 3384 } 3385 3386 /* 3387 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3388 * children are visited before their parent. We perform the postfix walk 3389 * iteratively (rather than recursively) to allow mdb to regain control 3390 * after each callback. 
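 * For example, if arena A has children B and C, and B has child D, the
 * postfix order is D, B, C, A; ::whatis depends on this so that the most
 * specific (leaf) arenas are searched before their sources.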
3391 */ 3392 int 3393 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3394 { 3395 vmem_walk_t *vw = wsp->walk_data; 3396 vmem_node_t *vp = vw->vw_current; 3397 int rval; 3398 3399 /* 3400 * If this node is marked, then we know that we have already visited 3401 * all of its children. If the node has any siblings, they need to 3402 * be visited next; otherwise, we need to visit the parent. Note 3403 * that vp->vn_marked will only be zero on the first invocation of 3404 * the step function. 3405 */ 3406 if (vp->vn_marked) { 3407 if (vp->vn_sibling != NULL) 3408 vp = vp->vn_sibling; 3409 else if (vp->vn_parent != NULL) 3410 vp = vp->vn_parent; 3411 else { 3412 /* 3413 * We have neither a parent, nor a sibling, and we 3414 * have already been visited; we're done. 3415 */ 3416 return (WALK_DONE); 3417 } 3418 } 3419 3420 /* 3421 * Before we visit this node, visit its children. 3422 */ 3423 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3424 vp = vp->vn_children; 3425 3426 vp->vn_marked = 1; 3427 vw->vw_current = vp; 3428 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3429 3430 return (rval); 3431 } 3432 3433 void 3434 vmem_walk_fini(mdb_walk_state_t *wsp) 3435 { 3436 vmem_walk_t *vw = wsp->walk_data; 3437 vmem_node_t *root = vw->vw_root; 3438 int done; 3439 3440 if (root == NULL) 3441 return; 3442 3443 if ((vw->vw_root = root->vn_children) != NULL) 3444 vmem_walk_fini(wsp); 3445 3446 vw->vw_root = root->vn_sibling; 3447 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3448 mdb_free(root, sizeof (vmem_node_t)); 3449 3450 if (done) { 3451 mdb_free(vw, sizeof (vmem_walk_t)); 3452 } else { 3453 vmem_walk_fini(wsp); 3454 } 3455 } 3456 3457 typedef struct vmem_seg_walk { 3458 uint8_t vsw_type; 3459 uintptr_t vsw_start; 3460 uintptr_t vsw_current; 3461 } vmem_seg_walk_t; 3462 3463 /*ARGSUSED*/ 3464 int 3465 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3466 { 3467 vmem_seg_walk_t *vsw; 3468 3469 if (wsp->walk_addr == NULL) { 3470 mdb_warn("vmem_%s does not support global walks\n", name); 3471 return (WALK_ERR); 3472 } 3473 3474 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3475 3476 vsw->vsw_type = type; 3477 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3478 vsw->vsw_current = vsw->vsw_start; 3479 3480 return (WALK_NEXT); 3481 } 3482 3483 /* 3484 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
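 * We use 0 as a wildcard: vmem_seg_walk_step() only filters on vsw_type when
 * it is not VMEM_NONE, so the generic "vmem_seg" walk emits segments of
 * every type.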
3485 */ 3486 #define VMEM_NONE 0 3487 3488 int 3489 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3490 { 3491 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3492 } 3493 3494 int 3495 vmem_free_walk_init(mdb_walk_state_t *wsp) 3496 { 3497 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3498 } 3499 3500 int 3501 vmem_span_walk_init(mdb_walk_state_t *wsp) 3502 { 3503 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3504 } 3505 3506 int 3507 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3508 { 3509 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3510 } 3511 3512 int 3513 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3514 { 3515 vmem_seg_t seg; 3516 vmem_seg_walk_t *vsw = wsp->walk_data; 3517 uintptr_t addr = vsw->vsw_current; 3518 static size_t seg_size = 0; 3519 int rval; 3520 3521 if (!seg_size) { 3522 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3523 mdb_warn("failed to read 'vmem_seg_size'"); 3524 seg_size = sizeof (vmem_seg_t); 3525 } 3526 } 3527 3528 if (seg_size < sizeof (seg)) 3529 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3530 3531 if (mdb_vread(&seg, seg_size, addr) == -1) { 3532 mdb_warn("couldn't read vmem_seg at %p", addr); 3533 return (WALK_ERR); 3534 } 3535 3536 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3537 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3538 rval = WALK_NEXT; 3539 } else { 3540 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3541 } 3542 3543 if (vsw->vsw_current == vsw->vsw_start) 3544 return (WALK_DONE); 3545 3546 return (rval); 3547 } 3548 3549 void 3550 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3551 { 3552 vmem_seg_walk_t *vsw = wsp->walk_data; 3553 3554 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3555 } 3556 3557 #define VMEM_NAMEWIDTH 22 3558 3559 int 3560 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3561 { 3562 vmem_t v, parent; 3563 vmem_kstat_t *vkp = &v.vm_kstat; 3564 uintptr_t paddr; 3565 int ident = 0; 3566 char c[VMEM_NAMEWIDTH]; 3567 3568 if (!(flags & DCMD_ADDRSPEC)) { 3569 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3570 mdb_warn("can't walk vmem"); 3571 return (DCMD_ERR); 3572 } 3573 return (DCMD_OK); 3574 } 3575 3576 if (DCMD_HDRSPEC(flags)) 3577 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3578 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3579 "TOTAL", "SUCCEED", "FAIL"); 3580 3581 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3582 mdb_warn("couldn't read vmem at %p", addr); 3583 return (DCMD_ERR); 3584 } 3585 3586 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3587 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3588 mdb_warn("couldn't trace %p's ancestry", addr); 3589 ident = 0; 3590 break; 3591 } 3592 paddr = (uintptr_t)parent.vm_source; 3593 } 3594 3595 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3596 3597 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3598 addr, VMEM_NAMEWIDTH, c, 3599 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3600 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3601 3602 return (DCMD_OK); 3603 } 3604 3605 void 3606 vmem_seg_help(void) 3607 { 3608 mdb_printf("%s", 3609 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3610 "\n" 3611 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3612 "representing a single chunk of data. 
Only ALLOC segments have debugging\n"
3613 "information.\n");
3614 mdb_dec_indent(2);
3615 mdb_printf("%<b>OPTIONS%</b>\n");
3616 mdb_inc_indent(2);
3617 mdb_printf("%s",
3618 " -v Display the full content of the vmem_seg, including its stack trace\n"
3619 " -s report the size of the segment, instead of the end address\n"
3620 " -c caller\n"
3621 " filter out segments without the function/PC in their stack trace\n"
3622 " -e earliest\n"
3623 " filter out segments timestamped before earliest\n"
3624 " -l latest\n"
3625 " filter out segments timestamped after latest\n"
3626 " -m minsize\n"
3627 " filter out segments smaller than minsize\n"
3628 " -M maxsize\n"
3629 " filter out segments larger than maxsize\n"
3630 " -t thread\n"
3631 " filter out segments not involving thread\n"
3632 " -T type\n"
3633 " filter out segments not of type 'type'\n"
3634 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3635 }
3636
3637 /*ARGSUSED*/
3638 int
3639 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3640 {
3641 vmem_seg_t vs;
3642 pc_t *stk = vs.vs_stack;
3643 uintptr_t sz;
3644 uint8_t t;
3645 const char *type = NULL;
3646 GElf_Sym sym;
3647 char c[MDB_SYM_NAMLEN];
3648 int no_debug;
3649 int i;
3650 int depth;
3651 uintptr_t laddr, haddr;
3652
3653 uintptr_t caller = NULL, thread = NULL;
3654 uintptr_t minsize = 0, maxsize = 0;
3655
3656 hrtime_t earliest = 0, latest = 0;
3657
3658 uint_t size = 0;
3659 uint_t verbose = 0;
3660
3661 if (!(flags & DCMD_ADDRSPEC))
3662 return (DCMD_USAGE);
3663
3664 if (mdb_getopts(argc, argv,
3665 'c', MDB_OPT_UINTPTR, &caller,
3666 'e', MDB_OPT_UINT64, &earliest,
3667 'l', MDB_OPT_UINT64, &latest,
3668 's', MDB_OPT_SETBITS, TRUE, &size,
3669 'm', MDB_OPT_UINTPTR, &minsize,
3670 'M', MDB_OPT_UINTPTR, &maxsize,
3671 't', MDB_OPT_UINTPTR, &thread,
3672 'T', MDB_OPT_STR, &type,
3673 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3674 NULL) != argc)
3675 return (DCMD_USAGE);
3676
3677 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3678 if (verbose) {
3679 mdb_printf("%16s %4s %16s %16s %16s\n"
3680 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3681 "ADDR", "TYPE", "START", "END", "SIZE",
3682 "", "", "THREAD", "TIMESTAMP", "");
3683 } else {
3684 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3685 "START", size?
"SIZE" : "END", "WHO"); 3686 } 3687 } 3688 3689 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3690 mdb_warn("couldn't read vmem_seg at %p", addr); 3691 return (DCMD_ERR); 3692 } 3693 3694 if (type != NULL) { 3695 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3696 t = VMEM_ALLOC; 3697 else if (strcmp(type, "FREE") == 0) 3698 t = VMEM_FREE; 3699 else if (strcmp(type, "SPAN") == 0) 3700 t = VMEM_SPAN; 3701 else if (strcmp(type, "ROTR") == 0 || 3702 strcmp(type, "ROTOR") == 0) 3703 t = VMEM_ROTOR; 3704 else if (strcmp(type, "WLKR") == 0 || 3705 strcmp(type, "WALKER") == 0) 3706 t = VMEM_WALKER; 3707 else { 3708 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3709 type); 3710 return (DCMD_ERR); 3711 } 3712 3713 if (vs.vs_type != t) 3714 return (DCMD_OK); 3715 } 3716 3717 sz = vs.vs_end - vs.vs_start; 3718 3719 if (minsize != 0 && sz < minsize) 3720 return (DCMD_OK); 3721 3722 if (maxsize != 0 && sz > maxsize) 3723 return (DCMD_OK); 3724 3725 t = vs.vs_type; 3726 depth = vs.vs_depth; 3727 3728 /* 3729 * debug info, when present, is only accurate for VMEM_ALLOC segments 3730 */ 3731 no_debug = (t != VMEM_ALLOC) || 3732 (depth == 0 || depth > VMEM_STACK_DEPTH); 3733 3734 if (no_debug) { 3735 if (caller != NULL || thread != NULL || earliest != 0 || 3736 latest != 0) 3737 return (DCMD_OK); /* not enough info */ 3738 } else { 3739 if (caller != NULL) { 3740 laddr = caller; 3741 haddr = caller + sizeof (caller); 3742 3743 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3744 sizeof (c), &sym) != -1 && 3745 caller == (uintptr_t)sym.st_value) { 3746 /* 3747 * We were provided an exact symbol value; any 3748 * address in the function is valid. 3749 */ 3750 laddr = (uintptr_t)sym.st_value; 3751 haddr = (uintptr_t)sym.st_value + sym.st_size; 3752 } 3753 3754 for (i = 0; i < depth; i++) 3755 if (vs.vs_stack[i] >= laddr && 3756 vs.vs_stack[i] < haddr) 3757 break; 3758 3759 if (i == depth) 3760 return (DCMD_OK); 3761 } 3762 3763 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3764 return (DCMD_OK); 3765 3766 if (earliest != 0 && vs.vs_timestamp < earliest) 3767 return (DCMD_OK); 3768 3769 if (latest != 0 && vs.vs_timestamp > latest) 3770 return (DCMD_OK); 3771 } 3772 3773 type = (t == VMEM_ALLOC ? "ALLC" : 3774 t == VMEM_FREE ? "FREE" : 3775 t == VMEM_SPAN ? "SPAN" : 3776 t == VMEM_ROTOR ? "ROTR" : 3777 t == VMEM_WALKER ? "WLKR" : 3778 "????"); 3779 3780 if (flags & DCMD_PIPE_OUT) { 3781 mdb_printf("%#lr\n", addr); 3782 return (DCMD_OK); 3783 } 3784 3785 if (verbose) { 3786 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3787 addr, type, vs.vs_start, vs.vs_end, sz); 3788 3789 if (no_debug) 3790 return (DCMD_OK); 3791 3792 mdb_printf("%16s %4s %16p %16llx\n", 3793 "", "", vs.vs_thread, vs.vs_timestamp); 3794 3795 mdb_inc_indent(17); 3796 for (i = 0; i < depth; i++) { 3797 mdb_printf("%a\n", stk[i]); 3798 } 3799 mdb_dec_indent(17); 3800 mdb_printf("\n"); 3801 } else { 3802 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3803 vs.vs_start, size? 
sz : vs.vs_end);
3804
3805 if (no_debug) {
3806 mdb_printf("\n");
3807 return (DCMD_OK);
3808 }
3809
3810 for (i = 0; i < depth; i++) {
3811 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3812 c, sizeof (c), &sym) == -1)
3813 continue;
3814 if (strncmp(c, "vmem_", 5) == 0)
3815 continue;
3816 break;
3817 }
3818 mdb_printf(" %a\n", stk[i]);
3819 }
3820 return (DCMD_OK);
3821 }
3822
3823 typedef struct kmalog_data {
3824 uintptr_t kma_addr;
3825 hrtime_t kma_newest;
3826 } kmalog_data_t;
3827
3828 /*ARGSUSED*/
3829 static int
3830 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3831 {
3832 char name[KMEM_CACHE_NAMELEN + 1];
3833 hrtime_t delta;
3834 int i, depth;
3835 size_t bufsize;
3836
3837 if (bcp->bc_timestamp == 0)
3838 return (WALK_DONE);
3839
3840 if (kma->kma_newest == 0)
3841 kma->kma_newest = bcp->bc_timestamp;
3842
3843 if (kma->kma_addr) {
3844 if (mdb_vread(&bufsize, sizeof (bufsize),
3845 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3846 mdb_warn(
3847 "failed to read cache_bufsize for cache at %p",
3848 bcp->bc_cache);
3849 return (WALK_ERR);
3850 }
3851
3852 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3853 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3854 return (WALK_NEXT);
3855 }
3856
3857 delta = kma->kma_newest - bcp->bc_timestamp;
3858 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3859
3860 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3861 &bcp->bc_cache->cache_name) <= 0)
3862 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3863
3864 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3865 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3866
3867 for (i = 0; i < depth; i++)
3868 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3869
3870 return (WALK_NEXT);
3871 }
3872
3873 int
3874 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3875 {
3876 const char *logname = "kmem_transaction_log";
3877 kmalog_data_t kma;
3878
3879 if (argc > 1)
3880 return (DCMD_USAGE);
3881
3882 kma.kma_newest = 0;
3883 if (flags & DCMD_ADDRSPEC)
3884 kma.kma_addr = addr;
3885 else
3886 kma.kma_addr = NULL;
3887
3888 if (argc > 0) {
3889 if (argv->a_type != MDB_TYPE_STRING)
3890 return (DCMD_USAGE);
3891 if (strcmp(argv->a_un.a_str, "fail") == 0)
3892 logname = "kmem_failure_log";
3893 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3894 logname = "kmem_slab_log";
3895 else
3896 return (DCMD_USAGE);
3897 }
3898
3899 if (mdb_readvar(&addr, logname) == -1) {
3900 mdb_warn("failed to read %s log header pointer", logname);
3901 return (DCMD_ERR);
3902 }
3903
3904 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3905 mdb_warn("failed to walk kmem log");
3906 return (DCMD_ERR);
3907 }
3908
3909 return (DCMD_OK);
3910 }
3911
3912 /*
3913 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3914 * The first piece is a structure which we use to accumulate kmem_cache_t
3915 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3916 * walker; we either add all caches, or ones named explicitly as arguments.
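 * When a cache name is given we stop at the first match (WALK_DONE); with no
 * name every cache is appended, and the kmc_caches array is doubled (starting
 * at 256 entries) whenever it fills up.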
3917 */ 3918 3919 typedef struct kmclist { 3920 const char *kmc_name; /* Name to match (or NULL) */ 3921 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3922 int kmc_nelems; /* Num entries in kmc_caches */ 3923 int kmc_size; /* Size of kmc_caches array */ 3924 } kmclist_t; 3925 3926 static int 3927 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3928 { 3929 void *p; 3930 int s; 3931 3932 if (kmc->kmc_name == NULL || 3933 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3934 /* 3935 * If we have a match, grow our array (if necessary), and then 3936 * add the virtual address of the matching cache to our list. 3937 */ 3938 if (kmc->kmc_nelems >= kmc->kmc_size) { 3939 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3940 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3941 3942 bcopy(kmc->kmc_caches, p, 3943 sizeof (uintptr_t) * kmc->kmc_size); 3944 3945 kmc->kmc_caches = p; 3946 kmc->kmc_size = s; 3947 } 3948 3949 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3950 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3951 } 3952 3953 return (WALK_NEXT); 3954 } 3955 3956 /* 3957 * The second piece of ::kmausers is a hash table of allocations. Each 3958 * allocation owner is identified by its stack trace and data_size. We then 3959 * track the total bytes of all such allocations, and the number of allocations 3960 * to report at the end. Once we have a list of caches, we walk through the 3961 * allocated bufctls of each, and update our hash table accordingly. 3962 */ 3963 3964 typedef struct kmowner { 3965 struct kmowner *kmo_head; /* First hash elt in bucket */ 3966 struct kmowner *kmo_next; /* Next hash elt in chain */ 3967 size_t kmo_signature; /* Hash table signature */ 3968 uint_t kmo_num; /* Number of allocations */ 3969 size_t kmo_data_size; /* Size of each allocation */ 3970 size_t kmo_total_size; /* Total bytes of allocation */ 3971 int kmo_depth; /* Depth of stack trace */ 3972 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3973 } kmowner_t; 3974 3975 typedef struct kmusers { 3976 uintptr_t kmu_addr; /* address of interest */ 3977 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3978 kmowner_t *kmu_hash; /* Hash table of owners */ 3979 int kmu_nelems; /* Number of entries in use */ 3980 int kmu_size; /* Total number of entries */ 3981 } kmusers_t; 3982 3983 static void 3984 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3985 size_t size, size_t data_size) 3986 { 3987 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3988 size_t bucket, signature = data_size; 3989 kmowner_t *kmo, *kmoend; 3990 3991 /* 3992 * If the hash table is full, double its size and rehash everything. 3993 */ 3994 if (kmu->kmu_nelems >= kmu->kmu_size) { 3995 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3996 3997 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3998 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 3999 kmu->kmu_hash = kmo; 4000 kmu->kmu_size = s; 4001 4002 kmoend = kmu->kmu_hash + kmu->kmu_size; 4003 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 4004 kmo->kmo_head = NULL; 4005 4006 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 4007 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 4008 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 4009 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4010 kmu->kmu_hash[bucket].kmo_head = kmo; 4011 } 4012 } 4013 4014 /* 4015 * Finish computing the hash signature from the stack trace, and then 4016 * see if the owner is in the hash table. If so, update our stats. 
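 * The signature is simply data_size plus the sum of the stack PCs, so
 * different owners can collide; a signature match is therefore confirmed by
 * comparing the depth and every frame before the counts are merged.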
4017 */ 4018 for (i = 0; i < depth; i++) 4019 signature += bcp->bc_stack[i]; 4020 4021 bucket = signature & (kmu->kmu_size - 1); 4022 4023 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4024 if (kmo->kmo_signature == signature) { 4025 size_t difference = 0; 4026 4027 difference |= kmo->kmo_data_size - data_size; 4028 difference |= kmo->kmo_depth - depth; 4029 4030 for (i = 0; i < depth; i++) { 4031 difference |= kmo->kmo_stack[i] - 4032 bcp->bc_stack[i]; 4033 } 4034 4035 if (difference == 0) { 4036 kmo->kmo_total_size += size; 4037 kmo->kmo_num++; 4038 return; 4039 } 4040 } 4041 } 4042 4043 /* 4044 * If the owner is not yet hashed, grab the next element and fill it 4045 * in based on the allocation information. 4046 */ 4047 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4048 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4049 kmu->kmu_hash[bucket].kmo_head = kmo; 4050 4051 kmo->kmo_signature = signature; 4052 kmo->kmo_num = 1; 4053 kmo->kmo_data_size = data_size; 4054 kmo->kmo_total_size = size; 4055 kmo->kmo_depth = depth; 4056 4057 for (i = 0; i < depth; i++) 4058 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4059 } 4060 4061 /* 4062 * When ::kmausers is invoked without the -f flag, we simply update our hash 4063 * table with the information from each allocated bufctl. 4064 */ 4065 /*ARGSUSED*/ 4066 static int 4067 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4068 { 4069 const kmem_cache_t *cp = kmu->kmu_cache; 4070 4071 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4072 return (WALK_NEXT); 4073 } 4074 4075 /* 4076 * When ::kmausers is invoked with the -f flag, we print out the information 4077 * for each bufctl as well as updating the hash table. 4078 */ 4079 static int 4080 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4081 { 4082 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4083 const kmem_cache_t *cp = kmu->kmu_cache; 4084 kmem_bufctl_t bufctl; 4085 4086 if (kmu->kmu_addr) { 4087 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4088 mdb_warn("couldn't read bufctl at %p", addr); 4089 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4090 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4091 cp->cache_bufsize) 4092 return (WALK_NEXT); 4093 } 4094 4095 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4096 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4097 4098 for (i = 0; i < depth; i++) 4099 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4100 4101 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4102 return (WALK_NEXT); 4103 } 4104 4105 /* 4106 * We sort our results by allocation size before printing them. 4107 */ 4108 static int 4109 kmownercmp(const void *lp, const void *rp) 4110 { 4111 const kmowner_t *lhs = lp; 4112 const kmowner_t *rhs = rp; 4113 4114 return (rhs->kmo_total_size - lhs->kmo_total_size); 4115 } 4116 4117 /* 4118 * The main engine of ::kmausers is relatively straightforward: First we 4119 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4120 * iterate over the allocated bufctls of each cache in the list. Finally, 4121 * we sort and print our results. 
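 * A couple of hypothetical invocations, for illustration only (the cache
 * name is just an example; any cache with KMF_AUDIT enabled will do):
 *
 *	> ::kmausers			(largest users across all audited caches)
 *	> ::kmausers -e -f kmem_alloc_256	(all users of one cache, with stacks)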
4122 */ 4123 /*ARGSUSED*/ 4124 int 4125 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4126 { 4127 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4128 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4129 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4130 int do_all_caches = 1; /* Do all caches (no arguments) */ 4131 int opt_e = FALSE; /* Include "small" users */ 4132 int opt_f = FALSE; /* Print stack traces */ 4133 4134 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4135 kmowner_t *kmo, *kmoend; 4136 int i, oelems; 4137 4138 kmclist_t kmc; 4139 kmusers_t kmu; 4140 4141 bzero(&kmc, sizeof (kmc)); 4142 bzero(&kmu, sizeof (kmu)); 4143 4144 while ((i = mdb_getopts(argc, argv, 4145 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4146 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4147 4148 argv += i; /* skip past options we just processed */ 4149 argc -= i; /* adjust argc */ 4150 4151 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4152 return (DCMD_USAGE); 4153 4154 oelems = kmc.kmc_nelems; 4155 kmc.kmc_name = argv->a_un.a_str; 4156 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4157 4158 if (kmc.kmc_nelems == oelems) { 4159 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4160 return (DCMD_ERR); 4161 } 4162 4163 do_all_caches = 0; 4164 argv++; 4165 argc--; 4166 } 4167 4168 if (flags & DCMD_ADDRSPEC) { 4169 opt_f = TRUE; 4170 kmu.kmu_addr = addr; 4171 } else { 4172 kmu.kmu_addr = NULL; 4173 } 4174 4175 if (opt_e) 4176 mem_threshold = cnt_threshold = 0; 4177 4178 if (opt_f) 4179 callback = (mdb_walk_cb_t)kmause2; 4180 4181 if (do_all_caches) { 4182 kmc.kmc_name = NULL; /* match all cache names */ 4183 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4184 } 4185 4186 for (i = 0; i < kmc.kmc_nelems; i++) { 4187 uintptr_t cp = kmc.kmc_caches[i]; 4188 kmem_cache_t c; 4189 4190 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4191 mdb_warn("failed to read cache at %p", cp); 4192 continue; 4193 } 4194 4195 if (!(c.cache_flags & KMF_AUDIT)) { 4196 if (!do_all_caches) { 4197 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4198 c.cache_name); 4199 } 4200 continue; 4201 } 4202 4203 kmu.kmu_cache = &c; 4204 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4205 audited_caches++; 4206 } 4207 4208 if (audited_caches == 0 && do_all_caches) { 4209 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4210 return (DCMD_ERR); 4211 } 4212 4213 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4214 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4215 4216 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4217 if (kmo->kmo_total_size < mem_threshold && 4218 kmo->kmo_num < cnt_threshold) 4219 continue; 4220 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4221 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4222 for (i = 0; i < kmo->kmo_depth; i++) 4223 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4224 } 4225 4226 return (DCMD_OK); 4227 } 4228 4229 void 4230 kmausers_help(void) 4231 { 4232 mdb_printf( 4233 "Displays the largest users of the kmem allocator, sorted by \n" 4234 "trace. If one or more caches is specified, only those caches\n" 4235 "will be searched. By default, all caches are searched. If an\n" 4236 "address is specified, then only those allocations which include\n" 4237 "the given address are displayed. Specifying an address implies\n" 4238 "-f.\n" 4239 "\n" 4240 "\t-e\tInclude all users, not just the largest\n" 4241 "\t-f\tDisplay individual allocations. 
By default, users are\n" 4242 "\t\tgrouped by stack\n"); 4243 } 4244 4245 static int 4246 kmem_ready_check(void) 4247 { 4248 int ready; 4249 4250 if (mdb_readvar(&ready, "kmem_ready") < 0) 4251 return (-1); /* errno is set for us */ 4252 4253 return (ready); 4254 } 4255 4256 void 4257 kmem_statechange(void) 4258 { 4259 static int been_ready = 0; 4260 4261 if (been_ready) 4262 return; 4263 4264 if (kmem_ready_check() <= 0) 4265 return; 4266 4267 been_ready = 1; 4268 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 4269 } 4270 4271 void 4272 kmem_init(void) 4273 { 4274 mdb_walker_t w = { 4275 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 4276 list_walk_step, list_walk_fini 4277 }; 4278 4279 /* 4280 * If kmem is ready, we'll need to invoke the kmem_cache walker 4281 * immediately. Walkers in the linkage structure won't be ready until 4282 * _mdb_init returns, so we'll need to add this one manually. If kmem 4283 * is ready, we'll use the walker to initialize the caches. If kmem 4284 * isn't ready, we'll register a callback that will allow us to defer 4285 * cache walking until it is. 4286 */ 4287 if (mdb_add_walker(&w) != 0) { 4288 mdb_warn("failed to add kmem_cache walker"); 4289 return; 4290 } 4291 4292 kmem_statechange(); 4293 } 4294 4295 typedef struct whatthread { 4296 uintptr_t wt_target; 4297 int wt_verbose; 4298 } whatthread_t; 4299 4300 static int 4301 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 4302 { 4303 uintptr_t current, data; 4304 4305 if (t->t_stkbase == NULL) 4306 return (WALK_NEXT); 4307 4308 /* 4309 * Warn about swapped out threads, but drive on anyway 4310 */ 4311 if (!(t->t_schedflag & TS_LOAD)) { 4312 mdb_warn("thread %p's stack swapped out\n", addr); 4313 return (WALK_NEXT); 4314 } 4315 4316 /* 4317 * Search the thread's stack for the given pointer. Note that it would 4318 * be more efficient to follow ::kgrep's lead and read in page-sized 4319 * chunks, but this routine is already fast and simple. 4320 */ 4321 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 4322 current += sizeof (uintptr_t)) { 4323 if (mdb_vread(&data, sizeof (data), current) == -1) { 4324 mdb_warn("couldn't read thread %p's stack at %p", 4325 addr, current); 4326 return (WALK_ERR); 4327 } 4328 4329 if (data == w->wt_target) { 4330 if (w->wt_verbose) { 4331 mdb_printf("%p in thread %p's stack%s\n", 4332 current, addr, stack_active(t, current)); 4333 } else { 4334 mdb_printf("%#lr\n", addr); 4335 return (WALK_NEXT); 4336 } 4337 } 4338 } 4339 4340 return (WALK_NEXT); 4341 } 4342 4343 int 4344 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4345 { 4346 whatthread_t w; 4347 4348 if (!(flags & DCMD_ADDRSPEC)) 4349 return (DCMD_USAGE); 4350 4351 w.wt_verbose = FALSE; 4352 w.wt_target = addr; 4353 4354 if (mdb_getopts(argc, argv, 4355 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 4356 return (DCMD_USAGE); 4357 4358 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 4359 == -1) { 4360 mdb_warn("couldn't walk threads"); 4361 return (DCMD_ERR); 4362 } 4363 4364 return (DCMD_OK); 4365 } 4366
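/*
 * Hypothetical usage, for illustration only: to find every thread whose stack
 * contains a reference to a suspect buffer, one might run
 *
 *	> 0x30001234560::whatthread -v
 *
 * which prints each matching stack location together with stack_active()'s
 * assessment of whether that location lies above or below the saved stack
 * pointer.
 */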