1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <mdb/mdb_param.h> 27 #include <mdb/mdb_modapi.h> 28 #include <mdb/mdb_ctf.h> 29 #include <sys/cpuvar.h> 30 #include <sys/kmem_impl.h> 31 #include <sys/vmem_impl.h> 32 #include <sys/machelf.h> 33 #include <sys/modctl.h> 34 #include <sys/kobj.h> 35 #include <sys/panic.h> 36 #include <sys/stack.h> 37 #include <sys/sysmacros.h> 38 #include <vm/page.h> 39 40 #include "avl.h" 41 #include "combined.h" 42 #include "dist.h" 43 #include "kmem.h" 44 #include "list.h" 45 46 #define dprintf(x) if (mdb_debug_level) { \ 47 mdb_printf("kmem debug: "); \ 48 /*CSTYLED*/\ 49 mdb_printf x ;\ 50 } 51 52 #define KM_ALLOCATED 0x01 53 #define KM_FREE 0x02 54 #define KM_BUFCTL 0x04 55 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */ 56 #define KM_HASH 0x10 57 58 static int mdb_debug_level = 0; 59 60 /*ARGSUSED*/ 61 static int 62 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored) 63 { 64 mdb_walker_t w; 65 char descr[64]; 66 67 (void) mdb_snprintf(descr, sizeof (descr), 68 "walk the %s cache", c->cache_name); 69 70 w.walk_name = c->cache_name; 71 w.walk_descr = descr; 72 w.walk_init = kmem_walk_init; 73 w.walk_step = kmem_walk_step; 74 w.walk_fini = kmem_walk_fini; 75 w.walk_init_arg = (void *)addr; 76 77 if (mdb_add_walker(&w) == -1) 78 mdb_warn("failed to add %s walker", c->cache_name); 79 80 return (WALK_NEXT); 81 } 82 83 /*ARGSUSED*/ 84 int 85 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 86 { 87 mdb_debug_level ^= 1; 88 89 mdb_printf("kmem: debugging is now %s\n", 90 mdb_debug_level ? 
"on" : "off"); 91 92 return (DCMD_OK); 93 } 94 95 int 96 kmem_cache_walk_init(mdb_walk_state_t *wsp) 97 { 98 GElf_Sym sym; 99 100 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) { 101 mdb_warn("couldn't find kmem_caches"); 102 return (WALK_ERR); 103 } 104 105 wsp->walk_addr = (uintptr_t)sym.st_value; 106 107 return (list_walk_init_named(wsp, "cache list", "cache")); 108 } 109 110 int 111 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 112 { 113 if (wsp->walk_addr == NULL) { 114 mdb_warn("kmem_cpu_cache doesn't support global walks"); 115 return (WALK_ERR); 116 } 117 118 if (mdb_layered_walk("cpu", wsp) == -1) { 119 mdb_warn("couldn't walk 'cpu'"); 120 return (WALK_ERR); 121 } 122 123 wsp->walk_data = (void *)wsp->walk_addr; 124 125 return (WALK_NEXT); 126 } 127 128 int 129 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 130 { 131 uintptr_t caddr = (uintptr_t)wsp->walk_data; 132 const cpu_t *cpu = wsp->walk_layer; 133 kmem_cpu_cache_t cc; 134 135 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]); 136 137 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 138 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 139 return (WALK_ERR); 140 } 141 142 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 143 } 144 145 static int 146 kmem_slab_check(void *p, uintptr_t saddr, void *arg) 147 { 148 kmem_slab_t *sp = p; 149 uintptr_t caddr = (uintptr_t)arg; 150 if ((uintptr_t)sp->slab_cache != caddr) { 151 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 152 saddr, caddr, sp->slab_cache); 153 return (-1); 154 } 155 156 return (0); 157 } 158 159 static int 160 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg) 161 { 162 kmem_slab_t *sp = p; 163 164 int rc = kmem_slab_check(p, saddr, arg); 165 if (rc != 0) { 166 return (rc); 167 } 168 169 if (!KMEM_SLAB_IS_PARTIAL(sp)) { 170 mdb_warn("slab %p is not a partial slab\n", saddr); 171 return (-1); 172 } 173 174 return (0); 175 } 176 177 static int 178 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg) 179 { 180 kmem_slab_t *sp = p; 181 182 int rc = kmem_slab_check(p, saddr, arg); 183 if (rc != 0) { 184 return (rc); 185 } 186 187 if (!KMEM_SLAB_IS_ALL_USED(sp)) { 188 mdb_warn("slab %p is not completely allocated\n", saddr); 189 return (-1); 190 } 191 192 return (0); 193 } 194 195 typedef struct { 196 uintptr_t kns_cache_addr; 197 int kns_nslabs; 198 } kmem_nth_slab_t; 199 200 static int 201 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg) 202 { 203 kmem_nth_slab_t *chkp = arg; 204 205 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr); 206 if (rc != 0) { 207 return (rc); 208 } 209 210 return (chkp->kns_nslabs-- == 0 ? 
1 : 0); 211 } 212 213 static int 214 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp) 215 { 216 uintptr_t caddr = wsp->walk_addr; 217 218 wsp->walk_addr = (uintptr_t)(caddr + 219 offsetof(kmem_cache_t, cache_complete_slabs)); 220 221 return (list_walk_init_checked(wsp, "slab list", "slab", 222 kmem_complete_slab_check, (void *)caddr)); 223 } 224 225 static int 226 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp) 227 { 228 uintptr_t caddr = wsp->walk_addr; 229 230 wsp->walk_addr = (uintptr_t)(caddr + 231 offsetof(kmem_cache_t, cache_partial_slabs)); 232 233 return (avl_walk_init_checked(wsp, "slab list", "slab", 234 kmem_partial_slab_check, (void *)caddr)); 235 } 236 237 int 238 kmem_slab_walk_init(mdb_walk_state_t *wsp) 239 { 240 uintptr_t caddr = wsp->walk_addr; 241 242 if (caddr == NULL) { 243 mdb_warn("kmem_slab doesn't support global walks\n"); 244 return (WALK_ERR); 245 } 246 247 combined_walk_init(wsp); 248 combined_walk_add(wsp, 249 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini); 250 combined_walk_add(wsp, 251 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini); 252 253 return (WALK_NEXT); 254 } 255 256 static int 257 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp) 258 { 259 uintptr_t caddr = wsp->walk_addr; 260 kmem_nth_slab_t *chk; 261 262 chk = mdb_alloc(sizeof (kmem_nth_slab_t), 263 UM_SLEEP | UM_GC); 264 chk->kns_cache_addr = caddr; 265 chk->kns_nslabs = 1; 266 wsp->walk_addr = (uintptr_t)(caddr + 267 offsetof(kmem_cache_t, cache_complete_slabs)); 268 269 return (list_walk_init_checked(wsp, "slab list", "slab", 270 kmem_nth_slab_check, chk)); 271 } 272 273 int 274 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 275 { 276 uintptr_t caddr = wsp->walk_addr; 277 kmem_cache_t c; 278 279 if (caddr == NULL) { 280 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 281 return (WALK_ERR); 282 } 283 284 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 285 mdb_warn("couldn't read kmem_cache at %p", caddr); 286 return (WALK_ERR); 287 } 288 289 combined_walk_init(wsp); 290 291 /* 292 * Some consumers (umem_walk_step(), in particular) require at 293 * least one callback if there are any buffers in the cache. So 294 * if there are *no* partial slabs, report the first full slab, if 295 * any. 296 * 297 * Yes, this is ugly, but it's cleaner than the other possibilities. 
298 */ 299 if (c.cache_partial_slabs.avl_numnodes == 0) { 300 combined_walk_add(wsp, kmem_first_complete_slab_walk_init, 301 list_walk_step, list_walk_fini); 302 } else { 303 combined_walk_add(wsp, kmem_partial_slab_walk_init, 304 avl_walk_step, avl_walk_fini); 305 } 306 307 return (WALK_NEXT); 308 } 309 310 int 311 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 312 { 313 kmem_cache_t c; 314 const char *filter = NULL; 315 316 if (mdb_getopts(ac, argv, 317 'n', MDB_OPT_STR, &filter, 318 NULL) != ac) { 319 return (DCMD_USAGE); 320 } 321 322 if (!(flags & DCMD_ADDRSPEC)) { 323 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 324 mdb_warn("can't walk kmem_cache"); 325 return (DCMD_ERR); 326 } 327 return (DCMD_OK); 328 } 329 330 if (DCMD_HDRSPEC(flags)) 331 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 332 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 333 334 if (mdb_vread(&c, sizeof (c), addr) == -1) { 335 mdb_warn("couldn't read kmem_cache at %p", addr); 336 return (DCMD_ERR); 337 } 338 339 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) 340 return (DCMD_OK); 341 342 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 343 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 344 345 return (DCMD_OK); 346 } 347 348 void 349 kmem_cache_help(void) 350 { 351 mdb_printf("%s", "Print kernel memory caches.\n\n"); 352 mdb_dec_indent(2); 353 mdb_printf("%<b>OPTIONS%</b>\n"); 354 mdb_inc_indent(2); 355 mdb_printf("%s", 356 " -n name\n" 357 " name of kmem cache (or matching partial name)\n" 358 "\n" 359 "Column\tDescription\n" 360 "\n" 361 "ADDR\t\taddress of kmem cache\n" 362 "NAME\t\tname of kmem cache\n" 363 "FLAG\t\tvarious cache state flags\n" 364 "CFLAG\t\tcache creation flags\n" 365 "BUFSIZE\tobject size in bytes\n" 366 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n"); 367 } 368 369 #define LABEL_WIDTH 11 370 static void 371 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 372 size_t maxbuckets, size_t minbucketsize) 373 { 374 uint64_t total; 375 int buckets; 376 int i; 377 const int *distarray; 378 int complete[2]; 379 380 buckets = buffers_per_slab; 381 382 total = 0; 383 for (i = 0; i <= buffers_per_slab; i++) 384 total += ks_bucket[i]; 385 386 if (maxbuckets > 1) 387 buckets = MIN(buckets, maxbuckets); 388 389 if (minbucketsize > 1) { 390 /* 391 * minbucketsize does not apply to the first bucket reserved 392 * for completely allocated slabs 393 */ 394 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 395 minbucketsize)); 396 if ((buckets < 2) && (buffers_per_slab > 1)) { 397 buckets = 2; 398 minbucketsize = (buffers_per_slab - 1); 399 } 400 } 401 402 /* 403 * The first printed bucket is reserved for completely allocated slabs. 404 * Passing (buckets - 1) excludes that bucket from the generated 405 * distribution, since we're handling it as a special case. 406 */ 407 complete[0] = buffers_per_slab; 408 complete[1] = buffers_per_slab + 1; 409 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1); 410 411 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated"); 412 dist_print_header("Buffers", LABEL_WIDTH, "Slabs"); 413 414 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH); 415 /* 416 * Print bucket ranges in descending order after the first bucket for 417 * completely allocated slabs, so a person can see immediately whether 418 * or not there is fragmentation without having to scan possibly 419 * multiple screens of output. 
Starting at (buckets - 2) excludes the 420 * extra terminating bucket. 421 */ 422 for (i = buckets - 2; i >= 0; i--) { 423 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH); 424 } 425 mdb_printf("\n"); 426 } 427 #undef LABEL_WIDTH 428 429 /*ARGSUSED*/ 430 static int 431 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab) 432 { 433 *is_slab = B_TRUE; 434 return (WALK_DONE); 435 } 436 437 /*ARGSUSED*/ 438 static int 439 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp, 440 boolean_t *is_slab) 441 { 442 /* 443 * The "kmem_partial_slab" walker reports the first full slab if there 444 * are no partial slabs (for the sake of consumers that require at least 445 * one callback if there are any buffers in the cache). 446 */ 447 *is_slab = KMEM_SLAB_IS_PARTIAL(sp); 448 return (WALK_DONE); 449 } 450 451 typedef struct kmem_slab_usage { 452 int ksu_refcnt; /* count of allocated buffers on slab */ 453 boolean_t ksu_nomove; /* slab marked non-reclaimable */ 454 } kmem_slab_usage_t; 455 456 typedef struct kmem_slab_stats { 457 const kmem_cache_t *ks_cp; 458 int ks_slabs; /* slabs in cache */ 459 int ks_partial_slabs; /* partially allocated slabs in cache */ 460 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 461 int ks_max_buffers_per_slab; /* max buffers per slab */ 462 int ks_usage_len; /* ks_usage array length */ 463 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 464 uint_t *ks_bucket; /* slab usage distribution */ 465 } kmem_slab_stats_t; 466 467 /*ARGSUSED*/ 468 static int 469 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp, 470 kmem_slab_stats_t *ks) 471 { 472 kmem_slab_usage_t *ksu; 473 long unused; 474 475 ks->ks_slabs++; 476 ks->ks_bucket[sp->slab_refcnt]++; 477 478 unused = (sp->slab_chunks - sp->slab_refcnt); 479 if (unused == 0) { 480 return (WALK_NEXT); 481 } 482 483 ks->ks_partial_slabs++; 484 ks->ks_unused_buffers += unused; 485 486 if (ks->ks_partial_slabs > ks->ks_usage_len) { 487 kmem_slab_usage_t *usage; 488 int len = ks->ks_usage_len; 489 490 len = (len == 0 ? 
16 : len * 2); 491 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 492 if (ks->ks_usage != NULL) { 493 bcopy(ks->ks_usage, usage, 494 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 495 mdb_free(ks->ks_usage, 496 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 497 } 498 ks->ks_usage = usage; 499 ks->ks_usage_len = len; 500 } 501 502 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 503 ksu->ksu_refcnt = sp->slab_refcnt; 504 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); 505 return (WALK_NEXT); 506 } 507 508 static void 509 kmem_slabs_header() 510 { 511 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 512 "", "", "Partial", "", "Unused", ""); 513 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 514 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 515 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 516 "-------------------------", "--------", "--------", "---------", 517 "---------", "------"); 518 } 519 520 int 521 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 522 { 523 kmem_cache_t c; 524 kmem_slab_stats_t stats; 525 mdb_walk_cb_t cb; 526 int pct; 527 int tenths_pct; 528 size_t maxbuckets = 1; 529 size_t minbucketsize = 0; 530 const char *filter = NULL; 531 const char *name = NULL; 532 uint_t opt_v = FALSE; 533 boolean_t buckets = B_FALSE; 534 boolean_t skip = B_FALSE; 535 536 if (mdb_getopts(argc, argv, 537 'B', MDB_OPT_UINTPTR, &minbucketsize, 538 'b', MDB_OPT_UINTPTR, &maxbuckets, 539 'n', MDB_OPT_STR, &filter, 540 'N', MDB_OPT_STR, &name, 541 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 542 NULL) != argc) { 543 return (DCMD_USAGE); 544 } 545 546 if ((maxbuckets != 1) || (minbucketsize != 0)) { 547 buckets = B_TRUE; 548 } 549 550 if (!(flags & DCMD_ADDRSPEC)) { 551 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 552 argv) == -1) { 553 mdb_warn("can't walk kmem_cache"); 554 return (DCMD_ERR); 555 } 556 return (DCMD_OK); 557 } 558 559 if (mdb_vread(&c, sizeof (c), addr) == -1) { 560 mdb_warn("couldn't read kmem_cache at %p", addr); 561 return (DCMD_ERR); 562 } 563 564 if (name == NULL) { 565 skip = ((filter != NULL) && 566 (strstr(c.cache_name, filter) == NULL)); 567 } else if (filter == NULL) { 568 skip = (strcmp(c.cache_name, name) != 0); 569 } else { 570 /* match either -n or -N */ 571 skip = ((strcmp(c.cache_name, name) != 0) && 572 (strstr(c.cache_name, filter) == NULL)); 573 } 574 575 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) { 576 kmem_slabs_header(); 577 } else if ((opt_v || buckets) && !skip) { 578 if (DCMD_HDRSPEC(flags)) { 579 kmem_slabs_header(); 580 } else { 581 boolean_t is_slab = B_FALSE; 582 const char *walker_name; 583 if (opt_v) { 584 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 585 walker_name = "kmem_slab_partial"; 586 } else { 587 cb = (mdb_walk_cb_t)kmem_first_slab; 588 walker_name = "kmem_slab"; 589 } 590 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 591 if (is_slab) { 592 kmem_slabs_header(); 593 } 594 } 595 } 596 597 if (skip) { 598 return (DCMD_OK); 599 } 600 601 bzero(&stats, sizeof (kmem_slab_stats_t)); 602 stats.ks_cp = &c; 603 stats.ks_max_buffers_per_slab = c.cache_maxchunks; 604 /* +1 to include a zero bucket */ 605 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) * 606 sizeof (*stats.ks_bucket), UM_SLEEP); 607 cb = (mdb_walk_cb_t)kmem_slablist_stat; 608 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 609 610 if (c.cache_buftotal == 0) { 611 pct = 0; 612 tenths_pct = 0; 613 } else { 614 uint64_t n = stats.ks_unused_buffers * 10000; 615 pct = (int)(n / c.cache_buftotal); 616 tenths_pct = pct 
- ((pct / 100) * 100); 617 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 618 if (tenths_pct == 10) { 619 pct += 100; 620 tenths_pct = 0; 621 } 622 } 623 624 pct /= 100; 625 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 626 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 627 stats.ks_unused_buffers, pct, tenths_pct); 628 629 if (maxbuckets == 0) { 630 maxbuckets = stats.ks_max_buffers_per_slab; 631 } 632 633 if (((maxbuckets > 1) || (minbucketsize > 0)) && 634 (stats.ks_slabs > 0)) { 635 mdb_printf("\n"); 636 kmem_slabs_print_dist(stats.ks_bucket, 637 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize); 638 } 639 640 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) * 641 sizeof (*stats.ks_bucket)); 642 643 if (!opt_v) { 644 return (DCMD_OK); 645 } 646 647 if (opt_v && (stats.ks_partial_slabs > 0)) { 648 int i; 649 kmem_slab_usage_t *ksu; 650 651 mdb_printf(" %d complete (%d), %d partial:", 652 (stats.ks_slabs - stats.ks_partial_slabs), 653 stats.ks_max_buffers_per_slab, 654 stats.ks_partial_slabs); 655 656 for (i = 0; i < stats.ks_partial_slabs; i++) { 657 ksu = &stats.ks_usage[i]; 658 mdb_printf(" %d%s", ksu->ksu_refcnt, 659 (ksu->ksu_nomove ? "*" : "")); 660 } 661 mdb_printf("\n\n"); 662 } 663 664 if (stats.ks_usage_len > 0) { 665 mdb_free(stats.ks_usage, 666 stats.ks_usage_len * sizeof (kmem_slab_usage_t)); 667 } 668 669 return (DCMD_OK); 670 } 671 672 void 673 kmem_slabs_help(void) 674 { 675 mdb_printf("%s", 676 "Display slab usage per kmem cache.\n\n"); 677 mdb_dec_indent(2); 678 mdb_printf("%<b>OPTIONS%</b>\n"); 679 mdb_inc_indent(2); 680 mdb_printf("%s", 681 " -n name\n" 682 " name of kmem cache (or matching partial name)\n" 683 " -N name\n" 684 " exact name of kmem cache\n" 685 " -b maxbins\n" 686 " Print a distribution of allocated buffers per slab using at\n" 687 " most maxbins bins. The first bin is reserved for completely\n" 688 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n" 689 " effect as specifying the maximum allocated buffers per slab\n" 690 " or setting minbinsize to 1 (-B 1).\n" 691 " -B minbinsize\n" 692 " Print a distribution of allocated buffers per slab, making\n" 693 " all bins (except the first, reserved for completely allocated\n" 694 " slabs) at least minbinsize buffers apart.\n" 695 " -v verbose output: List the allocated buffer count of each partial\n" 696 " slab on the free list in order from front to back to show how\n" 697 " closely the slabs are ordered by usage. For example\n" 698 "\n" 699 " 10 complete, 3 partial (8): 7 3 1\n" 700 "\n" 701 " means there are thirteen slabs with eight buffers each, including\n" 702 " three partially allocated slabs with less than all eight buffers\n" 703 " allocated.\n" 704 "\n" 705 " Buffer allocations are always from the front of the partial slab\n" 706 " list. When a buffer is freed from a completely used slab, that\n" 707 " slab is added to the front of the partial slab list. Assuming\n" 708 " that all buffers are equally likely to be freed soon, the\n" 709 " desired order of partial slabs is most-used at the front of the\n" 710 " list and least-used at the back (as in the example above).\n" 711 " However, if a slab contains an allocated buffer that will not\n" 712 " soon be freed, it would be better for that slab to be at the\n" 713 " front where all of its buffers can be allocated. 
Taking a slab\n" 714 " off the partial slab list (either with all buffers freed or all\n" 715 " buffers allocated) reduces cache fragmentation.\n" 716 "\n" 717 " A slab's allocated buffer count representing a partial slab (9 in\n" 718 " the example below) may be marked as follows:\n" 719 "\n" 720 " 9* An asterisk indicates that kmem has marked the slab non-\n" 721 " reclaimable because the kmem client refused to move one of the\n" 722 " slab's buffers. Since kmem does not expect to completely free the\n" 723 " slab, it moves it to the front of the list in the hope of\n" 724 " completely allocating it instead. A slab marked with an asterisk\n" 725 " stays marked for as long as it remains on the partial slab list.\n" 726 "\n" 727 "Column\t\tDescription\n" 728 "\n" 729 "Cache Name\t\tname of kmem cache\n" 730 "Slabs\t\t\ttotal slab count\n" 731 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 732 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 733 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 734 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 735 "\t\t\t for accounting structures (debug mode), slab\n" 736 "\t\t\t coloring (incremental small offsets to stagger\n" 737 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 738 } 739 740 static int 741 addrcmp(const void *lhs, const void *rhs) 742 { 743 uintptr_t p1 = *((uintptr_t *)lhs); 744 uintptr_t p2 = *((uintptr_t *)rhs); 745 746 if (p1 < p2) 747 return (-1); 748 if (p1 > p2) 749 return (1); 750 return (0); 751 } 752 753 static int 754 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 755 { 756 const kmem_bufctl_audit_t *bcp1 = *lhs; 757 const kmem_bufctl_audit_t *bcp2 = *rhs; 758 759 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 760 return (-1); 761 762 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 763 return (1); 764 765 return (0); 766 } 767 768 typedef struct kmem_hash_walk { 769 uintptr_t *kmhw_table; 770 size_t kmhw_nelems; 771 size_t kmhw_pos; 772 kmem_bufctl_t kmhw_cur; 773 } kmem_hash_walk_t; 774 775 int 776 kmem_hash_walk_init(mdb_walk_state_t *wsp) 777 { 778 kmem_hash_walk_t *kmhw; 779 uintptr_t *hash; 780 kmem_cache_t c; 781 uintptr_t haddr, addr = wsp->walk_addr; 782 size_t nelems; 783 size_t hsize; 784 785 if (addr == NULL) { 786 mdb_warn("kmem_hash doesn't support global walks\n"); 787 return (WALK_ERR); 788 } 789 790 if (mdb_vread(&c, sizeof (c), addr) == -1) { 791 mdb_warn("couldn't read cache at addr %p", addr); 792 return (WALK_ERR); 793 } 794 795 if (!(c.cache_flags & KMF_HASH)) { 796 mdb_warn("cache %p doesn't have a hash table\n", addr); 797 return (WALK_DONE); /* nothing to do */ 798 } 799 800 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 801 kmhw->kmhw_cur.bc_next = NULL; 802 kmhw->kmhw_pos = 0; 803 804 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 805 hsize = nelems * sizeof (uintptr_t); 806 haddr = (uintptr_t)c.cache_hash_table; 807 808 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 809 if (mdb_vread(hash, hsize, haddr) == -1) { 810 mdb_warn("failed to read hash table at %p", haddr); 811 mdb_free(hash, hsize); 812 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 813 return (WALK_ERR); 814 } 815 816 wsp->walk_data = kmhw; 817 818 return (WALK_NEXT); 819 } 820 821 int 822 kmem_hash_walk_step(mdb_walk_state_t *wsp) 823 { 824 kmem_hash_walk_t *kmhw = wsp->walk_data; 825 uintptr_t addr = NULL; 826 827 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 828 while (kmhw->kmhw_pos < 
kmhw->kmhw_nelems) { 829 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 830 break; 831 } 832 } 833 if (addr == NULL) 834 return (WALK_DONE); 835 836 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 837 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 838 return (WALK_ERR); 839 } 840 841 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 842 } 843 844 void 845 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 846 { 847 kmem_hash_walk_t *kmhw = wsp->walk_data; 848 849 if (kmhw == NULL) 850 return; 851 852 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 853 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 854 } 855 856 /* 857 * Find the address of the bufctl structure for the address 'buf' in cache 858 * 'cp', which is at address caddr, and place it in *out. 859 */ 860 static int 861 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 862 { 863 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 864 kmem_bufctl_t *bcp; 865 kmem_bufctl_t bc; 866 867 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 868 mdb_warn("unable to read hash bucket for %p in cache %p", 869 buf, caddr); 870 return (-1); 871 } 872 873 while (bcp != NULL) { 874 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 875 (uintptr_t)bcp) == -1) { 876 mdb_warn("unable to read bufctl at %p", bcp); 877 return (-1); 878 } 879 if (bc.bc_addr == buf) { 880 *out = (uintptr_t)bcp; 881 return (0); 882 } 883 bcp = bc.bc_next; 884 } 885 886 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 887 return (-1); 888 } 889 890 int 891 kmem_get_magsize(const kmem_cache_t *cp) 892 { 893 uintptr_t addr = (uintptr_t)cp->cache_magtype; 894 GElf_Sym mt_sym; 895 kmem_magtype_t mt; 896 int res; 897 898 /* 899 * if cpu 0 has a non-zero magsize, it must be correct. caches 900 * with KMF_NOMAGAZINE have disabled their magazine layers, so 901 * it is okay to return 0 for them. 902 */ 903 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 904 (cp->cache_flags & KMF_NOMAGAZINE)) 905 return (res); 906 907 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 908 mdb_warn("unable to read 'kmem_magtype'"); 909 } else if (addr < mt_sym.st_value || 910 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 911 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 912 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 913 cp->cache_name, addr); 914 return (0); 915 } 916 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 917 mdb_warn("unable to read magtype at %a", addr); 918 return (0); 919 } 920 return (mt.mt_magsize); 921 } 922 923 /*ARGSUSED*/ 924 static int 925 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 926 { 927 *est -= (sp->slab_chunks - sp->slab_refcnt); 928 929 return (WALK_NEXT); 930 } 931 932 /* 933 * Returns an upper bound on the number of allocated buffers in a given 934 * cache. 
935 */ 936 size_t 937 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 938 { 939 int magsize; 940 size_t cache_est; 941 942 cache_est = cp->cache_buftotal; 943 944 (void) mdb_pwalk("kmem_slab_partial", 945 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 946 947 if ((magsize = kmem_get_magsize(cp)) != 0) { 948 size_t mag_est = cp->cache_full.ml_total * magsize; 949 950 if (cache_est >= mag_est) { 951 cache_est -= mag_est; 952 } else { 953 mdb_warn("cache %p's magazine layer holds more buffers " 954 "than the slab layer.\n", addr); 955 } 956 } 957 return (cache_est); 958 } 959 960 #define READMAG_ROUNDS(rounds) { \ 961 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 962 mdb_warn("couldn't read magazine at %p", kmp); \ 963 goto fail; \ 964 } \ 965 for (i = 0; i < rounds; i++) { \ 966 maglist[magcnt++] = mp->mag_round[i]; \ 967 if (magcnt == magmax) { \ 968 mdb_warn("%d magazines exceeds fudge factor\n", \ 969 magcnt); \ 970 goto fail; \ 971 } \ 972 } \ 973 } 974 975 int 976 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 977 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 978 { 979 kmem_magazine_t *kmp, *mp; 980 void **maglist = NULL; 981 int i, cpu; 982 size_t magsize, magmax, magbsize; 983 size_t magcnt = 0; 984 985 /* 986 * Read the magtype out of the cache, after verifying the pointer's 987 * correctness. 988 */ 989 magsize = kmem_get_magsize(cp); 990 if (magsize == 0) { 991 *maglistp = NULL; 992 *magcntp = 0; 993 *magmaxp = 0; 994 return (WALK_NEXT); 995 } 996 997 /* 998 * There are several places where we need to go buffer hunting: 999 * the per-CPU loaded magazine, the per-CPU spare full magazine, 1000 * and the full magazine list in the depot. 1001 * 1002 * For an upper bound on the number of buffers in the magazine 1003 * layer, we have the number of magazines on the cache_full 1004 * list plus at most two magazines per CPU (the loaded and the 1005 * spare). Toss in 100 magazines as a fudge factor in case this 1006 * is live (the number "100" comes from the same fudge factor in 1007 * crash(1M)). 1008 */ 1009 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 1010 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 1011 1012 if (magbsize >= PAGESIZE / 2) { 1013 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 1014 addr, magbsize); 1015 return (WALK_ERR); 1016 } 1017 1018 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 1019 mp = mdb_alloc(magbsize, alloc_flags); 1020 if (mp == NULL || maglist == NULL) 1021 goto fail; 1022 1023 /* 1024 * First up: the magazines in the depot (i.e. on the cache_full list). 1025 */ 1026 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 1027 READMAG_ROUNDS(magsize); 1028 kmp = mp->mag_next; 1029 1030 if (kmp == cp->cache_full.ml_list) 1031 break; /* cache_full list loop detected */ 1032 } 1033 1034 dprintf(("cache_full list done\n")); 1035 1036 /* 1037 * Now whip through the CPUs, snagging the loaded magazines 1038 * and full spares. 
1039 */ 1040 for (cpu = 0; cpu < ncpus; cpu++) { 1041 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 1042 1043 dprintf(("reading cpu cache %p\n", 1044 (uintptr_t)ccp - (uintptr_t)cp + addr)); 1045 1046 if (ccp->cc_rounds > 0 && 1047 (kmp = ccp->cc_loaded) != NULL) { 1048 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 1049 READMAG_ROUNDS(ccp->cc_rounds); 1050 } 1051 1052 if (ccp->cc_prounds > 0 && 1053 (kmp = ccp->cc_ploaded) != NULL) { 1054 dprintf(("reading %d previously loaded rounds\n", 1055 ccp->cc_prounds)); 1056 READMAG_ROUNDS(ccp->cc_prounds); 1057 } 1058 } 1059 1060 dprintf(("magazine layer: %d buffers\n", magcnt)); 1061 1062 if (!(alloc_flags & UM_GC)) 1063 mdb_free(mp, magbsize); 1064 1065 *maglistp = maglist; 1066 *magcntp = magcnt; 1067 *magmaxp = magmax; 1068 1069 return (WALK_NEXT); 1070 1071 fail: 1072 if (!(alloc_flags & UM_GC)) { 1073 if (mp) 1074 mdb_free(mp, magbsize); 1075 if (maglist) 1076 mdb_free(maglist, magmax * sizeof (void *)); 1077 } 1078 return (WALK_ERR); 1079 } 1080 1081 static int 1082 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1083 { 1084 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1085 } 1086 1087 static int 1088 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1089 { 1090 kmem_bufctl_audit_t b; 1091 1092 /* 1093 * if KMF_AUDIT is not set, we know that we're looking at a 1094 * kmem_bufctl_t. 1095 */ 1096 if (!(cp->cache_flags & KMF_AUDIT) || 1097 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1098 (void) memset(&b, 0, sizeof (b)); 1099 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1100 mdb_warn("unable to read bufctl at %p", buf); 1101 return (WALK_ERR); 1102 } 1103 } 1104 1105 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1106 } 1107 1108 typedef struct kmem_walk { 1109 int kmw_type; 1110 1111 int kmw_addr; /* cache address */ 1112 kmem_cache_t *kmw_cp; 1113 size_t kmw_csize; 1114 1115 /* 1116 * magazine layer 1117 */ 1118 void **kmw_maglist; 1119 size_t kmw_max; 1120 size_t kmw_count; 1121 size_t kmw_pos; 1122 1123 /* 1124 * slab layer 1125 */ 1126 char *kmw_valid; /* to keep track of freed buffers */ 1127 char *kmw_ubase; /* buffer for slab data */ 1128 } kmem_walk_t; 1129 1130 static int 1131 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1132 { 1133 kmem_walk_t *kmw; 1134 int ncpus, csize; 1135 kmem_cache_t *cp; 1136 size_t vm_quantum; 1137 1138 size_t magmax, magcnt; 1139 void **maglist = NULL; 1140 uint_t chunksize, slabsize; 1141 int status = WALK_ERR; 1142 uintptr_t addr = wsp->walk_addr; 1143 const char *layered; 1144 1145 type &= ~KM_HASH; 1146 1147 if (addr == NULL) { 1148 mdb_warn("kmem walk doesn't support global walks\n"); 1149 return (WALK_ERR); 1150 } 1151 1152 dprintf(("walking %p\n", addr)); 1153 1154 /* 1155 * First we need to figure out how many CPUs are configured in the 1156 * system to know how much to slurp out. 1157 */ 1158 mdb_readvar(&ncpus, "max_ncpus"); 1159 1160 csize = KMEM_CACHE_SIZE(ncpus); 1161 cp = mdb_alloc(csize, UM_SLEEP); 1162 1163 if (mdb_vread(cp, csize, addr) == -1) { 1164 mdb_warn("couldn't read cache at addr %p", addr); 1165 goto out2; 1166 } 1167 1168 /* 1169 * It's easy for someone to hand us an invalid cache address. 1170 * Unfortunately, it is hard for this walker to survive an 1171 * invalid cache cleanly. So we make sure that: 1172 * 1173 * 1. the vmem arena for the cache is readable, 1174 * 2. the vmem arena's quantum is a power of 2, 1175 * 3. our slabsize is a multiple of the quantum, and 1176 * 4. 
our chunksize is >0 and less than our slabsize. 1177 */ 1178 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1179 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1180 vm_quantum == 0 || 1181 (vm_quantum & (vm_quantum - 1)) != 0 || 1182 cp->cache_slabsize < vm_quantum || 1183 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1184 cp->cache_chunksize == 0 || 1185 cp->cache_chunksize > cp->cache_slabsize) { 1186 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1187 goto out2; 1188 } 1189 1190 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1191 1192 if (cp->cache_buftotal == 0) { 1193 mdb_free(cp, csize); 1194 return (WALK_DONE); 1195 } 1196 1197 /* 1198 * If they ask for bufctls, but it's a small-slab cache, 1199 * there is nothing to report. 1200 */ 1201 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1202 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1203 cp->cache_flags)); 1204 mdb_free(cp, csize); 1205 return (WALK_DONE); 1206 } 1207 1208 /* 1209 * If they want constructed buffers, but there's no constructor or 1210 * the cache has DEADBEEF checking enabled, there is nothing to report. 1211 */ 1212 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1213 cp->cache_constructor == NULL || 1214 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1215 mdb_free(cp, csize); 1216 return (WALK_DONE); 1217 } 1218 1219 /* 1220 * Read in the contents of the magazine layer 1221 */ 1222 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1223 &magmax, UM_SLEEP) == WALK_ERR) 1224 goto out2; 1225 1226 /* 1227 * We have all of the buffers from the magazines; if we are walking 1228 * allocated buffers, sort them so we can bsearch them later. 1229 */ 1230 if (type & KM_ALLOCATED) 1231 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1232 1233 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1234 1235 kmw->kmw_type = type; 1236 kmw->kmw_addr = addr; 1237 kmw->kmw_cp = cp; 1238 kmw->kmw_csize = csize; 1239 kmw->kmw_maglist = maglist; 1240 kmw->kmw_max = magmax; 1241 kmw->kmw_count = magcnt; 1242 kmw->kmw_pos = 0; 1243 1244 /* 1245 * When walking allocated buffers in a KMF_HASH cache, we walk the 1246 * hash table instead of the slab layer. 1247 */ 1248 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1249 layered = "kmem_hash"; 1250 1251 kmw->kmw_type |= KM_HASH; 1252 } else { 1253 /* 1254 * If we are walking freed buffers, we only need the 1255 * magazine layer plus the partially allocated slabs. 1256 * To walk allocated buffers, we need all of the slabs. 1257 */ 1258 if (type & KM_ALLOCATED) 1259 layered = "kmem_slab"; 1260 else 1261 layered = "kmem_slab_partial"; 1262 1263 /* 1264 * for small-slab caches, we read in the entire slab. For 1265 * freed buffers, we can just walk the freelist. For 1266 * allocated buffers, we use a 'valid' array to track 1267 * the freed buffers. 
1268 */ 1269 if (!(cp->cache_flags & KMF_HASH)) { 1270 chunksize = cp->cache_chunksize; 1271 slabsize = cp->cache_slabsize; 1272 1273 kmw->kmw_ubase = mdb_alloc(slabsize + 1274 sizeof (kmem_bufctl_t), UM_SLEEP); 1275 1276 if (type & KM_ALLOCATED) 1277 kmw->kmw_valid = 1278 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1279 } 1280 } 1281 1282 status = WALK_NEXT; 1283 1284 if (mdb_layered_walk(layered, wsp) == -1) { 1285 mdb_warn("unable to start layered '%s' walk", layered); 1286 status = WALK_ERR; 1287 } 1288 1289 out1: 1290 if (status == WALK_ERR) { 1291 if (kmw->kmw_valid) 1292 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1293 1294 if (kmw->kmw_ubase) 1295 mdb_free(kmw->kmw_ubase, slabsize + 1296 sizeof (kmem_bufctl_t)); 1297 1298 if (kmw->kmw_maglist) 1299 mdb_free(kmw->kmw_maglist, 1300 kmw->kmw_max * sizeof (uintptr_t)); 1301 1302 mdb_free(kmw, sizeof (kmem_walk_t)); 1303 wsp->walk_data = NULL; 1304 } 1305 1306 out2: 1307 if (status == WALK_ERR) 1308 mdb_free(cp, csize); 1309 1310 return (status); 1311 } 1312 1313 int 1314 kmem_walk_step(mdb_walk_state_t *wsp) 1315 { 1316 kmem_walk_t *kmw = wsp->walk_data; 1317 int type = kmw->kmw_type; 1318 kmem_cache_t *cp = kmw->kmw_cp; 1319 1320 void **maglist = kmw->kmw_maglist; 1321 int magcnt = kmw->kmw_count; 1322 1323 uintptr_t chunksize, slabsize; 1324 uintptr_t addr; 1325 const kmem_slab_t *sp; 1326 const kmem_bufctl_t *bcp; 1327 kmem_bufctl_t bc; 1328 1329 int chunks; 1330 char *kbase; 1331 void *buf; 1332 int i, ret; 1333 1334 char *valid, *ubase; 1335 1336 /* 1337 * first, handle the 'kmem_hash' layered walk case 1338 */ 1339 if (type & KM_HASH) { 1340 /* 1341 * We have a buffer which has been allocated out of the 1342 * global layer. We need to make sure that it's not 1343 * actually sitting in a magazine before we report it as 1344 * an allocated buffer. 1345 */ 1346 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1347 1348 if (magcnt > 0 && 1349 bsearch(&buf, maglist, magcnt, sizeof (void *), 1350 addrcmp) != NULL) 1351 return (WALK_NEXT); 1352 1353 if (type & KM_BUFCTL) 1354 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1355 1356 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1357 } 1358 1359 ret = WALK_NEXT; 1360 1361 addr = kmw->kmw_addr; 1362 1363 /* 1364 * If we're walking freed buffers, report everything in the 1365 * magazine layer before processing the first slab. 1366 */ 1367 if ((type & KM_FREE) && magcnt != 0) { 1368 kmw->kmw_count = 0; /* only do this once */ 1369 for (i = 0; i < magcnt; i++) { 1370 buf = maglist[i]; 1371 1372 if (type & KM_BUFCTL) { 1373 uintptr_t out; 1374 1375 if (cp->cache_flags & KMF_BUFTAG) { 1376 kmem_buftag_t *btp; 1377 kmem_buftag_t tag; 1378 1379 /* LINTED - alignment */ 1380 btp = KMEM_BUFTAG(cp, buf); 1381 if (mdb_vread(&tag, sizeof (tag), 1382 (uintptr_t)btp) == -1) { 1383 mdb_warn("reading buftag for " 1384 "%p at %p", buf, btp); 1385 continue; 1386 } 1387 out = (uintptr_t)tag.bt_bufctl; 1388 } else { 1389 if (kmem_hash_lookup(cp, addr, buf, 1390 &out) == -1) 1391 continue; 1392 } 1393 ret = bufctl_walk_callback(cp, wsp, out); 1394 } else { 1395 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1396 } 1397 1398 if (ret != WALK_NEXT) 1399 return (ret); 1400 } 1401 } 1402 1403 /* 1404 * If they want constructed buffers, we're finished, since the 1405 * magazine layer holds them all. 
1406 */ 1407 if (type & KM_CONSTRUCTED) 1408 return (WALK_DONE); 1409 1410 /* 1411 * Handle the buffers in the current slab 1412 */ 1413 chunksize = cp->cache_chunksize; 1414 slabsize = cp->cache_slabsize; 1415 1416 sp = wsp->walk_layer; 1417 chunks = sp->slab_chunks; 1418 kbase = sp->slab_base; 1419 1420 dprintf(("kbase is %p\n", kbase)); 1421 1422 if (!(cp->cache_flags & KMF_HASH)) { 1423 valid = kmw->kmw_valid; 1424 ubase = kmw->kmw_ubase; 1425 1426 if (mdb_vread(ubase, chunks * chunksize, 1427 (uintptr_t)kbase) == -1) { 1428 mdb_warn("failed to read slab contents at %p", kbase); 1429 return (WALK_ERR); 1430 } 1431 1432 /* 1433 * Set up the valid map as fully allocated -- we'll punch 1434 * out the freelist. 1435 */ 1436 if (type & KM_ALLOCATED) 1437 (void) memset(valid, 1, chunks); 1438 } else { 1439 valid = NULL; 1440 ubase = NULL; 1441 } 1442 1443 /* 1444 * walk the slab's freelist 1445 */ 1446 bcp = sp->slab_head; 1447 1448 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1449 1450 /* 1451 * since we could be in the middle of allocating a buffer, 1452 * our refcnt could be one higher than it aught. So we 1453 * check one further on the freelist than the count allows. 1454 */ 1455 for (i = sp->slab_refcnt; i <= chunks; i++) { 1456 uint_t ndx; 1457 1458 dprintf(("bcp is %p\n", bcp)); 1459 1460 if (bcp == NULL) { 1461 if (i == chunks) 1462 break; 1463 mdb_warn( 1464 "slab %p in cache %p freelist too short by %d\n", 1465 sp, addr, chunks - i); 1466 break; 1467 } 1468 1469 if (cp->cache_flags & KMF_HASH) { 1470 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1471 mdb_warn("failed to read bufctl ptr at %p", 1472 bcp); 1473 break; 1474 } 1475 buf = bc.bc_addr; 1476 } else { 1477 /* 1478 * Otherwise the buffer is in the slab which 1479 * we've read in; we just need to determine 1480 * its offset in the slab to find the 1481 * kmem_bufctl_t. 1482 */ 1483 bc = *((kmem_bufctl_t *) 1484 ((uintptr_t)bcp - (uintptr_t)kbase + 1485 (uintptr_t)ubase)); 1486 1487 buf = KMEM_BUF(cp, bcp); 1488 } 1489 1490 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1491 1492 if (ndx > slabsize / cp->cache_bufsize) { 1493 /* 1494 * This is very wrong; we have managed to find 1495 * a buffer in the slab which shouldn't 1496 * actually be here. Emit a warning, and 1497 * try to continue. 1498 */ 1499 mdb_warn("buf %p is out of range for " 1500 "slab %p, cache %p\n", buf, sp, addr); 1501 } else if (type & KM_ALLOCATED) { 1502 /* 1503 * we have found a buffer on the slab's freelist; 1504 * clear its entry 1505 */ 1506 valid[ndx] = 0; 1507 } else { 1508 /* 1509 * Report this freed buffer 1510 */ 1511 if (type & KM_BUFCTL) { 1512 ret = bufctl_walk_callback(cp, wsp, 1513 (uintptr_t)bcp); 1514 } else { 1515 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1516 } 1517 if (ret != WALK_NEXT) 1518 return (ret); 1519 } 1520 1521 bcp = bc.bc_next; 1522 } 1523 1524 if (bcp != NULL) { 1525 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1526 sp, addr, bcp)); 1527 } 1528 1529 /* 1530 * If we are walking freed buffers, the loop above handled reporting 1531 * them. 1532 */ 1533 if (type & KM_FREE) 1534 return (WALK_NEXT); 1535 1536 if (type & KM_BUFCTL) { 1537 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1538 "cache %p\n", addr); 1539 return (WALK_ERR); 1540 } 1541 1542 /* 1543 * Report allocated buffers, skipping buffers in the magazine layer. 1544 * We only get this far for small-slab caches. 
1545 */ 1546 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1547 buf = (char *)kbase + i * chunksize; 1548 1549 if (!valid[i]) 1550 continue; /* on slab freelist */ 1551 1552 if (magcnt > 0 && 1553 bsearch(&buf, maglist, magcnt, sizeof (void *), 1554 addrcmp) != NULL) 1555 continue; /* in magazine layer */ 1556 1557 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1558 } 1559 return (ret); 1560 } 1561 1562 void 1563 kmem_walk_fini(mdb_walk_state_t *wsp) 1564 { 1565 kmem_walk_t *kmw = wsp->walk_data; 1566 uintptr_t chunksize; 1567 uintptr_t slabsize; 1568 1569 if (kmw == NULL) 1570 return; 1571 1572 if (kmw->kmw_maglist != NULL) 1573 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1574 1575 chunksize = kmw->kmw_cp->cache_chunksize; 1576 slabsize = kmw->kmw_cp->cache_slabsize; 1577 1578 if (kmw->kmw_valid != NULL) 1579 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1580 if (kmw->kmw_ubase != NULL) 1581 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1582 1583 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1584 mdb_free(kmw, sizeof (kmem_walk_t)); 1585 } 1586 1587 /*ARGSUSED*/ 1588 static int 1589 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1590 { 1591 /* 1592 * Buffers allocated from NOTOUCH caches can also show up as freed 1593 * memory in other caches. This can be a little confusing, so we 1594 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1595 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1596 */ 1597 if (c->cache_cflags & KMC_NOTOUCH) 1598 return (WALK_NEXT); 1599 1600 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1601 wsp->walk_cbdata, addr) == -1) 1602 return (WALK_DONE); 1603 1604 return (WALK_NEXT); 1605 } 1606 1607 #define KMEM_WALK_ALL(name, wsp) { \ 1608 wsp->walk_data = (name); \ 1609 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1610 return (WALK_ERR); \ 1611 return (WALK_DONE); \ 1612 } 1613 1614 int 1615 kmem_walk_init(mdb_walk_state_t *wsp) 1616 { 1617 if (wsp->walk_arg != NULL) 1618 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1619 1620 if (wsp->walk_addr == NULL) 1621 KMEM_WALK_ALL("kmem", wsp); 1622 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1623 } 1624 1625 int 1626 bufctl_walk_init(mdb_walk_state_t *wsp) 1627 { 1628 if (wsp->walk_addr == NULL) 1629 KMEM_WALK_ALL("bufctl", wsp); 1630 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1631 } 1632 1633 int 1634 freemem_walk_init(mdb_walk_state_t *wsp) 1635 { 1636 if (wsp->walk_addr == NULL) 1637 KMEM_WALK_ALL("freemem", wsp); 1638 return (kmem_walk_init_common(wsp, KM_FREE)); 1639 } 1640 1641 int 1642 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1643 { 1644 if (wsp->walk_addr == NULL) 1645 KMEM_WALK_ALL("freemem_constructed", wsp); 1646 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1647 } 1648 1649 int 1650 freectl_walk_init(mdb_walk_state_t *wsp) 1651 { 1652 if (wsp->walk_addr == NULL) 1653 KMEM_WALK_ALL("freectl", wsp); 1654 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1655 } 1656 1657 int 1658 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1659 { 1660 if (wsp->walk_addr == NULL) 1661 KMEM_WALK_ALL("freectl_constructed", wsp); 1662 return (kmem_walk_init_common(wsp, 1663 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1664 } 1665 1666 typedef struct bufctl_history_walk { 1667 void *bhw_next; 1668 kmem_cache_t *bhw_cache; 1669 kmem_slab_t *bhw_slab; 1670 hrtime_t bhw_timestamp; 1671 } bufctl_history_walk_t; 1672 1673 int 1674 
bufctl_history_walk_init(mdb_walk_state_t *wsp) 1675 { 1676 bufctl_history_walk_t *bhw; 1677 kmem_bufctl_audit_t bc; 1678 kmem_bufctl_audit_t bcn; 1679 1680 if (wsp->walk_addr == NULL) { 1681 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1682 return (WALK_ERR); 1683 } 1684 1685 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1686 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1687 return (WALK_ERR); 1688 } 1689 1690 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1691 bhw->bhw_timestamp = 0; 1692 bhw->bhw_cache = bc.bc_cache; 1693 bhw->bhw_slab = bc.bc_slab; 1694 1695 /* 1696 * sometimes the first log entry matches the base bufctl; in that 1697 * case, skip the base bufctl. 1698 */ 1699 if (bc.bc_lastlog != NULL && 1700 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1701 bc.bc_addr == bcn.bc_addr && 1702 bc.bc_cache == bcn.bc_cache && 1703 bc.bc_slab == bcn.bc_slab && 1704 bc.bc_timestamp == bcn.bc_timestamp && 1705 bc.bc_thread == bcn.bc_thread) 1706 bhw->bhw_next = bc.bc_lastlog; 1707 else 1708 bhw->bhw_next = (void *)wsp->walk_addr; 1709 1710 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1711 wsp->walk_data = bhw; 1712 1713 return (WALK_NEXT); 1714 } 1715 1716 int 1717 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1718 { 1719 bufctl_history_walk_t *bhw = wsp->walk_data; 1720 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1721 uintptr_t baseaddr = wsp->walk_addr; 1722 kmem_bufctl_audit_t bc; 1723 1724 if (addr == NULL) 1725 return (WALK_DONE); 1726 1727 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1728 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1729 return (WALK_ERR); 1730 } 1731 1732 /* 1733 * The bufctl is only valid if the address, cache, and slab are 1734 * correct. We also check that the timestamp is decreasing, to 1735 * prevent infinite loops. 1736 */ 1737 if ((uintptr_t)bc.bc_addr != baseaddr || 1738 bc.bc_cache != bhw->bhw_cache || 1739 bc.bc_slab != bhw->bhw_slab || 1740 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1741 return (WALK_DONE); 1742 1743 bhw->bhw_next = bc.bc_lastlog; 1744 bhw->bhw_timestamp = bc.bc_timestamp; 1745 1746 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1747 } 1748 1749 void 1750 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1751 { 1752 bufctl_history_walk_t *bhw = wsp->walk_data; 1753 1754 mdb_free(bhw, sizeof (*bhw)); 1755 } 1756 1757 typedef struct kmem_log_walk { 1758 kmem_bufctl_audit_t *klw_base; 1759 kmem_bufctl_audit_t **klw_sorted; 1760 kmem_log_header_t klw_lh; 1761 size_t klw_size; 1762 size_t klw_maxndx; 1763 size_t klw_ndx; 1764 } kmem_log_walk_t; 1765 1766 int 1767 kmem_log_walk_init(mdb_walk_state_t *wsp) 1768 { 1769 uintptr_t lp = wsp->walk_addr; 1770 kmem_log_walk_t *klw; 1771 kmem_log_header_t *lhp; 1772 int maxndx, i, j, k; 1773 1774 /* 1775 * By default (global walk), walk the kmem_transaction_log. Otherwise 1776 * read the log whose kmem_log_header_t is stored at walk_addr. 
1777 */ 1778 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1779 mdb_warn("failed to read 'kmem_transaction_log'"); 1780 return (WALK_ERR); 1781 } 1782 1783 if (lp == NULL) { 1784 mdb_warn("log is disabled\n"); 1785 return (WALK_ERR); 1786 } 1787 1788 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1789 lhp = &klw->klw_lh; 1790 1791 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1792 mdb_warn("failed to read log header at %p", lp); 1793 mdb_free(klw, sizeof (kmem_log_walk_t)); 1794 return (WALK_ERR); 1795 } 1796 1797 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1798 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1799 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1800 1801 if (mdb_vread(klw->klw_base, klw->klw_size, 1802 (uintptr_t)lhp->lh_base) == -1) { 1803 mdb_warn("failed to read log at base %p", lhp->lh_base); 1804 mdb_free(klw->klw_base, klw->klw_size); 1805 mdb_free(klw, sizeof (kmem_log_walk_t)); 1806 return (WALK_ERR); 1807 } 1808 1809 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1810 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1811 1812 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1813 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1814 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1815 1816 for (j = 0; j < maxndx; j++) 1817 klw->klw_sorted[k++] = &chunk[j]; 1818 } 1819 1820 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1821 (int(*)(const void *, const void *))bufctlcmp); 1822 1823 klw->klw_maxndx = k; 1824 wsp->walk_data = klw; 1825 1826 return (WALK_NEXT); 1827 } 1828 1829 int 1830 kmem_log_walk_step(mdb_walk_state_t *wsp) 1831 { 1832 kmem_log_walk_t *klw = wsp->walk_data; 1833 kmem_bufctl_audit_t *bcp; 1834 1835 if (klw->klw_ndx == klw->klw_maxndx) 1836 return (WALK_DONE); 1837 1838 bcp = klw->klw_sorted[klw->klw_ndx++]; 1839 1840 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1841 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1842 } 1843 1844 void 1845 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1846 { 1847 kmem_log_walk_t *klw = wsp->walk_data; 1848 1849 mdb_free(klw->klw_base, klw->klw_size); 1850 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1851 sizeof (kmem_bufctl_audit_t *)); 1852 mdb_free(klw, sizeof (kmem_log_walk_t)); 1853 } 1854 1855 typedef struct allocdby_bufctl { 1856 uintptr_t abb_addr; 1857 hrtime_t abb_ts; 1858 } allocdby_bufctl_t; 1859 1860 typedef struct allocdby_walk { 1861 const char *abw_walk; 1862 uintptr_t abw_thread; 1863 size_t abw_nbufs; 1864 size_t abw_size; 1865 allocdby_bufctl_t *abw_buf; 1866 size_t abw_ndx; 1867 } allocdby_walk_t; 1868 1869 int 1870 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1871 allocdby_walk_t *abw) 1872 { 1873 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1874 return (WALK_NEXT); 1875 1876 if (abw->abw_nbufs == abw->abw_size) { 1877 allocdby_bufctl_t *buf; 1878 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1879 1880 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1881 1882 bcopy(abw->abw_buf, buf, oldsize); 1883 mdb_free(abw->abw_buf, oldsize); 1884 1885 abw->abw_size <<= 1; 1886 abw->abw_buf = buf; 1887 } 1888 1889 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1890 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1891 abw->abw_nbufs++; 1892 1893 return (WALK_NEXT); 1894 } 1895 1896 /*ARGSUSED*/ 1897 int 1898 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1899 { 1900 if (mdb_pwalk(abw->abw_walk, 
(mdb_walk_cb_t)allocdby_walk_bufctl, 1901 abw, addr) == -1) { 1902 mdb_warn("couldn't walk bufctl for cache %p", addr); 1903 return (WALK_DONE); 1904 } 1905 1906 return (WALK_NEXT); 1907 } 1908 1909 static int 1910 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1911 { 1912 if (lhs->abb_ts < rhs->abb_ts) 1913 return (1); 1914 if (lhs->abb_ts > rhs->abb_ts) 1915 return (-1); 1916 return (0); 1917 } 1918 1919 static int 1920 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1921 { 1922 allocdby_walk_t *abw; 1923 1924 if (wsp->walk_addr == NULL) { 1925 mdb_warn("allocdby walk doesn't support global walks\n"); 1926 return (WALK_ERR); 1927 } 1928 1929 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1930 1931 abw->abw_thread = wsp->walk_addr; 1932 abw->abw_walk = walk; 1933 abw->abw_size = 128; /* something reasonable */ 1934 abw->abw_buf = 1935 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1936 1937 wsp->walk_data = abw; 1938 1939 if (mdb_walk("kmem_cache", 1940 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1941 mdb_warn("couldn't walk kmem_cache"); 1942 allocdby_walk_fini(wsp); 1943 return (WALK_ERR); 1944 } 1945 1946 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1947 (int(*)(const void *, const void *))allocdby_cmp); 1948 1949 return (WALK_NEXT); 1950 } 1951 1952 int 1953 allocdby_walk_init(mdb_walk_state_t *wsp) 1954 { 1955 return (allocdby_walk_init_common(wsp, "bufctl")); 1956 } 1957 1958 int 1959 freedby_walk_init(mdb_walk_state_t *wsp) 1960 { 1961 return (allocdby_walk_init_common(wsp, "freectl")); 1962 } 1963 1964 int 1965 allocdby_walk_step(mdb_walk_state_t *wsp) 1966 { 1967 allocdby_walk_t *abw = wsp->walk_data; 1968 kmem_bufctl_audit_t bc; 1969 uintptr_t addr; 1970 1971 if (abw->abw_ndx == abw->abw_nbufs) 1972 return (WALK_DONE); 1973 1974 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1975 1976 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1977 mdb_warn("couldn't read bufctl at %p", addr); 1978 return (WALK_DONE); 1979 } 1980 1981 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1982 } 1983 1984 void 1985 allocdby_walk_fini(mdb_walk_state_t *wsp) 1986 { 1987 allocdby_walk_t *abw = wsp->walk_data; 1988 1989 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1990 mdb_free(abw, sizeof (allocdby_walk_t)); 1991 } 1992 1993 /*ARGSUSED*/ 1994 int 1995 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 1996 { 1997 char c[MDB_SYM_NAMLEN]; 1998 GElf_Sym sym; 1999 int i; 2000 2001 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 2002 for (i = 0; i < bcp->bc_depth; i++) { 2003 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2004 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2005 continue; 2006 if (strncmp(c, "kmem_", 5) == 0) 2007 continue; 2008 mdb_printf("%s+0x%lx", 2009 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 2010 break; 2011 } 2012 mdb_printf("\n"); 2013 2014 return (WALK_NEXT); 2015 } 2016 2017 static int 2018 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 2019 { 2020 if (!(flags & DCMD_ADDRSPEC)) 2021 return (DCMD_USAGE); 2022 2023 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 2024 2025 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 2026 mdb_warn("can't walk '%s' for %p", w, addr); 2027 return (DCMD_ERR); 2028 } 2029 2030 return (DCMD_OK); 2031 } 2032 2033 /*ARGSUSED*/ 2034 int 2035 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2036 { 2037 return (allocdby_common(addr, 
flags, "allocdby")); 2038 } 2039 2040 /*ARGSUSED*/ 2041 int 2042 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2043 { 2044 return (allocdby_common(addr, flags, "freedby")); 2045 } 2046 2047 /* 2048 * Return a string describing the address in relation to the given thread's 2049 * stack. 2050 * 2051 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 2052 * 2053 * - If the address is above the stack pointer, return an empty string 2054 * signifying that the address is active. 2055 * 2056 * - If the address is below the stack pointer, and the thread is not on proc, 2057 * return " (below sp)". 2058 * 2059 * - If the address is below the stack pointer, and the thread is on proc, 2060 * return " (possibly below sp)". Depending on context, we may or may not 2061 * have an accurate t_sp. 2062 */ 2063 static const char * 2064 stack_active(const kthread_t *t, uintptr_t addr) 2065 { 2066 uintptr_t panicstk; 2067 GElf_Sym sym; 2068 2069 if (t->t_state == TS_FREE) 2070 return (" (inactive interrupt thread)"); 2071 2072 /* 2073 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 2074 * no longer relates to the thread's real stack. 2075 */ 2076 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 2077 panicstk = (uintptr_t)sym.st_value; 2078 2079 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 2080 return (""); 2081 } 2082 2083 if (addr >= t->t_sp + STACK_BIAS) 2084 return (""); 2085 2086 if (t->t_state == TS_ONPROC) 2087 return (" (possibly below sp)"); 2088 2089 return (" (below sp)"); 2090 } 2091 2092 typedef struct whatis { 2093 uintptr_t w_addr; 2094 const kmem_cache_t *w_cache; 2095 const vmem_t *w_vmem; 2096 size_t w_slab_align; 2097 int w_slab_found; 2098 int w_found; 2099 int w_kmem_lite_count; 2100 uint_t w_verbose; 2101 uint_t w_freemem; 2102 uint_t w_all; 2103 uint_t w_bufctl; 2104 uint_t w_idspace; 2105 } whatis_t; 2106 2107 static void 2108 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w) 2109 { 2110 /* LINTED pointer cast may result in improper alignment */ 2111 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr); 2112 intptr_t stat; 2113 int count = 0; 2114 int i; 2115 pc_t callers[16]; 2116 2117 if (w->w_cache->cache_flags & KMF_REDZONE) { 2118 kmem_buftag_t bt; 2119 2120 if (mdb_vread(&bt, sizeof (bt), btaddr) == -1) 2121 goto done; 2122 2123 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 2124 2125 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) 2126 goto done; 2127 2128 /* 2129 * provide the bufctl ptr if it has useful information 2130 */ 2131 if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT)) 2132 baddr = (uintptr_t)bt.bt_bufctl; 2133 2134 if (w->w_cache->cache_flags & KMF_LITE) { 2135 count = w->w_kmem_lite_count; 2136 2137 if (count * sizeof (pc_t) > sizeof (callers)) 2138 count = 0; 2139 2140 if (count > 0 && 2141 mdb_vread(callers, count * sizeof (pc_t), 2142 btaddr + 2143 offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2144 count = 0; 2145 2146 /* 2147 * skip unused callers 2148 */ 2149 while (count > 0 && callers[count - 1] == 2150 (pc_t)KMEM_UNINITIALIZED_PATTERN) 2151 count--; 2152 } 2153 } 2154 2155 done: 2156 if (baddr == 0) 2157 mdb_printf("%p is %p+%p, %s from %s\n", 2158 w->w_addr, addr, w->w_addr - addr, 2159 w->w_freemem == FALSE ? "allocated" : "freed", 2160 w->w_cache->cache_name); 2161 else 2162 mdb_printf("%p is %p+%p, bufctl %p %s from %s\n", 2163 w->w_addr, addr, w->w_addr - addr, baddr, 2164 w->w_freemem == FALSE ? 
"allocated" : "freed", 2165 w->w_cache->cache_name); 2166 2167 if (count > 0) { 2168 mdb_inc_indent(8); 2169 mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"", 2170 callers[0], (count != 1)? ", ":"\n"); 2171 for (i = 1; i < count; i++) 2172 mdb_printf("%a%s", callers[i], 2173 (i + 1 < count)? ", ":"\n"); 2174 mdb_dec_indent(8); 2175 } 2176 } 2177 2178 /*ARGSUSED*/ 2179 static int 2180 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w) 2181 { 2182 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2183 return (WALK_NEXT); 2184 2185 whatis_print_kmem(addr, 0, w); 2186 w->w_found++; 2187 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2188 } 2189 2190 static int 2191 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 2192 { 2193 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 2194 return (WALK_NEXT); 2195 2196 mdb_printf("%p is %p+%p ", w->w_addr, 2197 vs->vs_start, w->w_addr - vs->vs_start); 2198 2199 /* 2200 * Always provide the vmem_seg pointer if it has a stack trace. 2201 */ 2202 if (w->w_bufctl == TRUE || 2203 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) { 2204 mdb_printf("(vmem_seg %p) ", addr); 2205 } 2206 2207 mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ? 2208 "freed " : "", w->w_vmem->vm_name); 2209 2210 w->w_found++; 2211 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2212 } 2213 2214 static int 2215 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 2216 { 2217 const char *nm = vmem->vm_name; 2218 w->w_vmem = vmem; 2219 w->w_freemem = FALSE; 2220 2221 if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2222 return (WALK_NEXT); 2223 2224 if (w->w_verbose) 2225 mdb_printf("Searching vmem arena %s...\n", nm); 2226 2227 if (mdb_pwalk("vmem_alloc", 2228 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2229 mdb_warn("can't walk vmem seg for %p", addr); 2230 return (WALK_NEXT); 2231 } 2232 2233 if (w->w_found && w->w_all == FALSE) 2234 return (WALK_DONE); 2235 2236 if (w->w_verbose) 2237 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 2238 2239 w->w_freemem = TRUE; 2240 2241 if (mdb_pwalk("vmem_free", 2242 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2243 mdb_warn("can't walk vmem seg for %p", addr); 2244 return (WALK_NEXT); 2245 } 2246 2247 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 2248 } 2249 2250 /*ARGSUSED*/ 2251 static int 2252 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w) 2253 { 2254 uintptr_t addr; 2255 2256 if (bcp == NULL) 2257 return (WALK_NEXT); 2258 2259 addr = (uintptr_t)bcp->bc_addr; 2260 2261 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2262 return (WALK_NEXT); 2263 2264 whatis_print_kmem(addr, baddr, w); 2265 w->w_found++; 2266 return (w->w_all == TRUE ? 
WALK_NEXT : WALK_DONE); 2267 } 2268 2269 /*ARGSUSED*/ 2270 static int 2271 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w) 2272 { 2273 uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align); 2274 2275 if ((w->w_addr - base) >= w->w_cache->cache_slabsize) 2276 return (WALK_NEXT); 2277 2278 w->w_slab_found++; 2279 return (WALK_DONE); 2280 } 2281 2282 static int 2283 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2284 { 2285 char *walk, *freewalk; 2286 mdb_walk_cb_t func; 2287 vmem_t *vmp = c->cache_arena; 2288 2289 if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2290 return (WALK_NEXT); 2291 2292 if (w->w_bufctl == FALSE) { 2293 walk = "kmem"; 2294 freewalk = "freemem"; 2295 func = (mdb_walk_cb_t)whatis_walk_kmem; 2296 } else { 2297 walk = "bufctl"; 2298 freewalk = "freectl"; 2299 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2300 } 2301 2302 w->w_cache = c; 2303 2304 if (w->w_verbose) 2305 mdb_printf("Searching %s's slabs...\n", c->cache_name); 2306 2307 /* 2308 * Verify that the address is in one of the cache's slabs. If not, 2309 * we can skip the more expensive walkers. (this is purely a 2310 * heuristic -- as long as there are no false-negatives, we'll be fine) 2311 * 2312 * We try to get the cache's arena's quantum, since to accurately 2313 * get the base of a slab, you have to align it to the quantum. If 2314 * it doesn't look sensible, we fall back to not aligning. 2315 */ 2316 if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align), 2317 (uintptr_t)&vmp->vm_quantum) == -1) { 2318 mdb_warn("unable to read %p->cache_arena->vm_quantum", c); 2319 w->w_slab_align = 1; 2320 } 2321 2322 if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 || 2323 (w->w_slab_align & (w->w_slab_align - 1))) { 2324 mdb_warn("%p's arena has invalid quantum (0x%p)\n", c, 2325 w->w_slab_align); 2326 w->w_slab_align = 1; 2327 } 2328 2329 w->w_slab_found = 0; 2330 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w, 2331 addr) == -1) { 2332 mdb_warn("can't find kmem_slab walker"); 2333 return (WALK_DONE); 2334 } 2335 if (w->w_slab_found == 0) 2336 return (WALK_NEXT); 2337 2338 if (c->cache_flags & KMF_LITE) { 2339 if (mdb_readvar(&w->w_kmem_lite_count, 2340 "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16) 2341 w->w_kmem_lite_count = 0; 2342 } 2343 2344 if (w->w_verbose) 2345 mdb_printf("Searching %s...\n", c->cache_name); 2346 2347 w->w_freemem = FALSE; 2348 2349 if (mdb_pwalk(walk, func, w, addr) == -1) { 2350 mdb_warn("can't find %s walker", walk); 2351 return (WALK_DONE); 2352 } 2353 2354 if (w->w_found && w->w_all == FALSE) 2355 return (WALK_DONE); 2356 2357 /* 2358 * We have searched for allocated memory; now search for freed memory. 2359 */ 2360 if (w->w_verbose) 2361 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2362 2363 w->w_freemem = TRUE; 2364 2365 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 2366 mdb_warn("can't find %s walker", freewalk); 2367 return (WALK_DONE); 2368 } 2369 2370 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 2371 } 2372 2373 static int 2374 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2375 { 2376 if (c->cache_cflags & KMC_NOTOUCH) 2377 return (WALK_NEXT); 2378 2379 return (whatis_walk_cache(addr, c, w)); 2380 } 2381 2382 static int 2383 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2384 { 2385 if (!(c->cache_cflags & KMC_NOTOUCH)) 2386 return (WALK_NEXT); 2387 2388 return (whatis_walk_cache(addr, c, w)); 2389 } 2390 2391 static int 2392 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w) 2393 { 2394 /* 2395 * Often, one calls ::whatis on an address from a thread structure. 2396 * We use this opportunity to short circuit this case... 2397 */ 2398 if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) { 2399 mdb_printf("%p is %p+%p, allocated as a thread structure\n", 2400 w->w_addr, addr, w->w_addr - addr); 2401 w->w_found++; 2402 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2403 } 2404 2405 if (w->w_addr < (uintptr_t)t->t_stkbase || 2406 w->w_addr > (uintptr_t)t->t_stk) 2407 return (WALK_NEXT); 2408 2409 if (t->t_stkbase == NULL) 2410 return (WALK_NEXT); 2411 2412 mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr, 2413 stack_active(t, w->w_addr)); 2414 2415 w->w_found++; 2416 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2417 } 2418 2419 static int 2420 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w) 2421 { 2422 struct module mod; 2423 char name[MODMAXNAMELEN], *where; 2424 char c[MDB_SYM_NAMLEN]; 2425 Shdr shdr; 2426 GElf_Sym sym; 2427 2428 if (m->mod_mp == NULL) 2429 return (WALK_NEXT); 2430 2431 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2432 mdb_warn("couldn't read modctl %p's module", addr); 2433 return (WALK_NEXT); 2434 } 2435 2436 if (w->w_addr >= (uintptr_t)mod.text && 2437 w->w_addr < (uintptr_t)mod.text + mod.text_size) { 2438 where = "text segment"; 2439 goto found; 2440 } 2441 2442 if (w->w_addr >= (uintptr_t)mod.data && 2443 w->w_addr < (uintptr_t)mod.data + mod.data_size) { 2444 where = "data segment"; 2445 goto found; 2446 } 2447 2448 if (w->w_addr >= (uintptr_t)mod.bss && 2449 w->w_addr < (uintptr_t)mod.bss + mod.bss_size) { 2450 where = "bss"; 2451 goto found; 2452 } 2453 2454 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2455 mdb_warn("couldn't read symbol header for %p's module", addr); 2456 return (WALK_NEXT); 2457 } 2458 2459 if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr < 2460 (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) { 2461 where = "symtab"; 2462 goto found; 2463 } 2464 2465 if (w->w_addr >= (uintptr_t)mod.symspace && 2466 w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) { 2467 where = "symspace"; 2468 goto found; 2469 } 2470 2471 return (WALK_NEXT); 2472 2473 found: 2474 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2475 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2476 2477 mdb_printf("%p is ", w->w_addr); 2478 2479 /* 2480 * If we found this address in a module, then there's a chance that 2481 * it's actually a named symbol. Try the symbol lookup. 
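 * (The lookup is fuzzy, so the symbol name is only printed when w_addr actually falls within [st_value, st_value + st_size).)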
2482 */ 2483 if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c), 2484 &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value && 2485 w->w_addr < (uintptr_t)sym.st_value + sym.st_size) { 2486 mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value); 2487 } 2488 2489 mdb_printf("in %s's %s\n", name, where); 2490 2491 w->w_found++; 2492 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2493 } 2494 2495 /*ARGSUSED*/ 2496 static int 2497 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w) 2498 { 2499 static int machsize = 0; 2500 mdb_ctf_id_t id; 2501 2502 if (machsize == 0) { 2503 if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0) 2504 machsize = mdb_ctf_type_size(id); 2505 else { 2506 mdb_warn("could not get size of page_t"); 2507 machsize = sizeof (page_t); 2508 } 2509 } 2510 2511 if (w->w_addr < addr || w->w_addr >= addr + machsize) 2512 return (WALK_NEXT); 2513 2514 mdb_printf("%p is %p+%p, allocated as a page structure\n", 2515 w->w_addr, addr, w->w_addr - addr); 2516 2517 w->w_found++; 2518 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2519 } 2520 2521 int 2522 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2523 { 2524 whatis_t w; 2525 2526 if (!(flags & DCMD_ADDRSPEC)) 2527 return (DCMD_USAGE); 2528 2529 w.w_verbose = FALSE; 2530 w.w_bufctl = FALSE; 2531 w.w_all = FALSE; 2532 w.w_idspace = FALSE; 2533 2534 if (mdb_getopts(argc, argv, 2535 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2536 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2537 'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace, 2538 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc) 2539 return (DCMD_USAGE); 2540 2541 w.w_addr = addr; 2542 w.w_found = 0; 2543 2544 if (w.w_verbose) 2545 mdb_printf("Searching modules...\n"); 2546 2547 if (!w.w_idspace) { 2548 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w) 2549 == -1) { 2550 mdb_warn("couldn't find modctl walker"); 2551 return (DCMD_ERR); 2552 } 2553 2554 if (w.w_found && w.w_all == FALSE) 2555 return (DCMD_OK); 2556 2557 /* 2558 * Now search all thread stacks. Yes, this is a little weak; we 2559 * can save a lot of work by first checking to see if the 2560 * address is in segkp vs. segkmem. But hey, computers are 2561 * fast. 
2562 */ 2563 if (w.w_verbose) 2564 mdb_printf("Searching threads...\n"); 2565 2566 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w) 2567 == -1) { 2568 mdb_warn("couldn't find thread walker"); 2569 return (DCMD_ERR); 2570 } 2571 2572 if (w.w_found && w.w_all == FALSE) 2573 return (DCMD_OK); 2574 2575 if (w.w_verbose) 2576 mdb_printf("Searching page structures...\n"); 2577 2578 if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w) 2579 == -1) { 2580 mdb_warn("couldn't find page walker"); 2581 return (DCMD_ERR); 2582 } 2583 2584 if (w.w_found && w.w_all == FALSE) 2585 return (DCMD_OK); 2586 } 2587 2588 if (mdb_walk("kmem_cache", 2589 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2590 mdb_warn("couldn't find kmem_cache walker"); 2591 return (DCMD_ERR); 2592 } 2593 2594 if (w.w_found && w.w_all == FALSE) 2595 return (DCMD_OK); 2596 2597 if (mdb_walk("kmem_cache", 2598 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2599 mdb_warn("couldn't find kmem_cache walker"); 2600 return (DCMD_ERR); 2601 } 2602 2603 if (w.w_found && w.w_all == FALSE) 2604 return (DCMD_OK); 2605 2606 if (mdb_walk("vmem_postfix", 2607 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2608 mdb_warn("couldn't find vmem_postfix walker"); 2609 return (DCMD_ERR); 2610 } 2611 2612 if (w.w_found == 0) 2613 mdb_printf("%p is unknown\n", addr); 2614 2615 return (DCMD_OK); 2616 } 2617 2618 void 2619 whatis_help(void) 2620 { 2621 mdb_printf( 2622 "Given a virtual address, attempt to determine where it came\n" 2623 "from.\n" 2624 "\n" 2625 "\t-v\tVerbose output; display caches/arenas/etc as they are\n" 2626 "\t\tsearched\n" 2627 "\t-a\tFind all possible sources. Default behavior is to stop at\n" 2628 "\t\tthe first (most specific) source.\n" 2629 "\t-i\tSearch only identifier arenas and caches. By default\n" 2630 "\t\tthese are ignored.\n" 2631 "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n" 2632 "\t\trespectively. 
Warning: if the buffer exists, but does not\n" 2633 "\t\thave a bufctl, it will not be reported.\n"); 2634 } 2635 2636 typedef struct kmem_log_cpu { 2637 uintptr_t kmc_low; 2638 uintptr_t kmc_high; 2639 } kmem_log_cpu_t; 2640 2641 typedef struct kmem_log_data { 2642 uintptr_t kmd_addr; 2643 kmem_log_cpu_t *kmd_cpu; 2644 } kmem_log_data_t; 2645 2646 int 2647 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2648 kmem_log_data_t *kmd) 2649 { 2650 int i; 2651 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2652 size_t bufsize; 2653 2654 for (i = 0; i < NCPU; i++) { 2655 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2656 break; 2657 } 2658 2659 if (kmd->kmd_addr) { 2660 if (b->bc_cache == NULL) 2661 return (WALK_NEXT); 2662 2663 if (mdb_vread(&bufsize, sizeof (bufsize), 2664 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2665 mdb_warn( 2666 "failed to read cache_bufsize for cache at %p", 2667 b->bc_cache); 2668 return (WALK_ERR); 2669 } 2670 2671 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2672 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2673 return (WALK_NEXT); 2674 } 2675 2676 if (i == NCPU) 2677 mdb_printf(" "); 2678 else 2679 mdb_printf("%3d", i); 2680 2681 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2682 b->bc_timestamp, b->bc_thread); 2683 2684 return (WALK_NEXT); 2685 } 2686 2687 /*ARGSUSED*/ 2688 int 2689 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2690 { 2691 kmem_log_header_t lh; 2692 kmem_cpu_log_header_t clh; 2693 uintptr_t lhp, clhp; 2694 int ncpus; 2695 uintptr_t *cpu; 2696 GElf_Sym sym; 2697 kmem_log_cpu_t *kmc; 2698 int i; 2699 kmem_log_data_t kmd; 2700 uint_t opt_b = FALSE; 2701 2702 if (mdb_getopts(argc, argv, 2703 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2704 return (DCMD_USAGE); 2705 2706 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2707 mdb_warn("failed to read 'kmem_transaction_log'"); 2708 return (DCMD_ERR); 2709 } 2710 2711 if (lhp == NULL) { 2712 mdb_warn("no kmem transaction log\n"); 2713 return (DCMD_ERR); 2714 } 2715 2716 mdb_readvar(&ncpus, "ncpus"); 2717 2718 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2719 mdb_warn("failed to read log header at %p", lhp); 2720 return (DCMD_ERR); 2721 } 2722 2723 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2724 2725 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2726 2727 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2728 mdb_warn("couldn't find 'cpu' array"); 2729 return (DCMD_ERR); 2730 } 2731 2732 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2733 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2734 NCPU * sizeof (uintptr_t), sym.st_size); 2735 return (DCMD_ERR); 2736 } 2737 2738 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2739 mdb_warn("failed to read cpu array at %p", sym.st_value); 2740 return (DCMD_ERR); 2741 } 2742 2743 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2744 kmd.kmd_addr = NULL; 2745 kmd.kmd_cpu = kmc; 2746 2747 for (i = 0; i < NCPU; i++) { 2748 2749 if (cpu[i] == NULL) 2750 continue; 2751 2752 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2753 mdb_warn("cannot read cpu %d's log header at %p", 2754 i, clhp); 2755 return (DCMD_ERR); 2756 } 2757 2758 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2759 (uintptr_t)lh.lh_base; 2760 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2761 2762 clhp += sizeof (kmem_cpu_log_header_t); 2763 } 2764 2765 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2766 "TIMESTAMP", "THREAD"); 
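/* Descriptive note: the CPU column in the output comes from kmem_log_walk(), which matches each bufctl address against the per-CPU [kmc_low, kmc_high) ranges gathered above; entries that fall outside every CPU's current log chunk are printed with a blank CPU field. */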
2767 2768 /* 2769 * If we have been passed an address, print out only log entries 2770 * corresponding to that address. If opt_b is specified, then interpret 2771 * the address as a bufctl. 2772 */ 2773 if (flags & DCMD_ADDRSPEC) { 2774 kmem_bufctl_audit_t b; 2775 2776 if (opt_b) { 2777 kmd.kmd_addr = addr; 2778 } else { 2779 if (mdb_vread(&b, 2780 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2781 mdb_warn("failed to read bufctl at %p", addr); 2782 return (DCMD_ERR); 2783 } 2784 2785 (void) kmem_log_walk(addr, &b, &kmd); 2786 2787 return (DCMD_OK); 2788 } 2789 } 2790 2791 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2792 mdb_warn("can't find kmem log walker"); 2793 return (DCMD_ERR); 2794 } 2795 2796 return (DCMD_OK); 2797 } 2798 2799 typedef struct bufctl_history_cb { 2800 int bhc_flags; 2801 int bhc_argc; 2802 const mdb_arg_t *bhc_argv; 2803 int bhc_ret; 2804 } bufctl_history_cb_t; 2805 2806 /*ARGSUSED*/ 2807 static int 2808 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2809 { 2810 bufctl_history_cb_t *bhc = arg; 2811 2812 bhc->bhc_ret = 2813 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2814 2815 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2816 2817 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2818 } 2819 2820 void 2821 bufctl_help(void) 2822 { 2823 mdb_printf("%s", 2824 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2825 mdb_dec_indent(2); 2826 mdb_printf("%<b>OPTIONS%</b>\n"); 2827 mdb_inc_indent(2); 2828 mdb_printf("%s", 2829 " -v Display the full content of the bufctl, including its stack trace\n" 2830 " -h retrieve the bufctl's transaction history, if available\n" 2831 " -a addr\n" 2832 " filter out bufctls not involving the buffer at addr\n" 2833 " -c caller\n" 2834 " filter out bufctls without the function/PC in their stack trace\n" 2835 " -e earliest\n" 2836 " filter out bufctls timestamped before earliest\n" 2837 " -l latest\n" 2838 " filter out bufctls timestamped after latest\n" 2839 " -t thread\n" 2840 " filter out bufctls not involving thread\n"); 2841 } 2842 2843 int 2844 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2845 { 2846 kmem_bufctl_audit_t bc; 2847 uint_t verbose = FALSE; 2848 uint_t history = FALSE; 2849 uint_t in_history = FALSE; 2850 uintptr_t caller = NULL, thread = NULL; 2851 uintptr_t laddr, haddr, baddr = NULL; 2852 hrtime_t earliest = 0, latest = 0; 2853 int i, depth; 2854 char c[MDB_SYM_NAMLEN]; 2855 GElf_Sym sym; 2856 2857 if (mdb_getopts(argc, argv, 2858 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2859 'h', MDB_OPT_SETBITS, TRUE, &history, 2860 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2861 'c', MDB_OPT_UINTPTR, &caller, 2862 't', MDB_OPT_UINTPTR, &thread, 2863 'e', MDB_OPT_UINT64, &earliest, 2864 'l', MDB_OPT_UINT64, &latest, 2865 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2866 return (DCMD_USAGE); 2867 2868 if (!(flags & DCMD_ADDRSPEC)) 2869 return (DCMD_USAGE); 2870 2871 if (in_history && !history) 2872 return (DCMD_USAGE); 2873 2874 if (history && !in_history) { 2875 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2876 UM_SLEEP | UM_GC); 2877 bufctl_history_cb_t bhc; 2878 2879 nargv[0].a_type = MDB_TYPE_STRING; 2880 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2881 2882 for (i = 0; i < argc; i++) 2883 nargv[i + 1] = argv[i]; 2884 2885 /* 2886 * When in history mode, we treat each element as if it 2887 * were in a separate loop, so that the headers group 2888 * bufctls with similar histories.
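 * The internal -H option prepended to nargv marks the recursive ::bufctl invocations as already being inside a history walk, so they do not re-enter this block.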
2889 */ 2890 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2891 bhc.bhc_argc = argc + 1; 2892 bhc.bhc_argv = nargv; 2893 bhc.bhc_ret = DCMD_OK; 2894 2895 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2896 addr) == -1) { 2897 mdb_warn("unable to walk bufctl_history"); 2898 return (DCMD_ERR); 2899 } 2900 2901 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2902 mdb_printf("\n"); 2903 2904 return (bhc.bhc_ret); 2905 } 2906 2907 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2908 if (verbose) { 2909 mdb_printf("%16s %16s %16s %16s\n" 2910 "%<u>%16s %16s %16s %16s%</u>\n", 2911 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2912 "", "CACHE", "LASTLOG", "CONTENTS"); 2913 } else { 2914 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2915 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2916 } 2917 } 2918 2919 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2920 mdb_warn("couldn't read bufctl at %p", addr); 2921 return (DCMD_ERR); 2922 } 2923 2924 /* 2925 * Guard against bogus bc_depth in case the bufctl is corrupt or 2926 * the address does not really refer to a bufctl. 2927 */ 2928 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2929 2930 if (caller != NULL) { 2931 laddr = caller; 2932 haddr = caller + sizeof (caller); 2933 2934 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2935 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2936 /* 2937 * We were provided an exact symbol value; any 2938 * address in the function is valid. 2939 */ 2940 laddr = (uintptr_t)sym.st_value; 2941 haddr = (uintptr_t)sym.st_value + sym.st_size; 2942 } 2943 2944 for (i = 0; i < depth; i++) 2945 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2946 break; 2947 2948 if (i == depth) 2949 return (DCMD_OK); 2950 } 2951 2952 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2953 return (DCMD_OK); 2954 2955 if (earliest != 0 && bc.bc_timestamp < earliest) 2956 return (DCMD_OK); 2957 2958 if (latest != 0 && bc.bc_timestamp > latest) 2959 return (DCMD_OK); 2960 2961 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2962 return (DCMD_OK); 2963 2964 if (flags & DCMD_PIPE_OUT) { 2965 mdb_printf("%#lr\n", addr); 2966 return (DCMD_OK); 2967 } 2968 2969 if (verbose) { 2970 mdb_printf( 2971 "%<b>%16p%</b> %16p %16llx %16p\n" 2972 "%16s %16p %16p %16p\n", 2973 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2974 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2975 2976 mdb_inc_indent(17); 2977 for (i = 0; i < depth; i++) 2978 mdb_printf("%a\n", bc.bc_stack[i]); 2979 mdb_dec_indent(17); 2980 mdb_printf("\n"); 2981 } else { 2982 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2983 bc.bc_timestamp, bc.bc_thread); 2984 2985 for (i = 0; i < depth; i++) { 2986 if (mdb_lookup_by_addr(bc.bc_stack[i], 2987 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2988 continue; 2989 if (strncmp(c, "kmem_", 5) == 0) 2990 continue; 2991 mdb_printf(" %a\n", bc.bc_stack[i]); 2992 break; 2993 } 2994 2995 if (i >= depth) 2996 mdb_printf("\n"); 2997 } 2998 2999 return (DCMD_OK); 3000 } 3001 3002 typedef struct kmem_verify { 3003 uint64_t *kmv_buf; /* buffer to read cache contents into */ 3004 size_t kmv_size; /* number of bytes in kmv_buf */ 3005 int kmv_corruption; /* > 0 if corruption found. */ 3006 int kmv_besilent; /* report actual corruption sites */ 3007 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3008 } kmem_verify_t; 3009 3010 /* 3011 * verify_pattern() 3012 * verify that buf is filled with the pattern pat. 
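 * Returns the byte offset of the first mismatching 64-bit word, or -1 if the entire buffer matches the pattern.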
3013 */ 3014 static int64_t 3015 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3016 { 3017 /*LINTED*/ 3018 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3019 uint64_t *buf; 3020 3021 for (buf = buf_arg; buf < bufend; buf++) 3022 if (*buf != pat) 3023 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3024 return (-1); 3025 } 3026 3027 /* 3028 * verify_buftag() 3029 * verify that btp->bt_bxstat == (bcp ^ pat) 3030 */ 3031 static int 3032 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3033 { 3034 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3035 } 3036 3037 /* 3038 * verify_free() 3039 * verify the integrity of a free block of memory by checking 3040 * that it is filled with 0xdeadbeef and that its buftag is sane. 3041 */ 3042 /*ARGSUSED1*/ 3043 static int 3044 verify_free(uintptr_t addr, const void *data, void *private) 3045 { 3046 kmem_verify_t *kmv = (kmem_verify_t *)private; 3047 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3048 int64_t corrupt; /* corruption offset */ 3049 kmem_buftag_t *buftagp; /* ptr to buftag */ 3050 kmem_cache_t *cp = &kmv->kmv_cache; 3051 int besilent = kmv->kmv_besilent; 3052 3053 /*LINTED*/ 3054 buftagp = KMEM_BUFTAG(cp, buf); 3055 3056 /* 3057 * Read the buffer to check. 3058 */ 3059 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3060 if (!besilent) 3061 mdb_warn("couldn't read %p", addr); 3062 return (WALK_NEXT); 3063 } 3064 3065 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3066 KMEM_FREE_PATTERN)) >= 0) { 3067 if (!besilent) 3068 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3069 addr, (uintptr_t)addr + corrupt); 3070 goto corrupt; 3071 } 3072 /* 3073 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3074 * the first bytes of the buffer, hence we cannot check for red 3075 * zone corruption. 3076 */ 3077 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3078 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3079 if (!besilent) 3080 mdb_printf("buffer %p (free) seems to " 3081 "have a corrupt redzone pattern\n", addr); 3082 goto corrupt; 3083 } 3084 3085 /* 3086 * confirm bufctl pointer integrity. 3087 */ 3088 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3089 if (!besilent) 3090 mdb_printf("buffer %p (free) has a corrupt " 3091 "buftag\n", addr); 3092 goto corrupt; 3093 } 3094 3095 return (WALK_NEXT); 3096 corrupt: 3097 kmv->kmv_corruption++; 3098 return (WALK_NEXT); 3099 } 3100 3101 /* 3102 * verify_alloc() 3103 * Verify that the buftag of an allocated buffer makes sense with respect 3104 * to the buffer. 3105 */ 3106 /*ARGSUSED1*/ 3107 static int 3108 verify_alloc(uintptr_t addr, const void *data, void *private) 3109 { 3110 kmem_verify_t *kmv = (kmem_verify_t *)private; 3111 kmem_cache_t *cp = &kmv->kmv_cache; 3112 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3113 /*LINTED*/ 3114 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3115 uint32_t *ip = (uint32_t *)buftagp; 3116 uint8_t *bp = (uint8_t *)buf; 3117 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3118 int besilent = kmv->kmv_besilent; 3119 3120 /* 3121 * Read the buffer to check. 3122 */ 3123 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3124 if (!besilent) 3125 mdb_warn("couldn't read %p", addr); 3126 return (WALK_NEXT); 3127 } 3128 3129 /* 3130 * There are two cases to handle: 3131 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3132 * 0xfeedfacefeedface at the end of it 3133 * 2. 
If the buf was alloc'd using kmem_alloc, it will have 3134 * 0xbb just past the end of the region in use. At the buftag, 3135 * it will have 0xfeedface (or, if the whole buffer is in use, 3136 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3137 * endianness), followed by 32 bits containing the offset of the 3138 * 0xbb byte in the buffer. 3139 * 3140 * Finally, the two 32-bit words that comprise the second half of the 3141 * buftag should xor to KMEM_BUFTAG_ALLOC 3142 */ 3143 3144 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3145 looks_ok = 1; 3146 else if (!KMEM_SIZE_VALID(ip[1])) 3147 size_ok = 0; 3148 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3149 looks_ok = 1; 3150 else 3151 size_ok = 0; 3152 3153 if (!size_ok) { 3154 if (!besilent) 3155 mdb_printf("buffer %p (allocated) has a corrupt " 3156 "redzone size encoding\n", addr); 3157 goto corrupt; 3158 } 3159 3160 if (!looks_ok) { 3161 if (!besilent) 3162 mdb_printf("buffer %p (allocated) has a corrupt " 3163 "redzone signature\n", addr); 3164 goto corrupt; 3165 } 3166 3167 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3168 if (!besilent) 3169 mdb_printf("buffer %p (allocated) has a " 3170 "corrupt buftag\n", addr); 3171 goto corrupt; 3172 } 3173 3174 return (WALK_NEXT); 3175 corrupt: 3176 kmv->kmv_corruption++; 3177 return (WALK_NEXT); 3178 } 3179 3180 /*ARGSUSED2*/ 3181 int 3182 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3183 { 3184 if (flags & DCMD_ADDRSPEC) { 3185 int check_alloc = 0, check_free = 0; 3186 kmem_verify_t kmv; 3187 3188 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3189 addr) == -1) { 3190 mdb_warn("couldn't read kmem_cache %p", addr); 3191 return (DCMD_ERR); 3192 } 3193 3194 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3195 sizeof (kmem_buftag_t); 3196 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3197 kmv.kmv_corruption = 0; 3198 3199 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3200 check_alloc = 1; 3201 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3202 check_free = 1; 3203 } else { 3204 if (!(flags & DCMD_LOOP)) { 3205 mdb_warn("cache %p (%s) does not have " 3206 "redzone checking enabled\n", addr, 3207 kmv.kmv_cache.cache_name); 3208 } 3209 return (DCMD_ERR); 3210 } 3211 3212 if (flags & DCMD_LOOP) { 3213 /* 3214 * table mode, don't print out every corrupt buffer 3215 */ 3216 kmv.kmv_besilent = 1; 3217 } else { 3218 mdb_printf("Summary for cache '%s'\n", 3219 kmv.kmv_cache.cache_name); 3220 mdb_inc_indent(2); 3221 kmv.kmv_besilent = 0; 3222 } 3223 3224 if (check_alloc) 3225 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3226 if (check_free) 3227 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3228 3229 if (flags & DCMD_LOOP) { 3230 if (kmv.kmv_corruption == 0) { 3231 mdb_printf("%-*s %?p clean\n", 3232 KMEM_CACHE_NAMELEN, 3233 kmv.kmv_cache.cache_name, addr); 3234 } else { 3235 char *s = ""; /* optional s in "buffer[s]" */ 3236 if (kmv.kmv_corruption > 1) 3237 s = "s"; 3238 3239 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3240 KMEM_CACHE_NAMELEN, 3241 kmv.kmv_cache.cache_name, addr, 3242 kmv.kmv_corruption, s); 3243 } 3244 } else { 3245 /* 3246 * This is the more verbose mode, when the user has 3247 * type addr::kmem_verify. If the cache was clean, 3248 * nothing will have yet been printed. So say something. 
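 * (Any corrupt buffers were already reported individually by verify_alloc() and verify_free(), since kmv_besilent is clear in this mode.)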
3249 */ 3250 if (kmv.kmv_corruption == 0) 3251 mdb_printf("clean\n"); 3252 3253 mdb_dec_indent(2); 3254 } 3255 } else { 3256 /* 3257 * If the user didn't specify a cache to verify, we'll walk all 3258 * kmem_cache's, specifying ourself as a callback for each... 3259 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3260 */ 3261 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3262 "Cache Name", "Addr", "Cache Integrity"); 3263 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3264 } 3265 3266 return (DCMD_OK); 3267 } 3268 3269 typedef struct vmem_node { 3270 struct vmem_node *vn_next; 3271 struct vmem_node *vn_parent; 3272 struct vmem_node *vn_sibling; 3273 struct vmem_node *vn_children; 3274 uintptr_t vn_addr; 3275 int vn_marked; 3276 vmem_t vn_vmem; 3277 } vmem_node_t; 3278 3279 typedef struct vmem_walk { 3280 vmem_node_t *vw_root; 3281 vmem_node_t *vw_current; 3282 } vmem_walk_t; 3283 3284 int 3285 vmem_walk_init(mdb_walk_state_t *wsp) 3286 { 3287 uintptr_t vaddr, paddr; 3288 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3289 vmem_walk_t *vw; 3290 3291 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3292 mdb_warn("couldn't read 'vmem_list'"); 3293 return (WALK_ERR); 3294 } 3295 3296 while (vaddr != NULL) { 3297 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3298 vp->vn_addr = vaddr; 3299 vp->vn_next = head; 3300 head = vp; 3301 3302 if (vaddr == wsp->walk_addr) 3303 current = vp; 3304 3305 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3306 mdb_warn("couldn't read vmem_t at %p", vaddr); 3307 goto err; 3308 } 3309 3310 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3311 } 3312 3313 for (vp = head; vp != NULL; vp = vp->vn_next) { 3314 3315 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3316 vp->vn_sibling = root; 3317 root = vp; 3318 continue; 3319 } 3320 3321 for (parent = head; parent != NULL; parent = parent->vn_next) { 3322 if (parent->vn_addr != paddr) 3323 continue; 3324 vp->vn_sibling = parent->vn_children; 3325 parent->vn_children = vp; 3326 vp->vn_parent = parent; 3327 break; 3328 } 3329 3330 if (parent == NULL) { 3331 mdb_warn("couldn't find %p's parent (%p)\n", 3332 vp->vn_addr, paddr); 3333 goto err; 3334 } 3335 } 3336 3337 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3338 vw->vw_root = root; 3339 3340 if (current != NULL) 3341 vw->vw_current = current; 3342 else 3343 vw->vw_current = root; 3344 3345 wsp->walk_data = vw; 3346 return (WALK_NEXT); 3347 err: 3348 for (vp = head; head != NULL; vp = head) { 3349 head = vp->vn_next; 3350 mdb_free(vp, sizeof (vmem_node_t)); 3351 } 3352 3353 return (WALK_ERR); 3354 } 3355 3356 int 3357 vmem_walk_step(mdb_walk_state_t *wsp) 3358 { 3359 vmem_walk_t *vw = wsp->walk_data; 3360 vmem_node_t *vp; 3361 int rval; 3362 3363 if ((vp = vw->vw_current) == NULL) 3364 return (WALK_DONE); 3365 3366 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3367 3368 if (vp->vn_children != NULL) { 3369 vw->vw_current = vp->vn_children; 3370 return (rval); 3371 } 3372 3373 do { 3374 vw->vw_current = vp->vn_sibling; 3375 vp = vp->vn_parent; 3376 } while (vw->vw_current == NULL && vp != NULL); 3377 3378 return (rval); 3379 } 3380 3381 /* 3382 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3383 * children are visited before their parent. We perform the postfix walk 3384 * iteratively (rather than recursively) to allow mdb to regain control 3385 * after each callback. 
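 * That is, an arena is visited only after every arena that imports from it (i.e., names it as vm_source) has been visited; the root arenas, which have no vm_source, are reported last.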
3386 */ 3387 int 3388 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3389 { 3390 vmem_walk_t *vw = wsp->walk_data; 3391 vmem_node_t *vp = vw->vw_current; 3392 int rval; 3393 3394 /* 3395 * If this node is marked, then we know that we have already visited 3396 * all of its children. If the node has any siblings, they need to 3397 * be visited next; otherwise, we need to visit the parent. Note 3398 * that vp->vn_marked will only be zero on the first invocation of 3399 * the step function. 3400 */ 3401 if (vp->vn_marked) { 3402 if (vp->vn_sibling != NULL) 3403 vp = vp->vn_sibling; 3404 else if (vp->vn_parent != NULL) 3405 vp = vp->vn_parent; 3406 else { 3407 /* 3408 * We have neither a parent, nor a sibling, and we 3409 * have already been visited; we're done. 3410 */ 3411 return (WALK_DONE); 3412 } 3413 } 3414 3415 /* 3416 * Before we visit this node, visit its children. 3417 */ 3418 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3419 vp = vp->vn_children; 3420 3421 vp->vn_marked = 1; 3422 vw->vw_current = vp; 3423 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3424 3425 return (rval); 3426 } 3427 3428 void 3429 vmem_walk_fini(mdb_walk_state_t *wsp) 3430 { 3431 vmem_walk_t *vw = wsp->walk_data; 3432 vmem_node_t *root = vw->vw_root; 3433 int done; 3434 3435 if (root == NULL) 3436 return; 3437 3438 if ((vw->vw_root = root->vn_children) != NULL) 3439 vmem_walk_fini(wsp); 3440 3441 vw->vw_root = root->vn_sibling; 3442 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3443 mdb_free(root, sizeof (vmem_node_t)); 3444 3445 if (done) { 3446 mdb_free(vw, sizeof (vmem_walk_t)); 3447 } else { 3448 vmem_walk_fini(wsp); 3449 } 3450 } 3451 3452 typedef struct vmem_seg_walk { 3453 uint8_t vsw_type; 3454 uintptr_t vsw_start; 3455 uintptr_t vsw_current; 3456 } vmem_seg_walk_t; 3457 3458 /*ARGSUSED*/ 3459 int 3460 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3461 { 3462 vmem_seg_walk_t *vsw; 3463 3464 if (wsp->walk_addr == NULL) { 3465 mdb_warn("vmem_%s does not support global walks\n", name); 3466 return (WALK_ERR); 3467 } 3468 3469 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3470 3471 vsw->vsw_type = type; 3472 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3473 vsw->vsw_current = vsw->vsw_start; 3474 3475 return (WALK_NEXT); 3476 } 3477 3478 /* 3479 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
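 * We take advantage of that here: 0 (VMEM_NONE, defined below) serves as a "match any type" sentinel for the generic vmem_seg walker.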
3480 */ 3481 #define VMEM_NONE 0 3482 3483 int 3484 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3485 { 3486 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3487 } 3488 3489 int 3490 vmem_free_walk_init(mdb_walk_state_t *wsp) 3491 { 3492 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3493 } 3494 3495 int 3496 vmem_span_walk_init(mdb_walk_state_t *wsp) 3497 { 3498 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3499 } 3500 3501 int 3502 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3503 { 3504 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3505 } 3506 3507 int 3508 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3509 { 3510 vmem_seg_t seg; 3511 vmem_seg_walk_t *vsw = wsp->walk_data; 3512 uintptr_t addr = vsw->vsw_current; 3513 static size_t seg_size = 0; 3514 int rval; 3515 3516 if (!seg_size) { 3517 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3518 mdb_warn("failed to read 'vmem_seg_size'"); 3519 seg_size = sizeof (vmem_seg_t); 3520 } 3521 } 3522 3523 if (seg_size < sizeof (seg)) 3524 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3525 3526 if (mdb_vread(&seg, seg_size, addr) == -1) { 3527 mdb_warn("couldn't read vmem_seg at %p", addr); 3528 return (WALK_ERR); 3529 } 3530 3531 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3532 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3533 rval = WALK_NEXT; 3534 } else { 3535 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3536 } 3537 3538 if (vsw->vsw_current == vsw->vsw_start) 3539 return (WALK_DONE); 3540 3541 return (rval); 3542 } 3543 3544 void 3545 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3546 { 3547 vmem_seg_walk_t *vsw = wsp->walk_data; 3548 3549 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3550 } 3551 3552 #define VMEM_NAMEWIDTH 22 3553 3554 int 3555 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3556 { 3557 vmem_t v, parent; 3558 vmem_kstat_t *vkp = &v.vm_kstat; 3559 uintptr_t paddr; 3560 int ident = 0; 3561 char c[VMEM_NAMEWIDTH]; 3562 3563 if (!(flags & DCMD_ADDRSPEC)) { 3564 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3565 mdb_warn("can't walk vmem"); 3566 return (DCMD_ERR); 3567 } 3568 return (DCMD_OK); 3569 } 3570 3571 if (DCMD_HDRSPEC(flags)) 3572 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3573 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3574 "TOTAL", "SUCCEED", "FAIL"); 3575 3576 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3577 mdb_warn("couldn't read vmem at %p", addr); 3578 return (DCMD_ERR); 3579 } 3580 3581 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3582 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3583 mdb_warn("couldn't trace %p's ancestry", addr); 3584 ident = 0; 3585 break; 3586 } 3587 paddr = (uintptr_t)parent.vm_source; 3588 } 3589 3590 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3591 3592 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3593 addr, VMEM_NAMEWIDTH, c, 3594 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3595 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3596 3597 return (DCMD_OK); 3598 } 3599 3600 void 3601 vmem_seg_help(void) 3602 { 3603 mdb_printf("%s", 3604 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3605 "\n" 3606 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3607 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3608 "information.\n"); 3609 mdb_dec_indent(2); 3610 mdb_printf("%<b>OPTIONS%</b>\n"); 3611 mdb_inc_indent(2); 3612 mdb_printf("%s", 3613 " -v Display the full content of the vmem_seg, including its stack trace\n" 3614 " -s report the size of the segment, instead of the end address\n" 3615 " -c caller\n" 3616 " filter out segments without the function/PC in their stack trace\n" 3617 " -e earliest\n" 3618 " filter out segments timestamped before earliest\n" 3619 " -l latest\n" 3620 " filter out segments timestamped after latest\n" 3621 " -m minsize\n" 3622 " filter out segments smaller than minsize\n" 3623 " -M maxsize\n" 3624 " filter out segments larger than maxsize\n" 3625 " -t thread\n" 3626 " filter out segments not involving thread\n" 3627 " -T type\n" 3628 " filter out segments not of type 'type'\n" 3629 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3630 } 3631 3632 /*ARGSUSED*/ 3633 int 3634 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3635 { 3636 vmem_seg_t vs; 3637 pc_t *stk = vs.vs_stack; 3638 uintptr_t sz; 3639 uint8_t t; 3640 const char *type = NULL; 3641 GElf_Sym sym; 3642 char c[MDB_SYM_NAMLEN]; 3643 int no_debug; 3644 int i; 3645 int depth; 3646 uintptr_t laddr, haddr; 3647 3648 uintptr_t caller = NULL, thread = NULL; 3649 uintptr_t minsize = 0, maxsize = 0; 3650 3651 hrtime_t earliest = 0, latest = 0; 3652 3653 uint_t size = 0; 3654 uint_t verbose = 0; 3655 3656 if (!(flags & DCMD_ADDRSPEC)) 3657 return (DCMD_USAGE); 3658 3659 if (mdb_getopts(argc, argv, 3660 'c', MDB_OPT_UINTPTR, &caller, 3661 'e', MDB_OPT_UINT64, &earliest, 3662 'l', MDB_OPT_UINT64, &latest, 3663 's', MDB_OPT_SETBITS, TRUE, &size, 3664 'm', MDB_OPT_UINTPTR, &minsize, 3665 'M', MDB_OPT_UINTPTR, &maxsize, 3666 't', MDB_OPT_UINTPTR, &thread, 3667 'T', MDB_OPT_STR, &type, 3668 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3669 NULL) != argc) 3670 return (DCMD_USAGE); 3671 3672 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3673 if (verbose) { 3674 mdb_printf("%16s %4s %16s %16s %16s\n" 3675 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3676 "ADDR", "TYPE", "START", "END", "SIZE", 3677 "", "", "THREAD", "TIMESTAMP", ""); 3678 } else { 3679 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3680 "START", size?
"SIZE" : "END", "WHO"); 3681 } 3682 } 3683 3684 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3685 mdb_warn("couldn't read vmem_seg at %p", addr); 3686 return (DCMD_ERR); 3687 } 3688 3689 if (type != NULL) { 3690 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3691 t = VMEM_ALLOC; 3692 else if (strcmp(type, "FREE") == 0) 3693 t = VMEM_FREE; 3694 else if (strcmp(type, "SPAN") == 0) 3695 t = VMEM_SPAN; 3696 else if (strcmp(type, "ROTR") == 0 || 3697 strcmp(type, "ROTOR") == 0) 3698 t = VMEM_ROTOR; 3699 else if (strcmp(type, "WLKR") == 0 || 3700 strcmp(type, "WALKER") == 0) 3701 t = VMEM_WALKER; 3702 else { 3703 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3704 type); 3705 return (DCMD_ERR); 3706 } 3707 3708 if (vs.vs_type != t) 3709 return (DCMD_OK); 3710 } 3711 3712 sz = vs.vs_end - vs.vs_start; 3713 3714 if (minsize != 0 && sz < minsize) 3715 return (DCMD_OK); 3716 3717 if (maxsize != 0 && sz > maxsize) 3718 return (DCMD_OK); 3719 3720 t = vs.vs_type; 3721 depth = vs.vs_depth; 3722 3723 /* 3724 * debug info, when present, is only accurate for VMEM_ALLOC segments 3725 */ 3726 no_debug = (t != VMEM_ALLOC) || 3727 (depth == 0 || depth > VMEM_STACK_DEPTH); 3728 3729 if (no_debug) { 3730 if (caller != NULL || thread != NULL || earliest != 0 || 3731 latest != 0) 3732 return (DCMD_OK); /* not enough info */ 3733 } else { 3734 if (caller != NULL) { 3735 laddr = caller; 3736 haddr = caller + sizeof (caller); 3737 3738 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3739 sizeof (c), &sym) != -1 && 3740 caller == (uintptr_t)sym.st_value) { 3741 /* 3742 * We were provided an exact symbol value; any 3743 * address in the function is valid. 3744 */ 3745 laddr = (uintptr_t)sym.st_value; 3746 haddr = (uintptr_t)sym.st_value + sym.st_size; 3747 } 3748 3749 for (i = 0; i < depth; i++) 3750 if (vs.vs_stack[i] >= laddr && 3751 vs.vs_stack[i] < haddr) 3752 break; 3753 3754 if (i == depth) 3755 return (DCMD_OK); 3756 } 3757 3758 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3759 return (DCMD_OK); 3760 3761 if (earliest != 0 && vs.vs_timestamp < earliest) 3762 return (DCMD_OK); 3763 3764 if (latest != 0 && vs.vs_timestamp > latest) 3765 return (DCMD_OK); 3766 } 3767 3768 type = (t == VMEM_ALLOC ? "ALLC" : 3769 t == VMEM_FREE ? "FREE" : 3770 t == VMEM_SPAN ? "SPAN" : 3771 t == VMEM_ROTOR ? "ROTR" : 3772 t == VMEM_WALKER ? "WLKR" : 3773 "????"); 3774 3775 if (flags & DCMD_PIPE_OUT) { 3776 mdb_printf("%#lr\n", addr); 3777 return (DCMD_OK); 3778 } 3779 3780 if (verbose) { 3781 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3782 addr, type, vs.vs_start, vs.vs_end, sz); 3783 3784 if (no_debug) 3785 return (DCMD_OK); 3786 3787 mdb_printf("%16s %4s %16p %16llx\n", 3788 "", "", vs.vs_thread, vs.vs_timestamp); 3789 3790 mdb_inc_indent(17); 3791 for (i = 0; i < depth; i++) { 3792 mdb_printf("%a\n", stk[i]); 3793 } 3794 mdb_dec_indent(17); 3795 mdb_printf("\n"); 3796 } else { 3797 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3798 vs.vs_start, size? 
sz : vs.vs_end); 3799 3800 if (no_debug) { 3801 mdb_printf("\n"); 3802 return (DCMD_OK); 3803 } 3804 3805 for (i = 0; i < depth; i++) { 3806 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3807 c, sizeof (c), &sym) == -1) 3808 continue; 3809 if (strncmp(c, "vmem_", 5) == 0) 3810 continue; 3811 break; 3812 } 3813 mdb_printf(" %a\n", stk[i]); 3814 } 3815 return (DCMD_OK); 3816 } 3817 3818 typedef struct kmalog_data { 3819 uintptr_t kma_addr; 3820 hrtime_t kma_newest; 3821 } kmalog_data_t; 3822 3823 /*ARGSUSED*/ 3824 static int 3825 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3826 { 3827 char name[KMEM_CACHE_NAMELEN + 1]; 3828 hrtime_t delta; 3829 int i, depth; 3830 size_t bufsize; 3831 3832 if (bcp->bc_timestamp == 0) 3833 return (WALK_DONE); 3834 3835 if (kma->kma_newest == 0) 3836 kma->kma_newest = bcp->bc_timestamp; 3837 3838 if (kma->kma_addr) { 3839 if (mdb_vread(&bufsize, sizeof (bufsize), 3840 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3841 mdb_warn( 3842 "failed to read cache_bufsize for cache at %p", 3843 bcp->bc_cache); 3844 return (WALK_ERR); 3845 } 3846 3847 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3848 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3849 return (WALK_NEXT); 3850 } 3851 3852 delta = kma->kma_newest - bcp->bc_timestamp; 3853 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3854 3855 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3856 &bcp->bc_cache->cache_name) <= 0) 3857 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3858 3859 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3860 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3861 3862 for (i = 0; i < depth; i++) 3863 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3864 3865 return (WALK_NEXT); 3866 } 3867 3868 int 3869 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3870 { 3871 const char *logname = "kmem_transaction_log"; 3872 kmalog_data_t kma; 3873 3874 if (argc > 1) 3875 return (DCMD_USAGE); 3876 3877 kma.kma_newest = 0; 3878 if (flags & DCMD_ADDRSPEC) 3879 kma.kma_addr = addr; 3880 else 3881 kma.kma_addr = NULL; 3882 3883 if (argc > 0) { 3884 if (argv->a_type != MDB_TYPE_STRING) 3885 return (DCMD_USAGE); 3886 if (strcmp(argv->a_un.a_str, "fail") == 0) 3887 logname = "kmem_failure_log"; 3888 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3889 logname = "kmem_slab_log"; 3890 else 3891 return (DCMD_USAGE); 3892 } 3893 3894 if (mdb_readvar(&addr, logname) == -1) { 3895 mdb_warn("failed to read %s log header pointer", logname); 3896 return (DCMD_ERR); 3897 } 3898 3899 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3900 mdb_warn("failed to walk kmem log"); 3901 return (DCMD_ERR); 3902 } 3903 3904 return (DCMD_OK); 3905 } 3906 3907 /* 3908 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3909 * The first piece is a structure which we use to accumulate kmem_cache_t 3910 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3911 * walker; we either add all caches, or ones named explicitly as arguments.
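 * For example (illustrative): '::kmausers' accumulates allocations from every KMF_AUDIT cache, '::kmausers <cache-name>' restricts the report to the named cache(s), and '<addr>::kmausers' implies -f and shows only allocations whose buffers contain addr.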
3912 */ 3913 3914 typedef struct kmclist { 3915 const char *kmc_name; /* Name to match (or NULL) */ 3916 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3917 int kmc_nelems; /* Num entries in kmc_caches */ 3918 int kmc_size; /* Size of kmc_caches array */ 3919 } kmclist_t; 3920 3921 static int 3922 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3923 { 3924 void *p; 3925 int s; 3926 3927 if (kmc->kmc_name == NULL || 3928 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3929 /* 3930 * If we have a match, grow our array (if necessary), and then 3931 * add the virtual address of the matching cache to our list. 3932 */ 3933 if (kmc->kmc_nelems >= kmc->kmc_size) { 3934 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3935 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3936 3937 bcopy(kmc->kmc_caches, p, 3938 sizeof (uintptr_t) * kmc->kmc_size); 3939 3940 kmc->kmc_caches = p; 3941 kmc->kmc_size = s; 3942 } 3943 3944 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3945 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3946 } 3947 3948 return (WALK_NEXT); 3949 } 3950 3951 /* 3952 * The second piece of ::kmausers is a hash table of allocations. Each 3953 * allocation owner is identified by its stack trace and data_size. We then 3954 * track the total bytes of all such allocations, and the number of allocations 3955 * to report at the end. Once we have a list of caches, we walk through the 3956 * allocated bufctls of each, and update our hash table accordingly. 3957 */ 3958 3959 typedef struct kmowner { 3960 struct kmowner *kmo_head; /* First hash elt in bucket */ 3961 struct kmowner *kmo_next; /* Next hash elt in chain */ 3962 size_t kmo_signature; /* Hash table signature */ 3963 uint_t kmo_num; /* Number of allocations */ 3964 size_t kmo_data_size; /* Size of each allocation */ 3965 size_t kmo_total_size; /* Total bytes of allocation */ 3966 int kmo_depth; /* Depth of stack trace */ 3967 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3968 } kmowner_t; 3969 3970 typedef struct kmusers { 3971 uintptr_t kmu_addr; /* address of interest */ 3972 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3973 kmowner_t *kmu_hash; /* Hash table of owners */ 3974 int kmu_nelems; /* Number of entries in use */ 3975 int kmu_size; /* Total number of entries */ 3976 } kmusers_t; 3977 3978 static void 3979 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3980 size_t size, size_t data_size) 3981 { 3982 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3983 size_t bucket, signature = data_size; 3984 kmowner_t *kmo, *kmoend; 3985 3986 /* 3987 * If the hash table is full, double its size and rehash everything. 3988 */ 3989 if (kmu->kmu_nelems >= kmu->kmu_size) { 3990 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3991 3992 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3993 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 3994 kmu->kmu_hash = kmo; 3995 kmu->kmu_size = s; 3996 3997 kmoend = kmu->kmu_hash + kmu->kmu_size; 3998 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 3999 kmo->kmo_head = NULL; 4000 4001 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 4002 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 4003 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 4004 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4005 kmu->kmu_hash[bucket].kmo_head = kmo; 4006 } 4007 } 4008 4009 /* 4010 * Finish computing the hash signature from the stack trace, and then 4011 * see if the owner is in the hash table. If so, update our stats. 
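 * The signature is simply data_size plus the sum of the stack PCs, so a signature match is confirmed by comparing the size, depth, and each frame before the owner's counts are updated.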
4012 */ 4013 for (i = 0; i < depth; i++) 4014 signature += bcp->bc_stack[i]; 4015 4016 bucket = signature & (kmu->kmu_size - 1); 4017 4018 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4019 if (kmo->kmo_signature == signature) { 4020 size_t difference = 0; 4021 4022 difference |= kmo->kmo_data_size - data_size; 4023 difference |= kmo->kmo_depth - depth; 4024 4025 for (i = 0; i < depth; i++) { 4026 difference |= kmo->kmo_stack[i] - 4027 bcp->bc_stack[i]; 4028 } 4029 4030 if (difference == 0) { 4031 kmo->kmo_total_size += size; 4032 kmo->kmo_num++; 4033 return; 4034 } 4035 } 4036 } 4037 4038 /* 4039 * If the owner is not yet hashed, grab the next element and fill it 4040 * in based on the allocation information. 4041 */ 4042 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4043 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4044 kmu->kmu_hash[bucket].kmo_head = kmo; 4045 4046 kmo->kmo_signature = signature; 4047 kmo->kmo_num = 1; 4048 kmo->kmo_data_size = data_size; 4049 kmo->kmo_total_size = size; 4050 kmo->kmo_depth = depth; 4051 4052 for (i = 0; i < depth; i++) 4053 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4054 } 4055 4056 /* 4057 * When ::kmausers is invoked without the -f flag, we simply update our hash 4058 * table with the information from each allocated bufctl. 4059 */ 4060 /*ARGSUSED*/ 4061 static int 4062 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4063 { 4064 const kmem_cache_t *cp = kmu->kmu_cache; 4065 4066 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4067 return (WALK_NEXT); 4068 } 4069 4070 /* 4071 * When ::kmausers is invoked with the -f flag, we print out the information 4072 * for each bufctl as well as updating the hash table. 4073 */ 4074 static int 4075 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4076 { 4077 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4078 const kmem_cache_t *cp = kmu->kmu_cache; 4079 kmem_bufctl_t bufctl; 4080 4081 if (kmu->kmu_addr) { 4082 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4083 mdb_warn("couldn't read bufctl at %p", addr); 4084 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4085 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4086 cp->cache_bufsize) 4087 return (WALK_NEXT); 4088 } 4089 4090 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4091 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4092 4093 for (i = 0; i < depth; i++) 4094 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4095 4096 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4097 return (WALK_NEXT); 4098 } 4099 4100 /* 4101 * We sort our results by allocation size before printing them. 4102 */ 4103 static int 4104 kmownercmp(const void *lp, const void *rp) 4105 { 4106 const kmowner_t *lhs = lp; 4107 const kmowner_t *rhs = rp; 4108 4109 return (rhs->kmo_total_size - lhs->kmo_total_size); 4110 } 4111 4112 /* 4113 * The main engine of ::kmausers is relatively straightforward: First we 4114 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4115 * iterate over the allocated bufctls of each cache in the list. Finally, 4116 * we sort and print our results. 
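 * Owners that fall below both reporting thresholds (8K total bytes and 100 allocations) are omitted from the output unless -e was given.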
4117 */ 4118 /*ARGSUSED*/ 4119 int 4120 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4121 { 4122 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4123 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4124 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4125 int do_all_caches = 1; /* Do all caches (no arguments) */ 4126 int opt_e = FALSE; /* Include "small" users */ 4127 int opt_f = FALSE; /* Print stack traces */ 4128 4129 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4130 kmowner_t *kmo, *kmoend; 4131 int i, oelems; 4132 4133 kmclist_t kmc; 4134 kmusers_t kmu; 4135 4136 bzero(&kmc, sizeof (kmc)); 4137 bzero(&kmu, sizeof (kmu)); 4138 4139 while ((i = mdb_getopts(argc, argv, 4140 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4141 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4142 4143 argv += i; /* skip past options we just processed */ 4144 argc -= i; /* adjust argc */ 4145 4146 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4147 return (DCMD_USAGE); 4148 4149 oelems = kmc.kmc_nelems; 4150 kmc.kmc_name = argv->a_un.a_str; 4151 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4152 4153 if (kmc.kmc_nelems == oelems) { 4154 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4155 return (DCMD_ERR); 4156 } 4157 4158 do_all_caches = 0; 4159 argv++; 4160 argc--; 4161 } 4162 4163 if (flags & DCMD_ADDRSPEC) { 4164 opt_f = TRUE; 4165 kmu.kmu_addr = addr; 4166 } else { 4167 kmu.kmu_addr = NULL; 4168 } 4169 4170 if (opt_e) 4171 mem_threshold = cnt_threshold = 0; 4172 4173 if (opt_f) 4174 callback = (mdb_walk_cb_t)kmause2; 4175 4176 if (do_all_caches) { 4177 kmc.kmc_name = NULL; /* match all cache names */ 4178 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4179 } 4180 4181 for (i = 0; i < kmc.kmc_nelems; i++) { 4182 uintptr_t cp = kmc.kmc_caches[i]; 4183 kmem_cache_t c; 4184 4185 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4186 mdb_warn("failed to read cache at %p", cp); 4187 continue; 4188 } 4189 4190 if (!(c.cache_flags & KMF_AUDIT)) { 4191 if (!do_all_caches) { 4192 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4193 c.cache_name); 4194 } 4195 continue; 4196 } 4197 4198 kmu.kmu_cache = &c; 4199 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4200 audited_caches++; 4201 } 4202 4203 if (audited_caches == 0 && do_all_caches) { 4204 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4205 return (DCMD_ERR); 4206 } 4207 4208 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4209 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4210 4211 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4212 if (kmo->kmo_total_size < mem_threshold && 4213 kmo->kmo_num < cnt_threshold) 4214 continue; 4215 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4216 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4217 for (i = 0; i < kmo->kmo_depth; i++) 4218 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4219 } 4220 4221 return (DCMD_OK); 4222 } 4223 4224 void 4225 kmausers_help(void) 4226 { 4227 mdb_printf( 4228 "Displays the largest users of the kmem allocator, grouped by stack\n" 4229 "trace. If one or more caches are specified, only those caches\n" 4230 "will be searched. By default, all caches are searched. If an\n" 4231 "address is specified, then only those allocations which include\n" 4232 "the given address are displayed. Specifying an address implies\n" 4233 "-f.\n" 4234 "\n" 4235 "\t-e\tInclude all users, not just the largest\n" 4236 "\t-f\tDisplay individual allocations.
By default, users are\n" 4237 "\t\tgrouped by stack\n"); 4238 } 4239 4240 static int 4241 kmem_ready_check(void) 4242 { 4243 int ready; 4244 4245 if (mdb_readvar(&ready, "kmem_ready") < 0) 4246 return (-1); /* errno is set for us */ 4247 4248 return (ready); 4249 } 4250 4251 void 4252 kmem_statechange(void) 4253 { 4254 static int been_ready = 0; 4255 4256 if (been_ready) 4257 return; 4258 4259 if (kmem_ready_check() <= 0) 4260 return; 4261 4262 been_ready = 1; 4263 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 4264 } 4265 4266 void 4267 kmem_init(void) 4268 { 4269 mdb_walker_t w = { 4270 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 4271 list_walk_step, list_walk_fini 4272 }; 4273 4274 /* 4275 * If kmem is ready, we'll need to invoke the kmem_cache walker 4276 * immediately. Walkers in the linkage structure won't be ready until 4277 * _mdb_init returns, so we'll need to add this one manually. If kmem 4278 * is ready, we'll use the walker to initialize the caches. If kmem 4279 * isn't ready, we'll register a callback that will allow us to defer 4280 * cache walking until it is. 4281 */ 4282 if (mdb_add_walker(&w) != 0) { 4283 mdb_warn("failed to add kmem_cache walker"); 4284 return; 4285 } 4286 4287 kmem_statechange(); 4288 } 4289 4290 typedef struct whatthread { 4291 uintptr_t wt_target; 4292 int wt_verbose; 4293 } whatthread_t; 4294 4295 static int 4296 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 4297 { 4298 uintptr_t current, data; 4299 4300 if (t->t_stkbase == NULL) 4301 return (WALK_NEXT); 4302 4303 /* 4304 * Warn about swapped out threads, but drive on anyway 4305 */ 4306 if (!(t->t_schedflag & TS_LOAD)) { 4307 mdb_warn("thread %p's stack swapped out\n", addr); 4308 return (WALK_NEXT); 4309 } 4310 4311 /* 4312 * Search the thread's stack for the given pointer. Note that it would 4313 * be more efficient to follow ::kgrep's lead and read in page-sized 4314 * chunks, but this routine is already fast and simple. 4315 */ 4316 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 4317 current += sizeof (uintptr_t)) { 4318 if (mdb_vread(&data, sizeof (data), current) == -1) { 4319 mdb_warn("couldn't read thread %p's stack at %p", 4320 addr, current); 4321 return (WALK_ERR); 4322 } 4323 4324 if (data == w->wt_target) { 4325 if (w->wt_verbose) { 4326 mdb_printf("%p in thread %p's stack%s\n", 4327 current, addr, stack_active(t, current)); 4328 } else { 4329 mdb_printf("%#lr\n", addr); 4330 return (WALK_NEXT); 4331 } 4332 } 4333 } 4334 4335 return (WALK_NEXT); 4336 } 4337 4338 int 4339 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4340 { 4341 whatthread_t w; 4342 4343 if (!(flags & DCMD_ADDRSPEC)) 4344 return (DCMD_USAGE); 4345 4346 w.wt_verbose = FALSE; 4347 w.wt_target = addr; 4348 4349 if (mdb_getopts(argc, argv, 4350 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 4351 return (DCMD_USAGE); 4352 4353 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 4354 == -1) { 4355 mdb_warn("couldn't walk threads"); 4356 return (DCMD_ERR); 4357 } 4358 4359 return (DCMD_OK); 4360 } 4361
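/* Example usage (illustrative; the address is hypothetical): '<addr>::whatthread -v' searches every thread's stack for the value <addr> and prints each matching stack location together with stack_active()'s annotation of whether that location lies above or below the thread's saved stack pointer. */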