1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <mdb/mdb_param.h> 27 #include <mdb/mdb_modapi.h> 28 #include <mdb/mdb_ctf.h> 29 #include <mdb/mdb_whatis.h> 30 #include <sys/cpuvar.h> 31 #include <sys/kmem_impl.h> 32 #include <sys/vmem_impl.h> 33 #include <sys/machelf.h> 34 #include <sys/modctl.h> 35 #include <sys/kobj.h> 36 #include <sys/panic.h> 37 #include <sys/stack.h> 38 #include <sys/sysmacros.h> 39 #include <vm/page.h> 40 41 #include "avl.h" 42 #include "combined.h" 43 #include "dist.h" 44 #include "kmem.h" 45 #include "list.h" 46 47 #define dprintf(x) if (mdb_debug_level) { \ 48 mdb_printf("kmem debug: "); \ 49 /*CSTYLED*/\ 50 mdb_printf x ;\ 51 } 52 53 #define KM_ALLOCATED 0x01 54 #define KM_FREE 0x02 55 #define KM_BUFCTL 0x04 56 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */ 57 #define KM_HASH 0x10 58 59 static int mdb_debug_level = 0; 60 61 /*ARGSUSED*/ 62 static int 63 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored) 64 { 65 mdb_walker_t w; 66 char descr[64]; 67 68 (void) mdb_snprintf(descr, sizeof (descr), 69 "walk the %s cache", c->cache_name); 70 71 w.walk_name = c->cache_name; 72 w.walk_descr = descr; 73 w.walk_init = kmem_walk_init; 74 w.walk_step = kmem_walk_step; 75 w.walk_fini = kmem_walk_fini; 76 w.walk_init_arg = (void *)addr; 77 78 if (mdb_add_walker(&w) == -1) 79 mdb_warn("failed to add %s walker", c->cache_name); 80 81 return (WALK_NEXT); 82 } 83 84 /*ARGSUSED*/ 85 int 86 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 87 { 88 mdb_debug_level ^= 1; 89 90 mdb_printf("kmem: debugging is now %s\n", 91 mdb_debug_level ? "on" : "off"); 92 93 return (DCMD_OK); 94 } 95 96 int 97 kmem_cache_walk_init(mdb_walk_state_t *wsp) 98 { 99 GElf_Sym sym; 100 101 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) { 102 mdb_warn("couldn't find kmem_caches"); 103 return (WALK_ERR); 104 } 105 106 wsp->walk_addr = (uintptr_t)sym.st_value; 107 108 return (list_walk_init_named(wsp, "cache list", "cache")); 109 } 110 111 int 112 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 113 { 114 if (wsp->walk_addr == NULL) { 115 mdb_warn("kmem_cpu_cache doesn't support global walks"); 116 return (WALK_ERR); 117 } 118 119 if (mdb_layered_walk("cpu", wsp) == -1) { 120 mdb_warn("couldn't walk 'cpu'"); 121 return (WALK_ERR); 122 } 123 124 wsp->walk_data = (void *)wsp->walk_addr; 125 126 return (WALK_NEXT); 127 } 128 129 int 130 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 131 { 132 uintptr_t caddr = (uintptr_t)wsp->walk_data; 133 const cpu_t *cpu = wsp->walk_layer; 134 kmem_cpu_cache_t cc; 135 136 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]); 137 138 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 139 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 140 return (WALK_ERR); 141 } 142 143 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 144 } 145 146 static int 147 kmem_slab_check(void *p, uintptr_t saddr, void *arg) 148 { 149 kmem_slab_t *sp = p; 150 uintptr_t caddr = (uintptr_t)arg; 151 if ((uintptr_t)sp->slab_cache != caddr) { 152 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 153 saddr, caddr, sp->slab_cache); 154 return (-1); 155 } 156 157 return (0); 158 } 159 160 static int 161 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg) 162 { 163 kmem_slab_t *sp = p; 164 165 int rc = kmem_slab_check(p, saddr, arg); 166 if (rc != 0) { 167 return (rc); 168 } 169 170 if (!KMEM_SLAB_IS_PARTIAL(sp)) { 171 mdb_warn("slab %p is not a partial slab\n", saddr); 172 return (-1); 173 } 174 175 return (0); 176 } 177 178 static int 179 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg) 180 { 181 kmem_slab_t *sp = p; 182 183 int rc = kmem_slab_check(p, saddr, arg); 184 if (rc != 0) { 185 return (rc); 186 } 187 188 if (!KMEM_SLAB_IS_ALL_USED(sp)) { 189 mdb_warn("slab %p is not completely allocated\n", saddr); 190 return (-1); 191 } 192 193 return (0); 194 } 195 196 typedef struct { 197 uintptr_t kns_cache_addr; 198 int kns_nslabs; 199 } kmem_nth_slab_t; 200 201 static int 202 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg) 203 { 204 kmem_nth_slab_t *chkp = arg; 205 206 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr); 207 if (rc != 0) { 208 return (rc); 209 } 210 211 return (chkp->kns_nslabs-- == 0 ? 1 : 0); 212 } 213 214 static int 215 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp) 216 { 217 uintptr_t caddr = wsp->walk_addr; 218 219 wsp->walk_addr = (uintptr_t)(caddr + 220 offsetof(kmem_cache_t, cache_complete_slabs)); 221 222 return (list_walk_init_checked(wsp, "slab list", "slab", 223 kmem_complete_slab_check, (void *)caddr)); 224 } 225 226 static int 227 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp) 228 { 229 uintptr_t caddr = wsp->walk_addr; 230 231 wsp->walk_addr = (uintptr_t)(caddr + 232 offsetof(kmem_cache_t, cache_partial_slabs)); 233 234 return (avl_walk_init_checked(wsp, "slab list", "slab", 235 kmem_partial_slab_check, (void *)caddr)); 236 } 237 238 int 239 kmem_slab_walk_init(mdb_walk_state_t *wsp) 240 { 241 uintptr_t caddr = wsp->walk_addr; 242 243 if (caddr == NULL) { 244 mdb_warn("kmem_slab doesn't support global walks\n"); 245 return (WALK_ERR); 246 } 247 248 combined_walk_init(wsp); 249 combined_walk_add(wsp, 250 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini); 251 combined_walk_add(wsp, 252 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini); 253 254 return (WALK_NEXT); 255 } 256 257 static int 258 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp) 259 { 260 uintptr_t caddr = wsp->walk_addr; 261 kmem_nth_slab_t *chk; 262 263 chk = mdb_alloc(sizeof (kmem_nth_slab_t), 264 UM_SLEEP | UM_GC); 265 chk->kns_cache_addr = caddr; 266 chk->kns_nslabs = 1; 267 wsp->walk_addr = (uintptr_t)(caddr + 268 offsetof(kmem_cache_t, cache_complete_slabs)); 269 270 return (list_walk_init_checked(wsp, "slab list", "slab", 271 kmem_nth_slab_check, chk)); 272 } 273 274 int 275 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 276 { 277 uintptr_t caddr = wsp->walk_addr; 278 kmem_cache_t c; 279 280 if (caddr == NULL) { 281 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 282 return (WALK_ERR); 283 } 284 285 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 286 mdb_warn("couldn't read kmem_cache at %p", caddr); 287 return (WALK_ERR); 288 } 289 290 combined_walk_init(wsp); 291 292 /* 293 * Some consumers (umem_walk_step(), in particular) require at 294 * least one callback if there are any buffers in the cache. So 295 * if there are *no* partial slabs, report the first full slab, if 296 * any. 297 * 298 * Yes, this is ugly, but it's cleaner than the other possibilities. 299 */ 300 if (c.cache_partial_slabs.avl_numnodes == 0) { 301 combined_walk_add(wsp, kmem_first_complete_slab_walk_init, 302 list_walk_step, list_walk_fini); 303 } else { 304 combined_walk_add(wsp, kmem_partial_slab_walk_init, 305 avl_walk_step, avl_walk_fini); 306 } 307 308 return (WALK_NEXT); 309 } 310 311 int 312 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 313 { 314 kmem_cache_t c; 315 const char *filter = NULL; 316 317 if (mdb_getopts(ac, argv, 318 'n', MDB_OPT_STR, &filter, 319 NULL) != ac) { 320 return (DCMD_USAGE); 321 } 322 323 if (!(flags & DCMD_ADDRSPEC)) { 324 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 325 mdb_warn("can't walk kmem_cache"); 326 return (DCMD_ERR); 327 } 328 return (DCMD_OK); 329 } 330 331 if (DCMD_HDRSPEC(flags)) 332 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 333 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 334 335 if (mdb_vread(&c, sizeof (c), addr) == -1) { 336 mdb_warn("couldn't read kmem_cache at %p", addr); 337 return (DCMD_ERR); 338 } 339 340 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) 341 return (DCMD_OK); 342 343 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 344 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 345 346 return (DCMD_OK); 347 } 348 349 void 350 kmem_cache_help(void) 351 { 352 mdb_printf("%s", "Print kernel memory caches.\n\n"); 353 mdb_dec_indent(2); 354 mdb_printf("%<b>OPTIONS%</b>\n"); 355 mdb_inc_indent(2); 356 mdb_printf("%s", 357 " -n name\n" 358 " name of kmem cache (or matching partial name)\n" 359 "\n" 360 "Column\tDescription\n" 361 "\n" 362 "ADDR\t\taddress of kmem cache\n" 363 "NAME\t\tname of kmem cache\n" 364 "FLAG\t\tvarious cache state flags\n" 365 "CFLAG\t\tcache creation flags\n" 366 "BUFSIZE\tobject size in bytes\n" 367 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n"); 368 } 369 370 #define LABEL_WIDTH 11 371 static void 372 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 373 size_t maxbuckets, size_t minbucketsize) 374 { 375 uint64_t total; 376 int buckets; 377 int i; 378 const int *distarray; 379 int complete[2]; 380 381 buckets = buffers_per_slab; 382 383 total = 0; 384 for (i = 0; i <= buffers_per_slab; i++) 385 total += ks_bucket[i]; 386 387 if (maxbuckets > 1) 388 buckets = MIN(buckets, maxbuckets); 389 390 if (minbucketsize > 1) { 391 /* 392 * minbucketsize does not apply to the first bucket reserved 393 * for completely allocated slabs 394 */ 395 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 396 minbucketsize)); 397 if ((buckets < 2) && (buffers_per_slab > 1)) { 398 buckets = 2; 399 minbucketsize = (buffers_per_slab - 1); 400 } 401 } 402 403 /* 404 * The first printed bucket is reserved for completely allocated slabs. 405 * Passing (buckets - 1) excludes that bucket from the generated 406 * distribution, since we're handling it as a special case. 407 */ 408 complete[0] = buffers_per_slab; 409 complete[1] = buffers_per_slab + 1; 410 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1); 411 412 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated"); 413 dist_print_header("Buffers", LABEL_WIDTH, "Slabs"); 414 415 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH); 416 /* 417 * Print bucket ranges in descending order after the first bucket for 418 * completely allocated slabs, so a person can see immediately whether 419 * or not there is fragmentation without having to scan possibly 420 * multiple screens of output. Starting at (buckets - 2) excludes the 421 * extra terminating bucket. 422 */ 423 for (i = buckets - 2; i >= 0; i--) { 424 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH); 425 } 426 mdb_printf("\n"); 427 } 428 #undef LABEL_WIDTH 429 430 /*ARGSUSED*/ 431 static int 432 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab) 433 { 434 *is_slab = B_TRUE; 435 return (WALK_DONE); 436 } 437 438 /*ARGSUSED*/ 439 static int 440 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp, 441 boolean_t *is_slab) 442 { 443 /* 444 * The "kmem_partial_slab" walker reports the first full slab if there 445 * are no partial slabs (for the sake of consumers that require at least 446 * one callback if there are any buffers in the cache). 447 */ 448 *is_slab = KMEM_SLAB_IS_PARTIAL(sp); 449 return (WALK_DONE); 450 } 451 452 typedef struct kmem_slab_usage { 453 int ksu_refcnt; /* count of allocated buffers on slab */ 454 boolean_t ksu_nomove; /* slab marked non-reclaimable */ 455 } kmem_slab_usage_t; 456 457 typedef struct kmem_slab_stats { 458 const kmem_cache_t *ks_cp; 459 int ks_slabs; /* slabs in cache */ 460 int ks_partial_slabs; /* partially allocated slabs in cache */ 461 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 462 int ks_max_buffers_per_slab; /* max buffers per slab */ 463 int ks_usage_len; /* ks_usage array length */ 464 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 465 uint_t *ks_bucket; /* slab usage distribution */ 466 } kmem_slab_stats_t; 467 468 /*ARGSUSED*/ 469 static int 470 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp, 471 kmem_slab_stats_t *ks) 472 { 473 kmem_slab_usage_t *ksu; 474 long unused; 475 476 ks->ks_slabs++; 477 ks->ks_bucket[sp->slab_refcnt]++; 478 479 unused = (sp->slab_chunks - sp->slab_refcnt); 480 if (unused == 0) { 481 return (WALK_NEXT); 482 } 483 484 ks->ks_partial_slabs++; 485 ks->ks_unused_buffers += unused; 486 487 if (ks->ks_partial_slabs > ks->ks_usage_len) { 488 kmem_slab_usage_t *usage; 489 int len = ks->ks_usage_len; 490 491 len = (len == 0 ? 16 : len * 2); 492 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 493 if (ks->ks_usage != NULL) { 494 bcopy(ks->ks_usage, usage, 495 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 496 mdb_free(ks->ks_usage, 497 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 498 } 499 ks->ks_usage = usage; 500 ks->ks_usage_len = len; 501 } 502 503 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 504 ksu->ksu_refcnt = sp->slab_refcnt; 505 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); 506 return (WALK_NEXT); 507 } 508 509 static void 510 kmem_slabs_header() 511 { 512 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 513 "", "", "Partial", "", "Unused", ""); 514 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 515 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 516 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 517 "-------------------------", "--------", "--------", "---------", 518 "---------", "------"); 519 } 520 521 int 522 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 523 { 524 kmem_cache_t c; 525 kmem_slab_stats_t stats; 526 mdb_walk_cb_t cb; 527 int pct; 528 int tenths_pct; 529 size_t maxbuckets = 1; 530 size_t minbucketsize = 0; 531 const char *filter = NULL; 532 const char *name = NULL; 533 uint_t opt_v = FALSE; 534 boolean_t buckets = B_FALSE; 535 boolean_t skip = B_FALSE; 536 537 if (mdb_getopts(argc, argv, 538 'B', MDB_OPT_UINTPTR, &minbucketsize, 539 'b', MDB_OPT_UINTPTR, &maxbuckets, 540 'n', MDB_OPT_STR, &filter, 541 'N', MDB_OPT_STR, &name, 542 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 543 NULL) != argc) { 544 return (DCMD_USAGE); 545 } 546 547 if ((maxbuckets != 1) || (minbucketsize != 0)) { 548 buckets = B_TRUE; 549 } 550 551 if (!(flags & DCMD_ADDRSPEC)) { 552 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 553 argv) == -1) { 554 mdb_warn("can't walk kmem_cache"); 555 return (DCMD_ERR); 556 } 557 return (DCMD_OK); 558 } 559 560 if (mdb_vread(&c, sizeof (c), addr) == -1) { 561 mdb_warn("couldn't read kmem_cache at %p", addr); 562 return (DCMD_ERR); 563 } 564 565 if (name == NULL) { 566 skip = ((filter != NULL) && 567 (strstr(c.cache_name, filter) == NULL)); 568 } else if (filter == NULL) { 569 skip = (strcmp(c.cache_name, name) != 0); 570 } else { 571 /* match either -n or -N */ 572 skip = ((strcmp(c.cache_name, name) != 0) && 573 (strstr(c.cache_name, filter) == NULL)); 574 } 575 576 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) { 577 kmem_slabs_header(); 578 } else if ((opt_v || buckets) && !skip) { 579 if (DCMD_HDRSPEC(flags)) { 580 kmem_slabs_header(); 581 } else { 582 boolean_t is_slab = B_FALSE; 583 const char *walker_name; 584 if (opt_v) { 585 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 586 walker_name = "kmem_slab_partial"; 587 } else { 588 cb = (mdb_walk_cb_t)kmem_first_slab; 589 walker_name = "kmem_slab"; 590 } 591 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 592 if (is_slab) { 593 kmem_slabs_header(); 594 } 595 } 596 } 597 598 if (skip) { 599 return (DCMD_OK); 600 } 601 602 bzero(&stats, sizeof (kmem_slab_stats_t)); 603 stats.ks_cp = &c; 604 stats.ks_max_buffers_per_slab = c.cache_maxchunks; 605 /* +1 to include a zero bucket */ 606 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) * 607 sizeof (*stats.ks_bucket), UM_SLEEP); 608 cb = (mdb_walk_cb_t)kmem_slablist_stat; 609 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 610 611 if (c.cache_buftotal == 0) { 612 pct = 0; 613 tenths_pct = 0; 614 } else { 615 uint64_t n = stats.ks_unused_buffers * 10000; 616 pct = (int)(n / c.cache_buftotal); 617 tenths_pct = pct - ((pct / 100) * 100); 618 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 619 if (tenths_pct == 10) { 620 pct += 100; 621 tenths_pct = 0; 622 } 623 } 624 625 pct /= 100; 626 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 627 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 628 stats.ks_unused_buffers, pct, tenths_pct); 629 630 if (maxbuckets == 0) { 631 maxbuckets = stats.ks_max_buffers_per_slab; 632 } 633 634 if (((maxbuckets > 1) || (minbucketsize > 0)) && 635 (stats.ks_slabs > 0)) { 636 mdb_printf("\n"); 637 kmem_slabs_print_dist(stats.ks_bucket, 638 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize); 639 } 640 641 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) * 642 sizeof (*stats.ks_bucket)); 643 644 if (!opt_v) { 645 return (DCMD_OK); 646 } 647 648 if (opt_v && (stats.ks_partial_slabs > 0)) { 649 int i; 650 kmem_slab_usage_t *ksu; 651 652 mdb_printf(" %d complete (%d), %d partial:", 653 (stats.ks_slabs - stats.ks_partial_slabs), 654 stats.ks_max_buffers_per_slab, 655 stats.ks_partial_slabs); 656 657 for (i = 0; i < stats.ks_partial_slabs; i++) { 658 ksu = &stats.ks_usage[i]; 659 mdb_printf(" %d%s", ksu->ksu_refcnt, 660 (ksu->ksu_nomove ? "*" : "")); 661 } 662 mdb_printf("\n\n"); 663 } 664 665 if (stats.ks_usage_len > 0) { 666 mdb_free(stats.ks_usage, 667 stats.ks_usage_len * sizeof (kmem_slab_usage_t)); 668 } 669 670 return (DCMD_OK); 671 } 672 673 void 674 kmem_slabs_help(void) 675 { 676 mdb_printf("%s", 677 "Display slab usage per kmem cache.\n\n"); 678 mdb_dec_indent(2); 679 mdb_printf("%<b>OPTIONS%</b>\n"); 680 mdb_inc_indent(2); 681 mdb_printf("%s", 682 " -n name\n" 683 " name of kmem cache (or matching partial name)\n" 684 " -N name\n" 685 " exact name of kmem cache\n" 686 " -b maxbins\n" 687 " Print a distribution of allocated buffers per slab using at\n" 688 " most maxbins bins. The first bin is reserved for completely\n" 689 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n" 690 " effect as specifying the maximum allocated buffers per slab\n" 691 " or setting minbinsize to 1 (-B 1).\n" 692 " -B minbinsize\n" 693 " Print a distribution of allocated buffers per slab, making\n" 694 " all bins (except the first, reserved for completely allocated\n" 695 " slabs) at least minbinsize buffers apart.\n" 696 " -v verbose output: List the allocated buffer count of each partial\n" 697 " slab on the free list in order from front to back to show how\n" 698 " closely the slabs are ordered by usage. For example\n" 699 "\n" 700 " 10 complete, 3 partial (8): 7 3 1\n" 701 "\n" 702 " means there are thirteen slabs with eight buffers each, including\n" 703 " three partially allocated slabs with less than all eight buffers\n" 704 " allocated.\n" 705 "\n" 706 " Buffer allocations are always from the front of the partial slab\n" 707 " list. When a buffer is freed from a completely used slab, that\n" 708 " slab is added to the front of the partial slab list. Assuming\n" 709 " that all buffers are equally likely to be freed soon, the\n" 710 " desired order of partial slabs is most-used at the front of the\n" 711 " list and least-used at the back (as in the example above).\n" 712 " However, if a slab contains an allocated buffer that will not\n" 713 " soon be freed, it would be better for that slab to be at the\n" 714 " front where all of its buffers can be allocated. Taking a slab\n" 715 " off the partial slab list (either with all buffers freed or all\n" 716 " buffers allocated) reduces cache fragmentation.\n" 717 "\n" 718 " A slab's allocated buffer count representing a partial slab (9 in\n" 719 " the example below) may be marked as follows:\n" 720 "\n" 721 " 9* An asterisk indicates that kmem has marked the slab non-\n" 722 " reclaimable because the kmem client refused to move one of the\n" 723 " slab's buffers. Since kmem does not expect to completely free the\n" 724 " slab, it moves it to the front of the list in the hope of\n" 725 " completely allocating it instead. A slab marked with an asterisk\n" 726 " stays marked for as long as it remains on the partial slab list.\n" 727 "\n" 728 "Column\t\tDescription\n" 729 "\n" 730 "Cache Name\t\tname of kmem cache\n" 731 "Slabs\t\t\ttotal slab count\n" 732 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 733 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 734 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 735 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 736 "\t\t\t for accounting structures (debug mode), slab\n" 737 "\t\t\t coloring (incremental small offsets to stagger\n" 738 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 739 } 740 741 static int 742 addrcmp(const void *lhs, const void *rhs) 743 { 744 uintptr_t p1 = *((uintptr_t *)lhs); 745 uintptr_t p2 = *((uintptr_t *)rhs); 746 747 if (p1 < p2) 748 return (-1); 749 if (p1 > p2) 750 return (1); 751 return (0); 752 } 753 754 static int 755 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 756 { 757 const kmem_bufctl_audit_t *bcp1 = *lhs; 758 const kmem_bufctl_audit_t *bcp2 = *rhs; 759 760 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 761 return (-1); 762 763 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 764 return (1); 765 766 return (0); 767 } 768 769 typedef struct kmem_hash_walk { 770 uintptr_t *kmhw_table; 771 size_t kmhw_nelems; 772 size_t kmhw_pos; 773 kmem_bufctl_t kmhw_cur; 774 } kmem_hash_walk_t; 775 776 int 777 kmem_hash_walk_init(mdb_walk_state_t *wsp) 778 { 779 kmem_hash_walk_t *kmhw; 780 uintptr_t *hash; 781 kmem_cache_t c; 782 uintptr_t haddr, addr = wsp->walk_addr; 783 size_t nelems; 784 size_t hsize; 785 786 if (addr == NULL) { 787 mdb_warn("kmem_hash doesn't support global walks\n"); 788 return (WALK_ERR); 789 } 790 791 if (mdb_vread(&c, sizeof (c), addr) == -1) { 792 mdb_warn("couldn't read cache at addr %p", addr); 793 return (WALK_ERR); 794 } 795 796 if (!(c.cache_flags & KMF_HASH)) { 797 mdb_warn("cache %p doesn't have a hash table\n", addr); 798 return (WALK_DONE); /* nothing to do */ 799 } 800 801 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 802 kmhw->kmhw_cur.bc_next = NULL; 803 kmhw->kmhw_pos = 0; 804 805 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 806 hsize = nelems * sizeof (uintptr_t); 807 haddr = (uintptr_t)c.cache_hash_table; 808 809 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 810 if (mdb_vread(hash, hsize, haddr) == -1) { 811 mdb_warn("failed to read hash table at %p", haddr); 812 mdb_free(hash, hsize); 813 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 814 return (WALK_ERR); 815 } 816 817 wsp->walk_data = kmhw; 818 819 return (WALK_NEXT); 820 } 821 822 int 823 kmem_hash_walk_step(mdb_walk_state_t *wsp) 824 { 825 kmem_hash_walk_t *kmhw = wsp->walk_data; 826 uintptr_t addr = NULL; 827 828 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 829 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) { 830 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 831 break; 832 } 833 } 834 if (addr == NULL) 835 return (WALK_DONE); 836 837 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 838 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 839 return (WALK_ERR); 840 } 841 842 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 843 } 844 845 void 846 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 847 { 848 kmem_hash_walk_t *kmhw = wsp->walk_data; 849 850 if (kmhw == NULL) 851 return; 852 853 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 854 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 855 } 856 857 /* 858 * Find the address of the bufctl structure for the address 'buf' in cache 859 * 'cp', which is at address caddr, and place it in *out. 860 */ 861 static int 862 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 863 { 864 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 865 kmem_bufctl_t *bcp; 866 kmem_bufctl_t bc; 867 868 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 869 mdb_warn("unable to read hash bucket for %p in cache %p", 870 buf, caddr); 871 return (-1); 872 } 873 874 while (bcp != NULL) { 875 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 876 (uintptr_t)bcp) == -1) { 877 mdb_warn("unable to read bufctl at %p", bcp); 878 return (-1); 879 } 880 if (bc.bc_addr == buf) { 881 *out = (uintptr_t)bcp; 882 return (0); 883 } 884 bcp = bc.bc_next; 885 } 886 887 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 888 return (-1); 889 } 890 891 int 892 kmem_get_magsize(const kmem_cache_t *cp) 893 { 894 uintptr_t addr = (uintptr_t)cp->cache_magtype; 895 GElf_Sym mt_sym; 896 kmem_magtype_t mt; 897 int res; 898 899 /* 900 * if cpu 0 has a non-zero magsize, it must be correct. caches 901 * with KMF_NOMAGAZINE have disabled their magazine layers, so 902 * it is okay to return 0 for them. 903 */ 904 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 905 (cp->cache_flags & KMF_NOMAGAZINE)) 906 return (res); 907 908 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 909 mdb_warn("unable to read 'kmem_magtype'"); 910 } else if (addr < mt_sym.st_value || 911 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 912 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 913 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 914 cp->cache_name, addr); 915 return (0); 916 } 917 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 918 mdb_warn("unable to read magtype at %a", addr); 919 return (0); 920 } 921 return (mt.mt_magsize); 922 } 923 924 /*ARGSUSED*/ 925 static int 926 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 927 { 928 *est -= (sp->slab_chunks - sp->slab_refcnt); 929 930 return (WALK_NEXT); 931 } 932 933 /* 934 * Returns an upper bound on the number of allocated buffers in a given 935 * cache. 936 */ 937 size_t 938 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 939 { 940 int magsize; 941 size_t cache_est; 942 943 cache_est = cp->cache_buftotal; 944 945 (void) mdb_pwalk("kmem_slab_partial", 946 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 947 948 if ((magsize = kmem_get_magsize(cp)) != 0) { 949 size_t mag_est = cp->cache_full.ml_total * magsize; 950 951 if (cache_est >= mag_est) { 952 cache_est -= mag_est; 953 } else { 954 mdb_warn("cache %p's magazine layer holds more buffers " 955 "than the slab layer.\n", addr); 956 } 957 } 958 return (cache_est); 959 } 960 961 #define READMAG_ROUNDS(rounds) { \ 962 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 963 mdb_warn("couldn't read magazine at %p", kmp); \ 964 goto fail; \ 965 } \ 966 for (i = 0; i < rounds; i++) { \ 967 maglist[magcnt++] = mp->mag_round[i]; \ 968 if (magcnt == magmax) { \ 969 mdb_warn("%d magazines exceeds fudge factor\n", \ 970 magcnt); \ 971 goto fail; \ 972 } \ 973 } \ 974 } 975 976 int 977 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 978 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 979 { 980 kmem_magazine_t *kmp, *mp; 981 void **maglist = NULL; 982 int i, cpu; 983 size_t magsize, magmax, magbsize; 984 size_t magcnt = 0; 985 986 /* 987 * Read the magtype out of the cache, after verifying the pointer's 988 * correctness. 989 */ 990 magsize = kmem_get_magsize(cp); 991 if (magsize == 0) { 992 *maglistp = NULL; 993 *magcntp = 0; 994 *magmaxp = 0; 995 return (WALK_NEXT); 996 } 997 998 /* 999 * There are several places where we need to go buffer hunting: 1000 * the per-CPU loaded magazine, the per-CPU spare full magazine, 1001 * and the full magazine list in the depot. 1002 * 1003 * For an upper bound on the number of buffers in the magazine 1004 * layer, we have the number of magazines on the cache_full 1005 * list plus at most two magazines per CPU (the loaded and the 1006 * spare). Toss in 100 magazines as a fudge factor in case this 1007 * is live (the number "100" comes from the same fudge factor in 1008 * crash(1M)). 1009 */ 1010 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 1011 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 1012 1013 if (magbsize >= PAGESIZE / 2) { 1014 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 1015 addr, magbsize); 1016 return (WALK_ERR); 1017 } 1018 1019 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 1020 mp = mdb_alloc(magbsize, alloc_flags); 1021 if (mp == NULL || maglist == NULL) 1022 goto fail; 1023 1024 /* 1025 * First up: the magazines in the depot (i.e. on the cache_full list). 1026 */ 1027 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 1028 READMAG_ROUNDS(magsize); 1029 kmp = mp->mag_next; 1030 1031 if (kmp == cp->cache_full.ml_list) 1032 break; /* cache_full list loop detected */ 1033 } 1034 1035 dprintf(("cache_full list done\n")); 1036 1037 /* 1038 * Now whip through the CPUs, snagging the loaded magazines 1039 * and full spares. 1040 */ 1041 for (cpu = 0; cpu < ncpus; cpu++) { 1042 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 1043 1044 dprintf(("reading cpu cache %p\n", 1045 (uintptr_t)ccp - (uintptr_t)cp + addr)); 1046 1047 if (ccp->cc_rounds > 0 && 1048 (kmp = ccp->cc_loaded) != NULL) { 1049 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 1050 READMAG_ROUNDS(ccp->cc_rounds); 1051 } 1052 1053 if (ccp->cc_prounds > 0 && 1054 (kmp = ccp->cc_ploaded) != NULL) { 1055 dprintf(("reading %d previously loaded rounds\n", 1056 ccp->cc_prounds)); 1057 READMAG_ROUNDS(ccp->cc_prounds); 1058 } 1059 } 1060 1061 dprintf(("magazine layer: %d buffers\n", magcnt)); 1062 1063 if (!(alloc_flags & UM_GC)) 1064 mdb_free(mp, magbsize); 1065 1066 *maglistp = maglist; 1067 *magcntp = magcnt; 1068 *magmaxp = magmax; 1069 1070 return (WALK_NEXT); 1071 1072 fail: 1073 if (!(alloc_flags & UM_GC)) { 1074 if (mp) 1075 mdb_free(mp, magbsize); 1076 if (maglist) 1077 mdb_free(maglist, magmax * sizeof (void *)); 1078 } 1079 return (WALK_ERR); 1080 } 1081 1082 static int 1083 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1084 { 1085 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1086 } 1087 1088 static int 1089 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1090 { 1091 kmem_bufctl_audit_t b; 1092 1093 /* 1094 * if KMF_AUDIT is not set, we know that we're looking at a 1095 * kmem_bufctl_t. 1096 */ 1097 if (!(cp->cache_flags & KMF_AUDIT) || 1098 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1099 (void) memset(&b, 0, sizeof (b)); 1100 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1101 mdb_warn("unable to read bufctl at %p", buf); 1102 return (WALK_ERR); 1103 } 1104 } 1105 1106 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1107 } 1108 1109 typedef struct kmem_walk { 1110 int kmw_type; 1111 1112 int kmw_addr; /* cache address */ 1113 kmem_cache_t *kmw_cp; 1114 size_t kmw_csize; 1115 1116 /* 1117 * magazine layer 1118 */ 1119 void **kmw_maglist; 1120 size_t kmw_max; 1121 size_t kmw_count; 1122 size_t kmw_pos; 1123 1124 /* 1125 * slab layer 1126 */ 1127 char *kmw_valid; /* to keep track of freed buffers */ 1128 char *kmw_ubase; /* buffer for slab data */ 1129 } kmem_walk_t; 1130 1131 static int 1132 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1133 { 1134 kmem_walk_t *kmw; 1135 int ncpus, csize; 1136 kmem_cache_t *cp; 1137 size_t vm_quantum; 1138 1139 size_t magmax, magcnt; 1140 void **maglist = NULL; 1141 uint_t chunksize, slabsize; 1142 int status = WALK_ERR; 1143 uintptr_t addr = wsp->walk_addr; 1144 const char *layered; 1145 1146 type &= ~KM_HASH; 1147 1148 if (addr == NULL) { 1149 mdb_warn("kmem walk doesn't support global walks\n"); 1150 return (WALK_ERR); 1151 } 1152 1153 dprintf(("walking %p\n", addr)); 1154 1155 /* 1156 * First we need to figure out how many CPUs are configured in the 1157 * system to know how much to slurp out. 1158 */ 1159 mdb_readvar(&ncpus, "max_ncpus"); 1160 1161 csize = KMEM_CACHE_SIZE(ncpus); 1162 cp = mdb_alloc(csize, UM_SLEEP); 1163 1164 if (mdb_vread(cp, csize, addr) == -1) { 1165 mdb_warn("couldn't read cache at addr %p", addr); 1166 goto out2; 1167 } 1168 1169 /* 1170 * It's easy for someone to hand us an invalid cache address. 1171 * Unfortunately, it is hard for this walker to survive an 1172 * invalid cache cleanly. So we make sure that: 1173 * 1174 * 1. the vmem arena for the cache is readable, 1175 * 2. the vmem arena's quantum is a power of 2, 1176 * 3. our slabsize is a multiple of the quantum, and 1177 * 4. our chunksize is >0 and less than our slabsize. 1178 */ 1179 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1180 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1181 vm_quantum == 0 || 1182 (vm_quantum & (vm_quantum - 1)) != 0 || 1183 cp->cache_slabsize < vm_quantum || 1184 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1185 cp->cache_chunksize == 0 || 1186 cp->cache_chunksize > cp->cache_slabsize) { 1187 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1188 goto out2; 1189 } 1190 1191 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1192 1193 if (cp->cache_buftotal == 0) { 1194 mdb_free(cp, csize); 1195 return (WALK_DONE); 1196 } 1197 1198 /* 1199 * If they ask for bufctls, but it's a small-slab cache, 1200 * there is nothing to report. 1201 */ 1202 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1203 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1204 cp->cache_flags)); 1205 mdb_free(cp, csize); 1206 return (WALK_DONE); 1207 } 1208 1209 /* 1210 * If they want constructed buffers, but there's no constructor or 1211 * the cache has DEADBEEF checking enabled, there is nothing to report. 1212 */ 1213 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1214 cp->cache_constructor == NULL || 1215 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1216 mdb_free(cp, csize); 1217 return (WALK_DONE); 1218 } 1219 1220 /* 1221 * Read in the contents of the magazine layer 1222 */ 1223 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1224 &magmax, UM_SLEEP) == WALK_ERR) 1225 goto out2; 1226 1227 /* 1228 * We have all of the buffers from the magazines; if we are walking 1229 * allocated buffers, sort them so we can bsearch them later. 1230 */ 1231 if (type & KM_ALLOCATED) 1232 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1233 1234 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1235 1236 kmw->kmw_type = type; 1237 kmw->kmw_addr = addr; 1238 kmw->kmw_cp = cp; 1239 kmw->kmw_csize = csize; 1240 kmw->kmw_maglist = maglist; 1241 kmw->kmw_max = magmax; 1242 kmw->kmw_count = magcnt; 1243 kmw->kmw_pos = 0; 1244 1245 /* 1246 * When walking allocated buffers in a KMF_HASH cache, we walk the 1247 * hash table instead of the slab layer. 1248 */ 1249 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1250 layered = "kmem_hash"; 1251 1252 kmw->kmw_type |= KM_HASH; 1253 } else { 1254 /* 1255 * If we are walking freed buffers, we only need the 1256 * magazine layer plus the partially allocated slabs. 1257 * To walk allocated buffers, we need all of the slabs. 1258 */ 1259 if (type & KM_ALLOCATED) 1260 layered = "kmem_slab"; 1261 else 1262 layered = "kmem_slab_partial"; 1263 1264 /* 1265 * for small-slab caches, we read in the entire slab. For 1266 * freed buffers, we can just walk the freelist. For 1267 * allocated buffers, we use a 'valid' array to track 1268 * the freed buffers. 1269 */ 1270 if (!(cp->cache_flags & KMF_HASH)) { 1271 chunksize = cp->cache_chunksize; 1272 slabsize = cp->cache_slabsize; 1273 1274 kmw->kmw_ubase = mdb_alloc(slabsize + 1275 sizeof (kmem_bufctl_t), UM_SLEEP); 1276 1277 if (type & KM_ALLOCATED) 1278 kmw->kmw_valid = 1279 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1280 } 1281 } 1282 1283 status = WALK_NEXT; 1284 1285 if (mdb_layered_walk(layered, wsp) == -1) { 1286 mdb_warn("unable to start layered '%s' walk", layered); 1287 status = WALK_ERR; 1288 } 1289 1290 out1: 1291 if (status == WALK_ERR) { 1292 if (kmw->kmw_valid) 1293 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1294 1295 if (kmw->kmw_ubase) 1296 mdb_free(kmw->kmw_ubase, slabsize + 1297 sizeof (kmem_bufctl_t)); 1298 1299 if (kmw->kmw_maglist) 1300 mdb_free(kmw->kmw_maglist, 1301 kmw->kmw_max * sizeof (uintptr_t)); 1302 1303 mdb_free(kmw, sizeof (kmem_walk_t)); 1304 wsp->walk_data = NULL; 1305 } 1306 1307 out2: 1308 if (status == WALK_ERR) 1309 mdb_free(cp, csize); 1310 1311 return (status); 1312 } 1313 1314 int 1315 kmem_walk_step(mdb_walk_state_t *wsp) 1316 { 1317 kmem_walk_t *kmw = wsp->walk_data; 1318 int type = kmw->kmw_type; 1319 kmem_cache_t *cp = kmw->kmw_cp; 1320 1321 void **maglist = kmw->kmw_maglist; 1322 int magcnt = kmw->kmw_count; 1323 1324 uintptr_t chunksize, slabsize; 1325 uintptr_t addr; 1326 const kmem_slab_t *sp; 1327 const kmem_bufctl_t *bcp; 1328 kmem_bufctl_t bc; 1329 1330 int chunks; 1331 char *kbase; 1332 void *buf; 1333 int i, ret; 1334 1335 char *valid, *ubase; 1336 1337 /* 1338 * first, handle the 'kmem_hash' layered walk case 1339 */ 1340 if (type & KM_HASH) { 1341 /* 1342 * We have a buffer which has been allocated out of the 1343 * global layer. We need to make sure that it's not 1344 * actually sitting in a magazine before we report it as 1345 * an allocated buffer. 1346 */ 1347 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1348 1349 if (magcnt > 0 && 1350 bsearch(&buf, maglist, magcnt, sizeof (void *), 1351 addrcmp) != NULL) 1352 return (WALK_NEXT); 1353 1354 if (type & KM_BUFCTL) 1355 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1356 1357 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1358 } 1359 1360 ret = WALK_NEXT; 1361 1362 addr = kmw->kmw_addr; 1363 1364 /* 1365 * If we're walking freed buffers, report everything in the 1366 * magazine layer before processing the first slab. 1367 */ 1368 if ((type & KM_FREE) && magcnt != 0) { 1369 kmw->kmw_count = 0; /* only do this once */ 1370 for (i = 0; i < magcnt; i++) { 1371 buf = maglist[i]; 1372 1373 if (type & KM_BUFCTL) { 1374 uintptr_t out; 1375 1376 if (cp->cache_flags & KMF_BUFTAG) { 1377 kmem_buftag_t *btp; 1378 kmem_buftag_t tag; 1379 1380 /* LINTED - alignment */ 1381 btp = KMEM_BUFTAG(cp, buf); 1382 if (mdb_vread(&tag, sizeof (tag), 1383 (uintptr_t)btp) == -1) { 1384 mdb_warn("reading buftag for " 1385 "%p at %p", buf, btp); 1386 continue; 1387 } 1388 out = (uintptr_t)tag.bt_bufctl; 1389 } else { 1390 if (kmem_hash_lookup(cp, addr, buf, 1391 &out) == -1) 1392 continue; 1393 } 1394 ret = bufctl_walk_callback(cp, wsp, out); 1395 } else { 1396 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1397 } 1398 1399 if (ret != WALK_NEXT) 1400 return (ret); 1401 } 1402 } 1403 1404 /* 1405 * If they want constructed buffers, we're finished, since the 1406 * magazine layer holds them all. 1407 */ 1408 if (type & KM_CONSTRUCTED) 1409 return (WALK_DONE); 1410 1411 /* 1412 * Handle the buffers in the current slab 1413 */ 1414 chunksize = cp->cache_chunksize; 1415 slabsize = cp->cache_slabsize; 1416 1417 sp = wsp->walk_layer; 1418 chunks = sp->slab_chunks; 1419 kbase = sp->slab_base; 1420 1421 dprintf(("kbase is %p\n", kbase)); 1422 1423 if (!(cp->cache_flags & KMF_HASH)) { 1424 valid = kmw->kmw_valid; 1425 ubase = kmw->kmw_ubase; 1426 1427 if (mdb_vread(ubase, chunks * chunksize, 1428 (uintptr_t)kbase) == -1) { 1429 mdb_warn("failed to read slab contents at %p", kbase); 1430 return (WALK_ERR); 1431 } 1432 1433 /* 1434 * Set up the valid map as fully allocated -- we'll punch 1435 * out the freelist. 1436 */ 1437 if (type & KM_ALLOCATED) 1438 (void) memset(valid, 1, chunks); 1439 } else { 1440 valid = NULL; 1441 ubase = NULL; 1442 } 1443 1444 /* 1445 * walk the slab's freelist 1446 */ 1447 bcp = sp->slab_head; 1448 1449 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1450 1451 /* 1452 * since we could be in the middle of allocating a buffer, 1453 * our refcnt could be one higher than it aught. So we 1454 * check one further on the freelist than the count allows. 1455 */ 1456 for (i = sp->slab_refcnt; i <= chunks; i++) { 1457 uint_t ndx; 1458 1459 dprintf(("bcp is %p\n", bcp)); 1460 1461 if (bcp == NULL) { 1462 if (i == chunks) 1463 break; 1464 mdb_warn( 1465 "slab %p in cache %p freelist too short by %d\n", 1466 sp, addr, chunks - i); 1467 break; 1468 } 1469 1470 if (cp->cache_flags & KMF_HASH) { 1471 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1472 mdb_warn("failed to read bufctl ptr at %p", 1473 bcp); 1474 break; 1475 } 1476 buf = bc.bc_addr; 1477 } else { 1478 /* 1479 * Otherwise the buffer is in the slab which 1480 * we've read in; we just need to determine 1481 * its offset in the slab to find the 1482 * kmem_bufctl_t. 1483 */ 1484 bc = *((kmem_bufctl_t *) 1485 ((uintptr_t)bcp - (uintptr_t)kbase + 1486 (uintptr_t)ubase)); 1487 1488 buf = KMEM_BUF(cp, bcp); 1489 } 1490 1491 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1492 1493 if (ndx > slabsize / cp->cache_bufsize) { 1494 /* 1495 * This is very wrong; we have managed to find 1496 * a buffer in the slab which shouldn't 1497 * actually be here. Emit a warning, and 1498 * try to continue. 1499 */ 1500 mdb_warn("buf %p is out of range for " 1501 "slab %p, cache %p\n", buf, sp, addr); 1502 } else if (type & KM_ALLOCATED) { 1503 /* 1504 * we have found a buffer on the slab's freelist; 1505 * clear its entry 1506 */ 1507 valid[ndx] = 0; 1508 } else { 1509 /* 1510 * Report this freed buffer 1511 */ 1512 if (type & KM_BUFCTL) { 1513 ret = bufctl_walk_callback(cp, wsp, 1514 (uintptr_t)bcp); 1515 } else { 1516 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1517 } 1518 if (ret != WALK_NEXT) 1519 return (ret); 1520 } 1521 1522 bcp = bc.bc_next; 1523 } 1524 1525 if (bcp != NULL) { 1526 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1527 sp, addr, bcp)); 1528 } 1529 1530 /* 1531 * If we are walking freed buffers, the loop above handled reporting 1532 * them. 1533 */ 1534 if (type & KM_FREE) 1535 return (WALK_NEXT); 1536 1537 if (type & KM_BUFCTL) { 1538 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1539 "cache %p\n", addr); 1540 return (WALK_ERR); 1541 } 1542 1543 /* 1544 * Report allocated buffers, skipping buffers in the magazine layer. 1545 * We only get this far for small-slab caches. 1546 */ 1547 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1548 buf = (char *)kbase + i * chunksize; 1549 1550 if (!valid[i]) 1551 continue; /* on slab freelist */ 1552 1553 if (magcnt > 0 && 1554 bsearch(&buf, maglist, magcnt, sizeof (void *), 1555 addrcmp) != NULL) 1556 continue; /* in magazine layer */ 1557 1558 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1559 } 1560 return (ret); 1561 } 1562 1563 void 1564 kmem_walk_fini(mdb_walk_state_t *wsp) 1565 { 1566 kmem_walk_t *kmw = wsp->walk_data; 1567 uintptr_t chunksize; 1568 uintptr_t slabsize; 1569 1570 if (kmw == NULL) 1571 return; 1572 1573 if (kmw->kmw_maglist != NULL) 1574 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1575 1576 chunksize = kmw->kmw_cp->cache_chunksize; 1577 slabsize = kmw->kmw_cp->cache_slabsize; 1578 1579 if (kmw->kmw_valid != NULL) 1580 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1581 if (kmw->kmw_ubase != NULL) 1582 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1583 1584 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1585 mdb_free(kmw, sizeof (kmem_walk_t)); 1586 } 1587 1588 /*ARGSUSED*/ 1589 static int 1590 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1591 { 1592 /* 1593 * Buffers allocated from NOTOUCH caches can also show up as freed 1594 * memory in other caches. This can be a little confusing, so we 1595 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1596 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1597 */ 1598 if (c->cache_cflags & KMC_NOTOUCH) 1599 return (WALK_NEXT); 1600 1601 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1602 wsp->walk_cbdata, addr) == -1) 1603 return (WALK_DONE); 1604 1605 return (WALK_NEXT); 1606 } 1607 1608 #define KMEM_WALK_ALL(name, wsp) { \ 1609 wsp->walk_data = (name); \ 1610 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1611 return (WALK_ERR); \ 1612 return (WALK_DONE); \ 1613 } 1614 1615 int 1616 kmem_walk_init(mdb_walk_state_t *wsp) 1617 { 1618 if (wsp->walk_arg != NULL) 1619 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1620 1621 if (wsp->walk_addr == NULL) 1622 KMEM_WALK_ALL("kmem", wsp); 1623 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1624 } 1625 1626 int 1627 bufctl_walk_init(mdb_walk_state_t *wsp) 1628 { 1629 if (wsp->walk_addr == NULL) 1630 KMEM_WALK_ALL("bufctl", wsp); 1631 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1632 } 1633 1634 int 1635 freemem_walk_init(mdb_walk_state_t *wsp) 1636 { 1637 if (wsp->walk_addr == NULL) 1638 KMEM_WALK_ALL("freemem", wsp); 1639 return (kmem_walk_init_common(wsp, KM_FREE)); 1640 } 1641 1642 int 1643 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1644 { 1645 if (wsp->walk_addr == NULL) 1646 KMEM_WALK_ALL("freemem_constructed", wsp); 1647 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1648 } 1649 1650 int 1651 freectl_walk_init(mdb_walk_state_t *wsp) 1652 { 1653 if (wsp->walk_addr == NULL) 1654 KMEM_WALK_ALL("freectl", wsp); 1655 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1656 } 1657 1658 int 1659 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1660 { 1661 if (wsp->walk_addr == NULL) 1662 KMEM_WALK_ALL("freectl_constructed", wsp); 1663 return (kmem_walk_init_common(wsp, 1664 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1665 } 1666 1667 typedef struct bufctl_history_walk { 1668 void *bhw_next; 1669 kmem_cache_t *bhw_cache; 1670 kmem_slab_t *bhw_slab; 1671 hrtime_t bhw_timestamp; 1672 } bufctl_history_walk_t; 1673 1674 int 1675 bufctl_history_walk_init(mdb_walk_state_t *wsp) 1676 { 1677 bufctl_history_walk_t *bhw; 1678 kmem_bufctl_audit_t bc; 1679 kmem_bufctl_audit_t bcn; 1680 1681 if (wsp->walk_addr == NULL) { 1682 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1683 return (WALK_ERR); 1684 } 1685 1686 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1687 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1688 return (WALK_ERR); 1689 } 1690 1691 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1692 bhw->bhw_timestamp = 0; 1693 bhw->bhw_cache = bc.bc_cache; 1694 bhw->bhw_slab = bc.bc_slab; 1695 1696 /* 1697 * sometimes the first log entry matches the base bufctl; in that 1698 * case, skip the base bufctl. 1699 */ 1700 if (bc.bc_lastlog != NULL && 1701 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1702 bc.bc_addr == bcn.bc_addr && 1703 bc.bc_cache == bcn.bc_cache && 1704 bc.bc_slab == bcn.bc_slab && 1705 bc.bc_timestamp == bcn.bc_timestamp && 1706 bc.bc_thread == bcn.bc_thread) 1707 bhw->bhw_next = bc.bc_lastlog; 1708 else 1709 bhw->bhw_next = (void *)wsp->walk_addr; 1710 1711 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1712 wsp->walk_data = bhw; 1713 1714 return (WALK_NEXT); 1715 } 1716 1717 int 1718 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1719 { 1720 bufctl_history_walk_t *bhw = wsp->walk_data; 1721 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1722 uintptr_t baseaddr = wsp->walk_addr; 1723 kmem_bufctl_audit_t bc; 1724 1725 if (addr == NULL) 1726 return (WALK_DONE); 1727 1728 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1729 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1730 return (WALK_ERR); 1731 } 1732 1733 /* 1734 * The bufctl is only valid if the address, cache, and slab are 1735 * correct. We also check that the timestamp is decreasing, to 1736 * prevent infinite loops. 1737 */ 1738 if ((uintptr_t)bc.bc_addr != baseaddr || 1739 bc.bc_cache != bhw->bhw_cache || 1740 bc.bc_slab != bhw->bhw_slab || 1741 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1742 return (WALK_DONE); 1743 1744 bhw->bhw_next = bc.bc_lastlog; 1745 bhw->bhw_timestamp = bc.bc_timestamp; 1746 1747 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1748 } 1749 1750 void 1751 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1752 { 1753 bufctl_history_walk_t *bhw = wsp->walk_data; 1754 1755 mdb_free(bhw, sizeof (*bhw)); 1756 } 1757 1758 typedef struct kmem_log_walk { 1759 kmem_bufctl_audit_t *klw_base; 1760 kmem_bufctl_audit_t **klw_sorted; 1761 kmem_log_header_t klw_lh; 1762 size_t klw_size; 1763 size_t klw_maxndx; 1764 size_t klw_ndx; 1765 } kmem_log_walk_t; 1766 1767 int 1768 kmem_log_walk_init(mdb_walk_state_t *wsp) 1769 { 1770 uintptr_t lp = wsp->walk_addr; 1771 kmem_log_walk_t *klw; 1772 kmem_log_header_t *lhp; 1773 int maxndx, i, j, k; 1774 1775 /* 1776 * By default (global walk), walk the kmem_transaction_log. Otherwise 1777 * read the log whose kmem_log_header_t is stored at walk_addr. 1778 */ 1779 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1780 mdb_warn("failed to read 'kmem_transaction_log'"); 1781 return (WALK_ERR); 1782 } 1783 1784 if (lp == NULL) { 1785 mdb_warn("log is disabled\n"); 1786 return (WALK_ERR); 1787 } 1788 1789 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1790 lhp = &klw->klw_lh; 1791 1792 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1793 mdb_warn("failed to read log header at %p", lp); 1794 mdb_free(klw, sizeof (kmem_log_walk_t)); 1795 return (WALK_ERR); 1796 } 1797 1798 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1799 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1800 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1801 1802 if (mdb_vread(klw->klw_base, klw->klw_size, 1803 (uintptr_t)lhp->lh_base) == -1) { 1804 mdb_warn("failed to read log at base %p", lhp->lh_base); 1805 mdb_free(klw->klw_base, klw->klw_size); 1806 mdb_free(klw, sizeof (kmem_log_walk_t)); 1807 return (WALK_ERR); 1808 } 1809 1810 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1811 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1812 1813 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1814 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1815 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1816 1817 for (j = 0; j < maxndx; j++) 1818 klw->klw_sorted[k++] = &chunk[j]; 1819 } 1820 1821 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1822 (int(*)(const void *, const void *))bufctlcmp); 1823 1824 klw->klw_maxndx = k; 1825 wsp->walk_data = klw; 1826 1827 return (WALK_NEXT); 1828 } 1829 1830 int 1831 kmem_log_walk_step(mdb_walk_state_t *wsp) 1832 { 1833 kmem_log_walk_t *klw = wsp->walk_data; 1834 kmem_bufctl_audit_t *bcp; 1835 1836 if (klw->klw_ndx == klw->klw_maxndx) 1837 return (WALK_DONE); 1838 1839 bcp = klw->klw_sorted[klw->klw_ndx++]; 1840 1841 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1842 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1843 } 1844 1845 void 1846 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1847 { 1848 kmem_log_walk_t *klw = wsp->walk_data; 1849 1850 mdb_free(klw->klw_base, klw->klw_size); 1851 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1852 sizeof (kmem_bufctl_audit_t *)); 1853 mdb_free(klw, sizeof (kmem_log_walk_t)); 1854 } 1855 1856 typedef struct allocdby_bufctl { 1857 uintptr_t abb_addr; 1858 hrtime_t abb_ts; 1859 } allocdby_bufctl_t; 1860 1861 typedef struct allocdby_walk { 1862 const char *abw_walk; 1863 uintptr_t abw_thread; 1864 size_t abw_nbufs; 1865 size_t abw_size; 1866 allocdby_bufctl_t *abw_buf; 1867 size_t abw_ndx; 1868 } allocdby_walk_t; 1869 1870 int 1871 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1872 allocdby_walk_t *abw) 1873 { 1874 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1875 return (WALK_NEXT); 1876 1877 if (abw->abw_nbufs == abw->abw_size) { 1878 allocdby_bufctl_t *buf; 1879 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1880 1881 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1882 1883 bcopy(abw->abw_buf, buf, oldsize); 1884 mdb_free(abw->abw_buf, oldsize); 1885 1886 abw->abw_size <<= 1; 1887 abw->abw_buf = buf; 1888 } 1889 1890 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1891 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1892 abw->abw_nbufs++; 1893 1894 return (WALK_NEXT); 1895 } 1896 1897 /*ARGSUSED*/ 1898 int 1899 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1900 { 1901 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl, 1902 abw, addr) == -1) { 1903 mdb_warn("couldn't walk bufctl for cache %p", addr); 1904 return (WALK_DONE); 1905 } 1906 1907 return (WALK_NEXT); 1908 } 1909 1910 static int 1911 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1912 { 1913 if (lhs->abb_ts < rhs->abb_ts) 1914 return (1); 1915 if (lhs->abb_ts > rhs->abb_ts) 1916 return (-1); 1917 return (0); 1918 } 1919 1920 static int 1921 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1922 { 1923 allocdby_walk_t *abw; 1924 1925 if (wsp->walk_addr == NULL) { 1926 mdb_warn("allocdby walk doesn't support global walks\n"); 1927 return (WALK_ERR); 1928 } 1929 1930 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1931 1932 abw->abw_thread = wsp->walk_addr; 1933 abw->abw_walk = walk; 1934 abw->abw_size = 128; /* something reasonable */ 1935 abw->abw_buf = 1936 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1937 1938 wsp->walk_data = abw; 1939 1940 if (mdb_walk("kmem_cache", 1941 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1942 mdb_warn("couldn't walk kmem_cache"); 1943 allocdby_walk_fini(wsp); 1944 return (WALK_ERR); 1945 } 1946 1947 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1948 (int(*)(const void *, const void *))allocdby_cmp); 1949 1950 return (WALK_NEXT); 1951 } 1952 1953 int 1954 allocdby_walk_init(mdb_walk_state_t *wsp) 1955 { 1956 return (allocdby_walk_init_common(wsp, "bufctl")); 1957 } 1958 1959 int 1960 freedby_walk_init(mdb_walk_state_t *wsp) 1961 { 1962 return (allocdby_walk_init_common(wsp, "freectl")); 1963 } 1964 1965 int 1966 allocdby_walk_step(mdb_walk_state_t *wsp) 1967 { 1968 allocdby_walk_t *abw = wsp->walk_data; 1969 kmem_bufctl_audit_t bc; 1970 uintptr_t addr; 1971 1972 if (abw->abw_ndx == abw->abw_nbufs) 1973 return (WALK_DONE); 1974 1975 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1976 1977 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1978 mdb_warn("couldn't read bufctl at %p", addr); 1979 return (WALK_DONE); 1980 } 1981 1982 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1983 } 1984 1985 void 1986 allocdby_walk_fini(mdb_walk_state_t *wsp) 1987 { 1988 allocdby_walk_t *abw = wsp->walk_data; 1989 1990 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1991 mdb_free(abw, sizeof (allocdby_walk_t)); 1992 } 1993 1994 /*ARGSUSED*/ 1995 int 1996 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 1997 { 1998 char c[MDB_SYM_NAMLEN]; 1999 GElf_Sym sym; 2000 int i; 2001 2002 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 2003 for (i = 0; i < bcp->bc_depth; i++) { 2004 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2005 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2006 continue; 2007 if (strncmp(c, "kmem_", 5) == 0) 2008 continue; 2009 mdb_printf("%s+0x%lx", 2010 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 2011 break; 2012 } 2013 mdb_printf("\n"); 2014 2015 return (WALK_NEXT); 2016 } 2017 2018 static int 2019 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 2020 { 2021 if (!(flags & DCMD_ADDRSPEC)) 2022 return (DCMD_USAGE); 2023 2024 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 2025 2026 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 2027 mdb_warn("can't walk '%s' for %p", w, addr); 2028 return (DCMD_ERR); 2029 } 2030 2031 return (DCMD_OK); 2032 } 2033 2034 /*ARGSUSED*/ 2035 int 2036 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2037 { 2038 return (allocdby_common(addr, flags, "allocdby")); 2039 } 2040 2041 /*ARGSUSED*/ 2042 int 2043 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2044 { 2045 return (allocdby_common(addr, flags, "freedby")); 2046 } 2047 2048 /* 2049 * Return a string describing the address in relation to the given thread's 2050 * stack. 2051 * 2052 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 2053 * 2054 * - If the address is above the stack pointer, return an empty string 2055 * signifying that the address is active. 2056 * 2057 * - If the address is below the stack pointer, and the thread is not on proc, 2058 * return " (below sp)". 2059 * 2060 * - If the address is below the stack pointer, and the thread is on proc, 2061 * return " (possibly below sp)". Depending on context, we may or may not 2062 * have an accurate t_sp. 2063 */ 2064 static const char * 2065 stack_active(const kthread_t *t, uintptr_t addr) 2066 { 2067 uintptr_t panicstk; 2068 GElf_Sym sym; 2069 2070 if (t->t_state == TS_FREE) 2071 return (" (inactive interrupt thread)"); 2072 2073 /* 2074 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 2075 * no longer relates to the thread's real stack. 2076 */ 2077 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 2078 panicstk = (uintptr_t)sym.st_value; 2079 2080 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 2081 return (""); 2082 } 2083 2084 if (addr >= t->t_sp + STACK_BIAS) 2085 return (""); 2086 2087 if (t->t_state == TS_ONPROC) 2088 return (" (possibly below sp)"); 2089 2090 return (" (below sp)"); 2091 } 2092 2093 /* 2094 * Additional state for the kmem and vmem ::whatis handlers 2095 */ 2096 typedef struct whatis_info { 2097 mdb_whatis_t *wi_w; 2098 const kmem_cache_t *wi_cache; 2099 const vmem_t *wi_vmem; 2100 vmem_t *wi_msb_arena; 2101 size_t wi_slab_size; 2102 uint_t wi_slab_found; 2103 uint_t wi_kmem_lite_count; 2104 uint_t wi_freemem; 2105 } whatis_info_t; 2106 2107 /* call one of our dcmd functions with "-v" and the provided address */ 2108 static void 2109 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr) 2110 { 2111 mdb_arg_t a; 2112 a.a_type = MDB_TYPE_STRING; 2113 a.a_un.a_str = "-v"; 2114 2115 mdb_printf(":\n"); 2116 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a); 2117 } 2118 2119 static void 2120 whatis_print_kmf_lite(uintptr_t btaddr, size_t count) 2121 { 2122 #define KMEM_LITE_MAX 16 2123 pc_t callers[KMEM_LITE_MAX]; 2124 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN; 2125 2126 kmem_buftag_t bt; 2127 intptr_t stat; 2128 const char *plural = ""; 2129 int i; 2130 2131 /* validate our arguments and read in the buftag */ 2132 if (count == 0 || count > KMEM_LITE_MAX || 2133 mdb_vread(&bt, sizeof (bt), btaddr) == -1) 2134 return; 2135 2136 /* validate the buffer state and read in the callers */ 2137 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 2138 2139 if (stat != KMEM_BUFTAG_ALLOC || stat != KMEM_BUFTAG_FREE || 2140 mdb_vread(callers, count * sizeof (pc_t), 2141 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2142 return; 2143 2144 /* If there aren't any filled in callers, bail */ 2145 if (callers[0] == uninit) 2146 return; 2147 2148 plural = (callers[1] == uninit) ? "" : "s"; 2149 2150 /* Everything's done and checked; print them out */ 2151 mdb_printf(":\n"); 2152 2153 mdb_inc_indent(8); 2154 mdb_printf("recent caller%s: %a", plural, callers[0]); 2155 for (i = 1; i < count; i++) { 2156 if (callers[i] == uninit) 2157 break; 2158 mdb_printf(", %a", callers[i]); 2159 } 2160 mdb_dec_indent(8); 2161 } 2162 2163 static void 2164 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, 2165 uintptr_t baddr) 2166 { 2167 mdb_whatis_t *w = wi->wi_w; 2168 2169 const kmem_cache_t *cp = wi->wi_cache; 2170 /* LINTED pointer cast may result in improper alignment */ 2171 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr); 2172 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); 2173 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT)); 2174 2175 mdb_whatis_report_object(w, maddr, addr, ""); 2176 2177 if (baddr != 0 && !call_printer) 2178 mdb_printf("bufctl %p ", baddr); 2179 2180 mdb_printf("%s from %s", 2181 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name); 2182 2183 if (baddr != 0 && call_printer) { 2184 whatis_call_printer(bufctl, baddr); 2185 return; 2186 } 2187 2188 /* for KMF_LITE caches, try to print out the previous callers */ 2189 if (!quiet && (cp->cache_flags & KMF_LITE)) 2190 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count); 2191 2192 mdb_printf("\n"); 2193 } 2194 2195 /*ARGSUSED*/ 2196 static int 2197 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi) 2198 { 2199 mdb_whatis_t *w = wi->wi_w; 2200 2201 uintptr_t cur; 2202 size_t size = wi->wi_cache->cache_bufsize; 2203 2204 while (mdb_whatis_match(w, addr, size, &cur)) 2205 whatis_print_kmem(wi, cur, addr, NULL); 2206 2207 return (WHATIS_WALKRET(w)); 2208 } 2209 2210 /*ARGSUSED*/ 2211 static int 2212 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi) 2213 { 2214 mdb_whatis_t *w = wi->wi_w; 2215 2216 uintptr_t cur; 2217 uintptr_t addr = (uintptr_t)bcp->bc_addr; 2218 size_t size = wi->wi_cache->cache_bufsize; 2219 2220 while (mdb_whatis_match(w, addr, size, &cur)) 2221 whatis_print_kmem(wi, cur, addr, baddr); 2222 2223 return (WHATIS_WALKRET(w)); 2224 } 2225 2226 static int 2227 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) 2228 { 2229 mdb_whatis_t *w = wi->wi_w; 2230 2231 size_t size = vs->vs_end - vs->vs_start; 2232 uintptr_t cur; 2233 2234 /* We're not interested in anything but alloc and free segments */ 2235 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) 2236 return (WALK_NEXT); 2237 2238 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { 2239 mdb_whatis_report_object(w, cur, vs->vs_start, ""); 2240 2241 /* 2242 * If we're not printing it seperately, provide the vmem_seg 2243 * pointer if it has a stack trace. 2244 */ 2245 if ((mdb_whatis_flags(w) & WHATIS_QUIET) && 2246 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) || 2247 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 2248 mdb_printf("vmem_seg %p ", addr); 2249 } 2250 2251 mdb_printf("%s from the %s vmem arena", 2252 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed", 2253 wi->wi_vmem->vm_name); 2254 2255 if (!(mdb_whatis_flags(w) & WHATIS_QUIET)) 2256 whatis_call_printer(vmem_seg, addr); 2257 else 2258 mdb_printf("\n"); 2259 } 2260 2261 return (WHATIS_WALKRET(w)); 2262 } 2263 2264 static int 2265 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) 2266 { 2267 mdb_whatis_t *w = wi->wi_w; 2268 const char *nm = vmem->vm_name; 2269 2270 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0); 2271 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0); 2272 2273 if (identifier != idspace) 2274 return (WALK_NEXT); 2275 2276 wi->wi_vmem = vmem; 2277 2278 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2279 mdb_printf("Searching vmem arena %s...\n", nm); 2280 2281 if (mdb_pwalk("vmem_seg", 2282 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { 2283 mdb_warn("can't walk vmem_seg for %p", addr); 2284 return (WALK_NEXT); 2285 } 2286 2287 return (WHATIS_WALKRET(w)); 2288 } 2289 2290 /*ARGSUSED*/ 2291 static int 2292 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi) 2293 { 2294 mdb_whatis_t *w = wi->wi_w; 2295 2296 /* It must overlap with the slab data, or it's not interesting */ 2297 if (mdb_whatis_overlaps(w, 2298 (uintptr_t)sp->slab_base, wi->wi_slab_size)) { 2299 wi->wi_slab_found++; 2300 return (WALK_DONE); 2301 } 2302 return (WALK_NEXT); 2303 } 2304 2305 static int 2306 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2307 { 2308 mdb_whatis_t *w = wi->wi_w; 2309 2310 char *walk, *freewalk; 2311 mdb_walk_cb_t func; 2312 int do_bufctl; 2313 2314 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0); 2315 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0); 2316 2317 if (identifier != idspace) 2318 return (WALK_NEXT); 2319 2320 /* Override the '-b' flag as necessary */ 2321 if (!(c->cache_flags & KMF_HASH)) 2322 do_bufctl = FALSE; /* no bufctls to walk */ 2323 else if (c->cache_flags & KMF_AUDIT) 2324 do_bufctl = TRUE; /* we always want debugging info */ 2325 else 2326 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); 2327 2328 if (do_bufctl) { 2329 walk = "bufctl"; 2330 freewalk = "freectl"; 2331 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2332 } else { 2333 walk = "kmem"; 2334 freewalk = "freemem"; 2335 func = (mdb_walk_cb_t)whatis_walk_kmem; 2336 } 2337 2338 wi->wi_cache = c; 2339 2340 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2341 mdb_printf("Searching %s...\n", c->cache_name); 2342 2343 /* 2344 * If more then two buffers live on each slab, figure out if we're 2345 * interested in anything in any slab before doing the more expensive 2346 * kmem/freemem (bufctl/freectl) walkers. 2347 */ 2348 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; 2349 if (!(c->cache_flags & KMF_HASH)) 2350 wi->wi_slab_size -= sizeof (kmem_slab_t); 2351 2352 if ((wi->wi_slab_size / c->cache_chunksize) > 2) { 2353 wi->wi_slab_found = 0; 2354 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, 2355 addr) == -1) { 2356 mdb_warn("can't find kmem_slab walker"); 2357 return (WALK_DONE); 2358 } 2359 if (wi->wi_slab_found == 0) 2360 return (WALK_NEXT); 2361 } 2362 2363 wi->wi_freemem = FALSE; 2364 if (mdb_pwalk(walk, func, wi, addr) == -1) { 2365 mdb_warn("can't find %s walker", walk); 2366 return (WALK_DONE); 2367 } 2368 2369 if (mdb_whatis_done(w)) 2370 return (WALK_DONE); 2371 2372 /* 2373 * We have searched for allocated memory; now search for freed memory. 2374 */ 2375 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2376 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2377 2378 wi->wi_freemem = TRUE; 2379 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2380 mdb_warn("can't find %s walker", freewalk); 2381 return (WALK_DONE); 2382 } 2383 2384 return (WHATIS_WALKRET(w)); 2385 } 2386 2387 static int 2388 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2389 { 2390 if (c->cache_arena == wi->wi_msb_arena || 2391 (c->cache_cflags & KMC_NOTOUCH)) 2392 return (WALK_NEXT); 2393 2394 return (whatis_walk_cache(addr, c, wi)); 2395 } 2396 2397 static int 2398 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2399 { 2400 if (c->cache_arena != wi->wi_msb_arena) 2401 return (WALK_NEXT); 2402 2403 return (whatis_walk_cache(addr, c, wi)); 2404 } 2405 2406 static int 2407 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2408 { 2409 if (c->cache_arena == wi->wi_msb_arena || 2410 !(c->cache_cflags & KMC_NOTOUCH)) 2411 return (WALK_NEXT); 2412 2413 return (whatis_walk_cache(addr, c, wi)); 2414 } 2415 2416 static int 2417 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w) 2418 { 2419 uintptr_t cur; 2420 uintptr_t saddr; 2421 size_t size; 2422 2423 /* 2424 * Often, one calls ::whatis on an address from a thread structure. 2425 * We use this opportunity to short circuit this case... 2426 */ 2427 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur)) 2428 mdb_whatis_report_object(w, cur, addr, 2429 "allocated as a thread structure\n"); 2430 2431 /* 2432 * Now check the stack 2433 */ 2434 if (t->t_stkbase == NULL) 2435 return (WALK_NEXT); 2436 2437 /* 2438 * This assumes that t_stk is the end of the stack, but it's really 2439 * only the initial stack pointer for the thread. Arguments to the 2440 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So 2441 * that 't->t_stk::whatis' reports "part of t's stack", we include 2442 * t_stk in the range (the "+ 1", below), but the kernel should 2443 * really include the full stack bounds where we can find it. 2444 */ 2445 saddr = (uintptr_t)t->t_stkbase; 2446 size = (uintptr_t)t->t_stk - saddr + 1; 2447 while (mdb_whatis_match(w, saddr, size, &cur)) 2448 mdb_whatis_report_object(w, cur, cur, 2449 "in thread %p's stack%s\n", addr, stack_active(t, cur)); 2450 2451 return (WHATIS_WALKRET(w)); 2452 } 2453 2454 static void 2455 whatis_modctl_match(mdb_whatis_t *w, const char *name, 2456 uintptr_t base, size_t size, const char *where) 2457 { 2458 uintptr_t cur; 2459 2460 /* 2461 * Since we're searching for addresses inside a module, we report 2462 * them as symbols. 2463 */ 2464 while (mdb_whatis_match(w, base, size, &cur)) 2465 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where); 2466 } 2467 2468 static int 2469 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w) 2470 { 2471 char name[MODMAXNAMELEN]; 2472 struct module mod; 2473 Shdr shdr; 2474 2475 if (m->mod_mp == NULL) 2476 return (WALK_NEXT); 2477 2478 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2479 mdb_warn("couldn't read modctl %p's module", addr); 2480 return (WALK_NEXT); 2481 } 2482 2483 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2484 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2485 2486 whatis_modctl_match(w, name, 2487 (uintptr_t)mod.text, mod.text_size, "text segment"); 2488 whatis_modctl_match(w, name, 2489 (uintptr_t)mod.data, mod.data_size, "data segment"); 2490 whatis_modctl_match(w, name, 2491 (uintptr_t)mod.bss, mod.bss_size, "bss segment"); 2492 2493 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2494 mdb_warn("couldn't read symbol header for %p's module", addr); 2495 return (WALK_NEXT); 2496 } 2497 2498 whatis_modctl_match(w, name, 2499 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab"); 2500 whatis_modctl_match(w, name, 2501 (uintptr_t)mod.symspace, mod.symsize, "symtab"); 2502 2503 return (WHATIS_WALKRET(w)); 2504 } 2505 2506 /*ARGSUSED*/ 2507 static int 2508 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w) 2509 { 2510 uintptr_t cur; 2511 2512 uintptr_t base = (uintptr_t)seg->pages; 2513 size_t size = (uintptr_t)seg->epages - base; 2514 2515 while (mdb_whatis_match(w, base, size, &cur)) { 2516 /* round our found pointer down to the page_t base. */ 2517 size_t offset = (cur - base) % sizeof (page_t); 2518 2519 mdb_whatis_report_object(w, cur, cur - offset, 2520 "allocated as a page structure\n"); 2521 } 2522 2523 return (WHATIS_WALKRET(w)); 2524 } 2525 2526 /*ARGSUSED*/ 2527 static int 2528 whatis_run_modules(mdb_whatis_t *w, void *arg) 2529 { 2530 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) { 2531 mdb_warn("couldn't find modctl walker"); 2532 return (1); 2533 } 2534 return (0); 2535 } 2536 2537 /*ARGSUSED*/ 2538 static int 2539 whatis_run_threads(mdb_whatis_t *w, void *ignored) 2540 { 2541 /* 2542 * Now search all thread stacks. Yes, this is a little weak; we 2543 * can save a lot of work by first checking to see if the 2544 * address is in segkp vs. segkmem. But hey, computers are 2545 * fast. 2546 */ 2547 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) { 2548 mdb_warn("couldn't find thread walker"); 2549 return (1); 2550 } 2551 return (0); 2552 } 2553 2554 /*ARGSUSED*/ 2555 static int 2556 whatis_run_pages(mdb_whatis_t *w, void *ignored) 2557 { 2558 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) { 2559 mdb_warn("couldn't find memseg walker"); 2560 return (1); 2561 } 2562 return (0); 2563 } 2564 2565 /*ARGSUSED*/ 2566 static int 2567 whatis_run_kmem(mdb_whatis_t *w, void *ignored) 2568 { 2569 whatis_info_t wi; 2570 2571 bzero(&wi, sizeof (wi)); 2572 wi.wi_w = w; 2573 2574 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1) 2575 mdb_warn("unable to readvar \"kmem_msb_arena\""); 2576 2577 if (mdb_readvar(&wi.wi_kmem_lite_count, 2578 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16) 2579 wi.wi_kmem_lite_count = 0; 2580 2581 /* 2582 * We process kmem caches in the following order: 2583 * 2584 * non-KMC_NOTOUCH, non-metadata (typically the most interesting) 2585 * metadata (can be huge with KMF_AUDIT) 2586 * KMC_NOTOUCH, non-metadata (see kmem_walk_all()) 2587 */ 2588 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2589 &wi) == -1 || 2590 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2591 &wi) == -1 || 2592 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2593 &wi) == -1) { 2594 mdb_warn("couldn't find kmem_cache walker"); 2595 return (1); 2596 } 2597 return (0); 2598 } 2599 2600 /*ARGSUSED*/ 2601 static int 2602 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2603 { 2604 whatis_info_t wi; 2605 2606 bzero(&wi, sizeof (wi)); 2607 wi.wi_w = w; 2608 2609 if (mdb_walk("vmem_postfix", 2610 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2611 mdb_warn("couldn't find vmem_postfix walker"); 2612 return (1); 2613 } 2614 return (0); 2615 } 2616 2617 typedef struct kmem_log_cpu { 2618 uintptr_t kmc_low; 2619 uintptr_t kmc_high; 2620 } kmem_log_cpu_t; 2621 2622 typedef struct kmem_log_data { 2623 uintptr_t kmd_addr; 2624 kmem_log_cpu_t *kmd_cpu; 2625 } kmem_log_data_t; 2626 2627 int 2628 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2629 kmem_log_data_t *kmd) 2630 { 2631 int i; 2632 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2633 size_t bufsize; 2634 2635 for (i = 0; i < NCPU; i++) { 2636 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2637 break; 2638 } 2639 2640 if (kmd->kmd_addr) { 2641 if (b->bc_cache == NULL) 2642 return (WALK_NEXT); 2643 2644 if (mdb_vread(&bufsize, sizeof (bufsize), 2645 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2646 mdb_warn( 2647 "failed to read cache_bufsize for cache at %p", 2648 b->bc_cache); 2649 return (WALK_ERR); 2650 } 2651 2652 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2653 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2654 return (WALK_NEXT); 2655 } 2656 2657 if (i == NCPU) 2658 mdb_printf(" "); 2659 else 2660 mdb_printf("%3d", i); 2661 2662 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2663 b->bc_timestamp, b->bc_thread); 2664 2665 return (WALK_NEXT); 2666 } 2667 2668 /*ARGSUSED*/ 2669 int 2670 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2671 { 2672 kmem_log_header_t lh; 2673 kmem_cpu_log_header_t clh; 2674 uintptr_t lhp, clhp; 2675 int ncpus; 2676 uintptr_t *cpu; 2677 GElf_Sym sym; 2678 kmem_log_cpu_t *kmc; 2679 int i; 2680 kmem_log_data_t kmd; 2681 uint_t opt_b = FALSE; 2682 2683 if (mdb_getopts(argc, argv, 2684 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2685 return (DCMD_USAGE); 2686 2687 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2688 mdb_warn("failed to read 'kmem_transaction_log'"); 2689 return (DCMD_ERR); 2690 } 2691 2692 if (lhp == NULL) { 2693 mdb_warn("no kmem transaction log\n"); 2694 return (DCMD_ERR); 2695 } 2696 2697 mdb_readvar(&ncpus, "ncpus"); 2698 2699 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2700 mdb_warn("failed to read log header at %p", lhp); 2701 return (DCMD_ERR); 2702 } 2703 2704 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2705 2706 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2707 2708 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2709 mdb_warn("couldn't find 'cpu' array"); 2710 return (DCMD_ERR); 2711 } 2712 2713 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2714 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2715 NCPU * sizeof (uintptr_t), sym.st_size); 2716 return (DCMD_ERR); 2717 } 2718 2719 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2720 mdb_warn("failed to read cpu array at %p", sym.st_value); 2721 return (DCMD_ERR); 2722 } 2723 2724 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2725 kmd.kmd_addr = NULL; 2726 kmd.kmd_cpu = kmc; 2727 2728 for (i = 0; i < NCPU; i++) { 2729 2730 if (cpu[i] == NULL) 2731 continue; 2732 2733 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2734 mdb_warn("cannot read cpu %d's log header at %p", 2735 i, clhp); 2736 return (DCMD_ERR); 2737 } 2738 2739 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2740 (uintptr_t)lh.lh_base; 2741 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2742 2743 clhp += sizeof (kmem_cpu_log_header_t); 2744 } 2745 2746 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2747 "TIMESTAMP", "THREAD"); 2748 2749 /* 2750 * If we have been passed an address, print out only log entries 2751 * corresponding to that address. If opt_b is specified, then interpret 2752 * the address as a bufctl. 2753 */ 2754 if (flags & DCMD_ADDRSPEC) { 2755 kmem_bufctl_audit_t b; 2756 2757 if (opt_b) { 2758 kmd.kmd_addr = addr; 2759 } else { 2760 if (mdb_vread(&b, 2761 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2762 mdb_warn("failed to read bufctl at %p", addr); 2763 return (DCMD_ERR); 2764 } 2765 2766 (void) kmem_log_walk(addr, &b, &kmd); 2767 2768 return (DCMD_OK); 2769 } 2770 } 2771 2772 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2773 mdb_warn("can't find kmem log walker"); 2774 return (DCMD_ERR); 2775 } 2776 2777 return (DCMD_OK); 2778 } 2779 2780 typedef struct bufctl_history_cb { 2781 int bhc_flags; 2782 int bhc_argc; 2783 const mdb_arg_t *bhc_argv; 2784 int bhc_ret; 2785 } bufctl_history_cb_t; 2786 2787 /*ARGSUSED*/ 2788 static int 2789 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2790 { 2791 bufctl_history_cb_t *bhc = arg; 2792 2793 bhc->bhc_ret = 2794 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2795 2796 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2797 2798 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2799 } 2800 2801 void 2802 bufctl_help(void) 2803 { 2804 mdb_printf("%s", 2805 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2806 mdb_dec_indent(2); 2807 mdb_printf("%<b>OPTIONS%</b>\n"); 2808 mdb_inc_indent(2); 2809 mdb_printf("%s", 2810 " -v Display the full content of the bufctl, including its stack trace\n" 2811 " -h retrieve the bufctl's transaction history, if available\n" 2812 " -a addr\n" 2813 " filter out bufctls not involving the buffer at addr\n" 2814 " -c caller\n" 2815 " filter out bufctls without the function/PC in their stack trace\n" 2816 " -e earliest\n" 2817 " filter out bufctls timestamped before earliest\n" 2818 " -l latest\n" 2819 " filter out bufctls timestamped after latest\n" 2820 " -t thread\n" 2821 " filter out bufctls not involving thread\n"); 2822 } 2823 2824 int 2825 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2826 { 2827 kmem_bufctl_audit_t bc; 2828 uint_t verbose = FALSE; 2829 uint_t history = FALSE; 2830 uint_t in_history = FALSE; 2831 uintptr_t caller = NULL, thread = NULL; 2832 uintptr_t laddr, haddr, baddr = NULL; 2833 hrtime_t earliest = 0, latest = 0; 2834 int i, depth; 2835 char c[MDB_SYM_NAMLEN]; 2836 GElf_Sym sym; 2837 2838 if (mdb_getopts(argc, argv, 2839 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2840 'h', MDB_OPT_SETBITS, TRUE, &history, 2841 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2842 'c', MDB_OPT_UINTPTR, &caller, 2843 't', MDB_OPT_UINTPTR, &thread, 2844 'e', MDB_OPT_UINT64, &earliest, 2845 'l', MDB_OPT_UINT64, &latest, 2846 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2847 return (DCMD_USAGE); 2848 2849 if (!(flags & DCMD_ADDRSPEC)) 2850 return (DCMD_USAGE); 2851 2852 if (in_history && !history) 2853 return (DCMD_USAGE); 2854 2855 if (history && !in_history) { 2856 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2857 UM_SLEEP | UM_GC); 2858 bufctl_history_cb_t bhc; 2859 2860 nargv[0].a_type = MDB_TYPE_STRING; 2861 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2862 2863 for (i = 0; i < argc; i++) 2864 nargv[i + 1] = argv[i]; 2865 2866 /* 2867 * When in history mode, we treat each element as if it 2868 * were in a seperate loop, so that the headers group 2869 * bufctls with similar histories. 2870 */ 2871 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2872 bhc.bhc_argc = argc + 1; 2873 bhc.bhc_argv = nargv; 2874 bhc.bhc_ret = DCMD_OK; 2875 2876 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2877 addr) == -1) { 2878 mdb_warn("unable to walk bufctl_history"); 2879 return (DCMD_ERR); 2880 } 2881 2882 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2883 mdb_printf("\n"); 2884 2885 return (bhc.bhc_ret); 2886 } 2887 2888 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2889 if (verbose) { 2890 mdb_printf("%16s %16s %16s %16s\n" 2891 "%<u>%16s %16s %16s %16s%</u>\n", 2892 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2893 "", "CACHE", "LASTLOG", "CONTENTS"); 2894 } else { 2895 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2896 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2897 } 2898 } 2899 2900 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2901 mdb_warn("couldn't read bufctl at %p", addr); 2902 return (DCMD_ERR); 2903 } 2904 2905 /* 2906 * Guard against bogus bc_depth in case the bufctl is corrupt or 2907 * the address does not really refer to a bufctl. 2908 */ 2909 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2910 2911 if (caller != NULL) { 2912 laddr = caller; 2913 haddr = caller + sizeof (caller); 2914 2915 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2916 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2917 /* 2918 * We were provided an exact symbol value; any 2919 * address in the function is valid. 2920 */ 2921 laddr = (uintptr_t)sym.st_value; 2922 haddr = (uintptr_t)sym.st_value + sym.st_size; 2923 } 2924 2925 for (i = 0; i < depth; i++) 2926 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2927 break; 2928 2929 if (i == depth) 2930 return (DCMD_OK); 2931 } 2932 2933 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2934 return (DCMD_OK); 2935 2936 if (earliest != 0 && bc.bc_timestamp < earliest) 2937 return (DCMD_OK); 2938 2939 if (latest != 0 && bc.bc_timestamp > latest) 2940 return (DCMD_OK); 2941 2942 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2943 return (DCMD_OK); 2944 2945 if (flags & DCMD_PIPE_OUT) { 2946 mdb_printf("%#lr\n", addr); 2947 return (DCMD_OK); 2948 } 2949 2950 if (verbose) { 2951 mdb_printf( 2952 "%<b>%16p%</b> %16p %16llx %16p\n" 2953 "%16s %16p %16p %16p\n", 2954 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2955 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2956 2957 mdb_inc_indent(17); 2958 for (i = 0; i < depth; i++) 2959 mdb_printf("%a\n", bc.bc_stack[i]); 2960 mdb_dec_indent(17); 2961 mdb_printf("\n"); 2962 } else { 2963 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2964 bc.bc_timestamp, bc.bc_thread); 2965 2966 for (i = 0; i < depth; i++) { 2967 if (mdb_lookup_by_addr(bc.bc_stack[i], 2968 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2969 continue; 2970 if (strncmp(c, "kmem_", 5) == 0) 2971 continue; 2972 mdb_printf(" %a\n", bc.bc_stack[i]); 2973 break; 2974 } 2975 2976 if (i >= depth) 2977 mdb_printf("\n"); 2978 } 2979 2980 return (DCMD_OK); 2981 } 2982 2983 typedef struct kmem_verify { 2984 uint64_t *kmv_buf; /* buffer to read cache contents into */ 2985 size_t kmv_size; /* number of bytes in kmv_buf */ 2986 int kmv_corruption; /* > 0 if corruption found. */ 2987 int kmv_besilent; /* report actual corruption sites */ 2988 struct kmem_cache kmv_cache; /* the cache we're operating on */ 2989 } kmem_verify_t; 2990 2991 /* 2992 * verify_pattern() 2993 * verify that buf is filled with the pattern pat. 2994 */ 2995 static int64_t 2996 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2997 { 2998 /*LINTED*/ 2999 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3000 uint64_t *buf; 3001 3002 for (buf = buf_arg; buf < bufend; buf++) 3003 if (*buf != pat) 3004 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3005 return (-1); 3006 } 3007 3008 /* 3009 * verify_buftag() 3010 * verify that btp->bt_bxstat == (bcp ^ pat) 3011 */ 3012 static int 3013 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3014 { 3015 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3016 } 3017 3018 /* 3019 * verify_free() 3020 * verify the integrity of a free block of memory by checking 3021 * that it is filled with 0xdeadbeef and that its buftag is sane. 3022 */ 3023 /*ARGSUSED1*/ 3024 static int 3025 verify_free(uintptr_t addr, const void *data, void *private) 3026 { 3027 kmem_verify_t *kmv = (kmem_verify_t *)private; 3028 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3029 int64_t corrupt; /* corruption offset */ 3030 kmem_buftag_t *buftagp; /* ptr to buftag */ 3031 kmem_cache_t *cp = &kmv->kmv_cache; 3032 int besilent = kmv->kmv_besilent; 3033 3034 /*LINTED*/ 3035 buftagp = KMEM_BUFTAG(cp, buf); 3036 3037 /* 3038 * Read the buffer to check. 3039 */ 3040 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3041 if (!besilent) 3042 mdb_warn("couldn't read %p", addr); 3043 return (WALK_NEXT); 3044 } 3045 3046 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3047 KMEM_FREE_PATTERN)) >= 0) { 3048 if (!besilent) 3049 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3050 addr, (uintptr_t)addr + corrupt); 3051 goto corrupt; 3052 } 3053 /* 3054 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3055 * the first bytes of the buffer, hence we cannot check for red 3056 * zone corruption. 3057 */ 3058 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3059 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3060 if (!besilent) 3061 mdb_printf("buffer %p (free) seems to " 3062 "have a corrupt redzone pattern\n", addr); 3063 goto corrupt; 3064 } 3065 3066 /* 3067 * confirm bufctl pointer integrity. 3068 */ 3069 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3070 if (!besilent) 3071 mdb_printf("buffer %p (free) has a corrupt " 3072 "buftag\n", addr); 3073 goto corrupt; 3074 } 3075 3076 return (WALK_NEXT); 3077 corrupt: 3078 kmv->kmv_corruption++; 3079 return (WALK_NEXT); 3080 } 3081 3082 /* 3083 * verify_alloc() 3084 * Verify that the buftag of an allocated buffer makes sense with respect 3085 * to the buffer. 3086 */ 3087 /*ARGSUSED1*/ 3088 static int 3089 verify_alloc(uintptr_t addr, const void *data, void *private) 3090 { 3091 kmem_verify_t *kmv = (kmem_verify_t *)private; 3092 kmem_cache_t *cp = &kmv->kmv_cache; 3093 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3094 /*LINTED*/ 3095 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3096 uint32_t *ip = (uint32_t *)buftagp; 3097 uint8_t *bp = (uint8_t *)buf; 3098 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3099 int besilent = kmv->kmv_besilent; 3100 3101 /* 3102 * Read the buffer to check. 3103 */ 3104 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3105 if (!besilent) 3106 mdb_warn("couldn't read %p", addr); 3107 return (WALK_NEXT); 3108 } 3109 3110 /* 3111 * There are two cases to handle: 3112 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3113 * 0xfeedfacefeedface at the end of it 3114 * 2. If the buf was alloc'd using kmem_alloc, it will have 3115 * 0xbb just past the end of the region in use. At the buftag, 3116 * it will have 0xfeedface (or, if the whole buffer is in use, 3117 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3118 * endianness), followed by 32 bits containing the offset of the 3119 * 0xbb byte in the buffer. 3120 * 3121 * Finally, the two 32-bit words that comprise the second half of the 3122 * buftag should xor to KMEM_BUFTAG_ALLOC 3123 */ 3124 3125 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3126 looks_ok = 1; 3127 else if (!KMEM_SIZE_VALID(ip[1])) 3128 size_ok = 0; 3129 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3130 looks_ok = 1; 3131 else 3132 size_ok = 0; 3133 3134 if (!size_ok) { 3135 if (!besilent) 3136 mdb_printf("buffer %p (allocated) has a corrupt " 3137 "redzone size encoding\n", addr); 3138 goto corrupt; 3139 } 3140 3141 if (!looks_ok) { 3142 if (!besilent) 3143 mdb_printf("buffer %p (allocated) has a corrupt " 3144 "redzone signature\n", addr); 3145 goto corrupt; 3146 } 3147 3148 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3149 if (!besilent) 3150 mdb_printf("buffer %p (allocated) has a " 3151 "corrupt buftag\n", addr); 3152 goto corrupt; 3153 } 3154 3155 return (WALK_NEXT); 3156 corrupt: 3157 kmv->kmv_corruption++; 3158 return (WALK_NEXT); 3159 } 3160 3161 /*ARGSUSED2*/ 3162 int 3163 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3164 { 3165 if (flags & DCMD_ADDRSPEC) { 3166 int check_alloc = 0, check_free = 0; 3167 kmem_verify_t kmv; 3168 3169 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3170 addr) == -1) { 3171 mdb_warn("couldn't read kmem_cache %p", addr); 3172 return (DCMD_ERR); 3173 } 3174 3175 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3176 sizeof (kmem_buftag_t); 3177 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3178 kmv.kmv_corruption = 0; 3179 3180 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3181 check_alloc = 1; 3182 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3183 check_free = 1; 3184 } else { 3185 if (!(flags & DCMD_LOOP)) { 3186 mdb_warn("cache %p (%s) does not have " 3187 "redzone checking enabled\n", addr, 3188 kmv.kmv_cache.cache_name); 3189 } 3190 return (DCMD_ERR); 3191 } 3192 3193 if (flags & DCMD_LOOP) { 3194 /* 3195 * table mode, don't print out every corrupt buffer 3196 */ 3197 kmv.kmv_besilent = 1; 3198 } else { 3199 mdb_printf("Summary for cache '%s'\n", 3200 kmv.kmv_cache.cache_name); 3201 mdb_inc_indent(2); 3202 kmv.kmv_besilent = 0; 3203 } 3204 3205 if (check_alloc) 3206 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3207 if (check_free) 3208 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3209 3210 if (flags & DCMD_LOOP) { 3211 if (kmv.kmv_corruption == 0) { 3212 mdb_printf("%-*s %?p clean\n", 3213 KMEM_CACHE_NAMELEN, 3214 kmv.kmv_cache.cache_name, addr); 3215 } else { 3216 char *s = ""; /* optional s in "buffer[s]" */ 3217 if (kmv.kmv_corruption > 1) 3218 s = "s"; 3219 3220 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3221 KMEM_CACHE_NAMELEN, 3222 kmv.kmv_cache.cache_name, addr, 3223 kmv.kmv_corruption, s); 3224 } 3225 } else { 3226 /* 3227 * This is the more verbose mode, when the user has 3228 * type addr::kmem_verify. If the cache was clean, 3229 * nothing will have yet been printed. So say something. 3230 */ 3231 if (kmv.kmv_corruption == 0) 3232 mdb_printf("clean\n"); 3233 3234 mdb_dec_indent(2); 3235 } 3236 } else { 3237 /* 3238 * If the user didn't specify a cache to verify, we'll walk all 3239 * kmem_cache's, specifying ourself as a callback for each... 3240 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3241 */ 3242 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3243 "Cache Name", "Addr", "Cache Integrity"); 3244 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3245 } 3246 3247 return (DCMD_OK); 3248 } 3249 3250 typedef struct vmem_node { 3251 struct vmem_node *vn_next; 3252 struct vmem_node *vn_parent; 3253 struct vmem_node *vn_sibling; 3254 struct vmem_node *vn_children; 3255 uintptr_t vn_addr; 3256 int vn_marked; 3257 vmem_t vn_vmem; 3258 } vmem_node_t; 3259 3260 typedef struct vmem_walk { 3261 vmem_node_t *vw_root; 3262 vmem_node_t *vw_current; 3263 } vmem_walk_t; 3264 3265 int 3266 vmem_walk_init(mdb_walk_state_t *wsp) 3267 { 3268 uintptr_t vaddr, paddr; 3269 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3270 vmem_walk_t *vw; 3271 3272 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3273 mdb_warn("couldn't read 'vmem_list'"); 3274 return (WALK_ERR); 3275 } 3276 3277 while (vaddr != NULL) { 3278 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3279 vp->vn_addr = vaddr; 3280 vp->vn_next = head; 3281 head = vp; 3282 3283 if (vaddr == wsp->walk_addr) 3284 current = vp; 3285 3286 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3287 mdb_warn("couldn't read vmem_t at %p", vaddr); 3288 goto err; 3289 } 3290 3291 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3292 } 3293 3294 for (vp = head; vp != NULL; vp = vp->vn_next) { 3295 3296 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3297 vp->vn_sibling = root; 3298 root = vp; 3299 continue; 3300 } 3301 3302 for (parent = head; parent != NULL; parent = parent->vn_next) { 3303 if (parent->vn_addr != paddr) 3304 continue; 3305 vp->vn_sibling = parent->vn_children; 3306 parent->vn_children = vp; 3307 vp->vn_parent = parent; 3308 break; 3309 } 3310 3311 if (parent == NULL) { 3312 mdb_warn("couldn't find %p's parent (%p)\n", 3313 vp->vn_addr, paddr); 3314 goto err; 3315 } 3316 } 3317 3318 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3319 vw->vw_root = root; 3320 3321 if (current != NULL) 3322 vw->vw_current = current; 3323 else 3324 vw->vw_current = root; 3325 3326 wsp->walk_data = vw; 3327 return (WALK_NEXT); 3328 err: 3329 for (vp = head; head != NULL; vp = head) { 3330 head = vp->vn_next; 3331 mdb_free(vp, sizeof (vmem_node_t)); 3332 } 3333 3334 return (WALK_ERR); 3335 } 3336 3337 int 3338 vmem_walk_step(mdb_walk_state_t *wsp) 3339 { 3340 vmem_walk_t *vw = wsp->walk_data; 3341 vmem_node_t *vp; 3342 int rval; 3343 3344 if ((vp = vw->vw_current) == NULL) 3345 return (WALK_DONE); 3346 3347 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3348 3349 if (vp->vn_children != NULL) { 3350 vw->vw_current = vp->vn_children; 3351 return (rval); 3352 } 3353 3354 do { 3355 vw->vw_current = vp->vn_sibling; 3356 vp = vp->vn_parent; 3357 } while (vw->vw_current == NULL && vp != NULL); 3358 3359 return (rval); 3360 } 3361 3362 /* 3363 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3364 * children are visited before their parent. We perform the postfix walk 3365 * iteratively (rather than recursively) to allow mdb to regain control 3366 * after each callback. 3367 */ 3368 int 3369 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3370 { 3371 vmem_walk_t *vw = wsp->walk_data; 3372 vmem_node_t *vp = vw->vw_current; 3373 int rval; 3374 3375 /* 3376 * If this node is marked, then we know that we have already visited 3377 * all of its children. If the node has any siblings, they need to 3378 * be visited next; otherwise, we need to visit the parent. Note 3379 * that vp->vn_marked will only be zero on the first invocation of 3380 * the step function. 3381 */ 3382 if (vp->vn_marked) { 3383 if (vp->vn_sibling != NULL) 3384 vp = vp->vn_sibling; 3385 else if (vp->vn_parent != NULL) 3386 vp = vp->vn_parent; 3387 else { 3388 /* 3389 * We have neither a parent, nor a sibling, and we 3390 * have already been visited; we're done. 3391 */ 3392 return (WALK_DONE); 3393 } 3394 } 3395 3396 /* 3397 * Before we visit this node, visit its children. 3398 */ 3399 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3400 vp = vp->vn_children; 3401 3402 vp->vn_marked = 1; 3403 vw->vw_current = vp; 3404 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3405 3406 return (rval); 3407 } 3408 3409 void 3410 vmem_walk_fini(mdb_walk_state_t *wsp) 3411 { 3412 vmem_walk_t *vw = wsp->walk_data; 3413 vmem_node_t *root = vw->vw_root; 3414 int done; 3415 3416 if (root == NULL) 3417 return; 3418 3419 if ((vw->vw_root = root->vn_children) != NULL) 3420 vmem_walk_fini(wsp); 3421 3422 vw->vw_root = root->vn_sibling; 3423 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3424 mdb_free(root, sizeof (vmem_node_t)); 3425 3426 if (done) { 3427 mdb_free(vw, sizeof (vmem_walk_t)); 3428 } else { 3429 vmem_walk_fini(wsp); 3430 } 3431 } 3432 3433 typedef struct vmem_seg_walk { 3434 uint8_t vsw_type; 3435 uintptr_t vsw_start; 3436 uintptr_t vsw_current; 3437 } vmem_seg_walk_t; 3438 3439 /*ARGSUSED*/ 3440 int 3441 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3442 { 3443 vmem_seg_walk_t *vsw; 3444 3445 if (wsp->walk_addr == NULL) { 3446 mdb_warn("vmem_%s does not support global walks\n", name); 3447 return (WALK_ERR); 3448 } 3449 3450 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3451 3452 vsw->vsw_type = type; 3453 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3454 vsw->vsw_current = vsw->vsw_start; 3455 3456 return (WALK_NEXT); 3457 } 3458 3459 /* 3460 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 3461 */ 3462 #define VMEM_NONE 0 3463 3464 int 3465 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3466 { 3467 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3468 } 3469 3470 int 3471 vmem_free_walk_init(mdb_walk_state_t *wsp) 3472 { 3473 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3474 } 3475 3476 int 3477 vmem_span_walk_init(mdb_walk_state_t *wsp) 3478 { 3479 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3480 } 3481 3482 int 3483 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3484 { 3485 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3486 } 3487 3488 int 3489 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3490 { 3491 vmem_seg_t seg; 3492 vmem_seg_walk_t *vsw = wsp->walk_data; 3493 uintptr_t addr = vsw->vsw_current; 3494 static size_t seg_size = 0; 3495 int rval; 3496 3497 if (!seg_size) { 3498 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3499 mdb_warn("failed to read 'vmem_seg_size'"); 3500 seg_size = sizeof (vmem_seg_t); 3501 } 3502 } 3503 3504 if (seg_size < sizeof (seg)) 3505 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3506 3507 if (mdb_vread(&seg, seg_size, addr) == -1) { 3508 mdb_warn("couldn't read vmem_seg at %p", addr); 3509 return (WALK_ERR); 3510 } 3511 3512 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3513 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3514 rval = WALK_NEXT; 3515 } else { 3516 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3517 } 3518 3519 if (vsw->vsw_current == vsw->vsw_start) 3520 return (WALK_DONE); 3521 3522 return (rval); 3523 } 3524 3525 void 3526 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3527 { 3528 vmem_seg_walk_t *vsw = wsp->walk_data; 3529 3530 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3531 } 3532 3533 #define VMEM_NAMEWIDTH 22 3534 3535 int 3536 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3537 { 3538 vmem_t v, parent; 3539 vmem_kstat_t *vkp = &v.vm_kstat; 3540 uintptr_t paddr; 3541 int ident = 0; 3542 char c[VMEM_NAMEWIDTH]; 3543 3544 if (!(flags & DCMD_ADDRSPEC)) { 3545 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3546 mdb_warn("can't walk vmem"); 3547 return (DCMD_ERR); 3548 } 3549 return (DCMD_OK); 3550 } 3551 3552 if (DCMD_HDRSPEC(flags)) 3553 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3554 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3555 "TOTAL", "SUCCEED", "FAIL"); 3556 3557 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3558 mdb_warn("couldn't read vmem at %p", addr); 3559 return (DCMD_ERR); 3560 } 3561 3562 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3563 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3564 mdb_warn("couldn't trace %p's ancestry", addr); 3565 ident = 0; 3566 break; 3567 } 3568 paddr = (uintptr_t)parent.vm_source; 3569 } 3570 3571 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3572 3573 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3574 addr, VMEM_NAMEWIDTH, c, 3575 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3576 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3577 3578 return (DCMD_OK); 3579 } 3580 3581 void 3582 vmem_seg_help(void) 3583 { 3584 mdb_printf("%s", 3585 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3586 "\n" 3587 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3588 "representing a single chunk of data. Only ALLOC segments have debugging\n" 3589 "information.\n"); 3590 mdb_dec_indent(2); 3591 mdb_printf("%<b>OPTIONS%</b>\n"); 3592 mdb_inc_indent(2); 3593 mdb_printf("%s", 3594 " -v Display the full content of the vmem_seg, including its stack trace\n" 3595 " -s report the size of the segment, instead of the end address\n" 3596 " -c caller\n" 3597 " filter out segments without the function/PC in their stack trace\n" 3598 " -e earliest\n" 3599 " filter out segments timestamped before earliest\n" 3600 " -l latest\n" 3601 " filter out segments timestamped after latest\n" 3602 " -m minsize\n" 3603 " filer out segments smaller than minsize\n" 3604 " -M maxsize\n" 3605 " filer out segments larger than maxsize\n" 3606 " -t thread\n" 3607 " filter out segments not involving thread\n" 3608 " -T type\n" 3609 " filter out segments not of type 'type'\n" 3610 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3611 } 3612 3613 /*ARGSUSED*/ 3614 int 3615 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3616 { 3617 vmem_seg_t vs; 3618 pc_t *stk = vs.vs_stack; 3619 uintptr_t sz; 3620 uint8_t t; 3621 const char *type = NULL; 3622 GElf_Sym sym; 3623 char c[MDB_SYM_NAMLEN]; 3624 int no_debug; 3625 int i; 3626 int depth; 3627 uintptr_t laddr, haddr; 3628 3629 uintptr_t caller = NULL, thread = NULL; 3630 uintptr_t minsize = 0, maxsize = 0; 3631 3632 hrtime_t earliest = 0, latest = 0; 3633 3634 uint_t size = 0; 3635 uint_t verbose = 0; 3636 3637 if (!(flags & DCMD_ADDRSPEC)) 3638 return (DCMD_USAGE); 3639 3640 if (mdb_getopts(argc, argv, 3641 'c', MDB_OPT_UINTPTR, &caller, 3642 'e', MDB_OPT_UINT64, &earliest, 3643 'l', MDB_OPT_UINT64, &latest, 3644 's', MDB_OPT_SETBITS, TRUE, &size, 3645 'm', MDB_OPT_UINTPTR, &minsize, 3646 'M', MDB_OPT_UINTPTR, &maxsize, 3647 't', MDB_OPT_UINTPTR, &thread, 3648 'T', MDB_OPT_STR, &type, 3649 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3650 NULL) != argc) 3651 return (DCMD_USAGE); 3652 3653 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3654 if (verbose) { 3655 mdb_printf("%16s %4s %16s %16s %16s\n" 3656 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3657 "ADDR", "TYPE", "START", "END", "SIZE", 3658 "", "", "THREAD", "TIMESTAMP", ""); 3659 } else { 3660 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3661 "START", size? "SIZE" : "END", "WHO"); 3662 } 3663 } 3664 3665 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3666 mdb_warn("couldn't read vmem_seg at %p", addr); 3667 return (DCMD_ERR); 3668 } 3669 3670 if (type != NULL) { 3671 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3672 t = VMEM_ALLOC; 3673 else if (strcmp(type, "FREE") == 0) 3674 t = VMEM_FREE; 3675 else if (strcmp(type, "SPAN") == 0) 3676 t = VMEM_SPAN; 3677 else if (strcmp(type, "ROTR") == 0 || 3678 strcmp(type, "ROTOR") == 0) 3679 t = VMEM_ROTOR; 3680 else if (strcmp(type, "WLKR") == 0 || 3681 strcmp(type, "WALKER") == 0) 3682 t = VMEM_WALKER; 3683 else { 3684 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3685 type); 3686 return (DCMD_ERR); 3687 } 3688 3689 if (vs.vs_type != t) 3690 return (DCMD_OK); 3691 } 3692 3693 sz = vs.vs_end - vs.vs_start; 3694 3695 if (minsize != 0 && sz < minsize) 3696 return (DCMD_OK); 3697 3698 if (maxsize != 0 && sz > maxsize) 3699 return (DCMD_OK); 3700 3701 t = vs.vs_type; 3702 depth = vs.vs_depth; 3703 3704 /* 3705 * debug info, when present, is only accurate for VMEM_ALLOC segments 3706 */ 3707 no_debug = (t != VMEM_ALLOC) || 3708 (depth == 0 || depth > VMEM_STACK_DEPTH); 3709 3710 if (no_debug) { 3711 if (caller != NULL || thread != NULL || earliest != 0 || 3712 latest != 0) 3713 return (DCMD_OK); /* not enough info */ 3714 } else { 3715 if (caller != NULL) { 3716 laddr = caller; 3717 haddr = caller + sizeof (caller); 3718 3719 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3720 sizeof (c), &sym) != -1 && 3721 caller == (uintptr_t)sym.st_value) { 3722 /* 3723 * We were provided an exact symbol value; any 3724 * address in the function is valid. 3725 */ 3726 laddr = (uintptr_t)sym.st_value; 3727 haddr = (uintptr_t)sym.st_value + sym.st_size; 3728 } 3729 3730 for (i = 0; i < depth; i++) 3731 if (vs.vs_stack[i] >= laddr && 3732 vs.vs_stack[i] < haddr) 3733 break; 3734 3735 if (i == depth) 3736 return (DCMD_OK); 3737 } 3738 3739 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3740 return (DCMD_OK); 3741 3742 if (earliest != 0 && vs.vs_timestamp < earliest) 3743 return (DCMD_OK); 3744 3745 if (latest != 0 && vs.vs_timestamp > latest) 3746 return (DCMD_OK); 3747 } 3748 3749 type = (t == VMEM_ALLOC ? "ALLC" : 3750 t == VMEM_FREE ? "FREE" : 3751 t == VMEM_SPAN ? "SPAN" : 3752 t == VMEM_ROTOR ? "ROTR" : 3753 t == VMEM_WALKER ? "WLKR" : 3754 "????"); 3755 3756 if (flags & DCMD_PIPE_OUT) { 3757 mdb_printf("%#lr\n", addr); 3758 return (DCMD_OK); 3759 } 3760 3761 if (verbose) { 3762 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3763 addr, type, vs.vs_start, vs.vs_end, sz); 3764 3765 if (no_debug) 3766 return (DCMD_OK); 3767 3768 mdb_printf("%16s %4s %16p %16llx\n", 3769 "", "", vs.vs_thread, vs.vs_timestamp); 3770 3771 mdb_inc_indent(17); 3772 for (i = 0; i < depth; i++) { 3773 mdb_printf("%a\n", stk[i]); 3774 } 3775 mdb_dec_indent(17); 3776 mdb_printf("\n"); 3777 } else { 3778 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3779 vs.vs_start, size? sz : vs.vs_end); 3780 3781 if (no_debug) { 3782 mdb_printf("\n"); 3783 return (DCMD_OK); 3784 } 3785 3786 for (i = 0; i < depth; i++) { 3787 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3788 c, sizeof (c), &sym) == -1) 3789 continue; 3790 if (strncmp(c, "vmem_", 5) == 0) 3791 continue; 3792 break; 3793 } 3794 mdb_printf(" %a\n", stk[i]); 3795 } 3796 return (DCMD_OK); 3797 } 3798 3799 typedef struct kmalog_data { 3800 uintptr_t kma_addr; 3801 hrtime_t kma_newest; 3802 } kmalog_data_t; 3803 3804 /*ARGSUSED*/ 3805 static int 3806 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3807 { 3808 char name[KMEM_CACHE_NAMELEN + 1]; 3809 hrtime_t delta; 3810 int i, depth; 3811 size_t bufsize; 3812 3813 if (bcp->bc_timestamp == 0) 3814 return (WALK_DONE); 3815 3816 if (kma->kma_newest == 0) 3817 kma->kma_newest = bcp->bc_timestamp; 3818 3819 if (kma->kma_addr) { 3820 if (mdb_vread(&bufsize, sizeof (bufsize), 3821 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3822 mdb_warn( 3823 "failed to read cache_bufsize for cache at %p", 3824 bcp->bc_cache); 3825 return (WALK_ERR); 3826 } 3827 3828 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3829 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3830 return (WALK_NEXT); 3831 } 3832 3833 delta = kma->kma_newest - bcp->bc_timestamp; 3834 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3835 3836 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3837 &bcp->bc_cache->cache_name) <= 0) 3838 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3839 3840 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3841 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3842 3843 for (i = 0; i < depth; i++) 3844 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3845 3846 return (WALK_NEXT); 3847 } 3848 3849 int 3850 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3851 { 3852 const char *logname = "kmem_transaction_log"; 3853 kmalog_data_t kma; 3854 3855 if (argc > 1) 3856 return (DCMD_USAGE); 3857 3858 kma.kma_newest = 0; 3859 if (flags & DCMD_ADDRSPEC) 3860 kma.kma_addr = addr; 3861 else 3862 kma.kma_addr = NULL; 3863 3864 if (argc > 0) { 3865 if (argv->a_type != MDB_TYPE_STRING) 3866 return (DCMD_USAGE); 3867 if (strcmp(argv->a_un.a_str, "fail") == 0) 3868 logname = "kmem_failure_log"; 3869 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3870 logname = "kmem_slab_log"; 3871 else 3872 return (DCMD_USAGE); 3873 } 3874 3875 if (mdb_readvar(&addr, logname) == -1) { 3876 mdb_warn("failed to read %s log header pointer"); 3877 return (DCMD_ERR); 3878 } 3879 3880 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3881 mdb_warn("failed to walk kmem log"); 3882 return (DCMD_ERR); 3883 } 3884 3885 return (DCMD_OK); 3886 } 3887 3888 /* 3889 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3890 * The first piece is a structure which we use to accumulate kmem_cache_t 3891 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3892 * walker; we either add all caches, or ones named explicitly as arguments. 3893 */ 3894 3895 typedef struct kmclist { 3896 const char *kmc_name; /* Name to match (or NULL) */ 3897 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3898 int kmc_nelems; /* Num entries in kmc_caches */ 3899 int kmc_size; /* Size of kmc_caches array */ 3900 } kmclist_t; 3901 3902 static int 3903 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3904 { 3905 void *p; 3906 int s; 3907 3908 if (kmc->kmc_name == NULL || 3909 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3910 /* 3911 * If we have a match, grow our array (if necessary), and then 3912 * add the virtual address of the matching cache to our list. 3913 */ 3914 if (kmc->kmc_nelems >= kmc->kmc_size) { 3915 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3916 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3917 3918 bcopy(kmc->kmc_caches, p, 3919 sizeof (uintptr_t) * kmc->kmc_size); 3920 3921 kmc->kmc_caches = p; 3922 kmc->kmc_size = s; 3923 } 3924 3925 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3926 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3927 } 3928 3929 return (WALK_NEXT); 3930 } 3931 3932 /* 3933 * The second piece of ::kmausers is a hash table of allocations. Each 3934 * allocation owner is identified by its stack trace and data_size. We then 3935 * track the total bytes of all such allocations, and the number of allocations 3936 * to report at the end. Once we have a list of caches, we walk through the 3937 * allocated bufctls of each, and update our hash table accordingly. 3938 */ 3939 3940 typedef struct kmowner { 3941 struct kmowner *kmo_head; /* First hash elt in bucket */ 3942 struct kmowner *kmo_next; /* Next hash elt in chain */ 3943 size_t kmo_signature; /* Hash table signature */ 3944 uint_t kmo_num; /* Number of allocations */ 3945 size_t kmo_data_size; /* Size of each allocation */ 3946 size_t kmo_total_size; /* Total bytes of allocation */ 3947 int kmo_depth; /* Depth of stack trace */ 3948 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3949 } kmowner_t; 3950 3951 typedef struct kmusers { 3952 uintptr_t kmu_addr; /* address of interest */ 3953 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3954 kmowner_t *kmu_hash; /* Hash table of owners */ 3955 int kmu_nelems; /* Number of entries in use */ 3956 int kmu_size; /* Total number of entries */ 3957 } kmusers_t; 3958 3959 static void 3960 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3961 size_t size, size_t data_size) 3962 { 3963 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3964 size_t bucket, signature = data_size; 3965 kmowner_t *kmo, *kmoend; 3966 3967 /* 3968 * If the hash table is full, double its size and rehash everything. 3969 */ 3970 if (kmu->kmu_nelems >= kmu->kmu_size) { 3971 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3972 3973 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3974 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 3975 kmu->kmu_hash = kmo; 3976 kmu->kmu_size = s; 3977 3978 kmoend = kmu->kmu_hash + kmu->kmu_size; 3979 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 3980 kmo->kmo_head = NULL; 3981 3982 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 3983 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 3984 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 3985 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 3986 kmu->kmu_hash[bucket].kmo_head = kmo; 3987 } 3988 } 3989 3990 /* 3991 * Finish computing the hash signature from the stack trace, and then 3992 * see if the owner is in the hash table. If so, update our stats. 3993 */ 3994 for (i = 0; i < depth; i++) 3995 signature += bcp->bc_stack[i]; 3996 3997 bucket = signature & (kmu->kmu_size - 1); 3998 3999 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4000 if (kmo->kmo_signature == signature) { 4001 size_t difference = 0; 4002 4003 difference |= kmo->kmo_data_size - data_size; 4004 difference |= kmo->kmo_depth - depth; 4005 4006 for (i = 0; i < depth; i++) { 4007 difference |= kmo->kmo_stack[i] - 4008 bcp->bc_stack[i]; 4009 } 4010 4011 if (difference == 0) { 4012 kmo->kmo_total_size += size; 4013 kmo->kmo_num++; 4014 return; 4015 } 4016 } 4017 } 4018 4019 /* 4020 * If the owner is not yet hashed, grab the next element and fill it 4021 * in based on the allocation information. 4022 */ 4023 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4024 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4025 kmu->kmu_hash[bucket].kmo_head = kmo; 4026 4027 kmo->kmo_signature = signature; 4028 kmo->kmo_num = 1; 4029 kmo->kmo_data_size = data_size; 4030 kmo->kmo_total_size = size; 4031 kmo->kmo_depth = depth; 4032 4033 for (i = 0; i < depth; i++) 4034 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4035 } 4036 4037 /* 4038 * When ::kmausers is invoked without the -f flag, we simply update our hash 4039 * table with the information from each allocated bufctl. 4040 */ 4041 /*ARGSUSED*/ 4042 static int 4043 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4044 { 4045 const kmem_cache_t *cp = kmu->kmu_cache; 4046 4047 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4048 return (WALK_NEXT); 4049 } 4050 4051 /* 4052 * When ::kmausers is invoked with the -f flag, we print out the information 4053 * for each bufctl as well as updating the hash table. 4054 */ 4055 static int 4056 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4057 { 4058 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4059 const kmem_cache_t *cp = kmu->kmu_cache; 4060 kmem_bufctl_t bufctl; 4061 4062 if (kmu->kmu_addr) { 4063 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4064 mdb_warn("couldn't read bufctl at %p", addr); 4065 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4066 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4067 cp->cache_bufsize) 4068 return (WALK_NEXT); 4069 } 4070 4071 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4072 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4073 4074 for (i = 0; i < depth; i++) 4075 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4076 4077 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4078 return (WALK_NEXT); 4079 } 4080 4081 /* 4082 * We sort our results by allocation size before printing them. 4083 */ 4084 static int 4085 kmownercmp(const void *lp, const void *rp) 4086 { 4087 const kmowner_t *lhs = lp; 4088 const kmowner_t *rhs = rp; 4089 4090 return (rhs->kmo_total_size - lhs->kmo_total_size); 4091 } 4092 4093 /* 4094 * The main engine of ::kmausers is relatively straightforward: First we 4095 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4096 * iterate over the allocated bufctls of each cache in the list. Finally, 4097 * we sort and print our results. 4098 */ 4099 /*ARGSUSED*/ 4100 int 4101 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4102 { 4103 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4104 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4105 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4106 int do_all_caches = 1; /* Do all caches (no arguments) */ 4107 int opt_e = FALSE; /* Include "small" users */ 4108 int opt_f = FALSE; /* Print stack traces */ 4109 4110 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4111 kmowner_t *kmo, *kmoend; 4112 int i, oelems; 4113 4114 kmclist_t kmc; 4115 kmusers_t kmu; 4116 4117 bzero(&kmc, sizeof (kmc)); 4118 bzero(&kmu, sizeof (kmu)); 4119 4120 while ((i = mdb_getopts(argc, argv, 4121 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4122 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4123 4124 argv += i; /* skip past options we just processed */ 4125 argc -= i; /* adjust argc */ 4126 4127 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4128 return (DCMD_USAGE); 4129 4130 oelems = kmc.kmc_nelems; 4131 kmc.kmc_name = argv->a_un.a_str; 4132 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4133 4134 if (kmc.kmc_nelems == oelems) { 4135 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4136 return (DCMD_ERR); 4137 } 4138 4139 do_all_caches = 0; 4140 argv++; 4141 argc--; 4142 } 4143 4144 if (flags & DCMD_ADDRSPEC) { 4145 opt_f = TRUE; 4146 kmu.kmu_addr = addr; 4147 } else { 4148 kmu.kmu_addr = NULL; 4149 } 4150 4151 if (opt_e) 4152 mem_threshold = cnt_threshold = 0; 4153 4154 if (opt_f) 4155 callback = (mdb_walk_cb_t)kmause2; 4156 4157 if (do_all_caches) { 4158 kmc.kmc_name = NULL; /* match all cache names */ 4159 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4160 } 4161 4162 for (i = 0; i < kmc.kmc_nelems; i++) { 4163 uintptr_t cp = kmc.kmc_caches[i]; 4164 kmem_cache_t c; 4165 4166 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4167 mdb_warn("failed to read cache at %p", cp); 4168 continue; 4169 } 4170 4171 if (!(c.cache_flags & KMF_AUDIT)) { 4172 if (!do_all_caches) { 4173 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4174 c.cache_name); 4175 } 4176 continue; 4177 } 4178 4179 kmu.kmu_cache = &c; 4180 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4181 audited_caches++; 4182 } 4183 4184 if (audited_caches == 0 && do_all_caches) { 4185 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4186 return (DCMD_ERR); 4187 } 4188 4189 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4190 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4191 4192 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4193 if (kmo->kmo_total_size < mem_threshold && 4194 kmo->kmo_num < cnt_threshold) 4195 continue; 4196 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4197 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4198 for (i = 0; i < kmo->kmo_depth; i++) 4199 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4200 } 4201 4202 return (DCMD_OK); 4203 } 4204 4205 void 4206 kmausers_help(void) 4207 { 4208 mdb_printf( 4209 "Displays the largest users of the kmem allocator, sorted by \n" 4210 "trace. If one or more caches is specified, only those caches\n" 4211 "will be searched. By default, all caches are searched. If an\n" 4212 "address is specified, then only those allocations which include\n" 4213 "the given address are displayed. Specifying an address implies\n" 4214 "-f.\n" 4215 "\n" 4216 "\t-e\tInclude all users, not just the largest\n" 4217 "\t-f\tDisplay individual allocations. By default, users are\n" 4218 "\t\tgrouped by stack\n"); 4219 } 4220 4221 static int 4222 kmem_ready_check(void) 4223 { 4224 int ready; 4225 4226 if (mdb_readvar(&ready, "kmem_ready") < 0) 4227 return (-1); /* errno is set for us */ 4228 4229 return (ready); 4230 } 4231 4232 void 4233 kmem_statechange(void) 4234 { 4235 static int been_ready = 0; 4236 4237 if (been_ready) 4238 return; 4239 4240 if (kmem_ready_check() <= 0) 4241 return; 4242 4243 been_ready = 1; 4244 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 4245 } 4246 4247 void 4248 kmem_init(void) 4249 { 4250 mdb_walker_t w = { 4251 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 4252 list_walk_step, list_walk_fini 4253 }; 4254 4255 /* 4256 * If kmem is ready, we'll need to invoke the kmem_cache walker 4257 * immediately. Walkers in the linkage structure won't be ready until 4258 * _mdb_init returns, so we'll need to add this one manually. If kmem 4259 * is ready, we'll use the walker to initialize the caches. If kmem 4260 * isn't ready, we'll register a callback that will allow us to defer 4261 * cache walking until it is. 4262 */ 4263 if (mdb_add_walker(&w) != 0) { 4264 mdb_warn("failed to add kmem_cache walker"); 4265 return; 4266 } 4267 4268 kmem_statechange(); 4269 4270 /* register our ::whatis handlers */ 4271 mdb_whatis_register("modules", whatis_run_modules, NULL, 4272 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID); 4273 mdb_whatis_register("threads", whatis_run_threads, NULL, 4274 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID); 4275 mdb_whatis_register("pages", whatis_run_pages, NULL, 4276 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID); 4277 mdb_whatis_register("kmem", whatis_run_kmem, NULL, 4278 WHATIS_PRIO_ALLOCATOR, 0); 4279 mdb_whatis_register("vmem", whatis_run_vmem, NULL, 4280 WHATIS_PRIO_ALLOCATOR, 0); 4281 } 4282 4283 typedef struct whatthread { 4284 uintptr_t wt_target; 4285 int wt_verbose; 4286 } whatthread_t; 4287 4288 static int 4289 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 4290 { 4291 uintptr_t current, data; 4292 4293 if (t->t_stkbase == NULL) 4294 return (WALK_NEXT); 4295 4296 /* 4297 * Warn about swapped out threads, but drive on anyway 4298 */ 4299 if (!(t->t_schedflag & TS_LOAD)) { 4300 mdb_warn("thread %p's stack swapped out\n", addr); 4301 return (WALK_NEXT); 4302 } 4303 4304 /* 4305 * Search the thread's stack for the given pointer. Note that it would 4306 * be more efficient to follow ::kgrep's lead and read in page-sized 4307 * chunks, but this routine is already fast and simple. 4308 */ 4309 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 4310 current += sizeof (uintptr_t)) { 4311 if (mdb_vread(&data, sizeof (data), current) == -1) { 4312 mdb_warn("couldn't read thread %p's stack at %p", 4313 addr, current); 4314 return (WALK_ERR); 4315 } 4316 4317 if (data == w->wt_target) { 4318 if (w->wt_verbose) { 4319 mdb_printf("%p in thread %p's stack%s\n", 4320 current, addr, stack_active(t, current)); 4321 } else { 4322 mdb_printf("%#lr\n", addr); 4323 return (WALK_NEXT); 4324 } 4325 } 4326 } 4327 4328 return (WALK_NEXT); 4329 } 4330 4331 int 4332 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4333 { 4334 whatthread_t w; 4335 4336 if (!(flags & DCMD_ADDRSPEC)) 4337 return (DCMD_USAGE); 4338 4339 w.wt_verbose = FALSE; 4340 w.wt_target = addr; 4341 4342 if (mdb_getopts(argc, argv, 4343 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 4344 return (DCMD_USAGE); 4345 4346 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 4347 == -1) { 4348 mdb_warn("couldn't walk threads"); 4349 return (DCMD_ERR); 4350 } 4351 4352 return (DCMD_OK); 4353 } 4354