/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "avl.h"
#include "combined.h"
#include "dist.h"
#include "kmem.h"
#include "list.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ?
"on" : "off"); 92 93 return (DCMD_OK); 94 } 95 96 int 97 kmem_cache_walk_init(mdb_walk_state_t *wsp) 98 { 99 GElf_Sym sym; 100 101 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) { 102 mdb_warn("couldn't find kmem_caches"); 103 return (WALK_ERR); 104 } 105 106 wsp->walk_addr = (uintptr_t)sym.st_value; 107 108 return (list_walk_init_named(wsp, "cache list", "cache")); 109 } 110 111 int 112 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 113 { 114 if (wsp->walk_addr == NULL) { 115 mdb_warn("kmem_cpu_cache doesn't support global walks"); 116 return (WALK_ERR); 117 } 118 119 if (mdb_layered_walk("cpu", wsp) == -1) { 120 mdb_warn("couldn't walk 'cpu'"); 121 return (WALK_ERR); 122 } 123 124 wsp->walk_data = (void *)wsp->walk_addr; 125 126 return (WALK_NEXT); 127 } 128 129 int 130 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 131 { 132 uintptr_t caddr = (uintptr_t)wsp->walk_data; 133 const cpu_t *cpu = wsp->walk_layer; 134 kmem_cpu_cache_t cc; 135 136 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]); 137 138 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 139 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 140 return (WALK_ERR); 141 } 142 143 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 144 } 145 146 static int 147 kmem_slab_check(void *p, uintptr_t saddr, void *arg) 148 { 149 kmem_slab_t *sp = p; 150 uintptr_t caddr = (uintptr_t)arg; 151 if ((uintptr_t)sp->slab_cache != caddr) { 152 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 153 saddr, caddr, sp->slab_cache); 154 return (-1); 155 } 156 157 return (0); 158 } 159 160 static int 161 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg) 162 { 163 kmem_slab_t *sp = p; 164 165 int rc = kmem_slab_check(p, saddr, arg); 166 if (rc != 0) { 167 return (rc); 168 } 169 170 if (!KMEM_SLAB_IS_PARTIAL(sp)) { 171 mdb_warn("slab %p is not a partial slab\n", saddr); 172 return (-1); 173 } 174 175 return (0); 176 } 177 178 static int 179 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg) 180 { 181 kmem_slab_t *sp = p; 182 183 int rc = kmem_slab_check(p, saddr, arg); 184 if (rc != 0) { 185 return (rc); 186 } 187 188 if (!KMEM_SLAB_IS_ALL_USED(sp)) { 189 mdb_warn("slab %p is not completely allocated\n", saddr); 190 return (-1); 191 } 192 193 return (0); 194 } 195 196 typedef struct { 197 uintptr_t kns_cache_addr; 198 int kns_nslabs; 199 } kmem_nth_slab_t; 200 201 static int 202 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg) 203 { 204 kmem_nth_slab_t *chkp = arg; 205 206 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr); 207 if (rc != 0) { 208 return (rc); 209 } 210 211 return (chkp->kns_nslabs-- == 0 ? 
1 : 0); 212 } 213 214 static int 215 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp) 216 { 217 uintptr_t caddr = wsp->walk_addr; 218 219 wsp->walk_addr = (uintptr_t)(caddr + 220 offsetof(kmem_cache_t, cache_complete_slabs)); 221 222 return (list_walk_init_checked(wsp, "slab list", "slab", 223 kmem_complete_slab_check, (void *)caddr)); 224 } 225 226 static int 227 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp) 228 { 229 uintptr_t caddr = wsp->walk_addr; 230 231 wsp->walk_addr = (uintptr_t)(caddr + 232 offsetof(kmem_cache_t, cache_partial_slabs)); 233 234 return (avl_walk_init_checked(wsp, "slab list", "slab", 235 kmem_partial_slab_check, (void *)caddr)); 236 } 237 238 int 239 kmem_slab_walk_init(mdb_walk_state_t *wsp) 240 { 241 uintptr_t caddr = wsp->walk_addr; 242 243 if (caddr == NULL) { 244 mdb_warn("kmem_slab doesn't support global walks\n"); 245 return (WALK_ERR); 246 } 247 248 combined_walk_init(wsp); 249 combined_walk_add(wsp, 250 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini); 251 combined_walk_add(wsp, 252 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini); 253 254 return (WALK_NEXT); 255 } 256 257 static int 258 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp) 259 { 260 uintptr_t caddr = wsp->walk_addr; 261 kmem_nth_slab_t *chk; 262 263 chk = mdb_alloc(sizeof (kmem_nth_slab_t), 264 UM_SLEEP | UM_GC); 265 chk->kns_cache_addr = caddr; 266 chk->kns_nslabs = 1; 267 wsp->walk_addr = (uintptr_t)(caddr + 268 offsetof(kmem_cache_t, cache_complete_slabs)); 269 270 return (list_walk_init_checked(wsp, "slab list", "slab", 271 kmem_nth_slab_check, chk)); 272 } 273 274 int 275 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 276 { 277 uintptr_t caddr = wsp->walk_addr; 278 kmem_cache_t c; 279 280 if (caddr == NULL) { 281 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 282 return (WALK_ERR); 283 } 284 285 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 286 mdb_warn("couldn't read kmem_cache at %p", caddr); 287 return (WALK_ERR); 288 } 289 290 combined_walk_init(wsp); 291 292 /* 293 * Some consumers (umem_walk_step(), in particular) require at 294 * least one callback if there are any buffers in the cache. So 295 * if there are *no* partial slabs, report the first full slab, if 296 * any. 297 * 298 * Yes, this is ugly, but it's cleaner than the other possibilities. 
299 */ 300 if (c.cache_partial_slabs.avl_numnodes == 0) { 301 combined_walk_add(wsp, kmem_first_complete_slab_walk_init, 302 list_walk_step, list_walk_fini); 303 } else { 304 combined_walk_add(wsp, kmem_partial_slab_walk_init, 305 avl_walk_step, avl_walk_fini); 306 } 307 308 return (WALK_NEXT); 309 } 310 311 int 312 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 313 { 314 kmem_cache_t c; 315 const char *filter = NULL; 316 317 if (mdb_getopts(ac, argv, 318 'n', MDB_OPT_STR, &filter, 319 NULL) != ac) { 320 return (DCMD_USAGE); 321 } 322 323 if (!(flags & DCMD_ADDRSPEC)) { 324 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 325 mdb_warn("can't walk kmem_cache"); 326 return (DCMD_ERR); 327 } 328 return (DCMD_OK); 329 } 330 331 if (DCMD_HDRSPEC(flags)) 332 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 333 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 334 335 if (mdb_vread(&c, sizeof (c), addr) == -1) { 336 mdb_warn("couldn't read kmem_cache at %p", addr); 337 return (DCMD_ERR); 338 } 339 340 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) 341 return (DCMD_OK); 342 343 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 344 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 345 346 return (DCMD_OK); 347 } 348 349 void 350 kmem_cache_help(void) 351 { 352 mdb_printf("%s", "Print kernel memory caches.\n\n"); 353 mdb_dec_indent(2); 354 mdb_printf("%<b>OPTIONS%</b>\n"); 355 mdb_inc_indent(2); 356 mdb_printf("%s", 357 " -n name\n" 358 " name of kmem cache (or matching partial name)\n" 359 "\n" 360 "Column\tDescription\n" 361 "\n" 362 "ADDR\t\taddress of kmem cache\n" 363 "NAME\t\tname of kmem cache\n" 364 "FLAG\t\tvarious cache state flags\n" 365 "CFLAG\t\tcache creation flags\n" 366 "BUFSIZE\tobject size in bytes\n" 367 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n"); 368 } 369 370 #define LABEL_WIDTH 11 371 static void 372 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 373 size_t maxbuckets, size_t minbucketsize) 374 { 375 uint64_t total; 376 int buckets; 377 int i; 378 const int *distarray; 379 int complete[2]; 380 381 buckets = buffers_per_slab; 382 383 total = 0; 384 for (i = 0; i <= buffers_per_slab; i++) 385 total += ks_bucket[i]; 386 387 if (maxbuckets > 1) 388 buckets = MIN(buckets, maxbuckets); 389 390 if (minbucketsize > 1) { 391 /* 392 * minbucketsize does not apply to the first bucket reserved 393 * for completely allocated slabs 394 */ 395 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 396 minbucketsize)); 397 if ((buckets < 2) && (buffers_per_slab > 1)) { 398 buckets = 2; 399 minbucketsize = (buffers_per_slab - 1); 400 } 401 } 402 403 /* 404 * The first printed bucket is reserved for completely allocated slabs. 405 * Passing (buckets - 1) excludes that bucket from the generated 406 * distribution, since we're handling it as a special case. 407 */ 408 complete[0] = buffers_per_slab; 409 complete[1] = buffers_per_slab + 1; 410 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1); 411 412 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated"); 413 dist_print_header("Buffers", LABEL_WIDTH, "Slabs"); 414 415 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH); 416 /* 417 * Print bucket ranges in descending order after the first bucket for 418 * completely allocated slabs, so a person can see immediately whether 419 * or not there is fragmentation without having to scan possibly 420 * multiple screens of output. 
Starting at (buckets - 2) excludes the 421 * extra terminating bucket. 422 */ 423 for (i = buckets - 2; i >= 0; i--) { 424 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH); 425 } 426 mdb_printf("\n"); 427 } 428 #undef LABEL_WIDTH 429 430 /*ARGSUSED*/ 431 static int 432 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab) 433 { 434 *is_slab = B_TRUE; 435 return (WALK_DONE); 436 } 437 438 /*ARGSUSED*/ 439 static int 440 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp, 441 boolean_t *is_slab) 442 { 443 /* 444 * The "kmem_partial_slab" walker reports the first full slab if there 445 * are no partial slabs (for the sake of consumers that require at least 446 * one callback if there are any buffers in the cache). 447 */ 448 *is_slab = KMEM_SLAB_IS_PARTIAL(sp); 449 return (WALK_DONE); 450 } 451 452 typedef struct kmem_slab_usage { 453 int ksu_refcnt; /* count of allocated buffers on slab */ 454 boolean_t ksu_nomove; /* slab marked non-reclaimable */ 455 } kmem_slab_usage_t; 456 457 typedef struct kmem_slab_stats { 458 const kmem_cache_t *ks_cp; 459 int ks_slabs; /* slabs in cache */ 460 int ks_partial_slabs; /* partially allocated slabs in cache */ 461 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 462 int ks_max_buffers_per_slab; /* max buffers per slab */ 463 int ks_usage_len; /* ks_usage array length */ 464 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 465 uint_t *ks_bucket; /* slab usage distribution */ 466 } kmem_slab_stats_t; 467 468 /*ARGSUSED*/ 469 static int 470 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp, 471 kmem_slab_stats_t *ks) 472 { 473 kmem_slab_usage_t *ksu; 474 long unused; 475 476 ks->ks_slabs++; 477 ks->ks_bucket[sp->slab_refcnt]++; 478 479 unused = (sp->slab_chunks - sp->slab_refcnt); 480 if (unused == 0) { 481 return (WALK_NEXT); 482 } 483 484 ks->ks_partial_slabs++; 485 ks->ks_unused_buffers += unused; 486 487 if (ks->ks_partial_slabs > ks->ks_usage_len) { 488 kmem_slab_usage_t *usage; 489 int len = ks->ks_usage_len; 490 491 len = (len == 0 ? 
16 : len * 2); 492 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 493 if (ks->ks_usage != NULL) { 494 bcopy(ks->ks_usage, usage, 495 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 496 mdb_free(ks->ks_usage, 497 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 498 } 499 ks->ks_usage = usage; 500 ks->ks_usage_len = len; 501 } 502 503 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 504 ksu->ksu_refcnt = sp->slab_refcnt; 505 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); 506 return (WALK_NEXT); 507 } 508 509 static void 510 kmem_slabs_header() 511 { 512 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 513 "", "", "Partial", "", "Unused", ""); 514 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 515 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 516 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 517 "-------------------------", "--------", "--------", "---------", 518 "---------", "------"); 519 } 520 521 int 522 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 523 { 524 kmem_cache_t c; 525 kmem_slab_stats_t stats; 526 mdb_walk_cb_t cb; 527 int pct; 528 int tenths_pct; 529 size_t maxbuckets = 1; 530 size_t minbucketsize = 0; 531 const char *filter = NULL; 532 const char *name = NULL; 533 uint_t opt_v = FALSE; 534 boolean_t buckets = B_FALSE; 535 boolean_t skip = B_FALSE; 536 537 if (mdb_getopts(argc, argv, 538 'B', MDB_OPT_UINTPTR, &minbucketsize, 539 'b', MDB_OPT_UINTPTR, &maxbuckets, 540 'n', MDB_OPT_STR, &filter, 541 'N', MDB_OPT_STR, &name, 542 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 543 NULL) != argc) { 544 return (DCMD_USAGE); 545 } 546 547 if ((maxbuckets != 1) || (minbucketsize != 0)) { 548 buckets = B_TRUE; 549 } 550 551 if (!(flags & DCMD_ADDRSPEC)) { 552 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 553 argv) == -1) { 554 mdb_warn("can't walk kmem_cache"); 555 return (DCMD_ERR); 556 } 557 return (DCMD_OK); 558 } 559 560 if (mdb_vread(&c, sizeof (c), addr) == -1) { 561 mdb_warn("couldn't read kmem_cache at %p", addr); 562 return (DCMD_ERR); 563 } 564 565 if (name == NULL) { 566 skip = ((filter != NULL) && 567 (strstr(c.cache_name, filter) == NULL)); 568 } else if (filter == NULL) { 569 skip = (strcmp(c.cache_name, name) != 0); 570 } else { 571 /* match either -n or -N */ 572 skip = ((strcmp(c.cache_name, name) != 0) && 573 (strstr(c.cache_name, filter) == NULL)); 574 } 575 576 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) { 577 kmem_slabs_header(); 578 } else if ((opt_v || buckets) && !skip) { 579 if (DCMD_HDRSPEC(flags)) { 580 kmem_slabs_header(); 581 } else { 582 boolean_t is_slab = B_FALSE; 583 const char *walker_name; 584 if (opt_v) { 585 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 586 walker_name = "kmem_slab_partial"; 587 } else { 588 cb = (mdb_walk_cb_t)kmem_first_slab; 589 walker_name = "kmem_slab"; 590 } 591 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 592 if (is_slab) { 593 kmem_slabs_header(); 594 } 595 } 596 } 597 598 if (skip) { 599 return (DCMD_OK); 600 } 601 602 bzero(&stats, sizeof (kmem_slab_stats_t)); 603 stats.ks_cp = &c; 604 stats.ks_max_buffers_per_slab = c.cache_maxchunks; 605 /* +1 to include a zero bucket */ 606 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) * 607 sizeof (*stats.ks_bucket), UM_SLEEP); 608 cb = (mdb_walk_cb_t)kmem_slablist_stat; 609 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 610 611 if (c.cache_buftotal == 0) { 612 pct = 0; 613 tenths_pct = 0; 614 } else { 615 uint64_t n = stats.ks_unused_buffers * 10000; 616 pct = (int)(n / c.cache_buftotal); 617 tenths_pct = pct 
- ((pct / 100) * 100); 618 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 619 if (tenths_pct == 10) { 620 pct += 100; 621 tenths_pct = 0; 622 } 623 } 624 625 pct /= 100; 626 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 627 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 628 stats.ks_unused_buffers, pct, tenths_pct); 629 630 if (maxbuckets == 0) { 631 maxbuckets = stats.ks_max_buffers_per_slab; 632 } 633 634 if (((maxbuckets > 1) || (minbucketsize > 0)) && 635 (stats.ks_slabs > 0)) { 636 mdb_printf("\n"); 637 kmem_slabs_print_dist(stats.ks_bucket, 638 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize); 639 } 640 641 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) * 642 sizeof (*stats.ks_bucket)); 643 644 if (!opt_v) { 645 return (DCMD_OK); 646 } 647 648 if (opt_v && (stats.ks_partial_slabs > 0)) { 649 int i; 650 kmem_slab_usage_t *ksu; 651 652 mdb_printf(" %d complete (%d), %d partial:", 653 (stats.ks_slabs - stats.ks_partial_slabs), 654 stats.ks_max_buffers_per_slab, 655 stats.ks_partial_slabs); 656 657 for (i = 0; i < stats.ks_partial_slabs; i++) { 658 ksu = &stats.ks_usage[i]; 659 mdb_printf(" %d%s", ksu->ksu_refcnt, 660 (ksu->ksu_nomove ? "*" : "")); 661 } 662 mdb_printf("\n\n"); 663 } 664 665 if (stats.ks_usage_len > 0) { 666 mdb_free(stats.ks_usage, 667 stats.ks_usage_len * sizeof (kmem_slab_usage_t)); 668 } 669 670 return (DCMD_OK); 671 } 672 673 void 674 kmem_slabs_help(void) 675 { 676 mdb_printf("%s", 677 "Display slab usage per kmem cache.\n\n"); 678 mdb_dec_indent(2); 679 mdb_printf("%<b>OPTIONS%</b>\n"); 680 mdb_inc_indent(2); 681 mdb_printf("%s", 682 " -n name\n" 683 " name of kmem cache (or matching partial name)\n" 684 " -N name\n" 685 " exact name of kmem cache\n" 686 " -b maxbins\n" 687 " Print a distribution of allocated buffers per slab using at\n" 688 " most maxbins bins. The first bin is reserved for completely\n" 689 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n" 690 " effect as specifying the maximum allocated buffers per slab\n" 691 " or setting minbinsize to 1 (-B 1).\n" 692 " -B minbinsize\n" 693 " Print a distribution of allocated buffers per slab, making\n" 694 " all bins (except the first, reserved for completely allocated\n" 695 " slabs) at least minbinsize buffers apart.\n" 696 " -v verbose output: List the allocated buffer count of each partial\n" 697 " slab on the free list in order from front to back to show how\n" 698 " closely the slabs are ordered by usage. For example\n" 699 "\n" 700 " 10 complete, 3 partial (8): 7 3 1\n" 701 "\n" 702 " means there are thirteen slabs with eight buffers each, including\n" 703 " three partially allocated slabs with less than all eight buffers\n" 704 " allocated.\n" 705 "\n" 706 " Buffer allocations are always from the front of the partial slab\n" 707 " list. When a buffer is freed from a completely used slab, that\n" 708 " slab is added to the front of the partial slab list. Assuming\n" 709 " that all buffers are equally likely to be freed soon, the\n" 710 " desired order of partial slabs is most-used at the front of the\n" 711 " list and least-used at the back (as in the example above).\n" 712 " However, if a slab contains an allocated buffer that will not\n" 713 " soon be freed, it would be better for that slab to be at the\n" 714 " front where all of its buffers can be allocated. 
Taking a slab\n" 715 " off the partial slab list (either with all buffers freed or all\n" 716 " buffers allocated) reduces cache fragmentation.\n" 717 "\n" 718 " A slab's allocated buffer count representing a partial slab (9 in\n" 719 " the example below) may be marked as follows:\n" 720 "\n" 721 " 9* An asterisk indicates that kmem has marked the slab non-\n" 722 " reclaimable because the kmem client refused to move one of the\n" 723 " slab's buffers. Since kmem does not expect to completely free the\n" 724 " slab, it moves it to the front of the list in the hope of\n" 725 " completely allocating it instead. A slab marked with an asterisk\n" 726 " stays marked for as long as it remains on the partial slab list.\n" 727 "\n" 728 "Column\t\tDescription\n" 729 "\n" 730 "Cache Name\t\tname of kmem cache\n" 731 "Slabs\t\t\ttotal slab count\n" 732 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 733 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 734 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 735 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 736 "\t\t\t for accounting structures (debug mode), slab\n" 737 "\t\t\t coloring (incremental small offsets to stagger\n" 738 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 739 } 740 741 static int 742 addrcmp(const void *lhs, const void *rhs) 743 { 744 uintptr_t p1 = *((uintptr_t *)lhs); 745 uintptr_t p2 = *((uintptr_t *)rhs); 746 747 if (p1 < p2) 748 return (-1); 749 if (p1 > p2) 750 return (1); 751 return (0); 752 } 753 754 static int 755 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 756 { 757 const kmem_bufctl_audit_t *bcp1 = *lhs; 758 const kmem_bufctl_audit_t *bcp2 = *rhs; 759 760 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 761 return (-1); 762 763 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 764 return (1); 765 766 return (0); 767 } 768 769 typedef struct kmem_hash_walk { 770 uintptr_t *kmhw_table; 771 size_t kmhw_nelems; 772 size_t kmhw_pos; 773 kmem_bufctl_t kmhw_cur; 774 } kmem_hash_walk_t; 775 776 int 777 kmem_hash_walk_init(mdb_walk_state_t *wsp) 778 { 779 kmem_hash_walk_t *kmhw; 780 uintptr_t *hash; 781 kmem_cache_t c; 782 uintptr_t haddr, addr = wsp->walk_addr; 783 size_t nelems; 784 size_t hsize; 785 786 if (addr == NULL) { 787 mdb_warn("kmem_hash doesn't support global walks\n"); 788 return (WALK_ERR); 789 } 790 791 if (mdb_vread(&c, sizeof (c), addr) == -1) { 792 mdb_warn("couldn't read cache at addr %p", addr); 793 return (WALK_ERR); 794 } 795 796 if (!(c.cache_flags & KMF_HASH)) { 797 mdb_warn("cache %p doesn't have a hash table\n", addr); 798 return (WALK_DONE); /* nothing to do */ 799 } 800 801 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 802 kmhw->kmhw_cur.bc_next = NULL; 803 kmhw->kmhw_pos = 0; 804 805 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 806 hsize = nelems * sizeof (uintptr_t); 807 haddr = (uintptr_t)c.cache_hash_table; 808 809 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 810 if (mdb_vread(hash, hsize, haddr) == -1) { 811 mdb_warn("failed to read hash table at %p", haddr); 812 mdb_free(hash, hsize); 813 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 814 return (WALK_ERR); 815 } 816 817 wsp->walk_data = kmhw; 818 819 return (WALK_NEXT); 820 } 821 822 int 823 kmem_hash_walk_step(mdb_walk_state_t *wsp) 824 { 825 kmem_hash_walk_t *kmhw = wsp->walk_data; 826 uintptr_t addr = NULL; 827 828 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 829 while (kmhw->kmhw_pos < 
kmhw->kmhw_nelems) { 830 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 831 break; 832 } 833 } 834 if (addr == NULL) 835 return (WALK_DONE); 836 837 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 838 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 839 return (WALK_ERR); 840 } 841 842 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 843 } 844 845 void 846 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 847 { 848 kmem_hash_walk_t *kmhw = wsp->walk_data; 849 850 if (kmhw == NULL) 851 return; 852 853 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 854 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 855 } 856 857 /* 858 * Find the address of the bufctl structure for the address 'buf' in cache 859 * 'cp', which is at address caddr, and place it in *out. 860 */ 861 static int 862 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 863 { 864 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 865 kmem_bufctl_t *bcp; 866 kmem_bufctl_t bc; 867 868 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 869 mdb_warn("unable to read hash bucket for %p in cache %p", 870 buf, caddr); 871 return (-1); 872 } 873 874 while (bcp != NULL) { 875 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 876 (uintptr_t)bcp) == -1) { 877 mdb_warn("unable to read bufctl at %p", bcp); 878 return (-1); 879 } 880 if (bc.bc_addr == buf) { 881 *out = (uintptr_t)bcp; 882 return (0); 883 } 884 bcp = bc.bc_next; 885 } 886 887 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 888 return (-1); 889 } 890 891 int 892 kmem_get_magsize(const kmem_cache_t *cp) 893 { 894 uintptr_t addr = (uintptr_t)cp->cache_magtype; 895 GElf_Sym mt_sym; 896 kmem_magtype_t mt; 897 int res; 898 899 /* 900 * if cpu 0 has a non-zero magsize, it must be correct. caches 901 * with KMF_NOMAGAZINE have disabled their magazine layers, so 902 * it is okay to return 0 for them. 903 */ 904 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 905 (cp->cache_flags & KMF_NOMAGAZINE)) 906 return (res); 907 908 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 909 mdb_warn("unable to read 'kmem_magtype'"); 910 } else if (addr < mt_sym.st_value || 911 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 912 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 913 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 914 cp->cache_name, addr); 915 return (0); 916 } 917 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 918 mdb_warn("unable to read magtype at %a", addr); 919 return (0); 920 } 921 return (mt.mt_magsize); 922 } 923 924 /*ARGSUSED*/ 925 static int 926 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 927 { 928 *est -= (sp->slab_chunks - sp->slab_refcnt); 929 930 return (WALK_NEXT); 931 } 932 933 /* 934 * Returns an upper bound on the number of allocated buffers in a given 935 * cache. 
936 */ 937 size_t 938 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 939 { 940 int magsize; 941 size_t cache_est; 942 943 cache_est = cp->cache_buftotal; 944 945 (void) mdb_pwalk("kmem_slab_partial", 946 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 947 948 if ((magsize = kmem_get_magsize(cp)) != 0) { 949 size_t mag_est = cp->cache_full.ml_total * magsize; 950 951 if (cache_est >= mag_est) { 952 cache_est -= mag_est; 953 } else { 954 mdb_warn("cache %p's magazine layer holds more buffers " 955 "than the slab layer.\n", addr); 956 } 957 } 958 return (cache_est); 959 } 960 961 #define READMAG_ROUNDS(rounds) { \ 962 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 963 mdb_warn("couldn't read magazine at %p", kmp); \ 964 goto fail; \ 965 } \ 966 for (i = 0; i < rounds; i++) { \ 967 maglist[magcnt++] = mp->mag_round[i]; \ 968 if (magcnt == magmax) { \ 969 mdb_warn("%d magazines exceeds fudge factor\n", \ 970 magcnt); \ 971 goto fail; \ 972 } \ 973 } \ 974 } 975 976 int 977 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 978 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 979 { 980 kmem_magazine_t *kmp, *mp; 981 void **maglist = NULL; 982 int i, cpu; 983 size_t magsize, magmax, magbsize; 984 size_t magcnt = 0; 985 986 /* 987 * Read the magtype out of the cache, after verifying the pointer's 988 * correctness. 989 */ 990 magsize = kmem_get_magsize(cp); 991 if (magsize == 0) { 992 *maglistp = NULL; 993 *magcntp = 0; 994 *magmaxp = 0; 995 return (WALK_NEXT); 996 } 997 998 /* 999 * There are several places where we need to go buffer hunting: 1000 * the per-CPU loaded magazine, the per-CPU spare full magazine, 1001 * and the full magazine list in the depot. 1002 * 1003 * For an upper bound on the number of buffers in the magazine 1004 * layer, we have the number of magazines on the cache_full 1005 * list plus at most two magazines per CPU (the loaded and the 1006 * spare). Toss in 100 magazines as a fudge factor in case this 1007 * is live (the number "100" comes from the same fudge factor in 1008 * crash(1M)). 1009 */ 1010 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 1011 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 1012 1013 if (magbsize >= PAGESIZE / 2) { 1014 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 1015 addr, magbsize); 1016 return (WALK_ERR); 1017 } 1018 1019 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 1020 mp = mdb_alloc(magbsize, alloc_flags); 1021 if (mp == NULL || maglist == NULL) 1022 goto fail; 1023 1024 /* 1025 * First up: the magazines in the depot (i.e. on the cache_full list). 1026 */ 1027 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 1028 READMAG_ROUNDS(magsize); 1029 kmp = mp->mag_next; 1030 1031 if (kmp == cp->cache_full.ml_list) 1032 break; /* cache_full list loop detected */ 1033 } 1034 1035 dprintf(("cache_full list done\n")); 1036 1037 /* 1038 * Now whip through the CPUs, snagging the loaded magazines 1039 * and full spares. 1040 * 1041 * In order to prevent inconsistent dumps, rounds and prounds 1042 * are copied aside before dumping begins. 
1043 */ 1044 for (cpu = 0; cpu < ncpus; cpu++) { 1045 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 1046 short rounds, prounds; 1047 1048 if (KMEM_DUMPCC(ccp)) { 1049 rounds = ccp->cc_dump_rounds; 1050 prounds = ccp->cc_dump_prounds; 1051 } else { 1052 rounds = ccp->cc_rounds; 1053 prounds = ccp->cc_prounds; 1054 } 1055 1056 dprintf(("reading cpu cache %p\n", 1057 (uintptr_t)ccp - (uintptr_t)cp + addr)); 1058 1059 if (rounds > 0 && 1060 (kmp = ccp->cc_loaded) != NULL) { 1061 dprintf(("reading %d loaded rounds\n", rounds)); 1062 READMAG_ROUNDS(rounds); 1063 } 1064 1065 if (prounds > 0 && 1066 (kmp = ccp->cc_ploaded) != NULL) { 1067 dprintf(("reading %d previously loaded rounds\n", 1068 prounds)); 1069 READMAG_ROUNDS(prounds); 1070 } 1071 } 1072 1073 dprintf(("magazine layer: %d buffers\n", magcnt)); 1074 1075 if (!(alloc_flags & UM_GC)) 1076 mdb_free(mp, magbsize); 1077 1078 *maglistp = maglist; 1079 *magcntp = magcnt; 1080 *magmaxp = magmax; 1081 1082 return (WALK_NEXT); 1083 1084 fail: 1085 if (!(alloc_flags & UM_GC)) { 1086 if (mp) 1087 mdb_free(mp, magbsize); 1088 if (maglist) 1089 mdb_free(maglist, magmax * sizeof (void *)); 1090 } 1091 return (WALK_ERR); 1092 } 1093 1094 static int 1095 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1096 { 1097 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1098 } 1099 1100 static int 1101 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1102 { 1103 kmem_bufctl_audit_t b; 1104 1105 /* 1106 * if KMF_AUDIT is not set, we know that we're looking at a 1107 * kmem_bufctl_t. 1108 */ 1109 if (!(cp->cache_flags & KMF_AUDIT) || 1110 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1111 (void) memset(&b, 0, sizeof (b)); 1112 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1113 mdb_warn("unable to read bufctl at %p", buf); 1114 return (WALK_ERR); 1115 } 1116 } 1117 1118 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1119 } 1120 1121 typedef struct kmem_walk { 1122 int kmw_type; 1123 1124 int kmw_addr; /* cache address */ 1125 kmem_cache_t *kmw_cp; 1126 size_t kmw_csize; 1127 1128 /* 1129 * magazine layer 1130 */ 1131 void **kmw_maglist; 1132 size_t kmw_max; 1133 size_t kmw_count; 1134 size_t kmw_pos; 1135 1136 /* 1137 * slab layer 1138 */ 1139 char *kmw_valid; /* to keep track of freed buffers */ 1140 char *kmw_ubase; /* buffer for slab data */ 1141 } kmem_walk_t; 1142 1143 static int 1144 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1145 { 1146 kmem_walk_t *kmw; 1147 int ncpus, csize; 1148 kmem_cache_t *cp; 1149 size_t vm_quantum; 1150 1151 size_t magmax, magcnt; 1152 void **maglist = NULL; 1153 uint_t chunksize, slabsize; 1154 int status = WALK_ERR; 1155 uintptr_t addr = wsp->walk_addr; 1156 const char *layered; 1157 1158 type &= ~KM_HASH; 1159 1160 if (addr == NULL) { 1161 mdb_warn("kmem walk doesn't support global walks\n"); 1162 return (WALK_ERR); 1163 } 1164 1165 dprintf(("walking %p\n", addr)); 1166 1167 /* 1168 * First we need to figure out how many CPUs are configured in the 1169 * system to know how much to slurp out. 1170 */ 1171 mdb_readvar(&ncpus, "max_ncpus"); 1172 1173 csize = KMEM_CACHE_SIZE(ncpus); 1174 cp = mdb_alloc(csize, UM_SLEEP); 1175 1176 if (mdb_vread(cp, csize, addr) == -1) { 1177 mdb_warn("couldn't read cache at addr %p", addr); 1178 goto out2; 1179 } 1180 1181 /* 1182 * It's easy for someone to hand us an invalid cache address. 1183 * Unfortunately, it is hard for this walker to survive an 1184 * invalid cache cleanly. 
So we make sure that: 1185 * 1186 * 1. the vmem arena for the cache is readable, 1187 * 2. the vmem arena's quantum is a power of 2, 1188 * 3. our slabsize is a multiple of the quantum, and 1189 * 4. our chunksize is >0 and less than our slabsize. 1190 */ 1191 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1192 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1193 vm_quantum == 0 || 1194 (vm_quantum & (vm_quantum - 1)) != 0 || 1195 cp->cache_slabsize < vm_quantum || 1196 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1197 cp->cache_chunksize == 0 || 1198 cp->cache_chunksize > cp->cache_slabsize) { 1199 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1200 goto out2; 1201 } 1202 1203 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1204 1205 if (cp->cache_buftotal == 0) { 1206 mdb_free(cp, csize); 1207 return (WALK_DONE); 1208 } 1209 1210 /* 1211 * If they ask for bufctls, but it's a small-slab cache, 1212 * there is nothing to report. 1213 */ 1214 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1215 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1216 cp->cache_flags)); 1217 mdb_free(cp, csize); 1218 return (WALK_DONE); 1219 } 1220 1221 /* 1222 * If they want constructed buffers, but there's no constructor or 1223 * the cache has DEADBEEF checking enabled, there is nothing to report. 1224 */ 1225 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1226 cp->cache_constructor == NULL || 1227 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1228 mdb_free(cp, csize); 1229 return (WALK_DONE); 1230 } 1231 1232 /* 1233 * Read in the contents of the magazine layer 1234 */ 1235 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1236 &magmax, UM_SLEEP) == WALK_ERR) 1237 goto out2; 1238 1239 /* 1240 * We have all of the buffers from the magazines; if we are walking 1241 * allocated buffers, sort them so we can bsearch them later. 1242 */ 1243 if (type & KM_ALLOCATED) 1244 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1245 1246 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1247 1248 kmw->kmw_type = type; 1249 kmw->kmw_addr = addr; 1250 kmw->kmw_cp = cp; 1251 kmw->kmw_csize = csize; 1252 kmw->kmw_maglist = maglist; 1253 kmw->kmw_max = magmax; 1254 kmw->kmw_count = magcnt; 1255 kmw->kmw_pos = 0; 1256 1257 /* 1258 * When walking allocated buffers in a KMF_HASH cache, we walk the 1259 * hash table instead of the slab layer. 1260 */ 1261 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1262 layered = "kmem_hash"; 1263 1264 kmw->kmw_type |= KM_HASH; 1265 } else { 1266 /* 1267 * If we are walking freed buffers, we only need the 1268 * magazine layer plus the partially allocated slabs. 1269 * To walk allocated buffers, we need all of the slabs. 1270 */ 1271 if (type & KM_ALLOCATED) 1272 layered = "kmem_slab"; 1273 else 1274 layered = "kmem_slab_partial"; 1275 1276 /* 1277 * for small-slab caches, we read in the entire slab. For 1278 * freed buffers, we can just walk the freelist. For 1279 * allocated buffers, we use a 'valid' array to track 1280 * the freed buffers. 
1281 */ 1282 if (!(cp->cache_flags & KMF_HASH)) { 1283 chunksize = cp->cache_chunksize; 1284 slabsize = cp->cache_slabsize; 1285 1286 kmw->kmw_ubase = mdb_alloc(slabsize + 1287 sizeof (kmem_bufctl_t), UM_SLEEP); 1288 1289 if (type & KM_ALLOCATED) 1290 kmw->kmw_valid = 1291 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1292 } 1293 } 1294 1295 status = WALK_NEXT; 1296 1297 if (mdb_layered_walk(layered, wsp) == -1) { 1298 mdb_warn("unable to start layered '%s' walk", layered); 1299 status = WALK_ERR; 1300 } 1301 1302 out1: 1303 if (status == WALK_ERR) { 1304 if (kmw->kmw_valid) 1305 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1306 1307 if (kmw->kmw_ubase) 1308 mdb_free(kmw->kmw_ubase, slabsize + 1309 sizeof (kmem_bufctl_t)); 1310 1311 if (kmw->kmw_maglist) 1312 mdb_free(kmw->kmw_maglist, 1313 kmw->kmw_max * sizeof (uintptr_t)); 1314 1315 mdb_free(kmw, sizeof (kmem_walk_t)); 1316 wsp->walk_data = NULL; 1317 } 1318 1319 out2: 1320 if (status == WALK_ERR) 1321 mdb_free(cp, csize); 1322 1323 return (status); 1324 } 1325 1326 int 1327 kmem_walk_step(mdb_walk_state_t *wsp) 1328 { 1329 kmem_walk_t *kmw = wsp->walk_data; 1330 int type = kmw->kmw_type; 1331 kmem_cache_t *cp = kmw->kmw_cp; 1332 1333 void **maglist = kmw->kmw_maglist; 1334 int magcnt = kmw->kmw_count; 1335 1336 uintptr_t chunksize, slabsize; 1337 uintptr_t addr; 1338 const kmem_slab_t *sp; 1339 const kmem_bufctl_t *bcp; 1340 kmem_bufctl_t bc; 1341 1342 int chunks; 1343 char *kbase; 1344 void *buf; 1345 int i, ret; 1346 1347 char *valid, *ubase; 1348 1349 /* 1350 * first, handle the 'kmem_hash' layered walk case 1351 */ 1352 if (type & KM_HASH) { 1353 /* 1354 * We have a buffer which has been allocated out of the 1355 * global layer. We need to make sure that it's not 1356 * actually sitting in a magazine before we report it as 1357 * an allocated buffer. 1358 */ 1359 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1360 1361 if (magcnt > 0 && 1362 bsearch(&buf, maglist, magcnt, sizeof (void *), 1363 addrcmp) != NULL) 1364 return (WALK_NEXT); 1365 1366 if (type & KM_BUFCTL) 1367 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1368 1369 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1370 } 1371 1372 ret = WALK_NEXT; 1373 1374 addr = kmw->kmw_addr; 1375 1376 /* 1377 * If we're walking freed buffers, report everything in the 1378 * magazine layer before processing the first slab. 1379 */ 1380 if ((type & KM_FREE) && magcnt != 0) { 1381 kmw->kmw_count = 0; /* only do this once */ 1382 for (i = 0; i < magcnt; i++) { 1383 buf = maglist[i]; 1384 1385 if (type & KM_BUFCTL) { 1386 uintptr_t out; 1387 1388 if (cp->cache_flags & KMF_BUFTAG) { 1389 kmem_buftag_t *btp; 1390 kmem_buftag_t tag; 1391 1392 /* LINTED - alignment */ 1393 btp = KMEM_BUFTAG(cp, buf); 1394 if (mdb_vread(&tag, sizeof (tag), 1395 (uintptr_t)btp) == -1) { 1396 mdb_warn("reading buftag for " 1397 "%p at %p", buf, btp); 1398 continue; 1399 } 1400 out = (uintptr_t)tag.bt_bufctl; 1401 } else { 1402 if (kmem_hash_lookup(cp, addr, buf, 1403 &out) == -1) 1404 continue; 1405 } 1406 ret = bufctl_walk_callback(cp, wsp, out); 1407 } else { 1408 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1409 } 1410 1411 if (ret != WALK_NEXT) 1412 return (ret); 1413 } 1414 } 1415 1416 /* 1417 * If they want constructed buffers, we're finished, since the 1418 * magazine layer holds them all. 
1419 */ 1420 if (type & KM_CONSTRUCTED) 1421 return (WALK_DONE); 1422 1423 /* 1424 * Handle the buffers in the current slab 1425 */ 1426 chunksize = cp->cache_chunksize; 1427 slabsize = cp->cache_slabsize; 1428 1429 sp = wsp->walk_layer; 1430 chunks = sp->slab_chunks; 1431 kbase = sp->slab_base; 1432 1433 dprintf(("kbase is %p\n", kbase)); 1434 1435 if (!(cp->cache_flags & KMF_HASH)) { 1436 valid = kmw->kmw_valid; 1437 ubase = kmw->kmw_ubase; 1438 1439 if (mdb_vread(ubase, chunks * chunksize, 1440 (uintptr_t)kbase) == -1) { 1441 mdb_warn("failed to read slab contents at %p", kbase); 1442 return (WALK_ERR); 1443 } 1444 1445 /* 1446 * Set up the valid map as fully allocated -- we'll punch 1447 * out the freelist. 1448 */ 1449 if (type & KM_ALLOCATED) 1450 (void) memset(valid, 1, chunks); 1451 } else { 1452 valid = NULL; 1453 ubase = NULL; 1454 } 1455 1456 /* 1457 * walk the slab's freelist 1458 */ 1459 bcp = sp->slab_head; 1460 1461 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1462 1463 /* 1464 * since we could be in the middle of allocating a buffer, 1465 * our refcnt could be one higher than it aught. So we 1466 * check one further on the freelist than the count allows. 1467 */ 1468 for (i = sp->slab_refcnt; i <= chunks; i++) { 1469 uint_t ndx; 1470 1471 dprintf(("bcp is %p\n", bcp)); 1472 1473 if (bcp == NULL) { 1474 if (i == chunks) 1475 break; 1476 mdb_warn( 1477 "slab %p in cache %p freelist too short by %d\n", 1478 sp, addr, chunks - i); 1479 break; 1480 } 1481 1482 if (cp->cache_flags & KMF_HASH) { 1483 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1484 mdb_warn("failed to read bufctl ptr at %p", 1485 bcp); 1486 break; 1487 } 1488 buf = bc.bc_addr; 1489 } else { 1490 /* 1491 * Otherwise the buffer is in the slab which 1492 * we've read in; we just need to determine 1493 * its offset in the slab to find the 1494 * kmem_bufctl_t. 1495 */ 1496 bc = *((kmem_bufctl_t *) 1497 ((uintptr_t)bcp - (uintptr_t)kbase + 1498 (uintptr_t)ubase)); 1499 1500 buf = KMEM_BUF(cp, bcp); 1501 } 1502 1503 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1504 1505 if (ndx > slabsize / cp->cache_bufsize) { 1506 /* 1507 * This is very wrong; we have managed to find 1508 * a buffer in the slab which shouldn't 1509 * actually be here. Emit a warning, and 1510 * try to continue. 1511 */ 1512 mdb_warn("buf %p is out of range for " 1513 "slab %p, cache %p\n", buf, sp, addr); 1514 } else if (type & KM_ALLOCATED) { 1515 /* 1516 * we have found a buffer on the slab's freelist; 1517 * clear its entry 1518 */ 1519 valid[ndx] = 0; 1520 } else { 1521 /* 1522 * Report this freed buffer 1523 */ 1524 if (type & KM_BUFCTL) { 1525 ret = bufctl_walk_callback(cp, wsp, 1526 (uintptr_t)bcp); 1527 } else { 1528 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1529 } 1530 if (ret != WALK_NEXT) 1531 return (ret); 1532 } 1533 1534 bcp = bc.bc_next; 1535 } 1536 1537 if (bcp != NULL) { 1538 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1539 sp, addr, bcp)); 1540 } 1541 1542 /* 1543 * If we are walking freed buffers, the loop above handled reporting 1544 * them. 1545 */ 1546 if (type & KM_FREE) 1547 return (WALK_NEXT); 1548 1549 if (type & KM_BUFCTL) { 1550 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1551 "cache %p\n", addr); 1552 return (WALK_ERR); 1553 } 1554 1555 /* 1556 * Report allocated buffers, skipping buffers in the magazine layer. 1557 * We only get this far for small-slab caches. 
1558 */ 1559 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1560 buf = (char *)kbase + i * chunksize; 1561 1562 if (!valid[i]) 1563 continue; /* on slab freelist */ 1564 1565 if (magcnt > 0 && 1566 bsearch(&buf, maglist, magcnt, sizeof (void *), 1567 addrcmp) != NULL) 1568 continue; /* in magazine layer */ 1569 1570 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1571 } 1572 return (ret); 1573 } 1574 1575 void 1576 kmem_walk_fini(mdb_walk_state_t *wsp) 1577 { 1578 kmem_walk_t *kmw = wsp->walk_data; 1579 uintptr_t chunksize; 1580 uintptr_t slabsize; 1581 1582 if (kmw == NULL) 1583 return; 1584 1585 if (kmw->kmw_maglist != NULL) 1586 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1587 1588 chunksize = kmw->kmw_cp->cache_chunksize; 1589 slabsize = kmw->kmw_cp->cache_slabsize; 1590 1591 if (kmw->kmw_valid != NULL) 1592 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1593 if (kmw->kmw_ubase != NULL) 1594 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1595 1596 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1597 mdb_free(kmw, sizeof (kmem_walk_t)); 1598 } 1599 1600 /*ARGSUSED*/ 1601 static int 1602 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1603 { 1604 /* 1605 * Buffers allocated from NOTOUCH caches can also show up as freed 1606 * memory in other caches. This can be a little confusing, so we 1607 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1608 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1609 */ 1610 if (c->cache_cflags & KMC_NOTOUCH) 1611 return (WALK_NEXT); 1612 1613 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1614 wsp->walk_cbdata, addr) == -1) 1615 return (WALK_DONE); 1616 1617 return (WALK_NEXT); 1618 } 1619 1620 #define KMEM_WALK_ALL(name, wsp) { \ 1621 wsp->walk_data = (name); \ 1622 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1623 return (WALK_ERR); \ 1624 return (WALK_DONE); \ 1625 } 1626 1627 int 1628 kmem_walk_init(mdb_walk_state_t *wsp) 1629 { 1630 if (wsp->walk_arg != NULL) 1631 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1632 1633 if (wsp->walk_addr == NULL) 1634 KMEM_WALK_ALL("kmem", wsp); 1635 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1636 } 1637 1638 int 1639 bufctl_walk_init(mdb_walk_state_t *wsp) 1640 { 1641 if (wsp->walk_addr == NULL) 1642 KMEM_WALK_ALL("bufctl", wsp); 1643 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1644 } 1645 1646 int 1647 freemem_walk_init(mdb_walk_state_t *wsp) 1648 { 1649 if (wsp->walk_addr == NULL) 1650 KMEM_WALK_ALL("freemem", wsp); 1651 return (kmem_walk_init_common(wsp, KM_FREE)); 1652 } 1653 1654 int 1655 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1656 { 1657 if (wsp->walk_addr == NULL) 1658 KMEM_WALK_ALL("freemem_constructed", wsp); 1659 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1660 } 1661 1662 int 1663 freectl_walk_init(mdb_walk_state_t *wsp) 1664 { 1665 if (wsp->walk_addr == NULL) 1666 KMEM_WALK_ALL("freectl", wsp); 1667 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1668 } 1669 1670 int 1671 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1672 { 1673 if (wsp->walk_addr == NULL) 1674 KMEM_WALK_ALL("freectl_constructed", wsp); 1675 return (kmem_walk_init_common(wsp, 1676 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1677 } 1678 1679 typedef struct bufctl_history_walk { 1680 void *bhw_next; 1681 kmem_cache_t *bhw_cache; 1682 kmem_slab_t *bhw_slab; 1683 hrtime_t bhw_timestamp; 1684 } bufctl_history_walk_t; 1685 1686 int 1687 
bufctl_history_walk_init(mdb_walk_state_t *wsp) 1688 { 1689 bufctl_history_walk_t *bhw; 1690 kmem_bufctl_audit_t bc; 1691 kmem_bufctl_audit_t bcn; 1692 1693 if (wsp->walk_addr == NULL) { 1694 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1695 return (WALK_ERR); 1696 } 1697 1698 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1699 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1700 return (WALK_ERR); 1701 } 1702 1703 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1704 bhw->bhw_timestamp = 0; 1705 bhw->bhw_cache = bc.bc_cache; 1706 bhw->bhw_slab = bc.bc_slab; 1707 1708 /* 1709 * sometimes the first log entry matches the base bufctl; in that 1710 * case, skip the base bufctl. 1711 */ 1712 if (bc.bc_lastlog != NULL && 1713 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1714 bc.bc_addr == bcn.bc_addr && 1715 bc.bc_cache == bcn.bc_cache && 1716 bc.bc_slab == bcn.bc_slab && 1717 bc.bc_timestamp == bcn.bc_timestamp && 1718 bc.bc_thread == bcn.bc_thread) 1719 bhw->bhw_next = bc.bc_lastlog; 1720 else 1721 bhw->bhw_next = (void *)wsp->walk_addr; 1722 1723 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1724 wsp->walk_data = bhw; 1725 1726 return (WALK_NEXT); 1727 } 1728 1729 int 1730 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1731 { 1732 bufctl_history_walk_t *bhw = wsp->walk_data; 1733 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1734 uintptr_t baseaddr = wsp->walk_addr; 1735 kmem_bufctl_audit_t bc; 1736 1737 if (addr == NULL) 1738 return (WALK_DONE); 1739 1740 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1741 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1742 return (WALK_ERR); 1743 } 1744 1745 /* 1746 * The bufctl is only valid if the address, cache, and slab are 1747 * correct. We also check that the timestamp is decreasing, to 1748 * prevent infinite loops. 1749 */ 1750 if ((uintptr_t)bc.bc_addr != baseaddr || 1751 bc.bc_cache != bhw->bhw_cache || 1752 bc.bc_slab != bhw->bhw_slab || 1753 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1754 return (WALK_DONE); 1755 1756 bhw->bhw_next = bc.bc_lastlog; 1757 bhw->bhw_timestamp = bc.bc_timestamp; 1758 1759 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1760 } 1761 1762 void 1763 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1764 { 1765 bufctl_history_walk_t *bhw = wsp->walk_data; 1766 1767 mdb_free(bhw, sizeof (*bhw)); 1768 } 1769 1770 typedef struct kmem_log_walk { 1771 kmem_bufctl_audit_t *klw_base; 1772 kmem_bufctl_audit_t **klw_sorted; 1773 kmem_log_header_t klw_lh; 1774 size_t klw_size; 1775 size_t klw_maxndx; 1776 size_t klw_ndx; 1777 } kmem_log_walk_t; 1778 1779 int 1780 kmem_log_walk_init(mdb_walk_state_t *wsp) 1781 { 1782 uintptr_t lp = wsp->walk_addr; 1783 kmem_log_walk_t *klw; 1784 kmem_log_header_t *lhp; 1785 int maxndx, i, j, k; 1786 1787 /* 1788 * By default (global walk), walk the kmem_transaction_log. Otherwise 1789 * read the log whose kmem_log_header_t is stored at walk_addr. 
1790 */ 1791 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1792 mdb_warn("failed to read 'kmem_transaction_log'"); 1793 return (WALK_ERR); 1794 } 1795 1796 if (lp == NULL) { 1797 mdb_warn("log is disabled\n"); 1798 return (WALK_ERR); 1799 } 1800 1801 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1802 lhp = &klw->klw_lh; 1803 1804 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1805 mdb_warn("failed to read log header at %p", lp); 1806 mdb_free(klw, sizeof (kmem_log_walk_t)); 1807 return (WALK_ERR); 1808 } 1809 1810 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1811 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1812 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1813 1814 if (mdb_vread(klw->klw_base, klw->klw_size, 1815 (uintptr_t)lhp->lh_base) == -1) { 1816 mdb_warn("failed to read log at base %p", lhp->lh_base); 1817 mdb_free(klw->klw_base, klw->klw_size); 1818 mdb_free(klw, sizeof (kmem_log_walk_t)); 1819 return (WALK_ERR); 1820 } 1821 1822 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1823 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1824 1825 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1826 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1827 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1828 1829 for (j = 0; j < maxndx; j++) 1830 klw->klw_sorted[k++] = &chunk[j]; 1831 } 1832 1833 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1834 (int(*)(const void *, const void *))bufctlcmp); 1835 1836 klw->klw_maxndx = k; 1837 wsp->walk_data = klw; 1838 1839 return (WALK_NEXT); 1840 } 1841 1842 int 1843 kmem_log_walk_step(mdb_walk_state_t *wsp) 1844 { 1845 kmem_log_walk_t *klw = wsp->walk_data; 1846 kmem_bufctl_audit_t *bcp; 1847 1848 if (klw->klw_ndx == klw->klw_maxndx) 1849 return (WALK_DONE); 1850 1851 bcp = klw->klw_sorted[klw->klw_ndx++]; 1852 1853 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1854 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1855 } 1856 1857 void 1858 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1859 { 1860 kmem_log_walk_t *klw = wsp->walk_data; 1861 1862 mdb_free(klw->klw_base, klw->klw_size); 1863 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1864 sizeof (kmem_bufctl_audit_t *)); 1865 mdb_free(klw, sizeof (kmem_log_walk_t)); 1866 } 1867 1868 typedef struct allocdby_bufctl { 1869 uintptr_t abb_addr; 1870 hrtime_t abb_ts; 1871 } allocdby_bufctl_t; 1872 1873 typedef struct allocdby_walk { 1874 const char *abw_walk; 1875 uintptr_t abw_thread; 1876 size_t abw_nbufs; 1877 size_t abw_size; 1878 allocdby_bufctl_t *abw_buf; 1879 size_t abw_ndx; 1880 } allocdby_walk_t; 1881 1882 int 1883 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1884 allocdby_walk_t *abw) 1885 { 1886 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1887 return (WALK_NEXT); 1888 1889 if (abw->abw_nbufs == abw->abw_size) { 1890 allocdby_bufctl_t *buf; 1891 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1892 1893 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1894 1895 bcopy(abw->abw_buf, buf, oldsize); 1896 mdb_free(abw->abw_buf, oldsize); 1897 1898 abw->abw_size <<= 1; 1899 abw->abw_buf = buf; 1900 } 1901 1902 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1903 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1904 abw->abw_nbufs++; 1905 1906 return (WALK_NEXT); 1907 } 1908 1909 /*ARGSUSED*/ 1910 int 1911 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1912 { 1913 if (mdb_pwalk(abw->abw_walk, 
	    (mdb_walk_cb_t)allocdby_walk_bufctl,
	    abw, addr) == -1) {
		mdb_warn("couldn't walk bufctl for cache %p", addr);
		return (WALK_DONE);
	}

	return (WALK_NEXT);
}

static int
allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
{
	if (lhs->abb_ts < rhs->abb_ts)
		return (1);
	if (lhs->abb_ts > rhs->abb_ts)
		return (-1);
	return (0);
}

static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}

int
allocdby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "bufctl"));
}

int
freedby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "freectl"));
}

int
allocdby_walk_step(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;
	kmem_bufctl_audit_t bc;
	uintptr_t addr;

	if (abw->abw_ndx == abw->abw_nbufs)
		return (WALK_DONE);

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (WALK_DONE);
	}

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
allocdby_walk_fini(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;

	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
	mdb_free(abw, sizeof (allocdby_walk_t));
}

/*ARGSUSED*/
int
allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
{
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	int i;

	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
	for (i = 0; i < bcp->bc_depth; i++) {
		if (mdb_lookup_by_addr(bcp->bc_stack[i],
		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
			continue;
		if (strncmp(c, "kmem_", 5) == 0)
			continue;
		mdb_printf("%s+0x%lx",
		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
		break;
	}
	mdb_printf("\n");

	return (WALK_NEXT);
}

static int
allocdby_common(uintptr_t addr, uint_t flags, const char *w)
{
	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");

	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
		mdb_warn("can't walk '%s' for %p", w, addr);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

/*ARGSUSED*/
int
allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr,
	    flags, "allocdby"));
}

/*ARGSUSED*/
int
freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "freedby"));
}

/*
 * Return a string describing the address in relation to the given thread's
 * stack.
 *
 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
 *
 * - If the address is above the stack pointer, return an empty string
 *   signifying that the address is active.
 *
 * - If the address is below the stack pointer, and the thread is not on proc,
 *   return " (below sp)".
 *
 * - If the address is below the stack pointer, and the thread is on proc,
 *   return " (possibly below sp)".  Depending on context, we may or may not
 *   have an accurate t_sp.
 */
static const char *
stack_active(const kthread_t *t, uintptr_t addr)
{
	uintptr_t panicstk;
	GElf_Sym sym;

	if (t->t_state == TS_FREE)
		return (" (inactive interrupt thread)");

	/*
	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
	 * no longer relates to the thread's real stack.
	 */
	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
		panicstk = (uintptr_t)sym.st_value;

		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
			return ("");
	}

	if (addr >= t->t_sp + STACK_BIAS)
		return ("");

	if (t->t_state == TS_ONPROC)
		return (" (possibly below sp)");

	return (" (below sp)");
}

/*
 * Additional state for the kmem and vmem ::whatis handlers
 */
typedef struct whatis_info {
	mdb_whatis_t *wi_w;
	const kmem_cache_t *wi_cache;
	const vmem_t *wi_vmem;
	vmem_t *wi_msb_arena;
	size_t wi_slab_size;
	uint_t wi_slab_found;
	uint_t wi_kmem_lite_count;
	uint_t wi_freemem;
} whatis_info_t;

/* call one of our dcmd functions with "-v" and the provided address */
static void
whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
{
	mdb_arg_t a;
	a.a_type = MDB_TYPE_STRING;
	a.a_un.a_str = "-v";

	mdb_printf(":\n");
	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
}

static void
whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
{
#define	KMEM_LITE_MAX	16
	pc_t callers[KMEM_LITE_MAX];
	pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;

	kmem_buftag_t bt;
	intptr_t stat;
	const char *plural = "";
	int i;

	/* validate our arguments and read in the buftag */
	if (count == 0 || count > KMEM_LITE_MAX ||
	    mdb_vread(&bt, sizeof (bt), btaddr) == -1)
		return;

	/*
	 * Validate the buffer state and read in the callers.  The buftag
	 * must be in either the allocated or the freed state; anything
	 * else means the buftag is corrupt and the history is unusable.
	 */
	stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;

	if ((stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) ||
	    mdb_vread(callers, count * sizeof (pc_t),
	    btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
		return;

	/* If there aren't any filled-in callers, bail */
	if (callers[0] == uninit)
		return;

	plural = (callers[1] == uninit) ? "" : "s";

	/* Everything's done and checked; print them out */
	mdb_printf(":\n");

	mdb_inc_indent(8);
	mdb_printf("recent caller%s: %a", plural, callers[0]);
	for (i = 1; i < count; i++) {
		if (callers[i] == uninit)
			break;
		mdb_printf(", %a", callers[i]);
	}
	mdb_dec_indent(8);
}

static void
whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
    uintptr_t baddr)
{
	mdb_whatis_t *w = wi->wi_w;

	const kmem_cache_t *cp = wi->wi_cache;
	/* LINTED pointer cast may result in improper alignment */
	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
	int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
	int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));

	mdb_whatis_report_object(w, maddr, addr, "");

	if (baddr != 0 && !call_printer)
		mdb_printf("bufctl %p ", baddr);

	mdb_printf("%s from %s",
	    (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);

	if (baddr != 0 && call_printer) {
		whatis_call_printer(bufctl, baddr);
		return;
	}

	/* for KMF_LITE caches, try to print out the previous callers */
	if (!quiet && (cp->cache_flags & KMF_LITE))
		whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);

	mdb_printf("\n");
}

/*ARGSUSED*/
static int
whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;

	uintptr_t cur;
	size_t size = wi->wi_cache->cache_bufsize;

	while (mdb_whatis_match(w, addr, size, &cur))
		whatis_print_kmem(wi, cur, addr, NULL);

	return (WHATIS_WALKRET(w));
}

/*ARGSUSED*/
static int
whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;

	uintptr_t cur;
	uintptr_t addr = (uintptr_t)bcp->bc_addr;
	size_t size = wi->wi_cache->cache_bufsize;

	while (mdb_whatis_match(w, addr, size, &cur))
		whatis_print_kmem(wi, cur, addr, baddr);

	return (WHATIS_WALKRET(w));
}

static int
whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;

	size_t size = vs->vs_end - vs->vs_start;
	uintptr_t cur;

	/* We're not interested in anything but alloc and free segments */
	if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
		return (WALK_NEXT);

	while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
		mdb_whatis_report_object(w, cur, vs->vs_start, "");

		/*
		 * If we're not printing it separately, provide the vmem_seg
		 * pointer if it has a stack trace.
		 */
		if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
		    (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
		    (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
			mdb_printf("vmem_seg %p ", addr);
		}

		mdb_printf("%s from the %s vmem arena",
		    (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
		    wi->wi_vmem->vm_name);

		if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
			whatis_call_printer(vmem_seg, addr);
		else
			mdb_printf("\n");
	}

	return (WHATIS_WALKRET(w));
}

static int
whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;
	const char *nm = vmem->vm_name;

	int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);

	if (identifier != idspace)
		return (WALK_NEXT);

	wi->wi_vmem = vmem;

	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching vmem arena %s...\n", nm);

	if (mdb_pwalk("vmem_seg",
	    (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
		mdb_warn("can't walk vmem_seg for %p", addr);
		return (WALK_NEXT);
	}

	return (WHATIS_WALKRET(w));
}

/*ARGSUSED*/
static int
whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;

	/* It must overlap with the slab data, or it's not interesting */
	if (mdb_whatis_overlaps(w,
	    (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
		wi->wi_slab_found++;
		return (WALK_DONE);
	}
	return (WALK_NEXT);
}

static int
whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;

	char *walk, *freewalk;
	mdb_walk_cb_t func;
	int do_bufctl;

	int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
	int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);

	if (identifier != idspace)
		return (WALK_NEXT);

	/* Override the '-b' flag as necessary */
	if (!(c->cache_flags & KMF_HASH))
		do_bufctl = FALSE;	/* no bufctls to walk */
	else if (c->cache_flags & KMF_AUDIT)
		do_bufctl = TRUE;	/* we always want debugging info */
	else
		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);

	if (do_bufctl) {
		walk = "bufctl";
		freewalk = "freectl";
		func = (mdb_walk_cb_t)whatis_walk_bufctl;
	} else {
		walk = "kmem";
		freewalk = "freemem";
		func = (mdb_walk_cb_t)whatis_walk_kmem;
	}

	wi->wi_cache = c;

	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching %s...\n", c->cache_name);

	/*
	 * If more than two buffers live on each slab, figure out if we're
	 * interested in anything in any slab before doing the more expensive
	 * kmem/freemem (bufctl/freectl) walkers.
	 */
	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
	if (!(c->cache_flags & KMF_HASH))
		wi->wi_slab_size -= sizeof (kmem_slab_t);

	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
		wi->wi_slab_found = 0;
		if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
		    addr) == -1) {
			mdb_warn("can't find kmem_slab walker");
			return (WALK_DONE);
		}
		if (wi->wi_slab_found == 0)
			return (WALK_NEXT);
	}

	wi->wi_freemem = FALSE;
	if (mdb_pwalk(walk, func, wi, addr) == -1) {
		mdb_warn("can't find %s walker", walk);
		return (WALK_DONE);
	}

	if (mdb_whatis_done(w))
		return (WALK_DONE);

	/*
	 * We have searched for allocated memory; now search for freed memory.
2386 */ 2387 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2388 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2389 2390 wi->wi_freemem = TRUE; 2391 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2392 mdb_warn("can't find %s walker", freewalk); 2393 return (WALK_DONE); 2394 } 2395 2396 return (WHATIS_WALKRET(w)); 2397 } 2398 2399 static int 2400 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2401 { 2402 if (c->cache_arena == wi->wi_msb_arena || 2403 (c->cache_cflags & KMC_NOTOUCH)) 2404 return (WALK_NEXT); 2405 2406 return (whatis_walk_cache(addr, c, wi)); 2407 } 2408 2409 static int 2410 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2411 { 2412 if (c->cache_arena != wi->wi_msb_arena) 2413 return (WALK_NEXT); 2414 2415 return (whatis_walk_cache(addr, c, wi)); 2416 } 2417 2418 static int 2419 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2420 { 2421 if (c->cache_arena == wi->wi_msb_arena || 2422 !(c->cache_cflags & KMC_NOTOUCH)) 2423 return (WALK_NEXT); 2424 2425 return (whatis_walk_cache(addr, c, wi)); 2426 } 2427 2428 static int 2429 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w) 2430 { 2431 uintptr_t cur; 2432 uintptr_t saddr; 2433 size_t size; 2434 2435 /* 2436 * Often, one calls ::whatis on an address from a thread structure. 2437 * We use this opportunity to short circuit this case... 2438 */ 2439 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur)) 2440 mdb_whatis_report_object(w, cur, addr, 2441 "allocated as a thread structure\n"); 2442 2443 /* 2444 * Now check the stack 2445 */ 2446 if (t->t_stkbase == NULL) 2447 return (WALK_NEXT); 2448 2449 /* 2450 * This assumes that t_stk is the end of the stack, but it's really 2451 * only the initial stack pointer for the thread. Arguments to the 2452 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So 2453 * that 't->t_stk::whatis' reports "part of t's stack", we include 2454 * t_stk in the range (the "+ 1", below), but the kernel should 2455 * really include the full stack bounds where we can find it. 2456 */ 2457 saddr = (uintptr_t)t->t_stkbase; 2458 size = (uintptr_t)t->t_stk - saddr + 1; 2459 while (mdb_whatis_match(w, saddr, size, &cur)) 2460 mdb_whatis_report_object(w, cur, cur, 2461 "in thread %p's stack%s\n", addr, stack_active(t, cur)); 2462 2463 return (WHATIS_WALKRET(w)); 2464 } 2465 2466 static void 2467 whatis_modctl_match(mdb_whatis_t *w, const char *name, 2468 uintptr_t base, size_t size, const char *where) 2469 { 2470 uintptr_t cur; 2471 2472 /* 2473 * Since we're searching for addresses inside a module, we report 2474 * them as symbols. 
2475 */ 2476 while (mdb_whatis_match(w, base, size, &cur)) 2477 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where); 2478 } 2479 2480 static int 2481 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w) 2482 { 2483 char name[MODMAXNAMELEN]; 2484 struct module mod; 2485 Shdr shdr; 2486 2487 if (m->mod_mp == NULL) 2488 return (WALK_NEXT); 2489 2490 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2491 mdb_warn("couldn't read modctl %p's module", addr); 2492 return (WALK_NEXT); 2493 } 2494 2495 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2496 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2497 2498 whatis_modctl_match(w, name, 2499 (uintptr_t)mod.text, mod.text_size, "text segment"); 2500 whatis_modctl_match(w, name, 2501 (uintptr_t)mod.data, mod.data_size, "data segment"); 2502 whatis_modctl_match(w, name, 2503 (uintptr_t)mod.bss, mod.bss_size, "bss segment"); 2504 2505 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2506 mdb_warn("couldn't read symbol header for %p's module", addr); 2507 return (WALK_NEXT); 2508 } 2509 2510 whatis_modctl_match(w, name, 2511 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab"); 2512 whatis_modctl_match(w, name, 2513 (uintptr_t)mod.symspace, mod.symsize, "symtab"); 2514 2515 return (WHATIS_WALKRET(w)); 2516 } 2517 2518 /*ARGSUSED*/ 2519 static int 2520 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w) 2521 { 2522 uintptr_t cur; 2523 2524 uintptr_t base = (uintptr_t)seg->pages; 2525 size_t size = (uintptr_t)seg->epages - base; 2526 2527 while (mdb_whatis_match(w, base, size, &cur)) { 2528 /* round our found pointer down to the page_t base. */ 2529 size_t offset = (cur - base) % sizeof (page_t); 2530 2531 mdb_whatis_report_object(w, cur, cur - offset, 2532 "allocated as a page structure\n"); 2533 } 2534 2535 return (WHATIS_WALKRET(w)); 2536 } 2537 2538 /*ARGSUSED*/ 2539 static int 2540 whatis_run_modules(mdb_whatis_t *w, void *arg) 2541 { 2542 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) { 2543 mdb_warn("couldn't find modctl walker"); 2544 return (1); 2545 } 2546 return (0); 2547 } 2548 2549 /*ARGSUSED*/ 2550 static int 2551 whatis_run_threads(mdb_whatis_t *w, void *ignored) 2552 { 2553 /* 2554 * Now search all thread stacks. Yes, this is a little weak; we 2555 * can save a lot of work by first checking to see if the 2556 * address is in segkp vs. segkmem. But hey, computers are 2557 * fast. 
2558 */ 2559 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) { 2560 mdb_warn("couldn't find thread walker"); 2561 return (1); 2562 } 2563 return (0); 2564 } 2565 2566 /*ARGSUSED*/ 2567 static int 2568 whatis_run_pages(mdb_whatis_t *w, void *ignored) 2569 { 2570 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) { 2571 mdb_warn("couldn't find memseg walker"); 2572 return (1); 2573 } 2574 return (0); 2575 } 2576 2577 /*ARGSUSED*/ 2578 static int 2579 whatis_run_kmem(mdb_whatis_t *w, void *ignored) 2580 { 2581 whatis_info_t wi; 2582 2583 bzero(&wi, sizeof (wi)); 2584 wi.wi_w = w; 2585 2586 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1) 2587 mdb_warn("unable to readvar \"kmem_msb_arena\""); 2588 2589 if (mdb_readvar(&wi.wi_kmem_lite_count, 2590 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16) 2591 wi.wi_kmem_lite_count = 0; 2592 2593 /* 2594 * We process kmem caches in the following order: 2595 * 2596 * non-KMC_NOTOUCH, non-metadata (typically the most interesting) 2597 * metadata (can be huge with KMF_AUDIT) 2598 * KMC_NOTOUCH, non-metadata (see kmem_walk_all()) 2599 */ 2600 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2601 &wi) == -1 || 2602 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2603 &wi) == -1 || 2604 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2605 &wi) == -1) { 2606 mdb_warn("couldn't find kmem_cache walker"); 2607 return (1); 2608 } 2609 return (0); 2610 } 2611 2612 /*ARGSUSED*/ 2613 static int 2614 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2615 { 2616 whatis_info_t wi; 2617 2618 bzero(&wi, sizeof (wi)); 2619 wi.wi_w = w; 2620 2621 if (mdb_walk("vmem_postfix", 2622 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2623 mdb_warn("couldn't find vmem_postfix walker"); 2624 return (1); 2625 } 2626 return (0); 2627 } 2628 2629 typedef struct kmem_log_cpu { 2630 uintptr_t kmc_low; 2631 uintptr_t kmc_high; 2632 } kmem_log_cpu_t; 2633 2634 typedef struct kmem_log_data { 2635 uintptr_t kmd_addr; 2636 kmem_log_cpu_t *kmd_cpu; 2637 } kmem_log_data_t; 2638 2639 int 2640 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2641 kmem_log_data_t *kmd) 2642 { 2643 int i; 2644 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2645 size_t bufsize; 2646 2647 for (i = 0; i < NCPU; i++) { 2648 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2649 break; 2650 } 2651 2652 if (kmd->kmd_addr) { 2653 if (b->bc_cache == NULL) 2654 return (WALK_NEXT); 2655 2656 if (mdb_vread(&bufsize, sizeof (bufsize), 2657 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2658 mdb_warn( 2659 "failed to read cache_bufsize for cache at %p", 2660 b->bc_cache); 2661 return (WALK_ERR); 2662 } 2663 2664 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2665 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2666 return (WALK_NEXT); 2667 } 2668 2669 if (i == NCPU) 2670 mdb_printf(" "); 2671 else 2672 mdb_printf("%3d", i); 2673 2674 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2675 b->bc_timestamp, b->bc_thread); 2676 2677 return (WALK_NEXT); 2678 } 2679 2680 /*ARGSUSED*/ 2681 int 2682 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2683 { 2684 kmem_log_header_t lh; 2685 kmem_cpu_log_header_t clh; 2686 uintptr_t lhp, clhp; 2687 int ncpus; 2688 uintptr_t *cpu; 2689 GElf_Sym sym; 2690 kmem_log_cpu_t *kmc; 2691 int i; 2692 kmem_log_data_t kmd; 2693 uint_t opt_b = FALSE; 2694 2695 if (mdb_getopts(argc, argv, 2696 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2697 return (DCMD_USAGE); 2698 2699 if 
(mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2700 mdb_warn("failed to read 'kmem_transaction_log'"); 2701 return (DCMD_ERR); 2702 } 2703 2704 if (lhp == NULL) { 2705 mdb_warn("no kmem transaction log\n"); 2706 return (DCMD_ERR); 2707 } 2708 2709 mdb_readvar(&ncpus, "ncpus"); 2710 2711 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2712 mdb_warn("failed to read log header at %p", lhp); 2713 return (DCMD_ERR); 2714 } 2715 2716 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2717 2718 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2719 2720 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2721 mdb_warn("couldn't find 'cpu' array"); 2722 return (DCMD_ERR); 2723 } 2724 2725 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2726 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2727 NCPU * sizeof (uintptr_t), sym.st_size); 2728 return (DCMD_ERR); 2729 } 2730 2731 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2732 mdb_warn("failed to read cpu array at %p", sym.st_value); 2733 return (DCMD_ERR); 2734 } 2735 2736 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2737 kmd.kmd_addr = NULL; 2738 kmd.kmd_cpu = kmc; 2739 2740 for (i = 0; i < NCPU; i++) { 2741 2742 if (cpu[i] == NULL) 2743 continue; 2744 2745 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2746 mdb_warn("cannot read cpu %d's log header at %p", 2747 i, clhp); 2748 return (DCMD_ERR); 2749 } 2750 2751 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2752 (uintptr_t)lh.lh_base; 2753 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2754 2755 clhp += sizeof (kmem_cpu_log_header_t); 2756 } 2757 2758 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2759 "TIMESTAMP", "THREAD"); 2760 2761 /* 2762 * If we have been passed an address, print out only log entries 2763 * corresponding to that address. If opt_b is specified, then interpret 2764 * the address as a bufctl. 2765 */ 2766 if (flags & DCMD_ADDRSPEC) { 2767 kmem_bufctl_audit_t b; 2768 2769 if (opt_b) { 2770 kmd.kmd_addr = addr; 2771 } else { 2772 if (mdb_vread(&b, 2773 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2774 mdb_warn("failed to read bufctl at %p", addr); 2775 return (DCMD_ERR); 2776 } 2777 2778 (void) kmem_log_walk(addr, &b, &kmd); 2779 2780 return (DCMD_OK); 2781 } 2782 } 2783 2784 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2785 mdb_warn("can't find kmem log walker"); 2786 return (DCMD_ERR); 2787 } 2788 2789 return (DCMD_OK); 2790 } 2791 2792 typedef struct bufctl_history_cb { 2793 int bhc_flags; 2794 int bhc_argc; 2795 const mdb_arg_t *bhc_argv; 2796 int bhc_ret; 2797 } bufctl_history_cb_t; 2798 2799 /*ARGSUSED*/ 2800 static int 2801 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2802 { 2803 bufctl_history_cb_t *bhc = arg; 2804 2805 bhc->bhc_ret = 2806 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2807 2808 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2809 2810 return ((bhc->bhc_ret == DCMD_OK)? 
WALK_NEXT : WALK_DONE); 2811 } 2812 2813 void 2814 bufctl_help(void) 2815 { 2816 mdb_printf("%s", 2817 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2818 mdb_dec_indent(2); 2819 mdb_printf("%<b>OPTIONS%</b>\n"); 2820 mdb_inc_indent(2); 2821 mdb_printf("%s", 2822 " -v Display the full content of the bufctl, including its stack trace\n" 2823 " -h retrieve the bufctl's transaction history, if available\n" 2824 " -a addr\n" 2825 " filter out bufctls not involving the buffer at addr\n" 2826 " -c caller\n" 2827 " filter out bufctls without the function/PC in their stack trace\n" 2828 " -e earliest\n" 2829 " filter out bufctls timestamped before earliest\n" 2830 " -l latest\n" 2831 " filter out bufctls timestamped after latest\n" 2832 " -t thread\n" 2833 " filter out bufctls not involving thread\n"); 2834 } 2835 2836 int 2837 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2838 { 2839 kmem_bufctl_audit_t bc; 2840 uint_t verbose = FALSE; 2841 uint_t history = FALSE; 2842 uint_t in_history = FALSE; 2843 uintptr_t caller = NULL, thread = NULL; 2844 uintptr_t laddr, haddr, baddr = NULL; 2845 hrtime_t earliest = 0, latest = 0; 2846 int i, depth; 2847 char c[MDB_SYM_NAMLEN]; 2848 GElf_Sym sym; 2849 2850 if (mdb_getopts(argc, argv, 2851 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2852 'h', MDB_OPT_SETBITS, TRUE, &history, 2853 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2854 'c', MDB_OPT_UINTPTR, &caller, 2855 't', MDB_OPT_UINTPTR, &thread, 2856 'e', MDB_OPT_UINT64, &earliest, 2857 'l', MDB_OPT_UINT64, &latest, 2858 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2859 return (DCMD_USAGE); 2860 2861 if (!(flags & DCMD_ADDRSPEC)) 2862 return (DCMD_USAGE); 2863 2864 if (in_history && !history) 2865 return (DCMD_USAGE); 2866 2867 if (history && !in_history) { 2868 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2869 UM_SLEEP | UM_GC); 2870 bufctl_history_cb_t bhc; 2871 2872 nargv[0].a_type = MDB_TYPE_STRING; 2873 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2874 2875 for (i = 0; i < argc; i++) 2876 nargv[i + 1] = argv[i]; 2877 2878 /* 2879 * When in history mode, we treat each element as if it 2880 * were in a seperate loop, so that the headers group 2881 * bufctls with similar histories. 2882 */ 2883 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2884 bhc.bhc_argc = argc + 1; 2885 bhc.bhc_argv = nargv; 2886 bhc.bhc_ret = DCMD_OK; 2887 2888 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2889 addr) == -1) { 2890 mdb_warn("unable to walk bufctl_history"); 2891 return (DCMD_ERR); 2892 } 2893 2894 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2895 mdb_printf("\n"); 2896 2897 return (bhc.bhc_ret); 2898 } 2899 2900 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2901 if (verbose) { 2902 mdb_printf("%16s %16s %16s %16s\n" 2903 "%<u>%16s %16s %16s %16s%</u>\n", 2904 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2905 "", "CACHE", "LASTLOG", "CONTENTS"); 2906 } else { 2907 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2908 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2909 } 2910 } 2911 2912 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2913 mdb_warn("couldn't read bufctl at %p", addr); 2914 return (DCMD_ERR); 2915 } 2916 2917 /* 2918 * Guard against bogus bc_depth in case the bufctl is corrupt or 2919 * the address does not really refer to a bufctl. 
2920 */ 2921 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2922 2923 if (caller != NULL) { 2924 laddr = caller; 2925 haddr = caller + sizeof (caller); 2926 2927 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2928 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2929 /* 2930 * We were provided an exact symbol value; any 2931 * address in the function is valid. 2932 */ 2933 laddr = (uintptr_t)sym.st_value; 2934 haddr = (uintptr_t)sym.st_value + sym.st_size; 2935 } 2936 2937 for (i = 0; i < depth; i++) 2938 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2939 break; 2940 2941 if (i == depth) 2942 return (DCMD_OK); 2943 } 2944 2945 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2946 return (DCMD_OK); 2947 2948 if (earliest != 0 && bc.bc_timestamp < earliest) 2949 return (DCMD_OK); 2950 2951 if (latest != 0 && bc.bc_timestamp > latest) 2952 return (DCMD_OK); 2953 2954 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2955 return (DCMD_OK); 2956 2957 if (flags & DCMD_PIPE_OUT) { 2958 mdb_printf("%#lr\n", addr); 2959 return (DCMD_OK); 2960 } 2961 2962 if (verbose) { 2963 mdb_printf( 2964 "%<b>%16p%</b> %16p %16llx %16p\n" 2965 "%16s %16p %16p %16p\n", 2966 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2967 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2968 2969 mdb_inc_indent(17); 2970 for (i = 0; i < depth; i++) 2971 mdb_printf("%a\n", bc.bc_stack[i]); 2972 mdb_dec_indent(17); 2973 mdb_printf("\n"); 2974 } else { 2975 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2976 bc.bc_timestamp, bc.bc_thread); 2977 2978 for (i = 0; i < depth; i++) { 2979 if (mdb_lookup_by_addr(bc.bc_stack[i], 2980 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2981 continue; 2982 if (strncmp(c, "kmem_", 5) == 0) 2983 continue; 2984 mdb_printf(" %a\n", bc.bc_stack[i]); 2985 break; 2986 } 2987 2988 if (i >= depth) 2989 mdb_printf("\n"); 2990 } 2991 2992 return (DCMD_OK); 2993 } 2994 2995 typedef struct kmem_verify { 2996 uint64_t *kmv_buf; /* buffer to read cache contents into */ 2997 size_t kmv_size; /* number of bytes in kmv_buf */ 2998 int kmv_corruption; /* > 0 if corruption found. */ 2999 int kmv_besilent; /* report actual corruption sites */ 3000 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3001 } kmem_verify_t; 3002 3003 /* 3004 * verify_pattern() 3005 * verify that buf is filled with the pattern pat. 3006 */ 3007 static int64_t 3008 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3009 { 3010 /*LINTED*/ 3011 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3012 uint64_t *buf; 3013 3014 for (buf = buf_arg; buf < bufend; buf++) 3015 if (*buf != pat) 3016 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3017 return (-1); 3018 } 3019 3020 /* 3021 * verify_buftag() 3022 * verify that btp->bt_bxstat == (bcp ^ pat) 3023 */ 3024 static int 3025 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3026 { 3027 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3028 } 3029 3030 /* 3031 * verify_free() 3032 * verify the integrity of a free block of memory by checking 3033 * that it is filled with 0xdeadbeef and that its buftag is sane. 
3034 */ 3035 /*ARGSUSED1*/ 3036 static int 3037 verify_free(uintptr_t addr, const void *data, void *private) 3038 { 3039 kmem_verify_t *kmv = (kmem_verify_t *)private; 3040 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3041 int64_t corrupt; /* corruption offset */ 3042 kmem_buftag_t *buftagp; /* ptr to buftag */ 3043 kmem_cache_t *cp = &kmv->kmv_cache; 3044 int besilent = kmv->kmv_besilent; 3045 3046 /*LINTED*/ 3047 buftagp = KMEM_BUFTAG(cp, buf); 3048 3049 /* 3050 * Read the buffer to check. 3051 */ 3052 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3053 if (!besilent) 3054 mdb_warn("couldn't read %p", addr); 3055 return (WALK_NEXT); 3056 } 3057 3058 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3059 KMEM_FREE_PATTERN)) >= 0) { 3060 if (!besilent) 3061 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3062 addr, (uintptr_t)addr + corrupt); 3063 goto corrupt; 3064 } 3065 /* 3066 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3067 * the first bytes of the buffer, hence we cannot check for red 3068 * zone corruption. 3069 */ 3070 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3071 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3072 if (!besilent) 3073 mdb_printf("buffer %p (free) seems to " 3074 "have a corrupt redzone pattern\n", addr); 3075 goto corrupt; 3076 } 3077 3078 /* 3079 * confirm bufctl pointer integrity. 3080 */ 3081 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3082 if (!besilent) 3083 mdb_printf("buffer %p (free) has a corrupt " 3084 "buftag\n", addr); 3085 goto corrupt; 3086 } 3087 3088 return (WALK_NEXT); 3089 corrupt: 3090 kmv->kmv_corruption++; 3091 return (WALK_NEXT); 3092 } 3093 3094 /* 3095 * verify_alloc() 3096 * Verify that the buftag of an allocated buffer makes sense with respect 3097 * to the buffer. 3098 */ 3099 /*ARGSUSED1*/ 3100 static int 3101 verify_alloc(uintptr_t addr, const void *data, void *private) 3102 { 3103 kmem_verify_t *kmv = (kmem_verify_t *)private; 3104 kmem_cache_t *cp = &kmv->kmv_cache; 3105 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3106 /*LINTED*/ 3107 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3108 uint32_t *ip = (uint32_t *)buftagp; 3109 uint8_t *bp = (uint8_t *)buf; 3110 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3111 int besilent = kmv->kmv_besilent; 3112 3113 /* 3114 * Read the buffer to check. 3115 */ 3116 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3117 if (!besilent) 3118 mdb_warn("couldn't read %p", addr); 3119 return (WALK_NEXT); 3120 } 3121 3122 /* 3123 * There are two cases to handle: 3124 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3125 * 0xfeedfacefeedface at the end of it 3126 * 2. If the buf was alloc'd using kmem_alloc, it will have 3127 * 0xbb just past the end of the region in use. At the buftag, 3128 * it will have 0xfeedface (or, if the whole buffer is in use, 3129 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3130 * endianness), followed by 32 bits containing the offset of the 3131 * 0xbb byte in the buffer. 
3132 * 3133 * Finally, the two 32-bit words that comprise the second half of the 3134 * buftag should xor to KMEM_BUFTAG_ALLOC 3135 */ 3136 3137 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3138 looks_ok = 1; 3139 else if (!KMEM_SIZE_VALID(ip[1])) 3140 size_ok = 0; 3141 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3142 looks_ok = 1; 3143 else 3144 size_ok = 0; 3145 3146 if (!size_ok) { 3147 if (!besilent) 3148 mdb_printf("buffer %p (allocated) has a corrupt " 3149 "redzone size encoding\n", addr); 3150 goto corrupt; 3151 } 3152 3153 if (!looks_ok) { 3154 if (!besilent) 3155 mdb_printf("buffer %p (allocated) has a corrupt " 3156 "redzone signature\n", addr); 3157 goto corrupt; 3158 } 3159 3160 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3161 if (!besilent) 3162 mdb_printf("buffer %p (allocated) has a " 3163 "corrupt buftag\n", addr); 3164 goto corrupt; 3165 } 3166 3167 return (WALK_NEXT); 3168 corrupt: 3169 kmv->kmv_corruption++; 3170 return (WALK_NEXT); 3171 } 3172 3173 /*ARGSUSED2*/ 3174 int 3175 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3176 { 3177 if (flags & DCMD_ADDRSPEC) { 3178 int check_alloc = 0, check_free = 0; 3179 kmem_verify_t kmv; 3180 3181 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3182 addr) == -1) { 3183 mdb_warn("couldn't read kmem_cache %p", addr); 3184 return (DCMD_ERR); 3185 } 3186 3187 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3188 sizeof (kmem_buftag_t); 3189 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3190 kmv.kmv_corruption = 0; 3191 3192 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3193 check_alloc = 1; 3194 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3195 check_free = 1; 3196 } else { 3197 if (!(flags & DCMD_LOOP)) { 3198 mdb_warn("cache %p (%s) does not have " 3199 "redzone checking enabled\n", addr, 3200 kmv.kmv_cache.cache_name); 3201 } 3202 return (DCMD_ERR); 3203 } 3204 3205 if (flags & DCMD_LOOP) { 3206 /* 3207 * table mode, don't print out every corrupt buffer 3208 */ 3209 kmv.kmv_besilent = 1; 3210 } else { 3211 mdb_printf("Summary for cache '%s'\n", 3212 kmv.kmv_cache.cache_name); 3213 mdb_inc_indent(2); 3214 kmv.kmv_besilent = 0; 3215 } 3216 3217 if (check_alloc) 3218 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3219 if (check_free) 3220 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3221 3222 if (flags & DCMD_LOOP) { 3223 if (kmv.kmv_corruption == 0) { 3224 mdb_printf("%-*s %?p clean\n", 3225 KMEM_CACHE_NAMELEN, 3226 kmv.kmv_cache.cache_name, addr); 3227 } else { 3228 char *s = ""; /* optional s in "buffer[s]" */ 3229 if (kmv.kmv_corruption > 1) 3230 s = "s"; 3231 3232 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3233 KMEM_CACHE_NAMELEN, 3234 kmv.kmv_cache.cache_name, addr, 3235 kmv.kmv_corruption, s); 3236 } 3237 } else { 3238 /* 3239 * This is the more verbose mode, when the user has 3240 * type addr::kmem_verify. If the cache was clean, 3241 * nothing will have yet been printed. So say something. 3242 */ 3243 if (kmv.kmv_corruption == 0) 3244 mdb_printf("clean\n"); 3245 3246 mdb_dec_indent(2); 3247 } 3248 } else { 3249 /* 3250 * If the user didn't specify a cache to verify, we'll walk all 3251 * kmem_cache's, specifying ourself as a callback for each... 
3252 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3253 */ 3254 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3255 "Cache Name", "Addr", "Cache Integrity"); 3256 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3257 } 3258 3259 return (DCMD_OK); 3260 } 3261 3262 typedef struct vmem_node { 3263 struct vmem_node *vn_next; 3264 struct vmem_node *vn_parent; 3265 struct vmem_node *vn_sibling; 3266 struct vmem_node *vn_children; 3267 uintptr_t vn_addr; 3268 int vn_marked; 3269 vmem_t vn_vmem; 3270 } vmem_node_t; 3271 3272 typedef struct vmem_walk { 3273 vmem_node_t *vw_root; 3274 vmem_node_t *vw_current; 3275 } vmem_walk_t; 3276 3277 int 3278 vmem_walk_init(mdb_walk_state_t *wsp) 3279 { 3280 uintptr_t vaddr, paddr; 3281 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3282 vmem_walk_t *vw; 3283 3284 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3285 mdb_warn("couldn't read 'vmem_list'"); 3286 return (WALK_ERR); 3287 } 3288 3289 while (vaddr != NULL) { 3290 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3291 vp->vn_addr = vaddr; 3292 vp->vn_next = head; 3293 head = vp; 3294 3295 if (vaddr == wsp->walk_addr) 3296 current = vp; 3297 3298 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3299 mdb_warn("couldn't read vmem_t at %p", vaddr); 3300 goto err; 3301 } 3302 3303 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3304 } 3305 3306 for (vp = head; vp != NULL; vp = vp->vn_next) { 3307 3308 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3309 vp->vn_sibling = root; 3310 root = vp; 3311 continue; 3312 } 3313 3314 for (parent = head; parent != NULL; parent = parent->vn_next) { 3315 if (parent->vn_addr != paddr) 3316 continue; 3317 vp->vn_sibling = parent->vn_children; 3318 parent->vn_children = vp; 3319 vp->vn_parent = parent; 3320 break; 3321 } 3322 3323 if (parent == NULL) { 3324 mdb_warn("couldn't find %p's parent (%p)\n", 3325 vp->vn_addr, paddr); 3326 goto err; 3327 } 3328 } 3329 3330 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3331 vw->vw_root = root; 3332 3333 if (current != NULL) 3334 vw->vw_current = current; 3335 else 3336 vw->vw_current = root; 3337 3338 wsp->walk_data = vw; 3339 return (WALK_NEXT); 3340 err: 3341 for (vp = head; head != NULL; vp = head) { 3342 head = vp->vn_next; 3343 mdb_free(vp, sizeof (vmem_node_t)); 3344 } 3345 3346 return (WALK_ERR); 3347 } 3348 3349 int 3350 vmem_walk_step(mdb_walk_state_t *wsp) 3351 { 3352 vmem_walk_t *vw = wsp->walk_data; 3353 vmem_node_t *vp; 3354 int rval; 3355 3356 if ((vp = vw->vw_current) == NULL) 3357 return (WALK_DONE); 3358 3359 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3360 3361 if (vp->vn_children != NULL) { 3362 vw->vw_current = vp->vn_children; 3363 return (rval); 3364 } 3365 3366 do { 3367 vw->vw_current = vp->vn_sibling; 3368 vp = vp->vn_parent; 3369 } while (vw->vw_current == NULL && vp != NULL); 3370 3371 return (rval); 3372 } 3373 3374 /* 3375 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3376 * children are visited before their parent. We perform the postfix walk 3377 * iteratively (rather than recursively) to allow mdb to regain control 3378 * after each callback. 3379 */ 3380 int 3381 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3382 { 3383 vmem_walk_t *vw = wsp->walk_data; 3384 vmem_node_t *vp = vw->vw_current; 3385 int rval; 3386 3387 /* 3388 * If this node is marked, then we know that we have already visited 3389 * all of its children. 
If the node has any siblings, they need to 3390 * be visited next; otherwise, we need to visit the parent. Note 3391 * that vp->vn_marked will only be zero on the first invocation of 3392 * the step function. 3393 */ 3394 if (vp->vn_marked) { 3395 if (vp->vn_sibling != NULL) 3396 vp = vp->vn_sibling; 3397 else if (vp->vn_parent != NULL) 3398 vp = vp->vn_parent; 3399 else { 3400 /* 3401 * We have neither a parent, nor a sibling, and we 3402 * have already been visited; we're done. 3403 */ 3404 return (WALK_DONE); 3405 } 3406 } 3407 3408 /* 3409 * Before we visit this node, visit its children. 3410 */ 3411 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3412 vp = vp->vn_children; 3413 3414 vp->vn_marked = 1; 3415 vw->vw_current = vp; 3416 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3417 3418 return (rval); 3419 } 3420 3421 void 3422 vmem_walk_fini(mdb_walk_state_t *wsp) 3423 { 3424 vmem_walk_t *vw = wsp->walk_data; 3425 vmem_node_t *root = vw->vw_root; 3426 int done; 3427 3428 if (root == NULL) 3429 return; 3430 3431 if ((vw->vw_root = root->vn_children) != NULL) 3432 vmem_walk_fini(wsp); 3433 3434 vw->vw_root = root->vn_sibling; 3435 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3436 mdb_free(root, sizeof (vmem_node_t)); 3437 3438 if (done) { 3439 mdb_free(vw, sizeof (vmem_walk_t)); 3440 } else { 3441 vmem_walk_fini(wsp); 3442 } 3443 } 3444 3445 typedef struct vmem_seg_walk { 3446 uint8_t vsw_type; 3447 uintptr_t vsw_start; 3448 uintptr_t vsw_current; 3449 } vmem_seg_walk_t; 3450 3451 /*ARGSUSED*/ 3452 int 3453 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3454 { 3455 vmem_seg_walk_t *vsw; 3456 3457 if (wsp->walk_addr == NULL) { 3458 mdb_warn("vmem_%s does not support global walks\n", name); 3459 return (WALK_ERR); 3460 } 3461 3462 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3463 3464 vsw->vsw_type = type; 3465 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3466 vsw->vsw_current = vsw->vsw_start; 3467 3468 return (WALK_NEXT); 3469 } 3470 3471 /* 3472 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
3473 */ 3474 #define VMEM_NONE 0 3475 3476 int 3477 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3478 { 3479 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3480 } 3481 3482 int 3483 vmem_free_walk_init(mdb_walk_state_t *wsp) 3484 { 3485 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3486 } 3487 3488 int 3489 vmem_span_walk_init(mdb_walk_state_t *wsp) 3490 { 3491 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3492 } 3493 3494 int 3495 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3496 { 3497 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3498 } 3499 3500 int 3501 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3502 { 3503 vmem_seg_t seg; 3504 vmem_seg_walk_t *vsw = wsp->walk_data; 3505 uintptr_t addr = vsw->vsw_current; 3506 static size_t seg_size = 0; 3507 int rval; 3508 3509 if (!seg_size) { 3510 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3511 mdb_warn("failed to read 'vmem_seg_size'"); 3512 seg_size = sizeof (vmem_seg_t); 3513 } 3514 } 3515 3516 if (seg_size < sizeof (seg)) 3517 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3518 3519 if (mdb_vread(&seg, seg_size, addr) == -1) { 3520 mdb_warn("couldn't read vmem_seg at %p", addr); 3521 return (WALK_ERR); 3522 } 3523 3524 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3525 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3526 rval = WALK_NEXT; 3527 } else { 3528 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3529 } 3530 3531 if (vsw->vsw_current == vsw->vsw_start) 3532 return (WALK_DONE); 3533 3534 return (rval); 3535 } 3536 3537 void 3538 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3539 { 3540 vmem_seg_walk_t *vsw = wsp->walk_data; 3541 3542 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3543 } 3544 3545 #define VMEM_NAMEWIDTH 22 3546 3547 int 3548 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3549 { 3550 vmem_t v, parent; 3551 vmem_kstat_t *vkp = &v.vm_kstat; 3552 uintptr_t paddr; 3553 int ident = 0; 3554 char c[VMEM_NAMEWIDTH]; 3555 3556 if (!(flags & DCMD_ADDRSPEC)) { 3557 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3558 mdb_warn("can't walk vmem"); 3559 return (DCMD_ERR); 3560 } 3561 return (DCMD_OK); 3562 } 3563 3564 if (DCMD_HDRSPEC(flags)) 3565 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3566 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3567 "TOTAL", "SUCCEED", "FAIL"); 3568 3569 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3570 mdb_warn("couldn't read vmem at %p", addr); 3571 return (DCMD_ERR); 3572 } 3573 3574 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3575 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3576 mdb_warn("couldn't trace %p's ancestry", addr); 3577 ident = 0; 3578 break; 3579 } 3580 paddr = (uintptr_t)parent.vm_source; 3581 } 3582 3583 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3584 3585 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3586 addr, VMEM_NAMEWIDTH, c, 3587 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3588 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3589 3590 return (DCMD_OK); 3591 } 3592 3593 void 3594 vmem_seg_help(void) 3595 { 3596 mdb_printf("%s", 3597 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3598 "\n" 3599 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3600 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3601 "information.\n"); 3602 mdb_dec_indent(2); 3603 mdb_printf("%<b>OPTIONS%</b>\n"); 3604 mdb_inc_indent(2); 3605 mdb_printf("%s", 3606 " -v Display the full content of the vmem_seg, including its stack trace\n" 3607 " -s report the size of the segment, instead of the end address\n" 3608 " -c caller\n" 3609 " filter out segments without the function/PC in their stack trace\n" 3610 " -e earliest\n" 3611 " filter out segments timestamped before earliest\n" 3612 " -l latest\n" 3613 " filter out segments timestamped after latest\n" 3614 " -m minsize\n" 3615 " filter out segments smaller than minsize\n" 3616 " -M maxsize\n" 3617 " filter out segments larger than maxsize\n" 3618 " -t thread\n" 3619 " filter out segments not involving thread\n" 3620 " -T type\n" 3621 " filter out segments not of type 'type'\n" 3622 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3623 } 3624 3625 /*ARGSUSED*/ 3626 int 3627 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3628 { 3629 vmem_seg_t vs; 3630 pc_t *stk = vs.vs_stack; 3631 uintptr_t sz; 3632 uint8_t t; 3633 const char *type = NULL; 3634 GElf_Sym sym; 3635 char c[MDB_SYM_NAMLEN]; 3636 int no_debug; 3637 int i; 3638 int depth; 3639 uintptr_t laddr, haddr; 3640 3641 uintptr_t caller = NULL, thread = NULL; 3642 uintptr_t minsize = 0, maxsize = 0; 3643 3644 hrtime_t earliest = 0, latest = 0; 3645 3646 uint_t size = 0; 3647 uint_t verbose = 0; 3648 3649 if (!(flags & DCMD_ADDRSPEC)) 3650 return (DCMD_USAGE); 3651 3652 if (mdb_getopts(argc, argv, 3653 'c', MDB_OPT_UINTPTR, &caller, 3654 'e', MDB_OPT_UINT64, &earliest, 3655 'l', MDB_OPT_UINT64, &latest, 3656 's', MDB_OPT_SETBITS, TRUE, &size, 3657 'm', MDB_OPT_UINTPTR, &minsize, 3658 'M', MDB_OPT_UINTPTR, &maxsize, 3659 't', MDB_OPT_UINTPTR, &thread, 3660 'T', MDB_OPT_STR, &type, 3661 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3662 NULL) != argc) 3663 return (DCMD_USAGE); 3664 3665 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3666 if (verbose) { 3667 mdb_printf("%16s %4s %16s %16s %16s\n" 3668 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3669 "ADDR", "TYPE", "START", "END", "SIZE", 3670 "", "", "THREAD", "TIMESTAMP", ""); 3671 } else { 3672 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3673 "START", size?
"SIZE" : "END", "WHO"); 3674 } 3675 } 3676 3677 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3678 mdb_warn("couldn't read vmem_seg at %p", addr); 3679 return (DCMD_ERR); 3680 } 3681 3682 if (type != NULL) { 3683 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3684 t = VMEM_ALLOC; 3685 else if (strcmp(type, "FREE") == 0) 3686 t = VMEM_FREE; 3687 else if (strcmp(type, "SPAN") == 0) 3688 t = VMEM_SPAN; 3689 else if (strcmp(type, "ROTR") == 0 || 3690 strcmp(type, "ROTOR") == 0) 3691 t = VMEM_ROTOR; 3692 else if (strcmp(type, "WLKR") == 0 || 3693 strcmp(type, "WALKER") == 0) 3694 t = VMEM_WALKER; 3695 else { 3696 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3697 type); 3698 return (DCMD_ERR); 3699 } 3700 3701 if (vs.vs_type != t) 3702 return (DCMD_OK); 3703 } 3704 3705 sz = vs.vs_end - vs.vs_start; 3706 3707 if (minsize != 0 && sz < minsize) 3708 return (DCMD_OK); 3709 3710 if (maxsize != 0 && sz > maxsize) 3711 return (DCMD_OK); 3712 3713 t = vs.vs_type; 3714 depth = vs.vs_depth; 3715 3716 /* 3717 * debug info, when present, is only accurate for VMEM_ALLOC segments 3718 */ 3719 no_debug = (t != VMEM_ALLOC) || 3720 (depth == 0 || depth > VMEM_STACK_DEPTH); 3721 3722 if (no_debug) { 3723 if (caller != NULL || thread != NULL || earliest != 0 || 3724 latest != 0) 3725 return (DCMD_OK); /* not enough info */ 3726 } else { 3727 if (caller != NULL) { 3728 laddr = caller; 3729 haddr = caller + sizeof (caller); 3730 3731 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3732 sizeof (c), &sym) != -1 && 3733 caller == (uintptr_t)sym.st_value) { 3734 /* 3735 * We were provided an exact symbol value; any 3736 * address in the function is valid. 3737 */ 3738 laddr = (uintptr_t)sym.st_value; 3739 haddr = (uintptr_t)sym.st_value + sym.st_size; 3740 } 3741 3742 for (i = 0; i < depth; i++) 3743 if (vs.vs_stack[i] >= laddr && 3744 vs.vs_stack[i] < haddr) 3745 break; 3746 3747 if (i == depth) 3748 return (DCMD_OK); 3749 } 3750 3751 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3752 return (DCMD_OK); 3753 3754 if (earliest != 0 && vs.vs_timestamp < earliest) 3755 return (DCMD_OK); 3756 3757 if (latest != 0 && vs.vs_timestamp > latest) 3758 return (DCMD_OK); 3759 } 3760 3761 type = (t == VMEM_ALLOC ? "ALLC" : 3762 t == VMEM_FREE ? "FREE" : 3763 t == VMEM_SPAN ? "SPAN" : 3764 t == VMEM_ROTOR ? "ROTR" : 3765 t == VMEM_WALKER ? "WLKR" : 3766 "????"); 3767 3768 if (flags & DCMD_PIPE_OUT) { 3769 mdb_printf("%#lr\n", addr); 3770 return (DCMD_OK); 3771 } 3772 3773 if (verbose) { 3774 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3775 addr, type, vs.vs_start, vs.vs_end, sz); 3776 3777 if (no_debug) 3778 return (DCMD_OK); 3779 3780 mdb_printf("%16s %4s %16p %16llx\n", 3781 "", "", vs.vs_thread, vs.vs_timestamp); 3782 3783 mdb_inc_indent(17); 3784 for (i = 0; i < depth; i++) { 3785 mdb_printf("%a\n", stk[i]); 3786 } 3787 mdb_dec_indent(17); 3788 mdb_printf("\n"); 3789 } else { 3790 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3791 vs.vs_start, size? 
sz : vs.vs_end); 3792 3793 if (no_debug) { 3794 mdb_printf("\n"); 3795 return (DCMD_OK); 3796 } 3797 3798 for (i = 0; i < depth; i++) { 3799 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3800 c, sizeof (c), &sym) == -1) 3801 continue; 3802 if (strncmp(c, "vmem_", 5) == 0) 3803 continue; 3804 break; 3805 } 3806 mdb_printf(" %a\n", stk[i]); 3807 } 3808 return (DCMD_OK); 3809 } 3810 3811 typedef struct kmalog_data { 3812 uintptr_t kma_addr; 3813 hrtime_t kma_newest; 3814 } kmalog_data_t; 3815 3816 /*ARGSUSED*/ 3817 static int 3818 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3819 { 3820 char name[KMEM_CACHE_NAMELEN + 1]; 3821 hrtime_t delta; 3822 int i, depth; 3823 size_t bufsize; 3824 3825 if (bcp->bc_timestamp == 0) 3826 return (WALK_DONE); 3827 3828 if (kma->kma_newest == 0) 3829 kma->kma_newest = bcp->bc_timestamp; 3830 3831 if (kma->kma_addr) { 3832 if (mdb_vread(&bufsize, sizeof (bufsize), 3833 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3834 mdb_warn( 3835 "failed to read cache_bufsize for cache at %p", 3836 bcp->bc_cache); 3837 return (WALK_ERR); 3838 } 3839 3840 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3841 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3842 return (WALK_NEXT); 3843 } 3844 3845 delta = kma->kma_newest - bcp->bc_timestamp; 3846 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3847 3848 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3849 &bcp->bc_cache->cache_name) <= 0) 3850 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3851 3852 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3853 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3854 3855 for (i = 0; i < depth; i++) 3856 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3857 3858 return (WALK_NEXT); 3859 } 3860 3861 int 3862 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3863 { 3864 const char *logname = "kmem_transaction_log"; 3865 kmalog_data_t kma; 3866 3867 if (argc > 1) 3868 return (DCMD_USAGE); 3869 3870 kma.kma_newest = 0; 3871 if (flags & DCMD_ADDRSPEC) 3872 kma.kma_addr = addr; 3873 else 3874 kma.kma_addr = NULL; 3875 3876 if (argc > 0) { 3877 if (argv->a_type != MDB_TYPE_STRING) 3878 return (DCMD_USAGE); 3879 if (strcmp(argv->a_un.a_str, "fail") == 0) 3880 logname = "kmem_failure_log"; 3881 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3882 logname = "kmem_slab_log"; 3883 else 3884 return (DCMD_USAGE); 3885 } 3886 3887 if (mdb_readvar(&addr, logname) == -1) { 3888 mdb_warn("failed to read %s log header pointer", logname); 3889 return (DCMD_ERR); 3890 } 3891 3892 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3893 mdb_warn("failed to walk kmem log"); 3894 return (DCMD_ERR); 3895 } 3896 3897 return (DCMD_OK); 3898 } 3899 3900 /* 3901 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3902 * The first piece is a structure which we use to accumulate kmem_cache_t 3903 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3904 * walker; we either add all caches, or ones named explicitly as arguments.
3905 */ 3906 3907 typedef struct kmclist { 3908 const char *kmc_name; /* Name to match (or NULL) */ 3909 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3910 int kmc_nelems; /* Num entries in kmc_caches */ 3911 int kmc_size; /* Size of kmc_caches array */ 3912 } kmclist_t; 3913 3914 static int 3915 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3916 { 3917 void *p; 3918 int s; 3919 3920 if (kmc->kmc_name == NULL || 3921 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3922 /* 3923 * If we have a match, grow our array (if necessary), and then 3924 * add the virtual address of the matching cache to our list. 3925 */ 3926 if (kmc->kmc_nelems >= kmc->kmc_size) { 3927 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3928 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3929 3930 bcopy(kmc->kmc_caches, p, 3931 sizeof (uintptr_t) * kmc->kmc_size); 3932 3933 kmc->kmc_caches = p; 3934 kmc->kmc_size = s; 3935 } 3936 3937 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3938 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3939 } 3940 3941 return (WALK_NEXT); 3942 } 3943 3944 /* 3945 * The second piece of ::kmausers is a hash table of allocations. Each 3946 * allocation owner is identified by its stack trace and data_size. We then 3947 * track the total bytes of all such allocations, and the number of allocations 3948 * to report at the end. Once we have a list of caches, we walk through the 3949 * allocated bufctls of each, and update our hash table accordingly. 3950 */ 3951 3952 typedef struct kmowner { 3953 struct kmowner *kmo_head; /* First hash elt in bucket */ 3954 struct kmowner *kmo_next; /* Next hash elt in chain */ 3955 size_t kmo_signature; /* Hash table signature */ 3956 uint_t kmo_num; /* Number of allocations */ 3957 size_t kmo_data_size; /* Size of each allocation */ 3958 size_t kmo_total_size; /* Total bytes of allocation */ 3959 int kmo_depth; /* Depth of stack trace */ 3960 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3961 } kmowner_t; 3962 3963 typedef struct kmusers { 3964 uintptr_t kmu_addr; /* address of interest */ 3965 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3966 kmowner_t *kmu_hash; /* Hash table of owners */ 3967 int kmu_nelems; /* Number of entries in use */ 3968 int kmu_size; /* Total number of entries */ 3969 } kmusers_t; 3970 3971 static void 3972 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3973 size_t size, size_t data_size) 3974 { 3975 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3976 size_t bucket, signature = data_size; 3977 kmowner_t *kmo, *kmoend; 3978 3979 /* 3980 * If the hash table is full, double its size and rehash everything. 3981 */ 3982 if (kmu->kmu_nelems >= kmu->kmu_size) { 3983 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3984 3985 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3986 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 3987 kmu->kmu_hash = kmo; 3988 kmu->kmu_size = s; 3989 3990 kmoend = kmu->kmu_hash + kmu->kmu_size; 3991 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 3992 kmo->kmo_head = NULL; 3993 3994 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 3995 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 3996 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 3997 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 3998 kmu->kmu_hash[bucket].kmo_head = kmo; 3999 } 4000 } 4001 4002 /* 4003 * Finish computing the hash signature from the stack trace, and then 4004 * see if the owner is in the hash table. If so, update our stats. 
4005 */ 4006 for (i = 0; i < depth; i++) 4007 signature += bcp->bc_stack[i]; 4008 4009 bucket = signature & (kmu->kmu_size - 1); 4010 4011 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4012 if (kmo->kmo_signature == signature) { 4013 size_t difference = 0; 4014 4015 difference |= kmo->kmo_data_size - data_size; 4016 difference |= kmo->kmo_depth - depth; 4017 4018 for (i = 0; i < depth; i++) { 4019 difference |= kmo->kmo_stack[i] - 4020 bcp->bc_stack[i]; 4021 } 4022 4023 if (difference == 0) { 4024 kmo->kmo_total_size += size; 4025 kmo->kmo_num++; 4026 return; 4027 } 4028 } 4029 } 4030 4031 /* 4032 * If the owner is not yet hashed, grab the next element and fill it 4033 * in based on the allocation information. 4034 */ 4035 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4036 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4037 kmu->kmu_hash[bucket].kmo_head = kmo; 4038 4039 kmo->kmo_signature = signature; 4040 kmo->kmo_num = 1; 4041 kmo->kmo_data_size = data_size; 4042 kmo->kmo_total_size = size; 4043 kmo->kmo_depth = depth; 4044 4045 for (i = 0; i < depth; i++) 4046 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4047 } 4048 4049 /* 4050 * When ::kmausers is invoked without the -f flag, we simply update our hash 4051 * table with the information from each allocated bufctl. 4052 */ 4053 /*ARGSUSED*/ 4054 static int 4055 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4056 { 4057 const kmem_cache_t *cp = kmu->kmu_cache; 4058 4059 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4060 return (WALK_NEXT); 4061 } 4062 4063 /* 4064 * When ::kmausers is invoked with the -f flag, we print out the information 4065 * for each bufctl as well as updating the hash table. 4066 */ 4067 static int 4068 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4069 { 4070 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4071 const kmem_cache_t *cp = kmu->kmu_cache; 4072 kmem_bufctl_t bufctl; 4073 4074 if (kmu->kmu_addr) { 4075 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4076 mdb_warn("couldn't read bufctl at %p", addr); 4077 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4078 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4079 cp->cache_bufsize) 4080 return (WALK_NEXT); 4081 } 4082 4083 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4084 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4085 4086 for (i = 0; i < depth; i++) 4087 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4088 4089 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4090 return (WALK_NEXT); 4091 } 4092 4093 /* 4094 * We sort our results by allocation size before printing them. 4095 */ 4096 static int 4097 kmownercmp(const void *lp, const void *rp) 4098 { 4099 const kmowner_t *lhs = lp; 4100 const kmowner_t *rhs = rp; 4101 4102 return (rhs->kmo_total_size - lhs->kmo_total_size); 4103 } 4104 4105 /* 4106 * The main engine of ::kmausers is relatively straightforward: First we 4107 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4108 * iterate over the allocated bufctls of each cache in the list. Finally, 4109 * we sort and print our results. 
4110 */ 4111 /*ARGSUSED*/ 4112 int 4113 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4114 { 4115 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4116 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4117 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4118 int do_all_caches = 1; /* Do all caches (no arguments) */ 4119 int opt_e = FALSE; /* Include "small" users */ 4120 int opt_f = FALSE; /* Print stack traces */ 4121 4122 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4123 kmowner_t *kmo, *kmoend; 4124 int i, oelems; 4125 4126 kmclist_t kmc; 4127 kmusers_t kmu; 4128 4129 bzero(&kmc, sizeof (kmc)); 4130 bzero(&kmu, sizeof (kmu)); 4131 4132 while ((i = mdb_getopts(argc, argv, 4133 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4134 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4135 4136 argv += i; /* skip past options we just processed */ 4137 argc -= i; /* adjust argc */ 4138 4139 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4140 return (DCMD_USAGE); 4141 4142 oelems = kmc.kmc_nelems; 4143 kmc.kmc_name = argv->a_un.a_str; 4144 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4145 4146 if (kmc.kmc_nelems == oelems) { 4147 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4148 return (DCMD_ERR); 4149 } 4150 4151 do_all_caches = 0; 4152 argv++; 4153 argc--; 4154 } 4155 4156 if (flags & DCMD_ADDRSPEC) { 4157 opt_f = TRUE; 4158 kmu.kmu_addr = addr; 4159 } else { 4160 kmu.kmu_addr = NULL; 4161 } 4162 4163 if (opt_e) 4164 mem_threshold = cnt_threshold = 0; 4165 4166 if (opt_f) 4167 callback = (mdb_walk_cb_t)kmause2; 4168 4169 if (do_all_caches) { 4170 kmc.kmc_name = NULL; /* match all cache names */ 4171 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4172 } 4173 4174 for (i = 0; i < kmc.kmc_nelems; i++) { 4175 uintptr_t cp = kmc.kmc_caches[i]; 4176 kmem_cache_t c; 4177 4178 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4179 mdb_warn("failed to read cache at %p", cp); 4180 continue; 4181 } 4182 4183 if (!(c.cache_flags & KMF_AUDIT)) { 4184 if (!do_all_caches) { 4185 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4186 c.cache_name); 4187 } 4188 continue; 4189 } 4190 4191 kmu.kmu_cache = &c; 4192 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4193 audited_caches++; 4194 } 4195 4196 if (audited_caches == 0 && do_all_caches) { 4197 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4198 return (DCMD_ERR); 4199 } 4200 4201 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4202 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4203 4204 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4205 if (kmo->kmo_total_size < mem_threshold && 4206 kmo->kmo_num < cnt_threshold) 4207 continue; 4208 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4209 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4210 for (i = 0; i < kmo->kmo_depth; i++) 4211 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4212 } 4213 4214 return (DCMD_OK); 4215 } 4216 4217 void 4218 kmausers_help(void) 4219 { 4220 mdb_printf( 4221 "Displays the largest users of the kmem allocator, sorted by \n" 4222 "trace. If one or more caches is specified, only those caches\n" 4223 "will be searched. By default, all caches are searched. If an\n" 4224 "address is specified, then only those allocations which include\n" 4225 "the given address are displayed. Specifying an address implies\n" 4226 "-f.\n" 4227 "\n" 4228 "\t-e\tInclude all users, not just the largest\n" 4229 "\t-f\tDisplay individual allocations. 
void
kmausers_help(void)
{
	mdb_printf(
	    "Displays the largest users of the kmem allocator, grouped by\n"
	    "stack trace and sorted by total size.  If one or more caches\n"
	    "are specified, only those caches will be searched.  By default,\n"
	    "all caches are searched.  If an address is specified, then only\n"
	    "those allocations that include the given address are displayed.\n"
	    "Specifying an address implies -f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack trace\n");
}

static int
kmem_ready_check(void)
{
	int ready;

	if (mdb_readvar(&ready, "kmem_ready") < 0)
		return (-1); /* errno is set for us */

	return (ready);
}

void
kmem_statechange(void)
{
	static int been_ready = 0;

	if (been_ready)
		return;

	if (kmem_ready_check() <= 0)
		return;

	been_ready = 1;
	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
}

void
kmem_init(void)
{
	mdb_walker_t w = {
		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
		list_walk_step, list_walk_fini
	};

	/*
	 * The kmem_cache walker may be needed immediately, but walkers in
	 * the linkage structure won't be ready until _mdb_init returns, so
	 * we add this one manually.  If kmem is ready, we use the walker to
	 * initialize the caches; if it isn't, we register a callback that
	 * defers cache walking until kmem is ready.
	 */
	if (mdb_add_walker(&w) != 0) {
		mdb_warn("failed to add kmem_cache walker");
		return;
	}

	kmem_statechange();

	/* register our ::whatis handlers */
	mdb_whatis_register("modules", whatis_run_modules, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("threads", whatis_run_threads, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("pages", whatis_run_pages, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("kmem", whatis_run_kmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
}

typedef struct whatthread {
	uintptr_t wt_target;
	int wt_verbose;
} whatthread_t;

static int
whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
{
	uintptr_t current, data;

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	/*
	 * Warn about swapped out threads, but drive on anyway
	 */
	if (!(t->t_schedflag & TS_LOAD)) {
		mdb_warn("thread %p's stack swapped out\n", addr);
		return (WALK_NEXT);
	}

	/*
	 * Search the thread's stack for the given pointer.  Note that it
	 * would be more efficient to follow ::kgrep's lead and read in
	 * page-sized chunks, but this routine is already fast and simple.
	 */
	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
	    current += sizeof (uintptr_t)) {
		if (mdb_vread(&data, sizeof (data), current) == -1) {
			mdb_warn("couldn't read thread %p's stack at %p",
			    addr, current);
			return (WALK_ERR);
		}

		if (data == w->wt_target) {
			if (w->wt_verbose) {
				mdb_printf("%p in thread %p's stack%s\n",
				    current, addr, stack_active(t, current));
			} else {
				mdb_printf("%#lr\n", addr);
				return (WALK_NEXT);
			}
		}
	}

	return (WALK_NEXT);
}
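
/*
 * ::whatthread reports every kernel thread whose stack contains the given
 * pointer value.  Illustrative invocations (the address is a placeholder
 * supplied by the editor):
 *
 *	<addr>::whatthread		print each matching thread
 *	<addr>::whatthread -v		also show where on each stack the
 *					value was found
 */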
int
whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	whatthread_t w;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	w.wt_verbose = FALSE;
	w.wt_target = addr;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
	    == -1) {
		mdb_warn("couldn't walk threads");
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}