1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <mdb/mdb_param.h> 29 #include <mdb/mdb_modapi.h> 30 #include <mdb/mdb_ctf.h> 31 #include <sys/cpuvar.h> 32 #include <sys/kmem_impl.h> 33 #include <sys/vmem_impl.h> 34 #include <sys/machelf.h> 35 #include <sys/modctl.h> 36 #include <sys/kobj.h> 37 #include <sys/panic.h> 38 #include <sys/stack.h> 39 #include <sys/sysmacros.h> 40 #include <vm/page.h> 41 42 #include "avl.h" 43 #include "combined.h" 44 #include "dist.h" 45 #include "kmem.h" 46 #include "leaky.h" 47 #include "list.h" 48 49 #define dprintf(x) if (mdb_debug_level) { \ 50 mdb_printf("kmem debug: "); \ 51 /*CSTYLED*/\ 52 mdb_printf x ;\ 53 } 54 55 #define KM_ALLOCATED 0x01 56 #define KM_FREE 0x02 57 #define KM_BUFCTL 0x04 58 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */ 59 #define KM_HASH 0x10 60 61 static int mdb_debug_level = 0; 62 63 /*ARGSUSED*/ 64 static int 65 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored) 66 { 67 mdb_walker_t w; 68 char descr[64]; 69 70 (void) mdb_snprintf(descr, sizeof (descr), 71 "walk the %s cache", c->cache_name); 72 73 w.walk_name = c->cache_name; 74 w.walk_descr = descr; 75 w.walk_init = kmem_walk_init; 76 w.walk_step = kmem_walk_step; 77 w.walk_fini = kmem_walk_fini; 78 w.walk_init_arg = (void *)addr; 79 80 if (mdb_add_walker(&w) == -1) 81 mdb_warn("failed to add %s walker", c->cache_name); 82 83 return (WALK_NEXT); 84 } 85 86 /*ARGSUSED*/ 87 int 88 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 89 { 90 mdb_debug_level ^= 1; 91 92 mdb_printf("kmem: debugging is now %s\n", 93 mdb_debug_level ? 
"on" : "off"); 94 95 return (DCMD_OK); 96 } 97 98 int 99 kmem_cache_walk_init(mdb_walk_state_t *wsp) 100 { 101 GElf_Sym sym; 102 103 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) { 104 mdb_warn("couldn't find kmem_caches"); 105 return (WALK_ERR); 106 } 107 108 wsp->walk_addr = (uintptr_t)sym.st_value; 109 110 return (list_walk_init_named(wsp, "cache list", "cache")); 111 } 112 113 int 114 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 115 { 116 if (wsp->walk_addr == NULL) { 117 mdb_warn("kmem_cpu_cache doesn't support global walks"); 118 return (WALK_ERR); 119 } 120 121 if (mdb_layered_walk("cpu", wsp) == -1) { 122 mdb_warn("couldn't walk 'cpu'"); 123 return (WALK_ERR); 124 } 125 126 wsp->walk_data = (void *)wsp->walk_addr; 127 128 return (WALK_NEXT); 129 } 130 131 int 132 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 133 { 134 uintptr_t caddr = (uintptr_t)wsp->walk_data; 135 const cpu_t *cpu = wsp->walk_layer; 136 kmem_cpu_cache_t cc; 137 138 caddr += cpu->cpu_cache_offset; 139 140 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 141 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 142 return (WALK_ERR); 143 } 144 145 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 146 } 147 148 static int 149 kmem_slab_check(void *p, uintptr_t saddr, void *arg) 150 { 151 kmem_slab_t *sp = p; 152 uintptr_t caddr = (uintptr_t)arg; 153 if ((uintptr_t)sp->slab_cache != caddr) { 154 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 155 saddr, caddr, sp->slab_cache); 156 return (-1); 157 } 158 159 return (0); 160 } 161 162 static int 163 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg) 164 { 165 kmem_slab_t *sp = p; 166 167 int rc = kmem_slab_check(p, saddr, arg); 168 if (rc != 0) { 169 return (rc); 170 } 171 172 if (!KMEM_SLAB_IS_PARTIAL(sp)) { 173 mdb_warn("slab %p is not a partial slab\n", saddr); 174 return (-1); 175 } 176 177 return (0); 178 } 179 180 static int 181 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg) 182 { 183 kmem_slab_t *sp = p; 184 185 int rc = kmem_slab_check(p, saddr, arg); 186 if (rc != 0) { 187 return (rc); 188 } 189 190 if (!KMEM_SLAB_IS_ALL_USED(sp)) { 191 mdb_warn("slab %p is not completely allocated\n", saddr); 192 return (-1); 193 } 194 195 return (0); 196 } 197 198 typedef struct { 199 uintptr_t kns_cache_addr; 200 int kns_nslabs; 201 } kmem_nth_slab_t; 202 203 static int 204 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg) 205 { 206 kmem_nth_slab_t *chkp = arg; 207 208 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr); 209 if (rc != 0) { 210 return (rc); 211 } 212 213 return (chkp->kns_nslabs-- == 0 ? 
1 : 0); 214 } 215 216 static int 217 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp) 218 { 219 uintptr_t caddr = wsp->walk_addr; 220 221 wsp->walk_addr = (uintptr_t)(caddr + 222 offsetof(kmem_cache_t, cache_complete_slabs)); 223 224 return (list_walk_init_checked(wsp, "slab list", "slab", 225 kmem_complete_slab_check, (void *)caddr)); 226 } 227 228 static int 229 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp) 230 { 231 uintptr_t caddr = wsp->walk_addr; 232 233 wsp->walk_addr = (uintptr_t)(caddr + 234 offsetof(kmem_cache_t, cache_partial_slabs)); 235 236 return (avl_walk_init_checked(wsp, "slab list", "slab", 237 kmem_partial_slab_check, (void *)caddr)); 238 } 239 240 int 241 kmem_slab_walk_init(mdb_walk_state_t *wsp) 242 { 243 uintptr_t caddr = wsp->walk_addr; 244 245 if (caddr == NULL) { 246 mdb_warn("kmem_slab doesn't support global walks\n"); 247 return (WALK_ERR); 248 } 249 250 combined_walk_init(wsp); 251 combined_walk_add(wsp, 252 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini); 253 combined_walk_add(wsp, 254 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini); 255 256 return (WALK_NEXT); 257 } 258 259 static int 260 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp) 261 { 262 uintptr_t caddr = wsp->walk_addr; 263 kmem_nth_slab_t *chk; 264 265 chk = mdb_alloc(sizeof (kmem_nth_slab_t), 266 UM_SLEEP | UM_GC); 267 chk->kns_cache_addr = caddr; 268 chk->kns_nslabs = 1; 269 wsp->walk_addr = (uintptr_t)(caddr + 270 offsetof(kmem_cache_t, cache_complete_slabs)); 271 272 return (list_walk_init_checked(wsp, "slab list", "slab", 273 kmem_nth_slab_check, chk)); 274 } 275 276 int 277 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 278 { 279 uintptr_t caddr = wsp->walk_addr; 280 kmem_cache_t c; 281 282 if (caddr == NULL) { 283 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 284 return (WALK_ERR); 285 } 286 287 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 288 mdb_warn("couldn't read kmem_cache at %p", caddr); 289 return (WALK_ERR); 290 } 291 292 combined_walk_init(wsp); 293 294 /* 295 * Some consumers (umem_walk_step(), in particular) require at 296 * least one callback if there are any buffers in the cache. So 297 * if there are *no* partial slabs, report the first full slab, if 298 * any. 299 * 300 * Yes, this is ugly, but it's cleaner than the other possibilities. 
301 */ 302 if (c.cache_partial_slabs.avl_numnodes == 0) { 303 combined_walk_add(wsp, kmem_first_complete_slab_walk_init, 304 list_walk_step, list_walk_fini); 305 } else { 306 combined_walk_add(wsp, kmem_partial_slab_walk_init, 307 avl_walk_step, avl_walk_fini); 308 } 309 310 return (WALK_NEXT); 311 } 312 313 int 314 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 315 { 316 kmem_cache_t c; 317 const char *filter = NULL; 318 319 if (mdb_getopts(ac, argv, 320 'n', MDB_OPT_STR, &filter, 321 NULL) != ac) { 322 return (DCMD_USAGE); 323 } 324 325 if (!(flags & DCMD_ADDRSPEC)) { 326 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 327 mdb_warn("can't walk kmem_cache"); 328 return (DCMD_ERR); 329 } 330 return (DCMD_OK); 331 } 332 333 if (DCMD_HDRSPEC(flags)) 334 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 335 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 336 337 if (mdb_vread(&c, sizeof (c), addr) == -1) { 338 mdb_warn("couldn't read kmem_cache at %p", addr); 339 return (DCMD_ERR); 340 } 341 342 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) 343 return (DCMD_OK); 344 345 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 346 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 347 348 return (DCMD_OK); 349 } 350 351 void 352 kmem_cache_help(void) 353 { 354 mdb_printf("%s", "Print kernel memory caches.\n\n"); 355 mdb_dec_indent(2); 356 mdb_printf("%<b>OPTIONS%</b>\n"); 357 mdb_inc_indent(2); 358 mdb_printf("%s", 359 " -n name\n" 360 " name of kmem cache (or matching partial name)\n" 361 "\n" 362 "Column\tDescription\n" 363 "\n" 364 "ADDR\t\taddress of kmem cache\n" 365 "NAME\t\tname of kmem cache\n" 366 "FLAG\t\tvarious cache state flags\n" 367 "CFLAG\t\tcache creation flags\n" 368 "BUFSIZE\tobject size in bytes\n" 369 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n"); 370 } 371 372 #define LABEL_WIDTH 11 373 static void 374 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 375 size_t maxbuckets, size_t minbucketsize) 376 { 377 uint64_t total; 378 int buckets; 379 int i; 380 const int *distarray; 381 int complete[2]; 382 383 buckets = buffers_per_slab; 384 385 total = 0; 386 for (i = 0; i <= buffers_per_slab; i++) 387 total += ks_bucket[i]; 388 389 if (maxbuckets > 1) 390 buckets = MIN(buckets, maxbuckets); 391 392 if (minbucketsize > 1) { 393 /* 394 * minbucketsize does not apply to the first bucket reserved 395 * for completely allocated slabs 396 */ 397 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 398 minbucketsize)); 399 if ((buckets < 2) && (buffers_per_slab > 1)) { 400 buckets = 2; 401 minbucketsize = (buffers_per_slab - 1); 402 } 403 } 404 405 /* 406 * The first printed bucket is reserved for completely allocated slabs. 407 * Passing (buckets - 1) excludes that bucket from the generated 408 * distribution, since we're handling it as a special case. 409 */ 410 complete[0] = buffers_per_slab; 411 complete[1] = buffers_per_slab + 1; 412 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1); 413 414 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated"); 415 dist_print_header("Buffers", LABEL_WIDTH, "Slabs"); 416 417 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH); 418 /* 419 * Print bucket ranges in descending order after the first bucket for 420 * completely allocated slabs, so a person can see immediately whether 421 * or not there is fragmentation without having to scan possibly 422 * multiple screens of output. 
Starting at (buckets - 2) excludes the 423 * extra terminating bucket. 424 */ 425 for (i = buckets - 2; i >= 0; i--) { 426 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH); 427 } 428 mdb_printf("\n"); 429 } 430 #undef LABEL_WIDTH 431 432 /*ARGSUSED*/ 433 static int 434 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab) 435 { 436 *is_slab = B_TRUE; 437 return (WALK_DONE); 438 } 439 440 /*ARGSUSED*/ 441 static int 442 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp, 443 boolean_t *is_slab) 444 { 445 /* 446 * The "kmem_partial_slab" walker reports the first full slab if there 447 * are no partial slabs (for the sake of consumers that require at least 448 * one callback if there are any buffers in the cache). 449 */ 450 *is_slab = KMEM_SLAB_IS_PARTIAL(sp); 451 return (WALK_DONE); 452 } 453 454 typedef struct kmem_slab_usage { 455 int ksu_refcnt; /* count of allocated buffers on slab */ 456 boolean_t ksu_nomove; /* slab marked non-reclaimable */ 457 } kmem_slab_usage_t; 458 459 typedef struct kmem_slab_stats { 460 const kmem_cache_t *ks_cp; 461 int ks_slabs; /* slabs in cache */ 462 int ks_partial_slabs; /* partially allocated slabs in cache */ 463 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 464 int ks_max_buffers_per_slab; /* max buffers per slab */ 465 int ks_usage_len; /* ks_usage array length */ 466 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 467 uint_t *ks_bucket; /* slab usage distribution */ 468 } kmem_slab_stats_t; 469 470 /*ARGSUSED*/ 471 static int 472 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp, 473 kmem_slab_stats_t *ks) 474 { 475 kmem_slab_usage_t *ksu; 476 long unused; 477 478 ks->ks_slabs++; 479 ks->ks_bucket[sp->slab_refcnt]++; 480 481 unused = (sp->slab_chunks - sp->slab_refcnt); 482 if (unused == 0) { 483 return (WALK_NEXT); 484 } 485 486 ks->ks_partial_slabs++; 487 ks->ks_unused_buffers += unused; 488 489 if (ks->ks_partial_slabs > ks->ks_usage_len) { 490 kmem_slab_usage_t *usage; 491 int len = ks->ks_usage_len; 492 493 len = (len == 0 ? 
16 : len * 2); 494 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 495 if (ks->ks_usage != NULL) { 496 bcopy(ks->ks_usage, usage, 497 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 498 mdb_free(ks->ks_usage, 499 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 500 } 501 ks->ks_usage = usage; 502 ks->ks_usage_len = len; 503 } 504 505 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 506 ksu->ksu_refcnt = sp->slab_refcnt; 507 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); 508 return (WALK_NEXT); 509 } 510 511 static void 512 kmem_slabs_header() 513 { 514 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 515 "", "", "Partial", "", "Unused", ""); 516 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 517 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 518 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 519 "-------------------------", "--------", "--------", "---------", 520 "---------", "------"); 521 } 522 523 int 524 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 525 { 526 kmem_cache_t c; 527 kmem_slab_stats_t stats; 528 mdb_walk_cb_t cb; 529 int pct; 530 int tenths_pct; 531 size_t maxbuckets = 1; 532 size_t minbucketsize = 0; 533 const char *filter = NULL; 534 const char *name = NULL; 535 uint_t opt_v = FALSE; 536 boolean_t buckets = B_FALSE; 537 boolean_t skip = B_FALSE; 538 539 if (mdb_getopts(argc, argv, 540 'B', MDB_OPT_UINTPTR, &minbucketsize, 541 'b', MDB_OPT_UINTPTR, &maxbuckets, 542 'n', MDB_OPT_STR, &filter, 543 'N', MDB_OPT_STR, &name, 544 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 545 NULL) != argc) { 546 return (DCMD_USAGE); 547 } 548 549 if ((maxbuckets != 1) || (minbucketsize != 0)) { 550 buckets = B_TRUE; 551 } 552 553 if (!(flags & DCMD_ADDRSPEC)) { 554 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 555 argv) == -1) { 556 mdb_warn("can't walk kmem_cache"); 557 return (DCMD_ERR); 558 } 559 return (DCMD_OK); 560 } 561 562 if (mdb_vread(&c, sizeof (c), addr) == -1) { 563 mdb_warn("couldn't read kmem_cache at %p", addr); 564 return (DCMD_ERR); 565 } 566 567 if (name == NULL) { 568 skip = ((filter != NULL) && 569 (strstr(c.cache_name, filter) == NULL)); 570 } else if (filter == NULL) { 571 skip = (strcmp(c.cache_name, name) != 0); 572 } else { 573 /* match either -n or -N */ 574 skip = ((strcmp(c.cache_name, name) != 0) && 575 (strstr(c.cache_name, filter) == NULL)); 576 } 577 578 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) { 579 kmem_slabs_header(); 580 } else if ((opt_v || buckets) && !skip) { 581 if (DCMD_HDRSPEC(flags)) { 582 kmem_slabs_header(); 583 } else { 584 boolean_t is_slab = B_FALSE; 585 const char *walker_name; 586 if (opt_v) { 587 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 588 walker_name = "kmem_slab_partial"; 589 } else { 590 cb = (mdb_walk_cb_t)kmem_first_slab; 591 walker_name = "kmem_slab"; 592 } 593 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 594 if (is_slab) { 595 kmem_slabs_header(); 596 } 597 } 598 } 599 600 if (skip) { 601 return (DCMD_OK); 602 } 603 604 bzero(&stats, sizeof (kmem_slab_stats_t)); 605 stats.ks_cp = &c; 606 stats.ks_max_buffers_per_slab = c.cache_maxchunks; 607 /* +1 to include a zero bucket */ 608 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) * 609 sizeof (*stats.ks_bucket), UM_SLEEP); 610 cb = (mdb_walk_cb_t)kmem_slablist_stat; 611 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 612 613 if (c.cache_buftotal == 0) { 614 pct = 0; 615 tenths_pct = 0; 616 } else { 617 uint64_t n = stats.ks_unused_buffers * 10000; 618 pct = (int)(n / c.cache_buftotal); 619 tenths_pct = pct 
- ((pct / 100) * 100); 620 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 621 if (tenths_pct == 10) { 622 pct += 100; 623 tenths_pct = 0; 624 } 625 } 626 627 pct /= 100; 628 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 629 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 630 stats.ks_unused_buffers, pct, tenths_pct); 631 632 if (maxbuckets == 0) { 633 maxbuckets = stats.ks_max_buffers_per_slab; 634 } 635 636 if (((maxbuckets > 1) || (minbucketsize > 0)) && 637 (stats.ks_slabs > 0)) { 638 mdb_printf("\n"); 639 kmem_slabs_print_dist(stats.ks_bucket, 640 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize); 641 } 642 643 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) * 644 sizeof (*stats.ks_bucket)); 645 646 if (!opt_v) { 647 return (DCMD_OK); 648 } 649 650 if (opt_v && (stats.ks_partial_slabs > 0)) { 651 int i; 652 kmem_slab_usage_t *ksu; 653 654 mdb_printf(" %d complete, %d partial", 655 (stats.ks_slabs - stats.ks_partial_slabs), 656 stats.ks_partial_slabs); 657 if (stats.ks_partial_slabs > 0) { 658 mdb_printf(" (%d):", stats.ks_max_buffers_per_slab); 659 } 660 for (i = 0; i < stats.ks_partial_slabs; i++) { 661 ksu = &stats.ks_usage[i]; 662 if (ksu->ksu_nomove) { 663 const char *symbol = "*"; 664 mdb_printf(" %d%s", ksu->ksu_refcnt, symbol); 665 } else { 666 mdb_printf(" %d", ksu->ksu_refcnt); 667 } 668 } 669 mdb_printf("\n\n"); 670 } 671 672 if (stats.ks_usage_len > 0) { 673 mdb_free(stats.ks_usage, 674 stats.ks_usage_len * sizeof (kmem_slab_usage_t)); 675 } 676 677 return (DCMD_OK); 678 } 679 680 void 681 kmem_slabs_help(void) 682 { 683 mdb_printf("%s", 684 "Display slab usage per kmem cache.\n\n"); 685 mdb_dec_indent(2); 686 mdb_printf("%<b>OPTIONS%</b>\n"); 687 mdb_inc_indent(2); 688 mdb_printf("%s", 689 " -n name\n" 690 " name of kmem cache (or matching partial name)\n" 691 " -N name\n" 692 " exact name of kmem cache\n" 693 " -b maxbins\n" 694 " Print a distribution of allocated buffers per slab using at\n" 695 " most maxbins bins. The first bin is reserved for completely\n" 696 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n" 697 " effect as specifying the maximum allocated buffers per slab\n" 698 " or setting minbinsize to 1 (-B 1).\n" 699 " -B minbinsize\n" 700 " Print a distribution of allocated buffers per slab, making\n" 701 " all bins (except the first, reserved for completely allocated\n" 702 " slabs) at least minbinsize buffers apart.\n" 703 " -v verbose output: List the allocated buffer count of each partial\n" 704 " slab on the free list in order from front to back to show how\n" 705 " closely the slabs are ordered by usage. For example\n" 706 "\n" 707 " 10 complete, 3 partial (8): 7 3 1\n" 708 "\n" 709 " means there are thirteen slabs with eight buffers each, including\n" 710 " three partially allocated slabs with less than all eight buffers\n" 711 " allocated.\n" 712 "\n" 713 " Buffer allocations are always from the front of the partial slab\n" 714 " list. When a buffer is freed from a completely used slab, that\n" 715 " slab is added to the front of the partial slab list. Assuming\n" 716 " that all buffers are equally likely to be freed soon, the\n" 717 " desired order of partial slabs is most-used at the front of the\n" 718 " list and least-used at the back (as in the example above).\n" 719 " However, if a slab contains an allocated buffer that will not\n" 720 " soon be freed, it would be better for that slab to be at the\n" 721 " front where all of its buffers can be allocated. 
Taking a slab\n" 722 " off the partial slab list (either with all buffers freed or all\n" 723 " buffers allocated) reduces cache fragmentation.\n" 724 "\n" 725 " A slab's allocated buffer count representing a partial slab (9 in\n" 726 " the example below) may be marked as follows:\n" 727 "\n" 728 " 9* An asterisk indicates that kmem has marked the slab non-\n" 729 " reclaimable because the kmem client refused to move one of the\n" 730 " slab's buffers. Since kmem does not expect to completely free the\n" 731 " slab, it moves it to the front of the list in the hope of\n" 732 " completely allocating it instead. A slab marked with an asterisk\n" 733 " stays marked for as long as it remains on the partial slab list.\n" 734 "\n" 735 "Column\t\tDescription\n" 736 "\n" 737 "Cache Name\t\tname of kmem cache\n" 738 "Slabs\t\t\ttotal slab count\n" 739 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 740 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 741 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 742 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 743 "\t\t\t for accounting structures (debug mode), slab\n" 744 "\t\t\t coloring (incremental small offsets to stagger\n" 745 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 746 } 747 748 static int 749 addrcmp(const void *lhs, const void *rhs) 750 { 751 uintptr_t p1 = *((uintptr_t *)lhs); 752 uintptr_t p2 = *((uintptr_t *)rhs); 753 754 if (p1 < p2) 755 return (-1); 756 if (p1 > p2) 757 return (1); 758 return (0); 759 } 760 761 static int 762 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 763 { 764 const kmem_bufctl_audit_t *bcp1 = *lhs; 765 const kmem_bufctl_audit_t *bcp2 = *rhs; 766 767 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 768 return (-1); 769 770 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 771 return (1); 772 773 return (0); 774 } 775 776 typedef struct kmem_hash_walk { 777 uintptr_t *kmhw_table; 778 size_t kmhw_nelems; 779 size_t kmhw_pos; 780 kmem_bufctl_t kmhw_cur; 781 } kmem_hash_walk_t; 782 783 int 784 kmem_hash_walk_init(mdb_walk_state_t *wsp) 785 { 786 kmem_hash_walk_t *kmhw; 787 uintptr_t *hash; 788 kmem_cache_t c; 789 uintptr_t haddr, addr = wsp->walk_addr; 790 size_t nelems; 791 size_t hsize; 792 793 if (addr == NULL) { 794 mdb_warn("kmem_hash doesn't support global walks\n"); 795 return (WALK_ERR); 796 } 797 798 if (mdb_vread(&c, sizeof (c), addr) == -1) { 799 mdb_warn("couldn't read cache at addr %p", addr); 800 return (WALK_ERR); 801 } 802 803 if (!(c.cache_flags & KMF_HASH)) { 804 mdb_warn("cache %p doesn't have a hash table\n", addr); 805 return (WALK_DONE); /* nothing to do */ 806 } 807 808 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 809 kmhw->kmhw_cur.bc_next = NULL; 810 kmhw->kmhw_pos = 0; 811 812 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 813 hsize = nelems * sizeof (uintptr_t); 814 haddr = (uintptr_t)c.cache_hash_table; 815 816 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 817 if (mdb_vread(hash, hsize, haddr) == -1) { 818 mdb_warn("failed to read hash table at %p", haddr); 819 mdb_free(hash, hsize); 820 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 821 return (WALK_ERR); 822 } 823 824 wsp->walk_data = kmhw; 825 826 return (WALK_NEXT); 827 } 828 829 int 830 kmem_hash_walk_step(mdb_walk_state_t *wsp) 831 { 832 kmem_hash_walk_t *kmhw = wsp->walk_data; 833 uintptr_t addr = NULL; 834 835 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 836 while (kmhw->kmhw_pos < 
kmhw->kmhw_nelems) { 837 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 838 break; 839 } 840 } 841 if (addr == NULL) 842 return (WALK_DONE); 843 844 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 845 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 846 return (WALK_ERR); 847 } 848 849 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 850 } 851 852 void 853 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 854 { 855 kmem_hash_walk_t *kmhw = wsp->walk_data; 856 857 if (kmhw == NULL) 858 return; 859 860 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 861 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 862 } 863 864 /* 865 * Find the address of the bufctl structure for the address 'buf' in cache 866 * 'cp', which is at address caddr, and place it in *out. 867 */ 868 static int 869 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 870 { 871 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 872 kmem_bufctl_t *bcp; 873 kmem_bufctl_t bc; 874 875 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 876 mdb_warn("unable to read hash bucket for %p in cache %p", 877 buf, caddr); 878 return (-1); 879 } 880 881 while (bcp != NULL) { 882 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 883 (uintptr_t)bcp) == -1) { 884 mdb_warn("unable to read bufctl at %p", bcp); 885 return (-1); 886 } 887 if (bc.bc_addr == buf) { 888 *out = (uintptr_t)bcp; 889 return (0); 890 } 891 bcp = bc.bc_next; 892 } 893 894 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 895 return (-1); 896 } 897 898 int 899 kmem_get_magsize(const kmem_cache_t *cp) 900 { 901 uintptr_t addr = (uintptr_t)cp->cache_magtype; 902 GElf_Sym mt_sym; 903 kmem_magtype_t mt; 904 int res; 905 906 /* 907 * if cpu 0 has a non-zero magsize, it must be correct. caches 908 * with KMF_NOMAGAZINE have disabled their magazine layers, so 909 * it is okay to return 0 for them. 910 */ 911 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 912 (cp->cache_flags & KMF_NOMAGAZINE)) 913 return (res); 914 915 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 916 mdb_warn("unable to read 'kmem_magtype'"); 917 } else if (addr < mt_sym.st_value || 918 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 919 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 920 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 921 cp->cache_name, addr); 922 return (0); 923 } 924 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 925 mdb_warn("unable to read magtype at %a", addr); 926 return (0); 927 } 928 return (mt.mt_magsize); 929 } 930 931 /*ARGSUSED*/ 932 static int 933 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 934 { 935 *est -= (sp->slab_chunks - sp->slab_refcnt); 936 937 return (WALK_NEXT); 938 } 939 940 /* 941 * Returns an upper bound on the number of allocated buffers in a given 942 * cache. 
943 */ 944 size_t 945 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 946 { 947 int magsize; 948 size_t cache_est; 949 950 cache_est = cp->cache_buftotal; 951 952 (void) mdb_pwalk("kmem_slab_partial", 953 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 954 955 if ((magsize = kmem_get_magsize(cp)) != 0) { 956 size_t mag_est = cp->cache_full.ml_total * magsize; 957 958 if (cache_est >= mag_est) { 959 cache_est -= mag_est; 960 } else { 961 mdb_warn("cache %p's magazine layer holds more buffers " 962 "than the slab layer.\n", addr); 963 } 964 } 965 return (cache_est); 966 } 967 968 #define READMAG_ROUNDS(rounds) { \ 969 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 970 mdb_warn("couldn't read magazine at %p", kmp); \ 971 goto fail; \ 972 } \ 973 for (i = 0; i < rounds; i++) { \ 974 maglist[magcnt++] = mp->mag_round[i]; \ 975 if (magcnt == magmax) { \ 976 mdb_warn("%d magazines exceeds fudge factor\n", \ 977 magcnt); \ 978 goto fail; \ 979 } \ 980 } \ 981 } 982 983 int 984 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 985 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 986 { 987 kmem_magazine_t *kmp, *mp; 988 void **maglist = NULL; 989 int i, cpu; 990 size_t magsize, magmax, magbsize; 991 size_t magcnt = 0; 992 993 /* 994 * Read the magtype out of the cache, after verifying the pointer's 995 * correctness. 996 */ 997 magsize = kmem_get_magsize(cp); 998 if (magsize == 0) { 999 *maglistp = NULL; 1000 *magcntp = 0; 1001 *magmaxp = 0; 1002 return (WALK_NEXT); 1003 } 1004 1005 /* 1006 * There are several places where we need to go buffer hunting: 1007 * the per-CPU loaded magazine, the per-CPU spare full magazine, 1008 * and the full magazine list in the depot. 1009 * 1010 * For an upper bound on the number of buffers in the magazine 1011 * layer, we have the number of magazines on the cache_full 1012 * list plus at most two magazines per CPU (the loaded and the 1013 * spare). Toss in 100 magazines as a fudge factor in case this 1014 * is live (the number "100" comes from the same fudge factor in 1015 * crash(1M)). 1016 */ 1017 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 1018 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 1019 1020 if (magbsize >= PAGESIZE / 2) { 1021 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 1022 addr, magbsize); 1023 return (WALK_ERR); 1024 } 1025 1026 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 1027 mp = mdb_alloc(magbsize, alloc_flags); 1028 if (mp == NULL || maglist == NULL) 1029 goto fail; 1030 1031 /* 1032 * First up: the magazines in the depot (i.e. on the cache_full list). 1033 */ 1034 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 1035 READMAG_ROUNDS(magsize); 1036 kmp = mp->mag_next; 1037 1038 if (kmp == cp->cache_full.ml_list) 1039 break; /* cache_full list loop detected */ 1040 } 1041 1042 dprintf(("cache_full list done\n")); 1043 1044 /* 1045 * Now whip through the CPUs, snagging the loaded magazines 1046 * and full spares. 
1047 */ 1048 for (cpu = 0; cpu < ncpus; cpu++) { 1049 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 1050 1051 dprintf(("reading cpu cache %p\n", 1052 (uintptr_t)ccp - (uintptr_t)cp + addr)); 1053 1054 if (ccp->cc_rounds > 0 && 1055 (kmp = ccp->cc_loaded) != NULL) { 1056 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 1057 READMAG_ROUNDS(ccp->cc_rounds); 1058 } 1059 1060 if (ccp->cc_prounds > 0 && 1061 (kmp = ccp->cc_ploaded) != NULL) { 1062 dprintf(("reading %d previously loaded rounds\n", 1063 ccp->cc_prounds)); 1064 READMAG_ROUNDS(ccp->cc_prounds); 1065 } 1066 } 1067 1068 dprintf(("magazine layer: %d buffers\n", magcnt)); 1069 1070 if (!(alloc_flags & UM_GC)) 1071 mdb_free(mp, magbsize); 1072 1073 *maglistp = maglist; 1074 *magcntp = magcnt; 1075 *magmaxp = magmax; 1076 1077 return (WALK_NEXT); 1078 1079 fail: 1080 if (!(alloc_flags & UM_GC)) { 1081 if (mp) 1082 mdb_free(mp, magbsize); 1083 if (maglist) 1084 mdb_free(maglist, magmax * sizeof (void *)); 1085 } 1086 return (WALK_ERR); 1087 } 1088 1089 static int 1090 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1091 { 1092 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1093 } 1094 1095 static int 1096 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1097 { 1098 kmem_bufctl_audit_t b; 1099 1100 /* 1101 * if KMF_AUDIT is not set, we know that we're looking at a 1102 * kmem_bufctl_t. 1103 */ 1104 if (!(cp->cache_flags & KMF_AUDIT) || 1105 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1106 (void) memset(&b, 0, sizeof (b)); 1107 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1108 mdb_warn("unable to read bufctl at %p", buf); 1109 return (WALK_ERR); 1110 } 1111 } 1112 1113 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1114 } 1115 1116 typedef struct kmem_walk { 1117 int kmw_type; 1118 1119 int kmw_addr; /* cache address */ 1120 kmem_cache_t *kmw_cp; 1121 size_t kmw_csize; 1122 1123 /* 1124 * magazine layer 1125 */ 1126 void **kmw_maglist; 1127 size_t kmw_max; 1128 size_t kmw_count; 1129 size_t kmw_pos; 1130 1131 /* 1132 * slab layer 1133 */ 1134 char *kmw_valid; /* to keep track of freed buffers */ 1135 char *kmw_ubase; /* buffer for slab data */ 1136 } kmem_walk_t; 1137 1138 static int 1139 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1140 { 1141 kmem_walk_t *kmw; 1142 int ncpus, csize; 1143 kmem_cache_t *cp; 1144 size_t vm_quantum; 1145 1146 size_t magmax, magcnt; 1147 void **maglist = NULL; 1148 uint_t chunksize, slabsize; 1149 int status = WALK_ERR; 1150 uintptr_t addr = wsp->walk_addr; 1151 const char *layered; 1152 1153 type &= ~KM_HASH; 1154 1155 if (addr == NULL) { 1156 mdb_warn("kmem walk doesn't support global walks\n"); 1157 return (WALK_ERR); 1158 } 1159 1160 dprintf(("walking %p\n", addr)); 1161 1162 /* 1163 * First we need to figure out how many CPUs are configured in the 1164 * system to know how much to slurp out. 1165 */ 1166 mdb_readvar(&ncpus, "max_ncpus"); 1167 1168 csize = KMEM_CACHE_SIZE(ncpus); 1169 cp = mdb_alloc(csize, UM_SLEEP); 1170 1171 if (mdb_vread(cp, csize, addr) == -1) { 1172 mdb_warn("couldn't read cache at addr %p", addr); 1173 goto out2; 1174 } 1175 1176 /* 1177 * It's easy for someone to hand us an invalid cache address. 1178 * Unfortunately, it is hard for this walker to survive an 1179 * invalid cache cleanly. So we make sure that: 1180 * 1181 * 1. the vmem arena for the cache is readable, 1182 * 2. the vmem arena's quantum is a power of 2, 1183 * 3. our slabsize is a multiple of the quantum, and 1184 * 4. 
our chunksize is >0 and less than our slabsize. 1185 */ 1186 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1187 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1188 vm_quantum == 0 || 1189 (vm_quantum & (vm_quantum - 1)) != 0 || 1190 cp->cache_slabsize < vm_quantum || 1191 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1192 cp->cache_chunksize == 0 || 1193 cp->cache_chunksize > cp->cache_slabsize) { 1194 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1195 goto out2; 1196 } 1197 1198 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1199 1200 if (cp->cache_buftotal == 0) { 1201 mdb_free(cp, csize); 1202 return (WALK_DONE); 1203 } 1204 1205 /* 1206 * If they ask for bufctls, but it's a small-slab cache, 1207 * there is nothing to report. 1208 */ 1209 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1210 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1211 cp->cache_flags)); 1212 mdb_free(cp, csize); 1213 return (WALK_DONE); 1214 } 1215 1216 /* 1217 * If they want constructed buffers, but there's no constructor or 1218 * the cache has DEADBEEF checking enabled, there is nothing to report. 1219 */ 1220 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1221 cp->cache_constructor == NULL || 1222 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1223 mdb_free(cp, csize); 1224 return (WALK_DONE); 1225 } 1226 1227 /* 1228 * Read in the contents of the magazine layer 1229 */ 1230 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1231 &magmax, UM_SLEEP) == WALK_ERR) 1232 goto out2; 1233 1234 /* 1235 * We have all of the buffers from the magazines; if we are walking 1236 * allocated buffers, sort them so we can bsearch them later. 1237 */ 1238 if (type & KM_ALLOCATED) 1239 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1240 1241 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1242 1243 kmw->kmw_type = type; 1244 kmw->kmw_addr = addr; 1245 kmw->kmw_cp = cp; 1246 kmw->kmw_csize = csize; 1247 kmw->kmw_maglist = maglist; 1248 kmw->kmw_max = magmax; 1249 kmw->kmw_count = magcnt; 1250 kmw->kmw_pos = 0; 1251 1252 /* 1253 * When walking allocated buffers in a KMF_HASH cache, we walk the 1254 * hash table instead of the slab layer. 1255 */ 1256 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1257 layered = "kmem_hash"; 1258 1259 kmw->kmw_type |= KM_HASH; 1260 } else { 1261 /* 1262 * If we are walking freed buffers, we only need the 1263 * magazine layer plus the partially allocated slabs. 1264 * To walk allocated buffers, we need all of the slabs. 1265 */ 1266 if (type & KM_ALLOCATED) 1267 layered = "kmem_slab"; 1268 else 1269 layered = "kmem_slab_partial"; 1270 1271 /* 1272 * for small-slab caches, we read in the entire slab. For 1273 * freed buffers, we can just walk the freelist. For 1274 * allocated buffers, we use a 'valid' array to track 1275 * the freed buffers. 
1276 */ 1277 if (!(cp->cache_flags & KMF_HASH)) { 1278 chunksize = cp->cache_chunksize; 1279 slabsize = cp->cache_slabsize; 1280 1281 kmw->kmw_ubase = mdb_alloc(slabsize + 1282 sizeof (kmem_bufctl_t), UM_SLEEP); 1283 1284 if (type & KM_ALLOCATED) 1285 kmw->kmw_valid = 1286 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1287 } 1288 } 1289 1290 status = WALK_NEXT; 1291 1292 if (mdb_layered_walk(layered, wsp) == -1) { 1293 mdb_warn("unable to start layered '%s' walk", layered); 1294 status = WALK_ERR; 1295 } 1296 1297 out1: 1298 if (status == WALK_ERR) { 1299 if (kmw->kmw_valid) 1300 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1301 1302 if (kmw->kmw_ubase) 1303 mdb_free(kmw->kmw_ubase, slabsize + 1304 sizeof (kmem_bufctl_t)); 1305 1306 if (kmw->kmw_maglist) 1307 mdb_free(kmw->kmw_maglist, 1308 kmw->kmw_max * sizeof (uintptr_t)); 1309 1310 mdb_free(kmw, sizeof (kmem_walk_t)); 1311 wsp->walk_data = NULL; 1312 } 1313 1314 out2: 1315 if (status == WALK_ERR) 1316 mdb_free(cp, csize); 1317 1318 return (status); 1319 } 1320 1321 int 1322 kmem_walk_step(mdb_walk_state_t *wsp) 1323 { 1324 kmem_walk_t *kmw = wsp->walk_data; 1325 int type = kmw->kmw_type; 1326 kmem_cache_t *cp = kmw->kmw_cp; 1327 1328 void **maglist = kmw->kmw_maglist; 1329 int magcnt = kmw->kmw_count; 1330 1331 uintptr_t chunksize, slabsize; 1332 uintptr_t addr; 1333 const kmem_slab_t *sp; 1334 const kmem_bufctl_t *bcp; 1335 kmem_bufctl_t bc; 1336 1337 int chunks; 1338 char *kbase; 1339 void *buf; 1340 int i, ret; 1341 1342 char *valid, *ubase; 1343 1344 /* 1345 * first, handle the 'kmem_hash' layered walk case 1346 */ 1347 if (type & KM_HASH) { 1348 /* 1349 * We have a buffer which has been allocated out of the 1350 * global layer. We need to make sure that it's not 1351 * actually sitting in a magazine before we report it as 1352 * an allocated buffer. 1353 */ 1354 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1355 1356 if (magcnt > 0 && 1357 bsearch(&buf, maglist, magcnt, sizeof (void *), 1358 addrcmp) != NULL) 1359 return (WALK_NEXT); 1360 1361 if (type & KM_BUFCTL) 1362 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1363 1364 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1365 } 1366 1367 ret = WALK_NEXT; 1368 1369 addr = kmw->kmw_addr; 1370 1371 /* 1372 * If we're walking freed buffers, report everything in the 1373 * magazine layer before processing the first slab. 1374 */ 1375 if ((type & KM_FREE) && magcnt != 0) { 1376 kmw->kmw_count = 0; /* only do this once */ 1377 for (i = 0; i < magcnt; i++) { 1378 buf = maglist[i]; 1379 1380 if (type & KM_BUFCTL) { 1381 uintptr_t out; 1382 1383 if (cp->cache_flags & KMF_BUFTAG) { 1384 kmem_buftag_t *btp; 1385 kmem_buftag_t tag; 1386 1387 /* LINTED - alignment */ 1388 btp = KMEM_BUFTAG(cp, buf); 1389 if (mdb_vread(&tag, sizeof (tag), 1390 (uintptr_t)btp) == -1) { 1391 mdb_warn("reading buftag for " 1392 "%p at %p", buf, btp); 1393 continue; 1394 } 1395 out = (uintptr_t)tag.bt_bufctl; 1396 } else { 1397 if (kmem_hash_lookup(cp, addr, buf, 1398 &out) == -1) 1399 continue; 1400 } 1401 ret = bufctl_walk_callback(cp, wsp, out); 1402 } else { 1403 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1404 } 1405 1406 if (ret != WALK_NEXT) 1407 return (ret); 1408 } 1409 } 1410 1411 /* 1412 * If they want constructed buffers, we're finished, since the 1413 * magazine layer holds them all. 
1414 */ 1415 if (type & KM_CONSTRUCTED) 1416 return (WALK_DONE); 1417 1418 /* 1419 * Handle the buffers in the current slab 1420 */ 1421 chunksize = cp->cache_chunksize; 1422 slabsize = cp->cache_slabsize; 1423 1424 sp = wsp->walk_layer; 1425 chunks = sp->slab_chunks; 1426 kbase = sp->slab_base; 1427 1428 dprintf(("kbase is %p\n", kbase)); 1429 1430 if (!(cp->cache_flags & KMF_HASH)) { 1431 valid = kmw->kmw_valid; 1432 ubase = kmw->kmw_ubase; 1433 1434 if (mdb_vread(ubase, chunks * chunksize, 1435 (uintptr_t)kbase) == -1) { 1436 mdb_warn("failed to read slab contents at %p", kbase); 1437 return (WALK_ERR); 1438 } 1439 1440 /* 1441 * Set up the valid map as fully allocated -- we'll punch 1442 * out the freelist. 1443 */ 1444 if (type & KM_ALLOCATED) 1445 (void) memset(valid, 1, chunks); 1446 } else { 1447 valid = NULL; 1448 ubase = NULL; 1449 } 1450 1451 /* 1452 * walk the slab's freelist 1453 */ 1454 bcp = sp->slab_head; 1455 1456 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1457 1458 /* 1459 * since we could be in the middle of allocating a buffer, 1460 * our refcnt could be one higher than it aught. So we 1461 * check one further on the freelist than the count allows. 1462 */ 1463 for (i = sp->slab_refcnt; i <= chunks; i++) { 1464 uint_t ndx; 1465 1466 dprintf(("bcp is %p\n", bcp)); 1467 1468 if (bcp == NULL) { 1469 if (i == chunks) 1470 break; 1471 mdb_warn( 1472 "slab %p in cache %p freelist too short by %d\n", 1473 sp, addr, chunks - i); 1474 break; 1475 } 1476 1477 if (cp->cache_flags & KMF_HASH) { 1478 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1479 mdb_warn("failed to read bufctl ptr at %p", 1480 bcp); 1481 break; 1482 } 1483 buf = bc.bc_addr; 1484 } else { 1485 /* 1486 * Otherwise the buffer is in the slab which 1487 * we've read in; we just need to determine 1488 * its offset in the slab to find the 1489 * kmem_bufctl_t. 1490 */ 1491 bc = *((kmem_bufctl_t *) 1492 ((uintptr_t)bcp - (uintptr_t)kbase + 1493 (uintptr_t)ubase)); 1494 1495 buf = KMEM_BUF(cp, bcp); 1496 } 1497 1498 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1499 1500 if (ndx > slabsize / cp->cache_bufsize) { 1501 /* 1502 * This is very wrong; we have managed to find 1503 * a buffer in the slab which shouldn't 1504 * actually be here. Emit a warning, and 1505 * try to continue. 1506 */ 1507 mdb_warn("buf %p is out of range for " 1508 "slab %p, cache %p\n", buf, sp, addr); 1509 } else if (type & KM_ALLOCATED) { 1510 /* 1511 * we have found a buffer on the slab's freelist; 1512 * clear its entry 1513 */ 1514 valid[ndx] = 0; 1515 } else { 1516 /* 1517 * Report this freed buffer 1518 */ 1519 if (type & KM_BUFCTL) { 1520 ret = bufctl_walk_callback(cp, wsp, 1521 (uintptr_t)bcp); 1522 } else { 1523 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1524 } 1525 if (ret != WALK_NEXT) 1526 return (ret); 1527 } 1528 1529 bcp = bc.bc_next; 1530 } 1531 1532 if (bcp != NULL) { 1533 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1534 sp, addr, bcp)); 1535 } 1536 1537 /* 1538 * If we are walking freed buffers, the loop above handled reporting 1539 * them. 1540 */ 1541 if (type & KM_FREE) 1542 return (WALK_NEXT); 1543 1544 if (type & KM_BUFCTL) { 1545 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1546 "cache %p\n", addr); 1547 return (WALK_ERR); 1548 } 1549 1550 /* 1551 * Report allocated buffers, skipping buffers in the magazine layer. 1552 * We only get this far for small-slab caches. 
1553 */ 1554 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1555 buf = (char *)kbase + i * chunksize; 1556 1557 if (!valid[i]) 1558 continue; /* on slab freelist */ 1559 1560 if (magcnt > 0 && 1561 bsearch(&buf, maglist, magcnt, sizeof (void *), 1562 addrcmp) != NULL) 1563 continue; /* in magazine layer */ 1564 1565 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1566 } 1567 return (ret); 1568 } 1569 1570 void 1571 kmem_walk_fini(mdb_walk_state_t *wsp) 1572 { 1573 kmem_walk_t *kmw = wsp->walk_data; 1574 uintptr_t chunksize; 1575 uintptr_t slabsize; 1576 1577 if (kmw == NULL) 1578 return; 1579 1580 if (kmw->kmw_maglist != NULL) 1581 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1582 1583 chunksize = kmw->kmw_cp->cache_chunksize; 1584 slabsize = kmw->kmw_cp->cache_slabsize; 1585 1586 if (kmw->kmw_valid != NULL) 1587 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1588 if (kmw->kmw_ubase != NULL) 1589 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1590 1591 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1592 mdb_free(kmw, sizeof (kmem_walk_t)); 1593 } 1594 1595 /*ARGSUSED*/ 1596 static int 1597 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1598 { 1599 /* 1600 * Buffers allocated from NOTOUCH caches can also show up as freed 1601 * memory in other caches. This can be a little confusing, so we 1602 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1603 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1604 */ 1605 if (c->cache_cflags & KMC_NOTOUCH) 1606 return (WALK_NEXT); 1607 1608 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1609 wsp->walk_cbdata, addr) == -1) 1610 return (WALK_DONE); 1611 1612 return (WALK_NEXT); 1613 } 1614 1615 #define KMEM_WALK_ALL(name, wsp) { \ 1616 wsp->walk_data = (name); \ 1617 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1618 return (WALK_ERR); \ 1619 return (WALK_DONE); \ 1620 } 1621 1622 int 1623 kmem_walk_init(mdb_walk_state_t *wsp) 1624 { 1625 if (wsp->walk_arg != NULL) 1626 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1627 1628 if (wsp->walk_addr == NULL) 1629 KMEM_WALK_ALL("kmem", wsp); 1630 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1631 } 1632 1633 int 1634 bufctl_walk_init(mdb_walk_state_t *wsp) 1635 { 1636 if (wsp->walk_addr == NULL) 1637 KMEM_WALK_ALL("bufctl", wsp); 1638 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1639 } 1640 1641 int 1642 freemem_walk_init(mdb_walk_state_t *wsp) 1643 { 1644 if (wsp->walk_addr == NULL) 1645 KMEM_WALK_ALL("freemem", wsp); 1646 return (kmem_walk_init_common(wsp, KM_FREE)); 1647 } 1648 1649 int 1650 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1651 { 1652 if (wsp->walk_addr == NULL) 1653 KMEM_WALK_ALL("freemem_constructed", wsp); 1654 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1655 } 1656 1657 int 1658 freectl_walk_init(mdb_walk_state_t *wsp) 1659 { 1660 if (wsp->walk_addr == NULL) 1661 KMEM_WALK_ALL("freectl", wsp); 1662 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1663 } 1664 1665 int 1666 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1667 { 1668 if (wsp->walk_addr == NULL) 1669 KMEM_WALK_ALL("freectl_constructed", wsp); 1670 return (kmem_walk_init_common(wsp, 1671 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1672 } 1673 1674 typedef struct bufctl_history_walk { 1675 void *bhw_next; 1676 kmem_cache_t *bhw_cache; 1677 kmem_slab_t *bhw_slab; 1678 hrtime_t bhw_timestamp; 1679 } bufctl_history_walk_t; 1680 1681 int 1682 
bufctl_history_walk_init(mdb_walk_state_t *wsp) 1683 { 1684 bufctl_history_walk_t *bhw; 1685 kmem_bufctl_audit_t bc; 1686 kmem_bufctl_audit_t bcn; 1687 1688 if (wsp->walk_addr == NULL) { 1689 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1690 return (WALK_ERR); 1691 } 1692 1693 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1694 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1695 return (WALK_ERR); 1696 } 1697 1698 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1699 bhw->bhw_timestamp = 0; 1700 bhw->bhw_cache = bc.bc_cache; 1701 bhw->bhw_slab = bc.bc_slab; 1702 1703 /* 1704 * sometimes the first log entry matches the base bufctl; in that 1705 * case, skip the base bufctl. 1706 */ 1707 if (bc.bc_lastlog != NULL && 1708 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1709 bc.bc_addr == bcn.bc_addr && 1710 bc.bc_cache == bcn.bc_cache && 1711 bc.bc_slab == bcn.bc_slab && 1712 bc.bc_timestamp == bcn.bc_timestamp && 1713 bc.bc_thread == bcn.bc_thread) 1714 bhw->bhw_next = bc.bc_lastlog; 1715 else 1716 bhw->bhw_next = (void *)wsp->walk_addr; 1717 1718 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1719 wsp->walk_data = bhw; 1720 1721 return (WALK_NEXT); 1722 } 1723 1724 int 1725 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1726 { 1727 bufctl_history_walk_t *bhw = wsp->walk_data; 1728 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1729 uintptr_t baseaddr = wsp->walk_addr; 1730 kmem_bufctl_audit_t bc; 1731 1732 if (addr == NULL) 1733 return (WALK_DONE); 1734 1735 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1736 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1737 return (WALK_ERR); 1738 } 1739 1740 /* 1741 * The bufctl is only valid if the address, cache, and slab are 1742 * correct. We also check that the timestamp is decreasing, to 1743 * prevent infinite loops. 1744 */ 1745 if ((uintptr_t)bc.bc_addr != baseaddr || 1746 bc.bc_cache != bhw->bhw_cache || 1747 bc.bc_slab != bhw->bhw_slab || 1748 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1749 return (WALK_DONE); 1750 1751 bhw->bhw_next = bc.bc_lastlog; 1752 bhw->bhw_timestamp = bc.bc_timestamp; 1753 1754 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1755 } 1756 1757 void 1758 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1759 { 1760 bufctl_history_walk_t *bhw = wsp->walk_data; 1761 1762 mdb_free(bhw, sizeof (*bhw)); 1763 } 1764 1765 typedef struct kmem_log_walk { 1766 kmem_bufctl_audit_t *klw_base; 1767 kmem_bufctl_audit_t **klw_sorted; 1768 kmem_log_header_t klw_lh; 1769 size_t klw_size; 1770 size_t klw_maxndx; 1771 size_t klw_ndx; 1772 } kmem_log_walk_t; 1773 1774 int 1775 kmem_log_walk_init(mdb_walk_state_t *wsp) 1776 { 1777 uintptr_t lp = wsp->walk_addr; 1778 kmem_log_walk_t *klw; 1779 kmem_log_header_t *lhp; 1780 int maxndx, i, j, k; 1781 1782 /* 1783 * By default (global walk), walk the kmem_transaction_log. Otherwise 1784 * read the log whose kmem_log_header_t is stored at walk_addr. 
1785 */ 1786 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1787 mdb_warn("failed to read 'kmem_transaction_log'"); 1788 return (WALK_ERR); 1789 } 1790 1791 if (lp == NULL) { 1792 mdb_warn("log is disabled\n"); 1793 return (WALK_ERR); 1794 } 1795 1796 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1797 lhp = &klw->klw_lh; 1798 1799 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1800 mdb_warn("failed to read log header at %p", lp); 1801 mdb_free(klw, sizeof (kmem_log_walk_t)); 1802 return (WALK_ERR); 1803 } 1804 1805 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1806 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1807 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1808 1809 if (mdb_vread(klw->klw_base, klw->klw_size, 1810 (uintptr_t)lhp->lh_base) == -1) { 1811 mdb_warn("failed to read log at base %p", lhp->lh_base); 1812 mdb_free(klw->klw_base, klw->klw_size); 1813 mdb_free(klw, sizeof (kmem_log_walk_t)); 1814 return (WALK_ERR); 1815 } 1816 1817 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1818 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1819 1820 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1821 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1822 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1823 1824 for (j = 0; j < maxndx; j++) 1825 klw->klw_sorted[k++] = &chunk[j]; 1826 } 1827 1828 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1829 (int(*)(const void *, const void *))bufctlcmp); 1830 1831 klw->klw_maxndx = k; 1832 wsp->walk_data = klw; 1833 1834 return (WALK_NEXT); 1835 } 1836 1837 int 1838 kmem_log_walk_step(mdb_walk_state_t *wsp) 1839 { 1840 kmem_log_walk_t *klw = wsp->walk_data; 1841 kmem_bufctl_audit_t *bcp; 1842 1843 if (klw->klw_ndx == klw->klw_maxndx) 1844 return (WALK_DONE); 1845 1846 bcp = klw->klw_sorted[klw->klw_ndx++]; 1847 1848 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1849 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1850 } 1851 1852 void 1853 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1854 { 1855 kmem_log_walk_t *klw = wsp->walk_data; 1856 1857 mdb_free(klw->klw_base, klw->klw_size); 1858 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1859 sizeof (kmem_bufctl_audit_t *)); 1860 mdb_free(klw, sizeof (kmem_log_walk_t)); 1861 } 1862 1863 typedef struct allocdby_bufctl { 1864 uintptr_t abb_addr; 1865 hrtime_t abb_ts; 1866 } allocdby_bufctl_t; 1867 1868 typedef struct allocdby_walk { 1869 const char *abw_walk; 1870 uintptr_t abw_thread; 1871 size_t abw_nbufs; 1872 size_t abw_size; 1873 allocdby_bufctl_t *abw_buf; 1874 size_t abw_ndx; 1875 } allocdby_walk_t; 1876 1877 int 1878 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1879 allocdby_walk_t *abw) 1880 { 1881 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1882 return (WALK_NEXT); 1883 1884 if (abw->abw_nbufs == abw->abw_size) { 1885 allocdby_bufctl_t *buf; 1886 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1887 1888 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1889 1890 bcopy(abw->abw_buf, buf, oldsize); 1891 mdb_free(abw->abw_buf, oldsize); 1892 1893 abw->abw_size <<= 1; 1894 abw->abw_buf = buf; 1895 } 1896 1897 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1898 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1899 abw->abw_nbufs++; 1900 1901 return (WALK_NEXT); 1902 } 1903 1904 /*ARGSUSED*/ 1905 int 1906 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1907 { 1908 if (mdb_pwalk(abw->abw_walk, 
(mdb_walk_cb_t)allocdby_walk_bufctl, 1909 abw, addr) == -1) { 1910 mdb_warn("couldn't walk bufctl for cache %p", addr); 1911 return (WALK_DONE); 1912 } 1913 1914 return (WALK_NEXT); 1915 } 1916 1917 static int 1918 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1919 { 1920 if (lhs->abb_ts < rhs->abb_ts) 1921 return (1); 1922 if (lhs->abb_ts > rhs->abb_ts) 1923 return (-1); 1924 return (0); 1925 } 1926 1927 static int 1928 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1929 { 1930 allocdby_walk_t *abw; 1931 1932 if (wsp->walk_addr == NULL) { 1933 mdb_warn("allocdby walk doesn't support global walks\n"); 1934 return (WALK_ERR); 1935 } 1936 1937 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1938 1939 abw->abw_thread = wsp->walk_addr; 1940 abw->abw_walk = walk; 1941 abw->abw_size = 128; /* something reasonable */ 1942 abw->abw_buf = 1943 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1944 1945 wsp->walk_data = abw; 1946 1947 if (mdb_walk("kmem_cache", 1948 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1949 mdb_warn("couldn't walk kmem_cache"); 1950 allocdby_walk_fini(wsp); 1951 return (WALK_ERR); 1952 } 1953 1954 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1955 (int(*)(const void *, const void *))allocdby_cmp); 1956 1957 return (WALK_NEXT); 1958 } 1959 1960 int 1961 allocdby_walk_init(mdb_walk_state_t *wsp) 1962 { 1963 return (allocdby_walk_init_common(wsp, "bufctl")); 1964 } 1965 1966 int 1967 freedby_walk_init(mdb_walk_state_t *wsp) 1968 { 1969 return (allocdby_walk_init_common(wsp, "freectl")); 1970 } 1971 1972 int 1973 allocdby_walk_step(mdb_walk_state_t *wsp) 1974 { 1975 allocdby_walk_t *abw = wsp->walk_data; 1976 kmem_bufctl_audit_t bc; 1977 uintptr_t addr; 1978 1979 if (abw->abw_ndx == abw->abw_nbufs) 1980 return (WALK_DONE); 1981 1982 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1983 1984 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1985 mdb_warn("couldn't read bufctl at %p", addr); 1986 return (WALK_DONE); 1987 } 1988 1989 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1990 } 1991 1992 void 1993 allocdby_walk_fini(mdb_walk_state_t *wsp) 1994 { 1995 allocdby_walk_t *abw = wsp->walk_data; 1996 1997 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1998 mdb_free(abw, sizeof (allocdby_walk_t)); 1999 } 2000 2001 /*ARGSUSED*/ 2002 int 2003 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 2004 { 2005 char c[MDB_SYM_NAMLEN]; 2006 GElf_Sym sym; 2007 int i; 2008 2009 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 2010 for (i = 0; i < bcp->bc_depth; i++) { 2011 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2012 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2013 continue; 2014 if (strncmp(c, "kmem_", 5) == 0) 2015 continue; 2016 mdb_printf("%s+0x%lx", 2017 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 2018 break; 2019 } 2020 mdb_printf("\n"); 2021 2022 return (WALK_NEXT); 2023 } 2024 2025 static int 2026 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 2027 { 2028 if (!(flags & DCMD_ADDRSPEC)) 2029 return (DCMD_USAGE); 2030 2031 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 2032 2033 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 2034 mdb_warn("can't walk '%s' for %p", w, addr); 2035 return (DCMD_ERR); 2036 } 2037 2038 return (DCMD_OK); 2039 } 2040 2041 /*ARGSUSED*/ 2042 int 2043 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2044 { 2045 return (allocdby_common(addr, 
flags, "allocdby")); 2046 } 2047 2048 /*ARGSUSED*/ 2049 int 2050 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2051 { 2052 return (allocdby_common(addr, flags, "freedby")); 2053 } 2054 2055 /* 2056 * Return a string describing the address in relation to the given thread's 2057 * stack. 2058 * 2059 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 2060 * 2061 * - If the address is above the stack pointer, return an empty string 2062 * signifying that the address is active. 2063 * 2064 * - If the address is below the stack pointer, and the thread is not on proc, 2065 * return " (below sp)". 2066 * 2067 * - If the address is below the stack pointer, and the thread is on proc, 2068 * return " (possibly below sp)". Depending on context, we may or may not 2069 * have an accurate t_sp. 2070 */ 2071 static const char * 2072 stack_active(const kthread_t *t, uintptr_t addr) 2073 { 2074 uintptr_t panicstk; 2075 GElf_Sym sym; 2076 2077 if (t->t_state == TS_FREE) 2078 return (" (inactive interrupt thread)"); 2079 2080 /* 2081 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 2082 * no longer relates to the thread's real stack. 2083 */ 2084 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 2085 panicstk = (uintptr_t)sym.st_value; 2086 2087 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 2088 return (""); 2089 } 2090 2091 if (addr >= t->t_sp + STACK_BIAS) 2092 return (""); 2093 2094 if (t->t_state == TS_ONPROC) 2095 return (" (possibly below sp)"); 2096 2097 return (" (below sp)"); 2098 } 2099 2100 typedef struct whatis { 2101 uintptr_t w_addr; 2102 const kmem_cache_t *w_cache; 2103 const vmem_t *w_vmem; 2104 size_t w_slab_align; 2105 int w_slab_found; 2106 int w_found; 2107 int w_kmem_lite_count; 2108 uint_t w_verbose; 2109 uint_t w_freemem; 2110 uint_t w_all; 2111 uint_t w_bufctl; 2112 uint_t w_idspace; 2113 } whatis_t; 2114 2115 static void 2116 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w) 2117 { 2118 /* LINTED pointer cast may result in improper alignment */ 2119 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr); 2120 intptr_t stat; 2121 int count = 0; 2122 int i; 2123 pc_t callers[16]; 2124 2125 if (w->w_cache->cache_flags & KMF_REDZONE) { 2126 kmem_buftag_t bt; 2127 2128 if (mdb_vread(&bt, sizeof (bt), btaddr) == -1) 2129 goto done; 2130 2131 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 2132 2133 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) 2134 goto done; 2135 2136 /* 2137 * provide the bufctl ptr if it has useful information 2138 */ 2139 if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT)) 2140 baddr = (uintptr_t)bt.bt_bufctl; 2141 2142 if (w->w_cache->cache_flags & KMF_LITE) { 2143 count = w->w_kmem_lite_count; 2144 2145 if (count * sizeof (pc_t) > sizeof (callers)) 2146 count = 0; 2147 2148 if (count > 0 && 2149 mdb_vread(callers, count * sizeof (pc_t), 2150 btaddr + 2151 offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2152 count = 0; 2153 2154 /* 2155 * skip unused callers 2156 */ 2157 while (count > 0 && callers[count - 1] == 2158 (pc_t)KMEM_UNINITIALIZED_PATTERN) 2159 count--; 2160 } 2161 } 2162 2163 done: 2164 if (baddr == 0) 2165 mdb_printf("%p is %p+%p, %s from %s\n", 2166 w->w_addr, addr, w->w_addr - addr, 2167 w->w_freemem == FALSE ? "allocated" : "freed", 2168 w->w_cache->cache_name); 2169 else 2170 mdb_printf("%p is %p+%p, bufctl %p %s from %s\n", 2171 w->w_addr, addr, w->w_addr - addr, baddr, 2172 w->w_freemem == FALSE ? 
"allocated" : "freed", 2173 w->w_cache->cache_name); 2174 2175 if (count > 0) { 2176 mdb_inc_indent(8); 2177 mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"", 2178 callers[0], (count != 1)? ", ":"\n"); 2179 for (i = 1; i < count; i++) 2180 mdb_printf("%a%s", callers[i], 2181 (i + 1 < count)? ", ":"\n"); 2182 mdb_dec_indent(8); 2183 } 2184 } 2185 2186 /*ARGSUSED*/ 2187 static int 2188 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w) 2189 { 2190 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2191 return (WALK_NEXT); 2192 2193 whatis_print_kmem(addr, 0, w); 2194 w->w_found++; 2195 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2196 } 2197 2198 static int 2199 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 2200 { 2201 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 2202 return (WALK_NEXT); 2203 2204 mdb_printf("%p is %p+%p ", w->w_addr, 2205 vs->vs_start, w->w_addr - vs->vs_start); 2206 2207 /* 2208 * Always provide the vmem_seg pointer if it has a stack trace. 2209 */ 2210 if (w->w_bufctl == TRUE || 2211 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) { 2212 mdb_printf("(vmem_seg %p) ", addr); 2213 } 2214 2215 mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ? 2216 "freed " : "", w->w_vmem->vm_name); 2217 2218 w->w_found++; 2219 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2220 } 2221 2222 static int 2223 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 2224 { 2225 const char *nm = vmem->vm_name; 2226 w->w_vmem = vmem; 2227 w->w_freemem = FALSE; 2228 2229 if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2230 return (WALK_NEXT); 2231 2232 if (w->w_verbose) 2233 mdb_printf("Searching vmem arena %s...\n", nm); 2234 2235 if (mdb_pwalk("vmem_alloc", 2236 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2237 mdb_warn("can't walk vmem seg for %p", addr); 2238 return (WALK_NEXT); 2239 } 2240 2241 if (w->w_found && w->w_all == FALSE) 2242 return (WALK_DONE); 2243 2244 if (w->w_verbose) 2245 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 2246 2247 w->w_freemem = TRUE; 2248 2249 if (mdb_pwalk("vmem_free", 2250 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2251 mdb_warn("can't walk vmem seg for %p", addr); 2252 return (WALK_NEXT); 2253 } 2254 2255 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 2256 } 2257 2258 /*ARGSUSED*/ 2259 static int 2260 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w) 2261 { 2262 uintptr_t addr; 2263 2264 if (bcp == NULL) 2265 return (WALK_NEXT); 2266 2267 addr = (uintptr_t)bcp->bc_addr; 2268 2269 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2270 return (WALK_NEXT); 2271 2272 whatis_print_kmem(addr, baddr, w); 2273 w->w_found++; 2274 return (w->w_all == TRUE ? 
WALK_NEXT : WALK_DONE); 2275 } 2276 2277 /*ARGSUSED*/ 2278 static int 2279 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w) 2280 { 2281 uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align); 2282 2283 if ((w->w_addr - base) >= w->w_cache->cache_slabsize) 2284 return (WALK_NEXT); 2285 2286 w->w_slab_found++; 2287 return (WALK_DONE); 2288 } 2289 2290 static int 2291 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2292 { 2293 char *walk, *freewalk; 2294 mdb_walk_cb_t func; 2295 vmem_t *vmp = c->cache_arena; 2296 2297 if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2298 return (WALK_NEXT); 2299 2300 if (w->w_bufctl == FALSE) { 2301 walk = "kmem"; 2302 freewalk = "freemem"; 2303 func = (mdb_walk_cb_t)whatis_walk_kmem; 2304 } else { 2305 walk = "bufctl"; 2306 freewalk = "freectl"; 2307 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2308 } 2309 2310 w->w_cache = c; 2311 2312 if (w->w_verbose) 2313 mdb_printf("Searching %s's slabs...\n", c->cache_name); 2314 2315 /* 2316 * Verify that the address is in one of the cache's slabs. If not, 2317 * we can skip the more expensive walkers. (this is purely a 2318 * heuristic -- as long as there are no false-negatives, we'll be fine) 2319 * 2320 * We try to get the cache's arena's quantum, since to accurately 2321 * get the base of a slab, you have to align it to the quantum. If 2322 * it doesn't look sensible, we fall back to not aligning. 2323 */ 2324 if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align), 2325 (uintptr_t)&vmp->vm_quantum) == -1) { 2326 mdb_warn("unable to read %p->cache_arena->vm_quantum", c); 2327 w->w_slab_align = 1; 2328 } 2329 2330 if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 || 2331 (w->w_slab_align & (w->w_slab_align - 1))) { 2332 mdb_warn("%p's arena has invalid quantum (0x%p)\n", c, 2333 w->w_slab_align); 2334 w->w_slab_align = 1; 2335 } 2336 2337 w->w_slab_found = 0; 2338 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w, 2339 addr) == -1) { 2340 mdb_warn("can't find kmem_slab walker"); 2341 return (WALK_DONE); 2342 } 2343 if (w->w_slab_found == 0) 2344 return (WALK_NEXT); 2345 2346 if (c->cache_flags & KMF_LITE) { 2347 if (mdb_readvar(&w->w_kmem_lite_count, 2348 "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16) 2349 w->w_kmem_lite_count = 0; 2350 } 2351 2352 if (w->w_verbose) 2353 mdb_printf("Searching %s...\n", c->cache_name); 2354 2355 w->w_freemem = FALSE; 2356 2357 if (mdb_pwalk(walk, func, w, addr) == -1) { 2358 mdb_warn("can't find %s walker", walk); 2359 return (WALK_DONE); 2360 } 2361 2362 if (w->w_found && w->w_all == FALSE) 2363 return (WALK_DONE); 2364 2365 /* 2366 * We have searched for allocated memory; now search for freed memory. 2367 */ 2368 if (w->w_verbose) 2369 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2370 2371 w->w_freemem = TRUE; 2372 2373 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 2374 mdb_warn("can't find %s walker", freewalk); 2375 return (WALK_DONE); 2376 } 2377 2378 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 2379 } 2380 2381 static int 2382 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2383 { 2384 if (c->cache_cflags & KMC_NOTOUCH) 2385 return (WALK_NEXT); 2386 2387 return (whatis_walk_cache(addr, c, w)); 2388 } 2389 2390 static int 2391 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2392 { 2393 if (!(c->cache_cflags & KMC_NOTOUCH)) 2394 return (WALK_NEXT); 2395 2396 return (whatis_walk_cache(addr, c, w)); 2397 } 2398 2399 static int 2400 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w) 2401 { 2402 /* 2403 * Often, one calls ::whatis on an address from a thread structure. 2404 * We use this opportunity to short circuit this case... 2405 */ 2406 if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) { 2407 mdb_printf("%p is %p+%p, allocated as a thread structure\n", 2408 w->w_addr, addr, w->w_addr - addr); 2409 w->w_found++; 2410 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2411 } 2412 2413 if (w->w_addr < (uintptr_t)t->t_stkbase || 2414 w->w_addr > (uintptr_t)t->t_stk) 2415 return (WALK_NEXT); 2416 2417 if (t->t_stkbase == NULL) 2418 return (WALK_NEXT); 2419 2420 mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr, 2421 stack_active(t, w->w_addr)); 2422 2423 w->w_found++; 2424 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2425 } 2426 2427 static int 2428 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w) 2429 { 2430 struct module mod; 2431 char name[MODMAXNAMELEN], *where; 2432 char c[MDB_SYM_NAMLEN]; 2433 Shdr shdr; 2434 GElf_Sym sym; 2435 2436 if (m->mod_mp == NULL) 2437 return (WALK_NEXT); 2438 2439 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2440 mdb_warn("couldn't read modctl %p's module", addr); 2441 return (WALK_NEXT); 2442 } 2443 2444 if (w->w_addr >= (uintptr_t)mod.text && 2445 w->w_addr < (uintptr_t)mod.text + mod.text_size) { 2446 where = "text segment"; 2447 goto found; 2448 } 2449 2450 if (w->w_addr >= (uintptr_t)mod.data && 2451 w->w_addr < (uintptr_t)mod.data + mod.data_size) { 2452 where = "data segment"; 2453 goto found; 2454 } 2455 2456 if (w->w_addr >= (uintptr_t)mod.bss && 2457 w->w_addr < (uintptr_t)mod.bss + mod.bss_size) { 2458 where = "bss"; 2459 goto found; 2460 } 2461 2462 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2463 mdb_warn("couldn't read symbol header for %p's module", addr); 2464 return (WALK_NEXT); 2465 } 2466 2467 if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr < 2468 (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) { 2469 where = "symtab"; 2470 goto found; 2471 } 2472 2473 if (w->w_addr >= (uintptr_t)mod.symspace && 2474 w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) { 2475 where = "symspace"; 2476 goto found; 2477 } 2478 2479 return (WALK_NEXT); 2480 2481 found: 2482 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2483 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2484 2485 mdb_printf("%p is ", w->w_addr); 2486 2487 /* 2488 * If we found this address in a module, then there's a chance that 2489 * it's actually a named symbol. Try the symbol lookup. 
2490 */ 2491 if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c), 2492 &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value && 2493 w->w_addr < (uintptr_t)sym.st_value + sym.st_size) { 2494 mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value); 2495 } 2496 2497 mdb_printf("in %s's %s\n", name, where); 2498 2499 w->w_found++; 2500 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2501 } 2502 2503 /*ARGSUSED*/ 2504 static int 2505 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w) 2506 { 2507 static int machsize = 0; 2508 mdb_ctf_id_t id; 2509 2510 if (machsize == 0) { 2511 if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0) 2512 machsize = mdb_ctf_type_size(id); 2513 else { 2514 mdb_warn("could not get size of page_t"); 2515 machsize = sizeof (page_t); 2516 } 2517 } 2518 2519 if (w->w_addr < addr || w->w_addr >= addr + machsize) 2520 return (WALK_NEXT); 2521 2522 mdb_printf("%p is %p+%p, allocated as a page structure\n", 2523 w->w_addr, addr, w->w_addr - addr); 2524 2525 w->w_found++; 2526 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2527 } 2528 2529 int 2530 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2531 { 2532 whatis_t w; 2533 2534 if (!(flags & DCMD_ADDRSPEC)) 2535 return (DCMD_USAGE); 2536 2537 w.w_verbose = FALSE; 2538 w.w_bufctl = FALSE; 2539 w.w_all = FALSE; 2540 w.w_idspace = FALSE; 2541 2542 if (mdb_getopts(argc, argv, 2543 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2544 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2545 'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace, 2546 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc) 2547 return (DCMD_USAGE); 2548 2549 w.w_addr = addr; 2550 w.w_found = 0; 2551 2552 if (w.w_verbose) 2553 mdb_printf("Searching modules...\n"); 2554 2555 if (!w.w_idspace) { 2556 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w) 2557 == -1) { 2558 mdb_warn("couldn't find modctl walker"); 2559 return (DCMD_ERR); 2560 } 2561 2562 if (w.w_found && w.w_all == FALSE) 2563 return (DCMD_OK); 2564 2565 /* 2566 * Now search all thread stacks. Yes, this is a little weak; we 2567 * can save a lot of work by first checking to see if the 2568 * address is in segkp vs. segkmem. But hey, computers are 2569 * fast. 
2570 */ 2571 if (w.w_verbose) 2572 mdb_printf("Searching threads...\n"); 2573 2574 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w) 2575 == -1) { 2576 mdb_warn("couldn't find thread walker"); 2577 return (DCMD_ERR); 2578 } 2579 2580 if (w.w_found && w.w_all == FALSE) 2581 return (DCMD_OK); 2582 2583 if (w.w_verbose) 2584 mdb_printf("Searching page structures...\n"); 2585 2586 if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w) 2587 == -1) { 2588 mdb_warn("couldn't find page walker"); 2589 return (DCMD_ERR); 2590 } 2591 2592 if (w.w_found && w.w_all == FALSE) 2593 return (DCMD_OK); 2594 } 2595 2596 if (mdb_walk("kmem_cache", 2597 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2598 mdb_warn("couldn't find kmem_cache walker"); 2599 return (DCMD_ERR); 2600 } 2601 2602 if (w.w_found && w.w_all == FALSE) 2603 return (DCMD_OK); 2604 2605 if (mdb_walk("kmem_cache", 2606 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2607 mdb_warn("couldn't find kmem_cache walker"); 2608 return (DCMD_ERR); 2609 } 2610 2611 if (w.w_found && w.w_all == FALSE) 2612 return (DCMD_OK); 2613 2614 if (mdb_walk("vmem_postfix", 2615 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2616 mdb_warn("couldn't find vmem_postfix walker"); 2617 return (DCMD_ERR); 2618 } 2619 2620 if (w.w_found == 0) 2621 mdb_printf("%p is unknown\n", addr); 2622 2623 return (DCMD_OK); 2624 } 2625 2626 void 2627 whatis_help(void) 2628 { 2629 mdb_printf( 2630 "Given a virtual address, attempt to determine where it came\n" 2631 "from.\n" 2632 "\n" 2633 "\t-v\tVerbose output; display caches/arenas/etc as they are\n" 2634 "\t\tsearched\n" 2635 "\t-a\tFind all possible sources. Default behavior is to stop at\n" 2636 "\t\tthe first (most specific) source.\n" 2637 "\t-i\tSearch only identifier arenas and caches. By default\n" 2638 "\t\tthese are ignored.\n" 2639 "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n" 2640 "\t\trespectively. 
Warning: if the buffer exists, but does not\n" 2641 "\t\thave a bufctl, it will not be reported.\n"); 2642 } 2643 2644 typedef struct kmem_log_cpu { 2645 uintptr_t kmc_low; 2646 uintptr_t kmc_high; 2647 } kmem_log_cpu_t; 2648 2649 typedef struct kmem_log_data { 2650 uintptr_t kmd_addr; 2651 kmem_log_cpu_t *kmd_cpu; 2652 } kmem_log_data_t; 2653 2654 int 2655 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2656 kmem_log_data_t *kmd) 2657 { 2658 int i; 2659 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2660 size_t bufsize; 2661 2662 for (i = 0; i < NCPU; i++) { 2663 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2664 break; 2665 } 2666 2667 if (kmd->kmd_addr) { 2668 if (b->bc_cache == NULL) 2669 return (WALK_NEXT); 2670 2671 if (mdb_vread(&bufsize, sizeof (bufsize), 2672 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2673 mdb_warn( 2674 "failed to read cache_bufsize for cache at %p", 2675 b->bc_cache); 2676 return (WALK_ERR); 2677 } 2678 2679 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2680 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2681 return (WALK_NEXT); 2682 } 2683 2684 if (i == NCPU) 2685 mdb_printf(" "); 2686 else 2687 mdb_printf("%3d", i); 2688 2689 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2690 b->bc_timestamp, b->bc_thread); 2691 2692 return (WALK_NEXT); 2693 } 2694 2695 /*ARGSUSED*/ 2696 int 2697 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2698 { 2699 kmem_log_header_t lh; 2700 kmem_cpu_log_header_t clh; 2701 uintptr_t lhp, clhp; 2702 int ncpus; 2703 uintptr_t *cpu; 2704 GElf_Sym sym; 2705 kmem_log_cpu_t *kmc; 2706 int i; 2707 kmem_log_data_t kmd; 2708 uint_t opt_b = FALSE; 2709 2710 if (mdb_getopts(argc, argv, 2711 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2712 return (DCMD_USAGE); 2713 2714 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2715 mdb_warn("failed to read 'kmem_transaction_log'"); 2716 return (DCMD_ERR); 2717 } 2718 2719 if (lhp == NULL) { 2720 mdb_warn("no kmem transaction log\n"); 2721 return (DCMD_ERR); 2722 } 2723 2724 mdb_readvar(&ncpus, "ncpus"); 2725 2726 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2727 mdb_warn("failed to read log header at %p", lhp); 2728 return (DCMD_ERR); 2729 } 2730 2731 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2732 2733 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2734 2735 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2736 mdb_warn("couldn't find 'cpu' array"); 2737 return (DCMD_ERR); 2738 } 2739 2740 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2741 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2742 NCPU * sizeof (uintptr_t), sym.st_size); 2743 return (DCMD_ERR); 2744 } 2745 2746 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2747 mdb_warn("failed to read cpu array at %p", sym.st_value); 2748 return (DCMD_ERR); 2749 } 2750 2751 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2752 kmd.kmd_addr = NULL; 2753 kmd.kmd_cpu = kmc; 2754 2755 for (i = 0; i < NCPU; i++) { 2756 2757 if (cpu[i] == NULL) 2758 continue; 2759 2760 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2761 mdb_warn("cannot read cpu %d's log header at %p", 2762 i, clhp); 2763 return (DCMD_ERR); 2764 } 2765 2766 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2767 (uintptr_t)lh.lh_base; 2768 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2769 2770 clhp += sizeof (kmem_cpu_log_header_t); 2771 } 2772 2773 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2774 "TIMESTAMP", "THREAD"); 
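	/*
	 * Each line emitted by the log walk below follows the header just
	 * printed above.  A typical entry (values purely illustrative)
	 * looks like:
	 *
	 *	  0 30000085c000 3000008a4e40      2f8f3deed1a 30001c3cca0
	 *
	 * where the first column is the CPU whose log chunk holds the
	 * bufctl, or blank if it was not found in any CPU's chunk.
	 */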
2775 2776 /* 2777 * If we have been passed an address, print out only log entries 2778 * corresponding to that address. If opt_b is specified, then interpret 2779 * the address as a bufctl. 2780 */ 2781 if (flags & DCMD_ADDRSPEC) { 2782 kmem_bufctl_audit_t b; 2783 2784 if (opt_b) { 2785 kmd.kmd_addr = addr; 2786 } else { 2787 if (mdb_vread(&b, 2788 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2789 mdb_warn("failed to read bufctl at %p", addr); 2790 return (DCMD_ERR); 2791 } 2792 2793 (void) kmem_log_walk(addr, &b, &kmd); 2794 2795 return (DCMD_OK); 2796 } 2797 } 2798 2799 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2800 mdb_warn("can't find kmem log walker"); 2801 return (DCMD_ERR); 2802 } 2803 2804 return (DCMD_OK); 2805 } 2806 2807 typedef struct bufctl_history_cb { 2808 int bhc_flags; 2809 int bhc_argc; 2810 const mdb_arg_t *bhc_argv; 2811 int bhc_ret; 2812 } bufctl_history_cb_t; 2813 2814 /*ARGSUSED*/ 2815 static int 2816 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2817 { 2818 bufctl_history_cb_t *bhc = arg; 2819 2820 bhc->bhc_ret = 2821 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2822 2823 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2824 2825 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2826 } 2827 2828 void 2829 bufctl_help(void) 2830 { 2831 mdb_printf("%s", 2832 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2833 mdb_dec_indent(2); 2834 mdb_printf("%<b>OPTIONS%</b>\n"); 2835 mdb_inc_indent(2); 2836 mdb_printf("%s", 2837 " -v Display the full content of the bufctl, including its stack trace\n" 2838 " -h retrieve the bufctl's transaction history, if available\n" 2839 " -a addr\n" 2840 " filter out bufctls not involving the buffer at addr\n" 2841 " -c caller\n" 2842 " filter out bufctls without the function/PC in their stack trace\n" 2843 " -e earliest\n" 2844 " filter out bufctls timestamped before earliest\n" 2845 " -l latest\n" 2846 " filter out bufctls timestamped after latest\n" 2847 " -t thread\n" 2848 " filter out bufctls not involving thread\n"); 2849 } 2850 2851 int 2852 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2853 { 2854 kmem_bufctl_audit_t bc; 2855 uint_t verbose = FALSE; 2856 uint_t history = FALSE; 2857 uint_t in_history = FALSE; 2858 uintptr_t caller = NULL, thread = NULL; 2859 uintptr_t laddr, haddr, baddr = NULL; 2860 hrtime_t earliest = 0, latest = 0; 2861 int i, depth; 2862 char c[MDB_SYM_NAMLEN]; 2863 GElf_Sym sym; 2864 2865 if (mdb_getopts(argc, argv, 2866 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2867 'h', MDB_OPT_SETBITS, TRUE, &history, 2868 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2869 'c', MDB_OPT_UINTPTR, &caller, 2870 't', MDB_OPT_UINTPTR, &thread, 2871 'e', MDB_OPT_UINT64, &earliest, 2872 'l', MDB_OPT_UINT64, &latest, 2873 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2874 return (DCMD_USAGE); 2875 2876 if (!(flags & DCMD_ADDRSPEC)) 2877 return (DCMD_USAGE); 2878 2879 if (in_history && !history) 2880 return (DCMD_USAGE); 2881 2882 if (history && !in_history) { 2883 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2884 UM_SLEEP | UM_GC); 2885 bufctl_history_cb_t bhc; 2886 2887 nargv[0].a_type = MDB_TYPE_STRING; 2888 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2889 2890 for (i = 0; i < argc; i++) 2891 nargv[i + 1] = argv[i]; 2892 2893 /* 2894 * When in history mode, we treat each element as if it 2895 * were in a seperate loop, so that the headers group 2896 * bufctls with similar histories. 
2897 */ 2898 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2899 bhc.bhc_argc = argc + 1; 2900 bhc.bhc_argv = nargv; 2901 bhc.bhc_ret = DCMD_OK; 2902 2903 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2904 addr) == -1) { 2905 mdb_warn("unable to walk bufctl_history"); 2906 return (DCMD_ERR); 2907 } 2908 2909 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2910 mdb_printf("\n"); 2911 2912 return (bhc.bhc_ret); 2913 } 2914 2915 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2916 if (verbose) { 2917 mdb_printf("%16s %16s %16s %16s\n" 2918 "%<u>%16s %16s %16s %16s%</u>\n", 2919 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2920 "", "CACHE", "LASTLOG", "CONTENTS"); 2921 } else { 2922 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2923 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2924 } 2925 } 2926 2927 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2928 mdb_warn("couldn't read bufctl at %p", addr); 2929 return (DCMD_ERR); 2930 } 2931 2932 /* 2933 * Guard against bogus bc_depth in case the bufctl is corrupt or 2934 * the address does not really refer to a bufctl. 2935 */ 2936 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2937 2938 if (caller != NULL) { 2939 laddr = caller; 2940 haddr = caller + sizeof (caller); 2941 2942 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2943 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2944 /* 2945 * We were provided an exact symbol value; any 2946 * address in the function is valid. 2947 */ 2948 laddr = (uintptr_t)sym.st_value; 2949 haddr = (uintptr_t)sym.st_value + sym.st_size; 2950 } 2951 2952 for (i = 0; i < depth; i++) 2953 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2954 break; 2955 2956 if (i == depth) 2957 return (DCMD_OK); 2958 } 2959 2960 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2961 return (DCMD_OK); 2962 2963 if (earliest != 0 && bc.bc_timestamp < earliest) 2964 return (DCMD_OK); 2965 2966 if (latest != 0 && bc.bc_timestamp > latest) 2967 return (DCMD_OK); 2968 2969 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2970 return (DCMD_OK); 2971 2972 if (flags & DCMD_PIPE_OUT) { 2973 mdb_printf("%#lr\n", addr); 2974 return (DCMD_OK); 2975 } 2976 2977 if (verbose) { 2978 mdb_printf( 2979 "%<b>%16p%</b> %16p %16llx %16p\n" 2980 "%16s %16p %16p %16p\n", 2981 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2982 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2983 2984 mdb_inc_indent(17); 2985 for (i = 0; i < depth; i++) 2986 mdb_printf("%a\n", bc.bc_stack[i]); 2987 mdb_dec_indent(17); 2988 mdb_printf("\n"); 2989 } else { 2990 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2991 bc.bc_timestamp, bc.bc_thread); 2992 2993 for (i = 0; i < depth; i++) { 2994 if (mdb_lookup_by_addr(bc.bc_stack[i], 2995 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2996 continue; 2997 if (strncmp(c, "kmem_", 5) == 0) 2998 continue; 2999 mdb_printf(" %a\n", bc.bc_stack[i]); 3000 break; 3001 } 3002 3003 if (i >= depth) 3004 mdb_printf("\n"); 3005 } 3006 3007 return (DCMD_OK); 3008 } 3009 3010 typedef struct kmem_verify { 3011 uint64_t *kmv_buf; /* buffer to read cache contents into */ 3012 size_t kmv_size; /* number of bytes in kmv_buf */ 3013 int kmv_corruption; /* > 0 if corruption found. */ 3014 int kmv_besilent; /* report actual corruption sites */ 3015 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3016 } kmem_verify_t; 3017 3018 /* 3019 * verify_pattern() 3020 * verify that buf is filled with the pattern pat. 
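 *	Returns the byte offset of the first mismatched word, or -1 if the
 *	entire buffer matches the pattern.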
3021 */ 3022 static int64_t 3023 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3024 { 3025 /*LINTED*/ 3026 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3027 uint64_t *buf; 3028 3029 for (buf = buf_arg; buf < bufend; buf++) 3030 if (*buf != pat) 3031 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3032 return (-1); 3033 } 3034 3035 /* 3036 * verify_buftag() 3037 * verify that btp->bt_bxstat == (bcp ^ pat) 3038 */ 3039 static int 3040 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3041 { 3042 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3043 } 3044 3045 /* 3046 * verify_free() 3047 * verify the integrity of a free block of memory by checking 3048 * that it is filled with 0xdeadbeef and that its buftag is sane. 3049 */ 3050 /*ARGSUSED1*/ 3051 static int 3052 verify_free(uintptr_t addr, const void *data, void *private) 3053 { 3054 kmem_verify_t *kmv = (kmem_verify_t *)private; 3055 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3056 int64_t corrupt; /* corruption offset */ 3057 kmem_buftag_t *buftagp; /* ptr to buftag */ 3058 kmem_cache_t *cp = &kmv->kmv_cache; 3059 int besilent = kmv->kmv_besilent; 3060 3061 /*LINTED*/ 3062 buftagp = KMEM_BUFTAG(cp, buf); 3063 3064 /* 3065 * Read the buffer to check. 3066 */ 3067 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3068 if (!besilent) 3069 mdb_warn("couldn't read %p", addr); 3070 return (WALK_NEXT); 3071 } 3072 3073 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3074 KMEM_FREE_PATTERN)) >= 0) { 3075 if (!besilent) 3076 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3077 addr, (uintptr_t)addr + corrupt); 3078 goto corrupt; 3079 } 3080 /* 3081 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3082 * the first bytes of the buffer, hence we cannot check for red 3083 * zone corruption. 3084 */ 3085 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3086 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3087 if (!besilent) 3088 mdb_printf("buffer %p (free) seems to " 3089 "have a corrupt redzone pattern\n", addr); 3090 goto corrupt; 3091 } 3092 3093 /* 3094 * confirm bufctl pointer integrity. 3095 */ 3096 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3097 if (!besilent) 3098 mdb_printf("buffer %p (free) has a corrupt " 3099 "buftag\n", addr); 3100 goto corrupt; 3101 } 3102 3103 return (WALK_NEXT); 3104 corrupt: 3105 kmv->kmv_corruption++; 3106 return (WALK_NEXT); 3107 } 3108 3109 /* 3110 * verify_alloc() 3111 * Verify that the buftag of an allocated buffer makes sense with respect 3112 * to the buffer. 3113 */ 3114 /*ARGSUSED1*/ 3115 static int 3116 verify_alloc(uintptr_t addr, const void *data, void *private) 3117 { 3118 kmem_verify_t *kmv = (kmem_verify_t *)private; 3119 kmem_cache_t *cp = &kmv->kmv_cache; 3120 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3121 /*LINTED*/ 3122 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3123 uint32_t *ip = (uint32_t *)buftagp; 3124 uint8_t *bp = (uint8_t *)buf; 3125 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3126 int besilent = kmv->kmv_besilent; 3127 3128 /* 3129 * Read the buffer to check. 3130 */ 3131 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3132 if (!besilent) 3133 mdb_warn("couldn't read %p", addr); 3134 return (WALK_NEXT); 3135 } 3136 3137 /* 3138 * There are two cases to handle: 3139 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3140 * 0xfeedfacefeedface at the end of it 3141 * 2. 
If the buf was alloc'd using kmem_alloc, it will have 3142 * 0xbb just past the end of the region in use. At the buftag, 3143 * it will have 0xfeedface (or, if the whole buffer is in use, 3144 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3145 * endianness), followed by 32 bits containing the offset of the 3146 * 0xbb byte in the buffer. 3147 * 3148 * Finally, the two 32-bit words that comprise the second half of the 3149 * buftag should xor to KMEM_BUFTAG_ALLOC 3150 */ 3151 3152 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3153 looks_ok = 1; 3154 else if (!KMEM_SIZE_VALID(ip[1])) 3155 size_ok = 0; 3156 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3157 looks_ok = 1; 3158 else 3159 size_ok = 0; 3160 3161 if (!size_ok) { 3162 if (!besilent) 3163 mdb_printf("buffer %p (allocated) has a corrupt " 3164 "redzone size encoding\n", addr); 3165 goto corrupt; 3166 } 3167 3168 if (!looks_ok) { 3169 if (!besilent) 3170 mdb_printf("buffer %p (allocated) has a corrupt " 3171 "redzone signature\n", addr); 3172 goto corrupt; 3173 } 3174 3175 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3176 if (!besilent) 3177 mdb_printf("buffer %p (allocated) has a " 3178 "corrupt buftag\n", addr); 3179 goto corrupt; 3180 } 3181 3182 return (WALK_NEXT); 3183 corrupt: 3184 kmv->kmv_corruption++; 3185 return (WALK_NEXT); 3186 } 3187 3188 /*ARGSUSED2*/ 3189 int 3190 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3191 { 3192 if (flags & DCMD_ADDRSPEC) { 3193 int check_alloc = 0, check_free = 0; 3194 kmem_verify_t kmv; 3195 3196 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3197 addr) == -1) { 3198 mdb_warn("couldn't read kmem_cache %p", addr); 3199 return (DCMD_ERR); 3200 } 3201 3202 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3203 sizeof (kmem_buftag_t); 3204 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3205 kmv.kmv_corruption = 0; 3206 3207 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3208 check_alloc = 1; 3209 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3210 check_free = 1; 3211 } else { 3212 if (!(flags & DCMD_LOOP)) { 3213 mdb_warn("cache %p (%s) does not have " 3214 "redzone checking enabled\n", addr, 3215 kmv.kmv_cache.cache_name); 3216 } 3217 return (DCMD_ERR); 3218 } 3219 3220 if (flags & DCMD_LOOP) { 3221 /* 3222 * table mode, don't print out every corrupt buffer 3223 */ 3224 kmv.kmv_besilent = 1; 3225 } else { 3226 mdb_printf("Summary for cache '%s'\n", 3227 kmv.kmv_cache.cache_name); 3228 mdb_inc_indent(2); 3229 kmv.kmv_besilent = 0; 3230 } 3231 3232 if (check_alloc) 3233 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3234 if (check_free) 3235 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3236 3237 if (flags & DCMD_LOOP) { 3238 if (kmv.kmv_corruption == 0) { 3239 mdb_printf("%-*s %?p clean\n", 3240 KMEM_CACHE_NAMELEN, 3241 kmv.kmv_cache.cache_name, addr); 3242 } else { 3243 char *s = ""; /* optional s in "buffer[s]" */ 3244 if (kmv.kmv_corruption > 1) 3245 s = "s"; 3246 3247 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3248 KMEM_CACHE_NAMELEN, 3249 kmv.kmv_cache.cache_name, addr, 3250 kmv.kmv_corruption, s); 3251 } 3252 } else { 3253 /* 3254 * This is the more verbose mode, when the user has 3255 * type addr::kmem_verify. If the cache was clean, 3256 * nothing will have yet been printed. So say something. 
3257 */ 3258 if (kmv.kmv_corruption == 0) 3259 mdb_printf("clean\n"); 3260 3261 mdb_dec_indent(2); 3262 } 3263 } else { 3264 /* 3265 * If the user didn't specify a cache to verify, we'll walk all 3266 * kmem_cache's, specifying ourselves as a callback for each... 3267 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3268 */ 3269 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN, 3270 "Cache Name", "Addr", "Cache Integrity"); 3271 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3272 } 3273 3274 return (DCMD_OK); 3275 } 3276
3277 typedef struct vmem_node { 3278 struct vmem_node *vn_next; 3279 struct vmem_node *vn_parent; 3280 struct vmem_node *vn_sibling; 3281 struct vmem_node *vn_children; 3282 uintptr_t vn_addr; 3283 int vn_marked; 3284 vmem_t vn_vmem; 3285 } vmem_node_t; 3286
3287 typedef struct vmem_walk { 3288 vmem_node_t *vw_root; 3289 vmem_node_t *vw_current; 3290 } vmem_walk_t; 3291
3292 int 3293 vmem_walk_init(mdb_walk_state_t *wsp) 3294 { 3295 uintptr_t vaddr, paddr; 3296 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3297 vmem_walk_t *vw; 3298 3299 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3300 mdb_warn("couldn't read 'vmem_list'"); 3301 return (WALK_ERR); 3302 } 3303 3304 while (vaddr != NULL) { 3305 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3306 vp->vn_addr = vaddr; 3307 vp->vn_next = head; 3308 head = vp; 3309 3310 if (vaddr == wsp->walk_addr) 3311 current = vp; 3312 3313 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3314 mdb_warn("couldn't read vmem_t at %p", vaddr); 3315 goto err; 3316 } 3317 3318 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3319 } 3320 3321 for (vp = head; vp != NULL; vp = vp->vn_next) { 3322 3323 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3324 vp->vn_sibling = root; 3325 root = vp; 3326 continue; 3327 } 3328 3329 for (parent = head; parent != NULL; parent = parent->vn_next) { 3330 if (parent->vn_addr != paddr) 3331 continue; 3332 vp->vn_sibling = parent->vn_children; 3333 parent->vn_children = vp; 3334 vp->vn_parent = parent; 3335 break; 3336 } 3337 3338 if (parent == NULL) { 3339 mdb_warn("couldn't find %p's parent (%p)\n", 3340 vp->vn_addr, paddr); 3341 goto err; 3342 } 3343 } 3344 3345 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3346 vw->vw_root = root; 3347 3348 if (current != NULL) 3349 vw->vw_current = current; 3350 else 3351 vw->vw_current = root; 3352 3353 wsp->walk_data = vw; 3354 return (WALK_NEXT); 3355 err: 3356 for (vp = head; head != NULL; vp = head) { 3357 head = vp->vn_next; 3358 mdb_free(vp, sizeof (vmem_node_t)); 3359 } 3360 3361 return (WALK_ERR); 3362 } 3363
3364 int 3365 vmem_walk_step(mdb_walk_state_t *wsp) 3366 { 3367 vmem_walk_t *vw = wsp->walk_data; 3368 vmem_node_t *vp; 3369 int rval; 3370 3371 if ((vp = vw->vw_current) == NULL) 3372 return (WALK_DONE); 3373 3374 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3375 3376 if (vp->vn_children != NULL) { 3377 vw->vw_current = vp->vn_children; 3378 return (rval); 3379 } 3380 3381 do { 3382 vw->vw_current = vp->vn_sibling; 3383 vp = vp->vn_parent; 3384 } while (vw->vw_current == NULL && vp != NULL); 3385 3386 return (rval); 3387 } 3388
3389 /* 3390 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3391 * children are visited before their parent. We perform the postfix walk 3392 * iteratively (rather than recursively) to allow mdb to regain control 3393 * after each callback. 
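 * For example (purely illustrative): given a root arena A with children
 * B and C, where C itself has a child D, the postfix order visits
 * B, D, C, and finally A.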
3394 */ 3395 int 3396 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3397 { 3398 vmem_walk_t *vw = wsp->walk_data; 3399 vmem_node_t *vp = vw->vw_current; 3400 int rval; 3401 3402 /* 3403 * If this node is marked, then we know that we have already visited 3404 * all of its children. If the node has any siblings, they need to 3405 * be visited next; otherwise, we need to visit the parent. Note 3406 * that vp->vn_marked will only be zero on the first invocation of 3407 * the step function. 3408 */ 3409 if (vp->vn_marked) { 3410 if (vp->vn_sibling != NULL) 3411 vp = vp->vn_sibling; 3412 else if (vp->vn_parent != NULL) 3413 vp = vp->vn_parent; 3414 else { 3415 /* 3416 * We have neither a parent, nor a sibling, and we 3417 * have already been visited; we're done. 3418 */ 3419 return (WALK_DONE); 3420 } 3421 } 3422 3423 /* 3424 * Before we visit this node, visit its children. 3425 */ 3426 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3427 vp = vp->vn_children; 3428 3429 vp->vn_marked = 1; 3430 vw->vw_current = vp; 3431 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3432 3433 return (rval); 3434 } 3435 3436 void 3437 vmem_walk_fini(mdb_walk_state_t *wsp) 3438 { 3439 vmem_walk_t *vw = wsp->walk_data; 3440 vmem_node_t *root = vw->vw_root; 3441 int done; 3442 3443 if (root == NULL) 3444 return; 3445 3446 if ((vw->vw_root = root->vn_children) != NULL) 3447 vmem_walk_fini(wsp); 3448 3449 vw->vw_root = root->vn_sibling; 3450 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3451 mdb_free(root, sizeof (vmem_node_t)); 3452 3453 if (done) { 3454 mdb_free(vw, sizeof (vmem_walk_t)); 3455 } else { 3456 vmem_walk_fini(wsp); 3457 } 3458 } 3459 3460 typedef struct vmem_seg_walk { 3461 uint8_t vsw_type; 3462 uintptr_t vsw_start; 3463 uintptr_t vsw_current; 3464 } vmem_seg_walk_t; 3465 3466 /*ARGSUSED*/ 3467 int 3468 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3469 { 3470 vmem_seg_walk_t *vsw; 3471 3472 if (wsp->walk_addr == NULL) { 3473 mdb_warn("vmem_%s does not support global walks\n", name); 3474 return (WALK_ERR); 3475 } 3476 3477 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3478 3479 vsw->vsw_type = type; 3480 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3481 vsw->vsw_current = vsw->vsw_start; 3482 3483 return (WALK_NEXT); 3484 } 3485 3486 /* 3487 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
3488 */ 3489 #define VMEM_NONE 0 3490 3491 int 3492 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3493 { 3494 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3495 } 3496 3497 int 3498 vmem_free_walk_init(mdb_walk_state_t *wsp) 3499 { 3500 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3501 } 3502 3503 int 3504 vmem_span_walk_init(mdb_walk_state_t *wsp) 3505 { 3506 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3507 } 3508 3509 int 3510 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3511 { 3512 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3513 } 3514 3515 int 3516 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3517 { 3518 vmem_seg_t seg; 3519 vmem_seg_walk_t *vsw = wsp->walk_data; 3520 uintptr_t addr = vsw->vsw_current; 3521 static size_t seg_size = 0; 3522 int rval; 3523 3524 if (!seg_size) { 3525 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3526 mdb_warn("failed to read 'vmem_seg_size'"); 3527 seg_size = sizeof (vmem_seg_t); 3528 } 3529 } 3530 3531 if (seg_size < sizeof (seg)) 3532 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3533 3534 if (mdb_vread(&seg, seg_size, addr) == -1) { 3535 mdb_warn("couldn't read vmem_seg at %p", addr); 3536 return (WALK_ERR); 3537 } 3538 3539 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3540 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3541 rval = WALK_NEXT; 3542 } else { 3543 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3544 } 3545 3546 if (vsw->vsw_current == vsw->vsw_start) 3547 return (WALK_DONE); 3548 3549 return (rval); 3550 } 3551 3552 void 3553 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3554 { 3555 vmem_seg_walk_t *vsw = wsp->walk_data; 3556 3557 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3558 } 3559 3560 #define VMEM_NAMEWIDTH 22 3561 3562 int 3563 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3564 { 3565 vmem_t v, parent; 3566 vmem_kstat_t *vkp = &v.vm_kstat; 3567 uintptr_t paddr; 3568 int ident = 0; 3569 char c[VMEM_NAMEWIDTH]; 3570 3571 if (!(flags & DCMD_ADDRSPEC)) { 3572 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3573 mdb_warn("can't walk vmem"); 3574 return (DCMD_ERR); 3575 } 3576 return (DCMD_OK); 3577 } 3578 3579 if (DCMD_HDRSPEC(flags)) 3580 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3581 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3582 "TOTAL", "SUCCEED", "FAIL"); 3583 3584 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3585 mdb_warn("couldn't read vmem at %p", addr); 3586 return (DCMD_ERR); 3587 } 3588 3589 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3590 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3591 mdb_warn("couldn't trace %p's ancestry", addr); 3592 ident = 0; 3593 break; 3594 } 3595 paddr = (uintptr_t)parent.vm_source; 3596 } 3597 3598 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3599 3600 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3601 addr, VMEM_NAMEWIDTH, c, 3602 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3603 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3604 3605 return (DCMD_OK); 3606 } 3607 3608 void 3609 vmem_seg_help(void) 3610 { 3611 mdb_printf("%s", 3612 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3613 "\n" 3614 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3615 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3616 "information.\n"); 3617 mdb_dec_indent(2); 3618 mdb_printf("%<b>OPTIONS%</b>\n"); 3619 mdb_inc_indent(2); 3620 mdb_printf("%s", 3621 " -v Display the full content of the vmem_seg, including its stack trace\n" 3622 " -s report the size of the segment, instead of the end address\n" 3623 " -c caller\n" 3624 " filter out segments without the function/PC in their stack trace\n" 3625 " -e earliest\n" 3626 " filter out segments timestamped before earliest\n" 3627 " -l latest\n" 3628 " filter out segments timestamped after latest\n" 3629 " -m minsize\n" 3630 " filter out segments smaller than minsize\n" 3631 " -M maxsize\n" 3632 " filter out segments larger than maxsize\n" 3633 " -t thread\n" 3634 " filter out segments not involving thread\n" 3635 " -T type\n" 3636 " filter out segments not of type 'type'\n" 3637 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3638 } 3639
3640 /*ARGSUSED*/ 3641 int 3642 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3643 { 3644 vmem_seg_t vs; 3645 pc_t *stk = vs.vs_stack; 3646 uintptr_t sz; 3647 uint8_t t; 3648 const char *type = NULL; 3649 GElf_Sym sym; 3650 char c[MDB_SYM_NAMLEN]; 3651 int no_debug; 3652 int i; 3653 int depth; 3654 uintptr_t laddr, haddr; 3655 3656 uintptr_t caller = NULL, thread = NULL; 3657 uintptr_t minsize = 0, maxsize = 0; 3658 3659 hrtime_t earliest = 0, latest = 0; 3660 3661 uint_t size = 0; 3662 uint_t verbose = 0; 3663 3664 if (!(flags & DCMD_ADDRSPEC)) 3665 return (DCMD_USAGE); 3666 3667 if (mdb_getopts(argc, argv, 3668 'c', MDB_OPT_UINTPTR, &caller, 3669 'e', MDB_OPT_UINT64, &earliest, 3670 'l', MDB_OPT_UINT64, &latest, 3671 's', MDB_OPT_SETBITS, TRUE, &size, 3672 'm', MDB_OPT_UINTPTR, &minsize, 3673 'M', MDB_OPT_UINTPTR, &maxsize, 3674 't', MDB_OPT_UINTPTR, &thread, 3675 'T', MDB_OPT_STR, &type, 3676 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3677 NULL) != argc) 3678 return (DCMD_USAGE); 3679 3680 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3681 if (verbose) { 3682 mdb_printf("%16s %4s %16s %16s %16s\n" 3683 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3684 "ADDR", "TYPE", "START", "END", "SIZE", 3685 "", "", "THREAD", "TIMESTAMP", ""); 3686 } else { 3687 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3688 "START", size? 
"SIZE" : "END", "WHO"); 3689 } 3690 } 3691 3692 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3693 mdb_warn("couldn't read vmem_seg at %p", addr); 3694 return (DCMD_ERR); 3695 } 3696 3697 if (type != NULL) { 3698 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3699 t = VMEM_ALLOC; 3700 else if (strcmp(type, "FREE") == 0) 3701 t = VMEM_FREE; 3702 else if (strcmp(type, "SPAN") == 0) 3703 t = VMEM_SPAN; 3704 else if (strcmp(type, "ROTR") == 0 || 3705 strcmp(type, "ROTOR") == 0) 3706 t = VMEM_ROTOR; 3707 else if (strcmp(type, "WLKR") == 0 || 3708 strcmp(type, "WALKER") == 0) 3709 t = VMEM_WALKER; 3710 else { 3711 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3712 type); 3713 return (DCMD_ERR); 3714 } 3715 3716 if (vs.vs_type != t) 3717 return (DCMD_OK); 3718 } 3719 3720 sz = vs.vs_end - vs.vs_start; 3721 3722 if (minsize != 0 && sz < minsize) 3723 return (DCMD_OK); 3724 3725 if (maxsize != 0 && sz > maxsize) 3726 return (DCMD_OK); 3727 3728 t = vs.vs_type; 3729 depth = vs.vs_depth; 3730 3731 /* 3732 * debug info, when present, is only accurate for VMEM_ALLOC segments 3733 */ 3734 no_debug = (t != VMEM_ALLOC) || 3735 (depth == 0 || depth > VMEM_STACK_DEPTH); 3736 3737 if (no_debug) { 3738 if (caller != NULL || thread != NULL || earliest != 0 || 3739 latest != 0) 3740 return (DCMD_OK); /* not enough info */ 3741 } else { 3742 if (caller != NULL) { 3743 laddr = caller; 3744 haddr = caller + sizeof (caller); 3745 3746 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3747 sizeof (c), &sym) != -1 && 3748 caller == (uintptr_t)sym.st_value) { 3749 /* 3750 * We were provided an exact symbol value; any 3751 * address in the function is valid. 3752 */ 3753 laddr = (uintptr_t)sym.st_value; 3754 haddr = (uintptr_t)sym.st_value + sym.st_size; 3755 } 3756 3757 for (i = 0; i < depth; i++) 3758 if (vs.vs_stack[i] >= laddr && 3759 vs.vs_stack[i] < haddr) 3760 break; 3761 3762 if (i == depth) 3763 return (DCMD_OK); 3764 } 3765 3766 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3767 return (DCMD_OK); 3768 3769 if (earliest != 0 && vs.vs_timestamp < earliest) 3770 return (DCMD_OK); 3771 3772 if (latest != 0 && vs.vs_timestamp > latest) 3773 return (DCMD_OK); 3774 } 3775 3776 type = (t == VMEM_ALLOC ? "ALLC" : 3777 t == VMEM_FREE ? "FREE" : 3778 t == VMEM_SPAN ? "SPAN" : 3779 t == VMEM_ROTOR ? "ROTR" : 3780 t == VMEM_WALKER ? "WLKR" : 3781 "????"); 3782 3783 if (flags & DCMD_PIPE_OUT) { 3784 mdb_printf("%#lr\n", addr); 3785 return (DCMD_OK); 3786 } 3787 3788 if (verbose) { 3789 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3790 addr, type, vs.vs_start, vs.vs_end, sz); 3791 3792 if (no_debug) 3793 return (DCMD_OK); 3794 3795 mdb_printf("%16s %4s %16p %16llx\n", 3796 "", "", vs.vs_thread, vs.vs_timestamp); 3797 3798 mdb_inc_indent(17); 3799 for (i = 0; i < depth; i++) { 3800 mdb_printf("%a\n", stk[i]); 3801 } 3802 mdb_dec_indent(17); 3803 mdb_printf("\n"); 3804 } else { 3805 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3806 vs.vs_start, size? 
sz : vs.vs_end); 3807 3808 if (no_debug) { 3809 mdb_printf("\n"); 3810 return (DCMD_OK); 3811 } 3812 3813 for (i = 0; i < depth; i++) { 3814 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3815 c, sizeof (c), &sym) == -1) 3816 continue; 3817 if (strncmp(c, "vmem_", 5) == 0) 3818 continue; 3819 break; 3820 } 3821 mdb_printf(" %a\n", stk[i]); 3822 } 3823 return (DCMD_OK); 3824 } 3825
3826 typedef struct kmalog_data { 3827 uintptr_t kma_addr; 3828 hrtime_t kma_newest; 3829 } kmalog_data_t; 3830
3831 /*ARGSUSED*/ 3832 static int 3833 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3834 { 3835 char name[KMEM_CACHE_NAMELEN + 1]; 3836 hrtime_t delta; 3837 int i, depth; 3838 size_t bufsize; 3839 3840 if (bcp->bc_timestamp == 0) 3841 return (WALK_DONE); 3842 3843 if (kma->kma_newest == 0) 3844 kma->kma_newest = bcp->bc_timestamp; 3845 3846 if (kma->kma_addr) { 3847 if (mdb_vread(&bufsize, sizeof (bufsize), 3848 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3849 mdb_warn( 3850 "failed to read cache_bufsize for cache at %p", 3851 bcp->bc_cache); 3852 return (WALK_ERR); 3853 } 3854 3855 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3856 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3857 return (WALK_NEXT); 3858 } 3859 3860 delta = kma->kma_newest - bcp->bc_timestamp; 3861 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3862 3863 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3864 &bcp->bc_cache->cache_name) <= 0) 3865 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3866 3867 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3868 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3869 3870 for (i = 0; i < depth; i++) 3871 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3872 3873 return (WALK_NEXT); 3874 } 3875
3876 int 3877 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3878 { 3879 const char *logname = "kmem_transaction_log"; 3880 kmalog_data_t kma; 3881 3882 if (argc > 1) 3883 return (DCMD_USAGE); 3884 3885 kma.kma_newest = 0; 3886 if (flags & DCMD_ADDRSPEC) 3887 kma.kma_addr = addr; 3888 else 3889 kma.kma_addr = NULL; 3890 3891 if (argc > 0) { 3892 if (argv->a_type != MDB_TYPE_STRING) 3893 return (DCMD_USAGE); 3894 if (strcmp(argv->a_un.a_str, "fail") == 0) 3895 logname = "kmem_failure_log"; 3896 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3897 logname = "kmem_slab_log"; 3898 else 3899 return (DCMD_USAGE); 3900 } 3901 3902 if (mdb_readvar(&addr, logname) == -1) { 3903 mdb_warn("failed to read %s log header pointer", logname); 3904 return (DCMD_ERR); 3905 } 3906 3907 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3908 mdb_warn("failed to walk kmem log"); 3909 return (DCMD_ERR); 3910 } 3911 3912 return (DCMD_OK); 3913 } 3914
3915 /* 3916 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3917 * The first piece is a structure which we use to accumulate kmem_cache_t 3918 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3919 * walker; we either add all caches, or ones named explicitly as arguments. 
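 * For example, '::kmausers' alone tallies every KMF_AUDIT cache, while
 * '::kmausers kmem_alloc_256' (an illustrative cache name) restricts the
 * tally to that one cache; giving an explicit address implies -f.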
3920 */ 3921 3922 typedef struct kmclist { 3923 const char *kmc_name; /* Name to match (or NULL) */ 3924 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3925 int kmc_nelems; /* Num entries in kmc_caches */ 3926 int kmc_size; /* Size of kmc_caches array */ 3927 } kmclist_t; 3928 3929 static int 3930 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3931 { 3932 void *p; 3933 int s; 3934 3935 if (kmc->kmc_name == NULL || 3936 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3937 /* 3938 * If we have a match, grow our array (if necessary), and then 3939 * add the virtual address of the matching cache to our list. 3940 */ 3941 if (kmc->kmc_nelems >= kmc->kmc_size) { 3942 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3943 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3944 3945 bcopy(kmc->kmc_caches, p, 3946 sizeof (uintptr_t) * kmc->kmc_size); 3947 3948 kmc->kmc_caches = p; 3949 kmc->kmc_size = s; 3950 } 3951 3952 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3953 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3954 } 3955 3956 return (WALK_NEXT); 3957 } 3958 3959 /* 3960 * The second piece of ::kmausers is a hash table of allocations. Each 3961 * allocation owner is identified by its stack trace and data_size. We then 3962 * track the total bytes of all such allocations, and the number of allocations 3963 * to report at the end. Once we have a list of caches, we walk through the 3964 * allocated bufctls of each, and update our hash table accordingly. 3965 */ 3966 3967 typedef struct kmowner { 3968 struct kmowner *kmo_head; /* First hash elt in bucket */ 3969 struct kmowner *kmo_next; /* Next hash elt in chain */ 3970 size_t kmo_signature; /* Hash table signature */ 3971 uint_t kmo_num; /* Number of allocations */ 3972 size_t kmo_data_size; /* Size of each allocation */ 3973 size_t kmo_total_size; /* Total bytes of allocation */ 3974 int kmo_depth; /* Depth of stack trace */ 3975 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3976 } kmowner_t; 3977 3978 typedef struct kmusers { 3979 uintptr_t kmu_addr; /* address of interest */ 3980 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3981 kmowner_t *kmu_hash; /* Hash table of owners */ 3982 int kmu_nelems; /* Number of entries in use */ 3983 int kmu_size; /* Total number of entries */ 3984 } kmusers_t; 3985 3986 static void 3987 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3988 size_t size, size_t data_size) 3989 { 3990 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3991 size_t bucket, signature = data_size; 3992 kmowner_t *kmo, *kmoend; 3993 3994 /* 3995 * If the hash table is full, double its size and rehash everything. 3996 */ 3997 if (kmu->kmu_nelems >= kmu->kmu_size) { 3998 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3999 4000 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 4001 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 4002 kmu->kmu_hash = kmo; 4003 kmu->kmu_size = s; 4004 4005 kmoend = kmu->kmu_hash + kmu->kmu_size; 4006 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 4007 kmo->kmo_head = NULL; 4008 4009 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 4010 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 4011 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 4012 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4013 kmu->kmu_hash[bucket].kmo_head = kmo; 4014 } 4015 } 4016 4017 /* 4018 * Finish computing the hash signature from the stack trace, and then 4019 * see if the owner is in the hash table. If so, update our stats. 
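 * The signature is simply the allocation's data size plus the sum of its
 * stack PCs; entries that land in the same bucket are still compared
 * field by field before being merged.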
4020 */ 4021 for (i = 0; i < depth; i++) 4022 signature += bcp->bc_stack[i]; 4023 4024 bucket = signature & (kmu->kmu_size - 1); 4025 4026 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4027 if (kmo->kmo_signature == signature) { 4028 size_t difference = 0; 4029 4030 difference |= kmo->kmo_data_size - data_size; 4031 difference |= kmo->kmo_depth - depth; 4032 4033 for (i = 0; i < depth; i++) { 4034 difference |= kmo->kmo_stack[i] - 4035 bcp->bc_stack[i]; 4036 } 4037 4038 if (difference == 0) { 4039 kmo->kmo_total_size += size; 4040 kmo->kmo_num++; 4041 return; 4042 } 4043 } 4044 } 4045 4046 /* 4047 * If the owner is not yet hashed, grab the next element and fill it 4048 * in based on the allocation information. 4049 */ 4050 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4051 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4052 kmu->kmu_hash[bucket].kmo_head = kmo; 4053 4054 kmo->kmo_signature = signature; 4055 kmo->kmo_num = 1; 4056 kmo->kmo_data_size = data_size; 4057 kmo->kmo_total_size = size; 4058 kmo->kmo_depth = depth; 4059 4060 for (i = 0; i < depth; i++) 4061 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4062 } 4063 4064 /* 4065 * When ::kmausers is invoked without the -f flag, we simply update our hash 4066 * table with the information from each allocated bufctl. 4067 */ 4068 /*ARGSUSED*/ 4069 static int 4070 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4071 { 4072 const kmem_cache_t *cp = kmu->kmu_cache; 4073 4074 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4075 return (WALK_NEXT); 4076 } 4077 4078 /* 4079 * When ::kmausers is invoked with the -f flag, we print out the information 4080 * for each bufctl as well as updating the hash table. 4081 */ 4082 static int 4083 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4084 { 4085 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4086 const kmem_cache_t *cp = kmu->kmu_cache; 4087 kmem_bufctl_t bufctl; 4088 4089 if (kmu->kmu_addr) { 4090 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4091 mdb_warn("couldn't read bufctl at %p", addr); 4092 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4093 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4094 cp->cache_bufsize) 4095 return (WALK_NEXT); 4096 } 4097 4098 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4099 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4100 4101 for (i = 0; i < depth; i++) 4102 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4103 4104 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4105 return (WALK_NEXT); 4106 } 4107 4108 /* 4109 * We sort our results by allocation size before printing them. 4110 */ 4111 static int 4112 kmownercmp(const void *lp, const void *rp) 4113 { 4114 const kmowner_t *lhs = lp; 4115 const kmowner_t *rhs = rp; 4116 4117 return (rhs->kmo_total_size - lhs->kmo_total_size); 4118 } 4119 4120 /* 4121 * The main engine of ::kmausers is relatively straightforward: First we 4122 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4123 * iterate over the allocated bufctls of each cache in the list. Finally, 4124 * we sort and print our results. 
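 * By default only owners totalling at least 8192 bytes or 100 allocations
 * are printed; -e drops both thresholds to zero, and -f additionally
 * prints each matching bufctl as it is encountered.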
4125 */ 4126 /*ARGSUSED*/ 4127 int 4128 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4129 { 4130 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4131 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4132 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4133 int do_all_caches = 1; /* Do all caches (no arguments) */ 4134 int opt_e = FALSE; /* Include "small" users */ 4135 int opt_f = FALSE; /* Print stack traces */ 4136 4137 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4138 kmowner_t *kmo, *kmoend; 4139 int i, oelems; 4140 4141 kmclist_t kmc; 4142 kmusers_t kmu; 4143 4144 bzero(&kmc, sizeof (kmc)); 4145 bzero(&kmu, sizeof (kmu)); 4146 4147 while ((i = mdb_getopts(argc, argv, 4148 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4149 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4150 4151 argv += i; /* skip past options we just processed */ 4152 argc -= i; /* adjust argc */ 4153 4154 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4155 return (DCMD_USAGE); 4156 4157 oelems = kmc.kmc_nelems; 4158 kmc.kmc_name = argv->a_un.a_str; 4159 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4160 4161 if (kmc.kmc_nelems == oelems) { 4162 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4163 return (DCMD_ERR); 4164 } 4165 4166 do_all_caches = 0; 4167 argv++; 4168 argc--; 4169 } 4170 4171 if (flags & DCMD_ADDRSPEC) { 4172 opt_f = TRUE; 4173 kmu.kmu_addr = addr; 4174 } else { 4175 kmu.kmu_addr = NULL; 4176 } 4177 4178 if (opt_e) 4179 mem_threshold = cnt_threshold = 0; 4180 4181 if (opt_f) 4182 callback = (mdb_walk_cb_t)kmause2; 4183 4184 if (do_all_caches) { 4185 kmc.kmc_name = NULL; /* match all cache names */ 4186 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4187 } 4188 4189 for (i = 0; i < kmc.kmc_nelems; i++) { 4190 uintptr_t cp = kmc.kmc_caches[i]; 4191 kmem_cache_t c; 4192 4193 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4194 mdb_warn("failed to read cache at %p", cp); 4195 continue; 4196 } 4197 4198 if (!(c.cache_flags & KMF_AUDIT)) { 4199 if (!do_all_caches) { 4200 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4201 c.cache_name); 4202 } 4203 continue; 4204 } 4205 4206 kmu.kmu_cache = &c; 4207 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4208 audited_caches++; 4209 } 4210 4211 if (audited_caches == 0 && do_all_caches) { 4212 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4213 return (DCMD_ERR); 4214 } 4215 4216 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4217 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4218 4219 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4220 if (kmo->kmo_total_size < mem_threshold && 4221 kmo->kmo_num < cnt_threshold) 4222 continue; 4223 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4224 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4225 for (i = 0; i < kmo->kmo_depth; i++) 4226 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4227 } 4228 4229 return (DCMD_OK); 4230 } 4231 4232 void 4233 kmausers_help(void) 4234 { 4235 mdb_printf( 4236 "Displays the largest users of the kmem allocator, sorted by \n" 4237 "trace. If one or more caches is specified, only those caches\n" 4238 "will be searched. By default, all caches are searched. If an\n" 4239 "address is specified, then only those allocations which include\n" 4240 "the given address are displayed. Specifying an address implies\n" 4241 "-f.\n" 4242 "\n" 4243 "\t-e\tInclude all users, not just the largest\n" 4244 "\t-f\tDisplay individual allocations. 
By default, users are\n" 4245 "\t\tgrouped by stack\n"); 4246 } 4247 4248 static int 4249 kmem_ready_check(void) 4250 { 4251 int ready; 4252 4253 if (mdb_readvar(&ready, "kmem_ready") < 0) 4254 return (-1); /* errno is set for us */ 4255 4256 return (ready); 4257 } 4258 4259 /*ARGSUSED*/ 4260 static void 4261 kmem_statechange_cb(void *arg) 4262 { 4263 static int been_ready = 0; 4264 4265 leaky_cleanup(1); /* state changes invalidate leaky state */ 4266 4267 if (been_ready) 4268 return; 4269 4270 if (kmem_ready_check() <= 0) 4271 return; 4272 4273 been_ready = 1; 4274 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 4275 } 4276 4277 void 4278 kmem_init(void) 4279 { 4280 mdb_walker_t w = { 4281 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 4282 list_walk_step, list_walk_fini 4283 }; 4284 4285 /* 4286 * If kmem is ready, we'll need to invoke the kmem_cache walker 4287 * immediately. Walkers in the linkage structure won't be ready until 4288 * _mdb_init returns, so we'll need to add this one manually. If kmem 4289 * is ready, we'll use the walker to initialize the caches. If kmem 4290 * isn't ready, we'll register a callback that will allow us to defer 4291 * cache walking until it is. 4292 */ 4293 if (mdb_add_walker(&w) != 0) { 4294 mdb_warn("failed to add kmem_cache walker"); 4295 return; 4296 } 4297 4298 (void) mdb_callback_add(MDB_CALLBACK_STCHG, kmem_statechange_cb, NULL); 4299 kmem_statechange_cb(NULL); 4300 } 4301 4302 typedef struct whatthread { 4303 uintptr_t wt_target; 4304 int wt_verbose; 4305 } whatthread_t; 4306 4307 static int 4308 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 4309 { 4310 uintptr_t current, data; 4311 4312 if (t->t_stkbase == NULL) 4313 return (WALK_NEXT); 4314 4315 /* 4316 * Warn about swapped out threads, but drive on anyway 4317 */ 4318 if (!(t->t_schedflag & TS_LOAD)) { 4319 mdb_warn("thread %p's stack swapped out\n", addr); 4320 return (WALK_NEXT); 4321 } 4322 4323 /* 4324 * Search the thread's stack for the given pointer. Note that it would 4325 * be more efficient to follow ::kgrep's lead and read in page-sized 4326 * chunks, but this routine is already fast and simple. 4327 */ 4328 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 4329 current += sizeof (uintptr_t)) { 4330 if (mdb_vread(&data, sizeof (data), current) == -1) { 4331 mdb_warn("couldn't read thread %p's stack at %p", 4332 addr, current); 4333 return (WALK_ERR); 4334 } 4335 4336 if (data == w->wt_target) { 4337 if (w->wt_verbose) { 4338 mdb_printf("%p in thread %p's stack%s\n", 4339 current, addr, stack_active(t, current)); 4340 } else { 4341 mdb_printf("%#lr\n", addr); 4342 return (WALK_NEXT); 4343 } 4344 } 4345 } 4346 4347 return (WALK_NEXT); 4348 } 4349 4350 int 4351 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4352 { 4353 whatthread_t w; 4354 4355 if (!(flags & DCMD_ADDRSPEC)) 4356 return (DCMD_USAGE); 4357 4358 w.wt_verbose = FALSE; 4359 w.wt_target = addr; 4360 4361 if (mdb_getopts(argc, argv, 4362 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 4363 return (DCMD_USAGE); 4364 4365 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 4366 == -1) { 4367 mdb_warn("couldn't walk threads"); 4368 return (DCMD_ERR); 4369 } 4370 4371 return (DCMD_OK); 4372 } 4373
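/*
 * Illustrative use of the ::whatthread dcmd defined above (addresses and
 * output are hypothetical and only approximate the real formatting):
 *
 *	> 30001c3cca0::whatthread -v
 *	2a100bcf960 in thread 300004a2020's stack
 */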