/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "avl.h"
#include "combined.h"
#include "dist.h"
#include "kmem.h"
#include "list.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");

	return (DCMD_OK);
}

int
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
	GElf_Sym sym;

	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
		mdb_warn("couldn't find kmem_caches");
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)sym.st_value;

	return (list_walk_init_named(wsp, "cache list", "cache"));
}

int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");
		return (WALK_ERR);
	}

	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}

int
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;
	kmem_cpu_cache_t cc;

	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}

static int
kmem_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;
	uintptr_t caddr = (uintptr_t)arg;
	if ((uintptr_t)sp->slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    saddr, caddr, sp->slab_cache);
		return (-1);
	}

	return (0);
}

static int
kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;

	int rc = kmem_slab_check(p, saddr, arg);
	if (rc != 0) {
		return (rc);
	}

	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
		mdb_warn("slab %p is not a partial slab\n", saddr);
		return (-1);
	}

	return (0);
}

static int
kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;

	int rc = kmem_slab_check(p, saddr, arg);
	if (rc != 0) {
		return (rc);
	}

	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
		mdb_warn("slab %p is not completely allocated\n", saddr);
		return (-1);
	}

	return (0);
}

typedef struct {
	uintptr_t kns_cache_addr;
	int kns_nslabs;
} kmem_nth_slab_t;

static int
kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_nth_slab_t *chkp = arg;

	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
	if (rc != 0) {
		return (rc);
	}

	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
}

static int
kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_complete_slab_check, (void *)caddr));
}

static int
kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_partial_slabs));

	return (avl_walk_init_checked(wsp, "slab list", "slab",
	    kmem_partial_slab_check, (void *)caddr));
}

int
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	if (caddr == NULL) {
		mdb_warn("kmem_slab doesn't support global walks\n");
		return (WALK_ERR);
	}

	combined_walk_init(wsp);
	combined_walk_add(wsp,
	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
	combined_walk_add(wsp,
	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);

	return (WALK_NEXT);
}

static int
kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_nth_slab_t *chk;

	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
	    UM_SLEEP | UM_GC);
	chk->kns_cache_addr = caddr;
	chk->kns_nslabs = 1;
	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_nth_slab_check, chk));
}

int
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("kmem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	combined_walk_init(wsp);

	/*
	 * Some consumers (umem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache. So
	 * if there are *no* partial slabs, report the first full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if (c.cache_partial_slabs.avl_numnodes == 0) {
		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
		    list_walk_step, list_walk_fini);
	} else {
		combined_walk_add(wsp, kmem_partial_slab_walk_init,
		    avl_walk_step, avl_walk_fini);
	}

	return (WALK_NEXT);
}

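/*
 * ::kmem_cache dcmd.  Example usage (from the mdb prompt): "::kmem_cache"
 * prints a summary line for every cache, "addr::kmem_cache" prints a single
 * cache, and "-n <substring>" restricts output to caches whose name contains
 * the given substring.
 */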
int
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	const char *filter = NULL;

	if (mdb_getopts(ac, argv,
	    'n', MDB_OPT_STR, &filter,
	    NULL) != ac) {
		return (DCMD_USAGE);
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
		return (DCMD_OK);

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}

void
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
" -n name\n"
" name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}

#define	LABEL_WIDTH	11
static void
kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
	uint64_t total;
	int buckets;
	int i;
	const int *distarray;
	int complete[2];

	buckets = buffers_per_slab;

	total = 0;
	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	if (maxbuckets > 1)
		buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		    minbucketsize));
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			buckets = 2;
			minbucketsize = (buffers_per_slab - 1);
		}
	}

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output.
	 * Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
	}
	mdb_printf("\n");
}
#undef LABEL_WIDTH

/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
	*is_slab = B_TRUE;
	return (WALK_DONE);
}

/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
	/*
	 * The "kmem_partial_slab" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at
	 * least one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
	return (WALK_DONE);
}

typedef struct kmem_slab_usage {
	int ksu_refcnt;		/* count of allocated buffers on slab */
	boolean_t ksu_nomove;	/* slab marked non-reclaimable */
} kmem_slab_usage_t;

typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;

/*ARGSUSED*/
static int
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
	kmem_slab_usage_t *ksu;
	long unused;

	ks->ks_slabs++;
	ks->ks_bucket[sp->slab_refcnt]++;

	unused = (sp->slab_chunks - sp->slab_refcnt);
	if (unused == 0) {
		return (WALK_NEXT);
	}

	ks->ks_partial_slabs++;
	ks->ks_unused_buffers += unused;

	if (ks->ks_partial_slabs > ks->ks_usage_len) {
		kmem_slab_usage_t *usage;
		int len = ks->ks_usage_len;

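		/*
		 * Grow the usage array geometrically (16, 32, 64, ...) as
		 * more partial slabs are encountered, copying the old
		 * contents forward.
		 */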
		len = (len == 0 ? 16 : len * 2);
		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
		if (ks->ks_usage != NULL) {
			bcopy(ks->ks_usage, usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
			mdb_free(ks->ks_usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
		}
		ks->ks_usage = usage;
		ks->ks_usage_len = len;
	}

	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
	ksu->ksu_refcnt = sp->slab_refcnt;
	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
	return (WALK_NEXT);
}

static void
kmem_slabs_header()
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}

int
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	kmem_slab_stats_t stats;
	mdb_walk_cb_t cb;
	int pct;
	int tenths_pct;
	size_t maxbuckets = 1;
	size_t minbucketsize = 0;
	const char *filter = NULL;
	const char *name = NULL;
	uint_t opt_v = FALSE;
	boolean_t buckets = B_FALSE;
	boolean_t skip = B_FALSE;

	if (mdb_getopts(argc, argv,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'n', MDB_OPT_STR, &filter,
	    'N', MDB_OPT_STR, &name,
	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
	    NULL) != argc) {
		return (DCMD_USAGE);
	}

	if ((maxbuckets != 1) || (minbucketsize != 0)) {
		buckets = B_TRUE;
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
		    argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	if (name == NULL) {
		skip = ((filter != NULL) &&
		    (strstr(c.cache_name, filter) == NULL));
	} else if (filter == NULL) {
		skip = (strcmp(c.cache_name, name) != 0);
	} else {
		/* match either -n or -N */
		skip = ((strcmp(c.cache_name, name) != 0) &&
		    (strstr(c.cache_name, filter) == NULL));
	}

	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
		kmem_slabs_header();
	} else if ((opt_v || buckets) && !skip) {
		if (DCMD_HDRSPEC(flags)) {
			kmem_slabs_header();
		} else {
			boolean_t is_slab = B_FALSE;
			const char *walker_name;
			if (opt_v) {
				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
				walker_name = "kmem_slab_partial";
			} else {
				cb = (mdb_walk_cb_t)kmem_first_slab;
				walker_name = "kmem_slab";
			}
			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
			if (is_slab) {
				kmem_slabs_header();
			}
		}
	}

	if (skip) {
		return (DCMD_OK);
	}

	bzero(&stats, sizeof (kmem_slab_stats_t));
	stats.ks_cp = &c;
	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
	/* +1 to include a zero bucket */
	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket), UM_SLEEP);
	cb = (mdb_walk_cb_t)kmem_slablist_stat;
	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);

	if (c.cache_buftotal == 0) {
		pct = 0;
		tenths_pct = 0;
	} else {
		uint64_t n = stats.ks_unused_buffers * 10000;
		pct = (int)(n / c.cache_buftotal);
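		/*
		 * pct now holds the waste ratio in hundredths of a percent
		 * (e.g. 1234 means 12.34%).  The statements below extract a
		 * rounded tenths digit, and the later "pct /= 100" reduces
		 * pct to whole percent for display as "NN.N%".
		 */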
		tenths_pct = pct - ((pct / 100) * 100);
		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
		if (tenths_pct == 10) {
			pct += 100;
			tenths_pct = 0;
		}
	}

	pct /= 100;
	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
	    stats.ks_unused_buffers, pct, tenths_pct);

	if (maxbuckets == 0) {
		maxbuckets = stats.ks_max_buffers_per_slab;
	}

	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
	    (stats.ks_slabs > 0)) {
		mdb_printf("\n");
		kmem_slabs_print_dist(stats.ks_bucket,
		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
	}

	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket));

	if (!opt_v) {
		return (DCMD_OK);
	}

	if (opt_v && (stats.ks_partial_slabs > 0)) {
		int i;
		kmem_slab_usage_t *ksu;

		mdb_printf(" %d complete (%d), %d partial:",
		    (stats.ks_slabs - stats.ks_partial_slabs),
		    stats.ks_max_buffers_per_slab,
		    stats.ks_partial_slabs);

		for (i = 0; i < stats.ks_partial_slabs; i++) {
			ksu = &stats.ks_usage[i];
			mdb_printf(" %d%s", ksu->ksu_refcnt,
			    (ksu->ksu_nomove ? "*" : ""));
		}
		mdb_printf("\n\n");
	}

	if (stats.ks_usage_len > 0) {
		mdb_free(stats.ks_usage,
		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
	}

	return (DCMD_OK);
}

void
kmem_slabs_help(void)
{
	mdb_printf("%s",
"Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
" -n name\n"
" name of kmem cache (or matching partial name)\n"
" -N name\n"
" exact name of kmem cache\n"
" -b maxbins\n"
" Print a distribution of allocated buffers per slab using at\n"
" most maxbins bins. The first bin is reserved for completely\n"
" allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
" effect as specifying the maximum allocated buffers per slab\n"
" or setting minbinsize to 1 (-B 1).\n"
" -B minbinsize\n"
" Print a distribution of allocated buffers per slab, making\n"
" all bins (except the first, reserved for completely allocated\n"
" slabs) at least minbinsize buffers apart.\n"
" -v verbose output: List the allocated buffer count of each partial\n"
" slab on the free list in order from front to back to show how\n"
" closely the slabs are ordered by usage. For example\n"
"\n"
" 10 complete, 3 partial (8): 7 3 1\n"
"\n"
" means there are thirteen slabs with eight buffers each, including\n"
" three partially allocated slabs with less than all eight buffers\n"
" allocated.\n"
"\n"
" Buffer allocations are always from the front of the partial slab\n"
" list. When a buffer is freed from a completely used slab, that\n"
" slab is added to the front of the partial slab list. Assuming\n"
" that all buffers are equally likely to be freed soon, the\n"
" desired order of partial slabs is most-used at the front of the\n"
" list and least-used at the back (as in the example above).\n"
" However, if a slab contains an allocated buffer that will not\n"
" soon be freed, it would be better for that slab to be at the\n"
" front where all of its buffers can be allocated. Taking a slab\n"
" off the partial slab list (either with all buffers freed or all\n"
" buffers allocated) reduces cache fragmentation.\n"
"\n"
" A slab's allocated buffer count representing a partial slab (9 in\n"
" the example below) may be marked as follows:\n"
"\n"
" 9* An asterisk indicates that kmem has marked the slab non-\n"
" reclaimable because the kmem client refused to move one of the\n"
" slab's buffers. Since kmem does not expect to completely free the\n"
" slab, it moves it to the front of the list in the hope of\n"
" completely allocating it instead. A slab marked with an asterisk\n"
" stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t for accounting structures (debug mode), slab\n"
"\t\t\t coloring (incremental small offsets to stagger\n"
"\t\t\t buffer alignment), or the per-CPU magazine layer\n");
}

static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}

static int
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
		return (-1);

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
		return (1);

	return (0);
}

typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;
	size_t kmhw_nelems;
	size_t kmhw_pos;
	kmem_bufctl_t kmhw_cur;
} kmem_hash_walk_t;

int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw;
	uintptr_t *hash;
	kmem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == NULL) {
		mdb_warn("kmem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;
	kmhw->kmhw_pos = 0;

	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = kmhw;

	return (WALK_NEXT);
}

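/*
 * Step the hash walk: follow the bufctl chain of the current bucket; once it
 * is exhausted, advance to the next non-empty bucket in the snapshot of the
 * cache's hash table taken at init time.
 */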
int
kmem_hash_walk_step(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;
	uintptr_t addr = NULL;

	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
				break;
		}
	}
	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
}

void
kmem_hash_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;

	if (kmhw == NULL)
		return;

	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
	kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",
		    buf, caddr);
		return (-1);
	}

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
			return (-1);
		}
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;
			return (0);
		}
		bcp = bc.bc_next;
	}

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
	return (-1);
}

int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}

#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}

int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	kmem_bufctl_audit_t b;

	/*
	 * if KMF_AUDIT is not set, we know that we're looking at a
	 * kmem_bufctl_t.
	 */
	if (!(cp->cache_flags & KMF_AUDIT) ||
	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
		(void) memset(&b, 0, sizeof (b));
		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
}

typedef struct kmem_walk {
	int kmw_type;

	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;
	size_t kmw_csize;

	/*
	 * magazine layer
	 */
	void **kmw_maglist;
	size_t kmw_max;
	size_t kmw_count;
	size_t kmw_pos;

	/*
	 * slab layer
	 */
	char *kmw_valid;	/* to keep track of freed buffers */
	char *kmw_ubase;	/* buffer for slab data */
} kmem_walk_t;

static int
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	kmem_walk_t *kmw;
	int ncpus, csize;
	kmem_cache_t *cp;
	size_t vm_quantum;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	type &= ~KM_HASH;

	if (addr == NULL) {
		mdb_warn("kmem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * First we need to figure out how many CPUs are configured in the
	 * system to know how much to slurp out.
	 */
	mdb_readvar(&ncpus, "max_ncpus");

	csize = KMEM_CACHE_SIZE(ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    vm_quantum == 0 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they want constructed buffers, but there's no constructor or
	 * the cache has DEADBEEF checking enabled, there is nothing to report.
	 */
	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
	    cp->cache_constructor == NULL ||
	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
	    &magmax, UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines; if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & KM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

	kmw->kmw_type = type;
	kmw->kmw_addr = addr;
	kmw->kmw_cp = cp;
	kmw->kmw_csize = csize;
	kmw->kmw_maglist = maglist;
	kmw->kmw_max = magmax;
	kmw->kmw_count = magcnt;
	kmw->kmw_pos = 0;

	/*
	 * When walking allocated buffers in a KMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
		layered = "kmem_hash";

		kmw->kmw_type |= KM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & KM_ALLOCATED)
			layered = "kmem_slab";
		else
			layered = "kmem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & KMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			kmw->kmw_ubase = mdb_alloc(slabsize +
			    sizeof (kmem_bufctl_t), UM_SLEEP);

			if (type & KM_ALLOCATED)
				kmw->kmw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

out1:
	if (status == WALK_ERR) {
		if (kmw->kmw_valid)
			mdb_free(kmw->kmw_valid, slabsize / chunksize);

		if (kmw->kmw_ubase)
			mdb_free(kmw->kmw_ubase, slabsize +
			    sizeof (kmem_bufctl_t));

		if (kmw->kmw_maglist)
			mdb_free(kmw->kmw_maglist,
			    kmw->kmw_max * sizeof (uintptr_t));

		mdb_free(kmw, sizeof (kmem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}

int
kmem_walk_step(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	int type = kmw->kmw_type;
	kmem_cache_t *cp = kmw->kmw_cp;

	void **maglist = kmw->kmw_maglist;
	int magcnt = kmw->kmw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const kmem_slab_t *sp;
	const kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'kmem_hash' layered walk case
	 */
	if (type & KM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & KM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (kmem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = kmw->kmw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & KM_FREE) && magcnt != 0) {
		kmw->kmw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & KM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & KMF_BUFTAG) {
					kmem_buftag_t *btp;
					kmem_buftag_t tag;

					/* LINTED - alignment */
					btp = KMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (kmem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * If they want constructed buffers, we're finished, since the
	 * magazine layer holds them all.
	 */
	if (type & KM_CONSTRUCTED)
		return (WALK_DONE);

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in; we just need to determine
			 * its offset in the slab to find the
			 * kmem_bufctl_t.
			 */
			bc = *((kmem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
kmem_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (kmw == NULL)
		return;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;

	if (kmw->kmw_valid != NULL)
		mdb_free(kmw->kmw_valid, slabsize / chunksize);
	if (kmw->kmw_ubase != NULL)
		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));

	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
	mdb_free(kmw, sizeof (kmem_walk_t));
}

/*ARGSUSED*/
static int
kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & KMC_NOTOUCH)
		return (WALK_NEXT);

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
		return (WALK_DONE);

	return (WALK_NEXT);
}

#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}

int
kmem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("kmem", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
}

int
bufctl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("bufctl", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
}

int
freemem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE));
}

int
freemem_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freemem_constructed", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
}

int
freectl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
}

int
freectl_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		KMEM_WALK_ALL("freectl_constructed", wsp);
	return (kmem_walk_init_common(wsp,
	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
}

typedef struct bufctl_history_walk {
	void		*bhw_next;
	kmem_cache_t	*bhw_cache;
	kmem_slab_t	*bhw_slab;
	hrtime_t	bhw_timestamp;
} bufctl_history_walk_t;

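/*
 * The bufctl_history walker reports the audit history of a single buffer:
 * starting from a bufctl, it follows the bc_lastlog links backwards through
 * the transaction log, stopping when the entries no longer match the original
 * buffer or the timestamps stop decreasing.
 */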
int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	kmem_bufctl_audit_t bc;
	kmem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl;  in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}

int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	kmem_bufctl_audit_t bc;

	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)bc.bc_addr != baseaddr ||
	    bc.bc_cache != bhw->bhw_cache ||
	    bc.bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = bc.bc_lastlog;
	bhw->bhw_timestamp = bc.bc_timestamp;

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
bufctl_history_walk_fini(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;

	mdb_free(bhw, sizeof (*bhw));
}

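/*
 * The kmem_log walker reads the entire kmem transaction log into memory,
 * sorts the audit records by timestamp (most recent first), and then hands
 * them to the callback one at a time.
 */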
typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;
	kmem_bufctl_audit_t **klw_sorted;
	kmem_log_header_t klw_lh;
	size_t klw_size;
	size_t klw_maxndx;
	size_t klw_ndx;
} kmem_log_walk_t;

int
kmem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	kmem_log_walk_t *klw;
	kmem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
	 * read the log whose kmem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == NULL) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
	lhp = &klw->klw_lh;

	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;

	if (mdb_vread(klw->klw_base, klw->klw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(klw->klw_base, klw->klw_size);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++)
			klw->klw_sorted[k++] = &chunk[j];
	}

	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	klw->klw_maxndx = k;
	wsp->walk_data = klw;

	return (WALK_NEXT);
}

int
kmem_log_walk_step(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;
	kmem_bufctl_audit_t *bcp;

	if (klw->klw_ndx == klw->klw_maxndx)
		return (WALK_DONE);

	bcp = klw->klw_sorted[klw->klw_ndx++];

	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
}

void
kmem_log_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;

	mdb_free(klw->klw_base, klw->klw_size);
	mdb_free(klw->klw_sorted, klw->klw_maxndx *
	    sizeof (kmem_bufctl_audit_t *));
	mdb_free(klw, sizeof (kmem_log_walk_t));
}

typedef struct allocdby_bufctl {
	uintptr_t abb_addr;
	hrtime_t abb_ts;
} allocdby_bufctl_t;

typedef struct allocdby_walk {
	const char *abw_walk;
	uintptr_t abw_thread;
	size_t abw_nbufs;
	size_t abw_size;
	allocdby_bufctl_t *abw_buf;
	size_t abw_ndx;
} allocdby_walk_t;

int
allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
    allocdby_walk_t *abw)
{
	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
		return (WALK_NEXT);

	if (abw->abw_nbufs == abw->abw_size) {
		allocdby_bufctl_t *buf;
		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;

		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);

		bcopy(abw->abw_buf, buf, oldsize);
		mdb_free(abw->abw_buf, oldsize);

		abw->abw_size <<= 1;
		abw->abw_buf = buf;
	}

	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
	abw->abw_nbufs++;

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
{
	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
	    abw, addr) == -1) {
		mdb_warn("couldn't walk bufctl for cache %p", addr);
		return (WALK_DONE);
	}

	return (WALK_NEXT);
}

static int
allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
{
	if (lhs->abb_ts < rhs->abb_ts)
		return (1);
	if (lhs->abb_ts > rhs->abb_ts)
		return (-1);
	return (0);
}

static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}

int
allocdby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "bufctl"));
}

int
freedby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "freectl"));
}

int
allocdby_walk_step(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;
	kmem_bufctl_audit_t bc;
	uintptr_t addr;

	if (abw->abw_ndx == abw->abw_nbufs)
		return (WALK_DONE);

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (WALK_DONE);
	}

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
allocdby_walk_fini(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;

	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
	mdb_free(abw, sizeof (allocdby_walk_t));
}

/*ARGSUSED*/
int
allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
{
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	int i;

	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
	for (i = 0; i < bcp->bc_depth; i++) {
		if (mdb_lookup_by_addr(bcp->bc_stack[i],
		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
			continue;
		if (strncmp(c, "kmem_", 5) == 0)
			continue;
		mdb_printf("%s+0x%lx",
		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
		break;
	}
	mdb_printf("\n");

	return (WALK_NEXT);
}

static int
allocdby_common(uintptr_t addr, uint_t flags, const char *w)
{
	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");

	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
		mdb_warn("can't walk '%s' for %p", w, addr);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

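/*
 * ::allocdby and ::freedby dcmds.  Example usage (from the mdb prompt):
 * "threadaddr::allocdby" lists the bufctls of buffers allocated by the given
 * thread, most recent first; "threadaddr::freedby" does the same for frees.
 */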
/*ARGSUSED*/
int
allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "allocdby"));
}

/*ARGSUSED*/
int
freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "freedby"));
}

/*
 * Return a string describing the address in relation to the given thread's
 * stack.
 *
 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
 *
 * - If the address is above the stack pointer, return an empty string
 *   signifying that the address is active.
 *
 * - If the address is below the stack pointer, and the thread is not on proc,
 *   return " (below sp)".
 *
 * - If the address is below the stack pointer, and the thread is on proc,
 *   return " (possibly below sp)".  Depending on context, we may or may not
 *   have an accurate t_sp.
 */
static const char *
stack_active(const kthread_t *t, uintptr_t addr)
{
	uintptr_t panicstk;
	GElf_Sym sym;

	if (t->t_state == TS_FREE)
		return (" (inactive interrupt thread)");

	/*
	 * Check to see if we're on the panic stack.  If so, ignore t_sp, as it
	 * no longer relates to the thread's real stack.
	 */
	if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
		panicstk = (uintptr_t)sym.st_value;

		if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
			return ("");
	}

	if (addr >= t->t_sp + STACK_BIAS)
		return ("");

	if (t->t_state == TS_ONPROC)
		return (" (possibly below sp)");

	return (" (below sp)");
}

typedef struct whatis {
	uintptr_t w_addr;
	const kmem_cache_t *w_cache;
	const vmem_t *w_vmem;
	size_t w_slab_align;
	int w_slab_found;
	int w_found;
	int w_kmem_lite_count;
	uint_t w_all;
	uint_t w_bufctl;
	uint_t w_freemem;
	uint_t w_idspace;
	uint_t w_quiet;
	uint_t w_verbose;
} whatis_t;

/* nicely report pointers as offsets from a base */
static void
whatis_report_pointer(uintptr_t addr, uintptr_t base, const char *description)
{
	if (addr == base)
		mdb_printf("%p is %s",
		    addr, description);
	else
		mdb_printf("%p is %p+%p, %s",
		    addr, base, addr - base, description);
}

/* call one of our dcmd functions with "-v" and the provided address */
static void
whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
{
	mdb_arg_t a;
	a.a_type = MDB_TYPE_STRING;
	a.a_un.a_str = "-v";

	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
}

static void
whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w)
{
	const kmem_cache_t *cp = w->w_cache;
	/* LINTED pointer cast may result in improper alignment */
	uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
	intptr_t stat;
	int call_printer;
	int count = 0;
	int i;
	pc_t callers[16];

	if (cp->cache_flags & KMF_REDZONE) {
		kmem_buftag_t bt;

		if (mdb_vread(&bt, sizeof (bt), btaddr) == -1)
			goto done;

		stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;

		if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
			goto done;

		/*
		 * provide the bufctl ptr if it has useful information
		 */
		if (baddr == 0 && (cp->cache_flags & KMF_AUDIT))
			baddr = (uintptr_t)bt.bt_bufctl;

		if (cp->cache_flags & KMF_LITE) {
			count = w->w_kmem_lite_count;

			if (count * sizeof (pc_t) > sizeof (callers))
0; 2165 2166 if (count > 0 && 2167 mdb_vread(callers, count * sizeof (pc_t), 2168 btaddr + 2169 offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2170 count = 0; 2171 2172 /* 2173 * skip unused callers 2174 */ 2175 while (count > 0 && callers[count - 1] == 2176 (pc_t)KMEM_UNINITIALIZED_PATTERN) 2177 count--; 2178 } 2179 } 2180 2181 done: 2182 call_printer = 2183 (!w->w_quiet && baddr != 0 && (cp->cache_flags & KMF_AUDIT)); 2184 2185 whatis_report_pointer(w->w_addr, addr, ""); 2186 2187 if (baddr != 0 && !call_printer) 2188 mdb_printf("bufctl %p ", baddr); 2189 2190 mdb_printf("%s from %s%s\n", 2191 (w->w_freemem == FALSE) ? "allocated" : "freed", cp->cache_name, 2192 (call_printer || (!w->w_quiet && count > 0)) ? ":" : ""); 2193 2194 if (call_printer) 2195 whatis_call_printer(bufctl, baddr); 2196 2197 if (!w->w_quiet && count > 0) { 2198 mdb_inc_indent(8); 2199 mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"", 2200 callers[0], (count != 1)? ", ":"\n"); 2201 for (i = 1; i < count; i++) 2202 mdb_printf("%a%s", callers[i], 2203 (i + 1 < count)? ", ":"\n"); 2204 mdb_dec_indent(8); 2205 } 2206 } 2207 2208 /*ARGSUSED*/ 2209 static int 2210 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w) 2211 { 2212 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2213 return (WALK_NEXT); 2214 2215 whatis_print_kmem(addr, 0, w); 2216 w->w_found++; 2217 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2218 } 2219 2220 static int 2221 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 2222 { 2223 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 2224 return (WALK_NEXT); 2225 2226 whatis_report_pointer(w->w_addr, vs->vs_start, ""); 2227 2228 /* 2229 * If we're not printing it seperately, provide the vmem_seg 2230 * pointer if it has a stack trace. 2231 */ 2232 if (w->w_quiet && (w->w_bufctl == TRUE || 2233 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 2234 mdb_printf("vmem_seg %p ", addr); 2235 } 2236 2237 mdb_printf("%s from %s vmem arena%s\n", 2238 (w->w_freemem == FALSE) ? "allocated" : "freed", w->w_vmem->vm_name, 2239 !w->w_quiet ? ":" : ""); 2240 2241 if (!w->w_quiet) 2242 whatis_call_printer(vmem_seg, addr); 2243 2244 w->w_found++; 2245 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2246 } 2247 2248 static int 2249 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 2250 { 2251 const char *nm = vmem->vm_name; 2252 w->w_vmem = vmem; 2253 w->w_freemem = FALSE; 2254 2255 if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2256 return (WALK_NEXT); 2257 2258 if (w->w_verbose) 2259 mdb_printf("Searching vmem arena %s...\n", nm); 2260 2261 if (mdb_pwalk("vmem_alloc", 2262 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2263 mdb_warn("can't walk vmem seg for %p", addr); 2264 return (WALK_NEXT); 2265 } 2266 2267 if (w->w_found && w->w_all == FALSE) 2268 return (WALK_DONE); 2269 2270 if (w->w_verbose) 2271 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 2272 2273 w->w_freemem = TRUE; 2274 2275 if (mdb_pwalk("vmem_free", 2276 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2277 mdb_warn("can't walk vmem seg for %p", addr); 2278 return (WALK_NEXT); 2279 } 2280 2281 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 2282 } 2283 2284 /*ARGSUSED*/ 2285 static int 2286 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w) 2287 { 2288 uintptr_t addr; 2289 2290 if (bcp == NULL) 2291 return (WALK_NEXT); 2292 2293 addr = (uintptr_t)bcp->bc_addr; 2294 2295 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2296 return (WALK_NEXT); 2297 2298 whatis_print_kmem(addr, baddr, w); 2299 w->w_found++; 2300 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2301 } 2302 2303 /*ARGSUSED*/ 2304 static int 2305 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w) 2306 { 2307 uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align); 2308 2309 if ((w->w_addr - base) >= w->w_cache->cache_slabsize) 2310 return (WALK_NEXT); 2311 2312 w->w_slab_found++; 2313 return (WALK_DONE); 2314 } 2315 2316 static int 2317 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2318 { 2319 char *walk, *freewalk; 2320 mdb_walk_cb_t func; 2321 vmem_t *vmp = c->cache_arena; 2322 2323 if (((c->cache_flags & KMC_IDENTIFIER) != 0) ^ w->w_idspace) 2324 return (WALK_NEXT); 2325 2326 /* For caches with auditing info, we always walk the bufctls */ 2327 if (w->w_bufctl || (c->cache_flags & KMF_AUDIT)) { 2328 walk = "bufctl"; 2329 freewalk = "freectl"; 2330 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2331 } else { 2332 walk = "kmem"; 2333 freewalk = "freemem"; 2334 func = (mdb_walk_cb_t)whatis_walk_kmem; 2335 } 2336 2337 w->w_cache = c; 2338 2339 if (w->w_verbose) 2340 mdb_printf("Searching %s's slabs...\n", c->cache_name); 2341 2342 /* 2343 * Verify that the address is in one of the cache's slabs. If not, 2344 * we can skip the more expensive walkers. (this is purely a 2345 * heuristic -- as long as there are no false-negatives, we'll be fine) 2346 * 2347 * We try to get the cache's arena's quantum, since to accurately 2348 * get the base of a slab, you have to align it to the quantum. If 2349 * it doesn't look sensible, we fall back to not aligning. 2350 */ 2351 if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align), 2352 (uintptr_t)&vmp->vm_quantum) == -1) { 2353 mdb_warn("unable to read %p->cache_arena->vm_quantum", c); 2354 w->w_slab_align = 1; 2355 } 2356 2357 if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 || 2358 (w->w_slab_align & (w->w_slab_align - 1))) { 2359 mdb_warn("%p's arena has invalid quantum (0x%p)\n", c, 2360 w->w_slab_align); 2361 w->w_slab_align = 1; 2362 } 2363 2364 w->w_slab_found = 0; 2365 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w, 2366 addr) == -1) { 2367 mdb_warn("can't find kmem_slab walker"); 2368 return (WALK_DONE); 2369 } 2370 if (w->w_slab_found == 0) 2371 return (WALK_NEXT); 2372 2373 if (c->cache_flags & KMF_LITE) { 2374 if (mdb_readvar(&w->w_kmem_lite_count, 2375 "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16) 2376 w->w_kmem_lite_count = 0; 2377 } 2378 2379 if (w->w_verbose) 2380 mdb_printf("Searching %s...\n", c->cache_name); 2381 2382 w->w_freemem = FALSE; 2383 2384 if (mdb_pwalk(walk, func, w, addr) == -1) { 2385 mdb_warn("can't find %s walker", walk); 2386 return (WALK_DONE); 2387 } 2388 2389 if (w->w_found && w->w_all == FALSE) 2390 return (WALK_DONE); 2391 2392 /* 2393 * We have searched for allocated memory; now search for freed memory. 
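 * A match in the free walk means the address falls within a buffer that the
 * cache currently considers free, which usually points at a stale or
 * already-freed reference.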
2394 */ 2395 if (w->w_verbose) 2396 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2397 2398 w->w_freemem = TRUE; 2399 2400 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 2401 mdb_warn("can't find %s walker", freewalk); 2402 return (WALK_DONE); 2403 } 2404 2405 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 2406 } 2407 2408 static int 2409 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2410 { 2411 if (c->cache_cflags & KMC_NOTOUCH) 2412 return (WALK_NEXT); 2413 2414 return (whatis_walk_cache(addr, c, w)); 2415 } 2416 2417 static int 2418 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2419 { 2420 if (!(c->cache_cflags & KMC_NOTOUCH)) 2421 return (WALK_NEXT); 2422 2423 return (whatis_walk_cache(addr, c, w)); 2424 } 2425 2426 static int 2427 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w) 2428 { 2429 /* 2430 * Often, one calls ::whatis on an address from a thread structure. 2431 * We use this opportunity to short circuit this case... 2432 */ 2433 if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) { 2434 whatis_report_pointer(w->w_addr, addr, 2435 "allocated as a thread structure\n"); 2436 w->w_found++; 2437 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2438 } 2439 2440 if (w->w_addr < (uintptr_t)t->t_stkbase || 2441 w->w_addr > (uintptr_t)t->t_stk) 2442 return (WALK_NEXT); 2443 2444 if (t->t_stkbase == NULL) 2445 return (WALK_NEXT); 2446 2447 mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr, 2448 stack_active(t, w->w_addr)); 2449 2450 w->w_found++; 2451 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2452 } 2453 2454 static int 2455 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w) 2456 { 2457 struct module mod; 2458 char name[MODMAXNAMELEN], *where; 2459 Shdr shdr; 2460 GElf_Sym sym; 2461 2462 if (m->mod_mp == NULL) 2463 return (WALK_NEXT); 2464 2465 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2466 mdb_warn("couldn't read modctl %p's module", addr); 2467 return (WALK_NEXT); 2468 } 2469 2470 if (w->w_addr >= (uintptr_t)mod.text && 2471 w->w_addr < (uintptr_t)mod.text + mod.text_size) { 2472 where = "text segment"; 2473 goto found; 2474 } 2475 2476 if (w->w_addr >= (uintptr_t)mod.data && 2477 w->w_addr < (uintptr_t)mod.data + mod.data_size) { 2478 where = "data segment"; 2479 goto found; 2480 } 2481 2482 if (w->w_addr >= (uintptr_t)mod.bss && 2483 w->w_addr < (uintptr_t)mod.bss + mod.bss_size) { 2484 where = "bss"; 2485 goto found; 2486 } 2487 2488 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2489 mdb_warn("couldn't read symbol header for %p's module", addr); 2490 return (WALK_NEXT); 2491 } 2492 2493 if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr < 2494 (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) { 2495 where = "symtab"; 2496 goto found; 2497 } 2498 2499 if (w->w_addr >= (uintptr_t)mod.symspace && 2500 w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) { 2501 where = "symspace"; 2502 goto found; 2503 } 2504 2505 return (WALK_NEXT); 2506 2507 found: 2508 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2509 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2510 2511 mdb_printf("%p is ", w->w_addr); 2512 2513 /* 2514 * If we found this address in a module, then there's a chance that 2515 * it's actually a named symbol. Try the symbol lookup. 
2516 */ 2517 if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, NULL, 0, &sym) != -1 && 2518 (w->w_addr - (uintptr_t)sym.st_value) < sym.st_size) { 2519 mdb_printf("%a, ", w->w_addr); 2520 } 2521 2522 mdb_printf("in %s's %s\n", name, where); 2523 2524 w->w_found++; 2525 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2526 } 2527 2528 /*ARGSUSED*/ 2529 static int 2530 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w) 2531 { 2532 static int machsize = 0; 2533 mdb_ctf_id_t id; 2534 2535 if (machsize == 0) { 2536 if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0) 2537 machsize = mdb_ctf_type_size(id); 2538 else { 2539 mdb_warn("could not get size of page_t"); 2540 machsize = sizeof (page_t); 2541 } 2542 } 2543 2544 if (w->w_addr < addr || w->w_addr >= addr + machsize) 2545 return (WALK_NEXT); 2546 2547 whatis_report_pointer(w->w_addr, addr, 2548 "allocated as a page structure\n"); 2549 2550 w->w_found++; 2551 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2552 } 2553 2554 int 2555 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2556 { 2557 whatis_t w; 2558 2559 if (!(flags & DCMD_ADDRSPEC)) 2560 return (DCMD_USAGE); 2561 2562 w.w_all = FALSE; 2563 w.w_bufctl = FALSE; 2564 w.w_idspace = FALSE; 2565 w.w_quiet = FALSE; 2566 w.w_verbose = FALSE; 2567 2568 if (mdb_getopts(argc, argv, 2569 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2570 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, 2571 'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace, 2572 'q', MDB_OPT_SETBITS, TRUE, &w.w_quiet, 2573 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2574 NULL) != argc) 2575 return (DCMD_USAGE); 2576 2577 w.w_addr = addr; 2578 w.w_found = 0; 2579 2580 if (w.w_verbose) 2581 mdb_printf("Searching modules...\n"); 2582 2583 if (!w.w_idspace) { 2584 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w) 2585 == -1) { 2586 mdb_warn("couldn't find modctl walker"); 2587 return (DCMD_ERR); 2588 } 2589 2590 if (w.w_found && w.w_all == FALSE) 2591 return (DCMD_OK); 2592 2593 /* 2594 * Now search all thread stacks. Yes, this is a little weak; we 2595 * can save a lot of work by first checking to see if the 2596 * address is in segkp vs. segkmem. But hey, computers are 2597 * fast. 
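 * Since MDB_SYM_FUZZY matches the nearest symbol at or below the address,
 * we only print the symbolic form when the offset still falls within that
 * symbol's size.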
2598 */ 2599 if (w.w_verbose) 2600 mdb_printf("Searching threads...\n"); 2601 2602 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w) 2603 == -1) { 2604 mdb_warn("couldn't find thread walker"); 2605 return (DCMD_ERR); 2606 } 2607 2608 if (w.w_found && w.w_all == FALSE) 2609 return (DCMD_OK); 2610 2611 if (w.w_verbose) 2612 mdb_printf("Searching page structures...\n"); 2613 2614 if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w) 2615 == -1) { 2616 mdb_warn("couldn't find page walker"); 2617 return (DCMD_ERR); 2618 } 2619 2620 if (w.w_found && w.w_all == FALSE) 2621 return (DCMD_OK); 2622 } 2623 2624 if (mdb_walk("kmem_cache", 2625 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2626 mdb_warn("couldn't find kmem_cache walker"); 2627 return (DCMD_ERR); 2628 } 2629 2630 if (w.w_found && w.w_all == FALSE) 2631 return (DCMD_OK); 2632 2633 if (mdb_walk("kmem_cache", 2634 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2635 mdb_warn("couldn't find kmem_cache walker"); 2636 return (DCMD_ERR); 2637 } 2638 2639 if (w.w_found && w.w_all == FALSE) 2640 return (DCMD_OK); 2641 2642 if (mdb_walk("vmem_postfix", 2643 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2644 mdb_warn("couldn't find vmem_postfix walker"); 2645 return (DCMD_ERR); 2646 } 2647 2648 if (w.w_found == 0) 2649 mdb_printf("%p is unknown\n", addr); 2650 2651 return (DCMD_OK); 2652 } 2653 2654 void 2655 whatis_help(void) 2656 { 2657 mdb_printf( 2658 "Given a virtual address, attempt to determine where it came\n" 2659 "from.\n" 2660 "\n" 2661 "\t-a\tFind all possible sources. Default behavior is to stop at\n" 2662 "\t\tthe first (most specific) source.\n" 2663 "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n" 2664 "\t\trespectively. Warning: if the buffer exists, but does not\n" 2665 "\t\thave a bufctl, it will not be reported.\n" 2666 "\t-i\tSearch only identifier arenas and caches. 
By default\n" 2667 "\t\tthese are ignored.\n" 2668 "\t-q\tDon't print multi-line reports (stack traces, etc.)\n" 2669 "\t-v\tVerbose output; display caches/arenas/etc as they are\n" 2670 "\t\tsearched\n"); 2671 } 2672 2673 typedef struct kmem_log_cpu { 2674 uintptr_t kmc_low; 2675 uintptr_t kmc_high; 2676 } kmem_log_cpu_t; 2677 2678 typedef struct kmem_log_data { 2679 uintptr_t kmd_addr; 2680 kmem_log_cpu_t *kmd_cpu; 2681 } kmem_log_data_t; 2682 2683 int 2684 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2685 kmem_log_data_t *kmd) 2686 { 2687 int i; 2688 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2689 size_t bufsize; 2690 2691 for (i = 0; i < NCPU; i++) { 2692 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2693 break; 2694 } 2695 2696 if (kmd->kmd_addr) { 2697 if (b->bc_cache == NULL) 2698 return (WALK_NEXT); 2699 2700 if (mdb_vread(&bufsize, sizeof (bufsize), 2701 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2702 mdb_warn( 2703 "failed to read cache_bufsize for cache at %p", 2704 b->bc_cache); 2705 return (WALK_ERR); 2706 } 2707 2708 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2709 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2710 return (WALK_NEXT); 2711 } 2712 2713 if (i == NCPU) 2714 mdb_printf(" "); 2715 else 2716 mdb_printf("%3d", i); 2717 2718 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2719 b->bc_timestamp, b->bc_thread); 2720 2721 return (WALK_NEXT); 2722 } 2723 2724 /*ARGSUSED*/ 2725 int 2726 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2727 { 2728 kmem_log_header_t lh; 2729 kmem_cpu_log_header_t clh; 2730 uintptr_t lhp, clhp; 2731 int ncpus; 2732 uintptr_t *cpu; 2733 GElf_Sym sym; 2734 kmem_log_cpu_t *kmc; 2735 int i; 2736 kmem_log_data_t kmd; 2737 uint_t opt_b = FALSE; 2738 2739 if (mdb_getopts(argc, argv, 2740 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2741 return (DCMD_USAGE); 2742 2743 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2744 mdb_warn("failed to read 'kmem_transaction_log'"); 2745 return (DCMD_ERR); 2746 } 2747 2748 if (lhp == NULL) { 2749 mdb_warn("no kmem transaction log\n"); 2750 return (DCMD_ERR); 2751 } 2752 2753 mdb_readvar(&ncpus, "ncpus"); 2754 2755 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2756 mdb_warn("failed to read log header at %p", lhp); 2757 return (DCMD_ERR); 2758 } 2759 2760 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2761 2762 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2763 2764 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2765 mdb_warn("couldn't find 'cpu' array"); 2766 return (DCMD_ERR); 2767 } 2768 2769 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2770 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2771 NCPU * sizeof (uintptr_t), sym.st_size); 2772 return (DCMD_ERR); 2773 } 2774 2775 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2776 mdb_warn("failed to read cpu array at %p", sym.st_value); 2777 return (DCMD_ERR); 2778 } 2779 2780 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2781 kmd.kmd_addr = NULL; 2782 kmd.kmd_cpu = kmc; 2783 2784 for (i = 0; i < NCPU; i++) { 2785 2786 if (cpu[i] == NULL) 2787 continue; 2788 2789 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2790 mdb_warn("cannot read cpu %d's log header at %p", 2791 i, clhp); 2792 return (DCMD_ERR); 2793 } 2794 2795 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2796 (uintptr_t)lh.lh_base; 2797 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2798 2799 clhp += sizeof (kmem_cpu_log_header_t); 2800 } 
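	/*
	 * At this point kmc[i] bounds CPU i's active log chunk: from the base
	 * of the chunk it is currently filling up to its current write
	 * pointer. kmem_log_walk() uses these bounds to tag each log entry
	 * with the CPU that logged it; entries outside every active chunk get
	 * a blank CPU column.
	 */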
2801 2802 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2803 "TIMESTAMP", "THREAD"); 2804 2805 /* 2806 * If we have been passed an address, print out only log entries 2807 * corresponding to that address. If opt_b is specified, then interpret 2808 * the address as a bufctl. 2809 */ 2810 if (flags & DCMD_ADDRSPEC) { 2811 kmem_bufctl_audit_t b; 2812 2813 if (opt_b) { 2814 kmd.kmd_addr = addr; 2815 } else { 2816 if (mdb_vread(&b, 2817 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2818 mdb_warn("failed to read bufctl at %p", addr); 2819 return (DCMD_ERR); 2820 } 2821 2822 (void) kmem_log_walk(addr, &b, &kmd); 2823 2824 return (DCMD_OK); 2825 } 2826 } 2827 2828 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2829 mdb_warn("can't find kmem log walker"); 2830 return (DCMD_ERR); 2831 } 2832 2833 return (DCMD_OK); 2834 } 2835 2836 typedef struct bufctl_history_cb { 2837 int bhc_flags; 2838 int bhc_argc; 2839 const mdb_arg_t *bhc_argv; 2840 int bhc_ret; 2841 } bufctl_history_cb_t; 2842 2843 /*ARGSUSED*/ 2844 static int 2845 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2846 { 2847 bufctl_history_cb_t *bhc = arg; 2848 2849 bhc->bhc_ret = 2850 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2851 2852 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2853 2854 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2855 } 2856 2857 void 2858 bufctl_help(void) 2859 { 2860 mdb_printf("%s", 2861 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2862 mdb_dec_indent(2); 2863 mdb_printf("%<b>OPTIONS%</b>\n"); 2864 mdb_inc_indent(2); 2865 mdb_printf("%s", 2866 " -v Display the full content of the bufctl, including its stack trace\n" 2867 " -h retrieve the bufctl's transaction history, if available\n" 2868 " -a addr\n" 2869 " filter out bufctls not involving the buffer at addr\n" 2870 " -c caller\n" 2871 " filter out bufctls without the function/PC in their stack trace\n" 2872 " -e earliest\n" 2873 " filter out bufctls timestamped before earliest\n" 2874 " -l latest\n" 2875 " filter out bufctls timestamped after latest\n" 2876 " -t thread\n" 2877 " filter out bufctls not involving thread\n"); 2878 } 2879 2880 int 2881 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2882 { 2883 kmem_bufctl_audit_t bc; 2884 uint_t verbose = FALSE; 2885 uint_t history = FALSE; 2886 uint_t in_history = FALSE; 2887 uintptr_t caller = NULL, thread = NULL; 2888 uintptr_t laddr, haddr, baddr = NULL; 2889 hrtime_t earliest = 0, latest = 0; 2890 int i, depth; 2891 char c[MDB_SYM_NAMLEN]; 2892 GElf_Sym sym; 2893 2894 if (mdb_getopts(argc, argv, 2895 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2896 'h', MDB_OPT_SETBITS, TRUE, &history, 2897 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2898 'c', MDB_OPT_UINTPTR, &caller, 2899 't', MDB_OPT_UINTPTR, &thread, 2900 'e', MDB_OPT_UINT64, &earliest, 2901 'l', MDB_OPT_UINT64, &latest, 2902 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2903 return (DCMD_USAGE); 2904 2905 if (!(flags & DCMD_ADDRSPEC)) 2906 return (DCMD_USAGE); 2907 2908 if (in_history && !history) 2909 return (DCMD_USAGE); 2910 2911 if (history && !in_history) { 2912 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2913 UM_SLEEP | UM_GC); 2914 bufctl_history_cb_t bhc; 2915 2916 nargv[0].a_type = MDB_TYPE_STRING; 2917 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2918 2919 for (i = 0; i < argc; i++) 2920 nargv[i + 1] = argv[i]; 2921 2922 /* 2923 * When in history mode, we treat each element as if it 
2924 * were in a seperate loop, so that the headers group 2925 * bufctls with similar histories. 2926 */ 2927 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2928 bhc.bhc_argc = argc + 1; 2929 bhc.bhc_argv = nargv; 2930 bhc.bhc_ret = DCMD_OK; 2931 2932 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2933 addr) == -1) { 2934 mdb_warn("unable to walk bufctl_history"); 2935 return (DCMD_ERR); 2936 } 2937 2938 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2939 mdb_printf("\n"); 2940 2941 return (bhc.bhc_ret); 2942 } 2943 2944 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2945 if (verbose) { 2946 mdb_printf("%16s %16s %16s %16s\n" 2947 "%<u>%16s %16s %16s %16s%</u>\n", 2948 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2949 "", "CACHE", "LASTLOG", "CONTENTS"); 2950 } else { 2951 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2952 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2953 } 2954 } 2955 2956 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2957 mdb_warn("couldn't read bufctl at %p", addr); 2958 return (DCMD_ERR); 2959 } 2960 2961 /* 2962 * Guard against bogus bc_depth in case the bufctl is corrupt or 2963 * the address does not really refer to a bufctl. 2964 */ 2965 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2966 2967 if (caller != NULL) { 2968 laddr = caller; 2969 haddr = caller + sizeof (caller); 2970 2971 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2972 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2973 /* 2974 * We were provided an exact symbol value; any 2975 * address in the function is valid. 2976 */ 2977 laddr = (uintptr_t)sym.st_value; 2978 haddr = (uintptr_t)sym.st_value + sym.st_size; 2979 } 2980 2981 for (i = 0; i < depth; i++) 2982 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2983 break; 2984 2985 if (i == depth) 2986 return (DCMD_OK); 2987 } 2988 2989 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2990 return (DCMD_OK); 2991 2992 if (earliest != 0 && bc.bc_timestamp < earliest) 2993 return (DCMD_OK); 2994 2995 if (latest != 0 && bc.bc_timestamp > latest) 2996 return (DCMD_OK); 2997 2998 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2999 return (DCMD_OK); 3000 3001 if (flags & DCMD_PIPE_OUT) { 3002 mdb_printf("%#lr\n", addr); 3003 return (DCMD_OK); 3004 } 3005 3006 if (verbose) { 3007 mdb_printf( 3008 "%<b>%16p%</b> %16p %16llx %16p\n" 3009 "%16s %16p %16p %16p\n", 3010 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 3011 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 3012 3013 mdb_inc_indent(17); 3014 for (i = 0; i < depth; i++) 3015 mdb_printf("%a\n", bc.bc_stack[i]); 3016 mdb_dec_indent(17); 3017 mdb_printf("\n"); 3018 } else { 3019 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 3020 bc.bc_timestamp, bc.bc_thread); 3021 3022 for (i = 0; i < depth; i++) { 3023 if (mdb_lookup_by_addr(bc.bc_stack[i], 3024 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 3025 continue; 3026 if (strncmp(c, "kmem_", 5) == 0) 3027 continue; 3028 mdb_printf(" %a\n", bc.bc_stack[i]); 3029 break; 3030 } 3031 3032 if (i >= depth) 3033 mdb_printf("\n"); 3034 } 3035 3036 return (DCMD_OK); 3037 } 3038 3039 typedef struct kmem_verify { 3040 uint64_t *kmv_buf; /* buffer to read cache contents into */ 3041 size_t kmv_size; /* number of bytes in kmv_buf */ 3042 int kmv_corruption; /* > 0 if corruption found. 
*/ 3043 int kmv_besilent; /* report actual corruption sites */ 3044 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3045 } kmem_verify_t; 3046 3047 /* 3048 * verify_pattern() 3049 * verify that buf is filled with the pattern pat. 3050 */ 3051 static int64_t 3052 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3053 { 3054 /*LINTED*/ 3055 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3056 uint64_t *buf; 3057 3058 for (buf = buf_arg; buf < bufend; buf++) 3059 if (*buf != pat) 3060 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3061 return (-1); 3062 } 3063 3064 /* 3065 * verify_buftag() 3066 * verify that btp->bt_bxstat == (bcp ^ pat) 3067 */ 3068 static int 3069 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3070 { 3071 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3072 } 3073 3074 /* 3075 * verify_free() 3076 * verify the integrity of a free block of memory by checking 3077 * that it is filled with 0xdeadbeef and that its buftag is sane. 3078 */ 3079 /*ARGSUSED1*/ 3080 static int 3081 verify_free(uintptr_t addr, const void *data, void *private) 3082 { 3083 kmem_verify_t *kmv = (kmem_verify_t *)private; 3084 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3085 int64_t corrupt; /* corruption offset */ 3086 kmem_buftag_t *buftagp; /* ptr to buftag */ 3087 kmem_cache_t *cp = &kmv->kmv_cache; 3088 int besilent = kmv->kmv_besilent; 3089 3090 /*LINTED*/ 3091 buftagp = KMEM_BUFTAG(cp, buf); 3092 3093 /* 3094 * Read the buffer to check. 3095 */ 3096 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3097 if (!besilent) 3098 mdb_warn("couldn't read %p", addr); 3099 return (WALK_NEXT); 3100 } 3101 3102 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3103 KMEM_FREE_PATTERN)) >= 0) { 3104 if (!besilent) 3105 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3106 addr, (uintptr_t)addr + corrupt); 3107 goto corrupt; 3108 } 3109 /* 3110 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3111 * the first bytes of the buffer, hence we cannot check for red 3112 * zone corruption. 3113 */ 3114 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3115 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3116 if (!besilent) 3117 mdb_printf("buffer %p (free) seems to " 3118 "have a corrupt redzone pattern\n", addr); 3119 goto corrupt; 3120 } 3121 3122 /* 3123 * confirm bufctl pointer integrity. 3124 */ 3125 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3126 if (!besilent) 3127 mdb_printf("buffer %p (free) has a corrupt " 3128 "buftag\n", addr); 3129 goto corrupt; 3130 } 3131 3132 return (WALK_NEXT); 3133 corrupt: 3134 kmv->kmv_corruption++; 3135 return (WALK_NEXT); 3136 } 3137 3138 /* 3139 * verify_alloc() 3140 * Verify that the buftag of an allocated buffer makes sense with respect 3141 * to the buffer. 3142 */ 3143 /*ARGSUSED1*/ 3144 static int 3145 verify_alloc(uintptr_t addr, const void *data, void *private) 3146 { 3147 kmem_verify_t *kmv = (kmem_verify_t *)private; 3148 kmem_cache_t *cp = &kmv->kmv_cache; 3149 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3150 /*LINTED*/ 3151 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3152 uint32_t *ip = (uint32_t *)buftagp; 3153 uint8_t *bp = (uint8_t *)buf; 3154 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3155 int besilent = kmv->kmv_besilent; 3156 3157 /* 3158 * Read the buffer to check. 
3159 */ 3160 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3161 if (!besilent) 3162 mdb_warn("couldn't read %p", addr); 3163 return (WALK_NEXT); 3164 } 3165 3166 /* 3167 * There are two cases to handle: 3168 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3169 * 0xfeedfacefeedface at the end of it 3170 * 2. If the buf was alloc'd using kmem_alloc, it will have 3171 * 0xbb just past the end of the region in use. At the buftag, 3172 * it will have 0xfeedface (or, if the whole buffer is in use, 3173 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3174 * endianness), followed by 32 bits containing the offset of the 3175 * 0xbb byte in the buffer. 3176 * 3177 * Finally, the two 32-bit words that comprise the second half of the 3178 * buftag should xor to KMEM_BUFTAG_ALLOC 3179 */ 3180 3181 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3182 looks_ok = 1; 3183 else if (!KMEM_SIZE_VALID(ip[1])) 3184 size_ok = 0; 3185 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3186 looks_ok = 1; 3187 else 3188 size_ok = 0; 3189 3190 if (!size_ok) { 3191 if (!besilent) 3192 mdb_printf("buffer %p (allocated) has a corrupt " 3193 "redzone size encoding\n", addr); 3194 goto corrupt; 3195 } 3196 3197 if (!looks_ok) { 3198 if (!besilent) 3199 mdb_printf("buffer %p (allocated) has a corrupt " 3200 "redzone signature\n", addr); 3201 goto corrupt; 3202 } 3203 3204 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3205 if (!besilent) 3206 mdb_printf("buffer %p (allocated) has a " 3207 "corrupt buftag\n", addr); 3208 goto corrupt; 3209 } 3210 3211 return (WALK_NEXT); 3212 corrupt: 3213 kmv->kmv_corruption++; 3214 return (WALK_NEXT); 3215 } 3216 3217 /*ARGSUSED2*/ 3218 int 3219 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3220 { 3221 if (flags & DCMD_ADDRSPEC) { 3222 int check_alloc = 0, check_free = 0; 3223 kmem_verify_t kmv; 3224 3225 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3226 addr) == -1) { 3227 mdb_warn("couldn't read kmem_cache %p", addr); 3228 return (DCMD_ERR); 3229 } 3230 3231 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3232 sizeof (kmem_buftag_t); 3233 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3234 kmv.kmv_corruption = 0; 3235 3236 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3237 check_alloc = 1; 3238 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3239 check_free = 1; 3240 } else { 3241 if (!(flags & DCMD_LOOP)) { 3242 mdb_warn("cache %p (%s) does not have " 3243 "redzone checking enabled\n", addr, 3244 kmv.kmv_cache.cache_name); 3245 } 3246 return (DCMD_ERR); 3247 } 3248 3249 if (flags & DCMD_LOOP) { 3250 /* 3251 * table mode, don't print out every corrupt buffer 3252 */ 3253 kmv.kmv_besilent = 1; 3254 } else { 3255 mdb_printf("Summary for cache '%s'\n", 3256 kmv.kmv_cache.cache_name); 3257 mdb_inc_indent(2); 3258 kmv.kmv_besilent = 0; 3259 } 3260 3261 if (check_alloc) 3262 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3263 if (check_free) 3264 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3265 3266 if (flags & DCMD_LOOP) { 3267 if (kmv.kmv_corruption == 0) { 3268 mdb_printf("%-*s %?p clean\n", 3269 KMEM_CACHE_NAMELEN, 3270 kmv.kmv_cache.cache_name, addr); 3271 } else { 3272 char *s = ""; /* optional s in "buffer[s]" */ 3273 if (kmv.kmv_corruption > 1) 3274 s = "s"; 3275 3276 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3277 KMEM_CACHE_NAMELEN, 3278 kmv.kmv_cache.cache_name, addr, 3279 kmv.kmv_corruption, s); 3280 } 3281 } else { 3282 /* 3283 * This is the more verbose mode, when 
the user has 3284 * type addr::kmem_verify. If the cache was clean, 3285 * nothing will have yet been printed. So say something. 3286 */ 3287 if (kmv.kmv_corruption == 0) 3288 mdb_printf("clean\n"); 3289 3290 mdb_dec_indent(2); 3291 } 3292 } else { 3293 /* 3294 * If the user didn't specify a cache to verify, we'll walk all 3295 * kmem_cache's, specifying ourself as a callback for each... 3296 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3297 */ 3298 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3299 "Cache Name", "Addr", "Cache Integrity"); 3300 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3301 } 3302 3303 return (DCMD_OK); 3304 } 3305 3306 typedef struct vmem_node { 3307 struct vmem_node *vn_next; 3308 struct vmem_node *vn_parent; 3309 struct vmem_node *vn_sibling; 3310 struct vmem_node *vn_children; 3311 uintptr_t vn_addr; 3312 int vn_marked; 3313 vmem_t vn_vmem; 3314 } vmem_node_t; 3315 3316 typedef struct vmem_walk { 3317 vmem_node_t *vw_root; 3318 vmem_node_t *vw_current; 3319 } vmem_walk_t; 3320 3321 int 3322 vmem_walk_init(mdb_walk_state_t *wsp) 3323 { 3324 uintptr_t vaddr, paddr; 3325 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3326 vmem_walk_t *vw; 3327 3328 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3329 mdb_warn("couldn't read 'vmem_list'"); 3330 return (WALK_ERR); 3331 } 3332 3333 while (vaddr != NULL) { 3334 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3335 vp->vn_addr = vaddr; 3336 vp->vn_next = head; 3337 head = vp; 3338 3339 if (vaddr == wsp->walk_addr) 3340 current = vp; 3341 3342 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3343 mdb_warn("couldn't read vmem_t at %p", vaddr); 3344 goto err; 3345 } 3346 3347 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3348 } 3349 3350 for (vp = head; vp != NULL; vp = vp->vn_next) { 3351 3352 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3353 vp->vn_sibling = root; 3354 root = vp; 3355 continue; 3356 } 3357 3358 for (parent = head; parent != NULL; parent = parent->vn_next) { 3359 if (parent->vn_addr != paddr) 3360 continue; 3361 vp->vn_sibling = parent->vn_children; 3362 parent->vn_children = vp; 3363 vp->vn_parent = parent; 3364 break; 3365 } 3366 3367 if (parent == NULL) { 3368 mdb_warn("couldn't find %p's parent (%p)\n", 3369 vp->vn_addr, paddr); 3370 goto err; 3371 } 3372 } 3373 3374 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3375 vw->vw_root = root; 3376 3377 if (current != NULL) 3378 vw->vw_current = current; 3379 else 3380 vw->vw_current = root; 3381 3382 wsp->walk_data = vw; 3383 return (WALK_NEXT); 3384 err: 3385 for (vp = head; head != NULL; vp = head) { 3386 head = vp->vn_next; 3387 mdb_free(vp, sizeof (vmem_node_t)); 3388 } 3389 3390 return (WALK_ERR); 3391 } 3392 3393 int 3394 vmem_walk_step(mdb_walk_state_t *wsp) 3395 { 3396 vmem_walk_t *vw = wsp->walk_data; 3397 vmem_node_t *vp; 3398 int rval; 3399 3400 if ((vp = vw->vw_current) == NULL) 3401 return (WALK_DONE); 3402 3403 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3404 3405 if (vp->vn_children != NULL) { 3406 vw->vw_current = vp->vn_children; 3407 return (rval); 3408 } 3409 3410 do { 3411 vw->vw_current = vp->vn_sibling; 3412 vp = vp->vn_parent; 3413 } while (vw->vw_current == NULL && vp != NULL); 3414 3415 return (rval); 3416 } 3417 3418 /* 3419 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3420 * children are visited before their parent. 
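 * (::whatis relies on this ordering: child arenas are searched before the
 * arenas they import from, so the most specific match is reported first.)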
We perform the postfix walk 3421 * iteratively (rather than recursively) to allow mdb to regain control 3422 * after each callback. 3423 */ 3424 int 3425 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3426 { 3427 vmem_walk_t *vw = wsp->walk_data; 3428 vmem_node_t *vp = vw->vw_current; 3429 int rval; 3430 3431 /* 3432 * If this node is marked, then we know that we have already visited 3433 * all of its children. If the node has any siblings, they need to 3434 * be visited next; otherwise, we need to visit the parent. Note 3435 * that vp->vn_marked will only be zero on the first invocation of 3436 * the step function. 3437 */ 3438 if (vp->vn_marked) { 3439 if (vp->vn_sibling != NULL) 3440 vp = vp->vn_sibling; 3441 else if (vp->vn_parent != NULL) 3442 vp = vp->vn_parent; 3443 else { 3444 /* 3445 * We have neither a parent, nor a sibling, and we 3446 * have already been visited; we're done. 3447 */ 3448 return (WALK_DONE); 3449 } 3450 } 3451 3452 /* 3453 * Before we visit this node, visit its children. 3454 */ 3455 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3456 vp = vp->vn_children; 3457 3458 vp->vn_marked = 1; 3459 vw->vw_current = vp; 3460 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3461 3462 return (rval); 3463 } 3464 3465 void 3466 vmem_walk_fini(mdb_walk_state_t *wsp) 3467 { 3468 vmem_walk_t *vw = wsp->walk_data; 3469 vmem_node_t *root = vw->vw_root; 3470 int done; 3471 3472 if (root == NULL) 3473 return; 3474 3475 if ((vw->vw_root = root->vn_children) != NULL) 3476 vmem_walk_fini(wsp); 3477 3478 vw->vw_root = root->vn_sibling; 3479 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3480 mdb_free(root, sizeof (vmem_node_t)); 3481 3482 if (done) { 3483 mdb_free(vw, sizeof (vmem_walk_t)); 3484 } else { 3485 vmem_walk_fini(wsp); 3486 } 3487 } 3488 3489 typedef struct vmem_seg_walk { 3490 uint8_t vsw_type; 3491 uintptr_t vsw_start; 3492 uintptr_t vsw_current; 3493 } vmem_seg_walk_t; 3494 3495 /*ARGSUSED*/ 3496 int 3497 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3498 { 3499 vmem_seg_walk_t *vsw; 3500 3501 if (wsp->walk_addr == NULL) { 3502 mdb_warn("vmem_%s does not support global walks\n", name); 3503 return (WALK_ERR); 3504 } 3505 3506 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3507 3508 vsw->vsw_type = type; 3509 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3510 vsw->vsw_current = vsw->vsw_start; 3511 3512 return (WALK_NEXT); 3513 } 3514 3515 /* 3516 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
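 * We exploit this by using 0 (VMEM_NONE, below) as a wildcard, so the common
 * seg walker can match segments of every type.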
3517 */ 3518 #define VMEM_NONE 0 3519 3520 int 3521 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3522 { 3523 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3524 } 3525 3526 int 3527 vmem_free_walk_init(mdb_walk_state_t *wsp) 3528 { 3529 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3530 } 3531 3532 int 3533 vmem_span_walk_init(mdb_walk_state_t *wsp) 3534 { 3535 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3536 } 3537 3538 int 3539 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3540 { 3541 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3542 } 3543 3544 int 3545 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3546 { 3547 vmem_seg_t seg; 3548 vmem_seg_walk_t *vsw = wsp->walk_data; 3549 uintptr_t addr = vsw->vsw_current; 3550 static size_t seg_size = 0; 3551 int rval; 3552 3553 if (!seg_size) { 3554 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3555 mdb_warn("failed to read 'vmem_seg_size'"); 3556 seg_size = sizeof (vmem_seg_t); 3557 } 3558 } 3559 3560 if (seg_size < sizeof (seg)) 3561 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3562 3563 if (mdb_vread(&seg, seg_size, addr) == -1) { 3564 mdb_warn("couldn't read vmem_seg at %p", addr); 3565 return (WALK_ERR); 3566 } 3567 3568 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3569 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3570 rval = WALK_NEXT; 3571 } else { 3572 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3573 } 3574 3575 if (vsw->vsw_current == vsw->vsw_start) 3576 return (WALK_DONE); 3577 3578 return (rval); 3579 } 3580 3581 void 3582 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3583 { 3584 vmem_seg_walk_t *vsw = wsp->walk_data; 3585 3586 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3587 } 3588 3589 #define VMEM_NAMEWIDTH 22 3590 3591 int 3592 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3593 { 3594 vmem_t v, parent; 3595 vmem_kstat_t *vkp = &v.vm_kstat; 3596 uintptr_t paddr; 3597 int ident = 0; 3598 char c[VMEM_NAMEWIDTH]; 3599 3600 if (!(flags & DCMD_ADDRSPEC)) { 3601 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3602 mdb_warn("can't walk vmem"); 3603 return (DCMD_ERR); 3604 } 3605 return (DCMD_OK); 3606 } 3607 3608 if (DCMD_HDRSPEC(flags)) 3609 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3610 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3611 "TOTAL", "SUCCEED", "FAIL"); 3612 3613 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3614 mdb_warn("couldn't read vmem at %p", addr); 3615 return (DCMD_ERR); 3616 } 3617 3618 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3619 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3620 mdb_warn("couldn't trace %p's ancestry", addr); 3621 ident = 0; 3622 break; 3623 } 3624 paddr = (uintptr_t)parent.vm_source; 3625 } 3626 3627 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3628 3629 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3630 addr, VMEM_NAMEWIDTH, c, 3631 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3632 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3633 3634 return (DCMD_OK); 3635 } 3636 3637 void 3638 vmem_seg_help(void) 3639 { 3640 mdb_printf("%s", 3641 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3642 "\n" 3643 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3644 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3645 "information.\n"); 3646 mdb_dec_indent(2); 3647 mdb_printf("%<b>OPTIONS%</b>\n"); 3648 mdb_inc_indent(2); 3649 mdb_printf("%s", 3650 " -v Display the full content of the vmem_seg, including its stack trace\n" 3651 " -s report the size of the segment, instead of the end address\n" 3652 " -c caller\n" 3653 " filter out segments without the function/PC in their stack trace\n" 3654 " -e earliest\n" 3655 " filter out segments timestamped before earliest\n" 3656 " -l latest\n" 3657 " filter out segments timestamped after latest\n" 3658 " -m minsize\n" 3659 " filter out segments smaller than minsize\n" 3660 " -M maxsize\n" 3661 " filter out segments larger than maxsize\n" 3662 " -t thread\n" 3663 " filter out segments not involving thread\n" 3664 " -T type\n" 3665 " filter out segments not of type 'type'\n" 3666 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3667 } 3668 3669 /*ARGSUSED*/ 3670 int 3671 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3672 { 3673 vmem_seg_t vs; 3674 pc_t *stk = vs.vs_stack; 3675 uintptr_t sz; 3676 uint8_t t; 3677 const char *type = NULL; 3678 GElf_Sym sym; 3679 char c[MDB_SYM_NAMLEN]; 3680 int no_debug; 3681 int i; 3682 int depth; 3683 uintptr_t laddr, haddr; 3684 3685 uintptr_t caller = NULL, thread = NULL; 3686 uintptr_t minsize = 0, maxsize = 0; 3687 3688 hrtime_t earliest = 0, latest = 0; 3689 3690 uint_t size = 0; 3691 uint_t verbose = 0; 3692 3693 if (!(flags & DCMD_ADDRSPEC)) 3694 return (DCMD_USAGE); 3695 3696 if (mdb_getopts(argc, argv, 3697 'c', MDB_OPT_UINTPTR, &caller, 3698 'e', MDB_OPT_UINT64, &earliest, 3699 'l', MDB_OPT_UINT64, &latest, 3700 's', MDB_OPT_SETBITS, TRUE, &size, 3701 'm', MDB_OPT_UINTPTR, &minsize, 3702 'M', MDB_OPT_UINTPTR, &maxsize, 3703 't', MDB_OPT_UINTPTR, &thread, 3704 'T', MDB_OPT_STR, &type, 3705 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3706 NULL) != argc) 3707 return (DCMD_USAGE); 3708 3709 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3710 if (verbose) { 3711 mdb_printf("%16s %4s %16s %16s %16s\n" 3712 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3713 "ADDR", "TYPE", "START", "END", "SIZE", 3714 "", "", "THREAD", "TIMESTAMP", ""); 3715 } else { 3716 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3717 "START", size?
"SIZE" : "END", "WHO"); 3718 } 3719 } 3720 3721 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3722 mdb_warn("couldn't read vmem_seg at %p", addr); 3723 return (DCMD_ERR); 3724 } 3725 3726 if (type != NULL) { 3727 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3728 t = VMEM_ALLOC; 3729 else if (strcmp(type, "FREE") == 0) 3730 t = VMEM_FREE; 3731 else if (strcmp(type, "SPAN") == 0) 3732 t = VMEM_SPAN; 3733 else if (strcmp(type, "ROTR") == 0 || 3734 strcmp(type, "ROTOR") == 0) 3735 t = VMEM_ROTOR; 3736 else if (strcmp(type, "WLKR") == 0 || 3737 strcmp(type, "WALKER") == 0) 3738 t = VMEM_WALKER; 3739 else { 3740 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3741 type); 3742 return (DCMD_ERR); 3743 } 3744 3745 if (vs.vs_type != t) 3746 return (DCMD_OK); 3747 } 3748 3749 sz = vs.vs_end - vs.vs_start; 3750 3751 if (minsize != 0 && sz < minsize) 3752 return (DCMD_OK); 3753 3754 if (maxsize != 0 && sz > maxsize) 3755 return (DCMD_OK); 3756 3757 t = vs.vs_type; 3758 depth = vs.vs_depth; 3759 3760 /* 3761 * debug info, when present, is only accurate for VMEM_ALLOC segments 3762 */ 3763 no_debug = (t != VMEM_ALLOC) || 3764 (depth == 0 || depth > VMEM_STACK_DEPTH); 3765 3766 if (no_debug) { 3767 if (caller != NULL || thread != NULL || earliest != 0 || 3768 latest != 0) 3769 return (DCMD_OK); /* not enough info */ 3770 } else { 3771 if (caller != NULL) { 3772 laddr = caller; 3773 haddr = caller + sizeof (caller); 3774 3775 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3776 sizeof (c), &sym) != -1 && 3777 caller == (uintptr_t)sym.st_value) { 3778 /* 3779 * We were provided an exact symbol value; any 3780 * address in the function is valid. 3781 */ 3782 laddr = (uintptr_t)sym.st_value; 3783 haddr = (uintptr_t)sym.st_value + sym.st_size; 3784 } 3785 3786 for (i = 0; i < depth; i++) 3787 if (vs.vs_stack[i] >= laddr && 3788 vs.vs_stack[i] < haddr) 3789 break; 3790 3791 if (i == depth) 3792 return (DCMD_OK); 3793 } 3794 3795 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3796 return (DCMD_OK); 3797 3798 if (earliest != 0 && vs.vs_timestamp < earliest) 3799 return (DCMD_OK); 3800 3801 if (latest != 0 && vs.vs_timestamp > latest) 3802 return (DCMD_OK); 3803 } 3804 3805 type = (t == VMEM_ALLOC ? "ALLC" : 3806 t == VMEM_FREE ? "FREE" : 3807 t == VMEM_SPAN ? "SPAN" : 3808 t == VMEM_ROTOR ? "ROTR" : 3809 t == VMEM_WALKER ? "WLKR" : 3810 "????"); 3811 3812 if (flags & DCMD_PIPE_OUT) { 3813 mdb_printf("%#lr\n", addr); 3814 return (DCMD_OK); 3815 } 3816 3817 if (verbose) { 3818 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3819 addr, type, vs.vs_start, vs.vs_end, sz); 3820 3821 if (no_debug) 3822 return (DCMD_OK); 3823 3824 mdb_printf("%16s %4s %16p %16llx\n", 3825 "", "", vs.vs_thread, vs.vs_timestamp); 3826 3827 mdb_inc_indent(17); 3828 for (i = 0; i < depth; i++) { 3829 mdb_printf("%a\n", stk[i]); 3830 } 3831 mdb_dec_indent(17); 3832 mdb_printf("\n"); 3833 } else { 3834 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3835 vs.vs_start, size? 
sz : vs.vs_end); 3836 3837 if (no_debug) { 3838 mdb_printf("\n"); 3839 return (DCMD_OK); 3840 } 3841 3842 for (i = 0; i < depth; i++) { 3843 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3844 c, sizeof (c), &sym) == -1) 3845 continue; 3846 if (strncmp(c, "vmem_", 5) == 0) 3847 continue; 3848 break; 3849 } 3850 mdb_printf(" %a\n", stk[i]); 3851 } 3852 return (DCMD_OK); 3853 } 3854 3855 typedef struct kmalog_data { 3856 uintptr_t kma_addr; 3857 hrtime_t kma_newest; 3858 } kmalog_data_t; 3859 3860 /*ARGSUSED*/ 3861 static int 3862 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3863 { 3864 char name[KMEM_CACHE_NAMELEN + 1]; 3865 hrtime_t delta; 3866 int i, depth; 3867 size_t bufsize; 3868 3869 if (bcp->bc_timestamp == 0) 3870 return (WALK_DONE); 3871 3872 if (kma->kma_newest == 0) 3873 kma->kma_newest = bcp->bc_timestamp; 3874 3875 if (kma->kma_addr) { 3876 if (mdb_vread(&bufsize, sizeof (bufsize), 3877 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3878 mdb_warn( 3879 "failed to read cache_bufsize for cache at %p", 3880 bcp->bc_cache); 3881 return (WALK_ERR); 3882 } 3883 3884 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3885 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3886 return (WALK_NEXT); 3887 } 3888 3889 delta = kma->kma_newest - bcp->bc_timestamp; 3890 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3891 3892 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3893 &bcp->bc_cache->cache_name) <= 0) 3894 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3895 3896 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3897 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3898 3899 for (i = 0; i < depth; i++) 3900 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3901 3902 return (WALK_NEXT); 3903 } 3904 3905 int 3906 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3907 { 3908 const char *logname = "kmem_transaction_log"; 3909 kmalog_data_t kma; 3910 3911 if (argc > 1) 3912 return (DCMD_USAGE); 3913 3914 kma.kma_newest = 0; 3915 if (flags & DCMD_ADDRSPEC) 3916 kma.kma_addr = addr; 3917 else 3918 kma.kma_addr = NULL; 3919 3920 if (argc > 0) { 3921 if (argv->a_type != MDB_TYPE_STRING) 3922 return (DCMD_USAGE); 3923 if (strcmp(argv->a_un.a_str, "fail") == 0) 3924 logname = "kmem_failure_log"; 3925 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3926 logname = "kmem_slab_log"; 3927 else 3928 return (DCMD_USAGE); 3929 } 3930 3931 if (mdb_readvar(&addr, logname) == -1) { 3932 mdb_warn("failed to read %s log header pointer"); 3933 return (DCMD_ERR); 3934 } 3935 3936 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3937 mdb_warn("failed to walk kmem log"); 3938 return (DCMD_ERR); 3939 } 3940 3941 return (DCMD_OK); 3942 } 3943 3944 /* 3945 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3946 * The first piece is a structure which we use to accumulate kmem_cache_t 3947 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3948 * walker; we either add all caches, or ones named explicitly as arguments. 
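 * The list of matching caches grows geometrically in kmc_add() (doubling
 * from an initial 256 entries), so it never needs to be sized in advance.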
3949 */ 3950 3951 typedef struct kmclist { 3952 const char *kmc_name; /* Name to match (or NULL) */ 3953 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3954 int kmc_nelems; /* Num entries in kmc_caches */ 3955 int kmc_size; /* Size of kmc_caches array */ 3956 } kmclist_t; 3957 3958 static int 3959 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3960 { 3961 void *p; 3962 int s; 3963 3964 if (kmc->kmc_name == NULL || 3965 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3966 /* 3967 * If we have a match, grow our array (if necessary), and then 3968 * add the virtual address of the matching cache to our list. 3969 */ 3970 if (kmc->kmc_nelems >= kmc->kmc_size) { 3971 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3972 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3973 3974 bcopy(kmc->kmc_caches, p, 3975 sizeof (uintptr_t) * kmc->kmc_size); 3976 3977 kmc->kmc_caches = p; 3978 kmc->kmc_size = s; 3979 } 3980 3981 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3982 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3983 } 3984 3985 return (WALK_NEXT); 3986 } 3987 3988 /* 3989 * The second piece of ::kmausers is a hash table of allocations. Each 3990 * allocation owner is identified by its stack trace and data_size. We then 3991 * track the total bytes of all such allocations, and the number of allocations 3992 * to report at the end. Once we have a list of caches, we walk through the 3993 * allocated bufctls of each, and update our hash table accordingly. 3994 */ 3995 3996 typedef struct kmowner { 3997 struct kmowner *kmo_head; /* First hash elt in bucket */ 3998 struct kmowner *kmo_next; /* Next hash elt in chain */ 3999 size_t kmo_signature; /* Hash table signature */ 4000 uint_t kmo_num; /* Number of allocations */ 4001 size_t kmo_data_size; /* Size of each allocation */ 4002 size_t kmo_total_size; /* Total bytes of allocation */ 4003 int kmo_depth; /* Depth of stack trace */ 4004 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 4005 } kmowner_t; 4006 4007 typedef struct kmusers { 4008 uintptr_t kmu_addr; /* address of interest */ 4009 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 4010 kmowner_t *kmu_hash; /* Hash table of owners */ 4011 int kmu_nelems; /* Number of entries in use */ 4012 int kmu_size; /* Total number of entries */ 4013 } kmusers_t; 4014 4015 static void 4016 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 4017 size_t size, size_t data_size) 4018 { 4019 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4020 size_t bucket, signature = data_size; 4021 kmowner_t *kmo, *kmoend; 4022 4023 /* 4024 * If the hash table is full, double its size and rehash everything. 4025 */ 4026 if (kmu->kmu_nelems >= kmu->kmu_size) { 4027 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 4028 4029 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 4030 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 4031 kmu->kmu_hash = kmo; 4032 kmu->kmu_size = s; 4033 4034 kmoend = kmu->kmu_hash + kmu->kmu_size; 4035 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 4036 kmo->kmo_head = NULL; 4037 4038 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 4039 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 4040 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 4041 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4042 kmu->kmu_hash[bucket].kmo_head = kmo; 4043 } 4044 } 4045 4046 /* 4047 * Finish computing the hash signature from the stack trace, and then 4048 * see if the owner is in the hash table. If so, update our stats. 
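 * The signature is the allocation's data size plus the sum of its stack PCs;
 * since distinct owners can collide on a signature, the depth, size, and each
 * stack frame are still compared below before an existing entry is updated.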
4049 */ 4050 for (i = 0; i < depth; i++) 4051 signature += bcp->bc_stack[i]; 4052 4053 bucket = signature & (kmu->kmu_size - 1); 4054 4055 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4056 if (kmo->kmo_signature == signature) { 4057 size_t difference = 0; 4058 4059 difference |= kmo->kmo_data_size - data_size; 4060 difference |= kmo->kmo_depth - depth; 4061 4062 for (i = 0; i < depth; i++) { 4063 difference |= kmo->kmo_stack[i] - 4064 bcp->bc_stack[i]; 4065 } 4066 4067 if (difference == 0) { 4068 kmo->kmo_total_size += size; 4069 kmo->kmo_num++; 4070 return; 4071 } 4072 } 4073 } 4074 4075 /* 4076 * If the owner is not yet hashed, grab the next element and fill it 4077 * in based on the allocation information. 4078 */ 4079 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4080 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4081 kmu->kmu_hash[bucket].kmo_head = kmo; 4082 4083 kmo->kmo_signature = signature; 4084 kmo->kmo_num = 1; 4085 kmo->kmo_data_size = data_size; 4086 kmo->kmo_total_size = size; 4087 kmo->kmo_depth = depth; 4088 4089 for (i = 0; i < depth; i++) 4090 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4091 } 4092 4093 /* 4094 * When ::kmausers is invoked without the -f flag, we simply update our hash 4095 * table with the information from each allocated bufctl. 4096 */ 4097 /*ARGSUSED*/ 4098 static int 4099 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4100 { 4101 const kmem_cache_t *cp = kmu->kmu_cache; 4102 4103 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4104 return (WALK_NEXT); 4105 } 4106 4107 /* 4108 * When ::kmausers is invoked with the -f flag, we print out the information 4109 * for each bufctl as well as updating the hash table. 4110 */ 4111 static int 4112 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4113 { 4114 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4115 const kmem_cache_t *cp = kmu->kmu_cache; 4116 kmem_bufctl_t bufctl; 4117 4118 if (kmu->kmu_addr) { 4119 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4120 mdb_warn("couldn't read bufctl at %p", addr); 4121 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4122 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4123 cp->cache_bufsize) 4124 return (WALK_NEXT); 4125 } 4126 4127 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4128 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4129 4130 for (i = 0; i < depth; i++) 4131 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4132 4133 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4134 return (WALK_NEXT); 4135 } 4136 4137 /* 4138 * We sort our results by allocation size before printing them. 4139 */ 4140 static int 4141 kmownercmp(const void *lp, const void *rp) 4142 { 4143 const kmowner_t *lhs = lp; 4144 const kmowner_t *rhs = rp; 4145 4146 return (rhs->kmo_total_size - lhs->kmo_total_size); 4147 } 4148 4149 /* 4150 * The main engine of ::kmausers is relatively straightforward: First we 4151 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4152 * iterate over the allocated bufctls of each cache in the list. Finally, 4153 * we sort and print our results. 
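 * Note that only caches created with KMF_AUDIT record the per-allocation
 * stack traces we need, so unaudited caches are skipped (with a warning only
 * when such a cache was named explicitly).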
4154 */ 4155 /*ARGSUSED*/ 4156 int 4157 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4158 { 4159 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4160 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4161 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4162 int do_all_caches = 1; /* Do all caches (no arguments) */ 4163 int opt_e = FALSE; /* Include "small" users */ 4164 int opt_f = FALSE; /* Print stack traces */ 4165 4166 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4167 kmowner_t *kmo, *kmoend; 4168 int i, oelems; 4169 4170 kmclist_t kmc; 4171 kmusers_t kmu; 4172 4173 bzero(&kmc, sizeof (kmc)); 4174 bzero(&kmu, sizeof (kmu)); 4175 4176 while ((i = mdb_getopts(argc, argv, 4177 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4178 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4179 4180 argv += i; /* skip past options we just processed */ 4181 argc -= i; /* adjust argc */ 4182 4183 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4184 return (DCMD_USAGE); 4185 4186 oelems = kmc.kmc_nelems; 4187 kmc.kmc_name = argv->a_un.a_str; 4188 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4189 4190 if (kmc.kmc_nelems == oelems) { 4191 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4192 return (DCMD_ERR); 4193 } 4194 4195 do_all_caches = 0; 4196 argv++; 4197 argc--; 4198 } 4199 4200 if (flags & DCMD_ADDRSPEC) { 4201 opt_f = TRUE; 4202 kmu.kmu_addr = addr; 4203 } else { 4204 kmu.kmu_addr = NULL; 4205 } 4206 4207 if (opt_e) 4208 mem_threshold = cnt_threshold = 0; 4209 4210 if (opt_f) 4211 callback = (mdb_walk_cb_t)kmause2; 4212 4213 if (do_all_caches) { 4214 kmc.kmc_name = NULL; /* match all cache names */ 4215 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4216 } 4217 4218 for (i = 0; i < kmc.kmc_nelems; i++) { 4219 uintptr_t cp = kmc.kmc_caches[i]; 4220 kmem_cache_t c; 4221 4222 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4223 mdb_warn("failed to read cache at %p", cp); 4224 continue; 4225 } 4226 4227 if (!(c.cache_flags & KMF_AUDIT)) { 4228 if (!do_all_caches) { 4229 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4230 c.cache_name); 4231 } 4232 continue; 4233 } 4234 4235 kmu.kmu_cache = &c; 4236 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4237 audited_caches++; 4238 } 4239 4240 if (audited_caches == 0 && do_all_caches) { 4241 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4242 return (DCMD_ERR); 4243 } 4244 4245 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4246 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4247 4248 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4249 if (kmo->kmo_total_size < mem_threshold && 4250 kmo->kmo_num < cnt_threshold) 4251 continue; 4252 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4253 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4254 for (i = 0; i < kmo->kmo_depth; i++) 4255 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4256 } 4257 4258 return (DCMD_OK); 4259 } 4260 4261 void 4262 kmausers_help(void) 4263 { 4264 mdb_printf( 4265 "Displays the largest users of the kmem allocator, sorted by \n" 4266 "trace. If one or more caches is specified, only those caches\n" 4267 "will be searched. By default, all caches are searched. If an\n" 4268 "address is specified, then only those allocations which include\n" 4269 "the given address are displayed. Specifying an address implies\n" 4270 "-f.\n" 4271 "\n" 4272 "\t-e\tInclude all users, not just the largest\n" 4273 "\t-f\tDisplay individual allocations. 
By default, users are\n" 4274 "\t\tgrouped by stack\n"); 4275 } 4276 4277 static int 4278 kmem_ready_check(void) 4279 { 4280 int ready; 4281 4282 if (mdb_readvar(&ready, "kmem_ready") < 0) 4283 return (-1); /* errno is set for us */ 4284 4285 return (ready); 4286 } 4287 4288 void 4289 kmem_statechange(void) 4290 { 4291 static int been_ready = 0; 4292 4293 if (been_ready) 4294 return; 4295 4296 if (kmem_ready_check() <= 0) 4297 return; 4298 4299 been_ready = 1; 4300 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 4301 } 4302 4303 void 4304 kmem_init(void) 4305 { 4306 mdb_walker_t w = { 4307 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 4308 list_walk_step, list_walk_fini 4309 }; 4310 4311 /* 4312 * If kmem is ready, we'll need to invoke the kmem_cache walker 4313 * immediately. Walkers in the linkage structure won't be ready until 4314 * _mdb_init returns, so we'll need to add this one manually. If kmem 4315 * is ready, we'll use the walker to initialize the caches. If kmem 4316 * isn't ready, we'll register a callback that will allow us to defer 4317 * cache walking until it is. 4318 */ 4319 if (mdb_add_walker(&w) != 0) { 4320 mdb_warn("failed to add kmem_cache walker"); 4321 return; 4322 } 4323 4324 kmem_statechange(); 4325 } 4326 4327 typedef struct whatthread { 4328 uintptr_t wt_target; 4329 int wt_verbose; 4330 } whatthread_t; 4331 4332 static int 4333 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 4334 { 4335 uintptr_t current, data; 4336 4337 if (t->t_stkbase == NULL) 4338 return (WALK_NEXT); 4339 4340 /* 4341 * Warn about swapped out threads, but drive on anyway 4342 */ 4343 if (!(t->t_schedflag & TS_LOAD)) { 4344 mdb_warn("thread %p's stack swapped out\n", addr); 4345 return (WALK_NEXT); 4346 } 4347 4348 /* 4349 * Search the thread's stack for the given pointer. Note that it would 4350 * be more efficient to follow ::kgrep's lead and read in page-sized 4351 * chunks, but this routine is already fast and simple. 4352 */ 4353 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 4354 current += sizeof (uintptr_t)) { 4355 if (mdb_vread(&data, sizeof (data), current) == -1) { 4356 mdb_warn("couldn't read thread %p's stack at %p", 4357 addr, current); 4358 return (WALK_ERR); 4359 } 4360 4361 if (data == w->wt_target) { 4362 if (w->wt_verbose) { 4363 mdb_printf("%p in thread %p's stack%s\n", 4364 current, addr, stack_active(t, current)); 4365 } else { 4366 mdb_printf("%#lr\n", addr); 4367 return (WALK_NEXT); 4368 } 4369 } 4370 } 4371 4372 return (WALK_NEXT); 4373 } 4374 4375 int 4376 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4377 { 4378 whatthread_t w; 4379 4380 if (!(flags & DCMD_ADDRSPEC)) 4381 return (DCMD_USAGE); 4382 4383 w.wt_verbose = FALSE; 4384 w.wt_target = addr; 4385 4386 if (mdb_getopts(argc, argv, 4387 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 4388 return (DCMD_USAGE); 4389 4390 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 4391 == -1) { 4392 mdb_warn("couldn't walk threads"); 4393 return (DCMD_ERR); 4394 } 4395 4396 return (DCMD_OK); 4397 } 4398