/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Joyent, Inc.  All rights reserved.
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "avl.h"
#include "combined.h"
#include "dist.h"
#include "kmem.h"
#include "list.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ?
"on" : "off"); 96 97 return (DCMD_OK); 98 } 99 100 int 101 kmem_cache_walk_init(mdb_walk_state_t *wsp) 102 { 103 GElf_Sym sym; 104 105 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) { 106 mdb_warn("couldn't find kmem_caches"); 107 return (WALK_ERR); 108 } 109 110 wsp->walk_addr = (uintptr_t)sym.st_value; 111 112 return (list_walk_init_named(wsp, "cache list", "cache")); 113 } 114 115 int 116 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 117 { 118 if (wsp->walk_addr == NULL) { 119 mdb_warn("kmem_cpu_cache doesn't support global walks"); 120 return (WALK_ERR); 121 } 122 123 if (mdb_layered_walk("cpu", wsp) == -1) { 124 mdb_warn("couldn't walk 'cpu'"); 125 return (WALK_ERR); 126 } 127 128 wsp->walk_data = (void *)wsp->walk_addr; 129 130 return (WALK_NEXT); 131 } 132 133 int 134 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 135 { 136 uintptr_t caddr = (uintptr_t)wsp->walk_data; 137 const cpu_t *cpu = wsp->walk_layer; 138 kmem_cpu_cache_t cc; 139 140 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]); 141 142 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 143 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 144 return (WALK_ERR); 145 } 146 147 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 148 } 149 150 static int 151 kmem_slab_check(void *p, uintptr_t saddr, void *arg) 152 { 153 kmem_slab_t *sp = p; 154 uintptr_t caddr = (uintptr_t)arg; 155 if ((uintptr_t)sp->slab_cache != caddr) { 156 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 157 saddr, caddr, sp->slab_cache); 158 return (-1); 159 } 160 161 return (0); 162 } 163 164 static int 165 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg) 166 { 167 kmem_slab_t *sp = p; 168 169 int rc = kmem_slab_check(p, saddr, arg); 170 if (rc != 0) { 171 return (rc); 172 } 173 174 if (!KMEM_SLAB_IS_PARTIAL(sp)) { 175 mdb_warn("slab %p is not a partial slab\n", saddr); 176 return (-1); 177 } 178 179 return (0); 180 } 181 182 static int 183 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg) 184 { 185 kmem_slab_t *sp = p; 186 187 int rc = kmem_slab_check(p, saddr, arg); 188 if (rc != 0) { 189 return (rc); 190 } 191 192 if (!KMEM_SLAB_IS_ALL_USED(sp)) { 193 mdb_warn("slab %p is not completely allocated\n", saddr); 194 return (-1); 195 } 196 197 return (0); 198 } 199 200 typedef struct { 201 uintptr_t kns_cache_addr; 202 int kns_nslabs; 203 } kmem_nth_slab_t; 204 205 static int 206 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg) 207 { 208 kmem_nth_slab_t *chkp = arg; 209 210 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr); 211 if (rc != 0) { 212 return (rc); 213 } 214 215 return (chkp->kns_nslabs-- == 0 ? 
1 : 0); 216 } 217 218 static int 219 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp) 220 { 221 uintptr_t caddr = wsp->walk_addr; 222 223 wsp->walk_addr = (uintptr_t)(caddr + 224 offsetof(kmem_cache_t, cache_complete_slabs)); 225 226 return (list_walk_init_checked(wsp, "slab list", "slab", 227 kmem_complete_slab_check, (void *)caddr)); 228 } 229 230 static int 231 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp) 232 { 233 uintptr_t caddr = wsp->walk_addr; 234 235 wsp->walk_addr = (uintptr_t)(caddr + 236 offsetof(kmem_cache_t, cache_partial_slabs)); 237 238 return (avl_walk_init_checked(wsp, "slab list", "slab", 239 kmem_partial_slab_check, (void *)caddr)); 240 } 241 242 int 243 kmem_slab_walk_init(mdb_walk_state_t *wsp) 244 { 245 uintptr_t caddr = wsp->walk_addr; 246 247 if (caddr == NULL) { 248 mdb_warn("kmem_slab doesn't support global walks\n"); 249 return (WALK_ERR); 250 } 251 252 combined_walk_init(wsp); 253 combined_walk_add(wsp, 254 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini); 255 combined_walk_add(wsp, 256 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini); 257 258 return (WALK_NEXT); 259 } 260 261 static int 262 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp) 263 { 264 uintptr_t caddr = wsp->walk_addr; 265 kmem_nth_slab_t *chk; 266 267 chk = mdb_alloc(sizeof (kmem_nth_slab_t), 268 UM_SLEEP | UM_GC); 269 chk->kns_cache_addr = caddr; 270 chk->kns_nslabs = 1; 271 wsp->walk_addr = (uintptr_t)(caddr + 272 offsetof(kmem_cache_t, cache_complete_slabs)); 273 274 return (list_walk_init_checked(wsp, "slab list", "slab", 275 kmem_nth_slab_check, chk)); 276 } 277 278 int 279 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 280 { 281 uintptr_t caddr = wsp->walk_addr; 282 kmem_cache_t c; 283 284 if (caddr == NULL) { 285 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 286 return (WALK_ERR); 287 } 288 289 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 290 mdb_warn("couldn't read kmem_cache at %p", caddr); 291 return (WALK_ERR); 292 } 293 294 combined_walk_init(wsp); 295 296 /* 297 * Some consumers (umem_walk_step(), in particular) require at 298 * least one callback if there are any buffers in the cache. So 299 * if there are *no* partial slabs, report the first full slab, if 300 * any. 301 * 302 * Yes, this is ugly, but it's cleaner than the other possibilities. 
303 */ 304 if (c.cache_partial_slabs.avl_numnodes == 0) { 305 combined_walk_add(wsp, kmem_first_complete_slab_walk_init, 306 list_walk_step, list_walk_fini); 307 } else { 308 combined_walk_add(wsp, kmem_partial_slab_walk_init, 309 avl_walk_step, avl_walk_fini); 310 } 311 312 return (WALK_NEXT); 313 } 314 315 int 316 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 317 { 318 kmem_cache_t c; 319 const char *filter = NULL; 320 321 if (mdb_getopts(ac, argv, 322 'n', MDB_OPT_STR, &filter, 323 NULL) != ac) { 324 return (DCMD_USAGE); 325 } 326 327 if (!(flags & DCMD_ADDRSPEC)) { 328 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 329 mdb_warn("can't walk kmem_cache"); 330 return (DCMD_ERR); 331 } 332 return (DCMD_OK); 333 } 334 335 if (DCMD_HDRSPEC(flags)) 336 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 337 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 338 339 if (mdb_vread(&c, sizeof (c), addr) == -1) { 340 mdb_warn("couldn't read kmem_cache at %p", addr); 341 return (DCMD_ERR); 342 } 343 344 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) 345 return (DCMD_OK); 346 347 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 348 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 349 350 return (DCMD_OK); 351 } 352 353 void 354 kmem_cache_help(void) 355 { 356 mdb_printf("%s", "Print kernel memory caches.\n\n"); 357 mdb_dec_indent(2); 358 mdb_printf("%<b>OPTIONS%</b>\n"); 359 mdb_inc_indent(2); 360 mdb_printf("%s", 361 " -n name\n" 362 " name of kmem cache (or matching partial name)\n" 363 "\n" 364 "Column\tDescription\n" 365 "\n" 366 "ADDR\t\taddress of kmem cache\n" 367 "NAME\t\tname of kmem cache\n" 368 "FLAG\t\tvarious cache state flags\n" 369 "CFLAG\t\tcache creation flags\n" 370 "BUFSIZE\tobject size in bytes\n" 371 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n"); 372 } 373 374 #define LABEL_WIDTH 11 375 static void 376 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 377 size_t maxbuckets, size_t minbucketsize) 378 { 379 uint64_t total; 380 int buckets; 381 int i; 382 const int *distarray; 383 int complete[2]; 384 385 buckets = buffers_per_slab; 386 387 total = 0; 388 for (i = 0; i <= buffers_per_slab; i++) 389 total += ks_bucket[i]; 390 391 if (maxbuckets > 1) 392 buckets = MIN(buckets, maxbuckets); 393 394 if (minbucketsize > 1) { 395 /* 396 * minbucketsize does not apply to the first bucket reserved 397 * for completely allocated slabs 398 */ 399 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 400 minbucketsize)); 401 if ((buckets < 2) && (buffers_per_slab > 1)) { 402 buckets = 2; 403 minbucketsize = (buffers_per_slab - 1); 404 } 405 } 406 407 /* 408 * The first printed bucket is reserved for completely allocated slabs. 409 * Passing (buckets - 1) excludes that bucket from the generated 410 * distribution, since we're handling it as a special case. 411 */ 412 complete[0] = buffers_per_slab; 413 complete[1] = buffers_per_slab + 1; 414 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1); 415 416 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated"); 417 dist_print_header("Buffers", LABEL_WIDTH, "Slabs"); 418 419 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH); 420 /* 421 * Print bucket ranges in descending order after the first bucket for 422 * completely allocated slabs, so a person can see immediately whether 423 * or not there is fragmentation without having to scan possibly 424 * multiple screens of output. 
Starting at (buckets - 2) excludes the 425 * extra terminating bucket. 426 */ 427 for (i = buckets - 2; i >= 0; i--) { 428 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH); 429 } 430 mdb_printf("\n"); 431 } 432 #undef LABEL_WIDTH 433 434 /*ARGSUSED*/ 435 static int 436 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab) 437 { 438 *is_slab = B_TRUE; 439 return (WALK_DONE); 440 } 441 442 /*ARGSUSED*/ 443 static int 444 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp, 445 boolean_t *is_slab) 446 { 447 /* 448 * The "kmem_partial_slab" walker reports the first full slab if there 449 * are no partial slabs (for the sake of consumers that require at least 450 * one callback if there are any buffers in the cache). 451 */ 452 *is_slab = KMEM_SLAB_IS_PARTIAL(sp); 453 return (WALK_DONE); 454 } 455 456 typedef struct kmem_slab_usage { 457 int ksu_refcnt; /* count of allocated buffers on slab */ 458 boolean_t ksu_nomove; /* slab marked non-reclaimable */ 459 } kmem_slab_usage_t; 460 461 typedef struct kmem_slab_stats { 462 const kmem_cache_t *ks_cp; 463 int ks_slabs; /* slabs in cache */ 464 int ks_partial_slabs; /* partially allocated slabs in cache */ 465 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 466 int ks_max_buffers_per_slab; /* max buffers per slab */ 467 int ks_usage_len; /* ks_usage array length */ 468 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 469 uint_t *ks_bucket; /* slab usage distribution */ 470 } kmem_slab_stats_t; 471 472 /*ARGSUSED*/ 473 static int 474 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp, 475 kmem_slab_stats_t *ks) 476 { 477 kmem_slab_usage_t *ksu; 478 long unused; 479 480 ks->ks_slabs++; 481 ks->ks_bucket[sp->slab_refcnt]++; 482 483 unused = (sp->slab_chunks - sp->slab_refcnt); 484 if (unused == 0) { 485 return (WALK_NEXT); 486 } 487 488 ks->ks_partial_slabs++; 489 ks->ks_unused_buffers += unused; 490 491 if (ks->ks_partial_slabs > ks->ks_usage_len) { 492 kmem_slab_usage_t *usage; 493 int len = ks->ks_usage_len; 494 495 len = (len == 0 ? 
16 : len * 2); 496 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 497 if (ks->ks_usage != NULL) { 498 bcopy(ks->ks_usage, usage, 499 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 500 mdb_free(ks->ks_usage, 501 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 502 } 503 ks->ks_usage = usage; 504 ks->ks_usage_len = len; 505 } 506 507 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 508 ksu->ksu_refcnt = sp->slab_refcnt; 509 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE); 510 return (WALK_NEXT); 511 } 512 513 static void 514 kmem_slabs_header() 515 { 516 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 517 "", "", "Partial", "", "Unused", ""); 518 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 519 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 520 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 521 "-------------------------", "--------", "--------", "---------", 522 "---------", "------"); 523 } 524 525 int 526 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 527 { 528 kmem_cache_t c; 529 kmem_slab_stats_t stats; 530 mdb_walk_cb_t cb; 531 int pct; 532 int tenths_pct; 533 size_t maxbuckets = 1; 534 size_t minbucketsize = 0; 535 const char *filter = NULL; 536 const char *name = NULL; 537 uint_t opt_v = FALSE; 538 boolean_t buckets = B_FALSE; 539 boolean_t skip = B_FALSE; 540 541 if (mdb_getopts(argc, argv, 542 'B', MDB_OPT_UINTPTR, &minbucketsize, 543 'b', MDB_OPT_UINTPTR, &maxbuckets, 544 'n', MDB_OPT_STR, &filter, 545 'N', MDB_OPT_STR, &name, 546 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 547 NULL) != argc) { 548 return (DCMD_USAGE); 549 } 550 551 if ((maxbuckets != 1) || (minbucketsize != 0)) { 552 buckets = B_TRUE; 553 } 554 555 if (!(flags & DCMD_ADDRSPEC)) { 556 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 557 argv) == -1) { 558 mdb_warn("can't walk kmem_cache"); 559 return (DCMD_ERR); 560 } 561 return (DCMD_OK); 562 } 563 564 if (mdb_vread(&c, sizeof (c), addr) == -1) { 565 mdb_warn("couldn't read kmem_cache at %p", addr); 566 return (DCMD_ERR); 567 } 568 569 if (name == NULL) { 570 skip = ((filter != NULL) && 571 (strstr(c.cache_name, filter) == NULL)); 572 } else if (filter == NULL) { 573 skip = (strcmp(c.cache_name, name) != 0); 574 } else { 575 /* match either -n or -N */ 576 skip = ((strcmp(c.cache_name, name) != 0) && 577 (strstr(c.cache_name, filter) == NULL)); 578 } 579 580 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) { 581 kmem_slabs_header(); 582 } else if ((opt_v || buckets) && !skip) { 583 if (DCMD_HDRSPEC(flags)) { 584 kmem_slabs_header(); 585 } else { 586 boolean_t is_slab = B_FALSE; 587 const char *walker_name; 588 if (opt_v) { 589 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 590 walker_name = "kmem_slab_partial"; 591 } else { 592 cb = (mdb_walk_cb_t)kmem_first_slab; 593 walker_name = "kmem_slab"; 594 } 595 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 596 if (is_slab) { 597 kmem_slabs_header(); 598 } 599 } 600 } 601 602 if (skip) { 603 return (DCMD_OK); 604 } 605 606 bzero(&stats, sizeof (kmem_slab_stats_t)); 607 stats.ks_cp = &c; 608 stats.ks_max_buffers_per_slab = c.cache_maxchunks; 609 /* +1 to include a zero bucket */ 610 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) * 611 sizeof (*stats.ks_bucket), UM_SLEEP); 612 cb = (mdb_walk_cb_t)kmem_slablist_stat; 613 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 614 615 if (c.cache_buftotal == 0) { 616 pct = 0; 617 tenths_pct = 0; 618 } else { 619 uint64_t n = stats.ks_unused_buffers * 10000; 620 pct = (int)(n / c.cache_buftotal); 621 tenths_pct = pct 
- ((pct / 100) * 100); 622 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 623 if (tenths_pct == 10) { 624 pct += 100; 625 tenths_pct = 0; 626 } 627 } 628 629 pct /= 100; 630 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 631 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 632 stats.ks_unused_buffers, pct, tenths_pct); 633 634 if (maxbuckets == 0) { 635 maxbuckets = stats.ks_max_buffers_per_slab; 636 } 637 638 if (((maxbuckets > 1) || (minbucketsize > 0)) && 639 (stats.ks_slabs > 0)) { 640 mdb_printf("\n"); 641 kmem_slabs_print_dist(stats.ks_bucket, 642 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize); 643 } 644 645 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) * 646 sizeof (*stats.ks_bucket)); 647 648 if (!opt_v) { 649 return (DCMD_OK); 650 } 651 652 if (opt_v && (stats.ks_partial_slabs > 0)) { 653 int i; 654 kmem_slab_usage_t *ksu; 655 656 mdb_printf(" %d complete (%d), %d partial:", 657 (stats.ks_slabs - stats.ks_partial_slabs), 658 stats.ks_max_buffers_per_slab, 659 stats.ks_partial_slabs); 660 661 for (i = 0; i < stats.ks_partial_slabs; i++) { 662 ksu = &stats.ks_usage[i]; 663 mdb_printf(" %d%s", ksu->ksu_refcnt, 664 (ksu->ksu_nomove ? "*" : "")); 665 } 666 mdb_printf("\n\n"); 667 } 668 669 if (stats.ks_usage_len > 0) { 670 mdb_free(stats.ks_usage, 671 stats.ks_usage_len * sizeof (kmem_slab_usage_t)); 672 } 673 674 return (DCMD_OK); 675 } 676 677 void 678 kmem_slabs_help(void) 679 { 680 mdb_printf("%s", 681 "Display slab usage per kmem cache.\n\n"); 682 mdb_dec_indent(2); 683 mdb_printf("%<b>OPTIONS%</b>\n"); 684 mdb_inc_indent(2); 685 mdb_printf("%s", 686 " -n name\n" 687 " name of kmem cache (or matching partial name)\n" 688 " -N name\n" 689 " exact name of kmem cache\n" 690 " -b maxbins\n" 691 " Print a distribution of allocated buffers per slab using at\n" 692 " most maxbins bins. The first bin is reserved for completely\n" 693 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n" 694 " effect as specifying the maximum allocated buffers per slab\n" 695 " or setting minbinsize to 1 (-B 1).\n" 696 " -B minbinsize\n" 697 " Print a distribution of allocated buffers per slab, making\n" 698 " all bins (except the first, reserved for completely allocated\n" 699 " slabs) at least minbinsize buffers apart.\n" 700 " -v verbose output: List the allocated buffer count of each partial\n" 701 " slab on the free list in order from front to back to show how\n" 702 " closely the slabs are ordered by usage. For example\n" 703 "\n" 704 " 10 complete, 3 partial (8): 7 3 1\n" 705 "\n" 706 " means there are thirteen slabs with eight buffers each, including\n" 707 " three partially allocated slabs with less than all eight buffers\n" 708 " allocated.\n" 709 "\n" 710 " Buffer allocations are always from the front of the partial slab\n" 711 " list. When a buffer is freed from a completely used slab, that\n" 712 " slab is added to the front of the partial slab list. Assuming\n" 713 " that all buffers are equally likely to be freed soon, the\n" 714 " desired order of partial slabs is most-used at the front of the\n" 715 " list and least-used at the back (as in the example above).\n" 716 " However, if a slab contains an allocated buffer that will not\n" 717 " soon be freed, it would be better for that slab to be at the\n" 718 " front where all of its buffers can be allocated. 
Taking a slab\n" 719 " off the partial slab list (either with all buffers freed or all\n" 720 " buffers allocated) reduces cache fragmentation.\n" 721 "\n" 722 " A slab's allocated buffer count representing a partial slab (9 in\n" 723 " the example below) may be marked as follows:\n" 724 "\n" 725 " 9* An asterisk indicates that kmem has marked the slab non-\n" 726 " reclaimable because the kmem client refused to move one of the\n" 727 " slab's buffers. Since kmem does not expect to completely free the\n" 728 " slab, it moves it to the front of the list in the hope of\n" 729 " completely allocating it instead. A slab marked with an asterisk\n" 730 " stays marked for as long as it remains on the partial slab list.\n" 731 "\n" 732 "Column\t\tDescription\n" 733 "\n" 734 "Cache Name\t\tname of kmem cache\n" 735 "Slabs\t\t\ttotal slab count\n" 736 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 737 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 738 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 739 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 740 "\t\t\t for accounting structures (debug mode), slab\n" 741 "\t\t\t coloring (incremental small offsets to stagger\n" 742 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 743 } 744 745 static int 746 addrcmp(const void *lhs, const void *rhs) 747 { 748 uintptr_t p1 = *((uintptr_t *)lhs); 749 uintptr_t p2 = *((uintptr_t *)rhs); 750 751 if (p1 < p2) 752 return (-1); 753 if (p1 > p2) 754 return (1); 755 return (0); 756 } 757 758 static int 759 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 760 { 761 const kmem_bufctl_audit_t *bcp1 = *lhs; 762 const kmem_bufctl_audit_t *bcp2 = *rhs; 763 764 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 765 return (-1); 766 767 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 768 return (1); 769 770 return (0); 771 } 772 773 typedef struct kmem_hash_walk { 774 uintptr_t *kmhw_table; 775 size_t kmhw_nelems; 776 size_t kmhw_pos; 777 kmem_bufctl_t kmhw_cur; 778 } kmem_hash_walk_t; 779 780 int 781 kmem_hash_walk_init(mdb_walk_state_t *wsp) 782 { 783 kmem_hash_walk_t *kmhw; 784 uintptr_t *hash; 785 kmem_cache_t c; 786 uintptr_t haddr, addr = wsp->walk_addr; 787 size_t nelems; 788 size_t hsize; 789 790 if (addr == NULL) { 791 mdb_warn("kmem_hash doesn't support global walks\n"); 792 return (WALK_ERR); 793 } 794 795 if (mdb_vread(&c, sizeof (c), addr) == -1) { 796 mdb_warn("couldn't read cache at addr %p", addr); 797 return (WALK_ERR); 798 } 799 800 if (!(c.cache_flags & KMF_HASH)) { 801 mdb_warn("cache %p doesn't have a hash table\n", addr); 802 return (WALK_DONE); /* nothing to do */ 803 } 804 805 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 806 kmhw->kmhw_cur.bc_next = NULL; 807 kmhw->kmhw_pos = 0; 808 809 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 810 hsize = nelems * sizeof (uintptr_t); 811 haddr = (uintptr_t)c.cache_hash_table; 812 813 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 814 if (mdb_vread(hash, hsize, haddr) == -1) { 815 mdb_warn("failed to read hash table at %p", haddr); 816 mdb_free(hash, hsize); 817 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 818 return (WALK_ERR); 819 } 820 821 wsp->walk_data = kmhw; 822 823 return (WALK_NEXT); 824 } 825 826 int 827 kmem_hash_walk_step(mdb_walk_state_t *wsp) 828 { 829 kmem_hash_walk_t *kmhw = wsp->walk_data; 830 uintptr_t addr = NULL; 831 832 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 833 while (kmhw->kmhw_pos < 
kmhw->kmhw_nelems) { 834 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 835 break; 836 } 837 } 838 if (addr == NULL) 839 return (WALK_DONE); 840 841 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 842 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 843 return (WALK_ERR); 844 } 845 846 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 847 } 848 849 void 850 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 851 { 852 kmem_hash_walk_t *kmhw = wsp->walk_data; 853 854 if (kmhw == NULL) 855 return; 856 857 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 858 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 859 } 860 861 /* 862 * Find the address of the bufctl structure for the address 'buf' in cache 863 * 'cp', which is at address caddr, and place it in *out. 864 */ 865 static int 866 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 867 { 868 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 869 kmem_bufctl_t *bcp; 870 kmem_bufctl_t bc; 871 872 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 873 mdb_warn("unable to read hash bucket for %p in cache %p", 874 buf, caddr); 875 return (-1); 876 } 877 878 while (bcp != NULL) { 879 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 880 (uintptr_t)bcp) == -1) { 881 mdb_warn("unable to read bufctl at %p", bcp); 882 return (-1); 883 } 884 if (bc.bc_addr == buf) { 885 *out = (uintptr_t)bcp; 886 return (0); 887 } 888 bcp = bc.bc_next; 889 } 890 891 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 892 return (-1); 893 } 894 895 int 896 kmem_get_magsize(const kmem_cache_t *cp) 897 { 898 uintptr_t addr = (uintptr_t)cp->cache_magtype; 899 GElf_Sym mt_sym; 900 kmem_magtype_t mt; 901 int res; 902 903 /* 904 * if cpu 0 has a non-zero magsize, it must be correct. caches 905 * with KMF_NOMAGAZINE have disabled their magazine layers, so 906 * it is okay to return 0 for them. 907 */ 908 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 909 (cp->cache_flags & KMF_NOMAGAZINE)) 910 return (res); 911 912 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 913 mdb_warn("unable to read 'kmem_magtype'"); 914 } else if (addr < mt_sym.st_value || 915 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 916 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 917 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 918 cp->cache_name, addr); 919 return (0); 920 } 921 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 922 mdb_warn("unable to read magtype at %a", addr); 923 return (0); 924 } 925 return (mt.mt_magsize); 926 } 927 928 /*ARGSUSED*/ 929 static int 930 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 931 { 932 *est -= (sp->slab_chunks - sp->slab_refcnt); 933 934 return (WALK_NEXT); 935 } 936 937 /* 938 * Returns an upper bound on the number of allocated buffers in a given 939 * cache. 
940 */ 941 size_t 942 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 943 { 944 int magsize; 945 size_t cache_est; 946 947 cache_est = cp->cache_buftotal; 948 949 (void) mdb_pwalk("kmem_slab_partial", 950 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 951 952 if ((magsize = kmem_get_magsize(cp)) != 0) { 953 size_t mag_est = cp->cache_full.ml_total * magsize; 954 955 if (cache_est >= mag_est) { 956 cache_est -= mag_est; 957 } else { 958 mdb_warn("cache %p's magazine layer holds more buffers " 959 "than the slab layer.\n", addr); 960 } 961 } 962 return (cache_est); 963 } 964 965 #define READMAG_ROUNDS(rounds) { \ 966 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 967 mdb_warn("couldn't read magazine at %p", kmp); \ 968 goto fail; \ 969 } \ 970 for (i = 0; i < rounds; i++) { \ 971 maglist[magcnt++] = mp->mag_round[i]; \ 972 if (magcnt == magmax) { \ 973 mdb_warn("%d magazines exceeds fudge factor\n", \ 974 magcnt); \ 975 goto fail; \ 976 } \ 977 } \ 978 } 979 980 int 981 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 982 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 983 { 984 kmem_magazine_t *kmp, *mp; 985 void **maglist = NULL; 986 int i, cpu; 987 size_t magsize, magmax, magbsize; 988 size_t magcnt = 0; 989 990 /* 991 * Read the magtype out of the cache, after verifying the pointer's 992 * correctness. 993 */ 994 magsize = kmem_get_magsize(cp); 995 if (magsize == 0) { 996 *maglistp = NULL; 997 *magcntp = 0; 998 *magmaxp = 0; 999 return (WALK_NEXT); 1000 } 1001 1002 /* 1003 * There are several places where we need to go buffer hunting: 1004 * the per-CPU loaded magazine, the per-CPU spare full magazine, 1005 * and the full magazine list in the depot. 1006 * 1007 * For an upper bound on the number of buffers in the magazine 1008 * layer, we have the number of magazines on the cache_full 1009 * list plus at most two magazines per CPU (the loaded and the 1010 * spare). Toss in 100 magazines as a fudge factor in case this 1011 * is live (the number "100" comes from the same fudge factor in 1012 * crash(1M)). 1013 */ 1014 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 1015 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 1016 1017 if (magbsize >= PAGESIZE / 2) { 1018 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 1019 addr, magbsize); 1020 return (WALK_ERR); 1021 } 1022 1023 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 1024 mp = mdb_alloc(magbsize, alloc_flags); 1025 if (mp == NULL || maglist == NULL) 1026 goto fail; 1027 1028 /* 1029 * First up: the magazines in the depot (i.e. on the cache_full list). 1030 */ 1031 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 1032 READMAG_ROUNDS(magsize); 1033 kmp = mp->mag_next; 1034 1035 if (kmp == cp->cache_full.ml_list) 1036 break; /* cache_full list loop detected */ 1037 } 1038 1039 dprintf(("cache_full list done\n")); 1040 1041 /* 1042 * Now whip through the CPUs, snagging the loaded magazines 1043 * and full spares. 1044 * 1045 * In order to prevent inconsistent dumps, rounds and prounds 1046 * are copied aside before dumping begins. 
1047 */ 1048 for (cpu = 0; cpu < ncpus; cpu++) { 1049 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 1050 short rounds, prounds; 1051 1052 if (KMEM_DUMPCC(ccp)) { 1053 rounds = ccp->cc_dump_rounds; 1054 prounds = ccp->cc_dump_prounds; 1055 } else { 1056 rounds = ccp->cc_rounds; 1057 prounds = ccp->cc_prounds; 1058 } 1059 1060 dprintf(("reading cpu cache %p\n", 1061 (uintptr_t)ccp - (uintptr_t)cp + addr)); 1062 1063 if (rounds > 0 && 1064 (kmp = ccp->cc_loaded) != NULL) { 1065 dprintf(("reading %d loaded rounds\n", rounds)); 1066 READMAG_ROUNDS(rounds); 1067 } 1068 1069 if (prounds > 0 && 1070 (kmp = ccp->cc_ploaded) != NULL) { 1071 dprintf(("reading %d previously loaded rounds\n", 1072 prounds)); 1073 READMAG_ROUNDS(prounds); 1074 } 1075 } 1076 1077 dprintf(("magazine layer: %d buffers\n", magcnt)); 1078 1079 if (!(alloc_flags & UM_GC)) 1080 mdb_free(mp, magbsize); 1081 1082 *maglistp = maglist; 1083 *magcntp = magcnt; 1084 *magmaxp = magmax; 1085 1086 return (WALK_NEXT); 1087 1088 fail: 1089 if (!(alloc_flags & UM_GC)) { 1090 if (mp) 1091 mdb_free(mp, magbsize); 1092 if (maglist) 1093 mdb_free(maglist, magmax * sizeof (void *)); 1094 } 1095 return (WALK_ERR); 1096 } 1097 1098 static int 1099 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 1100 { 1101 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 1102 } 1103 1104 static int 1105 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 1106 { 1107 kmem_bufctl_audit_t b; 1108 1109 /* 1110 * if KMF_AUDIT is not set, we know that we're looking at a 1111 * kmem_bufctl_t. 1112 */ 1113 if (!(cp->cache_flags & KMF_AUDIT) || 1114 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1115 (void) memset(&b, 0, sizeof (b)); 1116 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1117 mdb_warn("unable to read bufctl at %p", buf); 1118 return (WALK_ERR); 1119 } 1120 } 1121 1122 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1123 } 1124 1125 typedef struct kmem_walk { 1126 int kmw_type; 1127 1128 uintptr_t kmw_addr; /* cache address */ 1129 kmem_cache_t *kmw_cp; 1130 size_t kmw_csize; 1131 1132 /* 1133 * magazine layer 1134 */ 1135 void **kmw_maglist; 1136 size_t kmw_max; 1137 size_t kmw_count; 1138 size_t kmw_pos; 1139 1140 /* 1141 * slab layer 1142 */ 1143 char *kmw_valid; /* to keep track of freed buffers */ 1144 char *kmw_ubase; /* buffer for slab data */ 1145 } kmem_walk_t; 1146 1147 static int 1148 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1149 { 1150 kmem_walk_t *kmw; 1151 int ncpus, csize; 1152 kmem_cache_t *cp; 1153 size_t vm_quantum; 1154 1155 size_t magmax, magcnt; 1156 void **maglist = NULL; 1157 uint_t chunksize, slabsize; 1158 int status = WALK_ERR; 1159 uintptr_t addr = wsp->walk_addr; 1160 const char *layered; 1161 1162 type &= ~KM_HASH; 1163 1164 if (addr == NULL) { 1165 mdb_warn("kmem walk doesn't support global walks\n"); 1166 return (WALK_ERR); 1167 } 1168 1169 dprintf(("walking %p\n", addr)); 1170 1171 /* 1172 * First we need to figure out how many CPUs are configured in the 1173 * system to know how much to slurp out. 1174 */ 1175 mdb_readvar(&ncpus, "max_ncpus"); 1176 1177 csize = KMEM_CACHE_SIZE(ncpus); 1178 cp = mdb_alloc(csize, UM_SLEEP); 1179 1180 if (mdb_vread(cp, csize, addr) == -1) { 1181 mdb_warn("couldn't read cache at addr %p", addr); 1182 goto out2; 1183 } 1184 1185 /* 1186 * It's easy for someone to hand us an invalid cache address. 1187 * Unfortunately, it is hard for this walker to survive an 1188 * invalid cache cleanly. 
So we make sure that: 1189 * 1190 * 1. the vmem arena for the cache is readable, 1191 * 2. the vmem arena's quantum is a power of 2, 1192 * 3. our slabsize is a multiple of the quantum, and 1193 * 4. our chunksize is >0 and less than our slabsize. 1194 */ 1195 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1196 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1197 vm_quantum == 0 || 1198 (vm_quantum & (vm_quantum - 1)) != 0 || 1199 cp->cache_slabsize < vm_quantum || 1200 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1201 cp->cache_chunksize == 0 || 1202 cp->cache_chunksize > cp->cache_slabsize) { 1203 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1204 goto out2; 1205 } 1206 1207 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1208 1209 if (cp->cache_buftotal == 0) { 1210 mdb_free(cp, csize); 1211 return (WALK_DONE); 1212 } 1213 1214 /* 1215 * If they ask for bufctls, but it's a small-slab cache, 1216 * there is nothing to report. 1217 */ 1218 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1219 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1220 cp->cache_flags)); 1221 mdb_free(cp, csize); 1222 return (WALK_DONE); 1223 } 1224 1225 /* 1226 * If they want constructed buffers, but there's no constructor or 1227 * the cache has DEADBEEF checking enabled, there is nothing to report. 1228 */ 1229 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1230 cp->cache_constructor == NULL || 1231 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1232 mdb_free(cp, csize); 1233 return (WALK_DONE); 1234 } 1235 1236 /* 1237 * Read in the contents of the magazine layer 1238 */ 1239 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1240 &magmax, UM_SLEEP) == WALK_ERR) 1241 goto out2; 1242 1243 /* 1244 * We have all of the buffers from the magazines; if we are walking 1245 * allocated buffers, sort them so we can bsearch them later. 1246 */ 1247 if (type & KM_ALLOCATED) 1248 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1249 1250 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1251 1252 kmw->kmw_type = type; 1253 kmw->kmw_addr = addr; 1254 kmw->kmw_cp = cp; 1255 kmw->kmw_csize = csize; 1256 kmw->kmw_maglist = maglist; 1257 kmw->kmw_max = magmax; 1258 kmw->kmw_count = magcnt; 1259 kmw->kmw_pos = 0; 1260 1261 /* 1262 * When walking allocated buffers in a KMF_HASH cache, we walk the 1263 * hash table instead of the slab layer. 1264 */ 1265 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1266 layered = "kmem_hash"; 1267 1268 kmw->kmw_type |= KM_HASH; 1269 } else { 1270 /* 1271 * If we are walking freed buffers, we only need the 1272 * magazine layer plus the partially allocated slabs. 1273 * To walk allocated buffers, we need all of the slabs. 1274 */ 1275 if (type & KM_ALLOCATED) 1276 layered = "kmem_slab"; 1277 else 1278 layered = "kmem_slab_partial"; 1279 1280 /* 1281 * for small-slab caches, we read in the entire slab. For 1282 * freed buffers, we can just walk the freelist. For 1283 * allocated buffers, we use a 'valid' array to track 1284 * the freed buffers. 
1285 */ 1286 if (!(cp->cache_flags & KMF_HASH)) { 1287 chunksize = cp->cache_chunksize; 1288 slabsize = cp->cache_slabsize; 1289 1290 kmw->kmw_ubase = mdb_alloc(slabsize + 1291 sizeof (kmem_bufctl_t), UM_SLEEP); 1292 1293 if (type & KM_ALLOCATED) 1294 kmw->kmw_valid = 1295 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1296 } 1297 } 1298 1299 status = WALK_NEXT; 1300 1301 if (mdb_layered_walk(layered, wsp) == -1) { 1302 mdb_warn("unable to start layered '%s' walk", layered); 1303 status = WALK_ERR; 1304 } 1305 1306 out1: 1307 if (status == WALK_ERR) { 1308 if (kmw->kmw_valid) 1309 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1310 1311 if (kmw->kmw_ubase) 1312 mdb_free(kmw->kmw_ubase, slabsize + 1313 sizeof (kmem_bufctl_t)); 1314 1315 if (kmw->kmw_maglist) 1316 mdb_free(kmw->kmw_maglist, 1317 kmw->kmw_max * sizeof (uintptr_t)); 1318 1319 mdb_free(kmw, sizeof (kmem_walk_t)); 1320 wsp->walk_data = NULL; 1321 } 1322 1323 out2: 1324 if (status == WALK_ERR) 1325 mdb_free(cp, csize); 1326 1327 return (status); 1328 } 1329 1330 int 1331 kmem_walk_step(mdb_walk_state_t *wsp) 1332 { 1333 kmem_walk_t *kmw = wsp->walk_data; 1334 int type = kmw->kmw_type; 1335 kmem_cache_t *cp = kmw->kmw_cp; 1336 1337 void **maglist = kmw->kmw_maglist; 1338 int magcnt = kmw->kmw_count; 1339 1340 uintptr_t chunksize, slabsize; 1341 uintptr_t addr; 1342 const kmem_slab_t *sp; 1343 const kmem_bufctl_t *bcp; 1344 kmem_bufctl_t bc; 1345 1346 int chunks; 1347 char *kbase; 1348 void *buf; 1349 int i, ret; 1350 1351 char *valid, *ubase; 1352 1353 /* 1354 * first, handle the 'kmem_hash' layered walk case 1355 */ 1356 if (type & KM_HASH) { 1357 /* 1358 * We have a buffer which has been allocated out of the 1359 * global layer. We need to make sure that it's not 1360 * actually sitting in a magazine before we report it as 1361 * an allocated buffer. 1362 */ 1363 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1364 1365 if (magcnt > 0 && 1366 bsearch(&buf, maglist, magcnt, sizeof (void *), 1367 addrcmp) != NULL) 1368 return (WALK_NEXT); 1369 1370 if (type & KM_BUFCTL) 1371 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1372 1373 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1374 } 1375 1376 ret = WALK_NEXT; 1377 1378 addr = kmw->kmw_addr; 1379 1380 /* 1381 * If we're walking freed buffers, report everything in the 1382 * magazine layer before processing the first slab. 1383 */ 1384 if ((type & KM_FREE) && magcnt != 0) { 1385 kmw->kmw_count = 0; /* only do this once */ 1386 for (i = 0; i < magcnt; i++) { 1387 buf = maglist[i]; 1388 1389 if (type & KM_BUFCTL) { 1390 uintptr_t out; 1391 1392 if (cp->cache_flags & KMF_BUFTAG) { 1393 kmem_buftag_t *btp; 1394 kmem_buftag_t tag; 1395 1396 /* LINTED - alignment */ 1397 btp = KMEM_BUFTAG(cp, buf); 1398 if (mdb_vread(&tag, sizeof (tag), 1399 (uintptr_t)btp) == -1) { 1400 mdb_warn("reading buftag for " 1401 "%p at %p", buf, btp); 1402 continue; 1403 } 1404 out = (uintptr_t)tag.bt_bufctl; 1405 } else { 1406 if (kmem_hash_lookup(cp, addr, buf, 1407 &out) == -1) 1408 continue; 1409 } 1410 ret = bufctl_walk_callback(cp, wsp, out); 1411 } else { 1412 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1413 } 1414 1415 if (ret != WALK_NEXT) 1416 return (ret); 1417 } 1418 } 1419 1420 /* 1421 * If they want constructed buffers, we're finished, since the 1422 * magazine layer holds them all. 
 */
	if (type & KM_CONSTRUCTED)
		return (WALK_DONE);

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is (or should be) in the slab
			 * that we've read in; determine its offset in the
			 * slab, validate that it's not corrupt, and add to
			 * our base address to find the kmem_bufctl_t.  (Note
			 * that we don't need to add the size of the bufctl
			 * to our offset calculation because of the slop that's
			 * allocated for the buffer at ubase.)
			 */
			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;

			if (offs > chunks * chunksize) {
				mdb_warn("found corrupt bufctl ptr %p"
				    " in slab %p in cache %p\n", bcp,
				    wsp->walk_addr, addr);
				break;
			}

			bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}
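/*
 * An illustrative sketch of how the walks built on kmem_walk_step() are
 * typically driven from mdb (the cache name and address below are
 * hypothetical, not taken from a real target):
 *
 *	> ::walk kmem_alloc_96		(allocated buffers in one cache)
 *	> ::walk freemem		(free buffers across all caches)
 *	> <cache addr>::walk bufctl | ::bufctl -v
 *
 * The kmem, bufctl, freemem, and freectl walkers defined below differ only
 * in the KM_* type bits they pass to kmem_walk_init_common().
 */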
void
kmem_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (kmw == NULL)
		return;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;

	if (kmw->kmw_valid != NULL)
		mdb_free(kmw->kmw_valid, slabsize / chunksize);
	if (kmw->kmw_ubase != NULL)
		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));

	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
	mdb_free(kmw, sizeof (kmem_walk_t));
}

/*ARGSUSED*/
static int
kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1622 */ 1623 if (c->cache_cflags & KMC_NOTOUCH) 1624 return (WALK_NEXT); 1625 1626 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1627 wsp->walk_cbdata, addr) == -1) 1628 return (WALK_DONE); 1629 1630 return (WALK_NEXT); 1631 } 1632 1633 #define KMEM_WALK_ALL(name, wsp) { \ 1634 wsp->walk_data = (name); \ 1635 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1636 return (WALK_ERR); \ 1637 return (WALK_DONE); \ 1638 } 1639 1640 int 1641 kmem_walk_init(mdb_walk_state_t *wsp) 1642 { 1643 if (wsp->walk_arg != NULL) 1644 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1645 1646 if (wsp->walk_addr == NULL) 1647 KMEM_WALK_ALL("kmem", wsp); 1648 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1649 } 1650 1651 int 1652 bufctl_walk_init(mdb_walk_state_t *wsp) 1653 { 1654 if (wsp->walk_addr == NULL) 1655 KMEM_WALK_ALL("bufctl", wsp); 1656 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1657 } 1658 1659 int 1660 freemem_walk_init(mdb_walk_state_t *wsp) 1661 { 1662 if (wsp->walk_addr == NULL) 1663 KMEM_WALK_ALL("freemem", wsp); 1664 return (kmem_walk_init_common(wsp, KM_FREE)); 1665 } 1666 1667 int 1668 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1669 { 1670 if (wsp->walk_addr == NULL) 1671 KMEM_WALK_ALL("freemem_constructed", wsp); 1672 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1673 } 1674 1675 int 1676 freectl_walk_init(mdb_walk_state_t *wsp) 1677 { 1678 if (wsp->walk_addr == NULL) 1679 KMEM_WALK_ALL("freectl", wsp); 1680 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1681 } 1682 1683 int 1684 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1685 { 1686 if (wsp->walk_addr == NULL) 1687 KMEM_WALK_ALL("freectl_constructed", wsp); 1688 return (kmem_walk_init_common(wsp, 1689 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1690 } 1691 1692 typedef struct bufctl_history_walk { 1693 void *bhw_next; 1694 kmem_cache_t *bhw_cache; 1695 kmem_slab_t *bhw_slab; 1696 hrtime_t bhw_timestamp; 1697 } bufctl_history_walk_t; 1698 1699 int 1700 bufctl_history_walk_init(mdb_walk_state_t *wsp) 1701 { 1702 bufctl_history_walk_t *bhw; 1703 kmem_bufctl_audit_t bc; 1704 kmem_bufctl_audit_t bcn; 1705 1706 if (wsp->walk_addr == NULL) { 1707 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1708 return (WALK_ERR); 1709 } 1710 1711 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1712 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1713 return (WALK_ERR); 1714 } 1715 1716 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1717 bhw->bhw_timestamp = 0; 1718 bhw->bhw_cache = bc.bc_cache; 1719 bhw->bhw_slab = bc.bc_slab; 1720 1721 /* 1722 * sometimes the first log entry matches the base bufctl; in that 1723 * case, skip the base bufctl. 
1724 */ 1725 if (bc.bc_lastlog != NULL && 1726 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1727 bc.bc_addr == bcn.bc_addr && 1728 bc.bc_cache == bcn.bc_cache && 1729 bc.bc_slab == bcn.bc_slab && 1730 bc.bc_timestamp == bcn.bc_timestamp && 1731 bc.bc_thread == bcn.bc_thread) 1732 bhw->bhw_next = bc.bc_lastlog; 1733 else 1734 bhw->bhw_next = (void *)wsp->walk_addr; 1735 1736 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1737 wsp->walk_data = bhw; 1738 1739 return (WALK_NEXT); 1740 } 1741 1742 int 1743 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1744 { 1745 bufctl_history_walk_t *bhw = wsp->walk_data; 1746 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1747 uintptr_t baseaddr = wsp->walk_addr; 1748 kmem_bufctl_audit_t bc; 1749 1750 if (addr == NULL) 1751 return (WALK_DONE); 1752 1753 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1754 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1755 return (WALK_ERR); 1756 } 1757 1758 /* 1759 * The bufctl is only valid if the address, cache, and slab are 1760 * correct. We also check that the timestamp is decreasing, to 1761 * prevent infinite loops. 1762 */ 1763 if ((uintptr_t)bc.bc_addr != baseaddr || 1764 bc.bc_cache != bhw->bhw_cache || 1765 bc.bc_slab != bhw->bhw_slab || 1766 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1767 return (WALK_DONE); 1768 1769 bhw->bhw_next = bc.bc_lastlog; 1770 bhw->bhw_timestamp = bc.bc_timestamp; 1771 1772 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1773 } 1774 1775 void 1776 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1777 { 1778 bufctl_history_walk_t *bhw = wsp->walk_data; 1779 1780 mdb_free(bhw, sizeof (*bhw)); 1781 } 1782 1783 typedef struct kmem_log_walk { 1784 kmem_bufctl_audit_t *klw_base; 1785 kmem_bufctl_audit_t **klw_sorted; 1786 kmem_log_header_t klw_lh; 1787 size_t klw_size; 1788 size_t klw_maxndx; 1789 size_t klw_ndx; 1790 } kmem_log_walk_t; 1791 1792 int 1793 kmem_log_walk_init(mdb_walk_state_t *wsp) 1794 { 1795 uintptr_t lp = wsp->walk_addr; 1796 kmem_log_walk_t *klw; 1797 kmem_log_header_t *lhp; 1798 int maxndx, i, j, k; 1799 1800 /* 1801 * By default (global walk), walk the kmem_transaction_log. Otherwise 1802 * read the log whose kmem_log_header_t is stored at walk_addr. 
1803 */ 1804 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1805 mdb_warn("failed to read 'kmem_transaction_log'"); 1806 return (WALK_ERR); 1807 } 1808 1809 if (lp == NULL) { 1810 mdb_warn("log is disabled\n"); 1811 return (WALK_ERR); 1812 } 1813 1814 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1815 lhp = &klw->klw_lh; 1816 1817 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1818 mdb_warn("failed to read log header at %p", lp); 1819 mdb_free(klw, sizeof (kmem_log_walk_t)); 1820 return (WALK_ERR); 1821 } 1822 1823 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1824 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1825 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1826 1827 if (mdb_vread(klw->klw_base, klw->klw_size, 1828 (uintptr_t)lhp->lh_base) == -1) { 1829 mdb_warn("failed to read log at base %p", lhp->lh_base); 1830 mdb_free(klw->klw_base, klw->klw_size); 1831 mdb_free(klw, sizeof (kmem_log_walk_t)); 1832 return (WALK_ERR); 1833 } 1834 1835 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1836 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1837 1838 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1839 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1840 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1841 1842 for (j = 0; j < maxndx; j++) 1843 klw->klw_sorted[k++] = &chunk[j]; 1844 } 1845 1846 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1847 (int(*)(const void *, const void *))bufctlcmp); 1848 1849 klw->klw_maxndx = k; 1850 wsp->walk_data = klw; 1851 1852 return (WALK_NEXT); 1853 } 1854 1855 int 1856 kmem_log_walk_step(mdb_walk_state_t *wsp) 1857 { 1858 kmem_log_walk_t *klw = wsp->walk_data; 1859 kmem_bufctl_audit_t *bcp; 1860 1861 if (klw->klw_ndx == klw->klw_maxndx) 1862 return (WALK_DONE); 1863 1864 bcp = klw->klw_sorted[klw->klw_ndx++]; 1865 1866 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1867 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1868 } 1869 1870 void 1871 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1872 { 1873 kmem_log_walk_t *klw = wsp->walk_data; 1874 1875 mdb_free(klw->klw_base, klw->klw_size); 1876 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1877 sizeof (kmem_bufctl_audit_t *)); 1878 mdb_free(klw, sizeof (kmem_log_walk_t)); 1879 } 1880 1881 typedef struct allocdby_bufctl { 1882 uintptr_t abb_addr; 1883 hrtime_t abb_ts; 1884 } allocdby_bufctl_t; 1885 1886 typedef struct allocdby_walk { 1887 const char *abw_walk; 1888 uintptr_t abw_thread; 1889 size_t abw_nbufs; 1890 size_t abw_size; 1891 allocdby_bufctl_t *abw_buf; 1892 size_t abw_ndx; 1893 } allocdby_walk_t; 1894 1895 int 1896 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1897 allocdby_walk_t *abw) 1898 { 1899 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1900 return (WALK_NEXT); 1901 1902 if (abw->abw_nbufs == abw->abw_size) { 1903 allocdby_bufctl_t *buf; 1904 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1905 1906 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1907 1908 bcopy(abw->abw_buf, buf, oldsize); 1909 mdb_free(abw->abw_buf, oldsize); 1910 1911 abw->abw_size <<= 1; 1912 abw->abw_buf = buf; 1913 } 1914 1915 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1916 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1917 abw->abw_nbufs++; 1918 1919 return (WALK_NEXT); 1920 } 1921 1922 /*ARGSUSED*/ 1923 int 1924 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1925 { 1926 if (mdb_pwalk(abw->abw_walk, 
(mdb_walk_cb_t)allocdby_walk_bufctl, 1927 abw, addr) == -1) { 1928 mdb_warn("couldn't walk bufctl for cache %p", addr); 1929 return (WALK_DONE); 1930 } 1931 1932 return (WALK_NEXT); 1933 } 1934 1935 static int 1936 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1937 { 1938 if (lhs->abb_ts < rhs->abb_ts) 1939 return (1); 1940 if (lhs->abb_ts > rhs->abb_ts) 1941 return (-1); 1942 return (0); 1943 } 1944 1945 static int 1946 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1947 { 1948 allocdby_walk_t *abw; 1949 1950 if (wsp->walk_addr == NULL) { 1951 mdb_warn("allocdby walk doesn't support global walks\n"); 1952 return (WALK_ERR); 1953 } 1954 1955 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1956 1957 abw->abw_thread = wsp->walk_addr; 1958 abw->abw_walk = walk; 1959 abw->abw_size = 128; /* something reasonable */ 1960 abw->abw_buf = 1961 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1962 1963 wsp->walk_data = abw; 1964 1965 if (mdb_walk("kmem_cache", 1966 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1967 mdb_warn("couldn't walk kmem_cache"); 1968 allocdby_walk_fini(wsp); 1969 return (WALK_ERR); 1970 } 1971 1972 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1973 (int(*)(const void *, const void *))allocdby_cmp); 1974 1975 return (WALK_NEXT); 1976 } 1977 1978 int 1979 allocdby_walk_init(mdb_walk_state_t *wsp) 1980 { 1981 return (allocdby_walk_init_common(wsp, "bufctl")); 1982 } 1983 1984 int 1985 freedby_walk_init(mdb_walk_state_t *wsp) 1986 { 1987 return (allocdby_walk_init_common(wsp, "freectl")); 1988 } 1989 1990 int 1991 allocdby_walk_step(mdb_walk_state_t *wsp) 1992 { 1993 allocdby_walk_t *abw = wsp->walk_data; 1994 kmem_bufctl_audit_t bc; 1995 uintptr_t addr; 1996 1997 if (abw->abw_ndx == abw->abw_nbufs) 1998 return (WALK_DONE); 1999 2000 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 2001 2002 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2003 mdb_warn("couldn't read bufctl at %p", addr); 2004 return (WALK_DONE); 2005 } 2006 2007 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 2008 } 2009 2010 void 2011 allocdby_walk_fini(mdb_walk_state_t *wsp) 2012 { 2013 allocdby_walk_t *abw = wsp->walk_data; 2014 2015 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 2016 mdb_free(abw, sizeof (allocdby_walk_t)); 2017 } 2018 2019 /*ARGSUSED*/ 2020 int 2021 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 2022 { 2023 char c[MDB_SYM_NAMLEN]; 2024 GElf_Sym sym; 2025 int i; 2026 2027 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 2028 for (i = 0; i < bcp->bc_depth; i++) { 2029 if (mdb_lookup_by_addr(bcp->bc_stack[i], 2030 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2031 continue; 2032 if (strncmp(c, "kmem_", 5) == 0) 2033 continue; 2034 mdb_printf("%s+0x%lx", 2035 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 2036 break; 2037 } 2038 mdb_printf("\n"); 2039 2040 return (WALK_NEXT); 2041 } 2042 2043 static int 2044 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 2045 { 2046 if (!(flags & DCMD_ADDRSPEC)) 2047 return (DCMD_USAGE); 2048 2049 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 2050 2051 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 2052 mdb_warn("can't walk '%s' for %p", w, addr); 2053 return (DCMD_ERR); 2054 } 2055 2056 return (DCMD_OK); 2057 } 2058 2059 /*ARGSUSED*/ 2060 int 2061 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2062 { 2063 return (allocdby_common(addr, 
flags, "allocdby"));
2064 }
2065
2066 /*ARGSUSED*/
2067 int
2068 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2069 {
2070 return (allocdby_common(addr, flags, "freedby"));
2071 }
2072
2073 /*
2074 * Return a string describing the address in relation to the given thread's
2075 * stack.
2076 *
2077 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2078 *
2079 * - If the address is above the stack pointer, return an empty string
2080 * signifying that the address is active.
2081 *
2082 * - If the address is below the stack pointer, and the thread is not on proc,
2083 * return " (below sp)".
2084 *
2085 * - If the address is below the stack pointer, and the thread is on proc,
2086 * return " (possibly below sp)". Depending on context, we may or may not
2087 * have an accurate t_sp.
2088 */
2089 static const char *
2090 stack_active(const kthread_t *t, uintptr_t addr)
2091 {
2092 uintptr_t panicstk;
2093 GElf_Sym sym;
2094
2095 if (t->t_state == TS_FREE)
2096 return (" (inactive interrupt thread)");
2097
2098 /*
2099 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2100 * no longer relates to the thread's real stack.
2101 */
2102 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2103 panicstk = (uintptr_t)sym.st_value;
2104
2105 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2106 return ("");
2107 }
2108
2109 if (addr >= t->t_sp + STACK_BIAS)
2110 return ("");
2111
2112 if (t->t_state == TS_ONPROC)
2113 return (" (possibly below sp)");
2114
2115 return (" (below sp)");
2116 }
2117
2118 /*
2119 * Additional state for the kmem and vmem ::whatis handlers
2120 */
2121 typedef struct whatis_info {
2122 mdb_whatis_t *wi_w;
2123 const kmem_cache_t *wi_cache;
2124 const vmem_t *wi_vmem;
2125 vmem_t *wi_msb_arena;
2126 size_t wi_slab_size;
2127 uint_t wi_slab_found;
2128 uint_t wi_kmem_lite_count;
2129 uint_t wi_freemem;
2130 } whatis_info_t;
2131
2132 /* call one of our dcmd functions with "-v" and the provided address */
2133 static void
2134 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2135 {
2136 mdb_arg_t a;
2137 a.a_type = MDB_TYPE_STRING;
2138 a.a_un.a_str = "-v";
2139
2140 mdb_printf(":\n");
2141 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2142 }
2143
2144 static void
2145 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2146 {
2147 #define KMEM_LITE_MAX 16
2148 pc_t callers[KMEM_LITE_MAX];
2149 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2150
2151 kmem_buftag_t bt;
2152 intptr_t stat;
2153 const char *plural = "";
2154 int i;
2155
2156 /* validate our arguments and read in the buftag */
2157 if (count == 0 || count > KMEM_LITE_MAX ||
2158 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2159 return;
2160
2161 /* validate the buffer state and read in the callers */
2162 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2163
2164 if ((stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) ||
2165 mdb_vread(callers, count * sizeof (pc_t),
2166 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2167 return;
2168
2169 /* If there aren't any filled in callers, bail */
2170 if (callers[0] == uninit)
2171 return;
2172
2173 plural = (callers[1] == uninit) ?
"" : "s"; 2174 2175 /* Everything's done and checked; print them out */ 2176 mdb_printf(":\n"); 2177 2178 mdb_inc_indent(8); 2179 mdb_printf("recent caller%s: %a", plural, callers[0]); 2180 for (i = 1; i < count; i++) { 2181 if (callers[i] == uninit) 2182 break; 2183 mdb_printf(", %a", callers[i]); 2184 } 2185 mdb_dec_indent(8); 2186 } 2187 2188 static void 2189 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, 2190 uintptr_t baddr) 2191 { 2192 mdb_whatis_t *w = wi->wi_w; 2193 2194 const kmem_cache_t *cp = wi->wi_cache; 2195 /* LINTED pointer cast may result in improper alignment */ 2196 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr); 2197 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); 2198 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT)); 2199 2200 mdb_whatis_report_object(w, maddr, addr, ""); 2201 2202 if (baddr != 0 && !call_printer) 2203 mdb_printf("bufctl %p ", baddr); 2204 2205 mdb_printf("%s from %s", 2206 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name); 2207 2208 if (baddr != 0 && call_printer) { 2209 whatis_call_printer(bufctl, baddr); 2210 return; 2211 } 2212 2213 /* for KMF_LITE caches, try to print out the previous callers */ 2214 if (!quiet && (cp->cache_flags & KMF_LITE)) 2215 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count); 2216 2217 mdb_printf("\n"); 2218 } 2219 2220 /*ARGSUSED*/ 2221 static int 2222 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi) 2223 { 2224 mdb_whatis_t *w = wi->wi_w; 2225 2226 uintptr_t cur; 2227 size_t size = wi->wi_cache->cache_bufsize; 2228 2229 while (mdb_whatis_match(w, addr, size, &cur)) 2230 whatis_print_kmem(wi, cur, addr, NULL); 2231 2232 return (WHATIS_WALKRET(w)); 2233 } 2234 2235 /*ARGSUSED*/ 2236 static int 2237 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi) 2238 { 2239 mdb_whatis_t *w = wi->wi_w; 2240 2241 uintptr_t cur; 2242 uintptr_t addr = (uintptr_t)bcp->bc_addr; 2243 size_t size = wi->wi_cache->cache_bufsize; 2244 2245 while (mdb_whatis_match(w, addr, size, &cur)) 2246 whatis_print_kmem(wi, cur, addr, baddr); 2247 2248 return (WHATIS_WALKRET(w)); 2249 } 2250 2251 static int 2252 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) 2253 { 2254 mdb_whatis_t *w = wi->wi_w; 2255 2256 size_t size = vs->vs_end - vs->vs_start; 2257 uintptr_t cur; 2258 2259 /* We're not interested in anything but alloc and free segments */ 2260 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) 2261 return (WALK_NEXT); 2262 2263 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { 2264 mdb_whatis_report_object(w, cur, vs->vs_start, ""); 2265 2266 /* 2267 * If we're not printing it seperately, provide the vmem_seg 2268 * pointer if it has a stack trace. 2269 */ 2270 if ((mdb_whatis_flags(w) & WHATIS_QUIET) && 2271 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) || 2272 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 2273 mdb_printf("vmem_seg %p ", addr); 2274 } 2275 2276 mdb_printf("%s from the %s vmem arena", 2277 (vs->vs_type == VMEM_ALLOC) ? 
"allocated" : "freed", 2278 wi->wi_vmem->vm_name); 2279 2280 if (!(mdb_whatis_flags(w) & WHATIS_QUIET)) 2281 whatis_call_printer(vmem_seg, addr); 2282 else 2283 mdb_printf("\n"); 2284 } 2285 2286 return (WHATIS_WALKRET(w)); 2287 } 2288 2289 static int 2290 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) 2291 { 2292 mdb_whatis_t *w = wi->wi_w; 2293 const char *nm = vmem->vm_name; 2294 2295 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0); 2296 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0); 2297 2298 if (identifier != idspace) 2299 return (WALK_NEXT); 2300 2301 wi->wi_vmem = vmem; 2302 2303 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2304 mdb_printf("Searching vmem arena %s...\n", nm); 2305 2306 if (mdb_pwalk("vmem_seg", 2307 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { 2308 mdb_warn("can't walk vmem_seg for %p", addr); 2309 return (WALK_NEXT); 2310 } 2311 2312 return (WHATIS_WALKRET(w)); 2313 } 2314 2315 /*ARGSUSED*/ 2316 static int 2317 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi) 2318 { 2319 mdb_whatis_t *w = wi->wi_w; 2320 2321 /* It must overlap with the slab data, or it's not interesting */ 2322 if (mdb_whatis_overlaps(w, 2323 (uintptr_t)sp->slab_base, wi->wi_slab_size)) { 2324 wi->wi_slab_found++; 2325 return (WALK_DONE); 2326 } 2327 return (WALK_NEXT); 2328 } 2329 2330 static int 2331 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2332 { 2333 mdb_whatis_t *w = wi->wi_w; 2334 2335 char *walk, *freewalk; 2336 mdb_walk_cb_t func; 2337 int do_bufctl; 2338 2339 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0); 2340 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0); 2341 2342 if (identifier != idspace) 2343 return (WALK_NEXT); 2344 2345 /* Override the '-b' flag as necessary */ 2346 if (!(c->cache_flags & KMF_HASH)) 2347 do_bufctl = FALSE; /* no bufctls to walk */ 2348 else if (c->cache_flags & KMF_AUDIT) 2349 do_bufctl = TRUE; /* we always want debugging info */ 2350 else 2351 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); 2352 2353 if (do_bufctl) { 2354 walk = "bufctl"; 2355 freewalk = "freectl"; 2356 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2357 } else { 2358 walk = "kmem"; 2359 freewalk = "freemem"; 2360 func = (mdb_walk_cb_t)whatis_walk_kmem; 2361 } 2362 2363 wi->wi_cache = c; 2364 2365 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2366 mdb_printf("Searching %s...\n", c->cache_name); 2367 2368 /* 2369 * If more then two buffers live on each slab, figure out if we're 2370 * interested in anything in any slab before doing the more expensive 2371 * kmem/freemem (bufctl/freectl) walkers. 2372 */ 2373 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; 2374 if (!(c->cache_flags & KMF_HASH)) 2375 wi->wi_slab_size -= sizeof (kmem_slab_t); 2376 2377 if ((wi->wi_slab_size / c->cache_chunksize) > 2) { 2378 wi->wi_slab_found = 0; 2379 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, 2380 addr) == -1) { 2381 mdb_warn("can't find kmem_slab walker"); 2382 return (WALK_DONE); 2383 } 2384 if (wi->wi_slab_found == 0) 2385 return (WALK_NEXT); 2386 } 2387 2388 wi->wi_freemem = FALSE; 2389 if (mdb_pwalk(walk, func, wi, addr) == -1) { 2390 mdb_warn("can't find %s walker", walk); 2391 return (WALK_DONE); 2392 } 2393 2394 if (mdb_whatis_done(w)) 2395 return (WALK_DONE); 2396 2397 /* 2398 * We have searched for allocated memory; now search for freed memory. 
2399 */ 2400 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2401 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2402 2403 wi->wi_freemem = TRUE; 2404 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2405 mdb_warn("can't find %s walker", freewalk); 2406 return (WALK_DONE); 2407 } 2408 2409 return (WHATIS_WALKRET(w)); 2410 } 2411 2412 static int 2413 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2414 { 2415 if (c->cache_arena == wi->wi_msb_arena || 2416 (c->cache_cflags & KMC_NOTOUCH)) 2417 return (WALK_NEXT); 2418 2419 return (whatis_walk_cache(addr, c, wi)); 2420 } 2421 2422 static int 2423 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2424 { 2425 if (c->cache_arena != wi->wi_msb_arena) 2426 return (WALK_NEXT); 2427 2428 return (whatis_walk_cache(addr, c, wi)); 2429 } 2430 2431 static int 2432 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2433 { 2434 if (c->cache_arena == wi->wi_msb_arena || 2435 !(c->cache_cflags & KMC_NOTOUCH)) 2436 return (WALK_NEXT); 2437 2438 return (whatis_walk_cache(addr, c, wi)); 2439 } 2440 2441 static int 2442 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w) 2443 { 2444 uintptr_t cur; 2445 uintptr_t saddr; 2446 size_t size; 2447 2448 /* 2449 * Often, one calls ::whatis on an address from a thread structure. 2450 * We use this opportunity to short circuit this case... 2451 */ 2452 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur)) 2453 mdb_whatis_report_object(w, cur, addr, 2454 "allocated as a thread structure\n"); 2455 2456 /* 2457 * Now check the stack 2458 */ 2459 if (t->t_stkbase == NULL) 2460 return (WALK_NEXT); 2461 2462 /* 2463 * This assumes that t_stk is the end of the stack, but it's really 2464 * only the initial stack pointer for the thread. Arguments to the 2465 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So 2466 * that 't->t_stk::whatis' reports "part of t's stack", we include 2467 * t_stk in the range (the "+ 1", below), but the kernel should 2468 * really include the full stack bounds where we can find it. 2469 */ 2470 saddr = (uintptr_t)t->t_stkbase; 2471 size = (uintptr_t)t->t_stk - saddr + 1; 2472 while (mdb_whatis_match(w, saddr, size, &cur)) 2473 mdb_whatis_report_object(w, cur, cur, 2474 "in thread %p's stack%s\n", addr, stack_active(t, cur)); 2475 2476 return (WHATIS_WALKRET(w)); 2477 } 2478 2479 static void 2480 whatis_modctl_match(mdb_whatis_t *w, const char *name, 2481 uintptr_t base, size_t size, const char *where) 2482 { 2483 uintptr_t cur; 2484 2485 /* 2486 * Since we're searching for addresses inside a module, we report 2487 * them as symbols. 
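 * A match is reported along the lines of "<addr> is in <module>'s
 * <segment>"; for example (address purely illustrative), a pointer into
 * genunix's data would read "fffffffffbc30000 is in genunix's data segment".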
2488 */ 2489 while (mdb_whatis_match(w, base, size, &cur)) 2490 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where); 2491 } 2492 2493 static int 2494 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w) 2495 { 2496 char name[MODMAXNAMELEN]; 2497 struct module mod; 2498 Shdr shdr; 2499 2500 if (m->mod_mp == NULL) 2501 return (WALK_NEXT); 2502 2503 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2504 mdb_warn("couldn't read modctl %p's module", addr); 2505 return (WALK_NEXT); 2506 } 2507 2508 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2509 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2510 2511 whatis_modctl_match(w, name, 2512 (uintptr_t)mod.text, mod.text_size, "text segment"); 2513 whatis_modctl_match(w, name, 2514 (uintptr_t)mod.data, mod.data_size, "data segment"); 2515 whatis_modctl_match(w, name, 2516 (uintptr_t)mod.bss, mod.bss_size, "bss segment"); 2517 2518 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2519 mdb_warn("couldn't read symbol header for %p's module", addr); 2520 return (WALK_NEXT); 2521 } 2522 2523 whatis_modctl_match(w, name, 2524 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab"); 2525 whatis_modctl_match(w, name, 2526 (uintptr_t)mod.symspace, mod.symsize, "symtab"); 2527 2528 return (WHATIS_WALKRET(w)); 2529 } 2530 2531 /*ARGSUSED*/ 2532 static int 2533 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w) 2534 { 2535 uintptr_t cur; 2536 2537 uintptr_t base = (uintptr_t)seg->pages; 2538 size_t size = (uintptr_t)seg->epages - base; 2539 2540 while (mdb_whatis_match(w, base, size, &cur)) { 2541 /* round our found pointer down to the page_t base. */ 2542 size_t offset = (cur - base) % sizeof (page_t); 2543 2544 mdb_whatis_report_object(w, cur, cur - offset, 2545 "allocated as a page structure\n"); 2546 } 2547 2548 return (WHATIS_WALKRET(w)); 2549 } 2550 2551 /*ARGSUSED*/ 2552 static int 2553 whatis_run_modules(mdb_whatis_t *w, void *arg) 2554 { 2555 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) { 2556 mdb_warn("couldn't find modctl walker"); 2557 return (1); 2558 } 2559 return (0); 2560 } 2561 2562 /*ARGSUSED*/ 2563 static int 2564 whatis_run_threads(mdb_whatis_t *w, void *ignored) 2565 { 2566 /* 2567 * Now search all thread stacks. Yes, this is a little weak; we 2568 * can save a lot of work by first checking to see if the 2569 * address is in segkp vs. segkmem. But hey, computers are 2570 * fast. 
2571 */ 2572 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) { 2573 mdb_warn("couldn't find thread walker"); 2574 return (1); 2575 } 2576 return (0); 2577 } 2578 2579 /*ARGSUSED*/ 2580 static int 2581 whatis_run_pages(mdb_whatis_t *w, void *ignored) 2582 { 2583 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) { 2584 mdb_warn("couldn't find memseg walker"); 2585 return (1); 2586 } 2587 return (0); 2588 } 2589 2590 /*ARGSUSED*/ 2591 static int 2592 whatis_run_kmem(mdb_whatis_t *w, void *ignored) 2593 { 2594 whatis_info_t wi; 2595 2596 bzero(&wi, sizeof (wi)); 2597 wi.wi_w = w; 2598 2599 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1) 2600 mdb_warn("unable to readvar \"kmem_msb_arena\""); 2601 2602 if (mdb_readvar(&wi.wi_kmem_lite_count, 2603 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16) 2604 wi.wi_kmem_lite_count = 0; 2605 2606 /* 2607 * We process kmem caches in the following order: 2608 * 2609 * non-KMC_NOTOUCH, non-metadata (typically the most interesting) 2610 * metadata (can be huge with KMF_AUDIT) 2611 * KMC_NOTOUCH, non-metadata (see kmem_walk_all()) 2612 */ 2613 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2614 &wi) == -1 || 2615 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2616 &wi) == -1 || 2617 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2618 &wi) == -1) { 2619 mdb_warn("couldn't find kmem_cache walker"); 2620 return (1); 2621 } 2622 return (0); 2623 } 2624 2625 /*ARGSUSED*/ 2626 static int 2627 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2628 { 2629 whatis_info_t wi; 2630 2631 bzero(&wi, sizeof (wi)); 2632 wi.wi_w = w; 2633 2634 if (mdb_walk("vmem_postfix", 2635 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2636 mdb_warn("couldn't find vmem_postfix walker"); 2637 return (1); 2638 } 2639 return (0); 2640 } 2641 2642 typedef struct kmem_log_cpu { 2643 uintptr_t kmc_low; 2644 uintptr_t kmc_high; 2645 } kmem_log_cpu_t; 2646 2647 typedef struct kmem_log_data { 2648 uintptr_t kmd_addr; 2649 kmem_log_cpu_t *kmd_cpu; 2650 } kmem_log_data_t; 2651 2652 int 2653 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2654 kmem_log_data_t *kmd) 2655 { 2656 int i; 2657 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2658 size_t bufsize; 2659 2660 for (i = 0; i < NCPU; i++) { 2661 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2662 break; 2663 } 2664 2665 if (kmd->kmd_addr) { 2666 if (b->bc_cache == NULL) 2667 return (WALK_NEXT); 2668 2669 if (mdb_vread(&bufsize, sizeof (bufsize), 2670 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2671 mdb_warn( 2672 "failed to read cache_bufsize for cache at %p", 2673 b->bc_cache); 2674 return (WALK_ERR); 2675 } 2676 2677 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2678 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2679 return (WALK_NEXT); 2680 } 2681 2682 if (i == NCPU) 2683 mdb_printf(" "); 2684 else 2685 mdb_printf("%3d", i); 2686 2687 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2688 b->bc_timestamp, b->bc_thread); 2689 2690 return (WALK_NEXT); 2691 } 2692 2693 /*ARGSUSED*/ 2694 int 2695 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2696 { 2697 kmem_log_header_t lh; 2698 kmem_cpu_log_header_t clh; 2699 uintptr_t lhp, clhp; 2700 int ncpus; 2701 uintptr_t *cpu; 2702 GElf_Sym sym; 2703 kmem_log_cpu_t *kmc; 2704 int i; 2705 kmem_log_data_t kmd; 2706 uint_t opt_b = FALSE; 2707 2708 if (mdb_getopts(argc, argv, 2709 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2710 return (DCMD_USAGE); 2711 2712 if 
(mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2713 mdb_warn("failed to read 'kmem_transaction_log'"); 2714 return (DCMD_ERR); 2715 } 2716 2717 if (lhp == NULL) { 2718 mdb_warn("no kmem transaction log\n"); 2719 return (DCMD_ERR); 2720 } 2721 2722 mdb_readvar(&ncpus, "ncpus"); 2723 2724 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2725 mdb_warn("failed to read log header at %p", lhp); 2726 return (DCMD_ERR); 2727 } 2728 2729 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2730 2731 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2732 2733 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2734 mdb_warn("couldn't find 'cpu' array"); 2735 return (DCMD_ERR); 2736 } 2737 2738 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2739 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2740 NCPU * sizeof (uintptr_t), sym.st_size); 2741 return (DCMD_ERR); 2742 } 2743 2744 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2745 mdb_warn("failed to read cpu array at %p", sym.st_value); 2746 return (DCMD_ERR); 2747 } 2748 2749 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2750 kmd.kmd_addr = NULL; 2751 kmd.kmd_cpu = kmc; 2752 2753 for (i = 0; i < NCPU; i++) { 2754 2755 if (cpu[i] == NULL) 2756 continue; 2757 2758 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2759 mdb_warn("cannot read cpu %d's log header at %p", 2760 i, clhp); 2761 return (DCMD_ERR); 2762 } 2763 2764 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2765 (uintptr_t)lh.lh_base; 2766 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2767 2768 clhp += sizeof (kmem_cpu_log_header_t); 2769 } 2770 2771 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2772 "TIMESTAMP", "THREAD"); 2773 2774 /* 2775 * If we have been passed an address, print out only log entries 2776 * corresponding to that address. If opt_b is specified, then interpret 2777 * the address as a bufctl. 2778 */ 2779 if (flags & DCMD_ADDRSPEC) { 2780 kmem_bufctl_audit_t b; 2781 2782 if (opt_b) { 2783 kmd.kmd_addr = addr; 2784 } else { 2785 if (mdb_vread(&b, 2786 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2787 mdb_warn("failed to read bufctl at %p", addr); 2788 return (DCMD_ERR); 2789 } 2790 2791 (void) kmem_log_walk(addr, &b, &kmd); 2792 2793 return (DCMD_OK); 2794 } 2795 } 2796 2797 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2798 mdb_warn("can't find kmem log walker"); 2799 return (DCMD_ERR); 2800 } 2801 2802 return (DCMD_OK); 2803 } 2804 2805 typedef struct bufctl_history_cb { 2806 int bhc_flags; 2807 int bhc_argc; 2808 const mdb_arg_t *bhc_argv; 2809 int bhc_ret; 2810 } bufctl_history_cb_t; 2811 2812 /*ARGSUSED*/ 2813 static int 2814 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2815 { 2816 bufctl_history_cb_t *bhc = arg; 2817 2818 bhc->bhc_ret = 2819 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2820 2821 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2822 2823 return ((bhc->bhc_ret == DCMD_OK)? 
WALK_NEXT : WALK_DONE); 2824 } 2825 2826 void 2827 bufctl_help(void) 2828 { 2829 mdb_printf("%s", 2830 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2831 mdb_dec_indent(2); 2832 mdb_printf("%<b>OPTIONS%</b>\n"); 2833 mdb_inc_indent(2); 2834 mdb_printf("%s", 2835 " -v Display the full content of the bufctl, including its stack trace\n" 2836 " -h retrieve the bufctl's transaction history, if available\n" 2837 " -a addr\n" 2838 " filter out bufctls not involving the buffer at addr\n" 2839 " -c caller\n" 2840 " filter out bufctls without the function/PC in their stack trace\n" 2841 " -e earliest\n" 2842 " filter out bufctls timestamped before earliest\n" 2843 " -l latest\n" 2844 " filter out bufctls timestamped after latest\n" 2845 " -t thread\n" 2846 " filter out bufctls not involving thread\n"); 2847 } 2848 2849 int 2850 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2851 { 2852 kmem_bufctl_audit_t bc; 2853 uint_t verbose = FALSE; 2854 uint_t history = FALSE; 2855 uint_t in_history = FALSE; 2856 uintptr_t caller = NULL, thread = NULL; 2857 uintptr_t laddr, haddr, baddr = NULL; 2858 hrtime_t earliest = 0, latest = 0; 2859 int i, depth; 2860 char c[MDB_SYM_NAMLEN]; 2861 GElf_Sym sym; 2862 2863 if (mdb_getopts(argc, argv, 2864 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2865 'h', MDB_OPT_SETBITS, TRUE, &history, 2866 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2867 'c', MDB_OPT_UINTPTR, &caller, 2868 't', MDB_OPT_UINTPTR, &thread, 2869 'e', MDB_OPT_UINT64, &earliest, 2870 'l', MDB_OPT_UINT64, &latest, 2871 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2872 return (DCMD_USAGE); 2873 2874 if (!(flags & DCMD_ADDRSPEC)) 2875 return (DCMD_USAGE); 2876 2877 if (in_history && !history) 2878 return (DCMD_USAGE); 2879 2880 if (history && !in_history) { 2881 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2882 UM_SLEEP | UM_GC); 2883 bufctl_history_cb_t bhc; 2884 2885 nargv[0].a_type = MDB_TYPE_STRING; 2886 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2887 2888 for (i = 0; i < argc; i++) 2889 nargv[i + 1] = argv[i]; 2890 2891 /* 2892 * When in history mode, we treat each element as if it 2893 * were in a seperate loop, so that the headers group 2894 * bufctls with similar histories. 2895 */ 2896 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2897 bhc.bhc_argc = argc + 1; 2898 bhc.bhc_argv = nargv; 2899 bhc.bhc_ret = DCMD_OK; 2900 2901 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2902 addr) == -1) { 2903 mdb_warn("unable to walk bufctl_history"); 2904 return (DCMD_ERR); 2905 } 2906 2907 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2908 mdb_printf("\n"); 2909 2910 return (bhc.bhc_ret); 2911 } 2912 2913 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2914 if (verbose) { 2915 mdb_printf("%16s %16s %16s %16s\n" 2916 "%<u>%16s %16s %16s %16s%</u>\n", 2917 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2918 "", "CACHE", "LASTLOG", "CONTENTS"); 2919 } else { 2920 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2921 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2922 } 2923 } 2924 2925 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2926 mdb_warn("couldn't read bufctl at %p", addr); 2927 return (DCMD_ERR); 2928 } 2929 2930 /* 2931 * Guard against bogus bc_depth in case the bufctl is corrupt or 2932 * the address does not really refer to a bufctl. 
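 * Clamping the depth to KMEM_STACK_DEPTH keeps the stack-printing loops
 * below from walking past the end of bc_stack[].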
2933 */ 2934 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2935 2936 if (caller != NULL) { 2937 laddr = caller; 2938 haddr = caller + sizeof (caller); 2939 2940 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2941 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2942 /* 2943 * We were provided an exact symbol value; any 2944 * address in the function is valid. 2945 */ 2946 laddr = (uintptr_t)sym.st_value; 2947 haddr = (uintptr_t)sym.st_value + sym.st_size; 2948 } 2949 2950 for (i = 0; i < depth; i++) 2951 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2952 break; 2953 2954 if (i == depth) 2955 return (DCMD_OK); 2956 } 2957 2958 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2959 return (DCMD_OK); 2960 2961 if (earliest != 0 && bc.bc_timestamp < earliest) 2962 return (DCMD_OK); 2963 2964 if (latest != 0 && bc.bc_timestamp > latest) 2965 return (DCMD_OK); 2966 2967 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2968 return (DCMD_OK); 2969 2970 if (flags & DCMD_PIPE_OUT) { 2971 mdb_printf("%#lr\n", addr); 2972 return (DCMD_OK); 2973 } 2974 2975 if (verbose) { 2976 mdb_printf( 2977 "%<b>%16p%</b> %16p %16llx %16p\n" 2978 "%16s %16p %16p %16p\n", 2979 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2980 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2981 2982 mdb_inc_indent(17); 2983 for (i = 0; i < depth; i++) 2984 mdb_printf("%a\n", bc.bc_stack[i]); 2985 mdb_dec_indent(17); 2986 mdb_printf("\n"); 2987 } else { 2988 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2989 bc.bc_timestamp, bc.bc_thread); 2990 2991 for (i = 0; i < depth; i++) { 2992 if (mdb_lookup_by_addr(bc.bc_stack[i], 2993 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2994 continue; 2995 if (strncmp(c, "kmem_", 5) == 0) 2996 continue; 2997 mdb_printf(" %a\n", bc.bc_stack[i]); 2998 break; 2999 } 3000 3001 if (i >= depth) 3002 mdb_printf("\n"); 3003 } 3004 3005 return (DCMD_OK); 3006 } 3007 3008 typedef struct kmem_verify { 3009 uint64_t *kmv_buf; /* buffer to read cache contents into */ 3010 size_t kmv_size; /* number of bytes in kmv_buf */ 3011 int kmv_corruption; /* > 0 if corruption found. */ 3012 int kmv_besilent; /* report actual corruption sites */ 3013 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3014 } kmem_verify_t; 3015 3016 /* 3017 * verify_pattern() 3018 * verify that buf is filled with the pattern pat. 3019 */ 3020 static int64_t 3021 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3022 { 3023 /*LINTED*/ 3024 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3025 uint64_t *buf; 3026 3027 for (buf = buf_arg; buf < bufend; buf++) 3028 if (*buf != pat) 3029 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3030 return (-1); 3031 } 3032 3033 /* 3034 * verify_buftag() 3035 * verify that btp->bt_bxstat == (bcp ^ pat) 3036 */ 3037 static int 3038 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3039 { 3040 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3041 } 3042 3043 /* 3044 * verify_free() 3045 * verify the integrity of a free block of memory by checking 3046 * that it is filled with 0xdeadbeef and that its buftag is sane. 
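 * This is the per-buffer callback that ::kmem_verify uses for caches with
 * KMF_DEADBEEF set; it is driven by the "freemem" walker, so it is only
 * applied to buffers that are currently free.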
3047 */ 3048 /*ARGSUSED1*/ 3049 static int 3050 verify_free(uintptr_t addr, const void *data, void *private) 3051 { 3052 kmem_verify_t *kmv = (kmem_verify_t *)private; 3053 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3054 int64_t corrupt; /* corruption offset */ 3055 kmem_buftag_t *buftagp; /* ptr to buftag */ 3056 kmem_cache_t *cp = &kmv->kmv_cache; 3057 int besilent = kmv->kmv_besilent; 3058 3059 /*LINTED*/ 3060 buftagp = KMEM_BUFTAG(cp, buf); 3061 3062 /* 3063 * Read the buffer to check. 3064 */ 3065 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3066 if (!besilent) 3067 mdb_warn("couldn't read %p", addr); 3068 return (WALK_NEXT); 3069 } 3070 3071 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3072 KMEM_FREE_PATTERN)) >= 0) { 3073 if (!besilent) 3074 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3075 addr, (uintptr_t)addr + corrupt); 3076 goto corrupt; 3077 } 3078 /* 3079 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3080 * the first bytes of the buffer, hence we cannot check for red 3081 * zone corruption. 3082 */ 3083 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3084 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3085 if (!besilent) 3086 mdb_printf("buffer %p (free) seems to " 3087 "have a corrupt redzone pattern\n", addr); 3088 goto corrupt; 3089 } 3090 3091 /* 3092 * confirm bufctl pointer integrity. 3093 */ 3094 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3095 if (!besilent) 3096 mdb_printf("buffer %p (free) has a corrupt " 3097 "buftag\n", addr); 3098 goto corrupt; 3099 } 3100 3101 return (WALK_NEXT); 3102 corrupt: 3103 kmv->kmv_corruption++; 3104 return (WALK_NEXT); 3105 } 3106 3107 /* 3108 * verify_alloc() 3109 * Verify that the buftag of an allocated buffer makes sense with respect 3110 * to the buffer. 3111 */ 3112 /*ARGSUSED1*/ 3113 static int 3114 verify_alloc(uintptr_t addr, const void *data, void *private) 3115 { 3116 kmem_verify_t *kmv = (kmem_verify_t *)private; 3117 kmem_cache_t *cp = &kmv->kmv_cache; 3118 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3119 /*LINTED*/ 3120 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3121 uint32_t *ip = (uint32_t *)buftagp; 3122 uint8_t *bp = (uint8_t *)buf; 3123 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3124 int besilent = kmv->kmv_besilent; 3125 3126 /* 3127 * Read the buffer to check. 3128 */ 3129 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3130 if (!besilent) 3131 mdb_warn("couldn't read %p", addr); 3132 return (WALK_NEXT); 3133 } 3134 3135 /* 3136 * There are two cases to handle: 3137 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3138 * 0xfeedfacefeedface at the end of it 3139 * 2. If the buf was alloc'd using kmem_alloc, it will have 3140 * 0xbb just past the end of the region in use. At the buftag, 3141 * it will have 0xfeedface (or, if the whole buffer is in use, 3142 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3143 * endianness), followed by 32 bits containing the offset of the 3144 * 0xbb byte in the buffer. 
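 * As a worked example (sizes purely illustrative): a kmem_alloc(200, ...)
 * buffer served from a 256-byte cache would have bp[200] == 0xbb and
 * ip[1] == KMEM_SIZE_ENCODE(200), so the code below recovers the offset
 * with KMEM_SIZE_DECODE() and looks for the redzone byte there.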
3145 * 3146 * Finally, the two 32-bit words that comprise the second half of the 3147 * buftag should xor to KMEM_BUFTAG_ALLOC 3148 */ 3149 3150 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3151 looks_ok = 1; 3152 else if (!KMEM_SIZE_VALID(ip[1])) 3153 size_ok = 0; 3154 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3155 looks_ok = 1; 3156 else 3157 size_ok = 0; 3158 3159 if (!size_ok) { 3160 if (!besilent) 3161 mdb_printf("buffer %p (allocated) has a corrupt " 3162 "redzone size encoding\n", addr); 3163 goto corrupt; 3164 } 3165 3166 if (!looks_ok) { 3167 if (!besilent) 3168 mdb_printf("buffer %p (allocated) has a corrupt " 3169 "redzone signature\n", addr); 3170 goto corrupt; 3171 } 3172 3173 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3174 if (!besilent) 3175 mdb_printf("buffer %p (allocated) has a " 3176 "corrupt buftag\n", addr); 3177 goto corrupt; 3178 } 3179 3180 return (WALK_NEXT); 3181 corrupt: 3182 kmv->kmv_corruption++; 3183 return (WALK_NEXT); 3184 } 3185 3186 /*ARGSUSED2*/ 3187 int 3188 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3189 { 3190 if (flags & DCMD_ADDRSPEC) { 3191 int check_alloc = 0, check_free = 0; 3192 kmem_verify_t kmv; 3193 3194 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3195 addr) == -1) { 3196 mdb_warn("couldn't read kmem_cache %p", addr); 3197 return (DCMD_ERR); 3198 } 3199 3200 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3201 sizeof (kmem_buftag_t); 3202 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3203 kmv.kmv_corruption = 0; 3204 3205 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3206 check_alloc = 1; 3207 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3208 check_free = 1; 3209 } else { 3210 if (!(flags & DCMD_LOOP)) { 3211 mdb_warn("cache %p (%s) does not have " 3212 "redzone checking enabled\n", addr, 3213 kmv.kmv_cache.cache_name); 3214 } 3215 return (DCMD_ERR); 3216 } 3217 3218 if (flags & DCMD_LOOP) { 3219 /* 3220 * table mode, don't print out every corrupt buffer 3221 */ 3222 kmv.kmv_besilent = 1; 3223 } else { 3224 mdb_printf("Summary for cache '%s'\n", 3225 kmv.kmv_cache.cache_name); 3226 mdb_inc_indent(2); 3227 kmv.kmv_besilent = 0; 3228 } 3229 3230 if (check_alloc) 3231 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3232 if (check_free) 3233 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3234 3235 if (flags & DCMD_LOOP) { 3236 if (kmv.kmv_corruption == 0) { 3237 mdb_printf("%-*s %?p clean\n", 3238 KMEM_CACHE_NAMELEN, 3239 kmv.kmv_cache.cache_name, addr); 3240 } else { 3241 char *s = ""; /* optional s in "buffer[s]" */ 3242 if (kmv.kmv_corruption > 1) 3243 s = "s"; 3244 3245 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3246 KMEM_CACHE_NAMELEN, 3247 kmv.kmv_cache.cache_name, addr, 3248 kmv.kmv_corruption, s); 3249 } 3250 } else { 3251 /* 3252 * This is the more verbose mode, when the user has 3253 * type addr::kmem_verify. If the cache was clean, 3254 * nothing will have yet been printed. So say something. 3255 */ 3256 if (kmv.kmv_corruption == 0) 3257 mdb_printf("clean\n"); 3258 3259 mdb_dec_indent(2); 3260 } 3261 } else { 3262 /* 3263 * If the user didn't specify a cache to verify, we'll walk all 3264 * kmem_cache's, specifying ourself as a callback for each... 
3265 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3266 */ 3267 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3268 "Cache Name", "Addr", "Cache Integrity"); 3269 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3270 } 3271 3272 return (DCMD_OK); 3273 } 3274 3275 typedef struct vmem_node { 3276 struct vmem_node *vn_next; 3277 struct vmem_node *vn_parent; 3278 struct vmem_node *vn_sibling; 3279 struct vmem_node *vn_children; 3280 uintptr_t vn_addr; 3281 int vn_marked; 3282 vmem_t vn_vmem; 3283 } vmem_node_t; 3284 3285 typedef struct vmem_walk { 3286 vmem_node_t *vw_root; 3287 vmem_node_t *vw_current; 3288 } vmem_walk_t; 3289 3290 int 3291 vmem_walk_init(mdb_walk_state_t *wsp) 3292 { 3293 uintptr_t vaddr, paddr; 3294 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3295 vmem_walk_t *vw; 3296 3297 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3298 mdb_warn("couldn't read 'vmem_list'"); 3299 return (WALK_ERR); 3300 } 3301 3302 while (vaddr != NULL) { 3303 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3304 vp->vn_addr = vaddr; 3305 vp->vn_next = head; 3306 head = vp; 3307 3308 if (vaddr == wsp->walk_addr) 3309 current = vp; 3310 3311 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3312 mdb_warn("couldn't read vmem_t at %p", vaddr); 3313 goto err; 3314 } 3315 3316 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3317 } 3318 3319 for (vp = head; vp != NULL; vp = vp->vn_next) { 3320 3321 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3322 vp->vn_sibling = root; 3323 root = vp; 3324 continue; 3325 } 3326 3327 for (parent = head; parent != NULL; parent = parent->vn_next) { 3328 if (parent->vn_addr != paddr) 3329 continue; 3330 vp->vn_sibling = parent->vn_children; 3331 parent->vn_children = vp; 3332 vp->vn_parent = parent; 3333 break; 3334 } 3335 3336 if (parent == NULL) { 3337 mdb_warn("couldn't find %p's parent (%p)\n", 3338 vp->vn_addr, paddr); 3339 goto err; 3340 } 3341 } 3342 3343 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3344 vw->vw_root = root; 3345 3346 if (current != NULL) 3347 vw->vw_current = current; 3348 else 3349 vw->vw_current = root; 3350 3351 wsp->walk_data = vw; 3352 return (WALK_NEXT); 3353 err: 3354 for (vp = head; head != NULL; vp = head) { 3355 head = vp->vn_next; 3356 mdb_free(vp, sizeof (vmem_node_t)); 3357 } 3358 3359 return (WALK_ERR); 3360 } 3361 3362 int 3363 vmem_walk_step(mdb_walk_state_t *wsp) 3364 { 3365 vmem_walk_t *vw = wsp->walk_data; 3366 vmem_node_t *vp; 3367 int rval; 3368 3369 if ((vp = vw->vw_current) == NULL) 3370 return (WALK_DONE); 3371 3372 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3373 3374 if (vp->vn_children != NULL) { 3375 vw->vw_current = vp->vn_children; 3376 return (rval); 3377 } 3378 3379 do { 3380 vw->vw_current = vp->vn_sibling; 3381 vp = vp->vn_parent; 3382 } while (vw->vw_current == NULL && vp != NULL); 3383 3384 return (rval); 3385 } 3386 3387 /* 3388 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3389 * children are visited before their parent. We perform the postfix walk 3390 * iteratively (rather than recursively) to allow mdb to regain control 3391 * after each callback. 3392 */ 3393 int 3394 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3395 { 3396 vmem_walk_t *vw = wsp->walk_data; 3397 vmem_node_t *vp = vw->vw_current; 3398 int rval; 3399 3400 /* 3401 * If this node is marked, then we know that we have already visited 3402 * all of its children. 
If the node has any siblings, they need to 3403 * be visited next; otherwise, we need to visit the parent. Note 3404 * that vp->vn_marked will only be zero on the first invocation of 3405 * the step function. 3406 */ 3407 if (vp->vn_marked) { 3408 if (vp->vn_sibling != NULL) 3409 vp = vp->vn_sibling; 3410 else if (vp->vn_parent != NULL) 3411 vp = vp->vn_parent; 3412 else { 3413 /* 3414 * We have neither a parent, nor a sibling, and we 3415 * have already been visited; we're done. 3416 */ 3417 return (WALK_DONE); 3418 } 3419 } 3420 3421 /* 3422 * Before we visit this node, visit its children. 3423 */ 3424 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3425 vp = vp->vn_children; 3426 3427 vp->vn_marked = 1; 3428 vw->vw_current = vp; 3429 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3430 3431 return (rval); 3432 } 3433 3434 void 3435 vmem_walk_fini(mdb_walk_state_t *wsp) 3436 { 3437 vmem_walk_t *vw = wsp->walk_data; 3438 vmem_node_t *root = vw->vw_root; 3439 int done; 3440 3441 if (root == NULL) 3442 return; 3443 3444 if ((vw->vw_root = root->vn_children) != NULL) 3445 vmem_walk_fini(wsp); 3446 3447 vw->vw_root = root->vn_sibling; 3448 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3449 mdb_free(root, sizeof (vmem_node_t)); 3450 3451 if (done) { 3452 mdb_free(vw, sizeof (vmem_walk_t)); 3453 } else { 3454 vmem_walk_fini(wsp); 3455 } 3456 } 3457 3458 typedef struct vmem_seg_walk { 3459 uint8_t vsw_type; 3460 uintptr_t vsw_start; 3461 uintptr_t vsw_current; 3462 } vmem_seg_walk_t; 3463 3464 /*ARGSUSED*/ 3465 int 3466 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3467 { 3468 vmem_seg_walk_t *vsw; 3469 3470 if (wsp->walk_addr == NULL) { 3471 mdb_warn("vmem_%s does not support global walks\n", name); 3472 return (WALK_ERR); 3473 } 3474 3475 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3476 3477 vsw->vsw_type = type; 3478 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3479 vsw->vsw_current = vsw->vsw_start; 3480 3481 return (WALK_NEXT); 3482 } 3483 3484 /* 3485 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
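 * We exploit that here: vmem_seg_walk_step() treats a walk type of
 * VMEM_NONE (0) as "match any type", which is how the plain vmem_seg
 * walker visits segments of every type.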
3486 */ 3487 #define VMEM_NONE 0 3488 3489 int 3490 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3491 { 3492 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3493 } 3494 3495 int 3496 vmem_free_walk_init(mdb_walk_state_t *wsp) 3497 { 3498 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3499 } 3500 3501 int 3502 vmem_span_walk_init(mdb_walk_state_t *wsp) 3503 { 3504 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3505 } 3506 3507 int 3508 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3509 { 3510 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3511 } 3512 3513 int 3514 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3515 { 3516 vmem_seg_t seg; 3517 vmem_seg_walk_t *vsw = wsp->walk_data; 3518 uintptr_t addr = vsw->vsw_current; 3519 static size_t seg_size = 0; 3520 int rval; 3521 3522 if (!seg_size) { 3523 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3524 mdb_warn("failed to read 'vmem_seg_size'"); 3525 seg_size = sizeof (vmem_seg_t); 3526 } 3527 } 3528 3529 if (seg_size < sizeof (seg)) 3530 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3531 3532 if (mdb_vread(&seg, seg_size, addr) == -1) { 3533 mdb_warn("couldn't read vmem_seg at %p", addr); 3534 return (WALK_ERR); 3535 } 3536 3537 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3538 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3539 rval = WALK_NEXT; 3540 } else { 3541 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3542 } 3543 3544 if (vsw->vsw_current == vsw->vsw_start) 3545 return (WALK_DONE); 3546 3547 return (rval); 3548 } 3549 3550 void 3551 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3552 { 3553 vmem_seg_walk_t *vsw = wsp->walk_data; 3554 3555 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3556 } 3557 3558 #define VMEM_NAMEWIDTH 22 3559 3560 int 3561 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3562 { 3563 vmem_t v, parent; 3564 vmem_kstat_t *vkp = &v.vm_kstat; 3565 uintptr_t paddr; 3566 int ident = 0; 3567 char c[VMEM_NAMEWIDTH]; 3568 3569 if (!(flags & DCMD_ADDRSPEC)) { 3570 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3571 mdb_warn("can't walk vmem"); 3572 return (DCMD_ERR); 3573 } 3574 return (DCMD_OK); 3575 } 3576 3577 if (DCMD_HDRSPEC(flags)) 3578 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3579 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3580 "TOTAL", "SUCCEED", "FAIL"); 3581 3582 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3583 mdb_warn("couldn't read vmem at %p", addr); 3584 return (DCMD_ERR); 3585 } 3586 3587 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3588 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3589 mdb_warn("couldn't trace %p's ancestry", addr); 3590 ident = 0; 3591 break; 3592 } 3593 paddr = (uintptr_t)parent.vm_source; 3594 } 3595 3596 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3597 3598 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3599 addr, VMEM_NAMEWIDTH, c, 3600 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3601 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3602 3603 return (DCMD_OK); 3604 } 3605 3606 void 3607 vmem_seg_help(void) 3608 { 3609 mdb_printf("%s", 3610 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3611 "\n" 3612 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3613 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3614 "information.\n"); 3615 mdb_dec_indent(2); 3616 mdb_printf("%<b>OPTIONS%</b>\n"); 3617 mdb_inc_indent(2); 3618 mdb_printf("%s", 3619 " -v Display the full content of the vmem_seg, including its stack trace\n" 3620 " -s report the size of the segment, instead of the end address\n" 3621 " -c caller\n" 3622 " filter out segments without the function/PC in their stack trace\n" 3623 " -e earliest\n" 3624 " filter out segments timestamped before earliest\n" 3625 " -l latest\n" 3626 " filter out segments timestamped after latest\n" 3627 " -m minsize\n" 3628 " filer out segments smaller than minsize\n" 3629 " -M maxsize\n" 3630 " filer out segments larger than maxsize\n" 3631 " -t thread\n" 3632 " filter out segments not involving thread\n" 3633 " -T type\n" 3634 " filter out segments not of type 'type'\n" 3635 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3636 } 3637 3638 /*ARGSUSED*/ 3639 int 3640 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3641 { 3642 vmem_seg_t vs; 3643 pc_t *stk = vs.vs_stack; 3644 uintptr_t sz; 3645 uint8_t t; 3646 const char *type = NULL; 3647 GElf_Sym sym; 3648 char c[MDB_SYM_NAMLEN]; 3649 int no_debug; 3650 int i; 3651 int depth; 3652 uintptr_t laddr, haddr; 3653 3654 uintptr_t caller = NULL, thread = NULL; 3655 uintptr_t minsize = 0, maxsize = 0; 3656 3657 hrtime_t earliest = 0, latest = 0; 3658 3659 uint_t size = 0; 3660 uint_t verbose = 0; 3661 3662 if (!(flags & DCMD_ADDRSPEC)) 3663 return (DCMD_USAGE); 3664 3665 if (mdb_getopts(argc, argv, 3666 'c', MDB_OPT_UINTPTR, &caller, 3667 'e', MDB_OPT_UINT64, &earliest, 3668 'l', MDB_OPT_UINT64, &latest, 3669 's', MDB_OPT_SETBITS, TRUE, &size, 3670 'm', MDB_OPT_UINTPTR, &minsize, 3671 'M', MDB_OPT_UINTPTR, &maxsize, 3672 't', MDB_OPT_UINTPTR, &thread, 3673 'T', MDB_OPT_STR, &type, 3674 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3675 NULL) != argc) 3676 return (DCMD_USAGE); 3677 3678 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3679 if (verbose) { 3680 mdb_printf("%16s %4s %16s %16s %16s\n" 3681 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3682 "ADDR", "TYPE", "START", "END", "SIZE", 3683 "", "", "THREAD", "TIMESTAMP", ""); 3684 } else { 3685 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3686 "START", size? 
"SIZE" : "END", "WHO"); 3687 } 3688 } 3689 3690 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3691 mdb_warn("couldn't read vmem_seg at %p", addr); 3692 return (DCMD_ERR); 3693 } 3694 3695 if (type != NULL) { 3696 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3697 t = VMEM_ALLOC; 3698 else if (strcmp(type, "FREE") == 0) 3699 t = VMEM_FREE; 3700 else if (strcmp(type, "SPAN") == 0) 3701 t = VMEM_SPAN; 3702 else if (strcmp(type, "ROTR") == 0 || 3703 strcmp(type, "ROTOR") == 0) 3704 t = VMEM_ROTOR; 3705 else if (strcmp(type, "WLKR") == 0 || 3706 strcmp(type, "WALKER") == 0) 3707 t = VMEM_WALKER; 3708 else { 3709 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3710 type); 3711 return (DCMD_ERR); 3712 } 3713 3714 if (vs.vs_type != t) 3715 return (DCMD_OK); 3716 } 3717 3718 sz = vs.vs_end - vs.vs_start; 3719 3720 if (minsize != 0 && sz < minsize) 3721 return (DCMD_OK); 3722 3723 if (maxsize != 0 && sz > maxsize) 3724 return (DCMD_OK); 3725 3726 t = vs.vs_type; 3727 depth = vs.vs_depth; 3728 3729 /* 3730 * debug info, when present, is only accurate for VMEM_ALLOC segments 3731 */ 3732 no_debug = (t != VMEM_ALLOC) || 3733 (depth == 0 || depth > VMEM_STACK_DEPTH); 3734 3735 if (no_debug) { 3736 if (caller != NULL || thread != NULL || earliest != 0 || 3737 latest != 0) 3738 return (DCMD_OK); /* not enough info */ 3739 } else { 3740 if (caller != NULL) { 3741 laddr = caller; 3742 haddr = caller + sizeof (caller); 3743 3744 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3745 sizeof (c), &sym) != -1 && 3746 caller == (uintptr_t)sym.st_value) { 3747 /* 3748 * We were provided an exact symbol value; any 3749 * address in the function is valid. 3750 */ 3751 laddr = (uintptr_t)sym.st_value; 3752 haddr = (uintptr_t)sym.st_value + sym.st_size; 3753 } 3754 3755 for (i = 0; i < depth; i++) 3756 if (vs.vs_stack[i] >= laddr && 3757 vs.vs_stack[i] < haddr) 3758 break; 3759 3760 if (i == depth) 3761 return (DCMD_OK); 3762 } 3763 3764 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3765 return (DCMD_OK); 3766 3767 if (earliest != 0 && vs.vs_timestamp < earliest) 3768 return (DCMD_OK); 3769 3770 if (latest != 0 && vs.vs_timestamp > latest) 3771 return (DCMD_OK); 3772 } 3773 3774 type = (t == VMEM_ALLOC ? "ALLC" : 3775 t == VMEM_FREE ? "FREE" : 3776 t == VMEM_SPAN ? "SPAN" : 3777 t == VMEM_ROTOR ? "ROTR" : 3778 t == VMEM_WALKER ? "WLKR" : 3779 "????"); 3780 3781 if (flags & DCMD_PIPE_OUT) { 3782 mdb_printf("%#lr\n", addr); 3783 return (DCMD_OK); 3784 } 3785 3786 if (verbose) { 3787 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3788 addr, type, vs.vs_start, vs.vs_end, sz); 3789 3790 if (no_debug) 3791 return (DCMD_OK); 3792 3793 mdb_printf("%16s %4s %16p %16llx\n", 3794 "", "", vs.vs_thread, vs.vs_timestamp); 3795 3796 mdb_inc_indent(17); 3797 for (i = 0; i < depth; i++) { 3798 mdb_printf("%a\n", stk[i]); 3799 } 3800 mdb_dec_indent(17); 3801 mdb_printf("\n"); 3802 } else { 3803 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3804 vs.vs_start, size? 
sz : vs.vs_end);
3805
3806 if (no_debug) {
3807 mdb_printf("\n");
3808 return (DCMD_OK);
3809 }
3810
3811 for (i = 0; i < depth; i++) {
3812 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3813 c, sizeof (c), &sym) == -1)
3814 continue;
3815 if (strncmp(c, "vmem_", 5) == 0)
3816 continue;
3817 break;
3818 }
3819 mdb_printf(" %a\n", stk[i]);
3820 }
3821 return (DCMD_OK);
3822 }
3823
3824 typedef struct kmalog_data {
3825 uintptr_t kma_addr;
3826 hrtime_t kma_newest;
3827 } kmalog_data_t;
3828
3829 /*ARGSUSED*/
3830 static int
3831 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3832 {
3833 char name[KMEM_CACHE_NAMELEN + 1];
3834 hrtime_t delta;
3835 int i, depth;
3836 size_t bufsize;
3837
3838 if (bcp->bc_timestamp == 0)
3839 return (WALK_DONE);
3840
3841 if (kma->kma_newest == 0)
3842 kma->kma_newest = bcp->bc_timestamp;
3843
3844 if (kma->kma_addr) {
3845 if (mdb_vread(&bufsize, sizeof (bufsize),
3846 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3847 mdb_warn(
3848 "failed to read cache_bufsize for cache at %p",
3849 bcp->bc_cache);
3850 return (WALK_ERR);
3851 }
3852
3853 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3854 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3855 return (WALK_NEXT);
3856 }
3857
3858 delta = kma->kma_newest - bcp->bc_timestamp;
3859 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3860
3861 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3862 &bcp->bc_cache->cache_name) <= 0)
3863 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3864
3865 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3866 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3867
3868 for (i = 0; i < depth; i++)
3869 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3870
3871 return (WALK_NEXT);
3872 }
3873
3874 int
3875 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3876 {
3877 const char *logname = "kmem_transaction_log";
3878 kmalog_data_t kma;
3879
3880 if (argc > 1)
3881 return (DCMD_USAGE);
3882
3883 kma.kma_newest = 0;
3884 if (flags & DCMD_ADDRSPEC)
3885 kma.kma_addr = addr;
3886 else
3887 kma.kma_addr = NULL;
3888
3889 if (argc > 0) {
3890 if (argv->a_type != MDB_TYPE_STRING)
3891 return (DCMD_USAGE);
3892 if (strcmp(argv->a_un.a_str, "fail") == 0)
3893 logname = "kmem_failure_log";
3894 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3895 logname = "kmem_slab_log";
3896 else
3897 return (DCMD_USAGE);
3898 }
3899
3900 if (mdb_readvar(&addr, logname) == -1) {
3901 mdb_warn("failed to read %s log header pointer", logname);
3902 return (DCMD_ERR);
3903 }
3904
3905 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3906 mdb_warn("failed to walk kmem log");
3907 return (DCMD_ERR);
3908 }
3909
3910 return (DCMD_OK);
3911 }
3912
3913 /*
3914 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3915 * The first piece is a structure which we use to accumulate kmem_cache_t
3916 * addresses of interest. The kmc_add is used as a callback for the kmem_cache
3917 * walker; we either add all caches, or ones named explicitly as arguments.
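 * For example (cache name illustrative), "::kmausers kmem_alloc_256" makes
 * kmc_add() record only the cache named kmem_alloc_256, while a bare
 * "::kmausers" records every cache the walker visits.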
3918 */ 3919 3920 typedef struct kmclist { 3921 const char *kmc_name; /* Name to match (or NULL) */ 3922 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3923 int kmc_nelems; /* Num entries in kmc_caches */ 3924 int kmc_size; /* Size of kmc_caches array */ 3925 } kmclist_t; 3926 3927 static int 3928 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3929 { 3930 void *p; 3931 int s; 3932 3933 if (kmc->kmc_name == NULL || 3934 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3935 /* 3936 * If we have a match, grow our array (if necessary), and then 3937 * add the virtual address of the matching cache to our list. 3938 */ 3939 if (kmc->kmc_nelems >= kmc->kmc_size) { 3940 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3941 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3942 3943 bcopy(kmc->kmc_caches, p, 3944 sizeof (uintptr_t) * kmc->kmc_size); 3945 3946 kmc->kmc_caches = p; 3947 kmc->kmc_size = s; 3948 } 3949 3950 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3951 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3952 } 3953 3954 return (WALK_NEXT); 3955 } 3956 3957 /* 3958 * The second piece of ::kmausers is a hash table of allocations. Each 3959 * allocation owner is identified by its stack trace and data_size. We then 3960 * track the total bytes of all such allocations, and the number of allocations 3961 * to report at the end. Once we have a list of caches, we walk through the 3962 * allocated bufctls of each, and update our hash table accordingly. 3963 */ 3964 3965 typedef struct kmowner { 3966 struct kmowner *kmo_head; /* First hash elt in bucket */ 3967 struct kmowner *kmo_next; /* Next hash elt in chain */ 3968 size_t kmo_signature; /* Hash table signature */ 3969 uint_t kmo_num; /* Number of allocations */ 3970 size_t kmo_data_size; /* Size of each allocation */ 3971 size_t kmo_total_size; /* Total bytes of allocation */ 3972 int kmo_depth; /* Depth of stack trace */ 3973 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3974 } kmowner_t; 3975 3976 typedef struct kmusers { 3977 uintptr_t kmu_addr; /* address of interest */ 3978 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3979 kmowner_t *kmu_hash; /* Hash table of owners */ 3980 int kmu_nelems; /* Number of entries in use */ 3981 int kmu_size; /* Total number of entries */ 3982 } kmusers_t; 3983 3984 static void 3985 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3986 size_t size, size_t data_size) 3987 { 3988 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3989 size_t bucket, signature = data_size; 3990 kmowner_t *kmo, *kmoend; 3991 3992 /* 3993 * If the hash table is full, double its size and rehash everything. 3994 */ 3995 if (kmu->kmu_nelems >= kmu->kmu_size) { 3996 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3997 3998 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3999 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 4000 kmu->kmu_hash = kmo; 4001 kmu->kmu_size = s; 4002 4003 kmoend = kmu->kmu_hash + kmu->kmu_size; 4004 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 4005 kmo->kmo_head = NULL; 4006 4007 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 4008 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 4009 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 4010 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4011 kmu->kmu_hash[bucket].kmo_head = kmo; 4012 } 4013 } 4014 4015 /* 4016 * Finish computing the hash signature from the stack trace, and then 4017 * see if the owner is in the hash table. If so, update our stats. 
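 * The signature is simply data_size plus the sum of the stack PCs, so
 * unrelated call chains can collide; the depth, per-frame, and data_size
 * comparison below is what actually decides whether two owners match.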
4018 */ 4019 for (i = 0; i < depth; i++) 4020 signature += bcp->bc_stack[i]; 4021 4022 bucket = signature & (kmu->kmu_size - 1); 4023 4024 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4025 if (kmo->kmo_signature == signature) { 4026 size_t difference = 0; 4027 4028 difference |= kmo->kmo_data_size - data_size; 4029 difference |= kmo->kmo_depth - depth; 4030 4031 for (i = 0; i < depth; i++) { 4032 difference |= kmo->kmo_stack[i] - 4033 bcp->bc_stack[i]; 4034 } 4035 4036 if (difference == 0) { 4037 kmo->kmo_total_size += size; 4038 kmo->kmo_num++; 4039 return; 4040 } 4041 } 4042 } 4043 4044 /* 4045 * If the owner is not yet hashed, grab the next element and fill it 4046 * in based on the allocation information. 4047 */ 4048 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4049 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4050 kmu->kmu_hash[bucket].kmo_head = kmo; 4051 4052 kmo->kmo_signature = signature; 4053 kmo->kmo_num = 1; 4054 kmo->kmo_data_size = data_size; 4055 kmo->kmo_total_size = size; 4056 kmo->kmo_depth = depth; 4057 4058 for (i = 0; i < depth; i++) 4059 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4060 } 4061 4062 /* 4063 * When ::kmausers is invoked without the -f flag, we simply update our hash 4064 * table with the information from each allocated bufctl. 4065 */ 4066 /*ARGSUSED*/ 4067 static int 4068 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4069 { 4070 const kmem_cache_t *cp = kmu->kmu_cache; 4071 4072 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4073 return (WALK_NEXT); 4074 } 4075 4076 /* 4077 * When ::kmausers is invoked with the -f flag, we print out the information 4078 * for each bufctl as well as updating the hash table. 4079 */ 4080 static int 4081 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4082 { 4083 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4084 const kmem_cache_t *cp = kmu->kmu_cache; 4085 kmem_bufctl_t bufctl; 4086 4087 if (kmu->kmu_addr) { 4088 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4089 mdb_warn("couldn't read bufctl at %p", addr); 4090 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4091 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4092 cp->cache_bufsize) 4093 return (WALK_NEXT); 4094 } 4095 4096 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4097 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4098 4099 for (i = 0; i < depth; i++) 4100 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4101 4102 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4103 return (WALK_NEXT); 4104 } 4105 4106 /* 4107 * We sort our results by allocation size before printing them. 4108 */ 4109 static int 4110 kmownercmp(const void *lp, const void *rp) 4111 { 4112 const kmowner_t *lhs = lp; 4113 const kmowner_t *rhs = rp; 4114 4115 return (rhs->kmo_total_size - lhs->kmo_total_size); 4116 } 4117 4118 /* 4119 * The main engine of ::kmausers is relatively straightforward: First we 4120 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4121 * iterate over the allocated bufctls of each cache in the list. Finally, 4122 * we sort and print our results. 
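 * Each line of the summary output has the form
 *	"<total> bytes for <count> allocations with data size <size>:"
 * followed by one stack frame per line; -e includes the small users that
 * would otherwise be filtered by the thresholds, and -f (or an explicit
 * address) switches to the per-allocation output of kmause2().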
/*
 * The main engine of ::kmausers is relatively straightforward: First we
 * accumulate our list of kmem_cache_t addresses into the kmclist_t.  Next we
 * iterate over the allocated bufctls of each cache in the list.  Finally,
 * we sort and print our results.
 */
/*ARGSUSED*/
int
kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	int mem_threshold = 8192;	/* Minimum # bytes for printing */
	int cnt_threshold = 100;	/* Minimum # blocks for printing */
	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
	int do_all_caches = 1;		/* Do all caches (no arguments) */
	int opt_e = FALSE;		/* Include "small" users */
	int opt_f = FALSE;		/* Print stack traces */

	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
	kmowner_t *kmo, *kmoend;
	int i, oelems;

	kmclist_t kmc;
	kmusers_t kmu;

	bzero(&kmc, sizeof (kmc));
	bzero(&kmu, sizeof (kmu));

	while ((i = mdb_getopts(argc, argv,
	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {

		argv += i;	/* skip past options we just processed */
		argc -= i;	/* adjust argc */

		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
			return (DCMD_USAGE);

		oelems = kmc.kmc_nelems;
		kmc.kmc_name = argv->a_un.a_str;
		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);

		if (kmc.kmc_nelems == oelems) {
			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
			return (DCMD_ERR);
		}

		do_all_caches = 0;
		argv++;
		argc--;
	}

	if (flags & DCMD_ADDRSPEC) {
		opt_f = TRUE;
		kmu.kmu_addr = addr;
	} else {
		kmu.kmu_addr = NULL;
	}

	if (opt_e)
		mem_threshold = cnt_threshold = 0;

	if (opt_f)
		callback = (mdb_walk_cb_t)kmause2;

	if (do_all_caches) {
		kmc.kmc_name = NULL; /* match all cache names */
		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
	}

	for (i = 0; i < kmc.kmc_nelems; i++) {
		uintptr_t cp = kmc.kmc_caches[i];
		kmem_cache_t c;

		if (mdb_vread(&c, sizeof (c), cp) == -1) {
			mdb_warn("failed to read cache at %p", cp);
			continue;
		}

		if (!(c.cache_flags & KMF_AUDIT)) {
			if (!do_all_caches) {
				mdb_warn("KMF_AUDIT is not enabled for %s\n",
				    c.cache_name);
			}
			continue;
		}

		kmu.kmu_cache = &c;
		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
		audited_caches++;
	}

	if (audited_caches == 0 && do_all_caches) {
		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
		return (DCMD_ERR);
	}

	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
	kmoend = kmu.kmu_hash + kmu.kmu_nelems;

	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
		if (kmo->kmo_total_size < mem_threshold &&
		    kmo->kmo_num < cnt_threshold)
			continue;
		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
		for (i = 0; i < kmo->kmo_depth; i++)
			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
	}

	return (DCMD_OK);
}

void
kmausers_help(void)
{
	mdb_printf(
	    "Displays the largest users of the kmem allocator, sorted by\n"
	    "total allocation size.  If one or more caches are specified,\n"
	    "only those caches will be searched.  By default, all caches\n"
	    "are searched.  If an address is specified, then only those\n"
	    "allocations which include the given address are displayed.\n"
	    "Specifying an address implies -f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n");
}
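
/*
 * Typical invocations (illustrative; any KMF_AUDIT-enabled cache name may
 * be substituted for kmem_alloc_256):
 *
 *	> ::kmausers			summarize all audited caches
 *	> ::kmausers -e			also include "small" users
 *	> ::kmausers -f kmem_alloc_256	per-allocation detail for one cache
 *	> addr::kmausers		allocations containing addr (implies -f)
 */
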
static int
kmem_ready_check(void)
{
	int ready;

	if (mdb_readvar(&ready, "kmem_ready") < 0)
		return (-1); /* errno is set for us */

	return (ready);
}

void
kmem_statechange(void)
{
	static int been_ready = 0;

	if (been_ready)
		return;

	if (kmem_ready_check() <= 0)
		return;

	been_ready = 1;
	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
}

void
kmem_init(void)
{
	mdb_walker_t w = {
		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
		list_walk_step, list_walk_fini
	};

	/*
	 * If kmem is ready, we'll need to invoke the kmem_cache walker
	 * immediately and use it to initialize the per-cache walkers.
	 * Walkers in the linkage structure won't be ready until _mdb_init
	 * returns, so we'll need to add this one manually.  If kmem isn't
	 * ready, we'll register a callback that will allow us to defer
	 * cache walking until it is.
	 */
	if (mdb_add_walker(&w) != 0) {
		mdb_warn("failed to add kmem_cache walker");
		return;
	}

	kmem_statechange();

	/* register our ::whatis handlers */
	mdb_whatis_register("modules", whatis_run_modules, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("threads", whatis_run_threads, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("pages", whatis_run_pages, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("kmem", whatis_run_kmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
}
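
/*
 * ::whatthread scans every loaded thread's stack for a given pointer value.
 * An illustrative session (addresses invented): the terse form prints the
 * address of each thread whose stack contains the target, and -v prints
 * each matching stack location as well.
 *
 *	> ffffff01d8b5e000::whatthread
 *	0xffffff0007c8dc40
 *	> ffffff01d8b5e000::whatthread -v
 *	ffffff0007c8de38 in thread ffffff0007c8dc40's stack ...
 */
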
typedef struct whatthread {
	uintptr_t wt_target;
	int wt_verbose;
} whatthread_t;

static int
whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
{
	uintptr_t current, data;

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	/*
	 * Warn about swapped out threads, but drive on anyway
	 */
	if (!(t->t_schedflag & TS_LOAD)) {
		mdb_warn("thread %p's stack swapped out\n", addr);
		return (WALK_NEXT);
	}

	/*
	 * Search the thread's stack for the given pointer.  Note that it would
	 * be more efficient to follow ::kgrep's lead and read in page-sized
	 * chunks, but this routine is already fast and simple.
	 */
	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
	    current += sizeof (uintptr_t)) {
		if (mdb_vread(&data, sizeof (data), current) == -1) {
			mdb_warn("couldn't read thread %p's stack at %p",
			    addr, current);
			return (WALK_ERR);
		}

		if (data == w->wt_target) {
			if (w->wt_verbose) {
				mdb_printf("%p in thread %p's stack%s\n",
				    current, addr, stack_active(t, current));
			} else {
				mdb_printf("%#lr\n", addr);
				return (WALK_NEXT);
			}
		}
	}

	return (WALK_NEXT);
}

int
whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	whatthread_t w;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	w.wt_verbose = FALSE;
	w.wt_target = addr;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
	    == -1) {
		mdb_warn("couldn't walk threads");
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}