/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2018 Joyent, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright 2024 Oxide Computer Company
 */

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <mdb/mdb_whatis.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "avl.h"
#include "combined.h"
#include "dist.h"
#include "kmem.h"
#include "list.h"

#define	dprintf(x) if (mdb_debug_level) { \
	mdb_printf("kmem debug: ");  \
	/*CSTYLED*/\
	mdb_printf x ;\
}

#define	KM_ALLOCATED		0x01
#define	KM_FREE			0x02
#define	KM_BUFCTL		0x04
#define	KM_CONSTRUCTED		0x08	/* only constructed free buffers */
#define	KM_HASH			0x10

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
	mdb_walker_t w;
	char descr[64];

	(void) mdb_snprintf(descr, sizeof (descr),
	    "walk the %s cache", c->cache_name);

	w.walk_name = c->cache_name;
	w.walk_descr = descr;
	w.walk_init = kmem_walk_init;
	w.walk_step = kmem_walk_step;
	w.walk_fini = kmem_walk_fini;
	w.walk_init_arg = (void *)addr;

	if (mdb_add_walker(&w) == -1)
		mdb_warn("failed to add %s walker", c->cache_name);

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	mdb_debug_level ^= 1;

	mdb_printf("kmem: debugging is now %s\n",
	    mdb_debug_level ? "on" : "off");

	return (DCMD_OK);
}

int
kmem_cache_walk_init(mdb_walk_state_t *wsp)
{
	GElf_Sym sym;

	if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
		mdb_warn("couldn't find kmem_caches");
		return (WALK_ERR);
	}

	wsp->walk_addr = (uintptr_t)sym.st_value;

	return (list_walk_init_named(wsp, "cache list", "cache"));
}

int
kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0) {
		mdb_warn("kmem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'cpu'");
		return (WALK_ERR);
	}

	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}

int
kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = (uintptr_t)wsp->walk_data;
	const cpu_t *cpu = wsp->walk_layer;
	kmem_cpu_cache_t cc;

	caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);

	if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
		mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
}

static int
kmem_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;
	uintptr_t caddr = (uintptr_t)arg;
	if ((uintptr_t)sp->slab_cache != caddr) {
		mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
		    saddr, caddr, sp->slab_cache);
		return (-1);
	}

	return (0);
}

static int
kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;

	int rc = kmem_slab_check(p, saddr, arg);
	if (rc != 0) {
		return (rc);
	}

	if (!KMEM_SLAB_IS_PARTIAL(sp)) {
		mdb_warn("slab %p is not a partial slab\n", saddr);
		return (-1);
	}

	return (0);
}

static int
kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_slab_t *sp = p;

	int rc = kmem_slab_check(p, saddr, arg);
	if (rc != 0) {
		return (rc);
	}

	if (!KMEM_SLAB_IS_ALL_USED(sp)) {
		mdb_warn("slab %p is not completely allocated\n", saddr);
		return (-1);
	}

	return (0);
}

typedef struct {
	uintptr_t kns_cache_addr;
	int kns_nslabs;
} kmem_nth_slab_t;

static int
kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
{
	kmem_nth_slab_t *chkp = arg;

	int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
	if (rc != 0) {
		return (rc);
	}

	return (chkp->kns_nslabs-- == 0 ? 1 : 0);
}

static int
kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_complete_slab_check, (void *)caddr));
}

static int
kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_partial_slabs));

	return (avl_walk_init_checked(wsp, "slab list", "slab",
	    kmem_partial_slab_check, (void *)caddr));
}

int
kmem_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;

	if (caddr == 0) {
		mdb_warn("kmem_slab doesn't support global walks\n");
		return (WALK_ERR);
	}

	combined_walk_init(wsp);
	combined_walk_add(wsp,
	    kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
	combined_walk_add(wsp,
	    kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);

	return (WALK_NEXT);
}

static int
kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_nth_slab_t *chk;

	chk = mdb_alloc(sizeof (kmem_nth_slab_t),
	    UM_SLEEP | UM_GC);
	chk->kns_cache_addr = caddr;
	chk->kns_nslabs = 1;
	wsp->walk_addr = (uintptr_t)(caddr +
	    offsetof(kmem_cache_t, cache_complete_slabs));

	return (list_walk_init_checked(wsp, "slab list", "slab",
	    kmem_nth_slab_check, chk));
}

int
kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	kmem_cache_t c;

	if (caddr == 0) {
		mdb_warn("kmem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", caddr);
		return (WALK_ERR);
	}

	combined_walk_init(wsp);

	/*
	 * Some consumers (umem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache. So
	 * if there are *no* partial slabs, report the first full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if (c.cache_partial_slabs.avl_numnodes == 0) {
		combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
		    list_walk_step, list_walk_fini);
	} else {
		combined_walk_add(wsp, kmem_partial_slab_walk_init,
		    avl_walk_step, avl_walk_fini);
	}

	return (WALK_NEXT);
}

int
kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	const char *filter = NULL;

	if (mdb_getopts(ac, argv,
	    'n', MDB_OPT_STR, &filter,
	    NULL) != ac) {
		return (DCMD_USAGE);
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
		return (DCMD_OK);

	mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}

void
kmem_cache_help(void)
{
	mdb_printf("%s", "Print kernel memory caches.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"\n"
"Column\tDescription\n"
"\n"
"ADDR\t\taddress of kmem cache\n"
"NAME\t\tname of kmem cache\n"
"FLAG\t\tvarious cache state flags\n"
"CFLAG\t\tcache creation flags\n"
"BUFSIZE\tobject size in bytes\n"
"BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
}

#define	LABEL_WIDTH	11
static void
kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
    size_t maxbuckets, size_t minbucketsize)
{
	uint64_t total;
	int buckets;
	int i;
	const int *distarray;
	int complete[2];

	buckets = buffers_per_slab;

	total = 0;
	for (i = 0; i <= buffers_per_slab; i++)
		total += ks_bucket[i];

	if (maxbuckets > 1)
		buckets = MIN(buckets, maxbuckets);

	if (minbucketsize > 1) {
		/*
		 * minbucketsize does not apply to the first bucket reserved
		 * for completely allocated slabs
		 */
		buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
		    minbucketsize));
		if ((buckets < 2) && (buffers_per_slab > 1)) {
			buckets = 2;
			minbucketsize = (buffers_per_slab - 1);
		}
	}

	/*
	 * The first printed bucket is reserved for completely allocated slabs.
	 * Passing (buckets - 1) excludes that bucket from the generated
	 * distribution, since we're handling it as a special case.
	 */
	complete[0] = buffers_per_slab;
	complete[1] = buffers_per_slab + 1;
	distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);

	mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
	dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

	dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
	/*
	 * Print bucket ranges in descending order after the first bucket for
	 * completely allocated slabs, so a person can see immediately whether
	 * or not there is fragmentation without having to scan possibly
	 * multiple screens of output.  Starting at (buckets - 2) excludes the
	 * extra terminating bucket.
	 */
	for (i = buckets - 2; i >= 0; i--) {
		dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
	}
	mdb_printf("\n");
}
#undef LABEL_WIDTH

/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
	*is_slab = B_TRUE;
	return (WALK_DONE);
}

/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
	/*
	 * The "kmem_partial_slab" walker reports the first full slab if there
	 * are no partial slabs (for the sake of consumers that require at
	 * least one callback if there are any buffers in the cache).
	 */
	*is_slab = KMEM_SLAB_IS_PARTIAL(sp);
	return (WALK_DONE);
}

typedef struct kmem_slab_usage {
	int ksu_refcnt;		/* count of allocated buffers on slab */
	boolean_t ksu_nomove;	/* slab marked non-reclaimable */
} kmem_slab_usage_t;

typedef struct kmem_slab_stats {
	const kmem_cache_t *ks_cp;
	int ks_slabs;			/* slabs in cache */
	int ks_partial_slabs;		/* partially allocated slabs in cache */
	uint64_t ks_unused_buffers;	/* total unused buffers in cache */
	int ks_max_buffers_per_slab;	/* max buffers per slab */
	int ks_usage_len;		/* ks_usage array length */
	kmem_slab_usage_t *ks_usage;	/* partial slab usage */
	uint_t *ks_bucket;		/* slab usage distribution */
} kmem_slab_stats_t;

/*ARGSUSED*/
static int
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
	kmem_slab_usage_t *ksu;
	long unused;

	ks->ks_slabs++;
	ks->ks_bucket[sp->slab_refcnt]++;

	unused = (sp->slab_chunks - sp->slab_refcnt);
	if (unused == 0) {
		return (WALK_NEXT);
	}

	ks->ks_partial_slabs++;
	ks->ks_unused_buffers += unused;

	if (ks->ks_partial_slabs > ks->ks_usage_len) {
		kmem_slab_usage_t *usage;
		int len = ks->ks_usage_len;

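		/*
		 * Grow the partial-slab usage array geometrically
		 * (0 -> 16 -> 32 -> 64 ...), copying over any entries
		 * gathered so far.
		 */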
		len = (len == 0 ? 16 : len * 2);
		usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
		if (ks->ks_usage != NULL) {
			bcopy(ks->ks_usage, usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
			mdb_free(ks->ks_usage,
			    ks->ks_usage_len * sizeof (kmem_slab_usage_t));
		}
		ks->ks_usage = usage;
		ks->ks_usage_len = len;
	}

	ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
	ksu->ksu_refcnt = sp->slab_refcnt;
	ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
	return (WALK_NEXT);
}

static void
kmem_slabs_header()
{
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "", "", "Partial", "", "Unused", "");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
	mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
	    "-------------------------", "--------", "--------", "---------",
	    "---------", "------");
}

int
kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	kmem_cache_t c;
	kmem_slab_stats_t stats;
	mdb_walk_cb_t cb;
	int pct;
	int tenths_pct;
	size_t maxbuckets = 1;
	size_t minbucketsize = 0;
	const char *filter = NULL;
	const char *name = NULL;
	uint_t opt_v = FALSE;
	boolean_t buckets = B_FALSE;
	boolean_t skip = B_FALSE;

	if (mdb_getopts(argc, argv,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'n', MDB_OPT_STR, &filter,
	    'N', MDB_OPT_STR, &name,
	    'v', MDB_OPT_SETBITS, TRUE, &opt_v,
	    NULL) != argc) {
		return (DCMD_USAGE);
	}

	if ((maxbuckets != 1) || (minbucketsize != 0)) {
		buckets = B_TRUE;
	}

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
		    argv) == -1) {
			mdb_warn("can't walk kmem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read kmem_cache at %p", addr);
		return (DCMD_ERR);
	}

	if (name == NULL) {
		skip = ((filter != NULL) &&
		    (strstr(c.cache_name, filter) == NULL));
	} else if (filter == NULL) {
		skip = (strcmp(c.cache_name, name) != 0);
	} else {
		/* match either -n or -N */
		skip = ((strcmp(c.cache_name, name) != 0) &&
		    (strstr(c.cache_name, filter) == NULL));
	}

	if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
		kmem_slabs_header();
	} else if ((opt_v || buckets) && !skip) {
		if (DCMD_HDRSPEC(flags)) {
			kmem_slabs_header();
		} else {
			boolean_t is_slab = B_FALSE;
			const char *walker_name;
			if (opt_v) {
				cb = (mdb_walk_cb_t)kmem_first_partial_slab;
				walker_name = "kmem_slab_partial";
			} else {
				cb = (mdb_walk_cb_t)kmem_first_slab;
				walker_name = "kmem_slab";
			}
			(void) mdb_pwalk(walker_name, cb, &is_slab, addr);
			if (is_slab) {
				kmem_slabs_header();
			}
		}
	}

	if (skip) {
		return (DCMD_OK);
	}

	bzero(&stats, sizeof (kmem_slab_stats_t));
	stats.ks_cp = &c;
	stats.ks_max_buffers_per_slab = c.cache_maxchunks;
	/* +1 to include a zero bucket */
	stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket), UM_SLEEP);
	cb = (mdb_walk_cb_t)kmem_slablist_stat;
	(void) mdb_pwalk("kmem_slab", cb, &stats, addr);

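	/*
	 * The waste figure is computed in fixed point: pct is first in
	 * hundredths of a percent, and tenths_pct is rounded to the nearest
	 * tenth.  As an illustrative example (made-up numbers), 37 unused
	 * buffers out of 400 total gives pct = 925 and tenths_pct = 3,
	 * which prints as "9.3%" below.
	 */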
	if (c.cache_buftotal == 0) {
		pct = 0;
		tenths_pct = 0;
	} else {
		uint64_t n = stats.ks_unused_buffers * 10000;
		pct = (int)(n / c.cache_buftotal);
		tenths_pct = pct - ((pct / 100) * 100);
		tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
		if (tenths_pct == 10) {
			pct += 100;
			tenths_pct = 0;
		}
	}

	pct /= 100;
	mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
	    stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
	    stats.ks_unused_buffers, pct, tenths_pct);

	if (maxbuckets == 0) {
		maxbuckets = stats.ks_max_buffers_per_slab;
	}

	if (((maxbuckets > 1) || (minbucketsize > 0)) &&
	    (stats.ks_slabs > 0)) {
		mdb_printf("\n");
		kmem_slabs_print_dist(stats.ks_bucket,
		    stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
	}

	mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
	    sizeof (*stats.ks_bucket));

	if (!opt_v) {
		return (DCMD_OK);
	}

	if (opt_v && (stats.ks_partial_slabs > 0)) {
		int i;
		kmem_slab_usage_t *ksu;

		mdb_printf(" %d complete (%d), %d partial:",
		    (stats.ks_slabs - stats.ks_partial_slabs),
		    stats.ks_max_buffers_per_slab,
		    stats.ks_partial_slabs);

		for (i = 0; i < stats.ks_partial_slabs; i++) {
			ksu = &stats.ks_usage[i];
			mdb_printf(" %d%s", ksu->ksu_refcnt,
			    (ksu->ksu_nomove ? "*" : ""));
		}
		mdb_printf("\n\n");
	}

	if (stats.ks_usage_len > 0) {
		mdb_free(stats.ks_usage,
		    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
	}

	return (DCMD_OK);
}

void
kmem_slabs_help(void)
{
	mdb_printf("%s",
	    "Display slab usage per kmem cache.\n\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -N name\n"
"        exact name of kmem cache\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete, 3 partial (8): 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with fewer than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where all of its buffers can be allocated. Taking a slab\n"
"        off the partial slab list (either with all buffers freed or all\n"
"        buffers allocated) reduces cache fragmentation.\n"
"\n"
"        A slab's allocated buffer count representing a partial slab (9 in\n"
"        the example below) may be marked as follows:\n"
"\n"
"        9*   An asterisk indicates that kmem has marked the slab non-\n"
"        reclaimable because the kmem client refused to move one of the\n"
"        slab's buffers. Since kmem does not expect to completely free the\n"
"        slab, it moves it to the front of the list in the hope of\n"
"        completely allocating it instead. A slab marked with an asterisk\n"
"        stays marked for as long as it remains on the partial slab list.\n"
"\n"
"Column\t\tDescription\n"
"\n"
"Cache Name\t\tname of kmem cache\n"
"Slabs\t\t\ttotal slab count\n"
"Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
"Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
"Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
"Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
"\t\t\t  for accounting structures (debug mode), slab\n"
"\t\t\t  coloring (incremental small offsets to stagger\n"
"\t\t\t  buffer alignment), or the per-CPU magazine layer\n");
}

static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t p1 = *((uintptr_t *)lhs);
	uintptr_t p2 = *((uintptr_t *)rhs);

	if (p1 < p2)
		return (-1);
	if (p1 > p2)
		return (1);
	return (0);
}

static int
bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
{
	const kmem_bufctl_audit_t *bcp1 = *lhs;
	const kmem_bufctl_audit_t *bcp2 = *rhs;

	if (bcp1->bc_timestamp > bcp2->bc_timestamp)
		return (-1);

	if (bcp1->bc_timestamp < bcp2->bc_timestamp)
		return (1);

	return (0);
}

typedef struct kmem_hash_walk {
	uintptr_t *kmhw_table;
	size_t kmhw_nelems;
	size_t kmhw_pos;
	kmem_bufctl_t kmhw_cur;
} kmem_hash_walk_t;

int
kmem_hash_walk_init(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw;
	uintptr_t *hash;
	kmem_cache_t c;
	uintptr_t haddr, addr = wsp->walk_addr;
	size_t nelems;
	size_t hsize;

	if (addr == 0) {
		mdb_warn("kmem_hash doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		return (WALK_ERR);
	}

	if (!(c.cache_flags & KMF_HASH)) {
		mdb_warn("cache %p doesn't have a hash table\n", addr);
		return (WALK_DONE);		/* nothing to do */
	}

	kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
	kmhw->kmhw_cur.bc_next = NULL;
	kmhw->kmhw_pos = 0;

	kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
	hsize = nelems * sizeof (uintptr_t);
	haddr = (uintptr_t)c.cache_hash_table;

	kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
	if (mdb_vread(hash, hsize, haddr) == -1) {
		mdb_warn("failed to read hash table at %p", haddr);
		mdb_free(hash, hsize);
		mdb_free(kmhw, sizeof (kmem_hash_walk_t));
		return (WALK_ERR);
	}

	wsp->walk_data = kmhw;

	return (WALK_NEXT);
}

int
kmem_hash_walk_step(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;
	uintptr_t addr = 0;

	if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == 0) {
		while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
			if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != 0)
				break;
		}
	}
	if (addr == 0)
		return (WALK_DONE);

	if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
		mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
		return (WALK_ERR);
	}

	return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
}

void
kmem_hash_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_hash_walk_t *kmhw = wsp->walk_data;

	if (kmhw == NULL)
		return;

	mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
	mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
	uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
	kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
		mdb_warn("unable to read hash bucket for %p in cache %p",
		    buf, caddr);
		return (-1);
	}

	while (bcp != NULL) {
		if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
		    (uintptr_t)bcp) == -1) {
			mdb_warn("unable to read bufctl at %p", bcp);
			return (-1);
		}
		if (bc.bc_addr == buf) {
			*out = (uintptr_t)bcp;
			return (0);
		}
		bcp = bc.bc_next;
	}

	mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
	return (-1);
}

int
kmem_get_magsize(const kmem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	kmem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with KMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & KMF_NOMAGAZINE))
		return (res);

	if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'kmem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
 */
size_t
kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
{
	int magsize;
	size_t cache_est;

	cache_est = cp->cache_buftotal;

	(void) mdb_pwalk("kmem_slab_partial",
	    (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);

	if ((magsize = kmem_get_magsize(cp)) != 0) {
		size_t mag_est = cp->cache_full.ml_total * magsize;

		if (cache_est >= mag_est) {
			cache_est -= mag_est;
		} else {
			mdb_warn("cache %p's magazine layer holds more buffers "
			    "than the slab layer.\n", addr);
		}
	}
	return (cache_est);
}

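/*
 * Helper for kmem_read_magazines(): read one magazine's rounds into the
 * local maglist, bailing out if the fudge-factor limit (magmax) is
 * exceeded.
 */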
#define	READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
		mdb_warn("couldn't read magazine at %p", kmp); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}

int
kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	kmem_magazine_t *kmp, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = kmem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(8)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
	magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);

	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
		READMAG_ROUNDS(magsize);
		kmp = mp->mag_next;

		if (kmp == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 *
	 * In order to prevent inconsistent dumps, rounds and prounds
	 * are copied aside before dumping begins.
	 */
	for (cpu = 0; cpu < ncpus; cpu++) {
		kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
		short rounds, prounds;

		if (KMEM_DUMPCC(ccp)) {
			rounds = ccp->cc_dump_rounds;
			prounds = ccp->cc_dump_prounds;
		} else {
			rounds = ccp->cc_rounds;
			prounds = ccp->cc_prounds;
		}

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (rounds > 0 &&
		    (kmp = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", rounds));
			READMAG_ROUNDS(rounds);
		}

		if (prounds > 0 &&
		    (kmp = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    prounds));
			READMAG_ROUNDS(prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}

static int
kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}

static int
bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	kmem_bufctl_audit_t b;

	/*
	 * if KMF_AUDIT is not set, we know that we're looking at a
	 * kmem_bufctl_t.
	 */
	if (!(cp->cache_flags & KMF_AUDIT) ||
	    mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
		(void) memset(&b, 0, sizeof (b));
		if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
}

typedef struct kmem_walk {
	int kmw_type;

	uintptr_t kmw_addr;		/* cache address */
	kmem_cache_t *kmw_cp;
	size_t kmw_csize;

	/*
	 * magazine layer
	 */
	void **kmw_maglist;
	size_t kmw_max;
	size_t kmw_count;
	size_t kmw_pos;

	/*
	 * slab layer
	 */
	char *kmw_valid;	/* to keep track of freed buffers */
	char *kmw_ubase;	/* buffer for slab data */
} kmem_walk_t;

static int
kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	kmem_walk_t *kmw;
	int ncpus, csize;
	kmem_cache_t *cp;
	size_t vm_quantum;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize = 1, slabsize = 1;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	type &= ~KM_HASH;

	if (addr == 0) {
		mdb_warn("kmem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * First we need to figure out how many CPUs are configured in the
	 * system to know how much to slurp out.
	 */
	mdb_readvar(&ncpus, "max_ncpus");

	csize = KMEM_CACHE_SIZE(ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    vm_quantum == 0 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid kmem_cache_t\n", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
		dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they want constructed buffers, but there's no constructor or
	 * the cache has DEADBEEF checking enabled, there is nothing to report.
	 */
	if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
	    cp->cache_constructor == NULL ||
	    (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
	    &magmax, UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines; if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & KM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);

	kmw->kmw_type = type;
	kmw->kmw_addr = addr;
	kmw->kmw_cp = cp;
	kmw->kmw_csize = csize;
	kmw->kmw_maglist = maglist;
	kmw->kmw_max = magmax;
	kmw->kmw_count = magcnt;
	kmw->kmw_pos = 0;

	/*
	 * When walking allocated buffers in a KMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
		layered = "kmem_hash";

		kmw->kmw_type |= KM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & KM_ALLOCATED)
			layered = "kmem_slab";
		else
			layered = "kmem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & KMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			kmw->kmw_ubase = mdb_alloc(slabsize +
			    sizeof (kmem_bufctl_t), UM_SLEEP);

			if (type & KM_ALLOCATED)
				kmw->kmw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

out1:
	if (status == WALK_ERR) {
		if (kmw->kmw_valid)
			mdb_free(kmw->kmw_valid, slabsize / chunksize);

		if (kmw->kmw_ubase)
			mdb_free(kmw->kmw_ubase, slabsize +
			    sizeof (kmem_bufctl_t));

		if (kmw->kmw_maglist)
			mdb_free(kmw->kmw_maglist,
			    kmw->kmw_max * sizeof (uintptr_t));

		mdb_free(kmw, sizeof (kmem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}

int
kmem_walk_step(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	int type = kmw->kmw_type;
	kmem_cache_t *cp = kmw->kmw_cp;

	void **maglist = kmw->kmw_maglist;
	int magcnt = kmw->kmw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const kmem_slab_t *sp;
	const kmem_bufctl_t *bcp;
	kmem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'kmem_hash' layered walk case
	 */
	if (type & KM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & KM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (kmem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = kmw->kmw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & KM_FREE) && magcnt != 0) {
		kmw->kmw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & KM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & KMF_BUFTAG) {
					kmem_buftag_t *btp;
					kmem_buftag_t tag;

					/* LINTED - alignment */
					btp = KMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (kmem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * If they want constructed buffers, we're finished, since the
	 * magazine layer holds them all.
	 */
	if (type & KM_CONSTRUCTED)
		return (WALK_DONE);

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & KMF_HASH)) {
		valid = kmw->kmw_valid;
		ubase = kmw->kmw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & KM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought to be.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & KMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is (or should be) in the slab
			 * that we've read in; determine its offset in the
			 * slab, validate that it's not corrupt, and add to
			 * our base address to find the kmem_bufctl_t.  (Note
			 * that we don't need to add the size of the bufctl
			 * to our offset calculation because of the slop that's
			 * allocated for the buffer at ubase.)
			 */
			uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;

			if (offs > chunks * chunksize) {
				mdb_warn("found corrupt bufctl ptr %p"
				    " in slab %p in cache %p\n", bcp,
				    wsp->walk_addr, addr);
				break;
			}

			bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
			buf = KMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & KM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & KM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = kmem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & KM_FREE)
		return (WALK_NEXT);

	if (type & KM_BUFCTL) {
		mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = kmem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}

void
kmem_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_walk_t *kmw = wsp->walk_data;
	uintptr_t chunksize;
	uintptr_t slabsize;

	if (kmw == NULL)
		return;

	if (kmw->kmw_maglist != NULL)
		mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));

	chunksize = kmw->kmw_cp->cache_chunksize;
	slabsize = kmw->kmw_cp->cache_slabsize;

	if (kmw->kmw_valid != NULL)
		mdb_free(kmw->kmw_valid, slabsize / chunksize);
	if (kmw->kmw_ubase != NULL)
		mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));

	mdb_free(kmw->kmw_cp, kmw->kmw_csize);
	mdb_free(kmw, sizeof (kmem_walk_t));
}

/*ARGSUSED*/
static int
kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk kmem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & KMC_NOTOUCH)
		return (WALK_NEXT);

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
		return (WALK_DONE);

	return (WALK_NEXT);
}

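/*
 * When no cache address is given, the walk_init functions below use this
 * to apply the named per-cache walker to every kmem cache (via
 * kmem_walk_all() above).
 */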
#define	KMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}

int
kmem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == 0)
		KMEM_WALK_ALL("kmem", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED));
}

int
bufctl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0)
		KMEM_WALK_ALL("bufctl", wsp);
	return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
}

int
freemem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0)
		KMEM_WALK_ALL("freemem", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE));
}

int
freemem_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0)
		KMEM_WALK_ALL("freemem_constructed", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
}

int
freectl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0)
		KMEM_WALK_ALL("freectl", wsp);
	return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
}

int
freectl_constructed_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == 0)
		KMEM_WALK_ALL("freectl_constructed", wsp);
	return (kmem_walk_init_common(wsp,
	    KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
}

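/*
 * The bufctl_history walker follows a bufctl's bc_lastlog chain backwards
 * in time, visiting each transaction log record for the same buffer,
 * cache, and slab.
 */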
typedef struct bufctl_history_walk {
	void		*bhw_next;
	kmem_cache_t	*bhw_cache;
	kmem_slab_t	*bhw_slab;
	hrtime_t	bhw_timestamp;
} bufctl_history_walk_t;

int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	kmem_bufctl_audit_t bc;
	kmem_bufctl_audit_t bcn;

	if (wsp->walk_addr == 0) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl; in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}

int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	kmem_bufctl_audit_t bc;

	if (addr == 0)
		return (WALK_DONE);

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)bc.bc_addr != baseaddr ||
	    bc.bc_cache != bhw->bhw_cache ||
	    bc.bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = bc.bc_lastlog;
	bhw->bhw_timestamp = bc.bc_timestamp;

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
bufctl_history_walk_fini(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;

	mdb_free(bhw, sizeof (*bhw));
}

typedef struct kmem_log_walk {
	kmem_bufctl_audit_t *klw_base;
	kmem_bufctl_audit_t **klw_sorted;
	kmem_log_header_t klw_lh;
	size_t klw_size;
	size_t klw_maxndx;
	size_t klw_ndx;
} kmem_log_walk_t;

int
kmem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	kmem_log_walk_t *klw;
	kmem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the kmem_transaction_log.  Otherwise
	 * read the log whose kmem_log_header_t is stored at walk_addr.
	 */
	if (lp == 0 && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
		mdb_warn("failed to read 'kmem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == 0) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
	lhp = &klw->klw_lh;

	if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
	maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;

	if (mdb_vread(klw->klw_base, klw->klw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(klw->klw_base, klw->klw_size);
		mdb_free(klw, sizeof (kmem_log_walk_t));
		return (WALK_ERR);
	}

	klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (kmem_bufctl_audit_t *), UM_SLEEP);

	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
		    ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++)
			klw->klw_sorted[k++] = &chunk[j];
	}

	qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	klw->klw_maxndx = k;
	wsp->walk_data = klw;

	return (WALK_NEXT);
}

int
kmem_log_walk_step(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;
	kmem_bufctl_audit_t *bcp;

	if (klw->klw_ndx == klw->klw_maxndx)
		return (WALK_DONE);

	bcp = klw->klw_sorted[klw->klw_ndx++];

	return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
	    (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
}

void
kmem_log_walk_fini(mdb_walk_state_t *wsp)
{
	kmem_log_walk_t *klw = wsp->walk_data;

	mdb_free(klw->klw_base, klw->klw_size);
	mdb_free(klw->klw_sorted, klw->klw_maxndx *
	    sizeof (kmem_bufctl_audit_t *));
	mdb_free(klw, sizeof (kmem_log_walk_t));
}

typedef struct allocdby_bufctl {
	uintptr_t abb_addr;
	hrtime_t abb_ts;
} allocdby_bufctl_t;

typedef struct allocdby_walk {
	const char *abw_walk;
	uintptr_t abw_thread;
	size_t abw_nbufs;
	size_t abw_size;
	allocdby_bufctl_t *abw_buf;
	size_t abw_ndx;
} allocdby_walk_t;

int
allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
    allocdby_walk_t *abw)
{
	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
		return (WALK_NEXT);

	if (abw->abw_nbufs == abw->abw_size) {
		allocdby_bufctl_t *buf;
		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;

		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);

		bcopy(abw->abw_buf, buf, oldsize);
		mdb_free(abw->abw_buf, oldsize);

		abw->abw_size <<= 1;
		abw->abw_buf = buf;
	}

	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
	abw->abw_nbufs++;

	return (WALK_NEXT);
}

/*ARGSUSED*/
int
allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
{
	if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
	    abw, addr) == -1) {
		mdb_warn("couldn't walk bufctl for cache %p", addr);
		return (WALK_DONE);
	}

	return (WALK_NEXT);
}

static int
allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
{
	if (lhs->abb_ts < rhs->abb_ts)
		return (1);
	if (lhs->abb_ts > rhs->abb_ts)
		return (-1);
	return (0);
}

static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == 0) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("kmem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk kmem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}

int
allocdby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "bufctl"));
}

int
freedby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "freectl"));
}

int
allocdby_walk_step(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;
	kmem_bufctl_audit_t bc;
	uintptr_t addr;

	if (abw->abw_ndx == abw->abw_nbufs)
		return (WALK_DONE);

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (WALK_DONE);
	}

	return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
}

void
allocdby_walk_fini(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;

	mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
	mdb_free(abw, sizeof (allocdby_walk_t));
}

/*ARGSUSED*/
int
allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
{
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	int i;

	mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
	for (i = 0; i < bcp->bc_depth; i++) {
		if (mdb_lookup_by_addr(bcp->bc_stack[i],
		    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
			continue;
		if (strncmp(c, "kmem_", 5) == 0)
			continue;
		mdb_printf("%s+0x%lx",
		    c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
		break;
	}
	mdb_printf("\n");

	return (WALK_NEXT);
}

static int
allocdby_common(uintptr_t addr, uint_t flags, const char *w)
{
	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");

	if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
		mdb_warn("can't walk '%s' for %p", w, addr);
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}

/*ARGSUSED*/
int
allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
flags, "allocdby")); 2066 } 2067 2068 /*ARGSUSED*/ 2069 int 2070 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2071 { 2072 return (allocdby_common(addr, flags, "freedby")); 2073 } 2074 2075 /* 2076 * Return a string describing the address in relation to the given thread's 2077 * stack. 2078 * 2079 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 2080 * 2081 * - If the address is above the stack pointer, return an empty string 2082 * signifying that the address is active. 2083 * 2084 * - If the address is below the stack pointer, and the thread is not on proc, 2085 * return " (below sp)". 2086 * 2087 * - If the address is below the stack pointer, and the thread is on proc, 2088 * return " (possibly below sp)". Depending on context, we may or may not 2089 * have an accurate t_sp. 2090 */ 2091 static const char * 2092 stack_active(const kthread_t *t, uintptr_t addr) 2093 { 2094 uintptr_t panicstk; 2095 GElf_Sym sym; 2096 2097 if (t->t_state == TS_FREE) 2098 return (" (inactive interrupt thread)"); 2099 2100 /* 2101 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 2102 * no longer relates to the thread's real stack. 2103 */ 2104 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 2105 panicstk = (uintptr_t)sym.st_value; 2106 2107 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 2108 return (""); 2109 } 2110 2111 if (addr >= t->t_sp + STACK_BIAS) 2112 return (""); 2113 2114 if (t->t_state == TS_ONPROC) 2115 return (" (possibly below sp)"); 2116 2117 return (" (below sp)"); 2118 } 2119 2120 /* 2121 * Additional state for the kmem and vmem ::whatis handlers 2122 */ 2123 typedef struct whatis_info { 2124 mdb_whatis_t *wi_w; 2125 const kmem_cache_t *wi_cache; 2126 const vmem_t *wi_vmem; 2127 vmem_t *wi_msb_arena; 2128 size_t wi_slab_size; 2129 uint_t wi_slab_found; 2130 uint_t wi_kmem_lite_count; 2131 uint_t wi_freemem; 2132 } whatis_info_t; 2133 2134 /* call one of our dcmd functions with "-v" and the provided address */ 2135 static void 2136 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr) 2137 { 2138 mdb_arg_t a; 2139 a.a_type = MDB_TYPE_STRING; 2140 a.a_un.a_str = "-v"; 2141 2142 mdb_printf(":\n"); 2143 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a); 2144 } 2145 2146 static void 2147 whatis_print_kmf_lite(uintptr_t btaddr, size_t count) 2148 { 2149 #define KMEM_LITE_MAX 16 2150 pc_t callers[KMEM_LITE_MAX]; 2151 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN; 2152 2153 kmem_buftag_t bt; 2154 intptr_t stat; 2155 const char *plural = ""; 2156 int i; 2157 2158 /* validate our arguments and read in the buftag */ 2159 if (count == 0 || count > KMEM_LITE_MAX || 2160 mdb_vread(&bt, sizeof (bt), btaddr) == -1) 2161 return; 2162 2163 /* validate the buffer state and read in the callers */ 2164 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 2165 2166 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) 2167 return; 2168 2169 if (mdb_vread(callers, count * sizeof (pc_t), 2170 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2171 return; 2172 2173 /* If there aren't any filled in callers, bail */ 2174 if (callers[0] == uninit) 2175 return; 2176 2177 plural = (callers[1] == uninit) ? 
"" : "s"; 2178 2179 /* Everything's done and checked; print them out */ 2180 mdb_printf(":\n"); 2181 2182 mdb_inc_indent(8); 2183 mdb_printf("recent caller%s: %a", plural, callers[0]); 2184 for (i = 1; i < count; i++) { 2185 if (callers[i] == uninit) 2186 break; 2187 mdb_printf(", %a", callers[i]); 2188 } 2189 mdb_dec_indent(8); 2190 } 2191 2192 static void 2193 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, 2194 uintptr_t baddr) 2195 { 2196 mdb_whatis_t *w = wi->wi_w; 2197 2198 const kmem_cache_t *cp = wi->wi_cache; 2199 /* LINTED pointer cast may result in improper alignment */ 2200 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr); 2201 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); 2202 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT)); 2203 2204 mdb_whatis_report_object(w, maddr, addr, ""); 2205 2206 if (baddr != 0 && !call_printer) 2207 mdb_printf("bufctl %p ", baddr); 2208 2209 mdb_printf("%s from %s", 2210 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name); 2211 2212 if (baddr != 0 && call_printer) { 2213 whatis_call_printer(bufctl, baddr); 2214 return; 2215 } 2216 2217 /* for KMF_LITE caches, try to print out the previous callers */ 2218 if (!quiet && (cp->cache_flags & KMF_LITE)) 2219 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count); 2220 2221 mdb_printf("\n"); 2222 } 2223 2224 /*ARGSUSED*/ 2225 static int 2226 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi) 2227 { 2228 mdb_whatis_t *w = wi->wi_w; 2229 2230 uintptr_t cur; 2231 size_t size = wi->wi_cache->cache_bufsize; 2232 2233 while (mdb_whatis_match(w, addr, size, &cur)) 2234 whatis_print_kmem(wi, cur, addr, 0); 2235 2236 return (WHATIS_WALKRET(w)); 2237 } 2238 2239 /*ARGSUSED*/ 2240 static int 2241 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi) 2242 { 2243 mdb_whatis_t *w = wi->wi_w; 2244 2245 uintptr_t cur; 2246 uintptr_t addr = (uintptr_t)bcp->bc_addr; 2247 size_t size = wi->wi_cache->cache_bufsize; 2248 2249 while (mdb_whatis_match(w, addr, size, &cur)) 2250 whatis_print_kmem(wi, cur, addr, baddr); 2251 2252 return (WHATIS_WALKRET(w)); 2253 } 2254 2255 static int 2256 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) 2257 { 2258 mdb_whatis_t *w = wi->wi_w; 2259 2260 size_t size = vs->vs_end - vs->vs_start; 2261 uintptr_t cur; 2262 2263 /* We're not interested in anything but alloc and free segments */ 2264 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) 2265 return (WALK_NEXT); 2266 2267 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { 2268 mdb_whatis_report_object(w, cur, vs->vs_start, ""); 2269 2270 /* 2271 * If we're not printing it seperately, provide the vmem_seg 2272 * pointer if it has a stack trace. 2273 */ 2274 if ((mdb_whatis_flags(w) & WHATIS_QUIET) && 2275 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) || 2276 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { 2277 mdb_printf("vmem_seg %p ", addr); 2278 } 2279 2280 mdb_printf("%s from the %s vmem arena", 2281 (vs->vs_type == VMEM_ALLOC) ? 
"allocated" : "freed", 2282 wi->wi_vmem->vm_name); 2283 2284 if (!(mdb_whatis_flags(w) & WHATIS_QUIET)) 2285 whatis_call_printer(vmem_seg, addr); 2286 else 2287 mdb_printf("\n"); 2288 } 2289 2290 return (WHATIS_WALKRET(w)); 2291 } 2292 2293 static int 2294 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) 2295 { 2296 mdb_whatis_t *w = wi->wi_w; 2297 const char *nm = vmem->vm_name; 2298 2299 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0); 2300 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0); 2301 2302 if (identifier != idspace) 2303 return (WALK_NEXT); 2304 2305 wi->wi_vmem = vmem; 2306 2307 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2308 mdb_printf("Searching vmem arena %s...\n", nm); 2309 2310 if (mdb_pwalk("vmem_seg", 2311 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { 2312 mdb_warn("can't walk vmem_seg for %p", addr); 2313 return (WALK_NEXT); 2314 } 2315 2316 return (WHATIS_WALKRET(w)); 2317 } 2318 2319 /*ARGSUSED*/ 2320 static int 2321 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi) 2322 { 2323 mdb_whatis_t *w = wi->wi_w; 2324 2325 /* It must overlap with the slab data, or it's not interesting */ 2326 if (mdb_whatis_overlaps(w, 2327 (uintptr_t)sp->slab_base, wi->wi_slab_size)) { 2328 wi->wi_slab_found++; 2329 return (WALK_DONE); 2330 } 2331 return (WALK_NEXT); 2332 } 2333 2334 static int 2335 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2336 { 2337 mdb_whatis_t *w = wi->wi_w; 2338 2339 char *walk, *freewalk; 2340 mdb_walk_cb_t func; 2341 int do_bufctl; 2342 2343 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0); 2344 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0); 2345 2346 if (identifier != idspace) 2347 return (WALK_NEXT); 2348 2349 /* Override the '-b' flag as necessary */ 2350 if (!(c->cache_flags & KMF_HASH)) 2351 do_bufctl = FALSE; /* no bufctls to walk */ 2352 else if (c->cache_flags & KMF_AUDIT) 2353 do_bufctl = TRUE; /* we always want debugging info */ 2354 else 2355 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); 2356 2357 if (do_bufctl) { 2358 walk = "bufctl"; 2359 freewalk = "freectl"; 2360 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2361 } else { 2362 walk = "kmem"; 2363 freewalk = "freemem"; 2364 func = (mdb_walk_cb_t)whatis_walk_kmem; 2365 } 2366 2367 wi->wi_cache = c; 2368 2369 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2370 mdb_printf("Searching %s...\n", c->cache_name); 2371 2372 /* 2373 * If more then two buffers live on each slab, figure out if we're 2374 * interested in anything in any slab before doing the more expensive 2375 * kmem/freemem (bufctl/freectl) walkers. 2376 */ 2377 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; 2378 if (!(c->cache_flags & KMF_HASH)) 2379 wi->wi_slab_size -= sizeof (kmem_slab_t); 2380 2381 if ((wi->wi_slab_size / c->cache_chunksize) > 2) { 2382 wi->wi_slab_found = 0; 2383 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, 2384 addr) == -1) { 2385 mdb_warn("can't find kmem_slab walker"); 2386 return (WALK_DONE); 2387 } 2388 if (wi->wi_slab_found == 0) 2389 return (WALK_NEXT); 2390 } 2391 2392 wi->wi_freemem = FALSE; 2393 if (mdb_pwalk(walk, func, wi, addr) == -1) { 2394 mdb_warn("can't find %s walker", walk); 2395 return (WALK_DONE); 2396 } 2397 2398 if (mdb_whatis_done(w)) 2399 return (WALK_DONE); 2400 2401 /* 2402 * We have searched for allocated memory; now search for freed memory. 
2403 */ 2404 if (mdb_whatis_flags(w) & WHATIS_VERBOSE) 2405 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2406 2407 wi->wi_freemem = TRUE; 2408 if (mdb_pwalk(freewalk, func, wi, addr) == -1) { 2409 mdb_warn("can't find %s walker", freewalk); 2410 return (WALK_DONE); 2411 } 2412 2413 return (WHATIS_WALKRET(w)); 2414 } 2415 2416 static int 2417 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2418 { 2419 if (c->cache_arena == wi->wi_msb_arena || 2420 (c->cache_cflags & KMC_NOTOUCH)) 2421 return (WALK_NEXT); 2422 2423 return (whatis_walk_cache(addr, c, wi)); 2424 } 2425 2426 static int 2427 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2428 { 2429 if (c->cache_arena != wi->wi_msb_arena) 2430 return (WALK_NEXT); 2431 2432 return (whatis_walk_cache(addr, c, wi)); 2433 } 2434 2435 static int 2436 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi) 2437 { 2438 if (c->cache_arena == wi->wi_msb_arena || 2439 !(c->cache_cflags & KMC_NOTOUCH)) 2440 return (WALK_NEXT); 2441 2442 return (whatis_walk_cache(addr, c, wi)); 2443 } 2444 2445 static int 2446 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w) 2447 { 2448 uintptr_t cur; 2449 uintptr_t saddr; 2450 size_t size; 2451 2452 /* 2453 * Often, one calls ::whatis on an address from a thread structure. 2454 * We use this opportunity to short circuit this case... 2455 */ 2456 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur)) 2457 mdb_whatis_report_object(w, cur, addr, 2458 "allocated as a thread structure\n"); 2459 2460 /* 2461 * Now check the stack 2462 */ 2463 if (t->t_stkbase == NULL) 2464 return (WALK_NEXT); 2465 2466 /* 2467 * This assumes that t_stk is the end of the stack, but it's really 2468 * only the initial stack pointer for the thread. Arguments to the 2469 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So 2470 * that 't->t_stk::whatis' reports "part of t's stack", we include 2471 * t_stk in the range (the "+ 1", below), but the kernel should 2472 * really include the full stack bounds where we can find it. 2473 */ 2474 saddr = (uintptr_t)t->t_stkbase; 2475 size = (uintptr_t)t->t_stk - saddr + 1; 2476 while (mdb_whatis_match(w, saddr, size, &cur)) 2477 mdb_whatis_report_object(w, cur, cur, 2478 "in thread %p's stack%s\n", addr, stack_active(t, cur)); 2479 2480 return (WHATIS_WALKRET(w)); 2481 } 2482 2483 static void 2484 whatis_modctl_match(mdb_whatis_t *w, const char *name, 2485 uintptr_t base, size_t size, const char *where) 2486 { 2487 uintptr_t cur; 2488 2489 /* 2490 * Since we're searching for addresses inside a module, we report 2491 * them as symbols. 
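* (For example, an address that falls inside genunix's text would be reported along the lines of "in genunix's text segment" rather than being attributed to a kmem buffer; 'genunix' is only an illustrative module name here.)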
2492 */ 2493 while (mdb_whatis_match(w, base, size, &cur)) 2494 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where); 2495 } 2496 2497 static int 2498 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w) 2499 { 2500 char name[MODMAXNAMELEN]; 2501 struct module mod; 2502 Shdr shdr; 2503 2504 if (m->mod_mp == NULL) 2505 return (WALK_NEXT); 2506 2507 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2508 mdb_warn("couldn't read modctl %p's module", addr); 2509 return (WALK_NEXT); 2510 } 2511 2512 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2513 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2514 2515 whatis_modctl_match(w, name, 2516 (uintptr_t)mod.text, mod.text_size, "text segment"); 2517 whatis_modctl_match(w, name, 2518 (uintptr_t)mod.data, mod.data_size, "data segment"); 2519 whatis_modctl_match(w, name, 2520 (uintptr_t)mod.bss, mod.bss_size, "bss segment"); 2521 2522 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2523 mdb_warn("couldn't read symbol header for %p's module", addr); 2524 return (WALK_NEXT); 2525 } 2526 2527 whatis_modctl_match(w, name, 2528 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab"); 2529 whatis_modctl_match(w, name, 2530 (uintptr_t)mod.symspace, mod.symsize, "symtab"); 2531 2532 return (WHATIS_WALKRET(w)); 2533 } 2534 2535 /*ARGSUSED*/ 2536 static int 2537 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w) 2538 { 2539 uintptr_t cur; 2540 2541 uintptr_t base = (uintptr_t)seg->pages; 2542 size_t size = (uintptr_t)seg->epages - base; 2543 2544 while (mdb_whatis_match(w, base, size, &cur)) { 2545 /* round our found pointer down to the page_t base. */ 2546 size_t offset = (cur - base) % sizeof (page_t); 2547 2548 mdb_whatis_report_object(w, cur, cur - offset, 2549 "allocated as a page structure\n"); 2550 } 2551 2552 return (WHATIS_WALKRET(w)); 2553 } 2554 2555 /*ARGSUSED*/ 2556 static int 2557 whatis_run_modules(mdb_whatis_t *w, void *arg) 2558 { 2559 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) { 2560 mdb_warn("couldn't find modctl walker"); 2561 return (1); 2562 } 2563 return (0); 2564 } 2565 2566 /*ARGSUSED*/ 2567 static int 2568 whatis_run_threads(mdb_whatis_t *w, void *ignored) 2569 { 2570 /* 2571 * Now search all thread stacks. Yes, this is a little weak; we 2572 * can save a lot of work by first checking to see if the 2573 * address is in segkp vs. segkmem. But hey, computers are 2574 * fast. 
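* (A match is reported as, e.g., "in thread <addr>'s stack", with stack_active() above appending a qualifier such as " (below sp)" when the location is no longer live.)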
2575 */ 2576 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) { 2577 mdb_warn("couldn't find thread walker"); 2578 return (1); 2579 } 2580 return (0); 2581 } 2582 2583 /*ARGSUSED*/ 2584 static int 2585 whatis_run_pages(mdb_whatis_t *w, void *ignored) 2586 { 2587 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) { 2588 mdb_warn("couldn't find memseg walker"); 2589 return (1); 2590 } 2591 return (0); 2592 } 2593 2594 /*ARGSUSED*/ 2595 static int 2596 whatis_run_kmem(mdb_whatis_t *w, void *ignored) 2597 { 2598 whatis_info_t wi; 2599 2600 bzero(&wi, sizeof (wi)); 2601 wi.wi_w = w; 2602 2603 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1) 2604 mdb_warn("unable to readvar \"kmem_msb_arena\""); 2605 2606 if (mdb_readvar(&wi.wi_kmem_lite_count, 2607 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16) 2608 wi.wi_kmem_lite_count = 0; 2609 2610 /* 2611 * We process kmem caches in the following order: 2612 * 2613 * non-KMC_NOTOUCH, non-metadata (typically the most interesting) 2614 * metadata (can be huge with KMF_AUDIT) 2615 * KMC_NOTOUCH, non-metadata (see kmem_walk_all()) 2616 */ 2617 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch, 2618 &wi) == -1 || 2619 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata, 2620 &wi) == -1 || 2621 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch, 2622 &wi) == -1) { 2623 mdb_warn("couldn't find kmem_cache walker"); 2624 return (1); 2625 } 2626 return (0); 2627 } 2628 2629 /*ARGSUSED*/ 2630 static int 2631 whatis_run_vmem(mdb_whatis_t *w, void *ignored) 2632 { 2633 whatis_info_t wi; 2634 2635 bzero(&wi, sizeof (wi)); 2636 wi.wi_w = w; 2637 2638 if (mdb_walk("vmem_postfix", 2639 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { 2640 mdb_warn("couldn't find vmem_postfix walker"); 2641 return (1); 2642 } 2643 return (0); 2644 } 2645 2646 typedef struct kmem_log_cpu { 2647 uintptr_t kmc_low; 2648 uintptr_t kmc_high; 2649 } kmem_log_cpu_t; 2650 2651 typedef struct kmem_log_data { 2652 uintptr_t kmd_addr; 2653 kmem_log_cpu_t *kmd_cpu; 2654 } kmem_log_data_t; 2655 2656 int 2657 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2658 kmem_log_data_t *kmd) 2659 { 2660 int i; 2661 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2662 size_t bufsize; 2663 2664 for (i = 0; i < NCPU; i++) { 2665 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2666 break; 2667 } 2668 2669 if (kmd->kmd_addr) { 2670 if (b->bc_cache == NULL) 2671 return (WALK_NEXT); 2672 2673 if (mdb_vread(&bufsize, sizeof (bufsize), 2674 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2675 mdb_warn( 2676 "failed to read cache_bufsize for cache at %p", 2677 b->bc_cache); 2678 return (WALK_ERR); 2679 } 2680 2681 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2682 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2683 return (WALK_NEXT); 2684 } 2685 2686 if (i == NCPU) 2687 mdb_printf(" "); 2688 else 2689 mdb_printf("%3d", i); 2690 2691 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2692 b->bc_timestamp, b->bc_thread); 2693 2694 return (WALK_NEXT); 2695 } 2696 2697 /*ARGSUSED*/ 2698 int 2699 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2700 { 2701 kmem_log_header_t lh; 2702 kmem_cpu_log_header_t clh; 2703 uintptr_t lhp, clhp; 2704 int ncpus; 2705 uintptr_t *cpu; 2706 GElf_Sym sym; 2707 kmem_log_cpu_t *kmc; 2708 int i; 2709 kmem_log_data_t kmd; 2710 uint_t opt_b = FALSE; 2711 2712 if (mdb_getopts(argc, argv, 2713 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2714 return (DCMD_USAGE); 2715 2716 if 
(mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2717 mdb_warn("failed to read 'kmem_transaction_log'"); 2718 return (DCMD_ERR); 2719 } 2720 2721 if (lhp == 0) { 2722 mdb_warn("no kmem transaction log\n"); 2723 return (DCMD_ERR); 2724 } 2725 2726 mdb_readvar(&ncpus, "ncpus"); 2727 2728 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2729 mdb_warn("failed to read log header at %p", lhp); 2730 return (DCMD_ERR); 2731 } 2732 2733 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2734 2735 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2736 2737 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2738 mdb_warn("couldn't find 'cpu' array"); 2739 return (DCMD_ERR); 2740 } 2741 2742 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2743 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2744 NCPU * sizeof (uintptr_t), sym.st_size); 2745 return (DCMD_ERR); 2746 } 2747 2748 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2749 mdb_warn("failed to read cpu array at %p", sym.st_value); 2750 return (DCMD_ERR); 2751 } 2752 2753 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2754 kmd.kmd_addr = 0; 2755 kmd.kmd_cpu = kmc; 2756 2757 for (i = 0; i < NCPU; i++) { 2758 2759 if (cpu[i] == 0) 2760 continue; 2761 2762 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2763 mdb_warn("cannot read cpu %d's log header at %p", 2764 i, clhp); 2765 return (DCMD_ERR); 2766 } 2767 2768 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2769 (uintptr_t)lh.lh_base; 2770 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2771 2772 clhp += sizeof (kmem_cpu_log_header_t); 2773 } 2774 2775 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2776 "TIMESTAMP", "THREAD"); 2777 2778 /* 2779 * If we have been passed an address, print out only log entries 2780 * corresponding to that address. If opt_b is specified, then interpret 2781 * the address as a bufctl. 2782 */ 2783 if (flags & DCMD_ADDRSPEC) { 2784 kmem_bufctl_audit_t b; 2785 2786 if (opt_b) { 2787 kmd.kmd_addr = addr; 2788 } else { 2789 if (mdb_vread(&b, 2790 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2791 mdb_warn("failed to read bufctl at %p", addr); 2792 return (DCMD_ERR); 2793 } 2794 2795 (void) kmem_log_walk(addr, &b, &kmd); 2796 2797 return (DCMD_OK); 2798 } 2799 } 2800 2801 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2802 mdb_warn("can't find kmem log walker"); 2803 return (DCMD_ERR); 2804 } 2805 2806 return (DCMD_OK); 2807 } 2808 2809 typedef struct bufctl_history_cb { 2810 int bhc_flags; 2811 int bhc_argc; 2812 const mdb_arg_t *bhc_argv; 2813 int bhc_ret; 2814 } bufctl_history_cb_t; 2815 2816 /*ARGSUSED*/ 2817 static int 2818 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2819 { 2820 bufctl_history_cb_t *bhc = arg; 2821 2822 bhc->bhc_ret = 2823 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2824 2825 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2826 2827 return ((bhc->bhc_ret == DCMD_OK)? 
WALK_NEXT : WALK_DONE); 2828 } 2829 2830 void 2831 bufctl_help(void) 2832 { 2833 mdb_printf("%s", 2834 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n"); 2835 mdb_dec_indent(2); 2836 mdb_printf("%<b>OPTIONS%</b>\n"); 2837 mdb_inc_indent(2); 2838 mdb_printf("%s", 2839 " -v Display the full content of the bufctl, including its stack trace\n" 2840 " -h retrieve the bufctl's transaction history, if available\n" 2841 " -a addr\n" 2842 " filter out bufctls not involving the buffer at addr\n" 2843 " -c caller\n" 2844 " filter out bufctls without the function/PC in their stack trace\n" 2845 " -e earliest\n" 2846 " filter out bufctls timestamped before earliest\n" 2847 " -l latest\n" 2848 " filter out bufctls timestamped after latest\n" 2849 " -t thread\n" 2850 " filter out bufctls not involving thread\n"); 2851 } 2852 2853 int 2854 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2855 { 2856 kmem_bufctl_audit_t bc; 2857 uint_t verbose = FALSE; 2858 uint_t history = FALSE; 2859 uint_t in_history = FALSE; 2860 uintptr_t caller = 0, thread = 0; 2861 uintptr_t laddr, haddr, baddr = 0; 2862 hrtime_t earliest = 0, latest = 0; 2863 int i, depth; 2864 char c[MDB_SYM_NAMLEN]; 2865 GElf_Sym sym; 2866 2867 if (mdb_getopts(argc, argv, 2868 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2869 'h', MDB_OPT_SETBITS, TRUE, &history, 2870 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2871 'c', MDB_OPT_UINTPTR, &caller, 2872 't', MDB_OPT_UINTPTR, &thread, 2873 'e', MDB_OPT_UINT64, &earliest, 2874 'l', MDB_OPT_UINT64, &latest, 2875 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2876 return (DCMD_USAGE); 2877 2878 if (!(flags & DCMD_ADDRSPEC)) 2879 return (DCMD_USAGE); 2880 2881 if (in_history && !history) 2882 return (DCMD_USAGE); 2883 2884 if (history && !in_history) { 2885 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2886 UM_SLEEP | UM_GC); 2887 bufctl_history_cb_t bhc; 2888 2889 nargv[0].a_type = MDB_TYPE_STRING; 2890 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2891 2892 for (i = 0; i < argc; i++) 2893 nargv[i + 1] = argv[i]; 2894 2895 /* 2896 * When in history mode, we treat each element as if it 2897 * were in a seperate loop, so that the headers group 2898 * bufctls with similar histories. 2899 */ 2900 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2901 bhc.bhc_argc = argc + 1; 2902 bhc.bhc_argv = nargv; 2903 bhc.bhc_ret = DCMD_OK; 2904 2905 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2906 addr) == -1) { 2907 mdb_warn("unable to walk bufctl_history"); 2908 return (DCMD_ERR); 2909 } 2910 2911 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2912 mdb_printf("\n"); 2913 2914 return (bhc.bhc_ret); 2915 } 2916 2917 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2918 if (verbose) { 2919 mdb_printf("%16s %16s %16s %16s\n" 2920 "%<u>%16s %16s %16s %16s%</u>\n", 2921 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2922 "", "CACHE", "LASTLOG", "CONTENTS"); 2923 } else { 2924 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2925 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2926 } 2927 } 2928 2929 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2930 mdb_warn("couldn't read bufctl at %p", addr); 2931 return (DCMD_ERR); 2932 } 2933 2934 /* 2935 * Guard against bogus bc_depth in case the bufctl is corrupt or 2936 * the address does not really refer to a bufctl. 
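* Clamping the depth to KMEM_STACK_DEPTH keeps the stack-matching and stack-printing loops below from reading past the end of bc_stack[] when bc_depth is garbage.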
2937 */ 2938 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2939 2940 if (caller != 0) { 2941 laddr = caller; 2942 haddr = caller + sizeof (caller); 2943 2944 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2945 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2946 /* 2947 * We were provided an exact symbol value; any 2948 * address in the function is valid. 2949 */ 2950 laddr = (uintptr_t)sym.st_value; 2951 haddr = (uintptr_t)sym.st_value + sym.st_size; 2952 } 2953 2954 for (i = 0; i < depth; i++) 2955 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2956 break; 2957 2958 if (i == depth) 2959 return (DCMD_OK); 2960 } 2961 2962 if (thread != 0 && (uintptr_t)bc.bc_thread != thread) 2963 return (DCMD_OK); 2964 2965 if (earliest != 0 && bc.bc_timestamp < earliest) 2966 return (DCMD_OK); 2967 2968 if (latest != 0 && bc.bc_timestamp > latest) 2969 return (DCMD_OK); 2970 2971 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2972 return (DCMD_OK); 2973 2974 if (flags & DCMD_PIPE_OUT) { 2975 mdb_printf("%#lr\n", addr); 2976 return (DCMD_OK); 2977 } 2978 2979 if (verbose) { 2980 mdb_printf( 2981 "%<b>%16p%</b> %16p %16llx %16p\n" 2982 "%16s %16p %16p %16p\n", 2983 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2984 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2985 2986 mdb_inc_indent(17); 2987 for (i = 0; i < depth; i++) 2988 mdb_printf("%a\n", bc.bc_stack[i]); 2989 mdb_dec_indent(17); 2990 mdb_printf("\n"); 2991 } else { 2992 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2993 bc.bc_timestamp, bc.bc_thread); 2994 2995 for (i = 0; i < depth; i++) { 2996 if (mdb_lookup_by_addr(bc.bc_stack[i], 2997 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2998 continue; 2999 if (strncmp(c, "kmem_", 5) == 0) 3000 continue; 3001 mdb_printf(" %a\n", bc.bc_stack[i]); 3002 break; 3003 } 3004 3005 if (i >= depth) 3006 mdb_printf("\n"); 3007 } 3008 3009 return (DCMD_OK); 3010 } 3011 3012 typedef struct kmem_verify { 3013 uint64_t *kmv_buf; /* buffer to read cache contents into */ 3014 size_t kmv_size; /* number of bytes in kmv_buf */ 3015 int kmv_corruption; /* > 0 if corruption found. */ 3016 uint_t kmv_flags; /* dcmd flags */ 3017 struct kmem_cache kmv_cache; /* the cache we're operating on */ 3018 } kmem_verify_t; 3019 3020 /* 3021 * verify_pattern() 3022 * verify that buf is filled with the pattern pat. 3023 */ 3024 static int64_t 3025 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 3026 { 3027 /*LINTED*/ 3028 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 3029 uint64_t *buf; 3030 3031 for (buf = buf_arg; buf < bufend; buf++) 3032 if (*buf != pat) 3033 return ((uintptr_t)buf - (uintptr_t)buf_arg); 3034 return (-1); 3035 } 3036 3037 /* 3038 * verify_buftag() 3039 * verify that btp->bt_bxstat == (bcp ^ pat) 3040 */ 3041 static int 3042 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 3043 { 3044 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 3045 } 3046 3047 /* 3048 * verify_free() 3049 * verify the integrity of a free block of memory by checking 3050 * that it is filled with 0xdeadbeef and that its buftag is sane. 
3051 */ 3052 /*ARGSUSED1*/ 3053 static int 3054 verify_free(uintptr_t addr, const void *data, void *private) 3055 { 3056 kmem_verify_t *kmv = (kmem_verify_t *)private; 3057 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3058 int64_t corrupt; /* corruption offset */ 3059 kmem_buftag_t *buftagp; /* ptr to buftag */ 3060 kmem_cache_t *cp = &kmv->kmv_cache; 3061 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT)); 3062 3063 /*LINTED*/ 3064 buftagp = KMEM_BUFTAG(cp, buf); 3065 3066 /* 3067 * Read the buffer to check. 3068 */ 3069 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3070 if (!besilent) 3071 mdb_warn("couldn't read %p", addr); 3072 return (WALK_NEXT); 3073 } 3074 3075 if ((corrupt = verify_pattern(buf, cp->cache_verify, 3076 KMEM_FREE_PATTERN)) >= 0) { 3077 if (!besilent) 3078 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 3079 addr, (uintptr_t)addr + corrupt); 3080 goto corrupt; 3081 } 3082 /* 3083 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 3084 * the first bytes of the buffer, hence we cannot check for red 3085 * zone corruption. 3086 */ 3087 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 3088 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 3089 if (!besilent) 3090 mdb_printf("buffer %p (free) seems to " 3091 "have a corrupt redzone pattern\n", addr); 3092 goto corrupt; 3093 } 3094 3095 /* 3096 * confirm bufctl pointer integrity. 3097 */ 3098 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 3099 if (!besilent) 3100 mdb_printf("buffer %p (free) has a corrupt " 3101 "buftag\n", addr); 3102 goto corrupt; 3103 } 3104 3105 return (WALK_NEXT); 3106 corrupt: 3107 if (kmv->kmv_flags & DCMD_PIPE_OUT) 3108 mdb_printf("%p\n", addr); 3109 kmv->kmv_corruption++; 3110 return (WALK_NEXT); 3111 } 3112 3113 /* 3114 * verify_alloc() 3115 * Verify that the buftag of an allocated buffer makes sense with respect 3116 * to the buffer. 3117 */ 3118 /*ARGSUSED1*/ 3119 static int 3120 verify_alloc(uintptr_t addr, const void *data, void *private) 3121 { 3122 kmem_verify_t *kmv = (kmem_verify_t *)private; 3123 kmem_cache_t *cp = &kmv->kmv_cache; 3124 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3125 /*LINTED*/ 3126 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3127 uint32_t *ip = (uint32_t *)buftagp; 3128 uint8_t *bp = (uint8_t *)buf; 3129 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3130 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT)); 3131 3132 /* 3133 * Read the buffer to check. 3134 */ 3135 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3136 if (!besilent) 3137 mdb_warn("couldn't read %p", addr); 3138 return (WALK_NEXT); 3139 } 3140 3141 /* 3142 * There are two cases to handle: 3143 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3144 * 0xfeedfacefeedface at the end of it 3145 * 2. If the buf was alloc'd using kmem_alloc, it will have 3146 * 0xbb just past the end of the region in use. At the buftag, 3147 * it will have 0xfeedface (or, if the whole buffer is in use, 3148 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3149 * endianness), followed by 32 bits containing the offset of the 3150 * 0xbb byte in the buffer. 
3151 * 3152 * Finally, the two 32-bit words that comprise the second half of the 3153 * buftag should xor to KMEM_BUFTAG_ALLOC 3154 */ 3155 3156 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3157 looks_ok = 1; 3158 else if (!KMEM_SIZE_VALID(ip[1])) 3159 size_ok = 0; 3160 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3161 looks_ok = 1; 3162 else 3163 size_ok = 0; 3164 3165 if (!size_ok) { 3166 if (!besilent) 3167 mdb_printf("buffer %p (allocated) has a corrupt " 3168 "redzone size encoding\n", addr); 3169 goto corrupt; 3170 } 3171 3172 if (!looks_ok) { 3173 if (!besilent) 3174 mdb_printf("buffer %p (allocated) has a corrupt " 3175 "redzone signature\n", addr); 3176 goto corrupt; 3177 } 3178 3179 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3180 if (!besilent) 3181 mdb_printf("buffer %p (allocated) has a " 3182 "corrupt buftag\n", addr); 3183 goto corrupt; 3184 } 3185 3186 return (WALK_NEXT); 3187 corrupt: 3188 if (kmv->kmv_flags & DCMD_PIPE_OUT) 3189 mdb_printf("%p\n", addr); 3190 3191 kmv->kmv_corruption++; 3192 return (WALK_NEXT); 3193 } 3194 3195 /*ARGSUSED2*/ 3196 int 3197 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3198 { 3199 if (flags & DCMD_ADDRSPEC) { 3200 int check_alloc = 0, check_free = 0; 3201 kmem_verify_t kmv; 3202 3203 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3204 addr) == -1) { 3205 mdb_warn("couldn't read kmem_cache %p", addr); 3206 return (DCMD_ERR); 3207 } 3208 3209 if ((kmv.kmv_cache.cache_dump.kd_unsafe || 3210 kmv.kmv_cache.cache_dump.kd_alloc_fails) && 3211 !(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) { 3212 mdb_warn("WARNING: cache was used during dump: " 3213 "corruption may be incorrectly reported\n"); 3214 } 3215 3216 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3217 sizeof (kmem_buftag_t); 3218 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3219 kmv.kmv_corruption = 0; 3220 kmv.kmv_flags = flags; 3221 3222 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3223 check_alloc = 1; 3224 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3225 check_free = 1; 3226 } else { 3227 if (!(flags & DCMD_LOOP)) { 3228 mdb_warn("cache %p (%s) does not have " 3229 "redzone checking enabled\n", addr, 3230 kmv.kmv_cache.cache_name); 3231 } 3232 return (DCMD_ERR); 3233 } 3234 3235 if (!(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) { 3236 mdb_printf("Summary for cache '%s'\n", 3237 kmv.kmv_cache.cache_name); 3238 mdb_inc_indent(2); 3239 } 3240 3241 if (check_alloc) 3242 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3243 if (check_free) 3244 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3245 3246 if (!(flags & DCMD_PIPE_OUT)) { 3247 if (flags & DCMD_LOOP) { 3248 if (kmv.kmv_corruption == 0) { 3249 mdb_printf("%-*s %?p clean\n", 3250 KMEM_CACHE_NAMELEN, 3251 kmv.kmv_cache.cache_name, addr); 3252 } else { 3253 mdb_printf("%-*s %?p %d corrupt " 3254 "buffer%s\n", KMEM_CACHE_NAMELEN, 3255 kmv.kmv_cache.cache_name, addr, 3256 kmv.kmv_corruption, 3257 kmv.kmv_corruption > 1 ? "s" : ""); 3258 } 3259 } else { 3260 /* 3261 * This is the more verbose mode, when the user 3262 * typed addr::kmem_verify. If the cache was 3263 * clean, nothing will have yet been printed. So 3264 * say something. 3265 */ 3266 if (kmv.kmv_corruption == 0) 3267 mdb_printf("clean\n"); 3268 3269 mdb_dec_indent(2); 3270 } 3271 } 3272 } else { 3273 /* 3274 * If the user didn't specify a cache to verify, we'll walk all 3275 * kmem_cache's, specifying ourself as a callback for each... 
3276 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3277 */ 3278 3279 if (!(flags & DCMD_PIPE_OUT)) { 3280 uintptr_t dump_curr; 3281 uintptr_t dump_end; 3282 3283 if (mdb_readvar(&dump_curr, "kmem_dump_curr") != -1 && 3284 mdb_readvar(&dump_end, "kmem_dump_end") != -1 && 3285 dump_curr == dump_end) { 3286 mdb_warn("WARNING: exceeded kmem_dump_size; " 3287 "corruption may be incorrectly reported\n"); 3288 } 3289 3290 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", 3291 KMEM_CACHE_NAMELEN, "Cache Name", "Addr", 3292 "Cache Integrity"); 3293 } 3294 3295 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3296 } 3297 3298 return (DCMD_OK); 3299 } 3300 3301 typedef struct vmem_node { 3302 struct vmem_node *vn_next; 3303 struct vmem_node *vn_parent; 3304 struct vmem_node *vn_sibling; 3305 struct vmem_node *vn_children; 3306 uintptr_t vn_addr; 3307 int vn_marked; 3308 vmem_t vn_vmem; 3309 } vmem_node_t; 3310 3311 typedef struct vmem_walk { 3312 vmem_node_t *vw_root; 3313 vmem_node_t *vw_current; 3314 } vmem_walk_t; 3315 3316 int 3317 vmem_walk_init(mdb_walk_state_t *wsp) 3318 { 3319 uintptr_t vaddr, paddr; 3320 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3321 vmem_walk_t *vw; 3322 3323 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3324 mdb_warn("couldn't read 'vmem_list'"); 3325 return (WALK_ERR); 3326 } 3327 3328 while (vaddr != 0) { 3329 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3330 vp->vn_addr = vaddr; 3331 vp->vn_next = head; 3332 head = vp; 3333 3334 if (vaddr == wsp->walk_addr) 3335 current = vp; 3336 3337 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3338 mdb_warn("couldn't read vmem_t at %p", vaddr); 3339 goto err; 3340 } 3341 3342 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3343 } 3344 3345 for (vp = head; vp != NULL; vp = vp->vn_next) { 3346 3347 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == 0) { 3348 vp->vn_sibling = root; 3349 root = vp; 3350 continue; 3351 } 3352 3353 for (parent = head; parent != NULL; parent = parent->vn_next) { 3354 if (parent->vn_addr != paddr) 3355 continue; 3356 vp->vn_sibling = parent->vn_children; 3357 parent->vn_children = vp; 3358 vp->vn_parent = parent; 3359 break; 3360 } 3361 3362 if (parent == NULL) { 3363 mdb_warn("couldn't find %p's parent (%p)\n", 3364 vp->vn_addr, paddr); 3365 goto err; 3366 } 3367 } 3368 3369 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3370 vw->vw_root = root; 3371 3372 if (current != NULL) 3373 vw->vw_current = current; 3374 else 3375 vw->vw_current = root; 3376 3377 wsp->walk_data = vw; 3378 return (WALK_NEXT); 3379 err: 3380 for (vp = head; head != NULL; vp = head) { 3381 head = vp->vn_next; 3382 mdb_free(vp, sizeof (vmem_node_t)); 3383 } 3384 3385 return (WALK_ERR); 3386 } 3387 3388 int 3389 vmem_walk_step(mdb_walk_state_t *wsp) 3390 { 3391 vmem_walk_t *vw = wsp->walk_data; 3392 vmem_node_t *vp; 3393 int rval; 3394 3395 if ((vp = vw->vw_current) == NULL) 3396 return (WALK_DONE); 3397 3398 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3399 3400 if (vp->vn_children != NULL) { 3401 vw->vw_current = vp->vn_children; 3402 return (rval); 3403 } 3404 3405 do { 3406 vw->vw_current = vp->vn_sibling; 3407 vp = vp->vn_parent; 3408 } while (vw->vw_current == NULL && vp != NULL); 3409 3410 return (rval); 3411 } 3412 3413 /* 3414 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3415 * children are visited before their parent. 
We perform the postfix walk 3416 * iteratively (rather than recursively) to allow mdb to regain control 3417 * after each callback. 3418 */ 3419 int 3420 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3421 { 3422 vmem_walk_t *vw = wsp->walk_data; 3423 vmem_node_t *vp = vw->vw_current; 3424 int rval; 3425 3426 /* 3427 * If this node is marked, then we know that we have already visited 3428 * all of its children. If the node has any siblings, they need to 3429 * be visited next; otherwise, we need to visit the parent. Note 3430 * that vp->vn_marked will only be zero on the first invocation of 3431 * the step function. 3432 */ 3433 if (vp->vn_marked) { 3434 if (vp->vn_sibling != NULL) 3435 vp = vp->vn_sibling; 3436 else if (vp->vn_parent != NULL) 3437 vp = vp->vn_parent; 3438 else { 3439 /* 3440 * We have neither a parent, nor a sibling, and we 3441 * have already been visited; we're done. 3442 */ 3443 return (WALK_DONE); 3444 } 3445 } 3446 3447 /* 3448 * Before we visit this node, visit its children. 3449 */ 3450 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3451 vp = vp->vn_children; 3452 3453 vp->vn_marked = 1; 3454 vw->vw_current = vp; 3455 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3456 3457 return (rval); 3458 } 3459 3460 void 3461 vmem_walk_fini(mdb_walk_state_t *wsp) 3462 { 3463 vmem_walk_t *vw = wsp->walk_data; 3464 vmem_node_t *root = vw->vw_root; 3465 int done; 3466 3467 if (root == NULL) 3468 return; 3469 3470 if ((vw->vw_root = root->vn_children) != NULL) 3471 vmem_walk_fini(wsp); 3472 3473 vw->vw_root = root->vn_sibling; 3474 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3475 mdb_free(root, sizeof (vmem_node_t)); 3476 3477 if (done) { 3478 mdb_free(vw, sizeof (vmem_walk_t)); 3479 } else { 3480 vmem_walk_fini(wsp); 3481 } 3482 } 3483 3484 typedef struct vmem_seg_walk { 3485 uint8_t vsw_type; 3486 uintptr_t vsw_start; 3487 uintptr_t vsw_current; 3488 } vmem_seg_walk_t; 3489 3490 /*ARGSUSED*/ 3491 int 3492 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3493 { 3494 vmem_seg_walk_t *vsw; 3495 3496 if (wsp->walk_addr == 0) { 3497 mdb_warn("vmem_%s does not support global walks\n", name); 3498 return (WALK_ERR); 3499 } 3500 3501 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3502 3503 vsw->vsw_type = type; 3504 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3505 vsw->vsw_current = vsw->vsw_start; 3506 3507 return (WALK_NEXT); 3508 } 3509 3510 /* 3511 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
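* Here VMEM_NONE serves as a wildcard: vmem_seg_walk_step() only filters on vsw_type when it is non-zero, so the generic "vmem_seg" walk visits segments of every type.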
3512 */ 3513 #define VMEM_NONE 0 3514 3515 int 3516 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3517 { 3518 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3519 } 3520 3521 int 3522 vmem_free_walk_init(mdb_walk_state_t *wsp) 3523 { 3524 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3525 } 3526 3527 int 3528 vmem_span_walk_init(mdb_walk_state_t *wsp) 3529 { 3530 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3531 } 3532 3533 int 3534 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3535 { 3536 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3537 } 3538 3539 int 3540 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3541 { 3542 vmem_seg_t seg; 3543 vmem_seg_walk_t *vsw = wsp->walk_data; 3544 uintptr_t addr = vsw->vsw_current; 3545 static size_t seg_size = 0; 3546 int rval; 3547 3548 if (!seg_size) { 3549 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3550 mdb_warn("failed to read 'vmem_seg_size'"); 3551 seg_size = sizeof (vmem_seg_t); 3552 } 3553 } 3554 3555 if (seg_size < sizeof (seg)) 3556 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3557 3558 if (mdb_vread(&seg, seg_size, addr) == -1) { 3559 mdb_warn("couldn't read vmem_seg at %p", addr); 3560 return (WALK_ERR); 3561 } 3562 3563 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3564 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3565 rval = WALK_NEXT; 3566 } else { 3567 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3568 } 3569 3570 if (vsw->vsw_current == vsw->vsw_start) 3571 return (WALK_DONE); 3572 3573 return (rval); 3574 } 3575 3576 void 3577 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3578 { 3579 vmem_seg_walk_t *vsw = wsp->walk_data; 3580 3581 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3582 } 3583 3584 #define VMEM_NAMEWIDTH 22 3585 3586 int 3587 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3588 { 3589 vmem_t v, parent; 3590 vmem_kstat_t *vkp = &v.vm_kstat; 3591 uintptr_t paddr; 3592 int ident = 0; 3593 char c[VMEM_NAMEWIDTH]; 3594 3595 if (!(flags & DCMD_ADDRSPEC)) { 3596 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3597 mdb_warn("can't walk vmem"); 3598 return (DCMD_ERR); 3599 } 3600 return (DCMD_OK); 3601 } 3602 3603 if (DCMD_HDRSPEC(flags)) 3604 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3605 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3606 "TOTAL", "SUCCEED", "FAIL"); 3607 3608 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3609 mdb_warn("couldn't read vmem at %p", addr); 3610 return (DCMD_ERR); 3611 } 3612 3613 for (paddr = (uintptr_t)v.vm_source; paddr != 0; ident += 2) { 3614 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3615 mdb_warn("couldn't trace %p's ancestry", addr); 3616 ident = 0; 3617 break; 3618 } 3619 paddr = (uintptr_t)parent.vm_source; 3620 } 3621 3622 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3623 3624 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3625 addr, VMEM_NAMEWIDTH, c, 3626 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3627 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3628 3629 return (DCMD_OK); 3630 } 3631 3632 void 3633 vmem_seg_help(void) 3634 { 3635 mdb_printf("%s", 3636 "Display the contents of vmem_seg_ts, with optional filtering.\n\n" 3637 "\n" 3638 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3639 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3640 "information.\n"); 3641 mdb_dec_indent(2); 3642 mdb_printf("%<b>OPTIONS%</b>\n"); 3643 mdb_inc_indent(2); 3644 mdb_printf("%s", 3645 " -v Display the full content of the vmem_seg, including its stack trace\n" 3646 " -s report the size of the segment, instead of the end address\n" 3647 " -c caller\n" 3648 " filter out segments without the function/PC in their stack trace\n" 3649 " -e earliest\n" 3650 " filter out segments timestamped before earliest\n" 3651 " -l latest\n" 3652 " filter out segments timestamped after latest\n" 3653 " -m minsize\n" 3654 " filter out segments smaller than minsize\n" 3655 " -M maxsize\n" 3656 " filter out segments larger than maxsize\n" 3657 " -t thread\n" 3658 " filter out segments not involving thread\n" 3659 " -T type\n" 3660 " filter out segments not of type 'type'\n" 3661 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3662 } 3663 3664 /*ARGSUSED*/ 3665 int 3666 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3667 { 3668 vmem_seg_t vs; 3669 pc_t *stk = vs.vs_stack; 3670 uintptr_t sz; 3671 uint8_t t; 3672 const char *type = NULL; 3673 GElf_Sym sym; 3674 char c[MDB_SYM_NAMLEN]; 3675 int no_debug; 3676 int i; 3677 int depth; 3678 uintptr_t laddr, haddr; 3679 3680 uintptr_t caller = 0, thread = 0; 3681 uintptr_t minsize = 0, maxsize = 0; 3682 3683 hrtime_t earliest = 0, latest = 0; 3684 3685 uint_t size = 0; 3686 uint_t verbose = 0; 3687 3688 if (!(flags & DCMD_ADDRSPEC)) 3689 return (DCMD_USAGE); 3690 3691 if (mdb_getopts(argc, argv, 3692 'c', MDB_OPT_UINTPTR, &caller, 3693 'e', MDB_OPT_UINT64, &earliest, 3694 'l', MDB_OPT_UINT64, &latest, 3695 's', MDB_OPT_SETBITS, TRUE, &size, 3696 'm', MDB_OPT_UINTPTR, &minsize, 3697 'M', MDB_OPT_UINTPTR, &maxsize, 3698 't', MDB_OPT_UINTPTR, &thread, 3699 'T', MDB_OPT_STR, &type, 3700 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3701 NULL) != argc) 3702 return (DCMD_USAGE); 3703 3704 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3705 if (verbose) { 3706 mdb_printf("%16s %4s %16s %16s %16s\n" 3707 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3708 "ADDR", "TYPE", "START", "END", "SIZE", 3709 "", "", "THREAD", "TIMESTAMP", ""); 3710 } else { 3711 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3712 "START", size?
"SIZE" : "END", "WHO"); 3713 } 3714 } 3715 3716 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3717 mdb_warn("couldn't read vmem_seg at %p", addr); 3718 return (DCMD_ERR); 3719 } 3720 3721 if (type != NULL) { 3722 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3723 t = VMEM_ALLOC; 3724 else if (strcmp(type, "FREE") == 0) 3725 t = VMEM_FREE; 3726 else if (strcmp(type, "SPAN") == 0) 3727 t = VMEM_SPAN; 3728 else if (strcmp(type, "ROTR") == 0 || 3729 strcmp(type, "ROTOR") == 0) 3730 t = VMEM_ROTOR; 3731 else if (strcmp(type, "WLKR") == 0 || 3732 strcmp(type, "WALKER") == 0) 3733 t = VMEM_WALKER; 3734 else { 3735 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3736 type); 3737 return (DCMD_ERR); 3738 } 3739 3740 if (vs.vs_type != t) 3741 return (DCMD_OK); 3742 } 3743 3744 sz = vs.vs_end - vs.vs_start; 3745 3746 if (minsize != 0 && sz < minsize) 3747 return (DCMD_OK); 3748 3749 if (maxsize != 0 && sz > maxsize) 3750 return (DCMD_OK); 3751 3752 t = vs.vs_type; 3753 depth = vs.vs_depth; 3754 3755 /* 3756 * debug info, when present, is only accurate for VMEM_ALLOC segments 3757 */ 3758 no_debug = (t != VMEM_ALLOC) || 3759 (depth == 0 || depth > VMEM_STACK_DEPTH); 3760 3761 if (no_debug) { 3762 if (caller != 0 || thread != 0 || earliest != 0 || latest != 0) 3763 return (DCMD_OK); /* not enough info */ 3764 } else { 3765 if (caller != 0) { 3766 laddr = caller; 3767 haddr = caller + sizeof (caller); 3768 3769 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3770 sizeof (c), &sym) != -1 && 3771 caller == (uintptr_t)sym.st_value) { 3772 /* 3773 * We were provided an exact symbol value; any 3774 * address in the function is valid. 3775 */ 3776 laddr = (uintptr_t)sym.st_value; 3777 haddr = (uintptr_t)sym.st_value + sym.st_size; 3778 } 3779 3780 for (i = 0; i < depth; i++) 3781 if (vs.vs_stack[i] >= laddr && 3782 vs.vs_stack[i] < haddr) 3783 break; 3784 3785 if (i == depth) 3786 return (DCMD_OK); 3787 } 3788 3789 if (thread != 0 && (uintptr_t)vs.vs_thread != thread) 3790 return (DCMD_OK); 3791 3792 if (earliest != 0 && vs.vs_timestamp < earliest) 3793 return (DCMD_OK); 3794 3795 if (latest != 0 && vs.vs_timestamp > latest) 3796 return (DCMD_OK); 3797 } 3798 3799 type = (t == VMEM_ALLOC ? "ALLC" : 3800 t == VMEM_FREE ? "FREE" : 3801 t == VMEM_SPAN ? "SPAN" : 3802 t == VMEM_ROTOR ? "ROTR" : 3803 t == VMEM_WALKER ? "WLKR" : 3804 "????"); 3805 3806 if (flags & DCMD_PIPE_OUT) { 3807 mdb_printf("%#lr\n", addr); 3808 return (DCMD_OK); 3809 } 3810 3811 if (verbose) { 3812 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16ld\n", 3813 addr, type, vs.vs_start, vs.vs_end, sz); 3814 3815 if (no_debug) 3816 return (DCMD_OK); 3817 3818 mdb_printf("%16s %4s %16p %16llx\n", 3819 "", "", vs.vs_thread, vs.vs_timestamp); 3820 3821 mdb_inc_indent(17); 3822 for (i = 0; i < depth; i++) { 3823 mdb_printf("%a\n", stk[i]); 3824 } 3825 mdb_dec_indent(17); 3826 mdb_printf("\n"); 3827 } else { 3828 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3829 vs.vs_start, size? 
sz : vs.vs_end); 3830 3831 if (no_debug) { 3832 mdb_printf("\n"); 3833 return (DCMD_OK); 3834 } 3835 3836 for (i = 0; i < depth; i++) { 3837 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3838 c, sizeof (c), &sym) == -1) 3839 continue; 3840 if (strncmp(c, "vmem_", 5) == 0) 3841 continue; 3842 break; 3843 } 3844 mdb_printf(" %a\n", stk[i]); 3845 } 3846 return (DCMD_OK); 3847 } 3848 3849 typedef struct kmalog_data { 3850 uintptr_t kma_addr; 3851 hrtime_t kma_newest; 3852 } kmalog_data_t; 3853 3854 /*ARGSUSED*/ 3855 static int 3856 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3857 { 3858 char name[KMEM_CACHE_NAMELEN + 1]; 3859 hrtime_t delta; 3860 int i, depth; 3861 size_t bufsize; 3862 3863 if (bcp->bc_timestamp == 0) 3864 return (WALK_DONE); 3865 3866 if (kma->kma_newest == 0) 3867 kma->kma_newest = bcp->bc_timestamp; 3868 3869 if (kma->kma_addr) { 3870 if (mdb_vread(&bufsize, sizeof (bufsize), 3871 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3872 mdb_warn( 3873 "failed to read cache_bufsize for cache at %p", 3874 bcp->bc_cache); 3875 return (WALK_ERR); 3876 } 3877 3878 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3879 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3880 return (WALK_NEXT); 3881 } 3882 3883 delta = kma->kma_newest - bcp->bc_timestamp; 3884 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3885 3886 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3887 &bcp->bc_cache->cache_name) <= 0) 3888 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3889 3890 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3891 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3892 3893 for (i = 0; i < depth; i++) 3894 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3895 3896 return (WALK_NEXT); 3897 } 3898 3899 int 3900 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3901 { 3902 const char *logname = "kmem_transaction_log"; 3903 kmalog_data_t kma; 3904 3905 if (argc > 1) 3906 return (DCMD_USAGE); 3907 3908 kma.kma_newest = 0; 3909 if (flags & DCMD_ADDRSPEC) 3910 kma.kma_addr = addr; 3911 else 3912 kma.kma_addr = 0; 3913 3914 if (argc > 0) { 3915 if (argv->a_type != MDB_TYPE_STRING) 3916 return (DCMD_USAGE); 3917 if (strcmp(argv->a_un.a_str, "fail") == 0) 3918 logname = "kmem_failure_log"; 3919 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3920 logname = "kmem_slab_log"; 3921 else if (strcmp(argv->a_un.a_str, "zerosized") == 0) 3922 logname = "kmem_zerosized_log"; 3923 else 3924 return (DCMD_USAGE); 3925 } 3926 3927 if (mdb_readvar(&addr, logname) == -1) { 3928 mdb_warn("failed to read %s log header pointer", logname); 3929 return (DCMD_ERR); 3930 } 3931 3932 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3933 mdb_warn("failed to walk kmem log"); 3934 return (DCMD_ERR); 3935 } 3936 3937 return (DCMD_OK); 3938 } 3939 3940 /* 3941 * As the final lure for die-hard crash(8) users, we provide ::kmausers here. 3942 * The first piece is a structure which we use to accumulate kmem_cache_t 3943 * addresses of interest. kmc_add() is used as a callback for the kmem_cache 3944 * walker; we either add all caches, or ones named explicitly as arguments.
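* (For example, '::kmausers kmem_alloc_256' accumulates only that cache, while a bare '::kmausers' adds every cache; the cache name is purely illustrative.)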
3945 */ 3946 3947 typedef struct kmclist { 3948 const char *kmc_name; /* Name to match (or NULL) */ 3949 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3950 int kmc_nelems; /* Num entries in kmc_caches */ 3951 int kmc_size; /* Size of kmc_caches array */ 3952 } kmclist_t; 3953 3954 static int 3955 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3956 { 3957 void *p; 3958 int s; 3959 3960 if (kmc->kmc_name == NULL || 3961 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3962 /* 3963 * If we have a match, grow our array (if necessary), and then 3964 * add the virtual address of the matching cache to our list. 3965 */ 3966 if (kmc->kmc_nelems >= kmc->kmc_size) { 3967 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3968 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3969 3970 bcopy(kmc->kmc_caches, p, 3971 sizeof (uintptr_t) * kmc->kmc_size); 3972 3973 kmc->kmc_caches = p; 3974 kmc->kmc_size = s; 3975 } 3976 3977 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3978 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3979 } 3980 3981 return (WALK_NEXT); 3982 } 3983 3984 /* 3985 * The second piece of ::kmausers is a hash table of allocations. Each 3986 * allocation owner is identified by its stack trace and data_size. We then 3987 * track the total bytes of all such allocations, and the number of allocations 3988 * to report at the end. Once we have a list of caches, we walk through the 3989 * allocated bufctls of each, and update our hash table accordingly. 3990 */ 3991 3992 typedef struct kmowner { 3993 struct kmowner *kmo_head; /* First hash elt in bucket */ 3994 struct kmowner *kmo_next; /* Next hash elt in chain */ 3995 size_t kmo_signature; /* Hash table signature */ 3996 uint_t kmo_num; /* Number of allocations */ 3997 size_t kmo_data_size; /* Size of each allocation */ 3998 size_t kmo_total_size; /* Total bytes of allocation */ 3999 int kmo_depth; /* Depth of stack trace */ 4000 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 4001 } kmowner_t; 4002 4003 typedef struct kmusers { 4004 uintptr_t kmu_addr; /* address of interest */ 4005 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 4006 kmowner_t *kmu_hash; /* Hash table of owners */ 4007 int kmu_nelems; /* Number of entries in use */ 4008 int kmu_size; /* Total number of entries */ 4009 } kmusers_t; 4010 4011 static void 4012 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 4013 size_t size, size_t data_size) 4014 { 4015 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4016 size_t bucket, signature = data_size; 4017 kmowner_t *kmo, *kmoend; 4018 4019 /* 4020 * If the hash table is full, double its size and rehash everything. 4021 */ 4022 if (kmu->kmu_nelems >= kmu->kmu_size) { 4023 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 4024 4025 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 4026 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 4027 kmu->kmu_hash = kmo; 4028 kmu->kmu_size = s; 4029 4030 kmoend = kmu->kmu_hash + kmu->kmu_size; 4031 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 4032 kmo->kmo_head = NULL; 4033 4034 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 4035 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 4036 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 4037 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4038 kmu->kmu_hash[bucket].kmo_head = kmo; 4039 } 4040 } 4041 4042 /* 4043 * Finish computing the hash signature from the stack trace, and then 4044 * see if the owner is in the hash table. If so, update our stats. 
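* The signature is simply data_size plus the sum of the stack PCs; hash collisions are disambiguated by the exact depth/size/stack comparison in the loop below.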
4045 */ 4046 for (i = 0; i < depth; i++) 4047 signature += bcp->bc_stack[i]; 4048 4049 bucket = signature & (kmu->kmu_size - 1); 4050 4051 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 4052 if (kmo->kmo_signature == signature) { 4053 size_t difference = 0; 4054 4055 difference |= kmo->kmo_data_size - data_size; 4056 difference |= kmo->kmo_depth - depth; 4057 4058 for (i = 0; i < depth; i++) { 4059 difference |= kmo->kmo_stack[i] - 4060 bcp->bc_stack[i]; 4061 } 4062 4063 if (difference == 0) { 4064 kmo->kmo_total_size += size; 4065 kmo->kmo_num++; 4066 return; 4067 } 4068 } 4069 } 4070 4071 /* 4072 * If the owner is not yet hashed, grab the next element and fill it 4073 * in based on the allocation information. 4074 */ 4075 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 4076 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 4077 kmu->kmu_hash[bucket].kmo_head = kmo; 4078 4079 kmo->kmo_signature = signature; 4080 kmo->kmo_num = 1; 4081 kmo->kmo_data_size = data_size; 4082 kmo->kmo_total_size = size; 4083 kmo->kmo_depth = depth; 4084 4085 for (i = 0; i < depth; i++) 4086 kmo->kmo_stack[i] = bcp->bc_stack[i]; 4087 } 4088 4089 /* 4090 * When ::kmausers is invoked without the -f flag, we simply update our hash 4091 * table with the information from each allocated bufctl. 4092 */ 4093 /*ARGSUSED*/ 4094 static int 4095 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4096 { 4097 const kmem_cache_t *cp = kmu->kmu_cache; 4098 4099 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4100 return (WALK_NEXT); 4101 } 4102 4103 /* 4104 * When ::kmausers is invoked with the -f flag, we print out the information 4105 * for each bufctl as well as updating the hash table. 4106 */ 4107 static int 4108 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 4109 { 4110 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 4111 const kmem_cache_t *cp = kmu->kmu_cache; 4112 kmem_bufctl_t bufctl; 4113 4114 if (kmu->kmu_addr) { 4115 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 4116 mdb_warn("couldn't read bufctl at %p", addr); 4117 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 4118 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 4119 cp->cache_bufsize) 4120 return (WALK_NEXT); 4121 } 4122 4123 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4124 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4125 4126 for (i = 0; i < depth; i++) 4127 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4128 4129 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4130 return (WALK_NEXT); 4131 } 4132 4133 /* 4134 * We sort our results by allocation size before printing them. 4135 */ 4136 static int 4137 kmownercmp(const void *lp, const void *rp) 4138 { 4139 const kmowner_t *lhs = lp; 4140 const kmowner_t *rhs = rp; 4141 4142 return (rhs->kmo_total_size - lhs->kmo_total_size); 4143 } 4144 4145 /* 4146 * The main engine of ::kmausers is relatively straightforward: First we 4147 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4148 * iterate over the allocated bufctls of each cache in the list. Finally, 4149 * we sort and print our results. 
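* (Typical invocations, for illustration: '::kmausers' for a summary of the largest consumers, '::kmausers -e -f' to include small users and print each allocation, or '<addr>::kmausers' to restrict the report to allocations containing <addr>.)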
/*ARGSUSED*/
int
kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	int mem_threshold = 8192;	/* Minimum # bytes for printing */
	int cnt_threshold = 100;	/* Minimum # blocks for printing */
	int audited_caches = 0;		/* Number of KMF_AUDIT caches found */
	int do_all_caches = 1;		/* Do all caches (no arguments) */
	int opt_e = FALSE;		/* Include "small" users */
	int opt_f = FALSE;		/* Print stack traces */

	mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
	kmowner_t *kmo, *kmoend;
	int i, oelems;

	kmclist_t kmc;
	kmusers_t kmu;

	bzero(&kmc, sizeof (kmc));
	bzero(&kmu, sizeof (kmu));

	while ((i = mdb_getopts(argc, argv,
	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {

		argv += i;	/* skip past options we just processed */
		argc -= i;	/* adjust argc */

		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
			return (DCMD_USAGE);

		oelems = kmc.kmc_nelems;
		kmc.kmc_name = argv->a_un.a_str;
		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);

		if (kmc.kmc_nelems == oelems) {
			mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
			return (DCMD_ERR);
		}

		do_all_caches = 0;
		argv++;
		argc--;
	}

	if (flags & DCMD_ADDRSPEC) {
		opt_f = TRUE;
		kmu.kmu_addr = addr;
	} else {
		kmu.kmu_addr = 0;
	}

	if (opt_e)
		mem_threshold = cnt_threshold = 0;

	if (opt_f)
		callback = (mdb_walk_cb_t)kmause2;

	if (do_all_caches) {
		kmc.kmc_name = NULL; /* match all cache names */
		(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
	}

	for (i = 0; i < kmc.kmc_nelems; i++) {
		uintptr_t cp = kmc.kmc_caches[i];
		kmem_cache_t c;

		if (mdb_vread(&c, sizeof (c), cp) == -1) {
			mdb_warn("failed to read cache at %p", cp);
			continue;
		}

		if (!(c.cache_flags & KMF_AUDIT)) {
			if (!do_all_caches) {
				mdb_warn("KMF_AUDIT is not enabled for %s\n",
				    c.cache_name);
			}
			continue;
		}

		kmu.kmu_cache = &c;
		(void) mdb_pwalk("bufctl", callback, &kmu, cp);
		audited_caches++;
	}

	if (audited_caches == 0 && do_all_caches) {
		mdb_warn("KMF_AUDIT is not enabled for any caches\n");
		return (DCMD_ERR);
	}

	qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
	kmoend = kmu.kmu_hash + kmu.kmu_nelems;

	for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
		if (kmo->kmo_total_size < mem_threshold &&
		    kmo->kmo_num < cnt_threshold)
			continue;
		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
		    kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
		for (i = 0; i < kmo->kmo_depth; i++)
			mdb_printf("\t %a\n", kmo->kmo_stack[i]);
	}

	return (DCMD_OK);
}

void
kmausers_help(void)
{
	mdb_printf(
	    "Displays the largest users of the kmem allocator, grouped by\n"
	    "stack trace and sorted by total allocation size.  If one or\n"
	    "more caches are specified, only those caches will be searched.\n"
	    "By default, all caches are searched.  If an address is\n"
	    "specified, then only those allocations which include the given\n"
	    "address are displayed.  Specifying an address implies -f.\n"
	    "\n"
	    "\t-e\tInclude all users, not just the largest\n"
	    "\t-f\tDisplay individual allocations.  By default, users are\n"
	    "\t\tgrouped by stack\n");
}

static int
kmem_ready_check(void)
{
	int ready;

	if (mdb_readvar(&ready, "kmem_ready") < 0)
		return (-1); /* errno is set for us */

	return (ready);
}

void
kmem_statechange(void)
{
	static int been_ready = 0;

	if (been_ready)
		return;

	if (kmem_ready_check() <= 0)
		return;

	been_ready = 1;
	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
}

void
kmem_init(void)
{
	mdb_walker_t w = {
		"kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
		list_walk_step, list_walk_fini
	};

	/*
	 * If kmem is ready, we'll need to invoke the kmem_cache walker
	 * immediately.  Walkers in the linkage structure won't be ready until
	 * _mdb_init returns, so we'll need to add this one manually.  If kmem
	 * is ready, we'll use the walker to initialize the caches.  If kmem
	 * isn't ready, we'll register a callback that will allow us to defer
	 * cache walking until it is.
	 */
	if (mdb_add_walker(&w) != 0) {
		mdb_warn("failed to add kmem_cache walker");
		return;
	}

	kmem_statechange();

	/* register our ::whatis handlers */
	mdb_whatis_register("modules", whatis_run_modules, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("threads", whatis_run_threads, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("pages", whatis_run_pages, NULL,
	    WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
	mdb_whatis_register("kmem", whatis_run_kmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, 0);
}

typedef struct whatthread {
	uintptr_t wt_target;
	int wt_verbose;
} whatthread_t;

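/*
 * Callback invoked for each kthread_t by the "thread" walker on behalf of
 * ::whatthread: scan the thread's kernel stack, one word at a time, for the
 * target value recorded in the whatthread_t.
 */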
static int
whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
{
	uintptr_t current, data;

	if (t->t_stkbase == NULL)
		return (WALK_NEXT);

	/*
	 * Warn about swapped out threads, but drive on anyway
	 */
	if (!(t->t_schedflag & TS_LOAD)) {
		mdb_warn("thread %p's stack swapped out\n", addr);
		return (WALK_NEXT);
	}

	/*
	 * Search the thread's stack for the given pointer.  Note that it would
	 * be more efficient to follow ::kgrep's lead and read in page-sized
	 * chunks, but this routine is already fast and simple.
	 */
	for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
	    current += sizeof (uintptr_t)) {
		if (mdb_vread(&data, sizeof (data), current) == -1) {
			mdb_warn("couldn't read thread %p's stack at %p",
			    addr, current);
			return (WALK_ERR);
		}

		if (data == w->wt_target) {
			if (w->wt_verbose) {
				mdb_printf("%p in thread %p's stack%s\n",
				    current, addr, stack_active(t, current));
			} else {
				mdb_printf("%#lr\n", addr);
				return (WALK_NEXT);
			}
		}
	}

	return (WALK_NEXT);
}

int
whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	whatthread_t w;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	w.wt_verbose = FALSE;
	w.wt_target = addr;

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
	    == -1) {
		mdb_warn("couldn't walk threads");
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}
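
/*
 * Example usage (the address below is illustrative):
 *
 *	> 0xffffff0123456780::whatthread
 *		print each thread whose stack contains the given value
 *	> 0xffffff0123456780::whatthread -v
 *		also print where in each stack the value was found and
 *		whether that location lies within the active portion
 */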