/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <mdb/mdb_param.h>
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>
#include <sys/cpuvar.h>
#include <sys/kmem_impl.h>
#include <sys/vmem_impl.h>
#include <sys/machelf.h>
#include <sys/modctl.h>
#include <sys/kobj.h>
#include <sys/panic.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <vm/page.h>

#include "kmem.h"
#include "leaky.h"

#define	dprintf(x) if (mdb_debug_level) { \
        mdb_printf("kmem debug: "); \
        /*CSTYLED*/\
        mdb_printf x ;\
}

#define	KM_ALLOCATED	0x01
#define	KM_FREE		0x02
#define	KM_BUFCTL	0x04
#define	KM_CONSTRUCTED	0x08	/* only constructed free buffers */
#define	KM_HASH		0x10

static int mdb_debug_level = 0;

/*ARGSUSED*/
static int
kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
{
        mdb_walker_t w;
        char descr[64];

        (void) mdb_snprintf(descr, sizeof (descr),
            "walk the %s cache", c->cache_name);

        w.walk_name = c->cache_name;
        w.walk_descr = descr;
        w.walk_init = kmem_walk_init;
        w.walk_step = kmem_walk_step;
        w.walk_fini = kmem_walk_fini;
        w.walk_init_arg = (void *)addr;

        if (mdb_add_walker(&w) == -1)
                mdb_warn("failed to add %s walker", c->cache_name);

        return (WALK_NEXT);
}

/*ARGSUSED*/
int
kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        mdb_debug_level ^= 1;

        mdb_printf("kmem: debugging is now %s\n",
            mdb_debug_level ?
"on" : "off"); 90 91 return (DCMD_OK); 92 } 93 94 typedef struct { 95 uintptr_t kcw_first; 96 uintptr_t kcw_current; 97 } kmem_cache_walk_t; 98 99 int 100 kmem_cache_walk_init(mdb_walk_state_t *wsp) 101 { 102 kmem_cache_walk_t *kcw; 103 kmem_cache_t c; 104 uintptr_t cp; 105 GElf_Sym sym; 106 107 if (mdb_lookup_by_name("kmem_null_cache", &sym) == -1) { 108 mdb_warn("couldn't find kmem_null_cache"); 109 return (WALK_ERR); 110 } 111 112 cp = (uintptr_t)sym.st_value; 113 114 if (mdb_vread(&c, sizeof (kmem_cache_t), cp) == -1) { 115 mdb_warn("couldn't read cache at %p", cp); 116 return (WALK_ERR); 117 } 118 119 kcw = mdb_alloc(sizeof (kmem_cache_walk_t), UM_SLEEP); 120 121 kcw->kcw_first = cp; 122 kcw->kcw_current = (uintptr_t)c.cache_next; 123 wsp->walk_data = kcw; 124 125 return (WALK_NEXT); 126 } 127 128 int 129 kmem_cache_walk_step(mdb_walk_state_t *wsp) 130 { 131 kmem_cache_walk_t *kcw = wsp->walk_data; 132 kmem_cache_t c; 133 int status; 134 135 if (mdb_vread(&c, sizeof (kmem_cache_t), kcw->kcw_current) == -1) { 136 mdb_warn("couldn't read cache at %p", kcw->kcw_current); 137 return (WALK_DONE); 138 } 139 140 status = wsp->walk_callback(kcw->kcw_current, &c, wsp->walk_cbdata); 141 142 if ((kcw->kcw_current = (uintptr_t)c.cache_next) == kcw->kcw_first) 143 return (WALK_DONE); 144 145 return (status); 146 } 147 148 void 149 kmem_cache_walk_fini(mdb_walk_state_t *wsp) 150 { 151 kmem_cache_walk_t *kcw = wsp->walk_data; 152 mdb_free(kcw, sizeof (kmem_cache_walk_t)); 153 } 154 155 int 156 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 157 { 158 if (wsp->walk_addr == NULL) { 159 mdb_warn("kmem_cpu_cache doesn't support global walks"); 160 return (WALK_ERR); 161 } 162 163 if (mdb_layered_walk("cpu", wsp) == -1) { 164 mdb_warn("couldn't walk 'cpu'"); 165 return (WALK_ERR); 166 } 167 168 wsp->walk_data = (void *)wsp->walk_addr; 169 170 return (WALK_NEXT); 171 } 172 173 int 174 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 175 { 176 uintptr_t caddr = (uintptr_t)wsp->walk_data; 177 const cpu_t *cpu = wsp->walk_layer; 178 kmem_cpu_cache_t cc; 179 180 caddr += cpu->cpu_cache_offset; 181 182 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 183 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 184 return (WALK_ERR); 185 } 186 187 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 188 } 189 190 int 191 kmem_slab_walk_init(mdb_walk_state_t *wsp) 192 { 193 uintptr_t caddr = wsp->walk_addr; 194 kmem_cache_t c; 195 196 if (caddr == NULL) { 197 mdb_warn("kmem_slab doesn't support global walks\n"); 198 return (WALK_ERR); 199 } 200 201 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 202 mdb_warn("couldn't read kmem_cache at %p", caddr); 203 return (WALK_ERR); 204 } 205 206 wsp->walk_data = 207 (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab)); 208 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; 209 210 return (WALK_NEXT); 211 } 212 213 int 214 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 215 { 216 uintptr_t caddr = wsp->walk_addr; 217 kmem_cache_t c; 218 219 if (caddr == NULL) { 220 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 221 return (WALK_ERR); 222 } 223 224 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 225 mdb_warn("couldn't read kmem_cache at %p", caddr); 226 return (WALK_ERR); 227 } 228 229 wsp->walk_data = 230 (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab)); 231 wsp->walk_addr = (uintptr_t)c.cache_freelist; 232 233 /* 234 * Some consumers (umem_walk_step(), in particular) require at 235 * least one callback if there are any 
buffers in the cache. So 236 * if there are *no* partial slabs, report the last full slab, if 237 * any. 238 * 239 * Yes, this is ugly, but it's cleaner than the other possibilities. 240 */ 241 if ((uintptr_t)wsp->walk_data == wsp->walk_addr) 242 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; 243 244 return (WALK_NEXT); 245 } 246 247 int 248 kmem_slab_walk_step(mdb_walk_state_t *wsp) 249 { 250 kmem_slab_t s; 251 uintptr_t addr = wsp->walk_addr; 252 uintptr_t saddr = (uintptr_t)wsp->walk_data; 253 uintptr_t caddr = saddr - offsetof(kmem_cache_t, cache_nullslab); 254 255 if (addr == saddr) 256 return (WALK_DONE); 257 258 if (mdb_vread(&s, sizeof (s), addr) == -1) { 259 mdb_warn("failed to read slab at %p", wsp->walk_addr); 260 return (WALK_ERR); 261 } 262 263 if ((uintptr_t)s.slab_cache != caddr) { 264 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 265 addr, caddr, s.slab_cache); 266 return (WALK_ERR); 267 } 268 269 wsp->walk_addr = (uintptr_t)s.slab_next; 270 271 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); 272 } 273 274 int 275 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 276 { 277 kmem_cache_t c; 278 279 if (!(flags & DCMD_ADDRSPEC)) { 280 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 281 mdb_warn("can't walk kmem_cache"); 282 return (DCMD_ERR); 283 } 284 return (DCMD_OK); 285 } 286 287 if (DCMD_HDRSPEC(flags)) 288 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 289 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 290 291 if (mdb_vread(&c, sizeof (c), addr) == -1) { 292 mdb_warn("couldn't read kmem_cache at %p", addr); 293 return (DCMD_ERR); 294 } 295 296 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 297 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 298 299 return (DCMD_OK); 300 } 301 302 typedef struct kmem_slab_usage { 303 int ksu_refcnt; /* count of allocated buffers on slab */ 304 } kmem_slab_usage_t; 305 306 typedef struct kmem_slab_stats { 307 int ks_slabs; /* slabs in cache */ 308 int ks_partial_slabs; /* partially allocated slabs in cache */ 309 uint64_t ks_unused_buffers; /* total unused buffers in cache */ 310 int ks_buffers_per_slab; /* buffers per slab */ 311 int ks_usage_len; /* ks_usage array length */ 312 kmem_slab_usage_t *ks_usage; /* partial slab usage */ 313 uint_t *ks_bucket; /* slab usage distribution */ 314 } kmem_slab_stats_t; 315 316 #define LABEL_WIDTH 11 317 static void 318 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab, 319 size_t maxbuckets, size_t minbucketsize) 320 { 321 uint64_t total; 322 int buckets; 323 int i; 324 const int *distarray; 325 int complete[2]; 326 327 buckets = buffers_per_slab; 328 329 total = 0; 330 for (i = 0; i <= buffers_per_slab; i++) 331 total += ks_bucket[i]; 332 333 if (maxbuckets > 1) 334 buckets = MIN(buckets, maxbuckets); 335 336 if (minbucketsize > 1) { 337 /* 338 * minbucketsize does not apply to the first bucket reserved 339 * for completely allocated slabs 340 */ 341 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) / 342 minbucketsize)); 343 if ((buckets < 2) && (buffers_per_slab > 1)) { 344 buckets = 2; 345 minbucketsize = (buffers_per_slab - 1); 346 } 347 } 348 349 /* 350 * The first printed bucket is reserved for completely allocated slabs. 351 * Passing (buckets - 1) excludes that bucket from the generated 352 * distribution, since we're handling it as a special case. 
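         * The two-element 'complete' array below covers exactly that reserved
         * range: [buffers_per_slab, buffers_per_slab + 1) matches only slabs
         * whose every buffer is allocated.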
         */
        complete[0] = buffers_per_slab;
        complete[1] = buffers_per_slab + 1;
        distarray = mdb_dist_linear(buckets - 1, 1, buffers_per_slab - 1);

        mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
        mdb_dist_print_header("Buffers", LABEL_WIDTH, "Slabs");

        mdb_dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
        /*
         * Print bucket ranges in descending order after the first bucket for
         * completely allocated slabs, so a person can see immediately whether
         * or not there is fragmentation without having to scan possibly
         * multiple screens of output. Starting at (buckets - 2) excludes the
         * extra terminating bucket.
         */
        for (i = buckets - 2; i >= 0; i--) {
                mdb_dist_print_bucket(distarray, i, ks_bucket, total,
                    LABEL_WIDTH);
        }
        mdb_printf("\n");
}
#undef LABEL_WIDTH

/*ARGSUSED*/
static int
kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
{
        *is_slab = B_TRUE;
        return (WALK_DONE);
}

/*ARGSUSED*/
static int
kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
    boolean_t *is_slab)
{
        /*
         * The "kmem_slab_partial" walker reports the last full slab if there
         * are no partial slabs (for the sake of consumers that require at
         * least one callback if there are any buffers in the cache).
         */
        *is_slab = ((sp->slab_refcnt > 0) &&
            (sp->slab_refcnt < sp->slab_chunks));
        return (WALK_DONE);
}

/*ARGSUSED*/
static int
kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
    kmem_slab_stats_t *ks)
{
        kmem_slab_usage_t *ksu;
        long unused;

        ks->ks_slabs++;
        if (ks->ks_buffers_per_slab == 0) {
                ks->ks_buffers_per_slab = sp->slab_chunks;
                /* +1 to include a zero bucket */
                ks->ks_bucket = mdb_zalloc((ks->ks_buffers_per_slab + 1) *
                    sizeof (*ks->ks_bucket), UM_SLEEP | UM_GC);
        }
        ks->ks_bucket[sp->slab_refcnt]++;

        unused = (sp->slab_chunks - sp->slab_refcnt);
        if (unused == 0) {
                return (WALK_NEXT);
        }

        ks->ks_partial_slabs++;
        ks->ks_unused_buffers += unused;

        if (ks->ks_partial_slabs > ks->ks_usage_len) {
                kmem_slab_usage_t *usage;
                int len = ks->ks_usage_len;

                len = (len == 0 ?
16 : len * 2); 430 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP); 431 if (ks->ks_usage != NULL) { 432 bcopy(ks->ks_usage, usage, 433 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 434 mdb_free(ks->ks_usage, 435 ks->ks_usage_len * sizeof (kmem_slab_usage_t)); 436 } 437 ks->ks_usage = usage; 438 ks->ks_usage_len = len; 439 } 440 441 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1]; 442 ksu->ksu_refcnt = sp->slab_refcnt; 443 return (WALK_NEXT); 444 } 445 446 static void 447 kmem_slabs_header() 448 { 449 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 450 "", "", "Partial", "", "Unused", ""); 451 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 452 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste"); 453 mdb_printf("%-25s %8s %8s %9s %9s %6s\n", 454 "-------------------------", "--------", "--------", "---------", 455 "---------", "------"); 456 } 457 458 int 459 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 460 { 461 kmem_cache_t c; 462 kmem_slab_stats_t stats; 463 mdb_walk_cb_t cb; 464 int pct; 465 int tenths_pct; 466 size_t maxbuckets = 1; 467 size_t minbucketsize = 0; 468 const char *filter = NULL; 469 uint_t opt_v = FALSE; 470 boolean_t verbose = B_FALSE; 471 boolean_t skip = B_FALSE; 472 473 if (mdb_getopts(argc, argv, 474 'B', MDB_OPT_UINTPTR, &minbucketsize, 475 'b', MDB_OPT_UINTPTR, &maxbuckets, 476 'n', MDB_OPT_STR, &filter, 477 'v', MDB_OPT_SETBITS, TRUE, &opt_v, 478 NULL) != argc) { 479 return (DCMD_USAGE); 480 } 481 482 if (opt_v || (maxbuckets != 1) || (minbucketsize != 0)) { 483 verbose = 1; 484 } 485 486 if (!(flags & DCMD_ADDRSPEC)) { 487 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc, 488 argv) == -1) { 489 mdb_warn("can't walk kmem_cache"); 490 return (DCMD_ERR); 491 } 492 return (DCMD_OK); 493 } 494 495 if (mdb_vread(&c, sizeof (c), addr) == -1) { 496 mdb_warn("couldn't read kmem_cache at %p", addr); 497 return (DCMD_ERR); 498 } 499 500 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL)) { 501 skip = B_TRUE; 502 } 503 504 if (!verbose && DCMD_HDRSPEC(flags)) { 505 kmem_slabs_header(); 506 } else if (verbose && !skip) { 507 if (DCMD_HDRSPEC(flags)) { 508 kmem_slabs_header(); 509 } else { 510 boolean_t is_slab = B_FALSE; 511 const char *walker_name; 512 if (opt_v) { 513 cb = (mdb_walk_cb_t)kmem_first_partial_slab; 514 walker_name = "kmem_slab_partial"; 515 } else { 516 cb = (mdb_walk_cb_t)kmem_first_slab; 517 walker_name = "kmem_slab"; 518 } 519 (void) mdb_pwalk(walker_name, cb, &is_slab, addr); 520 if (is_slab) { 521 kmem_slabs_header(); 522 } 523 } 524 } 525 526 if (skip) { 527 return (DCMD_OK); 528 } 529 530 bzero(&stats, sizeof (kmem_slab_stats_t)); 531 cb = (mdb_walk_cb_t)kmem_slablist_stat; 532 (void) mdb_pwalk("kmem_slab", cb, &stats, addr); 533 534 if (c.cache_buftotal == 0) { 535 pct = 0; 536 tenths_pct = 0; 537 } else { 538 uint64_t n = stats.ks_unused_buffers * 10000; 539 pct = (int)(n / c.cache_buftotal); 540 tenths_pct = pct - ((pct / 100) * 100); 541 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */ 542 if (tenths_pct == 10) { 543 pct += 100; 544 tenths_pct = 0; 545 } 546 } 547 548 pct /= 100; 549 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name, 550 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal, 551 stats.ks_unused_buffers, pct, tenths_pct); 552 553 if (!verbose) { 554 return (DCMD_OK); 555 } 556 557 if (maxbuckets == 0) { 558 maxbuckets = stats.ks_buffers_per_slab; 559 } 560 561 if (((maxbuckets > 1) || (minbucketsize > 0)) && 562 (stats.ks_slabs > 0)) { 563 
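                /*
                 * A worked example of the waste figures printed above
                 * (numbers are hypothetical): 37 unused buffers out of 1200
                 * total gives n = 370000, pct = 308, tenths_pct = 1 after
                 * rounding, and pct /= 100 leaves 3, so the Waste column
                 * reads "3.1%".
                 */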
                mdb_printf("\n");
                kmem_slabs_print_dist(stats.ks_bucket,
                    stats.ks_buffers_per_slab, maxbuckets, minbucketsize);
        }

        if (opt_v && (stats.ks_partial_slabs > 0)) {
                int i;
                kmem_slab_usage_t *ksu;

                mdb_printf("  %d complete, %d partial",
                    (stats.ks_slabs - stats.ks_partial_slabs),
                    stats.ks_partial_slabs);
                if (stats.ks_partial_slabs > 0) {
                        mdb_printf(" (%d):", stats.ks_buffers_per_slab);
                }
                for (i = 0; i < stats.ks_partial_slabs; i++) {
                        ksu = &stats.ks_usage[i];
                        mdb_printf(" %d", ksu->ksu_refcnt);
                }
                mdb_printf("\n\n");
        }

        if (stats.ks_usage_len > 0) {
                mdb_free(stats.ks_usage,
                    stats.ks_usage_len * sizeof (kmem_slab_usage_t));
        }

        return (DCMD_OK);
}

void
kmem_slabs_help(void)
{
        mdb_printf("%s\n",
"Display slab usage per kmem cache.\n");
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -n name\n"
"        name of kmem cache (or matching partial name)\n"
"  -b maxbins\n"
"        Print a distribution of allocated buffers per slab using at\n"
"        most maxbins bins. The first bin is reserved for completely\n"
"        allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
"        effect as specifying the maximum allocated buffers per slab\n"
"        or setting minbinsize to 1 (-B 1).\n"
"  -B minbinsize\n"
"        Print a distribution of allocated buffers per slab, making\n"
"        all bins (except the first, reserved for completely allocated\n"
"        slabs) at least minbinsize buffers apart.\n"
"  -v    verbose output: List the allocated buffer count of each partial\n"
"        slab on the free list in order from front to back to show how\n"
"        closely the slabs are ordered by usage. For example\n"
"\n"
"          10 complete, 3 partial (8): 7 3 1\n"
"\n"
"        means there are thirteen slabs with eight buffers each, including\n"
"        three partially allocated slabs with less than all eight buffers\n"
"        allocated.\n"
"\n"
"        Buffer allocations are always from the front of the partial slab\n"
"        list. When a buffer is freed from a completely used slab, that\n"
"        slab is added to the front of the partial slab list. Assuming\n"
"        that all buffers are equally likely to be freed soon, the\n"
"        desired order of partial slabs is most-used at the front of the\n"
"        list and least-used at the back (as in the example above).\n"
"        However, if a slab contains an allocated buffer that will not\n"
"        soon be freed, it would be better for that slab to be at the\n"
"        front where it can get used up.
Taking a slab off the partial\n" 633 " slab list (either with all buffers freed or all buffers\n" 634 " allocated) reduces cache fragmentation.\n" 635 "\n" 636 "Column\t\tDescription\n" 637 "\n" 638 "Cache Name\t\tname of kmem cache\n" 639 "Slabs\t\t\ttotal slab count\n" 640 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n" 641 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n" 642 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n" 643 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n" 644 "\t\t\t for accounting structures (debug mode), slab\n" 645 "\t\t\t coloring (incremental small offsets to stagger\n" 646 "\t\t\t buffer alignment), or the per-CPU magazine layer\n"); 647 } 648 649 static int 650 addrcmp(const void *lhs, const void *rhs) 651 { 652 uintptr_t p1 = *((uintptr_t *)lhs); 653 uintptr_t p2 = *((uintptr_t *)rhs); 654 655 if (p1 < p2) 656 return (-1); 657 if (p1 > p2) 658 return (1); 659 return (0); 660 } 661 662 static int 663 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 664 { 665 const kmem_bufctl_audit_t *bcp1 = *lhs; 666 const kmem_bufctl_audit_t *bcp2 = *rhs; 667 668 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 669 return (-1); 670 671 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 672 return (1); 673 674 return (0); 675 } 676 677 typedef struct kmem_hash_walk { 678 uintptr_t *kmhw_table; 679 size_t kmhw_nelems; 680 size_t kmhw_pos; 681 kmem_bufctl_t kmhw_cur; 682 } kmem_hash_walk_t; 683 684 int 685 kmem_hash_walk_init(mdb_walk_state_t *wsp) 686 { 687 kmem_hash_walk_t *kmhw; 688 uintptr_t *hash; 689 kmem_cache_t c; 690 uintptr_t haddr, addr = wsp->walk_addr; 691 size_t nelems; 692 size_t hsize; 693 694 if (addr == NULL) { 695 mdb_warn("kmem_hash doesn't support global walks\n"); 696 return (WALK_ERR); 697 } 698 699 if (mdb_vread(&c, sizeof (c), addr) == -1) { 700 mdb_warn("couldn't read cache at addr %p", addr); 701 return (WALK_ERR); 702 } 703 704 if (!(c.cache_flags & KMF_HASH)) { 705 mdb_warn("cache %p doesn't have a hash table\n", addr); 706 return (WALK_DONE); /* nothing to do */ 707 } 708 709 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 710 kmhw->kmhw_cur.bc_next = NULL; 711 kmhw->kmhw_pos = 0; 712 713 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 714 hsize = nelems * sizeof (uintptr_t); 715 haddr = (uintptr_t)c.cache_hash_table; 716 717 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 718 if (mdb_vread(hash, hsize, haddr) == -1) { 719 mdb_warn("failed to read hash table at %p", haddr); 720 mdb_free(hash, hsize); 721 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 722 return (WALK_ERR); 723 } 724 725 wsp->walk_data = kmhw; 726 727 return (WALK_NEXT); 728 } 729 730 int 731 kmem_hash_walk_step(mdb_walk_state_t *wsp) 732 { 733 kmem_hash_walk_t *kmhw = wsp->walk_data; 734 uintptr_t addr = NULL; 735 736 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 737 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) { 738 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 739 break; 740 } 741 } 742 if (addr == NULL) 743 return (WALK_DONE); 744 745 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 746 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 747 return (WALK_ERR); 748 } 749 750 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 751 } 752 753 void 754 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 755 { 756 kmem_hash_walk_t *kmhw = wsp->walk_data; 757 758 if (kmhw == NULL) 759 return; 760 761 
        mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
        mdb_free(kmhw, sizeof (kmem_hash_walk_t));
}

/*
 * Find the address of the bufctl structure for the address 'buf' in cache
 * 'cp', which is at address caddr, and place it in *out.
 */
static int
kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
{
        uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
        kmem_bufctl_t *bcp;
        kmem_bufctl_t bc;

        if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
                mdb_warn("unable to read hash bucket for %p in cache %p",
                    buf, caddr);
                return (-1);
        }

        while (bcp != NULL) {
                if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
                    (uintptr_t)bcp) == -1) {
                        mdb_warn("unable to read bufctl at %p", bcp);
                        return (-1);
                }
                if (bc.bc_addr == buf) {
                        *out = (uintptr_t)bcp;
                        return (0);
                }
                bcp = bc.bc_next;
        }

        mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
        return (-1);
}

int
kmem_get_magsize(const kmem_cache_t *cp)
{
        uintptr_t addr = (uintptr_t)cp->cache_magtype;
        GElf_Sym mt_sym;
        kmem_magtype_t mt;
        int res;

        /*
         * if cpu 0 has a non-zero magsize, it must be correct. caches
         * with KMF_NOMAGAZINE have disabled their magazine layers, so
         * it is okay to return 0 for them.
         */
        if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
            (cp->cache_flags & KMF_NOMAGAZINE))
                return (res);

        if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
                mdb_warn("unable to read 'kmem_magtype'");
        } else if (addr < mt_sym.st_value ||
            addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
            ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
                mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
                    cp->cache_name, addr);
                return (0);
        }
        if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
                mdb_warn("unable to read magtype at %a", addr);
                return (0);
        }
        return (mt.mt_magsize);
}

/*ARGSUSED*/
static int
kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
{
        *est -= (sp->slab_chunks - sp->slab_refcnt);

        return (WALK_NEXT);
}

/*
 * Returns an upper bound on the number of allocated buffers in a given
 * cache.
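 * The estimate starts from cache_buftotal, then subtracts the free buffers
 * sitting on partial slabs and the contents of full magazines on the depot's
 * cache_full list. Rounds held in per-CPU loaded (and previously loaded)
 * magazines are not subtracted, which is why the result can only overcount.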
844 */ 845 size_t 846 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 847 { 848 int magsize; 849 size_t cache_est; 850 851 cache_est = cp->cache_buftotal; 852 853 (void) mdb_pwalk("kmem_slab_partial", 854 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 855 856 if ((magsize = kmem_get_magsize(cp)) != 0) { 857 size_t mag_est = cp->cache_full.ml_total * magsize; 858 859 if (cache_est >= mag_est) { 860 cache_est -= mag_est; 861 } else { 862 mdb_warn("cache %p's magazine layer holds more buffers " 863 "than the slab layer.\n", addr); 864 } 865 } 866 return (cache_est); 867 } 868 869 #define READMAG_ROUNDS(rounds) { \ 870 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 871 mdb_warn("couldn't read magazine at %p", kmp); \ 872 goto fail; \ 873 } \ 874 for (i = 0; i < rounds; i++) { \ 875 maglist[magcnt++] = mp->mag_round[i]; \ 876 if (magcnt == magmax) { \ 877 mdb_warn("%d magazines exceeds fudge factor\n", \ 878 magcnt); \ 879 goto fail; \ 880 } \ 881 } \ 882 } 883 884 int 885 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 886 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 887 { 888 kmem_magazine_t *kmp, *mp; 889 void **maglist = NULL; 890 int i, cpu; 891 size_t magsize, magmax, magbsize; 892 size_t magcnt = 0; 893 894 /* 895 * Read the magtype out of the cache, after verifying the pointer's 896 * correctness. 897 */ 898 magsize = kmem_get_magsize(cp); 899 if (magsize == 0) { 900 *maglistp = NULL; 901 *magcntp = 0; 902 *magmaxp = 0; 903 return (WALK_NEXT); 904 } 905 906 /* 907 * There are several places where we need to go buffer hunting: 908 * the per-CPU loaded magazine, the per-CPU spare full magazine, 909 * and the full magazine list in the depot. 910 * 911 * For an upper bound on the number of buffers in the magazine 912 * layer, we have the number of magazines on the cache_full 913 * list plus at most two magazines per CPU (the loaded and the 914 * spare). Toss in 100 magazines as a fudge factor in case this 915 * is live (the number "100" comes from the same fudge factor in 916 * crash(1M)). 917 */ 918 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 919 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 920 921 if (magbsize >= PAGESIZE / 2) { 922 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 923 addr, magbsize); 924 return (WALK_ERR); 925 } 926 927 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 928 mp = mdb_alloc(magbsize, alloc_flags); 929 if (mp == NULL || maglist == NULL) 930 goto fail; 931 932 /* 933 * First up: the magazines in the depot (i.e. on the cache_full list). 934 */ 935 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 936 READMAG_ROUNDS(magsize); 937 kmp = mp->mag_next; 938 939 if (kmp == cp->cache_full.ml_list) 940 break; /* cache_full list loop detected */ 941 } 942 943 dprintf(("cache_full list done\n")); 944 945 /* 946 * Now whip through the CPUs, snagging the loaded magazines 947 * and full spares. 
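         * For each CPU we pull cc_rounds buffers from the loaded magazine and
         * cc_prounds from the previously loaded one, when present;
         * READMAG_ROUNDS() aborts the walk if the running count ever reaches
         * the magmax fudge factor computed above.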
948 */ 949 for (cpu = 0; cpu < ncpus; cpu++) { 950 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 951 952 dprintf(("reading cpu cache %p\n", 953 (uintptr_t)ccp - (uintptr_t)cp + addr)); 954 955 if (ccp->cc_rounds > 0 && 956 (kmp = ccp->cc_loaded) != NULL) { 957 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 958 READMAG_ROUNDS(ccp->cc_rounds); 959 } 960 961 if (ccp->cc_prounds > 0 && 962 (kmp = ccp->cc_ploaded) != NULL) { 963 dprintf(("reading %d previously loaded rounds\n", 964 ccp->cc_prounds)); 965 READMAG_ROUNDS(ccp->cc_prounds); 966 } 967 } 968 969 dprintf(("magazine layer: %d buffers\n", magcnt)); 970 971 if (!(alloc_flags & UM_GC)) 972 mdb_free(mp, magbsize); 973 974 *maglistp = maglist; 975 *magcntp = magcnt; 976 *magmaxp = magmax; 977 978 return (WALK_NEXT); 979 980 fail: 981 if (!(alloc_flags & UM_GC)) { 982 if (mp) 983 mdb_free(mp, magbsize); 984 if (maglist) 985 mdb_free(maglist, magmax * sizeof (void *)); 986 } 987 return (WALK_ERR); 988 } 989 990 static int 991 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 992 { 993 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 994 } 995 996 static int 997 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 998 { 999 kmem_bufctl_audit_t b; 1000 1001 /* 1002 * if KMF_AUDIT is not set, we know that we're looking at a 1003 * kmem_bufctl_t. 1004 */ 1005 if (!(cp->cache_flags & KMF_AUDIT) || 1006 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 1007 (void) memset(&b, 0, sizeof (b)); 1008 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 1009 mdb_warn("unable to read bufctl at %p", buf); 1010 return (WALK_ERR); 1011 } 1012 } 1013 1014 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 1015 } 1016 1017 typedef struct kmem_walk { 1018 int kmw_type; 1019 1020 int kmw_addr; /* cache address */ 1021 kmem_cache_t *kmw_cp; 1022 size_t kmw_csize; 1023 1024 /* 1025 * magazine layer 1026 */ 1027 void **kmw_maglist; 1028 size_t kmw_max; 1029 size_t kmw_count; 1030 size_t kmw_pos; 1031 1032 /* 1033 * slab layer 1034 */ 1035 char *kmw_valid; /* to keep track of freed buffers */ 1036 char *kmw_ubase; /* buffer for slab data */ 1037 } kmem_walk_t; 1038 1039 static int 1040 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 1041 { 1042 kmem_walk_t *kmw; 1043 int ncpus, csize; 1044 kmem_cache_t *cp; 1045 size_t vm_quantum; 1046 1047 size_t magmax, magcnt; 1048 void **maglist = NULL; 1049 uint_t chunksize, slabsize; 1050 int status = WALK_ERR; 1051 uintptr_t addr = wsp->walk_addr; 1052 const char *layered; 1053 1054 type &= ~KM_HASH; 1055 1056 if (addr == NULL) { 1057 mdb_warn("kmem walk doesn't support global walks\n"); 1058 return (WALK_ERR); 1059 } 1060 1061 dprintf(("walking %p\n", addr)); 1062 1063 /* 1064 * First we need to figure out how many CPUs are configured in the 1065 * system to know how much to slurp out. 1066 */ 1067 mdb_readvar(&ncpus, "max_ncpus"); 1068 1069 csize = KMEM_CACHE_SIZE(ncpus); 1070 cp = mdb_alloc(csize, UM_SLEEP); 1071 1072 if (mdb_vread(cp, csize, addr) == -1) { 1073 mdb_warn("couldn't read cache at addr %p", addr); 1074 goto out2; 1075 } 1076 1077 /* 1078 * It's easy for someone to hand us an invalid cache address. 1079 * Unfortunately, it is hard for this walker to survive an 1080 * invalid cache cleanly. So we make sure that: 1081 * 1082 * 1. the vmem arena for the cache is readable, 1083 * 2. the vmem arena's quantum is a power of 2, 1084 * 3. our slabsize is a multiple of the quantum, and 1085 * 4. our chunksize is >0 and less than our slabsize. 
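         * A cache that fails any of these tests is almost certainly not a
         * real kmem_cache_t, and rejecting it here keeps the slabsize and
         * chunksize arithmetic below within sane bounds.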
1086 */ 1087 if (mdb_vread(&vm_quantum, sizeof (vm_quantum), 1088 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || 1089 vm_quantum == 0 || 1090 (vm_quantum & (vm_quantum - 1)) != 0 || 1091 cp->cache_slabsize < vm_quantum || 1092 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || 1093 cp->cache_chunksize == 0 || 1094 cp->cache_chunksize > cp->cache_slabsize) { 1095 mdb_warn("%p is not a valid kmem_cache_t\n", addr); 1096 goto out2; 1097 } 1098 1099 dprintf(("buf total is %d\n", cp->cache_buftotal)); 1100 1101 if (cp->cache_buftotal == 0) { 1102 mdb_free(cp, csize); 1103 return (WALK_DONE); 1104 } 1105 1106 /* 1107 * If they ask for bufctls, but it's a small-slab cache, 1108 * there is nothing to report. 1109 */ 1110 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 1111 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 1112 cp->cache_flags)); 1113 mdb_free(cp, csize); 1114 return (WALK_DONE); 1115 } 1116 1117 /* 1118 * If they want constructed buffers, but there's no constructor or 1119 * the cache has DEADBEEF checking enabled, there is nothing to report. 1120 */ 1121 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 1122 cp->cache_constructor == NULL || 1123 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 1124 mdb_free(cp, csize); 1125 return (WALK_DONE); 1126 } 1127 1128 /* 1129 * Read in the contents of the magazine layer 1130 */ 1131 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 1132 &magmax, UM_SLEEP) == WALK_ERR) 1133 goto out2; 1134 1135 /* 1136 * We have all of the buffers from the magazines; if we are walking 1137 * allocated buffers, sort them so we can bsearch them later. 1138 */ 1139 if (type & KM_ALLOCATED) 1140 qsort(maglist, magcnt, sizeof (void *), addrcmp); 1141 1142 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 1143 1144 kmw->kmw_type = type; 1145 kmw->kmw_addr = addr; 1146 kmw->kmw_cp = cp; 1147 kmw->kmw_csize = csize; 1148 kmw->kmw_maglist = maglist; 1149 kmw->kmw_max = magmax; 1150 kmw->kmw_count = magcnt; 1151 kmw->kmw_pos = 0; 1152 1153 /* 1154 * When walking allocated buffers in a KMF_HASH cache, we walk the 1155 * hash table instead of the slab layer. 1156 */ 1157 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 1158 layered = "kmem_hash"; 1159 1160 kmw->kmw_type |= KM_HASH; 1161 } else { 1162 /* 1163 * If we are walking freed buffers, we only need the 1164 * magazine layer plus the partially allocated slabs. 1165 * To walk allocated buffers, we need all of the slabs. 1166 */ 1167 if (type & KM_ALLOCATED) 1168 layered = "kmem_slab"; 1169 else 1170 layered = "kmem_slab_partial"; 1171 1172 /* 1173 * for small-slab caches, we read in the entire slab. For 1174 * freed buffers, we can just walk the freelist. For 1175 * allocated buffers, we use a 'valid' array to track 1176 * the freed buffers. 
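         * (kmw_valid holds one byte per chunk -- slabsize / chunksize
         * entries -- and kmem_walk_step() clears the entries that turn out
         * to be on the slab's freelist.)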
1177 */ 1178 if (!(cp->cache_flags & KMF_HASH)) { 1179 chunksize = cp->cache_chunksize; 1180 slabsize = cp->cache_slabsize; 1181 1182 kmw->kmw_ubase = mdb_alloc(slabsize + 1183 sizeof (kmem_bufctl_t), UM_SLEEP); 1184 1185 if (type & KM_ALLOCATED) 1186 kmw->kmw_valid = 1187 mdb_alloc(slabsize / chunksize, UM_SLEEP); 1188 } 1189 } 1190 1191 status = WALK_NEXT; 1192 1193 if (mdb_layered_walk(layered, wsp) == -1) { 1194 mdb_warn("unable to start layered '%s' walk", layered); 1195 status = WALK_ERR; 1196 } 1197 1198 out1: 1199 if (status == WALK_ERR) { 1200 if (kmw->kmw_valid) 1201 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1202 1203 if (kmw->kmw_ubase) 1204 mdb_free(kmw->kmw_ubase, slabsize + 1205 sizeof (kmem_bufctl_t)); 1206 1207 if (kmw->kmw_maglist) 1208 mdb_free(kmw->kmw_maglist, 1209 kmw->kmw_max * sizeof (uintptr_t)); 1210 1211 mdb_free(kmw, sizeof (kmem_walk_t)); 1212 wsp->walk_data = NULL; 1213 } 1214 1215 out2: 1216 if (status == WALK_ERR) 1217 mdb_free(cp, csize); 1218 1219 return (status); 1220 } 1221 1222 int 1223 kmem_walk_step(mdb_walk_state_t *wsp) 1224 { 1225 kmem_walk_t *kmw = wsp->walk_data; 1226 int type = kmw->kmw_type; 1227 kmem_cache_t *cp = kmw->kmw_cp; 1228 1229 void **maglist = kmw->kmw_maglist; 1230 int magcnt = kmw->kmw_count; 1231 1232 uintptr_t chunksize, slabsize; 1233 uintptr_t addr; 1234 const kmem_slab_t *sp; 1235 const kmem_bufctl_t *bcp; 1236 kmem_bufctl_t bc; 1237 1238 int chunks; 1239 char *kbase; 1240 void *buf; 1241 int i, ret; 1242 1243 char *valid, *ubase; 1244 1245 /* 1246 * first, handle the 'kmem_hash' layered walk case 1247 */ 1248 if (type & KM_HASH) { 1249 /* 1250 * We have a buffer which has been allocated out of the 1251 * global layer. We need to make sure that it's not 1252 * actually sitting in a magazine before we report it as 1253 * an allocated buffer. 1254 */ 1255 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 1256 1257 if (magcnt > 0 && 1258 bsearch(&buf, maglist, magcnt, sizeof (void *), 1259 addrcmp) != NULL) 1260 return (WALK_NEXT); 1261 1262 if (type & KM_BUFCTL) 1263 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 1264 1265 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 1266 } 1267 1268 ret = WALK_NEXT; 1269 1270 addr = kmw->kmw_addr; 1271 1272 /* 1273 * If we're walking freed buffers, report everything in the 1274 * magazine layer before processing the first slab. 1275 */ 1276 if ((type & KM_FREE) && magcnt != 0) { 1277 kmw->kmw_count = 0; /* only do this once */ 1278 for (i = 0; i < magcnt; i++) { 1279 buf = maglist[i]; 1280 1281 if (type & KM_BUFCTL) { 1282 uintptr_t out; 1283 1284 if (cp->cache_flags & KMF_BUFTAG) { 1285 kmem_buftag_t *btp; 1286 kmem_buftag_t tag; 1287 1288 /* LINTED - alignment */ 1289 btp = KMEM_BUFTAG(cp, buf); 1290 if (mdb_vread(&tag, sizeof (tag), 1291 (uintptr_t)btp) == -1) { 1292 mdb_warn("reading buftag for " 1293 "%p at %p", buf, btp); 1294 continue; 1295 } 1296 out = (uintptr_t)tag.bt_bufctl; 1297 } else { 1298 if (kmem_hash_lookup(cp, addr, buf, 1299 &out) == -1) 1300 continue; 1301 } 1302 ret = bufctl_walk_callback(cp, wsp, out); 1303 } else { 1304 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1305 } 1306 1307 if (ret != WALK_NEXT) 1308 return (ret); 1309 } 1310 } 1311 1312 /* 1313 * If they want constructed buffers, we're finished, since the 1314 * magazine layer holds them all. 
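         * (Buffers on a slab's freelist are no longer in their constructed
         * state, so a constructed walk never needs to visit the slab layer.)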
1315 */ 1316 if (type & KM_CONSTRUCTED) 1317 return (WALK_DONE); 1318 1319 /* 1320 * Handle the buffers in the current slab 1321 */ 1322 chunksize = cp->cache_chunksize; 1323 slabsize = cp->cache_slabsize; 1324 1325 sp = wsp->walk_layer; 1326 chunks = sp->slab_chunks; 1327 kbase = sp->slab_base; 1328 1329 dprintf(("kbase is %p\n", kbase)); 1330 1331 if (!(cp->cache_flags & KMF_HASH)) { 1332 valid = kmw->kmw_valid; 1333 ubase = kmw->kmw_ubase; 1334 1335 if (mdb_vread(ubase, chunks * chunksize, 1336 (uintptr_t)kbase) == -1) { 1337 mdb_warn("failed to read slab contents at %p", kbase); 1338 return (WALK_ERR); 1339 } 1340 1341 /* 1342 * Set up the valid map as fully allocated -- we'll punch 1343 * out the freelist. 1344 */ 1345 if (type & KM_ALLOCATED) 1346 (void) memset(valid, 1, chunks); 1347 } else { 1348 valid = NULL; 1349 ubase = NULL; 1350 } 1351 1352 /* 1353 * walk the slab's freelist 1354 */ 1355 bcp = sp->slab_head; 1356 1357 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 1358 1359 /* 1360 * since we could be in the middle of allocating a buffer, 1361 * our refcnt could be one higher than it aught. So we 1362 * check one further on the freelist than the count allows. 1363 */ 1364 for (i = sp->slab_refcnt; i <= chunks; i++) { 1365 uint_t ndx; 1366 1367 dprintf(("bcp is %p\n", bcp)); 1368 1369 if (bcp == NULL) { 1370 if (i == chunks) 1371 break; 1372 mdb_warn( 1373 "slab %p in cache %p freelist too short by %d\n", 1374 sp, addr, chunks - i); 1375 break; 1376 } 1377 1378 if (cp->cache_flags & KMF_HASH) { 1379 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1380 mdb_warn("failed to read bufctl ptr at %p", 1381 bcp); 1382 break; 1383 } 1384 buf = bc.bc_addr; 1385 } else { 1386 /* 1387 * Otherwise the buffer is in the slab which 1388 * we've read in; we just need to determine 1389 * its offset in the slab to find the 1390 * kmem_bufctl_t. 1391 */ 1392 bc = *((kmem_bufctl_t *) 1393 ((uintptr_t)bcp - (uintptr_t)kbase + 1394 (uintptr_t)ubase)); 1395 1396 buf = KMEM_BUF(cp, bcp); 1397 } 1398 1399 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1400 1401 if (ndx > slabsize / cp->cache_bufsize) { 1402 /* 1403 * This is very wrong; we have managed to find 1404 * a buffer in the slab which shouldn't 1405 * actually be here. Emit a warning, and 1406 * try to continue. 1407 */ 1408 mdb_warn("buf %p is out of range for " 1409 "slab %p, cache %p\n", buf, sp, addr); 1410 } else if (type & KM_ALLOCATED) { 1411 /* 1412 * we have found a buffer on the slab's freelist; 1413 * clear its entry 1414 */ 1415 valid[ndx] = 0; 1416 } else { 1417 /* 1418 * Report this freed buffer 1419 */ 1420 if (type & KM_BUFCTL) { 1421 ret = bufctl_walk_callback(cp, wsp, 1422 (uintptr_t)bcp); 1423 } else { 1424 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1425 } 1426 if (ret != WALK_NEXT) 1427 return (ret); 1428 } 1429 1430 bcp = bc.bc_next; 1431 } 1432 1433 if (bcp != NULL) { 1434 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1435 sp, addr, bcp)); 1436 } 1437 1438 /* 1439 * If we are walking freed buffers, the loop above handled reporting 1440 * them. 1441 */ 1442 if (type & KM_FREE) 1443 return (WALK_NEXT); 1444 1445 if (type & KM_BUFCTL) { 1446 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1447 "cache %p\n", addr); 1448 return (WALK_ERR); 1449 } 1450 1451 /* 1452 * Report allocated buffers, skipping buffers in the magazine layer. 1453 * We only get this far for small-slab caches. 
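         * (valid[] was preset to all ones and the freelist scan above cleared
         * the entries for free chunks; anything still marked valid and absent
         * from the sorted magazine list is reported as allocated.)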
1454 */ 1455 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1456 buf = (char *)kbase + i * chunksize; 1457 1458 if (!valid[i]) 1459 continue; /* on slab freelist */ 1460 1461 if (magcnt > 0 && 1462 bsearch(&buf, maglist, magcnt, sizeof (void *), 1463 addrcmp) != NULL) 1464 continue; /* in magazine layer */ 1465 1466 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1467 } 1468 return (ret); 1469 } 1470 1471 void 1472 kmem_walk_fini(mdb_walk_state_t *wsp) 1473 { 1474 kmem_walk_t *kmw = wsp->walk_data; 1475 uintptr_t chunksize; 1476 uintptr_t slabsize; 1477 1478 if (kmw == NULL) 1479 return; 1480 1481 if (kmw->kmw_maglist != NULL) 1482 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1483 1484 chunksize = kmw->kmw_cp->cache_chunksize; 1485 slabsize = kmw->kmw_cp->cache_slabsize; 1486 1487 if (kmw->kmw_valid != NULL) 1488 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1489 if (kmw->kmw_ubase != NULL) 1490 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1491 1492 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1493 mdb_free(kmw, sizeof (kmem_walk_t)); 1494 } 1495 1496 /*ARGSUSED*/ 1497 static int 1498 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1499 { 1500 /* 1501 * Buffers allocated from NOTOUCH caches can also show up as freed 1502 * memory in other caches. This can be a little confusing, so we 1503 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1504 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1505 */ 1506 if (c->cache_cflags & KMC_NOTOUCH) 1507 return (WALK_NEXT); 1508 1509 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1510 wsp->walk_cbdata, addr) == -1) 1511 return (WALK_DONE); 1512 1513 return (WALK_NEXT); 1514 } 1515 1516 #define KMEM_WALK_ALL(name, wsp) { \ 1517 wsp->walk_data = (name); \ 1518 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1519 return (WALK_ERR); \ 1520 return (WALK_DONE); \ 1521 } 1522 1523 int 1524 kmem_walk_init(mdb_walk_state_t *wsp) 1525 { 1526 if (wsp->walk_arg != NULL) 1527 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1528 1529 if (wsp->walk_addr == NULL) 1530 KMEM_WALK_ALL("kmem", wsp); 1531 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1532 } 1533 1534 int 1535 bufctl_walk_init(mdb_walk_state_t *wsp) 1536 { 1537 if (wsp->walk_addr == NULL) 1538 KMEM_WALK_ALL("bufctl", wsp); 1539 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1540 } 1541 1542 int 1543 freemem_walk_init(mdb_walk_state_t *wsp) 1544 { 1545 if (wsp->walk_addr == NULL) 1546 KMEM_WALK_ALL("freemem", wsp); 1547 return (kmem_walk_init_common(wsp, KM_FREE)); 1548 } 1549 1550 int 1551 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1552 { 1553 if (wsp->walk_addr == NULL) 1554 KMEM_WALK_ALL("freemem_constructed", wsp); 1555 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1556 } 1557 1558 int 1559 freectl_walk_init(mdb_walk_state_t *wsp) 1560 { 1561 if (wsp->walk_addr == NULL) 1562 KMEM_WALK_ALL("freectl", wsp); 1563 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1564 } 1565 1566 int 1567 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1568 { 1569 if (wsp->walk_addr == NULL) 1570 KMEM_WALK_ALL("freectl_constructed", wsp); 1571 return (kmem_walk_init_common(wsp, 1572 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1573 } 1574 1575 typedef struct bufctl_history_walk { 1576 void *bhw_next; 1577 kmem_cache_t *bhw_cache; 1578 kmem_slab_t *bhw_slab; 1579 hrtime_t bhw_timestamp; 1580 } bufctl_history_walk_t; 1581 1582 int 1583 
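/*
 * The "bufctl_history" walker follows a bufctl's bc_lastlog chain through the
 * transaction log, reporting progressively older log entries for the same
 * buffer, cache, and slab; it stops as soon as the chain leaves that tuple or
 * the timestamps stop decreasing.
 */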
bufctl_history_walk_init(mdb_walk_state_t *wsp) 1584 { 1585 bufctl_history_walk_t *bhw; 1586 kmem_bufctl_audit_t bc; 1587 kmem_bufctl_audit_t bcn; 1588 1589 if (wsp->walk_addr == NULL) { 1590 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1591 return (WALK_ERR); 1592 } 1593 1594 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1595 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1596 return (WALK_ERR); 1597 } 1598 1599 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1600 bhw->bhw_timestamp = 0; 1601 bhw->bhw_cache = bc.bc_cache; 1602 bhw->bhw_slab = bc.bc_slab; 1603 1604 /* 1605 * sometimes the first log entry matches the base bufctl; in that 1606 * case, skip the base bufctl. 1607 */ 1608 if (bc.bc_lastlog != NULL && 1609 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1610 bc.bc_addr == bcn.bc_addr && 1611 bc.bc_cache == bcn.bc_cache && 1612 bc.bc_slab == bcn.bc_slab && 1613 bc.bc_timestamp == bcn.bc_timestamp && 1614 bc.bc_thread == bcn.bc_thread) 1615 bhw->bhw_next = bc.bc_lastlog; 1616 else 1617 bhw->bhw_next = (void *)wsp->walk_addr; 1618 1619 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1620 wsp->walk_data = bhw; 1621 1622 return (WALK_NEXT); 1623 } 1624 1625 int 1626 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1627 { 1628 bufctl_history_walk_t *bhw = wsp->walk_data; 1629 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1630 uintptr_t baseaddr = wsp->walk_addr; 1631 kmem_bufctl_audit_t bc; 1632 1633 if (addr == NULL) 1634 return (WALK_DONE); 1635 1636 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1637 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1638 return (WALK_ERR); 1639 } 1640 1641 /* 1642 * The bufctl is only valid if the address, cache, and slab are 1643 * correct. We also check that the timestamp is decreasing, to 1644 * prevent infinite loops. 1645 */ 1646 if ((uintptr_t)bc.bc_addr != baseaddr || 1647 bc.bc_cache != bhw->bhw_cache || 1648 bc.bc_slab != bhw->bhw_slab || 1649 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1650 return (WALK_DONE); 1651 1652 bhw->bhw_next = bc.bc_lastlog; 1653 bhw->bhw_timestamp = bc.bc_timestamp; 1654 1655 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1656 } 1657 1658 void 1659 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1660 { 1661 bufctl_history_walk_t *bhw = wsp->walk_data; 1662 1663 mdb_free(bhw, sizeof (*bhw)); 1664 } 1665 1666 typedef struct kmem_log_walk { 1667 kmem_bufctl_audit_t *klw_base; 1668 kmem_bufctl_audit_t **klw_sorted; 1669 kmem_log_header_t klw_lh; 1670 size_t klw_size; 1671 size_t klw_maxndx; 1672 size_t klw_ndx; 1673 } kmem_log_walk_t; 1674 1675 int 1676 kmem_log_walk_init(mdb_walk_state_t *wsp) 1677 { 1678 uintptr_t lp = wsp->walk_addr; 1679 kmem_log_walk_t *klw; 1680 kmem_log_header_t *lhp; 1681 int maxndx, i, j, k; 1682 1683 /* 1684 * By default (global walk), walk the kmem_transaction_log. Otherwise 1685 * read the log whose kmem_log_header_t is stored at walk_addr. 
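         * (In other words, a plain "::walk" of this walker uses the global
         * transaction log, while "addr::walk" of it reads the header at addr;
         * the walker is assumed here to be registered as "kmem_log".)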
1686 */ 1687 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1688 mdb_warn("failed to read 'kmem_transaction_log'"); 1689 return (WALK_ERR); 1690 } 1691 1692 if (lp == NULL) { 1693 mdb_warn("log is disabled\n"); 1694 return (WALK_ERR); 1695 } 1696 1697 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1698 lhp = &klw->klw_lh; 1699 1700 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1701 mdb_warn("failed to read log header at %p", lp); 1702 mdb_free(klw, sizeof (kmem_log_walk_t)); 1703 return (WALK_ERR); 1704 } 1705 1706 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1707 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1708 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1709 1710 if (mdb_vread(klw->klw_base, klw->klw_size, 1711 (uintptr_t)lhp->lh_base) == -1) { 1712 mdb_warn("failed to read log at base %p", lhp->lh_base); 1713 mdb_free(klw->klw_base, klw->klw_size); 1714 mdb_free(klw, sizeof (kmem_log_walk_t)); 1715 return (WALK_ERR); 1716 } 1717 1718 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1719 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1720 1721 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1722 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1723 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1724 1725 for (j = 0; j < maxndx; j++) 1726 klw->klw_sorted[k++] = &chunk[j]; 1727 } 1728 1729 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1730 (int(*)(const void *, const void *))bufctlcmp); 1731 1732 klw->klw_maxndx = k; 1733 wsp->walk_data = klw; 1734 1735 return (WALK_NEXT); 1736 } 1737 1738 int 1739 kmem_log_walk_step(mdb_walk_state_t *wsp) 1740 { 1741 kmem_log_walk_t *klw = wsp->walk_data; 1742 kmem_bufctl_audit_t *bcp; 1743 1744 if (klw->klw_ndx == klw->klw_maxndx) 1745 return (WALK_DONE); 1746 1747 bcp = klw->klw_sorted[klw->klw_ndx++]; 1748 1749 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1750 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1751 } 1752 1753 void 1754 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1755 { 1756 kmem_log_walk_t *klw = wsp->walk_data; 1757 1758 mdb_free(klw->klw_base, klw->klw_size); 1759 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1760 sizeof (kmem_bufctl_audit_t *)); 1761 mdb_free(klw, sizeof (kmem_log_walk_t)); 1762 } 1763 1764 typedef struct allocdby_bufctl { 1765 uintptr_t abb_addr; 1766 hrtime_t abb_ts; 1767 } allocdby_bufctl_t; 1768 1769 typedef struct allocdby_walk { 1770 const char *abw_walk; 1771 uintptr_t abw_thread; 1772 size_t abw_nbufs; 1773 size_t abw_size; 1774 allocdby_bufctl_t *abw_buf; 1775 size_t abw_ndx; 1776 } allocdby_walk_t; 1777 1778 int 1779 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1780 allocdby_walk_t *abw) 1781 { 1782 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1783 return (WALK_NEXT); 1784 1785 if (abw->abw_nbufs == abw->abw_size) { 1786 allocdby_bufctl_t *buf; 1787 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1788 1789 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1790 1791 bcopy(abw->abw_buf, buf, oldsize); 1792 mdb_free(abw->abw_buf, oldsize); 1793 1794 abw->abw_size <<= 1; 1795 abw->abw_buf = buf; 1796 } 1797 1798 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1799 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1800 abw->abw_nbufs++; 1801 1802 return (WALK_NEXT); 1803 } 1804 1805 /*ARGSUSED*/ 1806 int 1807 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1808 { 1809 if (mdb_pwalk(abw->abw_walk, 
(mdb_walk_cb_t)allocdby_walk_bufctl, 1810 abw, addr) == -1) { 1811 mdb_warn("couldn't walk bufctl for cache %p", addr); 1812 return (WALK_DONE); 1813 } 1814 1815 return (WALK_NEXT); 1816 } 1817 1818 static int 1819 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1820 { 1821 if (lhs->abb_ts < rhs->abb_ts) 1822 return (1); 1823 if (lhs->abb_ts > rhs->abb_ts) 1824 return (-1); 1825 return (0); 1826 } 1827 1828 static int 1829 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1830 { 1831 allocdby_walk_t *abw; 1832 1833 if (wsp->walk_addr == NULL) { 1834 mdb_warn("allocdby walk doesn't support global walks\n"); 1835 return (WALK_ERR); 1836 } 1837 1838 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1839 1840 abw->abw_thread = wsp->walk_addr; 1841 abw->abw_walk = walk; 1842 abw->abw_size = 128; /* something reasonable */ 1843 abw->abw_buf = 1844 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1845 1846 wsp->walk_data = abw; 1847 1848 if (mdb_walk("kmem_cache", 1849 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1850 mdb_warn("couldn't walk kmem_cache"); 1851 allocdby_walk_fini(wsp); 1852 return (WALK_ERR); 1853 } 1854 1855 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1856 (int(*)(const void *, const void *))allocdby_cmp); 1857 1858 return (WALK_NEXT); 1859 } 1860 1861 int 1862 allocdby_walk_init(mdb_walk_state_t *wsp) 1863 { 1864 return (allocdby_walk_init_common(wsp, "bufctl")); 1865 } 1866 1867 int 1868 freedby_walk_init(mdb_walk_state_t *wsp) 1869 { 1870 return (allocdby_walk_init_common(wsp, "freectl")); 1871 } 1872 1873 int 1874 allocdby_walk_step(mdb_walk_state_t *wsp) 1875 { 1876 allocdby_walk_t *abw = wsp->walk_data; 1877 kmem_bufctl_audit_t bc; 1878 uintptr_t addr; 1879 1880 if (abw->abw_ndx == abw->abw_nbufs) 1881 return (WALK_DONE); 1882 1883 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1884 1885 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1886 mdb_warn("couldn't read bufctl at %p", addr); 1887 return (WALK_DONE); 1888 } 1889 1890 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1891 } 1892 1893 void 1894 allocdby_walk_fini(mdb_walk_state_t *wsp) 1895 { 1896 allocdby_walk_t *abw = wsp->walk_data; 1897 1898 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1899 mdb_free(abw, sizeof (allocdby_walk_t)); 1900 } 1901 1902 /*ARGSUSED*/ 1903 int 1904 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 1905 { 1906 char c[MDB_SYM_NAMLEN]; 1907 GElf_Sym sym; 1908 int i; 1909 1910 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 1911 for (i = 0; i < bcp->bc_depth; i++) { 1912 if (mdb_lookup_by_addr(bcp->bc_stack[i], 1913 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 1914 continue; 1915 if (strncmp(c, "kmem_", 5) == 0) 1916 continue; 1917 mdb_printf("%s+0x%lx", 1918 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 1919 break; 1920 } 1921 mdb_printf("\n"); 1922 1923 return (WALK_NEXT); 1924 } 1925 1926 static int 1927 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 1928 { 1929 if (!(flags & DCMD_ADDRSPEC)) 1930 return (DCMD_USAGE); 1931 1932 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 1933 1934 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 1935 mdb_warn("can't walk '%s' for %p", w, addr); 1936 return (DCMD_ERR); 1937 } 1938 1939 return (DCMD_OK); 1940 } 1941 1942 /*ARGSUSED*/ 1943 int 1944 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1945 { 1946 return (allocdby_common(addr, 
flags, "allocdby")); 1947 } 1948 1949 /*ARGSUSED*/ 1950 int 1951 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1952 { 1953 return (allocdby_common(addr, flags, "freedby")); 1954 } 1955 1956 /* 1957 * Return a string describing the address in relation to the given thread's 1958 * stack. 1959 * 1960 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 1961 * 1962 * - If the address is above the stack pointer, return an empty string 1963 * signifying that the address is active. 1964 * 1965 * - If the address is below the stack pointer, and the thread is not on proc, 1966 * return " (below sp)". 1967 * 1968 * - If the address is below the stack pointer, and the thread is on proc, 1969 * return " (possibly below sp)". Depending on context, we may or may not 1970 * have an accurate t_sp. 1971 */ 1972 static const char * 1973 stack_active(const kthread_t *t, uintptr_t addr) 1974 { 1975 uintptr_t panicstk; 1976 GElf_Sym sym; 1977 1978 if (t->t_state == TS_FREE) 1979 return (" (inactive interrupt thread)"); 1980 1981 /* 1982 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 1983 * no longer relates to the thread's real stack. 1984 */ 1985 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 1986 panicstk = (uintptr_t)sym.st_value; 1987 1988 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 1989 return (""); 1990 } 1991 1992 if (addr >= t->t_sp + STACK_BIAS) 1993 return (""); 1994 1995 if (t->t_state == TS_ONPROC) 1996 return (" (possibly below sp)"); 1997 1998 return (" (below sp)"); 1999 } 2000 2001 typedef struct whatis { 2002 uintptr_t w_addr; 2003 const kmem_cache_t *w_cache; 2004 const vmem_t *w_vmem; 2005 size_t w_slab_align; 2006 int w_slab_found; 2007 int w_found; 2008 int w_kmem_lite_count; 2009 uint_t w_verbose; 2010 uint_t w_freemem; 2011 uint_t w_all; 2012 uint_t w_bufctl; 2013 uint_t w_idspace; 2014 } whatis_t; 2015 2016 static void 2017 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w) 2018 { 2019 /* LINTED pointer cast may result in improper alignment */ 2020 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr); 2021 intptr_t stat; 2022 int count = 0; 2023 int i; 2024 pc_t callers[16]; 2025 2026 if (w->w_cache->cache_flags & KMF_REDZONE) { 2027 kmem_buftag_t bt; 2028 2029 if (mdb_vread(&bt, sizeof (bt), btaddr) == -1) 2030 goto done; 2031 2032 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 2033 2034 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) 2035 goto done; 2036 2037 /* 2038 * provide the bufctl ptr if it has useful information 2039 */ 2040 if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT)) 2041 baddr = (uintptr_t)bt.bt_bufctl; 2042 2043 if (w->w_cache->cache_flags & KMF_LITE) { 2044 count = w->w_kmem_lite_count; 2045 2046 if (count * sizeof (pc_t) > sizeof (callers)) 2047 count = 0; 2048 2049 if (count > 0 && 2050 mdb_vread(callers, count * sizeof (pc_t), 2051 btaddr + 2052 offsetof(kmem_buftag_lite_t, bt_history)) == -1) 2053 count = 0; 2054 2055 /* 2056 * skip unused callers 2057 */ 2058 while (count > 0 && callers[count - 1] == 2059 (pc_t)KMEM_UNINITIALIZED_PATTERN) 2060 count--; 2061 } 2062 } 2063 2064 done: 2065 if (baddr == 0) 2066 mdb_printf("%p is %p+%p, %s from %s\n", 2067 w->w_addr, addr, w->w_addr - addr, 2068 w->w_freemem == FALSE ? "allocated" : "freed", 2069 w->w_cache->cache_name); 2070 else 2071 mdb_printf("%p is %p+%p, bufctl %p %s from %s\n", 2072 w->w_addr, addr, w->w_addr - addr, baddr, 2073 w->w_freemem == FALSE ? 
"allocated" : "freed", 2074 w->w_cache->cache_name); 2075 2076 if (count > 0) { 2077 mdb_inc_indent(8); 2078 mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"", 2079 callers[0], (count != 1)? ", ":"\n"); 2080 for (i = 1; i < count; i++) 2081 mdb_printf("%a%s", callers[i], 2082 (i + 1 < count)? ", ":"\n"); 2083 mdb_dec_indent(8); 2084 } 2085 } 2086 2087 /*ARGSUSED*/ 2088 static int 2089 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w) 2090 { 2091 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2092 return (WALK_NEXT); 2093 2094 whatis_print_kmem(addr, 0, w); 2095 w->w_found++; 2096 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2097 } 2098 2099 static int 2100 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 2101 { 2102 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 2103 return (WALK_NEXT); 2104 2105 mdb_printf("%p is %p+%p ", w->w_addr, 2106 vs->vs_start, w->w_addr - vs->vs_start); 2107 2108 /* 2109 * Always provide the vmem_seg pointer if it has a stack trace. 2110 */ 2111 if (w->w_bufctl == TRUE || 2112 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) { 2113 mdb_printf("(vmem_seg %p) ", addr); 2114 } 2115 2116 mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ? 2117 "freed " : "", w->w_vmem->vm_name); 2118 2119 w->w_found++; 2120 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2121 } 2122 2123 static int 2124 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 2125 { 2126 const char *nm = vmem->vm_name; 2127 w->w_vmem = vmem; 2128 w->w_freemem = FALSE; 2129 2130 if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2131 return (WALK_NEXT); 2132 2133 if (w->w_verbose) 2134 mdb_printf("Searching vmem arena %s...\n", nm); 2135 2136 if (mdb_pwalk("vmem_alloc", 2137 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2138 mdb_warn("can't walk vmem seg for %p", addr); 2139 return (WALK_NEXT); 2140 } 2141 2142 if (w->w_found && w->w_all == FALSE) 2143 return (WALK_DONE); 2144 2145 if (w->w_verbose) 2146 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 2147 2148 w->w_freemem = TRUE; 2149 2150 if (mdb_pwalk("vmem_free", 2151 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 2152 mdb_warn("can't walk vmem seg for %p", addr); 2153 return (WALK_NEXT); 2154 } 2155 2156 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 2157 } 2158 2159 /*ARGSUSED*/ 2160 static int 2161 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w) 2162 { 2163 uintptr_t addr; 2164 2165 if (bcp == NULL) 2166 return (WALK_NEXT); 2167 2168 addr = (uintptr_t)bcp->bc_addr; 2169 2170 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 2171 return (WALK_NEXT); 2172 2173 whatis_print_kmem(addr, baddr, w); 2174 w->w_found++; 2175 return (w->w_all == TRUE ? 
WALK_NEXT : WALK_DONE); 2176 } 2177 2178 /*ARGSUSED*/ 2179 static int 2180 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w) 2181 { 2182 uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align); 2183 2184 if ((w->w_addr - base) >= w->w_cache->cache_slabsize) 2185 return (WALK_NEXT); 2186 2187 w->w_slab_found++; 2188 return (WALK_DONE); 2189 } 2190 2191 static int 2192 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2193 { 2194 char *walk, *freewalk; 2195 mdb_walk_cb_t func; 2196 vmem_t *vmp = c->cache_arena; 2197 2198 if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 2199 return (WALK_NEXT); 2200 2201 if (w->w_bufctl == FALSE) { 2202 walk = "kmem"; 2203 freewalk = "freemem"; 2204 func = (mdb_walk_cb_t)whatis_walk_kmem; 2205 } else { 2206 walk = "bufctl"; 2207 freewalk = "freectl"; 2208 func = (mdb_walk_cb_t)whatis_walk_bufctl; 2209 } 2210 2211 w->w_cache = c; 2212 2213 if (w->w_verbose) 2214 mdb_printf("Searching %s's slabs...\n", c->cache_name); 2215 2216 /* 2217 * Verify that the address is in one of the cache's slabs. If not, 2218 * we can skip the more expensive walkers. (this is purely a 2219 * heuristic -- as long as there are no false-negatives, we'll be fine) 2220 * 2221 * We try to get the cache's arena's quantum, since to accurately 2222 * get the base of a slab, you have to align it to the quantum. If 2223 * it doesn't look sensible, we fall back to not aligning. 2224 */ 2225 if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align), 2226 (uintptr_t)&vmp->vm_quantum) == -1) { 2227 mdb_warn("unable to read %p->cache_arena->vm_quantum", c); 2228 w->w_slab_align = 1; 2229 } 2230 2231 if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 || 2232 (w->w_slab_align & (w->w_slab_align - 1))) { 2233 mdb_warn("%p's arena has invalid quantum (0x%p)\n", c, 2234 w->w_slab_align); 2235 w->w_slab_align = 1; 2236 } 2237 2238 w->w_slab_found = 0; 2239 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w, 2240 addr) == -1) { 2241 mdb_warn("can't find kmem_slab walker"); 2242 return (WALK_DONE); 2243 } 2244 if (w->w_slab_found == 0) 2245 return (WALK_NEXT); 2246 2247 if (c->cache_flags & KMF_LITE) { 2248 if (mdb_readvar(&w->w_kmem_lite_count, 2249 "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16) 2250 w->w_kmem_lite_count = 0; 2251 } 2252 2253 if (w->w_verbose) 2254 mdb_printf("Searching %s...\n", c->cache_name); 2255 2256 w->w_freemem = FALSE; 2257 2258 if (mdb_pwalk(walk, func, w, addr) == -1) { 2259 mdb_warn("can't find %s walker", walk); 2260 return (WALK_DONE); 2261 } 2262 2263 if (w->w_found && w->w_all == FALSE) 2264 return (WALK_DONE); 2265 2266 /* 2267 * We have searched for allocated memory; now search for freed memory. 2268 */ 2269 if (w->w_verbose) 2270 mdb_printf("Searching %s for free memory...\n", c->cache_name); 2271 2272 w->w_freemem = TRUE; 2273 2274 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 2275 mdb_warn("can't find %s walker", freewalk); 2276 return (WALK_DONE); 2277 } 2278 2279 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 2280 } 2281 2282 static int 2283 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2284 { 2285 if (c->cache_cflags & KMC_NOTOUCH) 2286 return (WALK_NEXT); 2287 2288 return (whatis_walk_cache(addr, c, w)); 2289 } 2290 2291 static int 2292 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 2293 { 2294 if (!(c->cache_cflags & KMC_NOTOUCH)) 2295 return (WALK_NEXT); 2296 2297 return (whatis_walk_cache(addr, c, w)); 2298 } 2299 2300 static int 2301 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w) 2302 { 2303 /* 2304 * Often, one calls ::whatis on an address from a thread structure. 2305 * We use this opportunity to short circuit this case... 2306 */ 2307 if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) { 2308 mdb_printf("%p is %p+%p, allocated as a thread structure\n", 2309 w->w_addr, addr, w->w_addr - addr); 2310 w->w_found++; 2311 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2312 } 2313 2314 if (w->w_addr < (uintptr_t)t->t_stkbase || 2315 w->w_addr > (uintptr_t)t->t_stk) 2316 return (WALK_NEXT); 2317 2318 if (t->t_stkbase == NULL) 2319 return (WALK_NEXT); 2320 2321 mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr, 2322 stack_active(t, w->w_addr)); 2323 2324 w->w_found++; 2325 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2326 } 2327 2328 static int 2329 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w) 2330 { 2331 struct module mod; 2332 char name[MODMAXNAMELEN], *where; 2333 char c[MDB_SYM_NAMLEN]; 2334 Shdr shdr; 2335 GElf_Sym sym; 2336 2337 if (m->mod_mp == NULL) 2338 return (WALK_NEXT); 2339 2340 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 2341 mdb_warn("couldn't read modctl %p's module", addr); 2342 return (WALK_NEXT); 2343 } 2344 2345 if (w->w_addr >= (uintptr_t)mod.text && 2346 w->w_addr < (uintptr_t)mod.text + mod.text_size) { 2347 where = "text segment"; 2348 goto found; 2349 } 2350 2351 if (w->w_addr >= (uintptr_t)mod.data && 2352 w->w_addr < (uintptr_t)mod.data + mod.data_size) { 2353 where = "data segment"; 2354 goto found; 2355 } 2356 2357 if (w->w_addr >= (uintptr_t)mod.bss && 2358 w->w_addr < (uintptr_t)mod.bss + mod.bss_size) { 2359 where = "bss"; 2360 goto found; 2361 } 2362 2363 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 2364 mdb_warn("couldn't read symbol header for %p's module", addr); 2365 return (WALK_NEXT); 2366 } 2367 2368 if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr < 2369 (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) { 2370 where = "symtab"; 2371 goto found; 2372 } 2373 2374 if (w->w_addr >= (uintptr_t)mod.symspace && 2375 w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) { 2376 where = "symspace"; 2377 goto found; 2378 } 2379 2380 return (WALK_NEXT); 2381 2382 found: 2383 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2384 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2385 2386 mdb_printf("%p is ", w->w_addr); 2387 2388 /* 2389 * If we found this address in a module, then there's a chance that 2390 * it's actually a named symbol. Try the symbol lookup. 
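 * (mdb_lookup_by_addr() with MDB_SYM_FUZZY can return a nearby symbol, so
 * we also confirm that the address lies within the symbol's bounds before
 * printing its name.)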
2391 */ 2392 if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c), 2393 &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value && 2394 w->w_addr < (uintptr_t)sym.st_value + sym.st_size) { 2395 mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value); 2396 } 2397 2398 mdb_printf("in %s's %s\n", name, where); 2399 2400 w->w_found++; 2401 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2402 } 2403 2404 /*ARGSUSED*/ 2405 static int 2406 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w) 2407 { 2408 static int machsize = 0; 2409 mdb_ctf_id_t id; 2410 2411 if (machsize == 0) { 2412 if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0) 2413 machsize = mdb_ctf_type_size(id); 2414 else { 2415 mdb_warn("could not get size of page_t"); 2416 machsize = sizeof (page_t); 2417 } 2418 } 2419 2420 if (w->w_addr < addr || w->w_addr >= addr + machsize) 2421 return (WALK_NEXT); 2422 2423 mdb_printf("%p is %p+%p, allocated as a page structure\n", 2424 w->w_addr, addr, w->w_addr - addr); 2425 2426 w->w_found++; 2427 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2428 } 2429 2430 int 2431 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2432 { 2433 whatis_t w; 2434 2435 if (!(flags & DCMD_ADDRSPEC)) 2436 return (DCMD_USAGE); 2437 2438 w.w_verbose = FALSE; 2439 w.w_bufctl = FALSE; 2440 w.w_all = FALSE; 2441 w.w_idspace = FALSE; 2442 2443 if (mdb_getopts(argc, argv, 2444 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2445 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2446 'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace, 2447 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc) 2448 return (DCMD_USAGE); 2449 2450 w.w_addr = addr; 2451 w.w_found = 0; 2452 2453 if (w.w_verbose) 2454 mdb_printf("Searching modules...\n"); 2455 2456 if (!w.w_idspace) { 2457 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w) 2458 == -1) { 2459 mdb_warn("couldn't find modctl walker"); 2460 return (DCMD_ERR); 2461 } 2462 2463 if (w.w_found && w.w_all == FALSE) 2464 return (DCMD_OK); 2465 2466 /* 2467 * Now search all thread stacks. Yes, this is a little weak; we 2468 * can save a lot of work by first checking to see if the 2469 * address is in segkp vs. segkmem. But hey, computers are 2470 * fast. 
2471 */ 2472 if (w.w_verbose) 2473 mdb_printf("Searching threads...\n"); 2474 2475 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w) 2476 == -1) { 2477 mdb_warn("couldn't find thread walker"); 2478 return (DCMD_ERR); 2479 } 2480 2481 if (w.w_found && w.w_all == FALSE) 2482 return (DCMD_OK); 2483 2484 if (w.w_verbose) 2485 mdb_printf("Searching page structures...\n"); 2486 2487 if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w) 2488 == -1) { 2489 mdb_warn("couldn't find page walker"); 2490 return (DCMD_ERR); 2491 } 2492 2493 if (w.w_found && w.w_all == FALSE) 2494 return (DCMD_OK); 2495 } 2496 2497 if (mdb_walk("kmem_cache", 2498 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2499 mdb_warn("couldn't find kmem_cache walker"); 2500 return (DCMD_ERR); 2501 } 2502 2503 if (w.w_found && w.w_all == FALSE) 2504 return (DCMD_OK); 2505 2506 if (mdb_walk("kmem_cache", 2507 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2508 mdb_warn("couldn't find kmem_cache walker"); 2509 return (DCMD_ERR); 2510 } 2511 2512 if (w.w_found && w.w_all == FALSE) 2513 return (DCMD_OK); 2514 2515 if (mdb_walk("vmem_postfix", 2516 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2517 mdb_warn("couldn't find vmem_postfix walker"); 2518 return (DCMD_ERR); 2519 } 2520 2521 if (w.w_found == 0) 2522 mdb_printf("%p is unknown\n", addr); 2523 2524 return (DCMD_OK); 2525 } 2526 2527 void 2528 whatis_help(void) 2529 { 2530 mdb_printf( 2531 "Given a virtual address, attempt to determine where it came\n" 2532 "from.\n" 2533 "\n" 2534 "\t-v\tVerbose output; display caches/arenas/etc as they are\n" 2535 "\t\tsearched\n" 2536 "\t-a\tFind all possible sources. Default behavior is to stop at\n" 2537 "\t\tthe first (most specific) source.\n" 2538 "\t-i\tSearch only identifier arenas and caches. By default\n" 2539 "\t\tthese are ignored.\n" 2540 "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n" 2541 "\t\trespectively. 
Warning: if the buffer exists, but does not\n" 2542 "\t\thave a bufctl, it will not be reported.\n"); 2543 } 2544 2545 typedef struct kmem_log_cpu { 2546 uintptr_t kmc_low; 2547 uintptr_t kmc_high; 2548 } kmem_log_cpu_t; 2549 2550 typedef struct kmem_log_data { 2551 uintptr_t kmd_addr; 2552 kmem_log_cpu_t *kmd_cpu; 2553 } kmem_log_data_t; 2554 2555 int 2556 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2557 kmem_log_data_t *kmd) 2558 { 2559 int i; 2560 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2561 size_t bufsize; 2562 2563 for (i = 0; i < NCPU; i++) { 2564 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2565 break; 2566 } 2567 2568 if (kmd->kmd_addr) { 2569 if (b->bc_cache == NULL) 2570 return (WALK_NEXT); 2571 2572 if (mdb_vread(&bufsize, sizeof (bufsize), 2573 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2574 mdb_warn( 2575 "failed to read cache_bufsize for cache at %p", 2576 b->bc_cache); 2577 return (WALK_ERR); 2578 } 2579 2580 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2581 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2582 return (WALK_NEXT); 2583 } 2584 2585 if (i == NCPU) 2586 mdb_printf(" "); 2587 else 2588 mdb_printf("%3d", i); 2589 2590 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2591 b->bc_timestamp, b->bc_thread); 2592 2593 return (WALK_NEXT); 2594 } 2595 2596 /*ARGSUSED*/ 2597 int 2598 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2599 { 2600 kmem_log_header_t lh; 2601 kmem_cpu_log_header_t clh; 2602 uintptr_t lhp, clhp; 2603 int ncpus; 2604 uintptr_t *cpu; 2605 GElf_Sym sym; 2606 kmem_log_cpu_t *kmc; 2607 int i; 2608 kmem_log_data_t kmd; 2609 uint_t opt_b = FALSE; 2610 2611 if (mdb_getopts(argc, argv, 2612 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2613 return (DCMD_USAGE); 2614 2615 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2616 mdb_warn("failed to read 'kmem_transaction_log'"); 2617 return (DCMD_ERR); 2618 } 2619 2620 if (lhp == NULL) { 2621 mdb_warn("no kmem transaction log\n"); 2622 return (DCMD_ERR); 2623 } 2624 2625 mdb_readvar(&ncpus, "ncpus"); 2626 2627 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2628 mdb_warn("failed to read log header at %p", lhp); 2629 return (DCMD_ERR); 2630 } 2631 2632 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2633 2634 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2635 2636 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2637 mdb_warn("couldn't find 'cpu' array"); 2638 return (DCMD_ERR); 2639 } 2640 2641 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2642 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2643 NCPU * sizeof (uintptr_t), sym.st_size); 2644 return (DCMD_ERR); 2645 } 2646 2647 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2648 mdb_warn("failed to read cpu array at %p", sym.st_value); 2649 return (DCMD_ERR); 2650 } 2651 2652 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2653 kmd.kmd_addr = NULL; 2654 kmd.kmd_cpu = kmc; 2655 2656 for (i = 0; i < NCPU; i++) { 2657 2658 if (cpu[i] == NULL) 2659 continue; 2660 2661 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2662 mdb_warn("cannot read cpu %d's log header at %p", 2663 i, clhp); 2664 return (DCMD_ERR); 2665 } 2666 2667 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2668 (uintptr_t)lh.lh_base; 2669 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2670 2671 clhp += sizeof (kmem_cpu_log_header_t); 2672 } 2673 2674 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2675 "TIMESTAMP", "THREAD"); 
2676 2677 /* 2678 * If we have been passed an address, print out only log entries 2679 * corresponding to that address. If opt_b is specified, then interpret 2680 * the address as a bufctl. 2681 */ 2682 if (flags & DCMD_ADDRSPEC) { 2683 kmem_bufctl_audit_t b; 2684 2685 if (opt_b) { 2686 kmd.kmd_addr = addr; 2687 } else { 2688 if (mdb_vread(&b, 2689 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2690 mdb_warn("failed to read bufctl at %p", addr); 2691 return (DCMD_ERR); 2692 } 2693 2694 (void) kmem_log_walk(addr, &b, &kmd); 2695 2696 return (DCMD_OK); 2697 } 2698 } 2699 2700 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2701 mdb_warn("can't find kmem log walker"); 2702 return (DCMD_ERR); 2703 } 2704 2705 return (DCMD_OK); 2706 } 2707 2708 typedef struct bufctl_history_cb { 2709 int bhc_flags; 2710 int bhc_argc; 2711 const mdb_arg_t *bhc_argv; 2712 int bhc_ret; 2713 } bufctl_history_cb_t; 2714 2715 /*ARGSUSED*/ 2716 static int 2717 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2718 { 2719 bufctl_history_cb_t *bhc = arg; 2720 2721 bhc->bhc_ret = 2722 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2723 2724 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2725 2726 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2727 } 2728 2729 void 2730 bufctl_help(void) 2731 { 2732 mdb_printf("%s\n", 2733 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n"); 2734 mdb_dec_indent(2); 2735 mdb_printf("%<b>OPTIONS%</b>\n"); 2736 mdb_inc_indent(2); 2737 mdb_printf("%s", 2738 " -v Display the full content of the bufctl, including its stack trace\n" 2739 " -h retrieve the bufctl's transaction history, if available\n" 2740 " -a addr\n" 2741 " filter out bufctls not involving the buffer at addr\n" 2742 " -c caller\n" 2743 " filter out bufctls without the function/PC in their stack trace\n" 2744 " -e earliest\n" 2745 " filter out bufctls timestamped before earliest\n" 2746 " -l latest\n" 2747 " filter out bufctls timestamped after latest\n" 2748 " -t thread\n" 2749 " filter out bufctls not involving thread\n"); 2750 } 2751 2752 int 2753 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2754 { 2755 kmem_bufctl_audit_t bc; 2756 uint_t verbose = FALSE; 2757 uint_t history = FALSE; 2758 uint_t in_history = FALSE; 2759 uintptr_t caller = NULL, thread = NULL; 2760 uintptr_t laddr, haddr, baddr = NULL; 2761 hrtime_t earliest = 0, latest = 0; 2762 int i, depth; 2763 char c[MDB_SYM_NAMLEN]; 2764 GElf_Sym sym; 2765 2766 if (mdb_getopts(argc, argv, 2767 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2768 'h', MDB_OPT_SETBITS, TRUE, &history, 2769 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2770 'c', MDB_OPT_UINTPTR, &caller, 2771 't', MDB_OPT_UINTPTR, &thread, 2772 'e', MDB_OPT_UINT64, &earliest, 2773 'l', MDB_OPT_UINT64, &latest, 2774 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2775 return (DCMD_USAGE); 2776 2777 if (!(flags & DCMD_ADDRSPEC)) 2778 return (DCMD_USAGE); 2779 2780 if (in_history && !history) 2781 return (DCMD_USAGE); 2782 2783 if (history && !in_history) { 2784 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2785 UM_SLEEP | UM_GC); 2786 bufctl_history_cb_t bhc; 2787 2788 nargv[0].a_type = MDB_TYPE_STRING; 2789 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2790 2791 for (i = 0; i < argc; i++) 2792 nargv[i + 1] = argv[i]; 2793 2794 /* 2795 * When in history mode, we treat each element as if it 2796 * were in a seperate loop, so that the headers group 2797 * bufctls with similar histories. 
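 * (The "-H" argument prepended to nargv above keeps each of those
 * per-element invocations from re-entering history mode.)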
2798 */ 2799 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2800 bhc.bhc_argc = argc + 1; 2801 bhc.bhc_argv = nargv; 2802 bhc.bhc_ret = DCMD_OK; 2803 2804 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2805 addr) == -1) { 2806 mdb_warn("unable to walk bufctl_history"); 2807 return (DCMD_ERR); 2808 } 2809 2810 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2811 mdb_printf("\n"); 2812 2813 return (bhc.bhc_ret); 2814 } 2815 2816 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2817 if (verbose) { 2818 mdb_printf("%16s %16s %16s %16s\n" 2819 "%<u>%16s %16s %16s %16s%</u>\n", 2820 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2821 "", "CACHE", "LASTLOG", "CONTENTS"); 2822 } else { 2823 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2824 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2825 } 2826 } 2827 2828 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2829 mdb_warn("couldn't read bufctl at %p", addr); 2830 return (DCMD_ERR); 2831 } 2832 2833 /* 2834 * Guard against bogus bc_depth in case the bufctl is corrupt or 2835 * the address does not really refer to a bufctl. 2836 */ 2837 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2838 2839 if (caller != NULL) { 2840 laddr = caller; 2841 haddr = caller + sizeof (caller); 2842 2843 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2844 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2845 /* 2846 * We were provided an exact symbol value; any 2847 * address in the function is valid. 2848 */ 2849 laddr = (uintptr_t)sym.st_value; 2850 haddr = (uintptr_t)sym.st_value + sym.st_size; 2851 } 2852 2853 for (i = 0; i < depth; i++) 2854 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2855 break; 2856 2857 if (i == depth) 2858 return (DCMD_OK); 2859 } 2860 2861 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2862 return (DCMD_OK); 2863 2864 if (earliest != 0 && bc.bc_timestamp < earliest) 2865 return (DCMD_OK); 2866 2867 if (latest != 0 && bc.bc_timestamp > latest) 2868 return (DCMD_OK); 2869 2870 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2871 return (DCMD_OK); 2872 2873 if (flags & DCMD_PIPE_OUT) { 2874 mdb_printf("%#lr\n", addr); 2875 return (DCMD_OK); 2876 } 2877 2878 if (verbose) { 2879 mdb_printf( 2880 "%<b>%16p%</b> %16p %16llx %16p\n" 2881 "%16s %16p %16p %16p\n", 2882 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2883 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2884 2885 mdb_inc_indent(17); 2886 for (i = 0; i < depth; i++) 2887 mdb_printf("%a\n", bc.bc_stack[i]); 2888 mdb_dec_indent(17); 2889 mdb_printf("\n"); 2890 } else { 2891 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2892 bc.bc_timestamp, bc.bc_thread); 2893 2894 for (i = 0; i < depth; i++) { 2895 if (mdb_lookup_by_addr(bc.bc_stack[i], 2896 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2897 continue; 2898 if (strncmp(c, "kmem_", 5) == 0) 2899 continue; 2900 mdb_printf(" %a\n", bc.bc_stack[i]); 2901 break; 2902 } 2903 2904 if (i >= depth) 2905 mdb_printf("\n"); 2906 } 2907 2908 return (DCMD_OK); 2909 } 2910 2911 typedef struct kmem_verify { 2912 uint64_t *kmv_buf; /* buffer to read cache contents into */ 2913 size_t kmv_size; /* number of bytes in kmv_buf */ 2914 int kmv_corruption; /* > 0 if corruption found. */ 2915 int kmv_besilent; /* report actual corruption sites */ 2916 struct kmem_cache kmv_cache; /* the cache we're operating on */ 2917 } kmem_verify_t; 2918 2919 /* 2920 * verify_pattern() 2921 * verify that buf is filled with the pattern pat. 
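 *	Returns the byte offset of the first 64-bit word that differs from
 *	pat, or -1 if the entire buffer matches.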
2922 */ 2923 static int64_t 2924 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2925 { 2926 /*LINTED*/ 2927 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 2928 uint64_t *buf; 2929 2930 for (buf = buf_arg; buf < bufend; buf++) 2931 if (*buf != pat) 2932 return ((uintptr_t)buf - (uintptr_t)buf_arg); 2933 return (-1); 2934 } 2935 2936 /* 2937 * verify_buftag() 2938 * verify that btp->bt_bxstat == (bcp ^ pat) 2939 */ 2940 static int 2941 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 2942 { 2943 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 2944 } 2945 2946 /* 2947 * verify_free() 2948 * verify the integrity of a free block of memory by checking 2949 * that it is filled with 0xdeadbeef and that its buftag is sane. 2950 */ 2951 /*ARGSUSED1*/ 2952 static int 2953 verify_free(uintptr_t addr, const void *data, void *private) 2954 { 2955 kmem_verify_t *kmv = (kmem_verify_t *)private; 2956 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 2957 int64_t corrupt; /* corruption offset */ 2958 kmem_buftag_t *buftagp; /* ptr to buftag */ 2959 kmem_cache_t *cp = &kmv->kmv_cache; 2960 int besilent = kmv->kmv_besilent; 2961 2962 /*LINTED*/ 2963 buftagp = KMEM_BUFTAG(cp, buf); 2964 2965 /* 2966 * Read the buffer to check. 2967 */ 2968 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 2969 if (!besilent) 2970 mdb_warn("couldn't read %p", addr); 2971 return (WALK_NEXT); 2972 } 2973 2974 if ((corrupt = verify_pattern(buf, cp->cache_verify, 2975 KMEM_FREE_PATTERN)) >= 0) { 2976 if (!besilent) 2977 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 2978 addr, (uintptr_t)addr + corrupt); 2979 goto corrupt; 2980 } 2981 /* 2982 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 2983 * the first bytes of the buffer, hence we cannot check for red 2984 * zone corruption. 2985 */ 2986 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 2987 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 2988 if (!besilent) 2989 mdb_printf("buffer %p (free) seems to " 2990 "have a corrupt redzone pattern\n", addr); 2991 goto corrupt; 2992 } 2993 2994 /* 2995 * confirm bufctl pointer integrity. 2996 */ 2997 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 2998 if (!besilent) 2999 mdb_printf("buffer %p (free) has a corrupt " 3000 "buftag\n", addr); 3001 goto corrupt; 3002 } 3003 3004 return (WALK_NEXT); 3005 corrupt: 3006 kmv->kmv_corruption++; 3007 return (WALK_NEXT); 3008 } 3009 3010 /* 3011 * verify_alloc() 3012 * Verify that the buftag of an allocated buffer makes sense with respect 3013 * to the buffer. 3014 */ 3015 /*ARGSUSED1*/ 3016 static int 3017 verify_alloc(uintptr_t addr, const void *data, void *private) 3018 { 3019 kmem_verify_t *kmv = (kmem_verify_t *)private; 3020 kmem_cache_t *cp = &kmv->kmv_cache; 3021 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 3022 /*LINTED*/ 3023 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 3024 uint32_t *ip = (uint32_t *)buftagp; 3025 uint8_t *bp = (uint8_t *)buf; 3026 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 3027 int besilent = kmv->kmv_besilent; 3028 3029 /* 3030 * Read the buffer to check. 3031 */ 3032 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 3033 if (!besilent) 3034 mdb_warn("couldn't read %p", addr); 3035 return (WALK_NEXT); 3036 } 3037 3038 /* 3039 * There are two cases to handle: 3040 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 3041 * 0xfeedfacefeedface at the end of it 3042 * 2. 
If the buf was alloc'd using kmem_alloc, it will have 3043 * 0xbb just past the end of the region in use. At the buftag, 3044 * it will have 0xfeedface (or, if the whole buffer is in use, 3045 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 3046 * endianness), followed by 32 bits containing the offset of the 3047 * 0xbb byte in the buffer. 3048 * 3049 * Finally, the two 32-bit words that comprise the second half of the 3050 * buftag should xor to KMEM_BUFTAG_ALLOC 3051 */ 3052 3053 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 3054 looks_ok = 1; 3055 else if (!KMEM_SIZE_VALID(ip[1])) 3056 size_ok = 0; 3057 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 3058 looks_ok = 1; 3059 else 3060 size_ok = 0; 3061 3062 if (!size_ok) { 3063 if (!besilent) 3064 mdb_printf("buffer %p (allocated) has a corrupt " 3065 "redzone size encoding\n", addr); 3066 goto corrupt; 3067 } 3068 3069 if (!looks_ok) { 3070 if (!besilent) 3071 mdb_printf("buffer %p (allocated) has a corrupt " 3072 "redzone signature\n", addr); 3073 goto corrupt; 3074 } 3075 3076 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 3077 if (!besilent) 3078 mdb_printf("buffer %p (allocated) has a " 3079 "corrupt buftag\n", addr); 3080 goto corrupt; 3081 } 3082 3083 return (WALK_NEXT); 3084 corrupt: 3085 kmv->kmv_corruption++; 3086 return (WALK_NEXT); 3087 } 3088 3089 /*ARGSUSED2*/ 3090 int 3091 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3092 { 3093 if (flags & DCMD_ADDRSPEC) { 3094 int check_alloc = 0, check_free = 0; 3095 kmem_verify_t kmv; 3096 3097 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 3098 addr) == -1) { 3099 mdb_warn("couldn't read kmem_cache %p", addr); 3100 return (DCMD_ERR); 3101 } 3102 3103 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 3104 sizeof (kmem_buftag_t); 3105 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 3106 kmv.kmv_corruption = 0; 3107 3108 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 3109 check_alloc = 1; 3110 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 3111 check_free = 1; 3112 } else { 3113 if (!(flags & DCMD_LOOP)) { 3114 mdb_warn("cache %p (%s) does not have " 3115 "redzone checking enabled\n", addr, 3116 kmv.kmv_cache.cache_name); 3117 } 3118 return (DCMD_ERR); 3119 } 3120 3121 if (flags & DCMD_LOOP) { 3122 /* 3123 * table mode, don't print out every corrupt buffer 3124 */ 3125 kmv.kmv_besilent = 1; 3126 } else { 3127 mdb_printf("Summary for cache '%s'\n", 3128 kmv.kmv_cache.cache_name); 3129 mdb_inc_indent(2); 3130 kmv.kmv_besilent = 0; 3131 } 3132 3133 if (check_alloc) 3134 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 3135 if (check_free) 3136 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 3137 3138 if (flags & DCMD_LOOP) { 3139 if (kmv.kmv_corruption == 0) { 3140 mdb_printf("%-*s %?p clean\n", 3141 KMEM_CACHE_NAMELEN, 3142 kmv.kmv_cache.cache_name, addr); 3143 } else { 3144 char *s = ""; /* optional s in "buffer[s]" */ 3145 if (kmv.kmv_corruption > 1) 3146 s = "s"; 3147 3148 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 3149 KMEM_CACHE_NAMELEN, 3150 kmv.kmv_cache.cache_name, addr, 3151 kmv.kmv_corruption, s); 3152 } 3153 } else { 3154 /* 3155 * This is the more verbose mode, when the user has 3156 * type addr::kmem_verify. If the cache was clean, 3157 * nothing will have yet been printed. So say something. 
3158 */ 3159 if (kmv.kmv_corruption == 0) 3160 mdb_printf("clean\n"); 3161 3162 mdb_dec_indent(2); 3163 } 3164 } else { 3165 /* 3166 * If the user didn't specify a cache to verify, we'll walk all 3167 * kmem_cache's, specifying ourself as a callback for each... 3168 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 3169 */ 3170 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN, 3171 "Cache Name", "Addr", "Cache Integrity"); 3172 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 3173 } 3174 3175 return (DCMD_OK); 3176 } 3177 3178 typedef struct vmem_node { 3179 struct vmem_node *vn_next; 3180 struct vmem_node *vn_parent; 3181 struct vmem_node *vn_sibling; 3182 struct vmem_node *vn_children; 3183 uintptr_t vn_addr; 3184 int vn_marked; 3185 vmem_t vn_vmem; 3186 } vmem_node_t; 3187 3188 typedef struct vmem_walk { 3189 vmem_node_t *vw_root; 3190 vmem_node_t *vw_current; 3191 } vmem_walk_t; 3192 3193 int 3194 vmem_walk_init(mdb_walk_state_t *wsp) 3195 { 3196 uintptr_t vaddr, paddr; 3197 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 3198 vmem_walk_t *vw; 3199 3200 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 3201 mdb_warn("couldn't read 'vmem_list'"); 3202 return (WALK_ERR); 3203 } 3204 3205 while (vaddr != NULL) { 3206 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 3207 vp->vn_addr = vaddr; 3208 vp->vn_next = head; 3209 head = vp; 3210 3211 if (vaddr == wsp->walk_addr) 3212 current = vp; 3213 3214 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 3215 mdb_warn("couldn't read vmem_t at %p", vaddr); 3216 goto err; 3217 } 3218 3219 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 3220 } 3221 3222 for (vp = head; vp != NULL; vp = vp->vn_next) { 3223 3224 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 3225 vp->vn_sibling = root; 3226 root = vp; 3227 continue; 3228 } 3229 3230 for (parent = head; parent != NULL; parent = parent->vn_next) { 3231 if (parent->vn_addr != paddr) 3232 continue; 3233 vp->vn_sibling = parent->vn_children; 3234 parent->vn_children = vp; 3235 vp->vn_parent = parent; 3236 break; 3237 } 3238 3239 if (parent == NULL) { 3240 mdb_warn("couldn't find %p's parent (%p)\n", 3241 vp->vn_addr, paddr); 3242 goto err; 3243 } 3244 } 3245 3246 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 3247 vw->vw_root = root; 3248 3249 if (current != NULL) 3250 vw->vw_current = current; 3251 else 3252 vw->vw_current = root; 3253 3254 wsp->walk_data = vw; 3255 return (WALK_NEXT); 3256 err: 3257 for (vp = head; head != NULL; vp = head) { 3258 head = vp->vn_next; 3259 mdb_free(vp, sizeof (vmem_node_t)); 3260 } 3261 3262 return (WALK_ERR); 3263 } 3264 3265 int 3266 vmem_walk_step(mdb_walk_state_t *wsp) 3267 { 3268 vmem_walk_t *vw = wsp->walk_data; 3269 vmem_node_t *vp; 3270 int rval; 3271 3272 if ((vp = vw->vw_current) == NULL) 3273 return (WALK_DONE); 3274 3275 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3276 3277 if (vp->vn_children != NULL) { 3278 vw->vw_current = vp->vn_children; 3279 return (rval); 3280 } 3281 3282 do { 3283 vw->vw_current = vp->vn_sibling; 3284 vp = vp->vn_parent; 3285 } while (vw->vw_current == NULL && vp != NULL); 3286 3287 return (rval); 3288 } 3289 3290 /* 3291 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 3292 * children are visited before their parent. We perform the postfix walk 3293 * iteratively (rather than recursively) to allow mdb to regain control 3294 * after each callback. 
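 * The vn_marked flag records which nodes have already been visited, allowing
 * each invocation of the step function to resume where the last one left off.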
3295 */ 3296 int 3297 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 3298 { 3299 vmem_walk_t *vw = wsp->walk_data; 3300 vmem_node_t *vp = vw->vw_current; 3301 int rval; 3302 3303 /* 3304 * If this node is marked, then we know that we have already visited 3305 * all of its children. If the node has any siblings, they need to 3306 * be visited next; otherwise, we need to visit the parent. Note 3307 * that vp->vn_marked will only be zero on the first invocation of 3308 * the step function. 3309 */ 3310 if (vp->vn_marked) { 3311 if (vp->vn_sibling != NULL) 3312 vp = vp->vn_sibling; 3313 else if (vp->vn_parent != NULL) 3314 vp = vp->vn_parent; 3315 else { 3316 /* 3317 * We have neither a parent, nor a sibling, and we 3318 * have already been visited; we're done. 3319 */ 3320 return (WALK_DONE); 3321 } 3322 } 3323 3324 /* 3325 * Before we visit this node, visit its children. 3326 */ 3327 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 3328 vp = vp->vn_children; 3329 3330 vp->vn_marked = 1; 3331 vw->vw_current = vp; 3332 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 3333 3334 return (rval); 3335 } 3336 3337 void 3338 vmem_walk_fini(mdb_walk_state_t *wsp) 3339 { 3340 vmem_walk_t *vw = wsp->walk_data; 3341 vmem_node_t *root = vw->vw_root; 3342 int done; 3343 3344 if (root == NULL) 3345 return; 3346 3347 if ((vw->vw_root = root->vn_children) != NULL) 3348 vmem_walk_fini(wsp); 3349 3350 vw->vw_root = root->vn_sibling; 3351 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 3352 mdb_free(root, sizeof (vmem_node_t)); 3353 3354 if (done) { 3355 mdb_free(vw, sizeof (vmem_walk_t)); 3356 } else { 3357 vmem_walk_fini(wsp); 3358 } 3359 } 3360 3361 typedef struct vmem_seg_walk { 3362 uint8_t vsw_type; 3363 uintptr_t vsw_start; 3364 uintptr_t vsw_current; 3365 } vmem_seg_walk_t; 3366 3367 /*ARGSUSED*/ 3368 int 3369 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 3370 { 3371 vmem_seg_walk_t *vsw; 3372 3373 if (wsp->walk_addr == NULL) { 3374 mdb_warn("vmem_%s does not support global walks\n", name); 3375 return (WALK_ERR); 3376 } 3377 3378 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3379 3380 vsw->vsw_type = type; 3381 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3382 vsw->vsw_current = vsw->vsw_start; 3383 3384 return (WALK_NEXT); 3385 } 3386 3387 /* 3388 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
3389 */ 3390 #define VMEM_NONE 0 3391 3392 int 3393 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3394 { 3395 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3396 } 3397 3398 int 3399 vmem_free_walk_init(mdb_walk_state_t *wsp) 3400 { 3401 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3402 } 3403 3404 int 3405 vmem_span_walk_init(mdb_walk_state_t *wsp) 3406 { 3407 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3408 } 3409 3410 int 3411 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3412 { 3413 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3414 } 3415 3416 int 3417 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3418 { 3419 vmem_seg_t seg; 3420 vmem_seg_walk_t *vsw = wsp->walk_data; 3421 uintptr_t addr = vsw->vsw_current; 3422 static size_t seg_size = 0; 3423 int rval; 3424 3425 if (!seg_size) { 3426 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3427 mdb_warn("failed to read 'vmem_seg_size'"); 3428 seg_size = sizeof (vmem_seg_t); 3429 } 3430 } 3431 3432 if (seg_size < sizeof (seg)) 3433 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3434 3435 if (mdb_vread(&seg, seg_size, addr) == -1) { 3436 mdb_warn("couldn't read vmem_seg at %p", addr); 3437 return (WALK_ERR); 3438 } 3439 3440 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3441 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3442 rval = WALK_NEXT; 3443 } else { 3444 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3445 } 3446 3447 if (vsw->vsw_current == vsw->vsw_start) 3448 return (WALK_DONE); 3449 3450 return (rval); 3451 } 3452 3453 void 3454 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3455 { 3456 vmem_seg_walk_t *vsw = wsp->walk_data; 3457 3458 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3459 } 3460 3461 #define VMEM_NAMEWIDTH 22 3462 3463 int 3464 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3465 { 3466 vmem_t v, parent; 3467 vmem_kstat_t *vkp = &v.vm_kstat; 3468 uintptr_t paddr; 3469 int ident = 0; 3470 char c[VMEM_NAMEWIDTH]; 3471 3472 if (!(flags & DCMD_ADDRSPEC)) { 3473 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3474 mdb_warn("can't walk vmem"); 3475 return (DCMD_ERR); 3476 } 3477 return (DCMD_OK); 3478 } 3479 3480 if (DCMD_HDRSPEC(flags)) 3481 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3482 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3483 "TOTAL", "SUCCEED", "FAIL"); 3484 3485 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3486 mdb_warn("couldn't read vmem at %p", addr); 3487 return (DCMD_ERR); 3488 } 3489 3490 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3491 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3492 mdb_warn("couldn't trace %p's ancestry", addr); 3493 ident = 0; 3494 break; 3495 } 3496 paddr = (uintptr_t)parent.vm_source; 3497 } 3498 3499 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3500 3501 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3502 addr, VMEM_NAMEWIDTH, c, 3503 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3504 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3505 3506 return (DCMD_OK); 3507 } 3508 3509 void 3510 vmem_seg_help(void) 3511 { 3512 mdb_printf("%s\n", 3513 "Display the contents of vmem_seg_ts, with optional filtering.\n" 3514 "\n" 3515 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3516 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3517 "information.\n"); 3518 mdb_dec_indent(2); 3519 mdb_printf("%<b>OPTIONS%</b>\n"); 3520 mdb_inc_indent(2); 3521 mdb_printf("%s", 3522 " -v Display the full content of the vmem_seg, including its stack trace\n" 3523 " -s report the size of the segment, instead of the end address\n" 3524 " -c caller\n" 3525 " filter out segments without the function/PC in their stack trace\n" 3526 " -e earliest\n" 3527 " filter out segments timestamped before earliest\n" 3528 " -l latest\n" 3529 " filter out segments timestamped after latest\n" 3530 " -m minsize\n" 3531 " filer out segments smaller than minsize\n" 3532 " -M maxsize\n" 3533 " filer out segments larger than maxsize\n" 3534 " -t thread\n" 3535 " filter out segments not involving thread\n" 3536 " -T type\n" 3537 " filter out segments not of type 'type'\n" 3538 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3539 } 3540 3541 /*ARGSUSED*/ 3542 int 3543 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3544 { 3545 vmem_seg_t vs; 3546 pc_t *stk = vs.vs_stack; 3547 uintptr_t sz; 3548 uint8_t t; 3549 const char *type = NULL; 3550 GElf_Sym sym; 3551 char c[MDB_SYM_NAMLEN]; 3552 int no_debug; 3553 int i; 3554 int depth; 3555 uintptr_t laddr, haddr; 3556 3557 uintptr_t caller = NULL, thread = NULL; 3558 uintptr_t minsize = 0, maxsize = 0; 3559 3560 hrtime_t earliest = 0, latest = 0; 3561 3562 uint_t size = 0; 3563 uint_t verbose = 0; 3564 3565 if (!(flags & DCMD_ADDRSPEC)) 3566 return (DCMD_USAGE); 3567 3568 if (mdb_getopts(argc, argv, 3569 'c', MDB_OPT_UINTPTR, &caller, 3570 'e', MDB_OPT_UINT64, &earliest, 3571 'l', MDB_OPT_UINT64, &latest, 3572 's', MDB_OPT_SETBITS, TRUE, &size, 3573 'm', MDB_OPT_UINTPTR, &minsize, 3574 'M', MDB_OPT_UINTPTR, &maxsize, 3575 't', MDB_OPT_UINTPTR, &thread, 3576 'T', MDB_OPT_STR, &type, 3577 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3578 NULL) != argc) 3579 return (DCMD_USAGE); 3580 3581 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3582 if (verbose) { 3583 mdb_printf("%16s %4s %16s %16s %16s\n" 3584 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3585 "ADDR", "TYPE", "START", "END", "SIZE", 3586 "", "", "THREAD", "TIMESTAMP", ""); 3587 } else { 3588 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3589 "START", size? 
"SIZE" : "END", "WHO"); 3590 } 3591 } 3592 3593 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3594 mdb_warn("couldn't read vmem_seg at %p", addr); 3595 return (DCMD_ERR); 3596 } 3597 3598 if (type != NULL) { 3599 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3600 t = VMEM_ALLOC; 3601 else if (strcmp(type, "FREE") == 0) 3602 t = VMEM_FREE; 3603 else if (strcmp(type, "SPAN") == 0) 3604 t = VMEM_SPAN; 3605 else if (strcmp(type, "ROTR") == 0 || 3606 strcmp(type, "ROTOR") == 0) 3607 t = VMEM_ROTOR; 3608 else if (strcmp(type, "WLKR") == 0 || 3609 strcmp(type, "WALKER") == 0) 3610 t = VMEM_WALKER; 3611 else { 3612 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3613 type); 3614 return (DCMD_ERR); 3615 } 3616 3617 if (vs.vs_type != t) 3618 return (DCMD_OK); 3619 } 3620 3621 sz = vs.vs_end - vs.vs_start; 3622 3623 if (minsize != 0 && sz < minsize) 3624 return (DCMD_OK); 3625 3626 if (maxsize != 0 && sz > maxsize) 3627 return (DCMD_OK); 3628 3629 t = vs.vs_type; 3630 depth = vs.vs_depth; 3631 3632 /* 3633 * debug info, when present, is only accurate for VMEM_ALLOC segments 3634 */ 3635 no_debug = (t != VMEM_ALLOC) || 3636 (depth == 0 || depth > VMEM_STACK_DEPTH); 3637 3638 if (no_debug) { 3639 if (caller != NULL || thread != NULL || earliest != 0 || 3640 latest != 0) 3641 return (DCMD_OK); /* not enough info */ 3642 } else { 3643 if (caller != NULL) { 3644 laddr = caller; 3645 haddr = caller + sizeof (caller); 3646 3647 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3648 sizeof (c), &sym) != -1 && 3649 caller == (uintptr_t)sym.st_value) { 3650 /* 3651 * We were provided an exact symbol value; any 3652 * address in the function is valid. 3653 */ 3654 laddr = (uintptr_t)sym.st_value; 3655 haddr = (uintptr_t)sym.st_value + sym.st_size; 3656 } 3657 3658 for (i = 0; i < depth; i++) 3659 if (vs.vs_stack[i] >= laddr && 3660 vs.vs_stack[i] < haddr) 3661 break; 3662 3663 if (i == depth) 3664 return (DCMD_OK); 3665 } 3666 3667 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3668 return (DCMD_OK); 3669 3670 if (earliest != 0 && vs.vs_timestamp < earliest) 3671 return (DCMD_OK); 3672 3673 if (latest != 0 && vs.vs_timestamp > latest) 3674 return (DCMD_OK); 3675 } 3676 3677 type = (t == VMEM_ALLOC ? "ALLC" : 3678 t == VMEM_FREE ? "FREE" : 3679 t == VMEM_SPAN ? "SPAN" : 3680 t == VMEM_ROTOR ? "ROTR" : 3681 t == VMEM_WALKER ? "WLKR" : 3682 "????"); 3683 3684 if (flags & DCMD_PIPE_OUT) { 3685 mdb_printf("%#lr\n", addr); 3686 return (DCMD_OK); 3687 } 3688 3689 if (verbose) { 3690 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3691 addr, type, vs.vs_start, vs.vs_end, sz); 3692 3693 if (no_debug) 3694 return (DCMD_OK); 3695 3696 mdb_printf("%16s %4s %16p %16llx\n", 3697 "", "", vs.vs_thread, vs.vs_timestamp); 3698 3699 mdb_inc_indent(17); 3700 for (i = 0; i < depth; i++) { 3701 mdb_printf("%a\n", stk[i]); 3702 } 3703 mdb_dec_indent(17); 3704 mdb_printf("\n"); 3705 } else { 3706 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3707 vs.vs_start, size? 
sz : vs.vs_end); 3708 3709 if (no_debug) { 3710 mdb_printf("\n"); 3711 return (DCMD_OK); 3712 } 3713 3714 for (i = 0; i < depth; i++) { 3715 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3716 c, sizeof (c), &sym) == -1) 3717 continue; 3718 if (strncmp(c, "vmem_", 5) == 0) 3719 continue; 3720 break; 3721 } 3722 mdb_printf(" %a\n", stk[i]); 3723 } 3724 return (DCMD_OK); 3725 } 3726 3727 typedef struct kmalog_data { 3728 uintptr_t kma_addr; 3729 hrtime_t kma_newest; 3730 } kmalog_data_t; 3731 3732 /*ARGSUSED*/ 3733 static int 3734 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3735 { 3736 char name[KMEM_CACHE_NAMELEN + 1]; 3737 hrtime_t delta; 3738 int i, depth; 3739 size_t bufsize; 3740 3741 if (bcp->bc_timestamp == 0) 3742 return (WALK_DONE); 3743 3744 if (kma->kma_newest == 0) 3745 kma->kma_newest = bcp->bc_timestamp; 3746 3747 if (kma->kma_addr) { 3748 if (mdb_vread(&bufsize, sizeof (bufsize), 3749 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3750 mdb_warn( 3751 "failed to read cache_bufsize for cache at %p", 3752 bcp->bc_cache); 3753 return (WALK_ERR); 3754 } 3755 3756 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3757 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3758 return (WALK_NEXT); 3759 } 3760 3761 delta = kma->kma_newest - bcp->bc_timestamp; 3762 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3763 3764 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3765 &bcp->bc_cache->cache_name) <= 0) 3766 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3767 3768 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3769 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3770 3771 for (i = 0; i < depth; i++) 3772 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3773 3774 return (WALK_NEXT); 3775 } 3776 3777 int 3778 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3779 { 3780 const char *logname = "kmem_transaction_log"; 3781 kmalog_data_t kma; 3782 3783 if (argc > 1) 3784 return (DCMD_USAGE); 3785 3786 kma.kma_newest = 0; 3787 if (flags & DCMD_ADDRSPEC) 3788 kma.kma_addr = addr; 3789 else 3790 kma.kma_addr = NULL; 3791 3792 if (argc > 0) { 3793 if (argv->a_type != MDB_TYPE_STRING) 3794 return (DCMD_USAGE); 3795 if (strcmp(argv->a_un.a_str, "fail") == 0) 3796 logname = "kmem_failure_log"; 3797 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3798 logname = "kmem_slab_log"; 3799 else 3800 return (DCMD_USAGE); 3801 } 3802 3803 if (mdb_readvar(&addr, logname) == -1) { 3804 mdb_warn("failed to read %s log header pointer"); 3805 return (DCMD_ERR); 3806 } 3807 3808 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3809 mdb_warn("failed to walk kmem log"); 3810 return (DCMD_ERR); 3811 } 3812 3813 return (DCMD_OK); 3814 } 3815 3816 /* 3817 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3818 * The first piece is a structure which we use to accumulate kmem_cache_t 3819 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3820 * walker; we either add all caches, or ones named explicitly as arguments. 
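 * When a cache name is given, kmc_add() stops the walk at the first match;
 * with no name, it collects every cache it is handed.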
3821 */ 3822 3823 typedef struct kmclist { 3824 const char *kmc_name; /* Name to match (or NULL) */ 3825 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3826 int kmc_nelems; /* Num entries in kmc_caches */ 3827 int kmc_size; /* Size of kmc_caches array */ 3828 } kmclist_t; 3829 3830 static int 3831 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3832 { 3833 void *p; 3834 int s; 3835 3836 if (kmc->kmc_name == NULL || 3837 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3838 /* 3839 * If we have a match, grow our array (if necessary), and then 3840 * add the virtual address of the matching cache to our list. 3841 */ 3842 if (kmc->kmc_nelems >= kmc->kmc_size) { 3843 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3844 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3845 3846 bcopy(kmc->kmc_caches, p, 3847 sizeof (uintptr_t) * kmc->kmc_size); 3848 3849 kmc->kmc_caches = p; 3850 kmc->kmc_size = s; 3851 } 3852 3853 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3854 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3855 } 3856 3857 return (WALK_NEXT); 3858 } 3859 3860 /* 3861 * The second piece of ::kmausers is a hash table of allocations. Each 3862 * allocation owner is identified by its stack trace and data_size. We then 3863 * track the total bytes of all such allocations, and the number of allocations 3864 * to report at the end. Once we have a list of caches, we walk through the 3865 * allocated bufctls of each, and update our hash table accordingly. 3866 */ 3867 3868 typedef struct kmowner { 3869 struct kmowner *kmo_head; /* First hash elt in bucket */ 3870 struct kmowner *kmo_next; /* Next hash elt in chain */ 3871 size_t kmo_signature; /* Hash table signature */ 3872 uint_t kmo_num; /* Number of allocations */ 3873 size_t kmo_data_size; /* Size of each allocation */ 3874 size_t kmo_total_size; /* Total bytes of allocation */ 3875 int kmo_depth; /* Depth of stack trace */ 3876 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3877 } kmowner_t; 3878 3879 typedef struct kmusers { 3880 uintptr_t kmu_addr; /* address of interest */ 3881 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3882 kmowner_t *kmu_hash; /* Hash table of owners */ 3883 int kmu_nelems; /* Number of entries in use */ 3884 int kmu_size; /* Total number of entries */ 3885 } kmusers_t; 3886 3887 static void 3888 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3889 size_t size, size_t data_size) 3890 { 3891 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3892 size_t bucket, signature = data_size; 3893 kmowner_t *kmo, *kmoend; 3894 3895 /* 3896 * If the hash table is full, double its size and rehash everything. 3897 */ 3898 if (kmu->kmu_nelems >= kmu->kmu_size) { 3899 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3900 3901 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3902 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 3903 kmu->kmu_hash = kmo; 3904 kmu->kmu_size = s; 3905 3906 kmoend = kmu->kmu_hash + kmu->kmu_size; 3907 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 3908 kmo->kmo_head = NULL; 3909 3910 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 3911 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 3912 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 3913 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 3914 kmu->kmu_hash[bucket].kmo_head = kmo; 3915 } 3916 } 3917 3918 /* 3919 * Finish computing the hash signature from the stack trace, and then 3920 * see if the owner is in the hash table. If so, update our stats. 
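 * Two owners are considered the same only if their data size, stack depth,
 * and every stack frame match; the signature is just a cheap first-pass
 * filter along the hash chain.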
3921 */ 3922 for (i = 0; i < depth; i++) 3923 signature += bcp->bc_stack[i]; 3924 3925 bucket = signature & (kmu->kmu_size - 1); 3926 3927 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 3928 if (kmo->kmo_signature == signature) { 3929 size_t difference = 0; 3930 3931 difference |= kmo->kmo_data_size - data_size; 3932 difference |= kmo->kmo_depth - depth; 3933 3934 for (i = 0; i < depth; i++) { 3935 difference |= kmo->kmo_stack[i] - 3936 bcp->bc_stack[i]; 3937 } 3938 3939 if (difference == 0) { 3940 kmo->kmo_total_size += size; 3941 kmo->kmo_num++; 3942 return; 3943 } 3944 } 3945 } 3946 3947 /* 3948 * If the owner is not yet hashed, grab the next element and fill it 3949 * in based on the allocation information. 3950 */ 3951 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 3952 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 3953 kmu->kmu_hash[bucket].kmo_head = kmo; 3954 3955 kmo->kmo_signature = signature; 3956 kmo->kmo_num = 1; 3957 kmo->kmo_data_size = data_size; 3958 kmo->kmo_total_size = size; 3959 kmo->kmo_depth = depth; 3960 3961 for (i = 0; i < depth; i++) 3962 kmo->kmo_stack[i] = bcp->bc_stack[i]; 3963 } 3964 3965 /* 3966 * When ::kmausers is invoked without the -f flag, we simply update our hash 3967 * table with the information from each allocated bufctl. 3968 */ 3969 /*ARGSUSED*/ 3970 static int 3971 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 3972 { 3973 const kmem_cache_t *cp = kmu->kmu_cache; 3974 3975 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3976 return (WALK_NEXT); 3977 } 3978 3979 /* 3980 * When ::kmausers is invoked with the -f flag, we print out the information 3981 * for each bufctl as well as updating the hash table. 3982 */ 3983 static int 3984 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 3985 { 3986 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3987 const kmem_cache_t *cp = kmu->kmu_cache; 3988 kmem_bufctl_t bufctl; 3989 3990 if (kmu->kmu_addr) { 3991 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 3992 mdb_warn("couldn't read bufctl at %p", addr); 3993 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 3994 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 3995 cp->cache_bufsize) 3996 return (WALK_NEXT); 3997 } 3998 3999 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 4000 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 4001 4002 for (i = 0; i < depth; i++) 4003 mdb_printf("\t %a\n", bcp->bc_stack[i]); 4004 4005 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 4006 return (WALK_NEXT); 4007 } 4008 4009 /* 4010 * We sort our results by allocation size before printing them. 4011 */ 4012 static int 4013 kmownercmp(const void *lp, const void *rp) 4014 { 4015 const kmowner_t *lhs = lp; 4016 const kmowner_t *rhs = rp; 4017 4018 return (rhs->kmo_total_size - lhs->kmo_total_size); 4019 } 4020 4021 /* 4022 * The main engine of ::kmausers is relatively straightforward: First we 4023 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 4024 * iterate over the allocated bufctls of each cache in the list. Finally, 4025 * we sort and print our results. 
4026 */ 4027 /*ARGSUSED*/ 4028 int 4029 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4030 { 4031 int mem_threshold = 8192; /* Minimum # bytes for printing */ 4032 int cnt_threshold = 100; /* Minimum # blocks for printing */ 4033 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 4034 int do_all_caches = 1; /* Do all caches (no arguments) */ 4035 int opt_e = FALSE; /* Include "small" users */ 4036 int opt_f = FALSE; /* Print stack traces */ 4037 4038 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 4039 kmowner_t *kmo, *kmoend; 4040 int i, oelems; 4041 4042 kmclist_t kmc; 4043 kmusers_t kmu; 4044 4045 bzero(&kmc, sizeof (kmc)); 4046 bzero(&kmu, sizeof (kmu)); 4047 4048 while ((i = mdb_getopts(argc, argv, 4049 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 4050 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 4051 4052 argv += i; /* skip past options we just processed */ 4053 argc -= i; /* adjust argc */ 4054 4055 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 4056 return (DCMD_USAGE); 4057 4058 oelems = kmc.kmc_nelems; 4059 kmc.kmc_name = argv->a_un.a_str; 4060 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4061 4062 if (kmc.kmc_nelems == oelems) { 4063 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 4064 return (DCMD_ERR); 4065 } 4066 4067 do_all_caches = 0; 4068 argv++; 4069 argc--; 4070 } 4071 4072 if (flags & DCMD_ADDRSPEC) { 4073 opt_f = TRUE; 4074 kmu.kmu_addr = addr; 4075 } else { 4076 kmu.kmu_addr = NULL; 4077 } 4078 4079 if (opt_e) 4080 mem_threshold = cnt_threshold = 0; 4081 4082 if (opt_f) 4083 callback = (mdb_walk_cb_t)kmause2; 4084 4085 if (do_all_caches) { 4086 kmc.kmc_name = NULL; /* match all cache names */ 4087 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 4088 } 4089 4090 for (i = 0; i < kmc.kmc_nelems; i++) { 4091 uintptr_t cp = kmc.kmc_caches[i]; 4092 kmem_cache_t c; 4093 4094 if (mdb_vread(&c, sizeof (c), cp) == -1) { 4095 mdb_warn("failed to read cache at %p", cp); 4096 continue; 4097 } 4098 4099 if (!(c.cache_flags & KMF_AUDIT)) { 4100 if (!do_all_caches) { 4101 mdb_warn("KMF_AUDIT is not enabled for %s\n", 4102 c.cache_name); 4103 } 4104 continue; 4105 } 4106 4107 kmu.kmu_cache = &c; 4108 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 4109 audited_caches++; 4110 } 4111 4112 if (audited_caches == 0 && do_all_caches) { 4113 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 4114 return (DCMD_ERR); 4115 } 4116 4117 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 4118 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 4119 4120 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 4121 if (kmo->kmo_total_size < mem_threshold && 4122 kmo->kmo_num < cnt_threshold) 4123 continue; 4124 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 4125 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 4126 for (i = 0; i < kmo->kmo_depth; i++) 4127 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 4128 } 4129 4130 return (DCMD_OK); 4131 } 4132 4133 void 4134 kmausers_help(void) 4135 { 4136 mdb_printf( 4137 "Displays the largest users of the kmem allocator, sorted by \n" 4138 "trace. If one or more caches is specified, only those caches\n" 4139 "will be searched. By default, all caches are searched. If an\n" 4140 "address is specified, then only those allocations which include\n" 4141 "the given address are displayed. Specifying an address implies\n" 4142 "-f.\n" 4143 "\n" 4144 "\t-e\tInclude all users, not just the largest\n" 4145 "\t-f\tDisplay individual allocations. 
By default, users are\n" 4146 "\t\tgrouped by stack\n"); 4147 } 4148 4149 static int 4150 kmem_ready_check(void) 4151 { 4152 int ready; 4153 4154 if (mdb_readvar(&ready, "kmem_ready") < 0) 4155 return (-1); /* errno is set for us */ 4156 4157 return (ready); 4158 } 4159 4160 /*ARGSUSED*/ 4161 static void 4162 kmem_statechange_cb(void *arg) 4163 { 4164 static int been_ready = 0; 4165 4166 leaky_cleanup(1); /* state changes invalidate leaky state */ 4167 4168 if (been_ready) 4169 return; 4170 4171 if (kmem_ready_check() <= 0) 4172 return; 4173 4174 been_ready = 1; 4175 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 4176 } 4177 4178 void 4179 kmem_init(void) 4180 { 4181 mdb_walker_t w = { 4182 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 4183 kmem_cache_walk_step, kmem_cache_walk_fini 4184 }; 4185 4186 /* 4187 * If kmem is ready, we'll need to invoke the kmem_cache walker 4188 * immediately. Walkers in the linkage structure won't be ready until 4189 * _mdb_init returns, so we'll need to add this one manually. If kmem 4190 * is ready, we'll use the walker to initialize the caches. If kmem 4191 * isn't ready, we'll register a callback that will allow us to defer 4192 * cache walking until it is. 4193 */ 4194 if (mdb_add_walker(&w) != 0) { 4195 mdb_warn("failed to add kmem_cache walker"); 4196 return; 4197 } 4198 4199 (void) mdb_callback_add(MDB_CALLBACK_STCHG, kmem_statechange_cb, NULL); 4200 kmem_statechange_cb(NULL); 4201 } 4202 4203 typedef struct whatthread { 4204 uintptr_t wt_target; 4205 int wt_verbose; 4206 } whatthread_t; 4207 4208 static int 4209 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 4210 { 4211 uintptr_t current, data; 4212 4213 if (t->t_stkbase == NULL) 4214 return (WALK_NEXT); 4215 4216 /* 4217 * Warn about swapped out threads, but drive on anyway 4218 */ 4219 if (!(t->t_schedflag & TS_LOAD)) { 4220 mdb_warn("thread %p's stack swapped out\n", addr); 4221 return (WALK_NEXT); 4222 } 4223 4224 /* 4225 * Search the thread's stack for the given pointer. Note that it would 4226 * be more efficient to follow ::kgrep's lead and read in page-sized 4227 * chunks, but this routine is already fast and simple. 4228 */ 4229 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 4230 current += sizeof (uintptr_t)) { 4231 if (mdb_vread(&data, sizeof (data), current) == -1) { 4232 mdb_warn("couldn't read thread %p's stack at %p", 4233 addr, current); 4234 return (WALK_ERR); 4235 } 4236 4237 if (data == w->wt_target) { 4238 if (w->wt_verbose) { 4239 mdb_printf("%p in thread %p's stack%s\n", 4240 current, addr, stack_active(t, current)); 4241 } else { 4242 mdb_printf("%#lr\n", addr); 4243 return (WALK_NEXT); 4244 } 4245 } 4246 } 4247 4248 return (WALK_NEXT); 4249 } 4250 4251 int 4252 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 4253 { 4254 whatthread_t w; 4255 4256 if (!(flags & DCMD_ADDRSPEC)) 4257 return (DCMD_USAGE); 4258 4259 w.wt_verbose = FALSE; 4260 w.wt_target = addr; 4261 4262 if (mdb_getopts(argc, argv, 4263 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 4264 return (DCMD_USAGE); 4265 4266 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 4267 == -1) { 4268 mdb_warn("couldn't walk threads"); 4269 return (DCMD_ERR); 4270 } 4271 4272 return (DCMD_OK); 4273 } 4274
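/*
 * Illustrative usage of the dcmds defined above (example session only;
 * "addr" and "thread" below stand in for real addresses from the target
 * being examined):
 *
 *	> addr::whatis -v		what does this address refer to?
 *	> addr::kmem_verify		check a single cache for corruption
 *	> ::kmem_verify			summarize the integrity of all caches
 *	> thread::allocdby		allocations performed by a thread
 *	> ::kmausers			largest consumers of kmem allocations
 *	> addr::whatthread		which thread stacks reference addr?
 */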