1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <mdb/mdb_param.h> 30 #include <mdb/mdb_modapi.h> 31 #include <mdb/mdb_ctf.h> 32 #include <sys/cpuvar.h> 33 #include <sys/kmem_impl.h> 34 #include <sys/vmem_impl.h> 35 #include <sys/machelf.h> 36 #include <sys/modctl.h> 37 #include <sys/kobj.h> 38 #include <sys/panic.h> 39 #include <sys/stack.h> 40 #include <sys/sysmacros.h> 41 #include <vm/page.h> 42 43 #include "kmem.h" 44 45 #define dprintf(x) if (mdb_debug_level) { \ 46 mdb_printf("kmem debug: "); \ 47 /*CSTYLED*/\ 48 mdb_printf x ;\ 49 } 50 51 #define KM_ALLOCATED 0x01 52 #define KM_FREE 0x02 53 #define KM_BUFCTL 0x04 54 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */ 55 #define KM_HASH 0x10 56 57 static int mdb_debug_level = 0; 58 59 static void *kmem_ready_cbhdl; 60 61 /*ARGSUSED*/ 62 static int 63 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored) 64 { 65 mdb_walker_t w; 66 char descr[64]; 67 68 (void) mdb_snprintf(descr, sizeof (descr), 69 "walk the %s cache", c->cache_name); 70 71 w.walk_name = c->cache_name; 72 w.walk_descr = descr; 73 w.walk_init = kmem_walk_init; 74 w.walk_step = kmem_walk_step; 75 w.walk_fini = kmem_walk_fini; 76 w.walk_init_arg = (void *)addr; 77 78 if (mdb_add_walker(&w) == -1) 79 mdb_warn("failed to add %s walker", c->cache_name); 80 81 return (WALK_NEXT); 82 } 83 84 /*ARGSUSED*/ 85 int 86 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 87 { 88 mdb_debug_level ^= 1; 89 90 mdb_printf("kmem: debugging is now %s\n", 91 mdb_debug_level ? 
"on" : "off"); 92 93 return (DCMD_OK); 94 } 95 96 typedef struct { 97 uintptr_t kcw_first; 98 uintptr_t kcw_current; 99 } kmem_cache_walk_t; 100 101 int 102 kmem_cache_walk_init(mdb_walk_state_t *wsp) 103 { 104 kmem_cache_walk_t *kcw; 105 kmem_cache_t c; 106 uintptr_t cp; 107 GElf_Sym sym; 108 109 if (mdb_lookup_by_name("kmem_null_cache", &sym) == -1) { 110 mdb_warn("couldn't find kmem_null_cache"); 111 return (WALK_ERR); 112 } 113 114 cp = (uintptr_t)sym.st_value; 115 116 if (mdb_vread(&c, sizeof (kmem_cache_t), cp) == -1) { 117 mdb_warn("couldn't read cache at %p", cp); 118 return (WALK_ERR); 119 } 120 121 kcw = mdb_alloc(sizeof (kmem_cache_walk_t), UM_SLEEP); 122 123 kcw->kcw_first = cp; 124 kcw->kcw_current = (uintptr_t)c.cache_next; 125 wsp->walk_data = kcw; 126 127 return (WALK_NEXT); 128 } 129 130 int 131 kmem_cache_walk_step(mdb_walk_state_t *wsp) 132 { 133 kmem_cache_walk_t *kcw = wsp->walk_data; 134 kmem_cache_t c; 135 int status; 136 137 if (mdb_vread(&c, sizeof (kmem_cache_t), kcw->kcw_current) == -1) { 138 mdb_warn("couldn't read cache at %p", kcw->kcw_current); 139 return (WALK_DONE); 140 } 141 142 status = wsp->walk_callback(kcw->kcw_current, &c, wsp->walk_cbdata); 143 144 if ((kcw->kcw_current = (uintptr_t)c.cache_next) == kcw->kcw_first) 145 return (WALK_DONE); 146 147 return (status); 148 } 149 150 void 151 kmem_cache_walk_fini(mdb_walk_state_t *wsp) 152 { 153 kmem_cache_walk_t *kcw = wsp->walk_data; 154 mdb_free(kcw, sizeof (kmem_cache_walk_t)); 155 } 156 157 int 158 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp) 159 { 160 if (wsp->walk_addr == NULL) { 161 mdb_warn("kmem_cpu_cache doesn't support global walks"); 162 return (WALK_ERR); 163 } 164 165 if (mdb_layered_walk("cpu", wsp) == -1) { 166 mdb_warn("couldn't walk 'cpu'"); 167 return (WALK_ERR); 168 } 169 170 wsp->walk_data = (void *)wsp->walk_addr; 171 172 return (WALK_NEXT); 173 } 174 175 int 176 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp) 177 { 178 uintptr_t caddr = (uintptr_t)wsp->walk_data; 179 const cpu_t *cpu = wsp->walk_layer; 180 kmem_cpu_cache_t cc; 181 182 caddr += cpu->cpu_cache_offset; 183 184 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) { 185 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr); 186 return (WALK_ERR); 187 } 188 189 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); 190 } 191 192 int 193 kmem_slab_walk_init(mdb_walk_state_t *wsp) 194 { 195 uintptr_t caddr = wsp->walk_addr; 196 kmem_cache_t c; 197 198 if (caddr == NULL) { 199 mdb_warn("kmem_slab doesn't support global walks\n"); 200 return (WALK_ERR); 201 } 202 203 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 204 mdb_warn("couldn't read kmem_cache at %p", caddr); 205 return (WALK_ERR); 206 } 207 208 wsp->walk_data = 209 (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab)); 210 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; 211 212 return (WALK_NEXT); 213 } 214 215 int 216 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp) 217 { 218 uintptr_t caddr = wsp->walk_addr; 219 kmem_cache_t c; 220 221 if (caddr == NULL) { 222 mdb_warn("kmem_slab_partial doesn't support global walks\n"); 223 return (WALK_ERR); 224 } 225 226 if (mdb_vread(&c, sizeof (c), caddr) == -1) { 227 mdb_warn("couldn't read kmem_cache at %p", caddr); 228 return (WALK_ERR); 229 } 230 231 wsp->walk_data = 232 (void *)(caddr + offsetof(kmem_cache_t, cache_nullslab)); 233 wsp->walk_addr = (uintptr_t)c.cache_freelist; 234 235 /* 236 * Some consumers (umem_walk_step(), in particular) require at 237 * least one callback if there are any 
buffers in the cache. So 238 * if there are *no* partial slabs, report the last full slab, if 239 * any. 240 * 241 * Yes, this is ugly, but it's cleaner than the other possibilities. 242 */ 243 if ((uintptr_t)wsp->walk_data == wsp->walk_addr) 244 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; 245 246 return (WALK_NEXT); 247 } 248 249 int 250 kmem_slab_walk_step(mdb_walk_state_t *wsp) 251 { 252 kmem_slab_t s; 253 uintptr_t addr = wsp->walk_addr; 254 uintptr_t saddr = (uintptr_t)wsp->walk_data; 255 uintptr_t caddr = saddr - offsetof(kmem_cache_t, cache_nullslab); 256 257 if (addr == saddr) 258 return (WALK_DONE); 259 260 if (mdb_vread(&s, sizeof (s), addr) == -1) { 261 mdb_warn("failed to read slab at %p", wsp->walk_addr); 262 return (WALK_ERR); 263 } 264 265 if ((uintptr_t)s.slab_cache != caddr) { 266 mdb_warn("slab %p isn't in cache %p (in cache %p)\n", 267 addr, caddr, s.slab_cache); 268 return (WALK_ERR); 269 } 270 271 wsp->walk_addr = (uintptr_t)s.slab_next; 272 273 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); 274 } 275 276 int 277 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) 278 { 279 kmem_cache_t c; 280 281 if (!(flags & DCMD_ADDRSPEC)) { 282 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) { 283 mdb_warn("can't walk kmem_cache"); 284 return (DCMD_ERR); 285 } 286 return (DCMD_OK); 287 } 288 289 if (DCMD_HDRSPEC(flags)) 290 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME", 291 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); 292 293 if (mdb_vread(&c, sizeof (c), addr) == -1) { 294 mdb_warn("couldn't read kmem_cache at %p", addr); 295 return (DCMD_ERR); 296 } 297 298 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name, 299 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); 300 301 return (DCMD_OK); 302 } 303 304 static int 305 addrcmp(const void *lhs, const void *rhs) 306 { 307 uintptr_t p1 = *((uintptr_t *)lhs); 308 uintptr_t p2 = *((uintptr_t *)rhs); 309 310 if (p1 < p2) 311 return (-1); 312 if (p1 > p2) 313 return (1); 314 return (0); 315 } 316 317 static int 318 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs) 319 { 320 const kmem_bufctl_audit_t *bcp1 = *lhs; 321 const kmem_bufctl_audit_t *bcp2 = *rhs; 322 323 if (bcp1->bc_timestamp > bcp2->bc_timestamp) 324 return (-1); 325 326 if (bcp1->bc_timestamp < bcp2->bc_timestamp) 327 return (1); 328 329 return (0); 330 } 331 332 typedef struct kmem_hash_walk { 333 uintptr_t *kmhw_table; 334 size_t kmhw_nelems; 335 size_t kmhw_pos; 336 kmem_bufctl_t kmhw_cur; 337 } kmem_hash_walk_t; 338 339 int 340 kmem_hash_walk_init(mdb_walk_state_t *wsp) 341 { 342 kmem_hash_walk_t *kmhw; 343 uintptr_t *hash; 344 kmem_cache_t c; 345 uintptr_t haddr, addr = wsp->walk_addr; 346 size_t nelems; 347 size_t hsize; 348 349 if (addr == NULL) { 350 mdb_warn("kmem_hash doesn't support global walks\n"); 351 return (WALK_ERR); 352 } 353 354 if (mdb_vread(&c, sizeof (c), addr) == -1) { 355 mdb_warn("couldn't read cache at addr %p", addr); 356 return (WALK_ERR); 357 } 358 359 if (!(c.cache_flags & KMF_HASH)) { 360 mdb_warn("cache %p doesn't have a hash table\n", addr); 361 return (WALK_DONE); /* nothing to do */ 362 } 363 364 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP); 365 kmhw->kmhw_cur.bc_next = NULL; 366 kmhw->kmhw_pos = 0; 367 368 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1; 369 hsize = nelems * sizeof (uintptr_t); 370 haddr = (uintptr_t)c.cache_hash_table; 371 372 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP); 373 if 
(mdb_vread(hash, hsize, haddr) == -1) { 374 mdb_warn("failed to read hash table at %p", haddr); 375 mdb_free(hash, hsize); 376 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 377 return (WALK_ERR); 378 } 379 380 wsp->walk_data = kmhw; 381 382 return (WALK_NEXT); 383 } 384 385 int 386 kmem_hash_walk_step(mdb_walk_state_t *wsp) 387 { 388 kmem_hash_walk_t *kmhw = wsp->walk_data; 389 uintptr_t addr = NULL; 390 391 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) { 392 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) { 393 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL) 394 break; 395 } 396 } 397 if (addr == NULL) 398 return (WALK_DONE); 399 400 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) { 401 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr); 402 return (WALK_ERR); 403 } 404 405 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata)); 406 } 407 408 void 409 kmem_hash_walk_fini(mdb_walk_state_t *wsp) 410 { 411 kmem_hash_walk_t *kmhw = wsp->walk_data; 412 413 if (kmhw == NULL) 414 return; 415 416 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t)); 417 mdb_free(kmhw, sizeof (kmem_hash_walk_t)); 418 } 419 420 /* 421 * Find the address of the bufctl structure for the address 'buf' in cache 422 * 'cp', which is at address caddr, and place it in *out. 423 */ 424 static int 425 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) 426 { 427 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf); 428 kmem_bufctl_t *bcp; 429 kmem_bufctl_t bc; 430 431 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) { 432 mdb_warn("unable to read hash bucket for %p in cache %p", 433 buf, caddr); 434 return (-1); 435 } 436 437 while (bcp != NULL) { 438 if (mdb_vread(&bc, sizeof (kmem_bufctl_t), 439 (uintptr_t)bcp) == -1) { 440 mdb_warn("unable to read bufctl at %p", bcp); 441 return (-1); 442 } 443 if (bc.bc_addr == buf) { 444 *out = (uintptr_t)bcp; 445 return (0); 446 } 447 bcp = bc.bc_next; 448 } 449 450 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); 451 return (-1); 452 } 453 454 int 455 kmem_get_magsize(const kmem_cache_t *cp) 456 { 457 uintptr_t addr = (uintptr_t)cp->cache_magtype; 458 GElf_Sym mt_sym; 459 kmem_magtype_t mt; 460 int res; 461 462 /* 463 * if cpu 0 has a non-zero magsize, it must be correct. caches 464 * with KMF_NOMAGAZINE have disabled their magazine layers, so 465 * it is okay to return 0 for them. 466 */ 467 if ((res = cp->cache_cpu[0].cc_magsize) != 0 || 468 (cp->cache_flags & KMF_NOMAGAZINE)) 469 return (res); 470 471 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) { 472 mdb_warn("unable to read 'kmem_magtype'"); 473 } else if (addr < mt_sym.st_value || 474 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || 475 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { 476 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", 477 cp->cache_name, addr); 478 return (0); 479 } 480 if (mdb_vread(&mt, sizeof (mt), addr) == -1) { 481 mdb_warn("unable to read magtype at %a", addr); 482 return (0); 483 } 484 return (mt.mt_magsize); 485 } 486 487 /*ARGSUSED*/ 488 static int 489 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est) 490 { 491 *est -= (sp->slab_chunks - sp->slab_refcnt); 492 493 return (WALK_NEXT); 494 } 495 496 /* 497 * Returns an upper bound on the number of allocated buffers in a given 498 * cache. 
499 */ 500 size_t 501 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp) 502 { 503 int magsize; 504 size_t cache_est; 505 506 cache_est = cp->cache_buftotal; 507 508 (void) mdb_pwalk("kmem_slab_partial", 509 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr); 510 511 if ((magsize = kmem_get_magsize(cp)) != 0) { 512 size_t mag_est = cp->cache_full.ml_total * magsize; 513 514 if (cache_est >= mag_est) { 515 cache_est -= mag_est; 516 } else { 517 mdb_warn("cache %p's magazine layer holds more buffers " 518 "than the slab layer.\n", addr); 519 } 520 } 521 return (cache_est); 522 } 523 524 #define READMAG_ROUNDS(rounds) { \ 525 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \ 526 mdb_warn("couldn't read magazine at %p", kmp); \ 527 goto fail; \ 528 } \ 529 for (i = 0; i < rounds; i++) { \ 530 maglist[magcnt++] = mp->mag_round[i]; \ 531 if (magcnt == magmax) { \ 532 mdb_warn("%d magazines exceeds fudge factor\n", \ 533 magcnt); \ 534 goto fail; \ 535 } \ 536 } \ 537 } 538 539 int 540 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus, 541 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags) 542 { 543 kmem_magazine_t *kmp, *mp; 544 void **maglist = NULL; 545 int i, cpu; 546 size_t magsize, magmax, magbsize; 547 size_t magcnt = 0; 548 549 /* 550 * Read the magtype out of the cache, after verifying the pointer's 551 * correctness. 552 */ 553 magsize = kmem_get_magsize(cp); 554 if (magsize == 0) 555 magsize = 1; 556 557 /* 558 * There are several places where we need to go buffer hunting: 559 * the per-CPU loaded magazine, the per-CPU spare full magazine, 560 * and the full magazine list in the depot. 561 * 562 * For an upper bound on the number of buffers in the magazine 563 * layer, we have the number of magazines on the cache_full 564 * list plus at most two magazines per CPU (the loaded and the 565 * spare). Toss in 100 magazines as a fudge factor in case this 566 * is live (the number "100" comes from the same fudge factor in 567 * crash(1M)). 568 */ 569 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize; 570 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]); 571 572 if (magbsize >= PAGESIZE / 2) { 573 mdb_warn("magazine size for cache %p unreasonable (%x)\n", 574 addr, magbsize); 575 goto fail; 576 } 577 578 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags); 579 mp = mdb_alloc(magbsize, alloc_flags); 580 if (mp == NULL || maglist == NULL) 581 goto fail; 582 583 /* 584 * First up: the magazines in the depot (i.e. on the cache_full list). 585 */ 586 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) { 587 READMAG_ROUNDS(magsize); 588 kmp = mp->mag_next; 589 590 if (kmp == cp->cache_full.ml_list) 591 break; /* cache_full list loop detected */ 592 } 593 594 dprintf(("cache_full list done\n")); 595 596 /* 597 * Now whip through the CPUs, snagging the loaded magazines 598 * and full spares. 
599 */ 600 for (cpu = 0; cpu < ncpus; cpu++) { 601 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; 602 603 dprintf(("reading cpu cache %p\n", 604 (uintptr_t)ccp - (uintptr_t)cp + addr)); 605 606 if (ccp->cc_rounds > 0 && 607 (kmp = ccp->cc_loaded) != NULL) { 608 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); 609 READMAG_ROUNDS(ccp->cc_rounds); 610 } 611 612 if (ccp->cc_prounds > 0 && 613 (kmp = ccp->cc_ploaded) != NULL) { 614 dprintf(("reading %d previously loaded rounds\n", 615 ccp->cc_prounds)); 616 READMAG_ROUNDS(ccp->cc_prounds); 617 } 618 } 619 620 dprintf(("magazine layer: %d buffers\n", magcnt)); 621 622 if (!(alloc_flags & UM_GC)) 623 mdb_free(mp, magbsize); 624 625 *maglistp = maglist; 626 *magcntp = magcnt; 627 *magmaxp = magmax; 628 629 return (WALK_NEXT); 630 631 fail: 632 if (!(alloc_flags & UM_GC)) { 633 if (mp) 634 mdb_free(mp, magbsize); 635 if (maglist) 636 mdb_free(maglist, magmax * sizeof (void *)); 637 } 638 return (WALK_ERR); 639 } 640 641 static int 642 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) 643 { 644 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); 645 } 646 647 static int 648 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) 649 { 650 kmem_bufctl_audit_t b; 651 652 /* 653 * if KMF_AUDIT is not set, we know that we're looking at a 654 * kmem_bufctl_t. 655 */ 656 if (!(cp->cache_flags & KMF_AUDIT) || 657 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) { 658 (void) memset(&b, 0, sizeof (b)); 659 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) { 660 mdb_warn("unable to read bufctl at %p", buf); 661 return (WALK_ERR); 662 } 663 } 664 665 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata)); 666 } 667 668 typedef struct kmem_walk { 669 int kmw_type; 670 671 int kmw_addr; /* cache address */ 672 kmem_cache_t *kmw_cp; 673 size_t kmw_csize; 674 675 /* 676 * magazine layer 677 */ 678 void **kmw_maglist; 679 size_t kmw_max; 680 size_t kmw_count; 681 size_t kmw_pos; 682 683 /* 684 * slab layer 685 */ 686 char *kmw_valid; /* to keep track of freed buffers */ 687 char *kmw_ubase; /* buffer for slab data */ 688 } kmem_walk_t; 689 690 static int 691 kmem_walk_init_common(mdb_walk_state_t *wsp, int type) 692 { 693 kmem_walk_t *kmw; 694 int ncpus, csize; 695 kmem_cache_t *cp; 696 697 size_t magmax, magcnt; 698 void **maglist = NULL; 699 uint_t chunksize, slabsize; 700 int status = WALK_ERR; 701 uintptr_t addr = wsp->walk_addr; 702 const char *layered; 703 704 type &= ~KM_HASH; 705 706 if (addr == NULL) { 707 mdb_warn("kmem walk doesn't support global walks\n"); 708 return (WALK_ERR); 709 } 710 711 dprintf(("walking %p\n", addr)); 712 713 /* 714 * First we need to figure out how many CPUs are configured in the 715 * system to know how much to slurp out. 716 */ 717 mdb_readvar(&ncpus, "max_ncpus"); 718 719 csize = KMEM_CACHE_SIZE(ncpus); 720 cp = mdb_alloc(csize, UM_SLEEP); 721 722 if (mdb_vread(cp, csize, addr) == -1) { 723 mdb_warn("couldn't read cache at addr %p", addr); 724 goto out2; 725 } 726 727 dprintf(("buf total is %d\n", cp->cache_buftotal)); 728 729 if (cp->cache_buftotal == 0) { 730 mdb_free(cp, csize); 731 return (WALK_DONE); 732 } 733 734 /* 735 * If they ask for bufctls, but it's a small-slab cache, 736 * there is nothing to report. 
737 */ 738 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) { 739 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n", 740 cp->cache_flags)); 741 mdb_free(cp, csize); 742 return (WALK_DONE); 743 } 744 745 /* 746 * If they want constructed buffers, but there's no constructor or 747 * the cache has DEADBEEF checking enabled, there is nothing to report. 748 */ 749 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) || 750 cp->cache_constructor == NULL || 751 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) { 752 mdb_free(cp, csize); 753 return (WALK_DONE); 754 } 755 756 /* 757 * Read in the contents of the magazine layer 758 */ 759 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt, 760 &magmax, UM_SLEEP) == WALK_ERR) 761 goto out2; 762 763 /* 764 * We have all of the buffers from the magazines; if we are walking 765 * allocated buffers, sort them so we can bsearch them later. 766 */ 767 if (type & KM_ALLOCATED) 768 qsort(maglist, magcnt, sizeof (void *), addrcmp); 769 770 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP); 771 772 kmw->kmw_type = type; 773 kmw->kmw_addr = addr; 774 kmw->kmw_cp = cp; 775 kmw->kmw_csize = csize; 776 kmw->kmw_maglist = maglist; 777 kmw->kmw_max = magmax; 778 kmw->kmw_count = magcnt; 779 kmw->kmw_pos = 0; 780 781 /* 782 * When walking allocated buffers in a KMF_HASH cache, we walk the 783 * hash table instead of the slab layer. 784 */ 785 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) { 786 layered = "kmem_hash"; 787 788 kmw->kmw_type |= KM_HASH; 789 } else { 790 /* 791 * If we are walking freed buffers, we only need the 792 * magazine layer plus the partially allocated slabs. 793 * To walk allocated buffers, we need all of the slabs. 794 */ 795 if (type & KM_ALLOCATED) 796 layered = "kmem_slab"; 797 else 798 layered = "kmem_slab_partial"; 799 800 /* 801 * for small-slab caches, we read in the entire slab. For 802 * freed buffers, we can just walk the freelist. For 803 * allocated buffers, we use a 'valid' array to track 804 * the freed buffers. 
805 */ 806 if (!(cp->cache_flags & KMF_HASH)) { 807 chunksize = cp->cache_chunksize; 808 slabsize = cp->cache_slabsize; 809 810 kmw->kmw_ubase = mdb_alloc(slabsize + 811 sizeof (kmem_bufctl_t), UM_SLEEP); 812 813 if (type & KM_ALLOCATED) 814 kmw->kmw_valid = 815 mdb_alloc(slabsize / chunksize, UM_SLEEP); 816 } 817 } 818 819 status = WALK_NEXT; 820 821 if (mdb_layered_walk(layered, wsp) == -1) { 822 mdb_warn("unable to start layered '%s' walk", layered); 823 status = WALK_ERR; 824 } 825 826 out1: 827 if (status == WALK_ERR) { 828 if (kmw->kmw_valid) 829 mdb_free(kmw->kmw_valid, slabsize / chunksize); 830 831 if (kmw->kmw_ubase) 832 mdb_free(kmw->kmw_ubase, slabsize + 833 sizeof (kmem_bufctl_t)); 834 835 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (uintptr_t)); 836 mdb_free(kmw, sizeof (kmem_walk_t)); 837 wsp->walk_data = NULL; 838 } 839 840 out2: 841 if (status == WALK_ERR) 842 mdb_free(cp, csize); 843 844 return (status); 845 } 846 847 int 848 kmem_walk_step(mdb_walk_state_t *wsp) 849 { 850 kmem_walk_t *kmw = wsp->walk_data; 851 int type = kmw->kmw_type; 852 kmem_cache_t *cp = kmw->kmw_cp; 853 854 void **maglist = kmw->kmw_maglist; 855 int magcnt = kmw->kmw_count; 856 857 uintptr_t chunksize, slabsize; 858 uintptr_t addr; 859 const kmem_slab_t *sp; 860 const kmem_bufctl_t *bcp; 861 kmem_bufctl_t bc; 862 863 int chunks; 864 char *kbase; 865 void *buf; 866 int i, ret; 867 868 char *valid, *ubase; 869 870 /* 871 * first, handle the 'kmem_hash' layered walk case 872 */ 873 if (type & KM_HASH) { 874 /* 875 * We have a buffer which has been allocated out of the 876 * global layer. We need to make sure that it's not 877 * actually sitting in a magazine before we report it as 878 * an allocated buffer. 879 */ 880 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr; 881 882 if (magcnt > 0 && 883 bsearch(&buf, maglist, magcnt, sizeof (void *), 884 addrcmp) != NULL) 885 return (WALK_NEXT); 886 887 if (type & KM_BUFCTL) 888 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); 889 890 return (kmem_walk_callback(wsp, (uintptr_t)buf)); 891 } 892 893 ret = WALK_NEXT; 894 895 addr = kmw->kmw_addr; 896 897 /* 898 * If we're walking freed buffers, report everything in the 899 * magazine layer before processing the first slab. 900 */ 901 if ((type & KM_FREE) && magcnt != 0) { 902 kmw->kmw_count = 0; /* only do this once */ 903 for (i = 0; i < magcnt; i++) { 904 buf = maglist[i]; 905 906 if (type & KM_BUFCTL) { 907 uintptr_t out; 908 909 if (cp->cache_flags & KMF_BUFTAG) { 910 kmem_buftag_t *btp; 911 kmem_buftag_t tag; 912 913 /* LINTED - alignment */ 914 btp = KMEM_BUFTAG(cp, buf); 915 if (mdb_vread(&tag, sizeof (tag), 916 (uintptr_t)btp) == -1) { 917 mdb_warn("reading buftag for " 918 "%p at %p", buf, btp); 919 continue; 920 } 921 out = (uintptr_t)tag.bt_bufctl; 922 } else { 923 if (kmem_hash_lookup(cp, addr, buf, 924 &out) == -1) 925 continue; 926 } 927 ret = bufctl_walk_callback(cp, wsp, out); 928 } else { 929 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 930 } 931 932 if (ret != WALK_NEXT) 933 return (ret); 934 } 935 } 936 937 /* 938 * If they want constructed buffers, we're finished, since the 939 * magazine layer holds them all. 
940 */ 941 if (type & KM_CONSTRUCTED) 942 return (WALK_DONE); 943 944 /* 945 * Handle the buffers in the current slab 946 */ 947 chunksize = cp->cache_chunksize; 948 slabsize = cp->cache_slabsize; 949 950 sp = wsp->walk_layer; 951 chunks = sp->slab_chunks; 952 kbase = sp->slab_base; 953 954 dprintf(("kbase is %p\n", kbase)); 955 956 if (!(cp->cache_flags & KMF_HASH)) { 957 valid = kmw->kmw_valid; 958 ubase = kmw->kmw_ubase; 959 960 if (mdb_vread(ubase, chunks * chunksize, 961 (uintptr_t)kbase) == -1) { 962 mdb_warn("failed to read slab contents at %p", kbase); 963 return (WALK_ERR); 964 } 965 966 /* 967 * Set up the valid map as fully allocated -- we'll punch 968 * out the freelist. 969 */ 970 if (type & KM_ALLOCATED) 971 (void) memset(valid, 1, chunks); 972 } else { 973 valid = NULL; 974 ubase = NULL; 975 } 976 977 /* 978 * walk the slab's freelist 979 */ 980 bcp = sp->slab_head; 981 982 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); 983 984 /* 985 * since we could be in the middle of allocating a buffer, 986 * our refcnt could be one higher than it aught. So we 987 * check one further on the freelist than the count allows. 988 */ 989 for (i = sp->slab_refcnt; i <= chunks; i++) { 990 uint_t ndx; 991 992 dprintf(("bcp is %p\n", bcp)); 993 994 if (bcp == NULL) { 995 if (i == chunks) 996 break; 997 mdb_warn( 998 "slab %p in cache %p freelist too short by %d\n", 999 sp, addr, chunks - i); 1000 break; 1001 } 1002 1003 if (cp->cache_flags & KMF_HASH) { 1004 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { 1005 mdb_warn("failed to read bufctl ptr at %p", 1006 bcp); 1007 break; 1008 } 1009 buf = bc.bc_addr; 1010 } else { 1011 /* 1012 * Otherwise the buffer is in the slab which 1013 * we've read in; we just need to determine 1014 * its offset in the slab to find the 1015 * kmem_bufctl_t. 1016 */ 1017 bc = *((kmem_bufctl_t *) 1018 ((uintptr_t)bcp - (uintptr_t)kbase + 1019 (uintptr_t)ubase)); 1020 1021 buf = KMEM_BUF(cp, bcp); 1022 } 1023 1024 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; 1025 1026 if (ndx > slabsize / cp->cache_bufsize) { 1027 /* 1028 * This is very wrong; we have managed to find 1029 * a buffer in the slab which shouldn't 1030 * actually be here. Emit a warning, and 1031 * try to continue. 1032 */ 1033 mdb_warn("buf %p is out of range for " 1034 "slab %p, cache %p\n", buf, sp, addr); 1035 } else if (type & KM_ALLOCATED) { 1036 /* 1037 * we have found a buffer on the slab's freelist; 1038 * clear its entry 1039 */ 1040 valid[ndx] = 0; 1041 } else { 1042 /* 1043 * Report this freed buffer 1044 */ 1045 if (type & KM_BUFCTL) { 1046 ret = bufctl_walk_callback(cp, wsp, 1047 (uintptr_t)bcp); 1048 } else { 1049 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1050 } 1051 if (ret != WALK_NEXT) 1052 return (ret); 1053 } 1054 1055 bcp = bc.bc_next; 1056 } 1057 1058 if (bcp != NULL) { 1059 dprintf(("slab %p in cache %p freelist too long (%p)\n", 1060 sp, addr, bcp)); 1061 } 1062 1063 /* 1064 * If we are walking freed buffers, the loop above handled reporting 1065 * them. 1066 */ 1067 if (type & KM_FREE) 1068 return (WALK_NEXT); 1069 1070 if (type & KM_BUFCTL) { 1071 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for " 1072 "cache %p\n", addr); 1073 return (WALK_ERR); 1074 } 1075 1076 /* 1077 * Report allocated buffers, skipping buffers in the magazine layer. 1078 * We only get this far for small-slab caches. 
1079 */ 1080 for (i = 0; ret == WALK_NEXT && i < chunks; i++) { 1081 buf = (char *)kbase + i * chunksize; 1082 1083 if (!valid[i]) 1084 continue; /* on slab freelist */ 1085 1086 if (magcnt > 0 && 1087 bsearch(&buf, maglist, magcnt, sizeof (void *), 1088 addrcmp) != NULL) 1089 continue; /* in magazine layer */ 1090 1091 ret = kmem_walk_callback(wsp, (uintptr_t)buf); 1092 } 1093 return (ret); 1094 } 1095 1096 void 1097 kmem_walk_fini(mdb_walk_state_t *wsp) 1098 { 1099 kmem_walk_t *kmw = wsp->walk_data; 1100 uintptr_t chunksize; 1101 uintptr_t slabsize; 1102 1103 if (kmw == NULL) 1104 return; 1105 1106 if (kmw->kmw_maglist != NULL) 1107 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *)); 1108 1109 chunksize = kmw->kmw_cp->cache_chunksize; 1110 slabsize = kmw->kmw_cp->cache_slabsize; 1111 1112 if (kmw->kmw_valid != NULL) 1113 mdb_free(kmw->kmw_valid, slabsize / chunksize); 1114 if (kmw->kmw_ubase != NULL) 1115 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t)); 1116 1117 mdb_free(kmw->kmw_cp, kmw->kmw_csize); 1118 mdb_free(kmw, sizeof (kmem_walk_t)); 1119 } 1120 1121 /*ARGSUSED*/ 1122 static int 1123 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp) 1124 { 1125 /* 1126 * Buffers allocated from NOTOUCH caches can also show up as freed 1127 * memory in other caches. This can be a little confusing, so we 1128 * don't walk NOTOUCH caches when walking all caches (thereby assuring 1129 * that "::walk kmem" and "::walk freemem" yield disjoint output). 1130 */ 1131 if (c->cache_cflags & KMC_NOTOUCH) 1132 return (WALK_NEXT); 1133 1134 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, 1135 wsp->walk_cbdata, addr) == -1) 1136 return (WALK_DONE); 1137 1138 return (WALK_NEXT); 1139 } 1140 1141 #define KMEM_WALK_ALL(name, wsp) { \ 1142 wsp->walk_data = (name); \ 1143 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \ 1144 return (WALK_ERR); \ 1145 return (WALK_DONE); \ 1146 } 1147 1148 int 1149 kmem_walk_init(mdb_walk_state_t *wsp) 1150 { 1151 if (wsp->walk_arg != NULL) 1152 wsp->walk_addr = (uintptr_t)wsp->walk_arg; 1153 1154 if (wsp->walk_addr == NULL) 1155 KMEM_WALK_ALL("kmem", wsp); 1156 return (kmem_walk_init_common(wsp, KM_ALLOCATED)); 1157 } 1158 1159 int 1160 bufctl_walk_init(mdb_walk_state_t *wsp) 1161 { 1162 if (wsp->walk_addr == NULL) 1163 KMEM_WALK_ALL("bufctl", wsp); 1164 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL)); 1165 } 1166 1167 int 1168 freemem_walk_init(mdb_walk_state_t *wsp) 1169 { 1170 if (wsp->walk_addr == NULL) 1171 KMEM_WALK_ALL("freemem", wsp); 1172 return (kmem_walk_init_common(wsp, KM_FREE)); 1173 } 1174 1175 int 1176 freemem_constructed_walk_init(mdb_walk_state_t *wsp) 1177 { 1178 if (wsp->walk_addr == NULL) 1179 KMEM_WALK_ALL("freemem_constructed", wsp); 1180 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED)); 1181 } 1182 1183 int 1184 freectl_walk_init(mdb_walk_state_t *wsp) 1185 { 1186 if (wsp->walk_addr == NULL) 1187 KMEM_WALK_ALL("freectl", wsp); 1188 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL)); 1189 } 1190 1191 int 1192 freectl_constructed_walk_init(mdb_walk_state_t *wsp) 1193 { 1194 if (wsp->walk_addr == NULL) 1195 KMEM_WALK_ALL("freectl_constructed", wsp); 1196 return (kmem_walk_init_common(wsp, 1197 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED)); 1198 } 1199 1200 typedef struct bufctl_history_walk { 1201 void *bhw_next; 1202 kmem_cache_t *bhw_cache; 1203 kmem_slab_t *bhw_slab; 1204 hrtime_t bhw_timestamp; 1205 } bufctl_history_walk_t; 1206 1207 int 1208 
bufctl_history_walk_init(mdb_walk_state_t *wsp) 1209 { 1210 bufctl_history_walk_t *bhw; 1211 kmem_bufctl_audit_t bc; 1212 kmem_bufctl_audit_t bcn; 1213 1214 if (wsp->walk_addr == NULL) { 1215 mdb_warn("bufctl_history walk doesn't support global walks\n"); 1216 return (WALK_ERR); 1217 } 1218 1219 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { 1220 mdb_warn("unable to read bufctl at %p", wsp->walk_addr); 1221 return (WALK_ERR); 1222 } 1223 1224 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); 1225 bhw->bhw_timestamp = 0; 1226 bhw->bhw_cache = bc.bc_cache; 1227 bhw->bhw_slab = bc.bc_slab; 1228 1229 /* 1230 * sometimes the first log entry matches the base bufctl; in that 1231 * case, skip the base bufctl. 1232 */ 1233 if (bc.bc_lastlog != NULL && 1234 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && 1235 bc.bc_addr == bcn.bc_addr && 1236 bc.bc_cache == bcn.bc_cache && 1237 bc.bc_slab == bcn.bc_slab && 1238 bc.bc_timestamp == bcn.bc_timestamp && 1239 bc.bc_thread == bcn.bc_thread) 1240 bhw->bhw_next = bc.bc_lastlog; 1241 else 1242 bhw->bhw_next = (void *)wsp->walk_addr; 1243 1244 wsp->walk_addr = (uintptr_t)bc.bc_addr; 1245 wsp->walk_data = bhw; 1246 1247 return (WALK_NEXT); 1248 } 1249 1250 int 1251 bufctl_history_walk_step(mdb_walk_state_t *wsp) 1252 { 1253 bufctl_history_walk_t *bhw = wsp->walk_data; 1254 uintptr_t addr = (uintptr_t)bhw->bhw_next; 1255 uintptr_t baseaddr = wsp->walk_addr; 1256 kmem_bufctl_audit_t bc; 1257 1258 if (addr == NULL) 1259 return (WALK_DONE); 1260 1261 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1262 mdb_warn("unable to read bufctl at %p", bhw->bhw_next); 1263 return (WALK_ERR); 1264 } 1265 1266 /* 1267 * The bufctl is only valid if the address, cache, and slab are 1268 * correct. We also check that the timestamp is decreasing, to 1269 * prevent infinite loops. 1270 */ 1271 if ((uintptr_t)bc.bc_addr != baseaddr || 1272 bc.bc_cache != bhw->bhw_cache || 1273 bc.bc_slab != bhw->bhw_slab || 1274 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp)) 1275 return (WALK_DONE); 1276 1277 bhw->bhw_next = bc.bc_lastlog; 1278 bhw->bhw_timestamp = bc.bc_timestamp; 1279 1280 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1281 } 1282 1283 void 1284 bufctl_history_walk_fini(mdb_walk_state_t *wsp) 1285 { 1286 bufctl_history_walk_t *bhw = wsp->walk_data; 1287 1288 mdb_free(bhw, sizeof (*bhw)); 1289 } 1290 1291 typedef struct kmem_log_walk { 1292 kmem_bufctl_audit_t *klw_base; 1293 kmem_bufctl_audit_t **klw_sorted; 1294 kmem_log_header_t klw_lh; 1295 size_t klw_size; 1296 size_t klw_maxndx; 1297 size_t klw_ndx; 1298 } kmem_log_walk_t; 1299 1300 int 1301 kmem_log_walk_init(mdb_walk_state_t *wsp) 1302 { 1303 uintptr_t lp = wsp->walk_addr; 1304 kmem_log_walk_t *klw; 1305 kmem_log_header_t *lhp; 1306 int maxndx, i, j, k; 1307 1308 /* 1309 * By default (global walk), walk the kmem_transaction_log. Otherwise 1310 * read the log whose kmem_log_header_t is stored at walk_addr. 
1311 */ 1312 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) { 1313 mdb_warn("failed to read 'kmem_transaction_log'"); 1314 return (WALK_ERR); 1315 } 1316 1317 if (lp == NULL) { 1318 mdb_warn("log is disabled\n"); 1319 return (WALK_ERR); 1320 } 1321 1322 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP); 1323 lhp = &klw->klw_lh; 1324 1325 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) { 1326 mdb_warn("failed to read log header at %p", lp); 1327 mdb_free(klw, sizeof (kmem_log_walk_t)); 1328 return (WALK_ERR); 1329 } 1330 1331 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks; 1332 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP); 1333 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1; 1334 1335 if (mdb_vread(klw->klw_base, klw->klw_size, 1336 (uintptr_t)lhp->lh_base) == -1) { 1337 mdb_warn("failed to read log at base %p", lhp->lh_base); 1338 mdb_free(klw->klw_base, klw->klw_size); 1339 mdb_free(klw, sizeof (kmem_log_walk_t)); 1340 return (WALK_ERR); 1341 } 1342 1343 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * 1344 sizeof (kmem_bufctl_audit_t *), UM_SLEEP); 1345 1346 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { 1347 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *) 1348 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize); 1349 1350 for (j = 0; j < maxndx; j++) 1351 klw->klw_sorted[k++] = &chunk[j]; 1352 } 1353 1354 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *), 1355 (int(*)(const void *, const void *))bufctlcmp); 1356 1357 klw->klw_maxndx = k; 1358 wsp->walk_data = klw; 1359 1360 return (WALK_NEXT); 1361 } 1362 1363 int 1364 kmem_log_walk_step(mdb_walk_state_t *wsp) 1365 { 1366 kmem_log_walk_t *klw = wsp->walk_data; 1367 kmem_bufctl_audit_t *bcp; 1368 1369 if (klw->klw_ndx == klw->klw_maxndx) 1370 return (WALK_DONE); 1371 1372 bcp = klw->klw_sorted[klw->klw_ndx++]; 1373 1374 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base + 1375 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata)); 1376 } 1377 1378 void 1379 kmem_log_walk_fini(mdb_walk_state_t *wsp) 1380 { 1381 kmem_log_walk_t *klw = wsp->walk_data; 1382 1383 mdb_free(klw->klw_base, klw->klw_size); 1384 mdb_free(klw->klw_sorted, klw->klw_maxndx * 1385 sizeof (kmem_bufctl_audit_t *)); 1386 mdb_free(klw, sizeof (kmem_log_walk_t)); 1387 } 1388 1389 typedef struct allocdby_bufctl { 1390 uintptr_t abb_addr; 1391 hrtime_t abb_ts; 1392 } allocdby_bufctl_t; 1393 1394 typedef struct allocdby_walk { 1395 const char *abw_walk; 1396 uintptr_t abw_thread; 1397 size_t abw_nbufs; 1398 size_t abw_size; 1399 allocdby_bufctl_t *abw_buf; 1400 size_t abw_ndx; 1401 } allocdby_walk_t; 1402 1403 int 1404 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp, 1405 allocdby_walk_t *abw) 1406 { 1407 if ((uintptr_t)bcp->bc_thread != abw->abw_thread) 1408 return (WALK_NEXT); 1409 1410 if (abw->abw_nbufs == abw->abw_size) { 1411 allocdby_bufctl_t *buf; 1412 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; 1413 1414 buf = mdb_zalloc(oldsize << 1, UM_SLEEP); 1415 1416 bcopy(abw->abw_buf, buf, oldsize); 1417 mdb_free(abw->abw_buf, oldsize); 1418 1419 abw->abw_size <<= 1; 1420 abw->abw_buf = buf; 1421 } 1422 1423 abw->abw_buf[abw->abw_nbufs].abb_addr = addr; 1424 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; 1425 abw->abw_nbufs++; 1426 1427 return (WALK_NEXT); 1428 } 1429 1430 /*ARGSUSED*/ 1431 int 1432 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw) 1433 { 1434 if (mdb_pwalk(abw->abw_walk, 
(mdb_walk_cb_t)allocdby_walk_bufctl, 1435 abw, addr) == -1) { 1436 mdb_warn("couldn't walk bufctl for cache %p", addr); 1437 return (WALK_DONE); 1438 } 1439 1440 return (WALK_NEXT); 1441 } 1442 1443 static int 1444 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) 1445 { 1446 if (lhs->abb_ts < rhs->abb_ts) 1447 return (1); 1448 if (lhs->abb_ts > rhs->abb_ts) 1449 return (-1); 1450 return (0); 1451 } 1452 1453 static int 1454 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) 1455 { 1456 allocdby_walk_t *abw; 1457 1458 if (wsp->walk_addr == NULL) { 1459 mdb_warn("allocdby walk doesn't support global walks\n"); 1460 return (WALK_ERR); 1461 } 1462 1463 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); 1464 1465 abw->abw_thread = wsp->walk_addr; 1466 abw->abw_walk = walk; 1467 abw->abw_size = 128; /* something reasonable */ 1468 abw->abw_buf = 1469 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); 1470 1471 wsp->walk_data = abw; 1472 1473 if (mdb_walk("kmem_cache", 1474 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { 1475 mdb_warn("couldn't walk kmem_cache"); 1476 allocdby_walk_fini(wsp); 1477 return (WALK_ERR); 1478 } 1479 1480 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), 1481 (int(*)(const void *, const void *))allocdby_cmp); 1482 1483 return (WALK_NEXT); 1484 } 1485 1486 int 1487 allocdby_walk_init(mdb_walk_state_t *wsp) 1488 { 1489 return (allocdby_walk_init_common(wsp, "bufctl")); 1490 } 1491 1492 int 1493 freedby_walk_init(mdb_walk_state_t *wsp) 1494 { 1495 return (allocdby_walk_init_common(wsp, "freectl")); 1496 } 1497 1498 int 1499 allocdby_walk_step(mdb_walk_state_t *wsp) 1500 { 1501 allocdby_walk_t *abw = wsp->walk_data; 1502 kmem_bufctl_audit_t bc; 1503 uintptr_t addr; 1504 1505 if (abw->abw_ndx == abw->abw_nbufs) 1506 return (WALK_DONE); 1507 1508 addr = abw->abw_buf[abw->abw_ndx++].abb_addr; 1509 1510 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 1511 mdb_warn("couldn't read bufctl at %p", addr); 1512 return (WALK_DONE); 1513 } 1514 1515 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata)); 1516 } 1517 1518 void 1519 allocdby_walk_fini(mdb_walk_state_t *wsp) 1520 { 1521 allocdby_walk_t *abw = wsp->walk_data; 1522 1523 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); 1524 mdb_free(abw, sizeof (allocdby_walk_t)); 1525 } 1526 1527 /*ARGSUSED*/ 1528 int 1529 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored) 1530 { 1531 char c[MDB_SYM_NAMLEN]; 1532 GElf_Sym sym; 1533 int i; 1534 1535 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); 1536 for (i = 0; i < bcp->bc_depth; i++) { 1537 if (mdb_lookup_by_addr(bcp->bc_stack[i], 1538 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 1539 continue; 1540 if (strncmp(c, "kmem_", 5) == 0) 1541 continue; 1542 mdb_printf("%s+0x%lx", 1543 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); 1544 break; 1545 } 1546 mdb_printf("\n"); 1547 1548 return (WALK_NEXT); 1549 } 1550 1551 static int 1552 allocdby_common(uintptr_t addr, uint_t flags, const char *w) 1553 { 1554 if (!(flags & DCMD_ADDRSPEC)) 1555 return (DCMD_USAGE); 1556 1557 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); 1558 1559 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) { 1560 mdb_warn("can't walk '%s' for %p", w, addr); 1561 return (DCMD_ERR); 1562 } 1563 1564 return (DCMD_OK); 1565 } 1566 1567 /*ARGSUSED*/ 1568 int 1569 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1570 { 1571 return (allocdby_common(addr, 
flags, "allocdby")); 1572 } 1573 1574 /*ARGSUSED*/ 1575 int 1576 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 1577 { 1578 return (allocdby_common(addr, flags, "freedby")); 1579 } 1580 1581 /* 1582 * Return a string describing the address in relation to the given thread's 1583 * stack. 1584 * 1585 * - If the thread state is TS_FREE, return " (inactive interrupt thread)". 1586 * 1587 * - If the address is above the stack pointer, return an empty string 1588 * signifying that the address is active. 1589 * 1590 * - If the address is below the stack pointer, and the thread is not on proc, 1591 * return " (below sp)". 1592 * 1593 * - If the address is below the stack pointer, and the thread is on proc, 1594 * return " (possibly below sp)". Depending on context, we may or may not 1595 * have an accurate t_sp. 1596 */ 1597 static const char * 1598 stack_active(const kthread_t *t, uintptr_t addr) 1599 { 1600 uintptr_t panicstk; 1601 GElf_Sym sym; 1602 1603 if (t->t_state == TS_FREE) 1604 return (" (inactive interrupt thread)"); 1605 1606 /* 1607 * Check to see if we're on the panic stack. If so, ignore t_sp, as it 1608 * no longer relates to the thread's real stack. 1609 */ 1610 if (mdb_lookup_by_name("panic_stack", &sym) == 0) { 1611 panicstk = (uintptr_t)sym.st_value; 1612 1613 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE) 1614 return (""); 1615 } 1616 1617 if (addr >= t->t_sp + STACK_BIAS) 1618 return (""); 1619 1620 if (t->t_state == TS_ONPROC) 1621 return (" (possibly below sp)"); 1622 1623 return (" (below sp)"); 1624 } 1625 1626 typedef struct whatis { 1627 uintptr_t w_addr; 1628 const kmem_cache_t *w_cache; 1629 const vmem_t *w_vmem; 1630 size_t w_slab_align; 1631 int w_slab_found; 1632 int w_found; 1633 int w_kmem_lite_count; 1634 uint_t w_verbose; 1635 uint_t w_freemem; 1636 uint_t w_all; 1637 uint_t w_bufctl; 1638 uint_t w_idspace; 1639 } whatis_t; 1640 1641 static void 1642 whatis_print_kmem(uintptr_t addr, uintptr_t baddr, whatis_t *w) 1643 { 1644 /* LINTED pointer cast may result in improper alignment */ 1645 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(w->w_cache, addr); 1646 intptr_t stat; 1647 int count = 0; 1648 int i; 1649 pc_t callers[16]; 1650 1651 if (w->w_cache->cache_flags & KMF_REDZONE) { 1652 kmem_buftag_t bt; 1653 1654 if (mdb_vread(&bt, sizeof (bt), btaddr) == -1) 1655 goto done; 1656 1657 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat; 1658 1659 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) 1660 goto done; 1661 1662 /* 1663 * provide the bufctl ptr if it has useful information 1664 */ 1665 if (baddr == 0 && (w->w_cache->cache_flags & KMF_AUDIT)) 1666 baddr = (uintptr_t)bt.bt_bufctl; 1667 1668 if (w->w_cache->cache_flags & KMF_LITE) { 1669 count = w->w_kmem_lite_count; 1670 1671 if (count * sizeof (pc_t) > sizeof (callers)) 1672 count = 0; 1673 1674 if (count > 0 && 1675 mdb_vread(callers, count * sizeof (pc_t), 1676 btaddr + 1677 offsetof(kmem_buftag_lite_t, bt_history)) == -1) 1678 count = 0; 1679 1680 /* 1681 * skip unused callers 1682 */ 1683 while (count > 0 && callers[count - 1] == 1684 (pc_t)KMEM_UNINITIALIZED_PATTERN) 1685 count--; 1686 } 1687 } 1688 1689 done: 1690 if (baddr == 0) 1691 mdb_printf("%p is %p+%p, %s from %s\n", 1692 w->w_addr, addr, w->w_addr - addr, 1693 w->w_freemem == FALSE ? "allocated" : "freed", 1694 w->w_cache->cache_name); 1695 else 1696 mdb_printf("%p is %p+%p, bufctl %p %s from %s\n", 1697 w->w_addr, addr, w->w_addr - addr, baddr, 1698 w->w_freemem == FALSE ? 
"allocated" : "freed", 1699 w->w_cache->cache_name); 1700 1701 if (count > 0) { 1702 mdb_inc_indent(8); 1703 mdb_printf("recent caller%s: %a%s", (count != 1)? "s":"", 1704 callers[0], (count != 1)? ", ":"\n"); 1705 for (i = 1; i < count; i++) 1706 mdb_printf("%a%s", callers[i], 1707 (i + 1 < count)? ", ":"\n"); 1708 mdb_dec_indent(8); 1709 } 1710 } 1711 1712 /*ARGSUSED*/ 1713 static int 1714 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_t *w) 1715 { 1716 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 1717 return (WALK_NEXT); 1718 1719 whatis_print_kmem(addr, 0, w); 1720 w->w_found++; 1721 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1722 } 1723 1724 static int 1725 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_t *w) 1726 { 1727 if (w->w_addr < vs->vs_start || w->w_addr >= vs->vs_end) 1728 return (WALK_NEXT); 1729 1730 mdb_printf("%p is %p+%p ", w->w_addr, 1731 vs->vs_start, w->w_addr - vs->vs_start); 1732 1733 /* 1734 * Always provide the vmem_seg pointer if it has a stack trace. 1735 */ 1736 if (w->w_bufctl == TRUE || 1737 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0)) { 1738 mdb_printf("(vmem_seg %p) ", addr); 1739 } 1740 1741 mdb_printf("%sfrom %s vmem arena\n", w->w_freemem == TRUE ? 1742 "freed " : "", w->w_vmem->vm_name); 1743 1744 w->w_found++; 1745 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1746 } 1747 1748 static int 1749 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_t *w) 1750 { 1751 const char *nm = vmem->vm_name; 1752 w->w_vmem = vmem; 1753 w->w_freemem = FALSE; 1754 1755 if (((vmem->vm_cflags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 1756 return (WALK_NEXT); 1757 1758 if (w->w_verbose) 1759 mdb_printf("Searching vmem arena %s...\n", nm); 1760 1761 if (mdb_pwalk("vmem_alloc", 1762 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 1763 mdb_warn("can't walk vmem seg for %p", addr); 1764 return (WALK_NEXT); 1765 } 1766 1767 if (w->w_found && w->w_all == FALSE) 1768 return (WALK_DONE); 1769 1770 if (w->w_verbose) 1771 mdb_printf("Searching vmem arena %s for free virtual...\n", nm); 1772 1773 w->w_freemem = TRUE; 1774 1775 if (mdb_pwalk("vmem_free", 1776 (mdb_walk_cb_t)whatis_walk_seg, w, addr) == -1) { 1777 mdb_warn("can't walk vmem seg for %p", addr); 1778 return (WALK_NEXT); 1779 } 1780 1781 return (w->w_found && w->w_all == FALSE ? WALK_DONE : WALK_NEXT); 1782 } 1783 1784 /*ARGSUSED*/ 1785 static int 1786 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_t *w) 1787 { 1788 uintptr_t addr; 1789 1790 if (bcp == NULL) 1791 return (WALK_NEXT); 1792 1793 addr = (uintptr_t)bcp->bc_addr; 1794 1795 if (w->w_addr < addr || w->w_addr >= addr + w->w_cache->cache_bufsize) 1796 return (WALK_NEXT); 1797 1798 whatis_print_kmem(addr, baddr, w); 1799 w->w_found++; 1800 return (w->w_all == TRUE ? 
WALK_NEXT : WALK_DONE); 1801 } 1802 1803 /*ARGSUSED*/ 1804 static int 1805 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_t *w) 1806 { 1807 uintptr_t base = P2ALIGN((uintptr_t)sp->slab_base, w->w_slab_align); 1808 1809 if ((w->w_addr - base) >= w->w_cache->cache_slabsize) 1810 return (WALK_NEXT); 1811 1812 w->w_slab_found++; 1813 return (WALK_DONE); 1814 } 1815 1816 static int 1817 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 1818 { 1819 char *walk, *freewalk; 1820 mdb_walk_cb_t func; 1821 vmem_t *vmp = c->cache_arena; 1822 1823 if (((c->cache_flags & VMC_IDENTIFIER) != 0) ^ w->w_idspace) 1824 return (WALK_NEXT); 1825 1826 if (w->w_bufctl == FALSE) { 1827 walk = "kmem"; 1828 freewalk = "freemem"; 1829 func = (mdb_walk_cb_t)whatis_walk_kmem; 1830 } else { 1831 walk = "bufctl"; 1832 freewalk = "freectl"; 1833 func = (mdb_walk_cb_t)whatis_walk_bufctl; 1834 } 1835 1836 w->w_cache = c; 1837 1838 if (w->w_verbose) 1839 mdb_printf("Searching %s's slabs...\n", c->cache_name); 1840 1841 /* 1842 * Verify that the address is in one of the cache's slabs. If not, 1843 * we can skip the more expensive walkers. (this is purely a 1844 * heuristic -- as long as there are no false-negatives, we'll be fine) 1845 * 1846 * We try to get the cache's arena's quantum, since to accurately 1847 * get the base of a slab, you have to align it to the quantum. If 1848 * it doesn't look sensible, we fall back to not aligning. 1849 */ 1850 if (mdb_vread(&w->w_slab_align, sizeof (w->w_slab_align), 1851 (uintptr_t)&vmp->vm_quantum) == -1) { 1852 mdb_warn("unable to read %p->cache_arena->vm_quantum", c); 1853 w->w_slab_align = 1; 1854 } 1855 1856 if ((c->cache_slabsize < w->w_slab_align) || w->w_slab_align == 0 || 1857 (w->w_slab_align & (w->w_slab_align - 1))) { 1858 mdb_warn("%p's arena has invalid quantum (0x%p)\n", c, 1859 w->w_slab_align); 1860 w->w_slab_align = 1; 1861 } 1862 1863 w->w_slab_found = 0; 1864 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, w, 1865 addr) == -1) { 1866 mdb_warn("can't find kmem_slab walker"); 1867 return (WALK_DONE); 1868 } 1869 if (w->w_slab_found == 0) 1870 return (WALK_NEXT); 1871 1872 if (c->cache_flags & KMF_LITE) { 1873 if (mdb_readvar(&w->w_kmem_lite_count, 1874 "kmem_lite_count") == -1 || w->w_kmem_lite_count > 16) 1875 w->w_kmem_lite_count = 0; 1876 } 1877 1878 if (w->w_verbose) 1879 mdb_printf("Searching %s...\n", c->cache_name); 1880 1881 w->w_freemem = FALSE; 1882 1883 if (mdb_pwalk(walk, func, w, addr) == -1) { 1884 mdb_warn("can't find %s walker", walk); 1885 return (WALK_DONE); 1886 } 1887 1888 if (w->w_found && w->w_all == FALSE) 1889 return (WALK_DONE); 1890 1891 /* 1892 * We have searched for allocated memory; now search for freed memory. 1893 */ 1894 if (w->w_verbose) 1895 mdb_printf("Searching %s for free memory...\n", c->cache_name); 1896 1897 w->w_freemem = TRUE; 1898 1899 if (mdb_pwalk(freewalk, func, w, addr) == -1) { 1900 mdb_warn("can't find %s walker", freewalk); 1901 return (WALK_DONE); 1902 } 1903 1904 return (w->w_found && w->w_all == FALSE ? 
WALK_DONE : WALK_NEXT); 1905 } 1906 1907 static int 1908 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 1909 { 1910 if (c->cache_cflags & KMC_NOTOUCH) 1911 return (WALK_NEXT); 1912 1913 return (whatis_walk_cache(addr, c, w)); 1914 } 1915 1916 static int 1917 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_t *w) 1918 { 1919 if (!(c->cache_cflags & KMC_NOTOUCH)) 1920 return (WALK_NEXT); 1921 1922 return (whatis_walk_cache(addr, c, w)); 1923 } 1924 1925 static int 1926 whatis_walk_thread(uintptr_t addr, const kthread_t *t, whatis_t *w) 1927 { 1928 /* 1929 * Often, one calls ::whatis on an address from a thread structure. 1930 * We use this opportunity to short circuit this case... 1931 */ 1932 if (w->w_addr >= addr && w->w_addr < addr + sizeof (kthread_t)) { 1933 mdb_printf("%p is %p+%p, allocated as a thread structure\n", 1934 w->w_addr, addr, w->w_addr - addr); 1935 w->w_found++; 1936 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1937 } 1938 1939 if (w->w_addr < (uintptr_t)t->t_stkbase || 1940 w->w_addr > (uintptr_t)t->t_stk) 1941 return (WALK_NEXT); 1942 1943 if (t->t_stkbase == NULL) 1944 return (WALK_NEXT); 1945 1946 mdb_printf("%p is in thread %p's stack%s\n", w->w_addr, addr, 1947 stack_active(t, w->w_addr)); 1948 1949 w->w_found++; 1950 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 1951 } 1952 1953 static int 1954 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, whatis_t *w) 1955 { 1956 struct module mod; 1957 char name[MODMAXNAMELEN], *where; 1958 char c[MDB_SYM_NAMLEN]; 1959 Shdr shdr; 1960 GElf_Sym sym; 1961 1962 if (m->mod_mp == NULL) 1963 return (WALK_NEXT); 1964 1965 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) { 1966 mdb_warn("couldn't read modctl %p's module", addr); 1967 return (WALK_NEXT); 1968 } 1969 1970 if (w->w_addr >= (uintptr_t)mod.text && 1971 w->w_addr < (uintptr_t)mod.text + mod.text_size) { 1972 where = "text segment"; 1973 goto found; 1974 } 1975 1976 if (w->w_addr >= (uintptr_t)mod.data && 1977 w->w_addr < (uintptr_t)mod.data + mod.data_size) { 1978 where = "data segment"; 1979 goto found; 1980 } 1981 1982 if (w->w_addr >= (uintptr_t)mod.bss && 1983 w->w_addr < (uintptr_t)mod.bss + mod.bss_size) { 1984 where = "bss"; 1985 goto found; 1986 } 1987 1988 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) { 1989 mdb_warn("couldn't read symbol header for %p's module", addr); 1990 return (WALK_NEXT); 1991 } 1992 1993 if (w->w_addr >= (uintptr_t)mod.symtbl && w->w_addr < 1994 (uintptr_t)mod.symtbl + (uintptr_t)mod.nsyms * shdr.sh_entsize) { 1995 where = "symtab"; 1996 goto found; 1997 } 1998 1999 if (w->w_addr >= (uintptr_t)mod.symspace && 2000 w->w_addr < (uintptr_t)mod.symspace + (uintptr_t)mod.symsize) { 2001 where = "symspace"; 2002 goto found; 2003 } 2004 2005 return (WALK_NEXT); 2006 2007 found: 2008 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1) 2009 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr); 2010 2011 mdb_printf("%p is ", w->w_addr); 2012 2013 /* 2014 * If we found this address in a module, then there's a chance that 2015 * it's actually a named symbol. Try the symbol lookup. 
2016 */ 2017 if (mdb_lookup_by_addr(w->w_addr, MDB_SYM_FUZZY, c, sizeof (c), 2018 &sym) != -1 && w->w_addr >= (uintptr_t)sym.st_value && 2019 w->w_addr < (uintptr_t)sym.st_value + sym.st_size) { 2020 mdb_printf("%s+%lx ", c, w->w_addr - (uintptr_t)sym.st_value); 2021 } 2022 2023 mdb_printf("in %s's %s\n", name, where); 2024 2025 w->w_found++; 2026 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2027 } 2028 2029 /*ARGSUSED*/ 2030 static int 2031 whatis_walk_page(uintptr_t addr, const void *ignored, whatis_t *w) 2032 { 2033 static int machsize = 0; 2034 mdb_ctf_id_t id; 2035 2036 if (machsize == 0) { 2037 if (mdb_ctf_lookup_by_name("unix`page_t", &id) == 0) 2038 machsize = mdb_ctf_type_size(id); 2039 else { 2040 mdb_warn("could not get size of page_t"); 2041 machsize = sizeof (page_t); 2042 } 2043 } 2044 2045 if (w->w_addr < addr || w->w_addr >= addr + machsize) 2046 return (WALK_NEXT); 2047 2048 mdb_printf("%p is %p+%p, allocated as a page structure\n", 2049 w->w_addr, addr, w->w_addr - addr); 2050 2051 w->w_found++; 2052 return (w->w_all == TRUE ? WALK_NEXT : WALK_DONE); 2053 } 2054 2055 int 2056 whatis(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2057 { 2058 whatis_t w; 2059 2060 if (!(flags & DCMD_ADDRSPEC)) 2061 return (DCMD_USAGE); 2062 2063 w.w_verbose = FALSE; 2064 w.w_bufctl = FALSE; 2065 w.w_all = FALSE; 2066 w.w_idspace = FALSE; 2067 2068 if (mdb_getopts(argc, argv, 2069 'v', MDB_OPT_SETBITS, TRUE, &w.w_verbose, 2070 'a', MDB_OPT_SETBITS, TRUE, &w.w_all, 2071 'i', MDB_OPT_SETBITS, TRUE, &w.w_idspace, 2072 'b', MDB_OPT_SETBITS, TRUE, &w.w_bufctl, NULL) != argc) 2073 return (DCMD_USAGE); 2074 2075 w.w_addr = addr; 2076 w.w_found = 0; 2077 2078 if (w.w_verbose) 2079 mdb_printf("Searching modules...\n"); 2080 2081 if (!w.w_idspace) { 2082 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, &w) 2083 == -1) { 2084 mdb_warn("couldn't find modctl walker"); 2085 return (DCMD_ERR); 2086 } 2087 2088 if (w.w_found && w.w_all == FALSE) 2089 return (DCMD_OK); 2090 2091 /* 2092 * Now search all thread stacks. Yes, this is a little weak; we 2093 * can save a lot of work by first checking to see if the 2094 * address is in segkp vs. segkmem. But hey, computers are 2095 * fast. 
2096 */ 2097 if (w.w_verbose) 2098 mdb_printf("Searching threads...\n"); 2099 2100 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, &w) 2101 == -1) { 2102 mdb_warn("couldn't find thread walker"); 2103 return (DCMD_ERR); 2104 } 2105 2106 if (w.w_found && w.w_all == FALSE) 2107 return (DCMD_OK); 2108 2109 if (w.w_verbose) 2110 mdb_printf("Searching page structures...\n"); 2111 2112 if (mdb_walk("page", (mdb_walk_cb_t)whatis_walk_page, &w) 2113 == -1) { 2114 mdb_warn("couldn't find page walker"); 2115 return (DCMD_ERR); 2116 } 2117 2118 if (w.w_found && w.w_all == FALSE) 2119 return (DCMD_OK); 2120 } 2121 2122 if (mdb_walk("kmem_cache", 2123 (mdb_walk_cb_t)whatis_walk_touch, &w) == -1) { 2124 mdb_warn("couldn't find kmem_cache walker"); 2125 return (DCMD_ERR); 2126 } 2127 2128 if (w.w_found && w.w_all == FALSE) 2129 return (DCMD_OK); 2130 2131 if (mdb_walk("kmem_cache", 2132 (mdb_walk_cb_t)whatis_walk_notouch, &w) == -1) { 2133 mdb_warn("couldn't find kmem_cache walker"); 2134 return (DCMD_ERR); 2135 } 2136 2137 if (w.w_found && w.w_all == FALSE) 2138 return (DCMD_OK); 2139 2140 if (mdb_walk("vmem_postfix", 2141 (mdb_walk_cb_t)whatis_walk_vmem, &w) == -1) { 2142 mdb_warn("couldn't find vmem_postfix walker"); 2143 return (DCMD_ERR); 2144 } 2145 2146 if (w.w_found == 0) 2147 mdb_printf("%p is unknown\n", addr); 2148 2149 return (DCMD_OK); 2150 } 2151 2152 void 2153 whatis_help(void) 2154 { 2155 mdb_printf( 2156 "Given a virtual address, attempt to determine where it came\n" 2157 "from.\n" 2158 "\n" 2159 "\t-v\tVerbose output; display caches/arenas/etc as they are\n" 2160 "\t\tsearched\n" 2161 "\t-a\tFind all possible sources. Default behavior is to stop at\n" 2162 "\t\tthe first (most specific) source.\n" 2163 "\t-i\tSearch only identifier arenas and caches. By default\n" 2164 "\t\tthese are ignored.\n" 2165 "\t-b\tReport bufctls and vmem_segs for matches in kmem and vmem,\n" 2166 "\t\trespectively. 
Warning: if the buffer exists, but does not\n" 2167 "\t\thave a bufctl, it will not be reported.\n"); 2168 } 2169 2170 typedef struct kmem_log_cpu { 2171 uintptr_t kmc_low; 2172 uintptr_t kmc_high; 2173 } kmem_log_cpu_t; 2174 2175 typedef struct kmem_log_data { 2176 uintptr_t kmd_addr; 2177 kmem_log_cpu_t *kmd_cpu; 2178 } kmem_log_data_t; 2179 2180 int 2181 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b, 2182 kmem_log_data_t *kmd) 2183 { 2184 int i; 2185 kmem_log_cpu_t *kmc = kmd->kmd_cpu; 2186 size_t bufsize; 2187 2188 for (i = 0; i < NCPU; i++) { 2189 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high) 2190 break; 2191 } 2192 2193 if (kmd->kmd_addr) { 2194 if (b->bc_cache == NULL) 2195 return (WALK_NEXT); 2196 2197 if (mdb_vread(&bufsize, sizeof (bufsize), 2198 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) { 2199 mdb_warn( 2200 "failed to read cache_bufsize for cache at %p", 2201 b->bc_cache); 2202 return (WALK_ERR); 2203 } 2204 2205 if (kmd->kmd_addr < (uintptr_t)b->bc_addr || 2206 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize) 2207 return (WALK_NEXT); 2208 } 2209 2210 if (i == NCPU) 2211 mdb_printf(" "); 2212 else 2213 mdb_printf("%3d", i); 2214 2215 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, 2216 b->bc_timestamp, b->bc_thread); 2217 2218 return (WALK_NEXT); 2219 } 2220 2221 /*ARGSUSED*/ 2222 int 2223 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2224 { 2225 kmem_log_header_t lh; 2226 kmem_cpu_log_header_t clh; 2227 uintptr_t lhp, clhp; 2228 int ncpus; 2229 uintptr_t *cpu; 2230 GElf_Sym sym; 2231 kmem_log_cpu_t *kmc; 2232 int i; 2233 kmem_log_data_t kmd; 2234 uint_t opt_b = FALSE; 2235 2236 if (mdb_getopts(argc, argv, 2237 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc) 2238 return (DCMD_USAGE); 2239 2240 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) { 2241 mdb_warn("failed to read 'kmem_transaction_log'"); 2242 return (DCMD_ERR); 2243 } 2244 2245 if (lhp == NULL) { 2246 mdb_warn("no kmem transaction log\n"); 2247 return (DCMD_ERR); 2248 } 2249 2250 mdb_readvar(&ncpus, "ncpus"); 2251 2252 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) { 2253 mdb_warn("failed to read log header at %p", lhp); 2254 return (DCMD_ERR); 2255 } 2256 2257 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); 2258 2259 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC); 2260 2261 if (mdb_lookup_by_name("cpu", &sym) == -1) { 2262 mdb_warn("couldn't find 'cpu' array"); 2263 return (DCMD_ERR); 2264 } 2265 2266 if (sym.st_size != NCPU * sizeof (uintptr_t)) { 2267 mdb_warn("expected 'cpu' to be of size %d; found %d\n", 2268 NCPU * sizeof (uintptr_t), sym.st_size); 2269 return (DCMD_ERR); 2270 } 2271 2272 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) { 2273 mdb_warn("failed to read cpu array at %p", sym.st_value); 2274 return (DCMD_ERR); 2275 } 2276 2277 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC); 2278 kmd.kmd_addr = NULL; 2279 kmd.kmd_cpu = kmc; 2280 2281 for (i = 0; i < NCPU; i++) { 2282 2283 if (cpu[i] == NULL) 2284 continue; 2285 2286 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { 2287 mdb_warn("cannot read cpu %d's log header at %p", 2288 i, clhp); 2289 return (DCMD_ERR); 2290 } 2291 2292 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize + 2293 (uintptr_t)lh.lh_base; 2294 kmc[i].kmc_high = (uintptr_t)clh.clh_current; 2295 2296 clhp += sizeof (kmem_cpu_log_header_t); 2297 } 2298 2299 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR", 2300 "TIMESTAMP", "THREAD"); 
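/*
 * Each kmc[i] now bounds the log chunk that CPU i is currently filling;
 * kmem_log_walk() uses these ranges to attribute each audit record to a
 * CPU, printing a blank CPU column for records that fall outside every
 * active chunk.
 */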
2301 2302 /* 2303 * If we have been passed an address, print out only log entries 2304 * corresponding to that address. If opt_b is specified, then interpret 2305 * the address as a bufctl. 2306 */ 2307 if (flags & DCMD_ADDRSPEC) { 2308 kmem_bufctl_audit_t b; 2309 2310 if (opt_b) { 2311 kmd.kmd_addr = addr; 2312 } else { 2313 if (mdb_vread(&b, 2314 sizeof (kmem_bufctl_audit_t), addr) == -1) { 2315 mdb_warn("failed to read bufctl at %p", addr); 2316 return (DCMD_ERR); 2317 } 2318 2319 (void) kmem_log_walk(addr, &b, &kmd); 2320 2321 return (DCMD_OK); 2322 } 2323 } 2324 2325 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) { 2326 mdb_warn("can't find kmem log walker"); 2327 return (DCMD_ERR); 2328 } 2329 2330 return (DCMD_OK); 2331 } 2332 2333 typedef struct bufctl_history_cb { 2334 int bhc_flags; 2335 int bhc_argc; 2336 const mdb_arg_t *bhc_argv; 2337 int bhc_ret; 2338 } bufctl_history_cb_t; 2339 2340 /*ARGSUSED*/ 2341 static int 2342 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) 2343 { 2344 bufctl_history_cb_t *bhc = arg; 2345 2346 bhc->bhc_ret = 2347 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); 2348 2349 bhc->bhc_flags &= ~DCMD_LOOPFIRST; 2350 2351 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE); 2352 } 2353 2354 void 2355 bufctl_help(void) 2356 { 2357 mdb_printf("%s\n", 2358 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n"); 2359 mdb_dec_indent(2); 2360 mdb_printf("%<b>OPTIONS%</b>\n"); 2361 mdb_inc_indent(2); 2362 mdb_printf("%s", 2363 " -v Display the full content of the bufctl, including its stack trace\n" 2364 " -h retrieve the bufctl's transaction history, if available\n" 2365 " -a addr\n" 2366 " filter out bufctls not involving the buffer at addr\n" 2367 " -c caller\n" 2368 " filter out bufctls without the function/PC in their stack trace\n" 2369 " -e earliest\n" 2370 " filter out bufctls timestamped before earliest\n" 2371 " -l latest\n" 2372 " filter out bufctls timestamped after latest\n" 2373 " -t thread\n" 2374 " filter out bufctls not involving thread\n"); 2375 } 2376 2377 int 2378 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2379 { 2380 kmem_bufctl_audit_t bc; 2381 uint_t verbose = FALSE; 2382 uint_t history = FALSE; 2383 uint_t in_history = FALSE; 2384 uintptr_t caller = NULL, thread = NULL; 2385 uintptr_t laddr, haddr, baddr = NULL; 2386 hrtime_t earliest = 0, latest = 0; 2387 int i, depth; 2388 char c[MDB_SYM_NAMLEN]; 2389 GElf_Sym sym; 2390 2391 if (mdb_getopts(argc, argv, 2392 'v', MDB_OPT_SETBITS, TRUE, &verbose, 2393 'h', MDB_OPT_SETBITS, TRUE, &history, 2394 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ 2395 'c', MDB_OPT_UINTPTR, &caller, 2396 't', MDB_OPT_UINTPTR, &thread, 2397 'e', MDB_OPT_UINT64, &earliest, 2398 'l', MDB_OPT_UINT64, &latest, 2399 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) 2400 return (DCMD_USAGE); 2401 2402 if (!(flags & DCMD_ADDRSPEC)) 2403 return (DCMD_USAGE); 2404 2405 if (in_history && !history) 2406 return (DCMD_USAGE); 2407 2408 if (history && !in_history) { 2409 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), 2410 UM_SLEEP | UM_GC); 2411 bufctl_history_cb_t bhc; 2412 2413 nargv[0].a_type = MDB_TYPE_STRING; 2414 nargv[0].a_un.a_str = "-H"; /* prevent recursion */ 2415 2416 for (i = 0; i < argc; i++) 2417 nargv[i + 1] = argv[i]; 2418 2419 /* 2420 * When in history mode, we treat each element as if it 2421 * were in a separate loop, so that the headers group 2422 * bufctls with similar histories.
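 * For example, 'addr::bufctl -h -c kmem_free' (caller name illustrative)
 * re-invokes ::bufctl once for each entry on addr's bufctl_history chain,
 * showing only the history entries whose stack includes kmem_free.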
2423 */ 2424 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; 2425 bhc.bhc_argc = argc + 1; 2426 bhc.bhc_argv = nargv; 2427 bhc.bhc_ret = DCMD_OK; 2428 2429 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, 2430 addr) == -1) { 2431 mdb_warn("unable to walk bufctl_history"); 2432 return (DCMD_ERR); 2433 } 2434 2435 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) 2436 mdb_printf("\n"); 2437 2438 return (bhc.bhc_ret); 2439 } 2440 2441 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 2442 if (verbose) { 2443 mdb_printf("%16s %16s %16s %16s\n" 2444 "%<u>%16s %16s %16s %16s%</u>\n", 2445 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", 2446 "", "CACHE", "LASTLOG", "CONTENTS"); 2447 } else { 2448 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n", 2449 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER"); 2450 } 2451 } 2452 2453 if (mdb_vread(&bc, sizeof (bc), addr) == -1) { 2454 mdb_warn("couldn't read bufctl at %p", addr); 2455 return (DCMD_ERR); 2456 } 2457 2458 /* 2459 * Guard against bogus bc_depth in case the bufctl is corrupt or 2460 * the address does not really refer to a bufctl. 2461 */ 2462 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH); 2463 2464 if (caller != NULL) { 2465 laddr = caller; 2466 haddr = caller + sizeof (caller); 2467 2468 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), 2469 &sym) != -1 && caller == (uintptr_t)sym.st_value) { 2470 /* 2471 * We were provided an exact symbol value; any 2472 * address in the function is valid. 2473 */ 2474 laddr = (uintptr_t)sym.st_value; 2475 haddr = (uintptr_t)sym.st_value + sym.st_size; 2476 } 2477 2478 for (i = 0; i < depth; i++) 2479 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr) 2480 break; 2481 2482 if (i == depth) 2483 return (DCMD_OK); 2484 } 2485 2486 if (thread != NULL && (uintptr_t)bc.bc_thread != thread) 2487 return (DCMD_OK); 2488 2489 if (earliest != 0 && bc.bc_timestamp < earliest) 2490 return (DCMD_OK); 2491 2492 if (latest != 0 && bc.bc_timestamp > latest) 2493 return (DCMD_OK); 2494 2495 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr) 2496 return (DCMD_OK); 2497 2498 if (flags & DCMD_PIPE_OUT) { 2499 mdb_printf("%#lr\n", addr); 2500 return (DCMD_OK); 2501 } 2502 2503 if (verbose) { 2504 mdb_printf( 2505 "%<b>%16p%</b> %16p %16llx %16p\n" 2506 "%16s %16p %16p %16p\n", 2507 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread, 2508 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents); 2509 2510 mdb_inc_indent(17); 2511 for (i = 0; i < depth; i++) 2512 mdb_printf("%a\n", bc.bc_stack[i]); 2513 mdb_dec_indent(17); 2514 mdb_printf("\n"); 2515 } else { 2516 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr, 2517 bc.bc_timestamp, bc.bc_thread); 2518 2519 for (i = 0; i < depth; i++) { 2520 if (mdb_lookup_by_addr(bc.bc_stack[i], 2521 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) 2522 continue; 2523 if (strncmp(c, "kmem_", 5) == 0) 2524 continue; 2525 mdb_printf(" %a\n", bc.bc_stack[i]); 2526 break; 2527 } 2528 2529 if (i >= depth) 2530 mdb_printf("\n"); 2531 } 2532 2533 return (DCMD_OK); 2534 } 2535 2536 typedef struct kmem_verify { 2537 uint64_t *kmv_buf; /* buffer to read cache contents into */ 2538 size_t kmv_size; /* number of bytes in kmv_buf */ 2539 int kmv_corruption; /* > 0 if corruption found. */ 2540 int kmv_besilent; /* report actual corruption sites */ 2541 struct kmem_cache kmv_cache; /* the cache we're operating on */ 2542 } kmem_verify_t; 2543 2544 /* 2545 * verify_pattern() 2546 * verify that buf is filled with the pattern pat. 
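 * Returns the byte offset of the first 64-bit word that does not
 * match pat, or -1 if the entire buffer matches.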
2547 */ 2548 static int64_t 2549 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) 2550 { 2551 /*LINTED*/ 2552 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); 2553 uint64_t *buf; 2554 2555 for (buf = buf_arg; buf < bufend; buf++) 2556 if (*buf != pat) 2557 return ((uintptr_t)buf - (uintptr_t)buf_arg); 2558 return (-1); 2559 } 2560 2561 /* 2562 * verify_buftag() 2563 * verify that btp->bt_bxstat == (bcp ^ pat) 2564 */ 2565 static int 2566 verify_buftag(kmem_buftag_t *btp, uintptr_t pat) 2567 { 2568 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); 2569 } 2570 2571 /* 2572 * verify_free() 2573 * verify the integrity of a free block of memory by checking 2574 * that it is filled with 0xdeadbeef and that its buftag is sane. 2575 */ 2576 /*ARGSUSED1*/ 2577 static int 2578 verify_free(uintptr_t addr, const void *data, void *private) 2579 { 2580 kmem_verify_t *kmv = (kmem_verify_t *)private; 2581 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 2582 int64_t corrupt; /* corruption offset */ 2583 kmem_buftag_t *buftagp; /* ptr to buftag */ 2584 kmem_cache_t *cp = &kmv->kmv_cache; 2585 int besilent = kmv->kmv_besilent; 2586 2587 /*LINTED*/ 2588 buftagp = KMEM_BUFTAG(cp, buf); 2589 2590 /* 2591 * Read the buffer to check. 2592 */ 2593 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 2594 if (!besilent) 2595 mdb_warn("couldn't read %p", addr); 2596 return (WALK_NEXT); 2597 } 2598 2599 if ((corrupt = verify_pattern(buf, cp->cache_verify, 2600 KMEM_FREE_PATTERN)) >= 0) { 2601 if (!besilent) 2602 mdb_printf("buffer %p (free) seems corrupted, at %p\n", 2603 addr, (uintptr_t)addr + corrupt); 2604 goto corrupt; 2605 } 2606 /* 2607 * When KMF_LITE is set, buftagp->bt_redzone is used to hold 2608 * the first bytes of the buffer, hence we cannot check for red 2609 * zone corruption. 2610 */ 2611 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH && 2612 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) { 2613 if (!besilent) 2614 mdb_printf("buffer %p (free) seems to " 2615 "have a corrupt redzone pattern\n", addr); 2616 goto corrupt; 2617 } 2618 2619 /* 2620 * confirm bufctl pointer integrity. 2621 */ 2622 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) { 2623 if (!besilent) 2624 mdb_printf("buffer %p (free) has a corrupt " 2625 "buftag\n", addr); 2626 goto corrupt; 2627 } 2628 2629 return (WALK_NEXT); 2630 corrupt: 2631 kmv->kmv_corruption++; 2632 return (WALK_NEXT); 2633 } 2634 2635 /* 2636 * verify_alloc() 2637 * Verify that the buftag of an allocated buffer makes sense with respect 2638 * to the buffer. 2639 */ 2640 /*ARGSUSED1*/ 2641 static int 2642 verify_alloc(uintptr_t addr, const void *data, void *private) 2643 { 2644 kmem_verify_t *kmv = (kmem_verify_t *)private; 2645 kmem_cache_t *cp = &kmv->kmv_cache; 2646 uint64_t *buf = kmv->kmv_buf; /* buf to validate */ 2647 /*LINTED*/ 2648 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf); 2649 uint32_t *ip = (uint32_t *)buftagp; 2650 uint8_t *bp = (uint8_t *)buf; 2651 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ 2652 int besilent = kmv->kmv_besilent; 2653 2654 /* 2655 * Read the buffer to check. 2656 */ 2657 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) { 2658 if (!besilent) 2659 mdb_warn("couldn't read %p", addr); 2660 return (WALK_NEXT); 2661 } 2662 2663 /* 2664 * There are two cases to handle: 2665 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have 2666 * 0xfeedfacefeedface at the end of it 2667 * 2. 
If the buf was alloc'd using kmem_alloc, it will have 2668 * 0xbb just past the end of the region in use. At the buftag, 2669 * it will have 0xfeedface (or, if the whole buffer is in use, 2670 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on 2671 * endianness), followed by 32 bits containing the offset of the 2672 * 0xbb byte in the buffer. 2673 * 2674 * Finally, the two 32-bit words that comprise the second half of the 2675 * buftag should xor to KMEM_BUFTAG_ALLOC 2676 */ 2677 2678 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN) 2679 looks_ok = 1; 2680 else if (!KMEM_SIZE_VALID(ip[1])) 2681 size_ok = 0; 2682 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE) 2683 looks_ok = 1; 2684 else 2685 size_ok = 0; 2686 2687 if (!size_ok) { 2688 if (!besilent) 2689 mdb_printf("buffer %p (allocated) has a corrupt " 2690 "redzone size encoding\n", addr); 2691 goto corrupt; 2692 } 2693 2694 if (!looks_ok) { 2695 if (!besilent) 2696 mdb_printf("buffer %p (allocated) has a corrupt " 2697 "redzone signature\n", addr); 2698 goto corrupt; 2699 } 2700 2701 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) { 2702 if (!besilent) 2703 mdb_printf("buffer %p (allocated) has a " 2704 "corrupt buftag\n", addr); 2705 goto corrupt; 2706 } 2707 2708 return (WALK_NEXT); 2709 corrupt: 2710 kmv->kmv_corruption++; 2711 return (WALK_NEXT); 2712 } 2713 2714 /*ARGSUSED2*/ 2715 int 2716 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 2717 { 2718 if (flags & DCMD_ADDRSPEC) { 2719 int check_alloc = 0, check_free = 0; 2720 kmem_verify_t kmv; 2721 2722 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache), 2723 addr) == -1) { 2724 mdb_warn("couldn't read kmem_cache %p", addr); 2725 return (DCMD_ERR); 2726 } 2727 2728 kmv.kmv_size = kmv.kmv_cache.cache_buftag + 2729 sizeof (kmem_buftag_t); 2730 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC); 2731 kmv.kmv_corruption = 0; 2732 2733 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) { 2734 check_alloc = 1; 2735 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF) 2736 check_free = 1; 2737 } else { 2738 if (!(flags & DCMD_LOOP)) { 2739 mdb_warn("cache %p (%s) does not have " 2740 "redzone checking enabled\n", addr, 2741 kmv.kmv_cache.cache_name); 2742 } 2743 return (DCMD_ERR); 2744 } 2745 2746 if (flags & DCMD_LOOP) { 2747 /* 2748 * table mode, don't print out every corrupt buffer 2749 */ 2750 kmv.kmv_besilent = 1; 2751 } else { 2752 mdb_printf("Summary for cache '%s'\n", 2753 kmv.kmv_cache.cache_name); 2754 mdb_inc_indent(2); 2755 kmv.kmv_besilent = 0; 2756 } 2757 2758 if (check_alloc) 2759 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr); 2760 if (check_free) 2761 (void) mdb_pwalk("freemem", verify_free, &kmv, addr); 2762 2763 if (flags & DCMD_LOOP) { 2764 if (kmv.kmv_corruption == 0) { 2765 mdb_printf("%-*s %?p clean\n", 2766 KMEM_CACHE_NAMELEN, 2767 kmv.kmv_cache.cache_name, addr); 2768 } else { 2769 char *s = ""; /* optional s in "buffer[s]" */ 2770 if (kmv.kmv_corruption > 1) 2771 s = "s"; 2772 2773 mdb_printf("%-*s %?p %d corrupt buffer%s\n", 2774 KMEM_CACHE_NAMELEN, 2775 kmv.kmv_cache.cache_name, addr, 2776 kmv.kmv_corruption, s); 2777 } 2778 } else { 2779 /* 2780 * This is the more verbose mode, when the user has 2781 * typed addr::kmem_verify. If the cache was clean, 2782 * nothing will have yet been printed. So say something.
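 * (kmv_besilent is clear in this mode, so any corrupt buffers have
 * already been reported individually as the walks encountered them.)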
2783 */ 2784 if (kmv.kmv_corruption == 0) 2785 mdb_printf("clean\n"); 2786 2787 mdb_dec_indent(2); 2788 } 2789 } else { 2790 /* 2791 * If the user didn't specify a cache to verify, we'll walk all 2792 * kmem_cache's, specifying ourselves as a callback for each... 2793 * this is the equivalent of '::walk kmem_cache .::kmem_verify' 2794 */ 2795 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", KMEM_CACHE_NAMELEN, 2796 "Cache Name", "Addr", "Cache Integrity"); 2797 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL)); 2798 } 2799 2800 return (DCMD_OK); 2801 } 2802 2803 typedef struct vmem_node { 2804 struct vmem_node *vn_next; 2805 struct vmem_node *vn_parent; 2806 struct vmem_node *vn_sibling; 2807 struct vmem_node *vn_children; 2808 uintptr_t vn_addr; 2809 int vn_marked; 2810 vmem_t vn_vmem; 2811 } vmem_node_t; 2812 2813 typedef struct vmem_walk { 2814 vmem_node_t *vw_root; 2815 vmem_node_t *vw_current; 2816 } vmem_walk_t; 2817 2818 int 2819 vmem_walk_init(mdb_walk_state_t *wsp) 2820 { 2821 uintptr_t vaddr, paddr; 2822 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; 2823 vmem_walk_t *vw; 2824 2825 if (mdb_readvar(&vaddr, "vmem_list") == -1) { 2826 mdb_warn("couldn't read 'vmem_list'"); 2827 return (WALK_ERR); 2828 } 2829 2830 while (vaddr != NULL) { 2831 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); 2832 vp->vn_addr = vaddr; 2833 vp->vn_next = head; 2834 head = vp; 2835 2836 if (vaddr == wsp->walk_addr) 2837 current = vp; 2838 2839 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { 2840 mdb_warn("couldn't read vmem_t at %p", vaddr); 2841 goto err; 2842 } 2843 2844 vaddr = (uintptr_t)vp->vn_vmem.vm_next; 2845 } 2846 2847 for (vp = head; vp != NULL; vp = vp->vn_next) { 2848 2849 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { 2850 vp->vn_sibling = root; 2851 root = vp; 2852 continue; 2853 } 2854 2855 for (parent = head; parent != NULL; parent = parent->vn_next) { 2856 if (parent->vn_addr != paddr) 2857 continue; 2858 vp->vn_sibling = parent->vn_children; 2859 parent->vn_children = vp; 2860 vp->vn_parent = parent; 2861 break; 2862 } 2863 2864 if (parent == NULL) { 2865 mdb_warn("couldn't find %p's parent (%p)\n", 2866 vp->vn_addr, paddr); 2867 goto err; 2868 } 2869 } 2870 2871 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); 2872 vw->vw_root = root; 2873 2874 if (current != NULL) 2875 vw->vw_current = current; 2876 else 2877 vw->vw_current = root; 2878 2879 wsp->walk_data = vw; 2880 return (WALK_NEXT); 2881 err: 2882 for (vp = head; head != NULL; vp = head) { 2883 head = vp->vn_next; 2884 mdb_free(vp, sizeof (vmem_node_t)); 2885 } 2886 2887 return (WALK_ERR); 2888 } 2889 2890 int 2891 vmem_walk_step(mdb_walk_state_t *wsp) 2892 { 2893 vmem_walk_t *vw = wsp->walk_data; 2894 vmem_node_t *vp; 2895 int rval; 2896 2897 if ((vp = vw->vw_current) == NULL) 2898 return (WALK_DONE); 2899 2900 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 2901 2902 if (vp->vn_children != NULL) { 2903 vw->vw_current = vp->vn_children; 2904 return (rval); 2905 } 2906 2907 do { 2908 vw->vw_current = vp->vn_sibling; 2909 vp = vp->vn_parent; 2910 } while (vw->vw_current == NULL && vp != NULL); 2911 2912 return (rval); 2913 } 2914 2915 /* 2916 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all 2917 * children are visited before their parent. We perform the postfix walk 2918 * iteratively (rather than recursively) to allow mdb to regain control 2919 * after each callback.
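 * For example, with arenas A -> {B, C} and B -> {D}, the walk visits D
 * before B, and visits both B and C before finally visiting A.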
2920 */ 2921 int 2922 vmem_postfix_walk_step(mdb_walk_state_t *wsp) 2923 { 2924 vmem_walk_t *vw = wsp->walk_data; 2925 vmem_node_t *vp = vw->vw_current; 2926 int rval; 2927 2928 /* 2929 * If this node is marked, then we know that we have already visited 2930 * all of its children. If the node has any siblings, they need to 2931 * be visited next; otherwise, we need to visit the parent. Note 2932 * that vp->vn_marked will only be zero on the first invocation of 2933 * the step function. 2934 */ 2935 if (vp->vn_marked) { 2936 if (vp->vn_sibling != NULL) 2937 vp = vp->vn_sibling; 2938 else if (vp->vn_parent != NULL) 2939 vp = vp->vn_parent; 2940 else { 2941 /* 2942 * We have neither a parent, nor a sibling, and we 2943 * have already been visited; we're done. 2944 */ 2945 return (WALK_DONE); 2946 } 2947 } 2948 2949 /* 2950 * Before we visit this node, visit its children. 2951 */ 2952 while (vp->vn_children != NULL && !vp->vn_children->vn_marked) 2953 vp = vp->vn_children; 2954 2955 vp->vn_marked = 1; 2956 vw->vw_current = vp; 2957 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); 2958 2959 return (rval); 2960 } 2961 2962 void 2963 vmem_walk_fini(mdb_walk_state_t *wsp) 2964 { 2965 vmem_walk_t *vw = wsp->walk_data; 2966 vmem_node_t *root = vw->vw_root; 2967 int done; 2968 2969 if (root == NULL) 2970 return; 2971 2972 if ((vw->vw_root = root->vn_children) != NULL) 2973 vmem_walk_fini(wsp); 2974 2975 vw->vw_root = root->vn_sibling; 2976 done = (root->vn_sibling == NULL && root->vn_parent == NULL); 2977 mdb_free(root, sizeof (vmem_node_t)); 2978 2979 if (done) { 2980 mdb_free(vw, sizeof (vmem_walk_t)); 2981 } else { 2982 vmem_walk_fini(wsp); 2983 } 2984 } 2985 2986 typedef struct vmem_seg_walk { 2987 uint8_t vsw_type; 2988 uintptr_t vsw_start; 2989 uintptr_t vsw_current; 2990 } vmem_seg_walk_t; 2991 2992 /*ARGSUSED*/ 2993 int 2994 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) 2995 { 2996 vmem_seg_walk_t *vsw; 2997 2998 if (wsp->walk_addr == NULL) { 2999 mdb_warn("vmem_%s does not support global walks\n", name); 3000 return (WALK_ERR); 3001 } 3002 3003 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); 3004 3005 vsw->vsw_type = type; 3006 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0); 3007 vsw->vsw_current = vsw->vsw_start; 3008 3009 return (WALK_NEXT); 3010 } 3011 3012 /* 3013 * vmem segments can't have type 0 (this should be added to vmem_impl.h). 
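 * The generic vmem_seg walker below uses VMEM_NONE as a wildcard meaning
 * "do not filter segments by type".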
3014 */ 3015 #define VMEM_NONE 0 3016 3017 int 3018 vmem_alloc_walk_init(mdb_walk_state_t *wsp) 3019 { 3020 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); 3021 } 3022 3023 int 3024 vmem_free_walk_init(mdb_walk_state_t *wsp) 3025 { 3026 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); 3027 } 3028 3029 int 3030 vmem_span_walk_init(mdb_walk_state_t *wsp) 3031 { 3032 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); 3033 } 3034 3035 int 3036 vmem_seg_walk_init(mdb_walk_state_t *wsp) 3037 { 3038 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); 3039 } 3040 3041 int 3042 vmem_seg_walk_step(mdb_walk_state_t *wsp) 3043 { 3044 vmem_seg_t seg; 3045 vmem_seg_walk_t *vsw = wsp->walk_data; 3046 uintptr_t addr = vsw->vsw_current; 3047 static size_t seg_size = 0; 3048 int rval; 3049 3050 if (!seg_size) { 3051 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) { 3052 mdb_warn("failed to read 'vmem_seg_size'"); 3053 seg_size = sizeof (vmem_seg_t); 3054 } 3055 } 3056 3057 if (seg_size < sizeof (seg)) 3058 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); 3059 3060 if (mdb_vread(&seg, seg_size, addr) == -1) { 3061 mdb_warn("couldn't read vmem_seg at %p", addr); 3062 return (WALK_ERR); 3063 } 3064 3065 vsw->vsw_current = (uintptr_t)seg.vs_anext; 3066 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { 3067 rval = WALK_NEXT; 3068 } else { 3069 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); 3070 } 3071 3072 if (vsw->vsw_current == vsw->vsw_start) 3073 return (WALK_DONE); 3074 3075 return (rval); 3076 } 3077 3078 void 3079 vmem_seg_walk_fini(mdb_walk_state_t *wsp) 3080 { 3081 vmem_seg_walk_t *vsw = wsp->walk_data; 3082 3083 mdb_free(vsw, sizeof (vmem_seg_walk_t)); 3084 } 3085 3086 #define VMEM_NAMEWIDTH 22 3087 3088 int 3089 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3090 { 3091 vmem_t v, parent; 3092 vmem_kstat_t *vkp = &v.vm_kstat; 3093 uintptr_t paddr; 3094 int ident = 0; 3095 char c[VMEM_NAMEWIDTH]; 3096 3097 if (!(flags & DCMD_ADDRSPEC)) { 3098 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { 3099 mdb_warn("can't walk vmem"); 3100 return (DCMD_ERR); 3101 } 3102 return (DCMD_OK); 3103 } 3104 3105 if (DCMD_HDRSPEC(flags)) 3106 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", 3107 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", 3108 "TOTAL", "SUCCEED", "FAIL"); 3109 3110 if (mdb_vread(&v, sizeof (v), addr) == -1) { 3111 mdb_warn("couldn't read vmem at %p", addr); 3112 return (DCMD_ERR); 3113 } 3114 3115 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { 3116 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { 3117 mdb_warn("couldn't trace %p's ancestry", addr); 3118 ident = 0; 3119 break; 3120 } 3121 paddr = (uintptr_t)parent.vm_source; 3122 } 3123 3124 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); 3125 3126 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", 3127 addr, VMEM_NAMEWIDTH, c, 3128 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64, 3129 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64); 3130 3131 return (DCMD_OK); 3132 } 3133 3134 void 3135 vmem_seg_help(void) 3136 { 3137 mdb_printf("%s\n", 3138 "Display the contents of vmem_seg_ts, with optional filtering.\n" 3139 "\n" 3140 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" 3141 "representing a single chunk of data. 
Only ALLOC segments have debugging\n" 3142 "information.\n"); 3143 mdb_dec_indent(2); 3144 mdb_printf("%<b>OPTIONS%</b>\n"); 3145 mdb_inc_indent(2); 3146 mdb_printf("%s", 3147 " -v Display the full content of the vmem_seg, including its stack trace\n" 3148 " -s report the size of the segment, instead of the end address\n" 3149 " -c caller\n" 3150 " filter out segments without the function/PC in their stack trace\n" 3151 " -e earliest\n" 3152 " filter out segments timestamped before earliest\n" 3153 " -l latest\n" 3154 " filter out segments timestamped after latest\n" 3155 " -m minsize\n" 3156 " filter out segments smaller than minsize\n" 3157 " -M maxsize\n" 3158 " filter out segments larger than maxsize\n" 3159 " -t thread\n" 3160 " filter out segments not involving thread\n" 3161 " -T type\n" 3162 " filter out segments not of type 'type'\n" 3163 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); 3164 } 3165 3166 /*ARGSUSED*/ 3167 int 3168 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3169 { 3170 vmem_seg_t vs; 3171 pc_t *stk = vs.vs_stack; 3172 uintptr_t sz; 3173 uint8_t t; 3174 const char *type = NULL; 3175 GElf_Sym sym; 3176 char c[MDB_SYM_NAMLEN]; 3177 int no_debug; 3178 int i; 3179 int depth; 3180 uintptr_t laddr, haddr; 3181 3182 uintptr_t caller = NULL, thread = NULL; 3183 uintptr_t minsize = 0, maxsize = 0; 3184 3185 hrtime_t earliest = 0, latest = 0; 3186 3187 uint_t size = 0; 3188 uint_t verbose = 0; 3189 3190 if (!(flags & DCMD_ADDRSPEC)) 3191 return (DCMD_USAGE); 3192 3193 if (mdb_getopts(argc, argv, 3194 'c', MDB_OPT_UINTPTR, &caller, 3195 'e', MDB_OPT_UINT64, &earliest, 3196 'l', MDB_OPT_UINT64, &latest, 3197 's', MDB_OPT_SETBITS, TRUE, &size, 3198 'm', MDB_OPT_UINTPTR, &minsize, 3199 'M', MDB_OPT_UINTPTR, &maxsize, 3200 't', MDB_OPT_UINTPTR, &thread, 3201 'T', MDB_OPT_STR, &type, 3202 'v', MDB_OPT_SETBITS, TRUE, &verbose, 3203 NULL) != argc) 3204 return (DCMD_USAGE); 3205 3206 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { 3207 if (verbose) { 3208 mdb_printf("%16s %4s %16s %16s %16s\n" 3209 "%<u>%16s %4s %16s %16s %16s%</u>\n", 3210 "ADDR", "TYPE", "START", "END", "SIZE", 3211 "", "", "THREAD", "TIMESTAMP", ""); 3212 } else { 3213 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", 3214 "START", size?
"SIZE" : "END", "WHO"); 3215 } 3216 } 3217 3218 if (mdb_vread(&vs, sizeof (vs), addr) == -1) { 3219 mdb_warn("couldn't read vmem_seg at %p", addr); 3220 return (DCMD_ERR); 3221 } 3222 3223 if (type != NULL) { 3224 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) 3225 t = VMEM_ALLOC; 3226 else if (strcmp(type, "FREE") == 0) 3227 t = VMEM_FREE; 3228 else if (strcmp(type, "SPAN") == 0) 3229 t = VMEM_SPAN; 3230 else if (strcmp(type, "ROTR") == 0 || 3231 strcmp(type, "ROTOR") == 0) 3232 t = VMEM_ROTOR; 3233 else if (strcmp(type, "WLKR") == 0 || 3234 strcmp(type, "WALKER") == 0) 3235 t = VMEM_WALKER; 3236 else { 3237 mdb_warn("\"%s\" is not a recognized vmem_seg type\n", 3238 type); 3239 return (DCMD_ERR); 3240 } 3241 3242 if (vs.vs_type != t) 3243 return (DCMD_OK); 3244 } 3245 3246 sz = vs.vs_end - vs.vs_start; 3247 3248 if (minsize != 0 && sz < minsize) 3249 return (DCMD_OK); 3250 3251 if (maxsize != 0 && sz > maxsize) 3252 return (DCMD_OK); 3253 3254 t = vs.vs_type; 3255 depth = vs.vs_depth; 3256 3257 /* 3258 * debug info, when present, is only accurate for VMEM_ALLOC segments 3259 */ 3260 no_debug = (t != VMEM_ALLOC) || 3261 (depth == 0 || depth > VMEM_STACK_DEPTH); 3262 3263 if (no_debug) { 3264 if (caller != NULL || thread != NULL || earliest != 0 || 3265 latest != 0) 3266 return (DCMD_OK); /* not enough info */ 3267 } else { 3268 if (caller != NULL) { 3269 laddr = caller; 3270 haddr = caller + sizeof (caller); 3271 3272 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, 3273 sizeof (c), &sym) != -1 && 3274 caller == (uintptr_t)sym.st_value) { 3275 /* 3276 * We were provided an exact symbol value; any 3277 * address in the function is valid. 3278 */ 3279 laddr = (uintptr_t)sym.st_value; 3280 haddr = (uintptr_t)sym.st_value + sym.st_size; 3281 } 3282 3283 for (i = 0; i < depth; i++) 3284 if (vs.vs_stack[i] >= laddr && 3285 vs.vs_stack[i] < haddr) 3286 break; 3287 3288 if (i == depth) 3289 return (DCMD_OK); 3290 } 3291 3292 if (thread != NULL && (uintptr_t)vs.vs_thread != thread) 3293 return (DCMD_OK); 3294 3295 if (earliest != 0 && vs.vs_timestamp < earliest) 3296 return (DCMD_OK); 3297 3298 if (latest != 0 && vs.vs_timestamp > latest) 3299 return (DCMD_OK); 3300 } 3301 3302 type = (t == VMEM_ALLOC ? "ALLC" : 3303 t == VMEM_FREE ? "FREE" : 3304 t == VMEM_SPAN ? "SPAN" : 3305 t == VMEM_ROTOR ? "ROTR" : 3306 t == VMEM_WALKER ? "WLKR" : 3307 "????"); 3308 3309 if (flags & DCMD_PIPE_OUT) { 3310 mdb_printf("%#lr\n", addr); 3311 return (DCMD_OK); 3312 } 3313 3314 if (verbose) { 3315 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n", 3316 addr, type, vs.vs_start, vs.vs_end, sz); 3317 3318 if (no_debug) 3319 return (DCMD_OK); 3320 3321 mdb_printf("%16s %4s %16p %16llx\n", 3322 "", "", vs.vs_thread, vs.vs_timestamp); 3323 3324 mdb_inc_indent(17); 3325 for (i = 0; i < depth; i++) { 3326 mdb_printf("%a\n", stk[i]); 3327 } 3328 mdb_dec_indent(17); 3329 mdb_printf("\n"); 3330 } else { 3331 mdb_printf("%0?p %4s %0?p %0?p", addr, type, 3332 vs.vs_start, size? 
sz : vs.vs_end); 3333 3334 if (no_debug) { 3335 mdb_printf("\n"); 3336 return (DCMD_OK); 3337 } 3338 3339 for (i = 0; i < depth; i++) { 3340 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, 3341 c, sizeof (c), &sym) == -1) 3342 continue; 3343 if (strncmp(c, "vmem_", 5) == 0) 3344 continue; 3345 break; 3346 } 3347 mdb_printf(" %a\n", stk[i]); 3348 } 3349 return (DCMD_OK); 3350 } 3351 3352 typedef struct kmalog_data { 3353 uintptr_t kma_addr; 3354 hrtime_t kma_newest; 3355 } kmalog_data_t; 3356 3357 /*ARGSUSED*/ 3358 static int 3359 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma) 3360 { 3361 char name[KMEM_CACHE_NAMELEN + 1]; 3362 hrtime_t delta; 3363 int i, depth; 3364 size_t bufsize; 3365 3366 if (bcp->bc_timestamp == 0) 3367 return (WALK_DONE); 3368 3369 if (kma->kma_newest == 0) 3370 kma->kma_newest = bcp->bc_timestamp; 3371 3372 if (kma->kma_addr) { 3373 if (mdb_vread(&bufsize, sizeof (bufsize), 3374 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) { 3375 mdb_warn( 3376 "failed to read cache_bufsize for cache at %p", 3377 bcp->bc_cache); 3378 return (WALK_ERR); 3379 } 3380 3381 if (kma->kma_addr < (uintptr_t)bcp->bc_addr || 3382 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize) 3383 return (WALK_NEXT); 3384 } 3385 3386 delta = kma->kma_newest - bcp->bc_timestamp; 3387 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3388 3389 if (mdb_readstr(name, sizeof (name), (uintptr_t) 3390 &bcp->bc_cache->cache_name) <= 0) 3391 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); 3392 3393 mdb_printf("\nT-%lld.%09lld addr=%p %s\n", 3394 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); 3395 3396 for (i = 0; i < depth; i++) 3397 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3398 3399 return (WALK_NEXT); 3400 } 3401 3402 int 3403 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3404 { 3405 const char *logname = "kmem_transaction_log"; 3406 kmalog_data_t kma; 3407 3408 if (argc > 1) 3409 return (DCMD_USAGE); 3410 3411 kma.kma_newest = 0; 3412 if (flags & DCMD_ADDRSPEC) 3413 kma.kma_addr = addr; 3414 else 3415 kma.kma_addr = NULL; 3416 3417 if (argc > 0) { 3418 if (argv->a_type != MDB_TYPE_STRING) 3419 return (DCMD_USAGE); 3420 if (strcmp(argv->a_un.a_str, "fail") == 0) 3421 logname = "kmem_failure_log"; 3422 else if (strcmp(argv->a_un.a_str, "slab") == 0) 3423 logname = "kmem_slab_log"; 3424 else 3425 return (DCMD_USAGE); 3426 } 3427 3428 if (mdb_readvar(&addr, logname) == -1) { 3429 mdb_warn("failed to read %s log header pointer", logname); 3430 return (DCMD_ERR); 3431 } 3432 3433 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) { 3434 mdb_warn("failed to walk kmem log"); 3435 return (DCMD_ERR); 3436 } 3437 3438 return (DCMD_OK); 3439 } 3440 3441 /* 3442 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here. 3443 * The first piece is a structure which we use to accumulate kmem_cache_t 3444 * addresses of interest. The kmc_add is used as a callback for the kmem_cache 3445 * walker; we either add all caches, or ones named explicitly as arguments.
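 * For example, '::kmausers' summarizes the heaviest allocation stacks in
 * every KMF_AUDIT cache, while '::kmausers -f ufs_inode_cache' (cache name
 * illustrative) also prints each matching bufctl for just that cache.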
3446 */ 3447 3448 typedef struct kmclist { 3449 const char *kmc_name; /* Name to match (or NULL) */ 3450 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */ 3451 int kmc_nelems; /* Num entries in kmc_caches */ 3452 int kmc_size; /* Size of kmc_caches array */ 3453 } kmclist_t; 3454 3455 static int 3456 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc) 3457 { 3458 void *p; 3459 int s; 3460 3461 if (kmc->kmc_name == NULL || 3462 strcmp(cp->cache_name, kmc->kmc_name) == 0) { 3463 /* 3464 * If we have a match, grow our array (if necessary), and then 3465 * add the virtual address of the matching cache to our list. 3466 */ 3467 if (kmc->kmc_nelems >= kmc->kmc_size) { 3468 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256; 3469 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); 3470 3471 bcopy(kmc->kmc_caches, p, 3472 sizeof (uintptr_t) * kmc->kmc_size); 3473 3474 kmc->kmc_caches = p; 3475 kmc->kmc_size = s; 3476 } 3477 3478 kmc->kmc_caches[kmc->kmc_nelems++] = addr; 3479 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT); 3480 } 3481 3482 return (WALK_NEXT); 3483 } 3484 3485 /* 3486 * The second piece of ::kmausers is a hash table of allocations. Each 3487 * allocation owner is identified by its stack trace and data_size. We then 3488 * track the total bytes of all such allocations, and the number of allocations 3489 * to report at the end. Once we have a list of caches, we walk through the 3490 * allocated bufctls of each, and update our hash table accordingly. 3491 */ 3492 3493 typedef struct kmowner { 3494 struct kmowner *kmo_head; /* First hash elt in bucket */ 3495 struct kmowner *kmo_next; /* Next hash elt in chain */ 3496 size_t kmo_signature; /* Hash table signature */ 3497 uint_t kmo_num; /* Number of allocations */ 3498 size_t kmo_data_size; /* Size of each allocation */ 3499 size_t kmo_total_size; /* Total bytes of allocation */ 3500 int kmo_depth; /* Depth of stack trace */ 3501 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */ 3502 } kmowner_t; 3503 3504 typedef struct kmusers { 3505 uintptr_t kmu_addr; /* address of interest */ 3506 const kmem_cache_t *kmu_cache; /* Current kmem cache */ 3507 kmowner_t *kmu_hash; /* Hash table of owners */ 3508 int kmu_nelems; /* Number of entries in use */ 3509 int kmu_size; /* Total number of entries */ 3510 } kmusers_t; 3511 3512 static void 3513 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp, 3514 size_t size, size_t data_size) 3515 { 3516 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3517 size_t bucket, signature = data_size; 3518 kmowner_t *kmo, *kmoend; 3519 3520 /* 3521 * If the hash table is full, double its size and rehash everything. 3522 */ 3523 if (kmu->kmu_nelems >= kmu->kmu_size) { 3524 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024; 3525 3526 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC); 3527 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size); 3528 kmu->kmu_hash = kmo; 3529 kmu->kmu_size = s; 3530 3531 kmoend = kmu->kmu_hash + kmu->kmu_size; 3532 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) 3533 kmo->kmo_head = NULL; 3534 3535 kmoend = kmu->kmu_hash + kmu->kmu_nelems; 3536 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) { 3537 bucket = kmo->kmo_signature & (kmu->kmu_size - 1); 3538 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 3539 kmu->kmu_hash[bucket].kmo_head = kmo; 3540 } 3541 } 3542 3543 /* 3544 * Finish computing the hash signature from the stack trace, and then 3545 * see if the owner is in the hash table. If so, update our stats. 
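 * (The signature is simply data_size plus the sum of the stack PCs; the
 * field-by-field comparison below resolves any collisions.)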
3546 */ 3547 for (i = 0; i < depth; i++) 3548 signature += bcp->bc_stack[i]; 3549 3550 bucket = signature & (kmu->kmu_size - 1); 3551 3552 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) { 3553 if (kmo->kmo_signature == signature) { 3554 size_t difference = 0; 3555 3556 difference |= kmo->kmo_data_size - data_size; 3557 difference |= kmo->kmo_depth - depth; 3558 3559 for (i = 0; i < depth; i++) { 3560 difference |= kmo->kmo_stack[i] - 3561 bcp->bc_stack[i]; 3562 } 3563 3564 if (difference == 0) { 3565 kmo->kmo_total_size += size; 3566 kmo->kmo_num++; 3567 return; 3568 } 3569 } 3570 } 3571 3572 /* 3573 * If the owner is not yet hashed, grab the next element and fill it 3574 * in based on the allocation information. 3575 */ 3576 kmo = &kmu->kmu_hash[kmu->kmu_nelems++]; 3577 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head; 3578 kmu->kmu_hash[bucket].kmo_head = kmo; 3579 3580 kmo->kmo_signature = signature; 3581 kmo->kmo_num = 1; 3582 kmo->kmo_data_size = data_size; 3583 kmo->kmo_total_size = size; 3584 kmo->kmo_depth = depth; 3585 3586 for (i = 0; i < depth; i++) 3587 kmo->kmo_stack[i] = bcp->bc_stack[i]; 3588 } 3589 3590 /* 3591 * When ::kmausers is invoked without the -f flag, we simply update our hash 3592 * table with the information from each allocated bufctl. 3593 */ 3594 /*ARGSUSED*/ 3595 static int 3596 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 3597 { 3598 const kmem_cache_t *cp = kmu->kmu_cache; 3599 3600 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3601 return (WALK_NEXT); 3602 } 3603 3604 /* 3605 * When ::kmausers is invoked with the -f flag, we print out the information 3606 * for each bufctl as well as updating the hash table. 3607 */ 3608 static int 3609 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu) 3610 { 3611 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH); 3612 const kmem_cache_t *cp = kmu->kmu_cache; 3613 kmem_bufctl_t bufctl; 3614 3615 if (kmu->kmu_addr) { 3616 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1) 3617 mdb_warn("couldn't read bufctl at %p", addr); 3618 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr || 3619 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr + 3620 cp->cache_bufsize) 3621 return (WALK_NEXT); 3622 } 3623 3624 mdb_printf("size %d, addr %p, thread %p, cache %s\n", 3625 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); 3626 3627 for (i = 0; i < depth; i++) 3628 mdb_printf("\t %a\n", bcp->bc_stack[i]); 3629 3630 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize); 3631 return (WALK_NEXT); 3632 } 3633 3634 /* 3635 * We sort our results by allocation size before printing them. 3636 */ 3637 static int 3638 kmownercmp(const void *lp, const void *rp) 3639 { 3640 const kmowner_t *lhs = lp; 3641 const kmowner_t *rhs = rp; 3642 3643 return (rhs->kmo_total_size - lhs->kmo_total_size); 3644 } 3645 3646 /* 3647 * The main engine of ::kmausers is relatively straightforward: First we 3648 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we 3649 * iterate over the allocated bufctls of each cache in the list. Finally, 3650 * we sort and print our results. 
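 * (Caches without KMF_AUDIT carry no bufctl audit records; they are
 * skipped below, with a warning only if they were named explicitly.)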
3651 */ 3652 /*ARGSUSED*/ 3653 int 3654 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3655 { 3656 int mem_threshold = 8192; /* Minimum # bytes for printing */ 3657 int cnt_threshold = 100; /* Minimum # blocks for printing */ 3658 int audited_caches = 0; /* Number of KMF_AUDIT caches found */ 3659 int do_all_caches = 1; /* Do all caches (no arguments) */ 3660 int opt_e = FALSE; /* Include "small" users */ 3661 int opt_f = FALSE; /* Print stack traces */ 3662 3663 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1; 3664 kmowner_t *kmo, *kmoend; 3665 int i, oelems; 3666 3667 kmclist_t kmc; 3668 kmusers_t kmu; 3669 3670 bzero(&kmc, sizeof (kmc)); 3671 bzero(&kmu, sizeof (kmu)); 3672 3673 while ((i = mdb_getopts(argc, argv, 3674 'e', MDB_OPT_SETBITS, TRUE, &opt_e, 3675 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { 3676 3677 argv += i; /* skip past options we just processed */ 3678 argc -= i; /* adjust argc */ 3679 3680 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') 3681 return (DCMD_USAGE); 3682 3683 oelems = kmc.kmc_nelems; 3684 kmc.kmc_name = argv->a_un.a_str; 3685 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 3686 3687 if (kmc.kmc_nelems == oelems) { 3688 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name); 3689 return (DCMD_ERR); 3690 } 3691 3692 do_all_caches = 0; 3693 argv++; 3694 argc--; 3695 } 3696 3697 if (flags & DCMD_ADDRSPEC) { 3698 opt_f = TRUE; 3699 kmu.kmu_addr = addr; 3700 } else { 3701 kmu.kmu_addr = NULL; 3702 } 3703 3704 if (opt_e) 3705 mem_threshold = cnt_threshold = 0; 3706 3707 if (opt_f) 3708 callback = (mdb_walk_cb_t)kmause2; 3709 3710 if (do_all_caches) { 3711 kmc.kmc_name = NULL; /* match all cache names */ 3712 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc); 3713 } 3714 3715 for (i = 0; i < kmc.kmc_nelems; i++) { 3716 uintptr_t cp = kmc.kmc_caches[i]; 3717 kmem_cache_t c; 3718 3719 if (mdb_vread(&c, sizeof (c), cp) == -1) { 3720 mdb_warn("failed to read cache at %p", cp); 3721 continue; 3722 } 3723 3724 if (!(c.cache_flags & KMF_AUDIT)) { 3725 if (!do_all_caches) { 3726 mdb_warn("KMF_AUDIT is not enabled for %s\n", 3727 c.cache_name); 3728 } 3729 continue; 3730 } 3731 3732 kmu.kmu_cache = &c; 3733 (void) mdb_pwalk("bufctl", callback, &kmu, cp); 3734 audited_caches++; 3735 } 3736 3737 if (audited_caches == 0 && do_all_caches) { 3738 mdb_warn("KMF_AUDIT is not enabled for any caches\n"); 3739 return (DCMD_ERR); 3740 } 3741 3742 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp); 3743 kmoend = kmu.kmu_hash + kmu.kmu_nelems; 3744 3745 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) { 3746 if (kmo->kmo_total_size < mem_threshold && 3747 kmo->kmo_num < cnt_threshold) 3748 continue; 3749 mdb_printf("%lu bytes for %u allocations with data size %lu:\n", 3750 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size); 3751 for (i = 0; i < kmo->kmo_depth; i++) 3752 mdb_printf("\t %a\n", kmo->kmo_stack[i]); 3753 } 3754 3755 return (DCMD_OK); 3756 } 3757 3758 void 3759 kmausers_help(void) 3760 { 3761 mdb_printf( 3762 "Displays the largest users of the kmem allocator, sorted by \n" 3763 "trace. If one or more caches is specified, only those caches\n" 3764 "will be searched. By default, all caches are searched. If an\n" 3765 "address is specified, then only those allocations which include\n" 3766 "the given address are displayed. Specifying an address implies\n" 3767 "-f.\n" 3768 "\n" 3769 "\t-e\tInclude all users, not just the largest\n" 3770 "\t-f\tDisplay individual allocations. 
By default, users are\n" 3771 "\t\tgrouped by stack\n"); 3772 } 3773 3774 static int 3775 kmem_ready_check(void) 3776 { 3777 int ready; 3778 3779 if (mdb_readvar(&ready, "kmem_ready") < 0) 3780 return (-1); /* errno is set for us */ 3781 3782 return (ready); 3783 } 3784 3785 /*ARGSUSED*/ 3786 static void 3787 kmem_ready_cb(void *arg) 3788 { 3789 if (kmem_ready_check() <= 0) 3790 return; 3791 3792 if (kmem_ready_cbhdl != NULL) { 3793 mdb_callback_remove(kmem_ready_cbhdl); 3794 kmem_ready_cbhdl = NULL; 3795 } 3796 3797 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL); 3798 } 3799 3800 void 3801 kmem_init(void) 3802 { 3803 mdb_walker_t w = { 3804 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init, 3805 kmem_cache_walk_step, kmem_cache_walk_fini 3806 }; 3807 3808 /* 3809 * If kmem is ready, we'll need to invoke the kmem_cache walker 3810 * immediately. Walkers in the linkage structure won't be ready until 3811 * _mdb_init returns, so we'll need to add this one manually. If kmem 3812 * is ready, we'll use the walker to initialize the caches. If kmem 3813 * isn't ready, we'll register a callback that will allow us to defer 3814 * cache walking until it is. 3815 */ 3816 if (mdb_add_walker(&w) != 0) { 3817 mdb_warn("failed to add kmem_cache walker"); 3818 return; 3819 } 3820 3821 if (kmem_ready_check() > 0) { 3822 kmem_ready_cb(NULL); 3823 } else { 3824 kmem_ready_cbhdl = mdb_callback_add(MDB_CALLBACK_STCHG, 3825 kmem_ready_cb, NULL); 3826 } 3827 } 3828 3829 typedef struct whatthread { 3830 uintptr_t wt_target; 3831 int wt_verbose; 3832 } whatthread_t; 3833 3834 static int 3835 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w) 3836 { 3837 uintptr_t current, data; 3838 3839 if (t->t_stkbase == NULL) 3840 return (WALK_NEXT); 3841 3842 /* 3843 * Warn about swapped out threads, but drive on anyway 3844 */ 3845 if (!(t->t_schedflag & TS_LOAD)) { 3846 mdb_warn("thread %p's stack swapped out\n", addr); 3847 return (WALK_NEXT); 3848 } 3849 3850 /* 3851 * Search the thread's stack for the given pointer. Note that it would 3852 * be more efficient to follow ::kgrep's lead and read in page-sized 3853 * chunks, but this routine is already fast and simple. 3854 */ 3855 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk; 3856 current += sizeof (uintptr_t)) { 3857 if (mdb_vread(&data, sizeof (data), current) == -1) { 3858 mdb_warn("couldn't read thread %p's stack at %p", 3859 addr, current); 3860 return (WALK_ERR); 3861 } 3862 3863 if (data == w->wt_target) { 3864 if (w->wt_verbose) { 3865 mdb_printf("%p in thread %p's stack%s\n", 3866 current, addr, stack_active(t, current)); 3867 } else { 3868 mdb_printf("%#lr\n", addr); 3869 return (WALK_NEXT); 3870 } 3871 } 3872 } 3873 3874 return (WALK_NEXT); 3875 } 3876 3877 int 3878 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 3879 { 3880 whatthread_t w; 3881 3882 if (!(flags & DCMD_ADDRSPEC)) 3883 return (DCMD_USAGE); 3884 3885 w.wt_verbose = FALSE; 3886 w.wt_target = addr; 3887 3888 if (mdb_getopts(argc, argv, 3889 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc) 3890 return (DCMD_USAGE); 3891 3892 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w) 3893 == -1) { 3894 mdb_warn("couldn't walk threads"); 3895 return (DCMD_ERR); 3896 } 3897 3898 return (DCMD_OK); 3899 } 3900