/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>

#include <sys/types.h>
#include <sys/regset.h>
#include <sys/stack.h>
#include <sys/thread.h>
#include <sys/modctl.h>

#include "findstack.h"
#include "thread.h"
#include "sobj.h"

typedef struct findstack_info {
        uintptr_t       *fsi_stack;     /* place to record frames */

        uintptr_t       fsi_sp;         /* stack pointer */
        uintptr_t       fsi_pc;         /* pc */
        uintptr_t       fsi_sobj_ops;   /* sobj_ops */

        uint_t          fsi_tstate;     /* t_state */

        uchar_t         fsi_depth;      /* stack depth */
        uchar_t         fsi_failed;     /* search failed */
        uchar_t         fsi_overflow;   /* stack was deeper than max_depth */
        uchar_t         fsi_panic;      /* thread called panic() */

        uchar_t         fsi_max_depth;  /* stack frames available */
} findstack_info_t;
#define FSI_FAIL_BADTHREAD      1
#define FSI_FAIL_NOTINMEMORY    2
#define FSI_FAIL_THREADCORRUPT  3
#define FSI_FAIL_STACKNOTFOUND  4

#ifndef STACK_BIAS
#define STACK_BIAS      0
#endif

#define fs_dprintf(x)                                   \
        if (findstack_debug_on) {                       \
                mdb_printf("findstack debug: ");        \
                /*CSTYLED*/                             \
                mdb_printf x ;                          \
        }

static int findstack_debug_on = 0;

#if defined(__i386) || defined(__amd64)
struct rwindow {
        uintptr_t rw_fp;
        uintptr_t rw_rtn;
};
#endif

#define TOO_BIG_FOR_A_STACK (1024 * 1024)

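/*
 * do_findstack() copies a thread's entire kernel stack into a buffer in
 * mdb's own address space.  KTOU() and UTOK() translate a pointer between
 * the kernel stack range (based at kbase) and that local copy (based at
 * ubase); both macros expect kbase and ubase to be in scope where they
 * are used.
 */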
#define KTOU(p) ((p) - kbase + ubase)
#define UTOK(p) ((p) - ubase + kbase)

#define CRAWL_FOUNDALL  (-1)

/*
 * Given a stack pointer, try to crawl down it to the bottom.
 * "frame" is a VA in MDB's address space.
 *
 * Returns the number of frames successfully crawled down, or
 * CRAWL_FOUNDALL if it got to the bottom of the stack.
 */
static int
crawl(uintptr_t frame, uintptr_t kbase, uintptr_t ktop, uintptr_t ubase,
    int kill_fp, findstack_info_t *fsip)
{
        int levels = 0;

        fsip->fsi_depth = 0;
        fsip->fsi_overflow = 0;

        fs_dprintf(("<0> frame = %p, kbase = %p, ktop = %p, ubase = %p\n",
            frame, kbase, ktop, ubase));
        for (;;) {
                uintptr_t fp;
                long *fpp = (long *)&((struct rwindow *)frame)->rw_fp;

                fs_dprintf(("<1> fpp = %p, frame = %p\n", fpp, frame));

                if ((frame & (STACK_ALIGN - 1)) != 0)
                        break;

                fp = ((struct rwindow *)frame)->rw_fp + STACK_BIAS;
                if (fsip->fsi_depth < fsip->fsi_max_depth)
                        fsip->fsi_stack[fsip->fsi_depth++] =
                            ((struct rwindow *)frame)->rw_rtn;
                else
                        fsip->fsi_overflow = 1;

                fs_dprintf(("<2> fp = %p\n", fp));

                if (fp == ktop)
                        return (CRAWL_FOUNDALL);
                fs_dprintf(("<3> not at base\n"));

#if defined(__i386) || defined(__amd64)
                if (ktop - fp == sizeof (struct rwindow)) {
                        fs_dprintf(("<4> found base\n"));
                        return (CRAWL_FOUNDALL);
                }
#endif

                fs_dprintf(("<5> fp = %p, kbase = %p, ktop - size = %p\n",
                    fp, kbase, ktop - sizeof (struct rwindow)));

                if (fp < kbase || fp >= (ktop - sizeof (struct rwindow)))
                        break;

                frame = KTOU(fp);
                fs_dprintf(("<6> frame = %p\n", frame));

                /*
                 * NULL out the old %fp so we don't go down this stack
                 * more than once.
                 */
                if (kill_fp) {
                        fs_dprintf(("<7> fpp = %p\n", fpp));
                        *fpp = NULL;
                }

                fs_dprintf(("<8> levels = %d\n", levels));
                levels++;
        }

        return (levels);
}

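/*
 * Print a stack whose base has already been found:  label it with the
 * thread and stack pointer, set dot to sp, and evaluate the built-in
 * $C0 dcmd (or, with -v, $C, which also shows frame arguments).  An
 * optional string argument is evaluated in place of $C.
 */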
/*
 * "sp" is a kernel VA.
 */
static int
print_stack(uintptr_t sp, uintptr_t pc, uintptr_t addr,
    int argc, const mdb_arg_t *argv, int free_state)
{
        int showargs = 0, count, err;

        count = mdb_getopts(argc, argv,
            'v', MDB_OPT_SETBITS, TRUE, &showargs, NULL);
        argc -= count;
        argv += count;

        if (argc > 1 || (argc == 1 && argv->a_type != MDB_TYPE_STRING))
                return (DCMD_USAGE);

        mdb_printf("stack pointer for thread %p%s: %p\n",
            addr, (free_state ? " (TS_FREE)" : ""), sp);
        if (pc != 0)
                mdb_printf("[ %0?lr %a() ]\n", sp, pc);

        mdb_inc_indent(2);
        mdb_set_dot(sp);

        if (argc == 1)
                err = mdb_eval(argv->a_un.a_str);
        else if (showargs)
                err = mdb_eval("<.$C");
        else
                err = mdb_eval("<.$C0");

        mdb_dec_indent(2);

        return ((err == -1) ? DCMD_ABORT : DCMD_OK);
}

/*ARGSUSED*/
static int
do_findstack(uintptr_t addr, findstack_info_t *fsip, uint_t print_warnings)
{
        kthread_t thr;
        size_t stksz;
        uintptr_t ubase, utop;
        uintptr_t kbase, ktop;
        uintptr_t win, sp;

        fsip->fsi_failed = 0;
        fsip->fsi_pc = 0;
        fsip->fsi_sp = 0;
        fsip->fsi_depth = 0;
        fsip->fsi_overflow = 0;

        bzero(&thr, sizeof (thr));
        if (mdb_ctf_vread(&thr, "kthread_t", addr,
            MDB_CTF_VREAD_IGNORE_ALL) == -1) {
                if (print_warnings)
                        mdb_warn("couldn't read thread at %p\n", addr);
                fsip->fsi_failed = FSI_FAIL_BADTHREAD;
                return (DCMD_ERR);
        }

        fsip->fsi_sobj_ops = (uintptr_t)thr.t_sobj_ops;
        fsip->fsi_tstate = thr.t_state;
        fsip->fsi_panic = !!(thr.t_flag & T_PANIC);

        if ((thr.t_schedflag & TS_LOAD) == 0) {
                if (print_warnings)
                        mdb_warn("thread %p isn't in memory\n", addr);
                fsip->fsi_failed = FSI_FAIL_NOTINMEMORY;
                return (DCMD_ERR);
        }

        if (thr.t_stk < thr.t_stkbase) {
                if (print_warnings)
                        mdb_warn(
                            "stack base or stack top corrupt for thread %p\n",
                            addr);
                fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
                return (DCMD_ERR);
        }

        kbase = (uintptr_t)thr.t_stkbase;
        ktop = (uintptr_t)thr.t_stk;
        stksz = ktop - kbase;

#ifdef __amd64
        /*
         * The stack on amd64 is intentionally misaligned, so ignore the top
         * half-frame.  See thread_stk_init().  When handling traps, the frame
         * is automatically aligned by the hardware, so we only alter ktop if
         * needed.
         */
        if ((ktop & (STACK_ALIGN - 1)) != 0)
                ktop -= STACK_ENTRY_ALIGN;
#endif

        /*
         * If the stack size is larger than a meg, assume that it's bogus.
         */
        if (stksz > TOO_BIG_FOR_A_STACK) {
                if (print_warnings)
                        mdb_warn("stack size for thread %p is too big to be "
                            "reasonable\n", addr);
                fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
                return (DCMD_ERR);
        }

        /*
         * This could be (and was) a UM_GC allocation.  Unfortunately,
         * stksz tends to be very large.  As currently implemented, dcmds
         * invoked as part of pipelines don't have their UM_GC-allocated
         * memory freed until the pipeline completes.  With stksz in the
         * neighborhood of 20k, the popular ::walk thread |::findstack
         * pipeline can easily run memory-constrained debuggers (kmdb) out
         * of memory.  This can be changed back to a gc-able allocation when
         * the debugger is changed to free UM_GC memory more promptly.
         */
        ubase = (uintptr_t)mdb_alloc(stksz, UM_SLEEP);
        utop = ubase + stksz;
        if (mdb_vread((caddr_t)ubase, stksz, kbase) != stksz) {
                mdb_free((void *)ubase, stksz);
                if (print_warnings)
                        mdb_warn("couldn't read entire stack for thread %p\n",
                            addr);
                fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
                return (DCMD_ERR);
        }

        /*
         * Try the saved %sp first, if it looks reasonable.
         */
        sp = KTOU((uintptr_t)thr.t_sp + STACK_BIAS);
        if (sp >= ubase && sp <= utop) {
                if (crawl(sp, kbase, ktop, ubase, 0, fsip) == CRAWL_FOUNDALL) {
                        fsip->fsi_sp = (uintptr_t)thr.t_sp;
#if !defined(__i386)
                        fsip->fsi_pc = (uintptr_t)thr.t_pc;
#endif
                        goto found;
                }
        }

        /*
         * Now walk through the whole stack, starting at the base,
         * trying every possible "window".
         */
        for (win = ubase;
            win + sizeof (struct rwindow) <= utop;
            win += sizeof (struct rwindow *)) {
                if (crawl(win, kbase, ktop, ubase, 1, fsip) == CRAWL_FOUNDALL) {
                        fsip->fsi_sp = UTOK(win) - STACK_BIAS;
                        goto found;
                }
        }

        /*
         * We didn't conclusively find the stack.  So we'll take another lap,
         * and print out anything that looks possible.
         */
        if (print_warnings)
                mdb_printf("Possible stack pointers for thread %p:\n", addr);
        (void) mdb_vread((caddr_t)ubase, stksz, kbase);

        for (win = ubase;
            win + sizeof (struct rwindow) <= utop;
            win += sizeof (struct rwindow *)) {
                uintptr_t fp = ((struct rwindow *)win)->rw_fp;
                int levels;

                if ((levels = crawl(win, kbase, ktop, ubase, 1, fsip)) > 1) {
                        if (print_warnings)
                                mdb_printf("  %p (%d)\n", fp, levels);
                } else if (levels == CRAWL_FOUNDALL) {
                        /*
                         * If this is a live system, the stack could change
                         * between the two mdb_vread(ubase, utop, kbase)'s,
                         * and we could have a fully valid stack here.
                         */
                        fsip->fsi_sp = UTOK(win) - STACK_BIAS;
                        goto found;
                }
        }

        fsip->fsi_depth = 0;
        fsip->fsi_overflow = 0;
        fsip->fsi_failed = FSI_FAIL_STACKNOTFOUND;

        mdb_free((void *)ubase, stksz);
        return (DCMD_ERR);
found:
        mdb_free((void *)ubase, stksz);
        return (DCMD_OK);
}

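/*
 * The ::findstack dcmd.  Example invocations (thread address hypothetical):
 *
 *      > ffffff0004ce1c60::findstack
 *      > ffffff0004ce1c60::findstack -v
 *      > ::walk thread | ::findstack
 *
 * With -v, the evaluated $C also prints the arguments of each frame.
 */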
int
findstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        findstack_info_t fsi;
        int retval;

        if (!(flags & DCMD_ADDRSPEC))
                return (DCMD_USAGE);

        bzero(&fsi, sizeof (fsi));

        if ((retval = do_findstack(addr, &fsi, 1)) != DCMD_OK ||
            fsi.fsi_failed)
                return (retval);

        return (print_stack(fsi.fsi_sp, fsi.fsi_pc, addr,
            argc, argv, fsi.fsi_tstate == TS_FREE));
}

/*ARGSUSED*/
int
findstack_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *av)
{
        findstack_debug_on ^= 1;

        mdb_printf("findstack: debugging is now %s\n",
            findstack_debug_on ? "on" : "off");

        return (DCMD_OK);
}

static void
uppercase(char *p)
{
        for (; *p != '\0'; p++) {
                if (*p >= 'a' && *p <= 'z')
                        *p += 'A' - 'a';
        }
}

static void
sobj_to_text(uintptr_t addr, char *out, size_t out_sz)
{
        sobj_ops_to_text(addr, out, out_sz);
        uppercase(out);
}

#define SOBJ_ALL        1
static int
text_to_sobj(const char *text, uintptr_t *out)
{
        if (strcasecmp(text, "ALL") == 0) {
                *out = SOBJ_ALL;
                return (0);
        }
        return (sobj_text_to_ops(text, out));
}

#define TSTATE_PANIC    -2U
static int
text_to_tstate(const char *text, uint_t *out)
{
        if (strcasecmp(text, "panic") == 0)
                *out = TSTATE_PANIC;
        else if (thread_text_to_state(text, out) != 0) {
                mdb_warn("tstate \"%s\" not recognized\n", text);
                return (-1);
        }
        return (0);
}

static void
tstate_to_text(uint_t tstate, uint_t paniced, char *out, size_t out_sz)
{
        if (paniced)
                mdb_snprintf(out, out_sz, "panic");
        else
                thread_state_to_text(tstate, out, out_sz);
        uppercase(out);
}

typedef struct stacks_entry {
        struct stacks_entry     *se_next;
        struct stacks_entry     *se_dup;        /* dups of this stack */
        uintptr_t               se_thread;
        uintptr_t               se_sp;
        uintptr_t               se_sobj_ops;
        uint32_t                se_tstate;
        uint32_t                se_count;       /* # threads w/ this stack */
        uint8_t                 se_overflow;
        uint8_t                 se_depth;
        uint8_t                 se_failed;      /* failure reason; FSI_FAIL_* */
        uint8_t                 se_panic;
        uintptr_t               se_stack[1];
} stacks_entry_t;
#define STACKS_ENTRY_SIZE(x) OFFSETOF(stacks_entry_t, se_stack[(x)])

#define STACKS_HSIZE 127

/* Maximum stack depth reported in stacks */
#define STACKS_MAX_DEPTH        254

typedef struct stacks_info {
        size_t          si_count;       /* total stacks_entry_ts (incl dups) */
        size_t          si_entries;     /* # entries in hash table */
        stacks_entry_t  **si_hash;      /* hash table */

        findstack_info_t si_fsi;        /* transient callback state */
} stacks_info_t;


/* global state cached between invocations */
#define STACKS_STATE_CLEAN      0
#define STACKS_STATE_DIRTY      1
#define STACKS_STATE_DONE       2
static uint_t stacks_state = STACKS_STATE_CLEAN;
static stacks_entry_t **stacks_hash;
static stacks_entry_t **stacks_array;
static size_t stacks_array_size;

size_t
stacks_hash_entry(stacks_entry_t *sep)
{
        size_t depth = sep->se_depth;
        uintptr_t *stack = sep->se_stack;

        uint64_t total = depth;

        while (depth > 0) {
                total += *stack;
                stack++; depth--;
        }

        return (total % STACKS_HSIZE);
}

/*
 * This is used to both compare stacks for equality and to sort the final
 * list of unique stacks.  forsort specifies the latter behavior, which
 * additionally:
 *      compares se_count, and
 *      sorts the stacks by text function name.
 *
 * The equality test is independent of se_count, and doesn't care about
 * relative ordering, so we don't do the extra work of looking up symbols
 * for the stack addresses.
 */
int
stacks_entry_comp_impl(stacks_entry_t *l, stacks_entry_t *r,
    uint_t forsort)
{
        int idx;

        int depth = MIN(l->se_depth, r->se_depth);

        /* no matter what, panic stacks come last. */
        if (l->se_panic > r->se_panic)
                return (1);
        if (l->se_panic < r->se_panic)
                return (-1);

        if (forsort) {
                /* put large counts earlier */
                if (l->se_count > r->se_count)
                        return (-1);
                if (l->se_count < r->se_count)
                        return (1);
        }

        if (l->se_tstate > r->se_tstate)
                return (1);
        if (l->se_tstate < r->se_tstate)
                return (-1);

        if (l->se_failed > r->se_failed)
                return (1);
        if (l->se_failed < r->se_failed)
                return (-1);

        for (idx = 0; idx < depth; idx++) {
                char lbuf[MDB_SYM_NAMLEN];
                char rbuf[MDB_SYM_NAMLEN];

                int rval;
                uintptr_t laddr = l->se_stack[idx];
                uintptr_t raddr = r->se_stack[idx];

                if (laddr == raddr)
                        continue;

                if (forsort &&
                    mdb_lookup_by_addr(laddr, MDB_SYM_FUZZY,
                    lbuf, sizeof (lbuf), NULL) != -1 &&
                    mdb_lookup_by_addr(raddr, MDB_SYM_FUZZY,
                    rbuf, sizeof (rbuf), NULL) != -1 &&
                    (rval = strcmp(lbuf, rbuf)) != 0)
                        return (rval);

                if (laddr > raddr)
                        return (1);
                return (-1);
        }

        if (l->se_overflow > r->se_overflow)
                return (-1);
        if (l->se_overflow < r->se_overflow)
                return (1);

        if (l->se_depth > r->se_depth)
                return (1);
        if (l->se_depth < r->se_depth)
                return (-1);

        if (l->se_sobj_ops > r->se_sobj_ops)
                return (1);
        if (l->se_sobj_ops < r->se_sobj_ops)
                return (-1);

        return (0);
}

int
stacks_entry_comp(const void *l_arg, const void *r_arg)
{
        stacks_entry_t * const *lp = l_arg;
        stacks_entry_t * const *rp = r_arg;

        return (stacks_entry_comp_impl(*lp, *rp, 1));
}

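/*
 * Release any cached ::stacks state.  Results that reached the DONE state
 * are kept for reuse by later ::stacks invocations unless force is set.
 */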
void
stacks_cleanup(int force)
{
        int idx = 0;
        stacks_entry_t *cur, *next;

        if (stacks_state == STACKS_STATE_CLEAN)
                return;

        if (!force && stacks_state == STACKS_STATE_DONE)
                return;

        /*
         * Until the array is sorted and stable, stacks_hash will be non-NULL.
         * This way, we can get at all of the data, even if qsort() was
         * interrupted while mucking with the array.
         */
        if (stacks_hash != NULL) {
                for (idx = 0; idx < STACKS_HSIZE; idx++) {
                        while ((cur = stacks_hash[idx]) != NULL) {
                                while ((next = cur->se_dup) != NULL) {
                                        cur->se_dup = next->se_dup;
                                        mdb_free(next,
                                            STACKS_ENTRY_SIZE(next->se_depth));
                                }
                                next = cur->se_next;
                                stacks_hash[idx] = next;
                                mdb_free(cur, STACKS_ENTRY_SIZE(cur->se_depth));
                        }
                }
                if (stacks_array != NULL)
                        mdb_free(stacks_array,
                            stacks_array_size * sizeof (*stacks_array));

        } else if (stacks_array != NULL) {
                for (idx = 0; idx < stacks_array_size; idx++) {
                        if ((cur = stacks_array[idx]) != NULL) {
                                while ((next = cur->se_dup) != NULL) {
                                        cur->se_dup = next->se_dup;
                                        mdb_free(next,
                                            STACKS_ENTRY_SIZE(next->se_depth));
                                }
                                stacks_array[idx] = NULL;
                                mdb_free(cur, STACKS_ENTRY_SIZE(cur->se_depth));
                        }
                }
                mdb_free(stacks_array,
                    stacks_array_size * sizeof (*stacks_array));
        }

        stacks_array_size = 0;
        stacks_state = STACKS_STATE_CLEAN;
}

/*ARGSUSED*/
int
stacks_thread_cb(uintptr_t addr, const void *ignored, void *cbarg)
{
        stacks_info_t *sip = cbarg;
        findstack_info_t *fsip = &sip->si_fsi;

        stacks_entry_t **sepp, *nsep, *sep;
        int idx;
        size_t depth;

        if (do_findstack(addr, fsip, 0) != DCMD_OK &&
            fsip->fsi_failed == FSI_FAIL_BADTHREAD) {
                mdb_warn("couldn't read thread at %p\n", addr);
                return (WALK_NEXT);
        }

        sip->si_count++;

        depth = fsip->fsi_depth;
        nsep = mdb_zalloc(STACKS_ENTRY_SIZE(depth), UM_SLEEP);
        nsep->se_thread = addr;
        nsep->se_sp = fsip->fsi_sp;
        nsep->se_sobj_ops = fsip->fsi_sobj_ops;
        nsep->se_tstate = fsip->fsi_tstate;
        nsep->se_count = 1;
        nsep->se_overflow = fsip->fsi_overflow;
        nsep->se_depth = depth;
        nsep->se_failed = fsip->fsi_failed;
        nsep->se_panic = fsip->fsi_panic;

        for (idx = 0; idx < depth; idx++)
                nsep->se_stack[idx] = fsip->fsi_stack[idx];

        for (sepp = &sip->si_hash[stacks_hash_entry(nsep)];
            (sep = *sepp) != NULL;
            sepp = &sep->se_next) {

                if (stacks_entry_comp_impl(sep, nsep, 0) != 0)
                        continue;

                nsep->se_dup = sep->se_dup;
                sep->se_dup = nsep;
                sep->se_count++;
                return (WALK_NEXT);
        }

        nsep->se_next = NULL;
        *sepp = nsep;
        sip->si_entries++;

        return (WALK_NEXT);
}

int
stacks_run_tlist(mdb_pipe_t *tlist, stacks_info_t *si)
{
        size_t idx;
        size_t found = 0;
        kthread_t kt;
        int ret;

        for (idx = 0; idx < tlist->pipe_len; idx++) {
                uintptr_t addr = tlist->pipe_data[idx];

                if (mdb_vread(&kt, sizeof (kt), addr) == -1) {
                        mdb_warn("unable to read kthread_t at %p", addr);
                        continue;
                }
                found++;

                ret = stacks_thread_cb(addr, &kt, si);
                if (ret == WALK_DONE)
                        break;
                if (ret != WALK_NEXT)
                        return (-1);
        }

        if (found)
                return (0);
        return (-1);
}

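/*
 * Gather the stack of every kernel thread (or, when tlist is non-NULL, only
 * the threads piped to ::stacks), merge duplicates into stacks_hash, then
 * flatten the hash into stacks_array and sort it for display.
 */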
int
stacks_run(int verbose, mdb_pipe_t *tlist)
{
        stacks_info_t si;
        findstack_info_t *fsip = &si.si_fsi;
        size_t idx;
        stacks_entry_t **cur;

        bzero(&si, sizeof (si));

        stacks_state = STACKS_STATE_DIRTY;

        stacks_hash = si.si_hash =
            mdb_zalloc(STACKS_HSIZE * sizeof (*si.si_hash), UM_SLEEP);
        si.si_entries = 0;
        si.si_count = 0;

        fsip->fsi_max_depth = STACKS_MAX_DEPTH;
        fsip->fsi_stack =
            mdb_alloc(fsip->fsi_max_depth * sizeof (*fsip->fsi_stack),
            UM_SLEEP | UM_GC);

        if (verbose)
                mdb_warn("stacks: processing kernel threads\n");

        if (tlist != NULL) {
                if (stacks_run_tlist(tlist, &si))
                        return (DCMD_ERR);
        } else {
                if (mdb_walk("thread", stacks_thread_cb, &si) != 0) {
                        mdb_warn("cannot walk \"thread\"");
                        return (DCMD_ERR);
                }
        }

        if (verbose)
                mdb_warn("stacks: %d unique stacks / %d threads\n",
                    si.si_entries, si.si_count);

        stacks_array_size = si.si_entries;
        stacks_array =
            mdb_zalloc(si.si_entries * sizeof (*stacks_array), UM_SLEEP);
        cur = stacks_array;
        for (idx = 0; idx < STACKS_HSIZE; idx++) {
                stacks_entry_t *sep;
                for (sep = si.si_hash[idx]; sep != NULL; sep = sep->se_next)
                        *(cur++) = sep;
        }

        if (cur != stacks_array + si.si_entries) {
                mdb_warn("stacks: miscounted array size (%d != size: %d)\n",
                    (cur - stacks_array), stacks_array_size);
                return (DCMD_ERR);
        }
        qsort(stacks_array, si.si_entries, sizeof (*stacks_array),
            stacks_entry_comp);

        /* Now that we're done, free the hash table */
        stacks_hash = NULL;
        mdb_free(si.si_hash, STACKS_HSIZE * sizeof (*si.si_hash));

        if (tlist == NULL)
                stacks_state = STACKS_STATE_DONE;

        if (verbose)
                mdb_warn("stacks: done\n");

        return (DCMD_OK);
}

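/*
 * Returns non-zero if one of sep's stack frames matches addr.  When addr is
 * the start of a function, any PC within that function matches; otherwise
 * (e.g. "func+offset"), the PC must equal addr exactly.
 */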
static int
stacks_has_caller(stacks_entry_t *sep, uintptr_t addr)
{
        uintptr_t laddr = addr;
        uintptr_t haddr = addr + 1;
        int idx;
        char c[MDB_SYM_NAMLEN];
        GElf_Sym sym;

        if (mdb_lookup_by_addr(addr, MDB_SYM_FUZZY,
            c, sizeof (c), &sym) != -1 &&
            addr == (uintptr_t)sym.st_value) {
                laddr = (uintptr_t)sym.st_value;
                haddr = (uintptr_t)sym.st_value + sym.st_size;
        }

        for (idx = 0; idx < sep->se_depth; idx++)
                if (sep->se_stack[idx] >= laddr && sep->se_stack[idx] < haddr)
                        return (1);

        return (0);
}

typedef struct find_module_struct {
        struct modctl   *mcp;
        const char      *name;
} find_module_struct_t;

/*ARGSUSED*/
int
find_module_cb(uintptr_t addr, const void *modctl_arg, void *cbarg)
{
        find_module_struct_t *sp = cbarg;
        char mod_modname[MODMAXNAMELEN + 1];
        const struct modctl *mp = modctl_arg;

        if (!mp->mod_modname)
                return (WALK_NEXT);

        if (mdb_readstr(mod_modname, sizeof (mod_modname),
            (uintptr_t)mp->mod_modname) == -1) {
                mdb_warn("failed to read mod_modname in \"modctl\" walk");
                return (WALK_ERR);
        }

        if (strcmp(sp->name, mod_modname))
                return (WALK_NEXT);

        sp->mcp = mdb_alloc(sizeof (*sp->mcp), UM_SLEEP | UM_GC);
        bcopy(mp, sp->mcp, sizeof (*sp->mcp));
        return (WALK_DONE);
}

static struct modctl *
find_module(const char *name)
{
        find_module_struct_t mptr;

        mptr.name = name;
        mptr.mcp = NULL;

        if (mdb_walk("modctl", find_module_cb, &mptr) != 0)
                mdb_warn("cannot walk \"modctl\"");
        return (mptr.mcp);
}

static int
stacks_has_module(stacks_entry_t *sep, struct modctl *mp)
{
        int idx;

        if (mp == NULL)
                return (0);

        for (idx = 0; idx < sep->se_depth; idx++)
                if (sep->se_stack[idx] >= (uintptr_t)mp->mod_text &&
                    sep->se_stack[idx] <
                    ((uintptr_t)mp->mod_text + mp->mod_text_size))
                        return (1);
        return (0);
}


static int
uintptrcomp(const void *lp, const void *rp)
{
        uintptr_t lhs = *(const uintptr_t *)lp;
        uintptr_t rhs = *(const uintptr_t *)rp;
        if (lhs > rhs)
                return (1);
        if (lhs < rhs)
                return (-1);
        return (0);
}

/*ARGSUSED*/
static void
print_sobj_help(int type, const char *name, const char *ops_name, void *ign)
{
        mdb_printf(" %s", name);
}

/*ARGSUSED*/
static void
print_tstate_help(uint_t state, const char *name, void *ignored)
{
        mdb_printf(" %s", name);
}

void
stacks_help(void)
{
        mdb_printf(
"::stacks processes all of the thread stacks on the system, grouping\n"
"together threads which have the same:\n"
"\n"
"  * Thread state,\n"
"  * Sync object type, and\n"
"  * PCs in their stack trace.\n"
"\n"
"The default output (no address or options) is just a dump of the thread\n"
"groups in the system.  For a view of active threads, use \"::stacks -i\",\n"
"which filters out FREE threads (interrupt threads which are currently\n"
"inactive) and threads sleeping on a CV. (Note that those threads may still\n"
"be noteworthy; this is just for a first glance.)  More general filtering\n"
"options are described below, in the \"FILTERS\" section.\n"
"\n"
"::stacks can be used in a pipeline.  The input to ::stacks is one or more\n"
"thread pointers.  For example, to get a summary of threads in a process,\n"
"you can do:\n"
"\n"
"  %<b>procp%</b>::walk thread | ::stacks\n"
"\n"
"When output into a pipe, ::stacks prints all of the threads input,\n"
"filtered by the given filtering options.  This means that multiple\n"
"::stacks invocations can be piped together to achieve more complicated\n"
"filters.  For example, to get threads which have both 'fop_read' and\n"
"'cv_wait_sig_swap' in their stack trace, you could do:\n"
"\n"
"  ::stacks -c fop_read | ::stacks -c cv_wait_sig_swap_core\n"
"\n"
"To get the full list of threads in each group, use the '-a' flag:\n"
"\n"
"  ::stacks -a\n"
"\n");
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -a    Print all of the grouped threads, instead of just a count.\n"
"  -f    Force a re-run of the thread stack gathering.\n"
"  -v    Be verbose about thread stack gathering.\n"
"\n");
        mdb_dec_indent(2);
        mdb_printf("%<b>FILTERS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -i    Show active threads; equivalent to '-S CV -T FREE'.\n"
"  -c func[+offset]\n"
"        Only print threads whose stacks contain func/func+offset.\n"
"  -C func[+offset]\n"
"        Only print threads whose stacks do not contain func/func+offset.\n"
"  -m module\n"
"        Only print threads whose stacks contain functions from module.\n"
"  -M module\n"
"        Only print threads whose stacks do not contain functions from\n"
"        module.\n"
"  -s {type | ALL}\n"
"        Only print threads which are on a 'type' synchronization object\n"
"        (SOBJ).\n"
"  -S {type | ALL}\n"
"        Only print threads which are not on a 'type' SOBJ.\n"
"  -t tstate\n"
"        Only print threads which are in thread state 'tstate'.\n"
"  -T tstate\n"
"        Only print threads which are not in thread state 'tstate'.\n"
"\n");
        mdb_printf("   SOBJ types:");
        sobj_type_walk(print_sobj_help, NULL);
        mdb_printf("\n");
        mdb_printf("Thread states:");
        thread_walk_states(print_tstate_help, NULL);
        mdb_printf(" panic\n");
}

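/*
 * The ::stacks dcmd itself:  parse the filter options, (re)gather the
 * per-thread stack data if the cached state is clean, then walk the sorted
 * array of unique stacks, applying the filters and printing each matching
 * group (or just the thread pointers, when output is piped).
 */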
/*ARGSUSED*/
int
stacks(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
        size_t idx;

        char *seen = NULL;

        const char *caller_str = NULL;
        const char *excl_caller_str = NULL;
        uintptr_t caller = 0, excl_caller = 0;
        const char *module_str = NULL;
        const char *excl_module_str = NULL;
        struct modctl *module = NULL, *excl_module = NULL;
        const char *sobj = NULL;
        const char *excl_sobj = NULL;
        uintptr_t sobj_ops = 0, excl_sobj_ops = 0;
        const char *tstate_str = NULL;
        const char *excl_tstate_str = NULL;
        uint_t tstate = -1U;
        uint_t excl_tstate = -1U;
        uint_t printed = 0;

        uint_t all = 0;
        uint_t force = 0;
        uint_t interesting = 0;
        uint_t verbose = 0;

        /*
         * We have a slight behavior difference between having piped
         * input and 'addr::stacks'.  Without a pipe, we assume the
         * thread pointer given is a representative thread, and so
         * we include all similar threads in the system in our output.
         *
         * With a pipe, we filter down to just the threads in our
         * input.
         */
        uint_t addrspec = (flags & DCMD_ADDRSPEC);
        uint_t only_matching = addrspec && (flags & DCMD_PIPE);

        mdb_pipe_t p;

        if (mdb_getopts(argc, argv,
            'a', MDB_OPT_SETBITS, TRUE, &all,
            'f', MDB_OPT_SETBITS, TRUE, &force,
            'i', MDB_OPT_SETBITS, TRUE, &interesting,
            'v', MDB_OPT_SETBITS, TRUE, &verbose,
            'c', MDB_OPT_STR, &caller_str,
            'C', MDB_OPT_STR, &excl_caller_str,
            'm', MDB_OPT_STR, &module_str,
            'M', MDB_OPT_STR, &excl_module_str,
            's', MDB_OPT_STR, &sobj,
            'S', MDB_OPT_STR, &excl_sobj,
            't', MDB_OPT_STR, &tstate_str,
            'T', MDB_OPT_STR, &excl_tstate_str,
            NULL) != argc)
                return (DCMD_USAGE);

        if (interesting) {
                if (sobj != NULL || excl_sobj != NULL ||
                    tstate_str != NULL || excl_tstate_str != NULL) {
                        mdb_warn(
                            "stacks: -i is incompatible with -[sStT]\n");
                        return (DCMD_USAGE);
                }
                excl_sobj = "CV";
                excl_tstate_str = "FREE";
        }

        if (caller_str != NULL) {
                mdb_set_dot(0);
                if (mdb_eval(caller_str) != 0) {
                        mdb_warn("stacks: evaluation of \"%s\" failed",
                            caller_str);
                        return (DCMD_ABORT);
                }
                caller = mdb_get_dot();
        }

        if (excl_caller_str != NULL) {
                mdb_set_dot(0);
                if (mdb_eval(excl_caller_str) != 0) {
                        mdb_warn("stacks: evaluation of \"%s\" failed",
                            excl_caller_str);
                        return (DCMD_ABORT);
                }
                excl_caller = mdb_get_dot();
        }
        mdb_set_dot(addr);

        if (module_str != NULL &&
            (module = find_module(module_str)) == NULL) {
                mdb_warn("stacks: module \"%s\" is unknown", module_str);
                return (DCMD_ABORT);
        }

        if (excl_module_str != NULL &&
            (excl_module = find_module(excl_module_str)) == NULL) {
                mdb_warn("stacks: module \"%s\" is unknown", excl_module_str);
                return (DCMD_ABORT);
        }

        if (sobj != NULL &&
            text_to_sobj(sobj, &sobj_ops) != 0)
                return (DCMD_USAGE);

        if (excl_sobj != NULL &&
            text_to_sobj(excl_sobj, &excl_sobj_ops) != 0)
                return (DCMD_USAGE);

        if (sobj_ops != 0 && excl_sobj_ops != 0) {
                mdb_warn("stacks: only one of -s and -S can be specified\n");
                return (DCMD_USAGE);
        }

        if (tstate_str != NULL &&
            text_to_tstate(tstate_str, &tstate) != 0)
                return (DCMD_USAGE);

        if (excl_tstate_str != NULL &&
            text_to_tstate(excl_tstate_str, &excl_tstate) != 0)
                return (DCMD_USAGE);

        if (tstate != -1U && excl_tstate != -1U) {
                mdb_warn("stacks: only one of -t and -T can be specified\n");
                return (DCMD_USAGE);
        }

        /*
         * If there's an address specified, we're going to further filter
         * to only entries which have an address in the input.  To reduce
         * overhead (and make the sorted output come out right), we
         * use mdb_get_pipe() to grab the entire pipeline of input, then
         * use qsort() and bsearch() to speed up the search.
         */
        if (addrspec) {
                mdb_get_pipe(&p);
                if (p.pipe_data == NULL || p.pipe_len == 0) {
                        p.pipe_data = &addr;
                        p.pipe_len = 1;
                }
                qsort(p.pipe_data, p.pipe_len, sizeof (uintptr_t),
                    uintptrcomp);

                /* remove any duplicates in the data */
                idx = 0;
                while (idx < p.pipe_len - 1) {
                        uintptr_t *data = &p.pipe_data[idx];
                        size_t len = p.pipe_len - idx;

                        if (data[0] == data[1]) {
                                memmove(data, data + 1,
                                    (len - 1) * sizeof (*data));
                                p.pipe_len--;
                                continue; /* repeat without incrementing idx */
                        }
                        idx++;
                }

                seen = mdb_zalloc(p.pipe_len, UM_SLEEP | UM_GC);
        }

        /*
         * Force a cleanup if we're connected to a live system.  Never
         * do a cleanup after the first invocation around the loop.
         */
        force |= (mdb_get_state() == MDB_STATE_RUNNING);
        if (force && (flags & (DCMD_LOOPFIRST|DCMD_LOOP)) == DCMD_LOOP)
                force = 0;

        stacks_cleanup(force);

        if (stacks_state == STACKS_STATE_CLEAN) {
                int res = stacks_run(verbose, addrspec ? &p : NULL);
                if (res != DCMD_OK)
                        return (res);
        }

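        /*
         * Iterate over the unique stacks in sorted order.  For each one,
         * apply the caller/module/tstate/sobj filters; matching groups are
         * either emitted as bare thread pointers (when output is piped) or
         * printed as a formatted summary, with every member thread listed
         * if -a was given.
         */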
        for (idx = 0; idx < stacks_array_size; idx++) {
                stacks_entry_t *sep = stacks_array[idx];
                stacks_entry_t *cur = sep;
                int frame;
                size_t count = sep->se_count;

                if (addrspec) {
                        stacks_entry_t *head = NULL, *tail = NULL, *sp;
                        size_t foundcount = 0;
                        /*
                         * We use the now-unused hash chain field se_next to
                         * link together the dups which match our list.
                         */
                        for (sp = sep; sp != NULL; sp = sp->se_dup) {
                                uintptr_t *entry = bsearch(&sp->se_thread,
                                    p.pipe_data, p.pipe_len, sizeof (uintptr_t),
                                    uintptrcomp);
                                if (entry != NULL) {
                                        foundcount++;
                                        seen[entry - p.pipe_data]++;
                                        if (head == NULL)
                                                head = sp;
                                        else
                                                tail->se_next = sp;
                                        tail = sp;
                                        sp->se_next = NULL;
                                }
                        }
                        if (head == NULL)
                                continue;       /* no match, skip entry */

                        if (only_matching) {
                                cur = sep = head;
                                count = foundcount;
                        }
                }

                if (caller != 0 && !stacks_has_caller(sep, caller))
                        continue;
                if (excl_caller != 0 && stacks_has_caller(sep, excl_caller))
                        continue;
                if (module != 0 && !stacks_has_module(sep, module))
                        continue;
                if (excl_module != 0 && stacks_has_module(sep, excl_module))
                        continue;

                if (tstate != -1U) {
                        if (tstate == TSTATE_PANIC) {
                                if (!sep->se_panic)
                                        continue;
                        } else if (sep->se_panic || sep->se_tstate != tstate)
                                continue;
                }
                if (excl_tstate != -1U) {
                        if (excl_tstate == TSTATE_PANIC) {
                                if (sep->se_panic)
                                        continue;
                        } else if (!sep->se_panic &&
                            sep->se_tstate == excl_tstate)
                                continue;
                }

                if (sobj_ops == SOBJ_ALL) {
                        if (sep->se_sobj_ops == 0)
                                continue;
                } else if (sobj_ops != 0) {
                        if (sobj_ops != sep->se_sobj_ops)
                                continue;
                }

                if (!(interesting && sep->se_panic)) {
                        if (excl_sobj_ops == SOBJ_ALL) {
                                if (sep->se_sobj_ops != 0)
                                        continue;
                        } else if (excl_sobj_ops != 0) {
                                if (excl_sobj_ops == sep->se_sobj_ops)
                                        continue;
                        }
                }

                if (flags & DCMD_PIPE_OUT) {
                        while (sep != NULL) {
                                mdb_printf("%lr\n", sep->se_thread);
                                sep = only_matching ?
                                    sep->se_next : sep->se_dup;
                        }
                        continue;
                }

                if (all || !printed) {
                        mdb_printf("%<u>%-?s %-8s %-?s %8s%</u>\n",
                            "THREAD", "STATE", "SOBJ", "COUNT");
                        printed = 1;
                }

                do {
                        char state[20];
                        char sobj[100];

                        tstate_to_text(cur->se_tstate, cur->se_panic,
                            state, sizeof (state));
                        sobj_to_text(cur->se_sobj_ops,
                            sobj, sizeof (sobj));

                        if (cur == sep)
                                mdb_printf("%?p %-8s %-?s %8d\n",
                                    cur->se_thread, state, sobj, count);
                        else
                                mdb_printf("%?p %-8s %-?s %8s\n",
                                    cur->se_thread, state, sobj, "-");

                        cur = only_matching ? cur->se_next : cur->se_dup;
                } while (all && cur != NULL);

                if (sep->se_failed != 0) {
                        char *reason;
                        switch (sep->se_failed) {
                        case FSI_FAIL_NOTINMEMORY:
                                reason = "thread not in memory";
                                break;
                        case FSI_FAIL_THREADCORRUPT:
                                reason = "thread structure stack info corrupt";
                                break;
                        case FSI_FAIL_STACKNOTFOUND:
                                reason = "no consistent stack found";
                                break;
                        default:
                                reason = "unknown failure";
                                break;
                        }
                        mdb_printf("%?s <%s>\n", "", reason);
                }

                for (frame = 0; frame < sep->se_depth; frame++)
                        mdb_printf("%?s %a\n", "", sep->se_stack[frame]);
                if (sep->se_overflow)
                        mdb_printf("%?s ... truncated ...\n", "");
                mdb_printf("\n");
        }

        if (flags & DCMD_ADDRSPEC) {
                for (idx = 0; idx < p.pipe_len; idx++)
                        if (seen[idx] == 0)
                                mdb_warn("stacks: %p not in thread list\n",
                                    p.pipe_data[idx]);
        }
        return (DCMD_OK);
}