/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <mdb/mdb_modapi.h>
#include <mdb/mdb_ctf.h>

#include <sys/types.h>
#include <sys/regset.h>
#include <sys/stack.h>
#include <sys/thread.h>

#include "findstack.h"
#include "thread.h"
#include "sobj.h"

typedef struct findstack_info {
	uintptr_t	*fsi_stack;	/* place to record frames */

	uintptr_t	fsi_sp;		/* stack pointer */
	uintptr_t	fsi_pc;		/* pc */
	uintptr_t	fsi_sobj_ops;	/* sobj_ops */

	uint_t		fsi_tstate;	/* t_state */

	uchar_t		fsi_depth;	/* stack depth */
	uchar_t		fsi_failed;	/* search failed */
	uchar_t		fsi_overflow;	/* stack was deeper than max_depth */
	uchar_t		fsi_panic;	/* thread called panic() */

	uchar_t		fsi_max_depth;	/* stack frames available */
} findstack_info_t;
#define	FSI_FAIL_BADTHREAD	1
#define	FSI_FAIL_NOTINMEMORY	2
#define	FSI_FAIL_THREADCORRUPT	3
#define	FSI_FAIL_STACKNOTFOUND	4

#ifndef STACK_BIAS
#define	STACK_BIAS	0
#endif

#define	fs_dprintf(x)					\
	if (findstack_debug_on) {			\
		mdb_printf("findstack debug: ");	\
		/*CSTYLED*/				\
		mdb_printf x ;				\
	}

static int findstack_debug_on = 0;
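
/*
 * On x86 there is no register-window structure as on SPARC; a stack frame
 * is simply the saved frame pointer followed by the return address.  We
 * describe it with a SPARC-style "struct rwindow" so that the crawl logic
 * below can treat a frame the same way on all platforms.
 */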
#if defined(__i386) || defined(__amd64)
struct rwindow {
	uintptr_t rw_fp;
	uintptr_t rw_rtn;
};
#endif

#define	TOO_BIG_FOR_A_STACK (1024 * 1024)

#define	KTOU(p) ((p) - kbase + ubase)
#define	UTOK(p) ((p) - ubase + kbase)

#define	CRAWL_FOUNDALL	(-1)

/*
 * Given a stack pointer, try to crawl down it to the bottom.
 * "frame" is a VA in MDB's address space.
 *
 * Returns the number of frames successfully crawled down, or
 * CRAWL_FOUNDALL if it got to the bottom of the stack.
 */
static int
crawl(uintptr_t frame, uintptr_t kbase, uintptr_t ktop, uintptr_t ubase,
    int kill_fp, findstack_info_t *fsip)
{
	int levels = 0;

	fsip->fsi_depth = 0;
	fsip->fsi_overflow = 0;

	fs_dprintf(("<0> frame = %p, kbase = %p, ktop = %p, ubase = %p\n",
	    frame, kbase, ktop, ubase));
	for (;;) {
		uintptr_t fp;
		long *fpp = (long *)&((struct rwindow *)frame)->rw_fp;

		fs_dprintf(("<1> fpp = %p, frame = %p\n", fpp, frame));

		if ((frame & (STACK_ALIGN - 1)) != 0)
			break;

		fp = ((struct rwindow *)frame)->rw_fp + STACK_BIAS;
		if (fsip->fsi_depth < fsip->fsi_max_depth)
			fsip->fsi_stack[fsip->fsi_depth++] =
			    ((struct rwindow *)frame)->rw_rtn;
		else
			fsip->fsi_overflow = 1;

		fs_dprintf(("<2> fp = %p\n", fp));

		if (fp == ktop)
			return (CRAWL_FOUNDALL);
		fs_dprintf(("<3> not at base\n"));

#if defined(__i386) || defined(__amd64)
		if (ktop - fp == sizeof (struct rwindow)) {
			fs_dprintf(("<4> found base\n"));
			return (CRAWL_FOUNDALL);
		}
#endif

		fs_dprintf(("<5> fp = %p, kbase = %p, ktop - size = %p\n",
		    fp, kbase, ktop - sizeof (struct rwindow)));

		if (fp < kbase || fp >= (ktop - sizeof (struct rwindow)))
			break;

		frame = KTOU(fp);
		fs_dprintf(("<6> frame = %p\n", frame));

		/*
		 * NULL out the old %fp so we don't go down this stack
		 * more than once.
		 */
		if (kill_fp) {
			fs_dprintf(("<7> fpp = %p\n", fpp));
			*fpp = NULL;
		}

		fs_dprintf(("<8> levels = %d\n", levels));
		levels++;
	}

	return (levels);
}

/*
 * "sp" is a kernel VA.
 */
static int
print_stack(uintptr_t sp, uintptr_t pc, uintptr_t addr,
    int argc, const mdb_arg_t *argv, int free_state)
{
	int showargs = 0, count, err;

	count = mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &showargs, NULL);
	argc -= count;
	argv += count;

	if (argc > 1 || (argc == 1 && argv->a_type != MDB_TYPE_STRING))
		return (DCMD_USAGE);

	mdb_printf("stack pointer for thread %p%s: %p\n",
	    addr, (free_state ? " (TS_FREE)" : ""), sp);
	if (pc != 0)
		mdb_printf("[ %0?lr %a() ]\n", sp, pc);

	mdb_inc_indent(2);
	mdb_set_dot(sp);

	if (argc == 1)
		err = mdb_eval(argv->a_un.a_str);
	else if (showargs)
		err = mdb_eval("<.$C");
	else
		err = mdb_eval("<.$C0");

	mdb_dec_indent(2);

	return ((err == -1) ? DCMD_ABORT : DCMD_OK);
}
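
/*
 * Determine a consistent stack pointer (and, where available, PC) for the
 * thread at "addr", recording the frames we find in *fsip.  We read the
 * kthread_t with CTF, copy its entire kernel stack into a local buffer,
 * and then ask crawl() for a full frame chain: first starting from the
 * saved t_sp, and failing that, from every possible window on the stack.
 */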
/*ARGSUSED*/
static int
do_findstack(uintptr_t addr, findstack_info_t *fsip, uint_t print_warnings)
{
	kthread_t thr;
	size_t stksz;
	uintptr_t ubase, utop;
	uintptr_t kbase, ktop;
	uintptr_t win, sp;

	fsip->fsi_failed = 0;
	fsip->fsi_pc = 0;
	fsip->fsi_sp = 0;
	fsip->fsi_depth = 0;
	fsip->fsi_overflow = 0;

	bzero(&thr, sizeof (thr));
	if (mdb_ctf_vread(&thr, "kthread_t", addr,
	    MDB_CTF_VREAD_IGNORE_ALL) == -1) {
		if (print_warnings)
			mdb_warn("couldn't read thread at %p\n", addr);
		fsip->fsi_failed = FSI_FAIL_BADTHREAD;
		return (DCMD_ERR);
	}

	fsip->fsi_sobj_ops = (uintptr_t)thr.t_sobj_ops;
	fsip->fsi_tstate = thr.t_state;
	fsip->fsi_panic = !!(thr.t_flag & T_PANIC);

	if ((thr.t_schedflag & TS_LOAD) == 0) {
		if (print_warnings)
			mdb_warn("thread %p isn't in memory\n", addr);
		fsip->fsi_failed = FSI_FAIL_NOTINMEMORY;
		return (DCMD_ERR);
	}

	if (thr.t_stk < thr.t_stkbase) {
		if (print_warnings)
			mdb_warn(
			    "stack base or stack top corrupt for thread %p\n",
			    addr);
		fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
		return (DCMD_ERR);
	}

	kbase = (uintptr_t)thr.t_stkbase;
	ktop = (uintptr_t)thr.t_stk;
	stksz = ktop - kbase;

#ifdef __amd64
	/*
	 * The stack on amd64 is intentionally misaligned, so ignore the top
	 * half-frame.  See thread_stk_init().  When handling traps, the frame
	 * is automatically aligned by the hardware, so we only alter ktop if
	 * needed.
	 */
	if ((ktop & (STACK_ALIGN - 1)) != 0)
		ktop -= STACK_ENTRY_ALIGN;
#endif

	/*
	 * If the stack size is larger than a meg, assume that it's bogus.
	 */
	if (stksz > TOO_BIG_FOR_A_STACK) {
		if (print_warnings)
			mdb_warn("stack size for thread %p is too big to be "
			    "reasonable\n", addr);
		fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
		return (DCMD_ERR);
	}

	/*
	 * This could be (and was) a UM_GC allocation.  Unfortunately,
	 * stksz tends to be very large.  As currently implemented, dcmds
	 * invoked as part of pipelines don't have their UM_GC-allocated
	 * memory freed until the pipeline completes.  With stksz in the
	 * neighborhood of 20k, the popular ::walk thread |::findstack
	 * pipeline can easily run memory-constrained debuggers (kmdb) out
	 * of memory.  This can be changed back to a gc-able allocation when
	 * the debugger is changed to free UM_GC memory more promptly.
	 */
	ubase = (uintptr_t)mdb_alloc(stksz, UM_SLEEP);
	utop = ubase + stksz;
	if (mdb_vread((caddr_t)ubase, stksz, kbase) != stksz) {
		mdb_free((void *)ubase, stksz);
		if (print_warnings)
			mdb_warn("couldn't read entire stack for thread %p\n",
			    addr);
		fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
		return (DCMD_ERR);
	}

	/*
	 * Try the saved %sp first, if it looks reasonable.
	 */
	sp = KTOU((uintptr_t)thr.t_sp + STACK_BIAS);
	if (sp >= ubase && sp <= utop) {
		if (crawl(sp, kbase, ktop, ubase, 0, fsip) == CRAWL_FOUNDALL) {
			fsip->fsi_sp = (uintptr_t)thr.t_sp;
#if !defined(__i386)
			fsip->fsi_pc = (uintptr_t)thr.t_pc;
#endif
			goto found;
		}
	}

	/*
	 * Now walk through the whole stack, starting at the base,
	 * trying every possible "window".
	 */
	for (win = ubase;
	    win + sizeof (struct rwindow) <= utop;
	    win += sizeof (struct rwindow *)) {
		if (crawl(win, kbase, ktop, ubase, 1, fsip) == CRAWL_FOUNDALL) {
			fsip->fsi_sp = UTOK(win) - STACK_BIAS;
			goto found;
		}
	}

	/*
	 * We didn't conclusively find the stack.  So we'll take another lap,
	 * and print out anything that looks possible.
	 */
	if (print_warnings)
		mdb_printf("Possible stack pointers for thread %p:\n", addr);
	(void) mdb_vread((caddr_t)ubase, stksz, kbase);

	for (win = ubase;
	    win + sizeof (struct rwindow) <= utop;
	    win += sizeof (struct rwindow *)) {
		uintptr_t fp = ((struct rwindow *)win)->rw_fp;
		int levels;

		if ((levels = crawl(win, kbase, ktop, ubase, 1, fsip)) > 1) {
			if (print_warnings)
				mdb_printf("  %p (%d)\n", fp, levels);
		} else if (levels == CRAWL_FOUNDALL) {
			/*
			 * If this is a live system, the stack could change
			 * between the two mdb_vread(ubase, utop, kbase)'s,
			 * and we could have a fully valid stack here.
			 */
			fsip->fsi_sp = UTOK(win) - STACK_BIAS;
			goto found;
		}
	}

	fsip->fsi_depth = 0;
	fsip->fsi_overflow = 0;
	fsip->fsi_failed = FSI_FAIL_STACKNOTFOUND;

	mdb_free((void *)ubase, stksz);
	return (DCMD_ERR);
found:
	mdb_free((void *)ubase, stksz);
	return (DCMD_OK);
}

int
findstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	findstack_info_t fsi;
	int retval;

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	bzero(&fsi, sizeof (fsi));

	if ((retval = do_findstack(addr, &fsi, 1)) != DCMD_OK ||
	    fsi.fsi_failed)
		return (retval);

	return (print_stack(fsi.fsi_sp, fsi.fsi_pc, addr,
	    argc, argv, fsi.fsi_tstate == TS_FREE));
}

/*ARGSUSED*/
int
findstack_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *av)
{
	findstack_debug_on ^= 1;

	mdb_printf("findstack: debugging is now %s\n",
	    findstack_debug_on ? "on" : "off");

	return (DCMD_OK);
}

static void
uppercase(char *p)
{
	for (; *p != '\0'; p++) {
		if (*p >= 'a' && *p <= 'z')
			*p += 'A' - 'a';
	}
}

static void
sobj_to_text(uintptr_t addr, char *out, size_t out_sz)
{
	sobj_ops_to_text(addr, out, out_sz);
	uppercase(out);
}

#define	SOBJ_ALL	1
static int
text_to_sobj(const char *text, uintptr_t *out)
{
	if (strcasecmp(text, "ALL") == 0) {
		*out = SOBJ_ALL;
		return (0);
	}
	return (sobj_text_to_ops(text, out));
}

#define	TSTATE_PANIC	-2U
static int
text_to_tstate(const char *text, uint_t *out)
{
	if (strcasecmp(text, "panic") == 0)
		*out = TSTATE_PANIC;
	else if (thread_text_to_state(text, out) != 0) {
		mdb_warn("tstate \"%s\" not recognized\n", text);
		return (-1);
	}
	return (0);
}

static void
tstate_to_text(uint_t tstate, uint_t paniced, char *out, size_t out_sz)
{
	if (paniced)
		mdb_snprintf(out, out_sz, "panic");
	else
		thread_state_to_text(tstate, out, out_sz);
	uppercase(out);
}

/*
 * ::stacks data structures.  Each unique stack is represented by one
 * stacks_entry_t: threads whose state, sync object type, and stack PCs all
 * match are counted in se_count, with their individual entries chained off
 * se_dup.  Entries are collected into a small hash table and later sorted
 * into stacks_array for display.
 */
typedef struct stacks_entry {
	struct stacks_entry	*se_next;
	struct stacks_entry	*se_dup;	/* dups of this stack */
	uintptr_t		se_thread;
	uintptr_t		se_sp;
	uintptr_t		se_sobj_ops;
	uint32_t		se_tstate;
	uint32_t		se_count;	/* # threads w/ this stack */
	uint8_t			se_overflow;
	uint8_t			se_depth;
	uint8_t			se_failed;	/* failure reason; FSI_FAIL_* */
	uint8_t			se_panic;
	uintptr_t		se_stack[1];
} stacks_entry_t;
#define	STACKS_ENTRY_SIZE(x) OFFSETOF(stacks_entry_t, se_stack[(x)])

#define	STACKS_HSIZE 127

/* Maximum stack depth reported in stacks */
#define	STACKS_MAX_DEPTH 254

typedef struct stacks_info {
	size_t		si_count;	/* total stacks_entry_ts (incl dups) */
	size_t		si_entries;	/* # entries in hash table */
	stacks_entry_t	**si_hash;	/* hash table */

	findstack_info_t si_fsi;	/* transient callback state */
} stacks_info_t;


/* global state cached between invocations */
#define	STACKS_STATE_CLEAN	0
#define	STACKS_STATE_DIRTY	1
#define	STACKS_STATE_DONE	2
static uint_t stacks_state = STACKS_STATE_CLEAN;
static stacks_entry_t **stacks_hash;
static stacks_entry_t **stacks_array;
static size_t stacks_array_size;

size_t
stacks_hash_entry(stacks_entry_t *sep)
{
	size_t depth = sep->se_depth;
	uintptr_t *stack = sep->se_stack;

	uint64_t total = depth;

	while (depth > 0) {
		total += *stack;
		stack++; depth--;
	}

	return (total % STACKS_HSIZE);
}

/*
 * This is used to both compare stacks for equality and to sort the final
 * list of unique stacks.  forsort specifies the latter behavior, which
 * additionally:
 *	compares se_count, and
 *	sorts the stacks by text function name.
 *
 * The equality test is independent of se_count, and doesn't care about
 * relative ordering, so we don't do the extra work of looking up symbols
 * for the stack addresses.
 */
int
stacks_entry_comp_impl(stacks_entry_t *l, stacks_entry_t *r,
    uint_t forsort)
{
	int idx;

	int depth = MIN(l->se_depth, r->se_depth);

	/* no matter what, panic stacks come last. */
	if (l->se_panic > r->se_panic)
		return (1);
	if (l->se_panic < r->se_panic)
		return (-1);

	if (forsort) {
		/* put large counts earlier */
		if (l->se_count > r->se_count)
			return (-1);
		if (l->se_count < r->se_count)
			return (1);
	}

	if (l->se_tstate > r->se_tstate)
		return (1);
	if (l->se_tstate < r->se_tstate)
		return (-1);

	if (l->se_failed > r->se_failed)
		return (1);
	if (l->se_failed < r->se_failed)
		return (-1);

	for (idx = 0; idx < depth; idx++) {
		char lbuf[MDB_SYM_NAMLEN];
		char rbuf[MDB_SYM_NAMLEN];

		int rval;
		uintptr_t laddr = l->se_stack[idx];
		uintptr_t raddr = r->se_stack[idx];

		if (laddr == raddr)
			continue;

		if (forsort &&
		    mdb_lookup_by_addr(laddr, MDB_SYM_FUZZY,
		    lbuf, sizeof (lbuf), NULL) != -1 &&
		    mdb_lookup_by_addr(raddr, MDB_SYM_FUZZY,
		    rbuf, sizeof (rbuf), NULL) != -1 &&
		    (rval = strcmp(lbuf, rbuf)) != 0)
			return (rval);

		if (laddr > raddr)
			return (1);
		return (-1);
	}

	if (l->se_overflow > r->se_overflow)
		return (-1);
	if (l->se_overflow < r->se_overflow)
		return (1);

	if (l->se_depth > r->se_depth)
		return (1);
	if (l->se_depth < r->se_depth)
		return (-1);

	if (l->se_sobj_ops > r->se_sobj_ops)
		return (1);
	if (l->se_sobj_ops < r->se_sobj_ops)
		return (-1);

	return (0);
}

int
stacks_entry_comp(const void *l_arg, const void *r_arg)
{
	stacks_entry_t * const *lp = l_arg;
	stacks_entry_t * const *rp = r_arg;

	return (stacks_entry_comp_impl(*lp, *rp, 1));
}

void
stacks_cleanup(int force)
{
	int idx = 0;
	stacks_entry_t *cur, *next;

	if (stacks_state == STACKS_STATE_CLEAN)
		return;

	if (!force && stacks_state == STACKS_STATE_DONE)
		return;

	/*
	 * Until the array is sorted and stable, stacks_hash will be non-NULL.
	 * This way, we can get at all of the data, even if qsort() was
	 * interrupted while mucking with the array.
	 */
	if (stacks_hash != NULL) {
		for (idx = 0; idx < STACKS_HSIZE; idx++) {
			while ((cur = stacks_hash[idx]) != NULL) {
				while ((next = cur->se_dup) != NULL) {
					cur->se_dup = next->se_dup;
					mdb_free(next,
					    STACKS_ENTRY_SIZE(next->se_depth));
				}
				next = cur->se_next;
				stacks_hash[idx] = next;
				mdb_free(cur, STACKS_ENTRY_SIZE(cur->se_depth));
			}
		}
		if (stacks_array != NULL)
			mdb_free(stacks_array,
			    stacks_array_size * sizeof (*stacks_array));

	} else if (stacks_array != NULL) {
		for (idx = 0; idx < stacks_array_size; idx++) {
			if ((cur = stacks_array[idx]) != NULL) {
				while ((next = cur->se_dup) != NULL) {
					cur->se_dup = next->se_dup;
					mdb_free(next,
					    STACKS_ENTRY_SIZE(next->se_depth));
				}
				stacks_array[idx] = NULL;
				mdb_free(cur, STACKS_ENTRY_SIZE(cur->se_depth));
			}
		}
		mdb_free(stacks_array,
		    stacks_array_size * sizeof (*stacks_array));
	}

	stacks_array_size = 0;
	stacks_state = STACKS_STATE_CLEAN;
}
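
/*
 * Callback for the "thread" walker: gather the stack of a single thread.
 * Each new stack is hashed into si_hash; a thread whose stack matches an
 * existing entry is chained onto that entry's se_dup list and bumps its
 * se_count rather than creating a new entry.
 */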
/*ARGSUSED*/
int
stacks_thread_cb(uintptr_t addr, const void *ignored, void *cbarg)
{
	stacks_info_t *sip = cbarg;
	findstack_info_t *fsip = &sip->si_fsi;

	stacks_entry_t **sepp, *nsep, *sep;
	int idx;
	size_t depth;

	if (do_findstack(addr, fsip, 0) != DCMD_OK &&
	    fsip->fsi_failed == FSI_FAIL_BADTHREAD) {
		mdb_warn("couldn't read thread at %p\n", addr);
		return (WALK_NEXT);
	}

	sip->si_count++;

	depth = fsip->fsi_depth;
	nsep = mdb_zalloc(STACKS_ENTRY_SIZE(depth), UM_SLEEP);
	nsep->se_thread = addr;
	nsep->se_sp = fsip->fsi_sp;
	nsep->se_sobj_ops = fsip->fsi_sobj_ops;
	nsep->se_tstate = fsip->fsi_tstate;
	nsep->se_count = 1;
	nsep->se_overflow = fsip->fsi_overflow;
	nsep->se_depth = depth;
	nsep->se_failed = fsip->fsi_failed;
	nsep->se_panic = fsip->fsi_panic;

	for (idx = 0; idx < depth; idx++)
		nsep->se_stack[idx] = fsip->fsi_stack[idx];

	for (sepp = &sip->si_hash[stacks_hash_entry(nsep)];
	    (sep = *sepp) != NULL;
	    sepp = &sep->se_next) {

		if (stacks_entry_comp_impl(sep, nsep, 0) != 0)
			continue;

		nsep->se_dup = sep->se_dup;
		sep->se_dup = nsep;
		sep->se_count++;
		return (WALK_NEXT);
	}

	nsep->se_next = NULL;
	*sepp = nsep;
	sip->si_entries++;

	return (WALK_NEXT);
}

int
stacks_run(int verbose)
{
	stacks_info_t si;
	findstack_info_t *fsip = &si.si_fsi;
	size_t idx;
	stacks_entry_t **cur;

	bzero(&si, sizeof (si));

	stacks_state = STACKS_STATE_DIRTY;

	stacks_hash = si.si_hash =
	    mdb_zalloc(STACKS_HSIZE * sizeof (*si.si_hash), UM_SLEEP);
	si.si_entries = 0;
	si.si_count = 0;

	fsip->fsi_max_depth = STACKS_MAX_DEPTH;
	fsip->fsi_stack =
	    mdb_alloc(fsip->fsi_max_depth * sizeof (*fsip->fsi_stack),
	    UM_SLEEP | UM_GC);

	if (verbose)
		mdb_warn("stacks: processing kernel threads\n");

	if (mdb_walk("thread", stacks_thread_cb, &si) != 0) {
		mdb_warn("cannot walk \"thread\"");
		return (DCMD_ERR);
	}

	if (verbose)
		mdb_warn("stacks: %d unique stacks / %d threads\n",
		    si.si_entries, si.si_count);

	stacks_array_size = si.si_entries;
	stacks_array =
	    mdb_zalloc(si.si_entries * sizeof (*stacks_array), UM_SLEEP);
	cur = stacks_array;
	for (idx = 0; idx < STACKS_HSIZE; idx++) {
		stacks_entry_t *sep;
		for (sep = si.si_hash[idx]; sep != NULL; sep = sep->se_next)
			*(cur++) = sep;
	}

	if (cur != stacks_array + si.si_entries) {
		mdb_warn("stacks: miscounted array size (%d != size: %d)\n",
		    (cur - stacks_array), stacks_array_size);
		return (DCMD_ERR);
	}
	qsort(stacks_array, si.si_entries, sizeof (*stacks_array),
	    stacks_entry_comp);

	/* Now that we're done, free the hash table */
	stacks_hash = NULL;
	mdb_free(si.si_hash, STACKS_HSIZE * sizeof (*si.si_hash));

	stacks_state = STACKS_STATE_DONE;

	if (verbose)
		mdb_warn("stacks: done\n");

	return (DCMD_OK);
}
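
/*
 * Return nonzero if any PC in sep's stack falls within the range named by
 * "addr".  If addr resolves to the start of a symbol, the whole range
 * [symbol, symbol + size) is matched; otherwise only the single address is.
 */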
static int
stacks_has_caller(stacks_entry_t *sep, uintptr_t addr)
{
	uintptr_t laddr = addr;
	uintptr_t haddr = addr + 1;
	int idx;
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;

	if (mdb_lookup_by_addr(addr, MDB_SYM_FUZZY,
	    c, sizeof (c), &sym) != -1 &&
	    addr == (uintptr_t)sym.st_value) {
		laddr = (uintptr_t)sym.st_value;
		haddr = (uintptr_t)sym.st_value + sym.st_size;
	}

	for (idx = 0; idx < sep->se_depth; idx++)
		if (sep->se_stack[idx] >= laddr && sep->se_stack[idx] < haddr)
			return (1);

	return (0);
}

static int
uintptrcomp(const void *lp, const void *rp)
{
	uintptr_t lhs = *(const uintptr_t *)lp;
	uintptr_t rhs = *(const uintptr_t *)rp;
	if (lhs > rhs)
		return (1);
	if (lhs < rhs)
		return (-1);
	return (0);
}

/*ARGSUSED*/
static void
print_sobj_help(int type, const char *name, const char *ops_name, void *ign)
{
	mdb_printf(" %s", name);
}

/*ARGSUSED*/
static void
print_tstate_help(uint_t state, const char *name, void *ignored)
{
	mdb_printf(" %s", name);
}

void
stacks_help(void)
{
	mdb_printf(
"::stacks processes all of the thread stacks on the system, grouping\n"
"together threads which have the same:\n"
"\n"
"  * Thread state,\n"
"  * Sync object type, and\n"
"  * PCs in their stack trace.\n"
"\n"
"The default output (no address or options) is just a dump of the thread\n"
"groups in the system.  For a view of active threads, use \"::stacks -i\",\n"
"which filters out FREE threads (interrupt threads which are currently\n"
"inactive) and threads sleeping on a CV. (Note that those threads may still\n"
"be noteworthy; this is just for a first glance.)  More general filtering\n"
"options are described below, in the \"FILTERS\" section.\n"
"\n"
"::stacks can be used in a pipeline.  The input to ::stacks is one or more\n"
"thread pointers.  For example, to get a summary of threads in a process,\n"
"you can do:\n"
"\n"
"  %<b>procp%</b>::walk thread | ::stacks\n"
"\n"
"When output into a pipe, ::stacks prints all of the threads input,\n"
"filtered by the given filtering options.  This means that multiple\n"
"::stacks invocations can be piped together to achieve more complicated\n"
"filters.  For example, to get threads which have both 'fop_read' and\n"
"'cv_wait_sig_swap' in their stack trace, you could do:\n"
"\n"
"  ::stacks -c fop_read | ::stacks -c cv_wait_sig_swap_core\n"
"\n"
"To get the full list of threads in each group, use the '-a' flag:\n"
"\n"
"  ::stacks -a\n"
"\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -a    Print all of the grouped threads, instead of just a count.\n"
"  -f    Force a re-run of the thread stack gathering.\n"
"  -v    Be verbose about thread stack gathering.\n"
"\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>FILTERS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -i    Show active threads; equivalent to '-S CV -T FREE'.\n"
"  -c func[+offset]\n"
"        Only print threads whose stacks contain func/func+offset.\n"
"  -C func[+offset]\n"
"        Only print threads whose stacks do not contain func/func+offset.\n"
"  -s {type | ALL}\n"
"        Only print threads which are on a 'type' synchronization object\n"
"        (SOBJ).\n"
"  -S {type | ALL}\n"
"        Only print threads which are not on a 'type' SOBJ.\n"
"  -t tstate\n"
"        Only print threads which are in thread state 'tstate'.\n"
"  -T tstate\n"
"        Only print threads which are not in thread state 'tstate'.\n"
"\n");
	mdb_printf("   SOBJ types:");
	sobj_type_walk(print_sobj_help, NULL);
	mdb_printf("\n");
	mdb_printf("Thread states:");
	thread_walk_states(print_tstate_help, NULL);
	mdb_printf(" panic\n");
}
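
/*
 * The ::stacks dcmd itself.  The expensive part (walking every thread and
 * summarizing the unique stacks) is done once and cached in stacks_array;
 * later invocations re-use the cached summary and just apply the requested
 * filters, unless -f is given or the target is a live system.
 */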
/*ARGSUSED*/
int
stacks(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	size_t idx;

	char *seen = NULL;

	const char *caller_str = NULL;
	const char *excl_caller_str = NULL;
	uintptr_t caller = 0, excl_caller = 0;
	const char *sobj = NULL;
	const char *excl_sobj = NULL;
	uintptr_t sobj_ops = 0, excl_sobj_ops = 0;
	const char *tstate_str = NULL;
	const char *excl_tstate_str = NULL;
	uint_t tstate = -1U;
	uint_t excl_tstate = -1U;

	uint_t all = 0;
	uint_t force = 0;
	uint_t interesting = 0;
	uint_t verbose = 0;

	/*
	 * We have a slight behavior difference between having piped
	 * input and 'addr::stacks'.  Without a pipe, we assume the
	 * thread pointer given is a representative thread, and so
	 * we include all similar threads in the system in our output.
	 *
	 * With a pipe, we filter down to just the threads in our
	 * input.
	 */
	uint_t addrspec = (flags & DCMD_ADDRSPEC);
	uint_t only_matching = addrspec && (flags & DCMD_PIPE);

	mdb_pipe_t p;

	if (mdb_getopts(argc, argv,
	    'a', MDB_OPT_SETBITS, TRUE, &all,
	    'f', MDB_OPT_SETBITS, TRUE, &force,
	    'i', MDB_OPT_SETBITS, TRUE, &interesting,
	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
	    'c', MDB_OPT_STR, &caller_str,
	    'C', MDB_OPT_STR, &excl_caller_str,
	    's', MDB_OPT_STR, &sobj,
	    'S', MDB_OPT_STR, &excl_sobj,
	    't', MDB_OPT_STR, &tstate_str,
	    'T', MDB_OPT_STR, &excl_tstate_str,
	    NULL) != argc)
		return (DCMD_USAGE);

	if (interesting) {
		if (sobj != NULL || excl_sobj != NULL ||
		    tstate_str != NULL || excl_tstate_str != NULL) {
			mdb_warn(
			    "stacks: -i is incompatible with -[sStT]\n");
			return (DCMD_USAGE);
		}
		excl_sobj = "CV";
		excl_tstate_str = "FREE";
	}

	if (caller_str != NULL) {
		mdb_set_dot(0);
		if (mdb_eval(caller_str) != 0) {
			mdb_warn("stacks: evaluation of \"%s\" failed",
			    caller_str);
			return (DCMD_ABORT);
		}
		caller = mdb_get_dot();
	}

	if (excl_caller_str != NULL) {
		mdb_set_dot(0);
		if (mdb_eval(excl_caller_str) != 0) {
			mdb_warn("stacks: evaluation of \"%s\" failed",
			    excl_caller_str);
			return (DCMD_ABORT);
		}
		excl_caller = mdb_get_dot();
	}
	mdb_set_dot(addr);

	if (sobj != NULL &&
	    text_to_sobj(sobj, &sobj_ops) != 0)
		return (DCMD_USAGE);

	if (excl_sobj != NULL &&
	    text_to_sobj(excl_sobj, &excl_sobj_ops) != 0)
		return (DCMD_USAGE);

	if (sobj_ops != 0 && excl_sobj_ops != 0) {
		mdb_warn("stacks: only one of -s and -S can be specified\n");
		return (DCMD_USAGE);
	}

	if (tstate_str &&
	    text_to_tstate(tstate_str, &tstate) != 0)
		return (DCMD_USAGE);
	if (excl_tstate_str &&
	    text_to_tstate(excl_tstate_str, &excl_tstate) != 0)
		return (DCMD_USAGE);

	if (tstate != -1U && excl_tstate != -1U) {
		mdb_warn("stacks: only one of -t and -T can be specified\n");
		return (DCMD_USAGE);
	}

	/*
	 * Force a cleanup if we're connected to a live system.  Never
	 * do a cleanup after the first invocation around the loop.
	 */
	force |= (mdb_get_state() == MDB_STATE_RUNNING);
	if (force && (flags & (DCMD_LOOPFIRST|DCMD_LOOP)) == DCMD_LOOP)
		force = 0;

	stacks_cleanup(force);

	if (stacks_state == STACKS_STATE_CLEAN) {
		int res = stacks_run(verbose);
		if (res != DCMD_OK)
			return (res);
	}

	if (!all && DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
		mdb_printf("%<u>%-?s %-8s %-?s %8s%</u>\n",
		    "THREAD", "STATE", "SOBJ", "COUNT");
	}

	/*
	 * If there's an address specified, we're going to further filter
	 * to only entries which have an address in the input.  To reduce
	 * overhead (and make the sorted output come out right), we
	 * use mdb_get_pipe() to grab the entire pipeline of input, then
	 * use qsort() and bsearch() to speed up the search.
	 */
	if (addrspec) {
		mdb_get_pipe(&p);
		if (p.pipe_data == NULL || p.pipe_len == 0) {
			p.pipe_data = &addr;
			p.pipe_len = 1;
		}
		qsort(p.pipe_data, p.pipe_len, sizeof (uintptr_t),
		    uintptrcomp);

		/* remove any duplicates in the data */
		idx = 0;
		while (idx < p.pipe_len - 1) {
			uintptr_t *data = &p.pipe_data[idx];
			size_t len = p.pipe_len - idx;

			if (data[0] == data[1]) {
				memmove(data, data + 1,
				    (len - 1) * sizeof (*data));
				p.pipe_len--;
				continue; /* repeat without incrementing idx */
			}
			idx++;
		}

		seen = mdb_zalloc(p.pipe_len, UM_SLEEP | UM_GC);
	}

	for (idx = 0; idx < stacks_array_size; idx++) {
		stacks_entry_t *sep = stacks_array[idx];
		stacks_entry_t *cur = sep;
		int frame;
		size_t count = sep->se_count;

		if (addrspec) {
			stacks_entry_t *head = NULL, *tail = NULL, *sp;
			size_t foundcount = 0;
			/*
			 * We use the now-unused hash chain field se_next to
			 * link together the dups which match our list.
			 */
			for (sp = sep; sp != NULL; sp = sp->se_dup) {
				uintptr_t *entry = bsearch(&sp->se_thread,
				    p.pipe_data, p.pipe_len, sizeof (uintptr_t),
				    uintptrcomp);
				if (entry != NULL) {
					foundcount++;
					seen[entry - p.pipe_data]++;
					if (head == NULL)
						head = sp;
					else
						tail->se_next = sp;
					tail = sp;
					sp->se_next = NULL;
				}
			}
			if (head == NULL)
				continue;	/* no match, skip entry */

			if (only_matching) {
				cur = sep = head;
				count = foundcount;
			}
		}

		if (caller != 0 && !stacks_has_caller(sep, caller))
			continue;
		if (excl_caller != 0 && stacks_has_caller(sep, excl_caller))
			continue;

		if (tstate != -1U) {
			if (tstate == TSTATE_PANIC) {
				if (!sep->se_panic)
					continue;
			} else if (sep->se_panic || sep->se_tstate != tstate)
				continue;
		}
		if (excl_tstate != -1U) {
			if (excl_tstate == TSTATE_PANIC) {
				if (sep->se_panic)
					continue;
			} else if (!sep->se_panic &&
			    sep->se_tstate == excl_tstate)
				continue;
		}

		if (sobj_ops == SOBJ_ALL) {
			if (sep->se_sobj_ops == 0)
				continue;
		} else if (sobj_ops != 0) {
			if (sobj_ops != sep->se_sobj_ops)
				continue;
		}

		if (!(interesting && sep->se_panic)) {
			if (excl_sobj_ops == SOBJ_ALL) {
				if (sep->se_sobj_ops != 0)
					continue;
			} else if (excl_sobj_ops != 0) {
				if (excl_sobj_ops == sep->se_sobj_ops)
					continue;
			}
		}

		if (flags & DCMD_PIPE_OUT) {
			while (sep != NULL) {
				mdb_printf("%lr\n", sep->se_thread);
				sep = only_matching ?
				    sep->se_next : sep->se_dup;
			}
			continue;
		}

		if (all) {
			mdb_printf("%<u>%-?s %-8s %-?s %8s%</u>\n",
			    "THREAD", "STATE", "SOBJTYPE", "COUNT");
		}

		do {
			char state[20];
			char sobj[100];

			tstate_to_text(cur->se_tstate, cur->se_panic,
			    state, sizeof (state));
			sobj_to_text(cur->se_sobj_ops,
			    sobj, sizeof (sobj));

			if (cur == sep)
				mdb_printf("%?p %-8s %-?s %8d\n",
				    cur->se_thread, state, sobj, count);
			else
				mdb_printf("%?p %-8s %-?s %8s\n",
				    cur->se_thread, state, sobj, "-");

			cur = only_matching ? cur->se_next : cur->se_dup;
		} while (all && cur != NULL);

		if (sep->se_failed != 0) {
			char *reason;
			switch (sep->se_failed) {
			case FSI_FAIL_NOTINMEMORY:
				reason = "thread not in memory";
				break;
			case FSI_FAIL_THREADCORRUPT:
				reason = "thread structure stack info corrupt";
				break;
			case FSI_FAIL_STACKNOTFOUND:
				reason = "no consistent stack found";
				break;
			default:
				reason = "unknown failure";
				break;
			}
			mdb_printf("%?s <%s>\n", "", reason);
		}

		for (frame = 0; frame < sep->se_depth; frame++)
			mdb_printf("%?s %a\n", "", sep->se_stack[frame]);
		if (sep->se_overflow)
			mdb_printf("%?s ... truncated ...\n", "");
		mdb_printf("\n");
	}

	if (flags & DCMD_ADDRSPEC) {
		for (idx = 0; idx < p.pipe_len; idx++)
			if (seen[idx] == 0)
				mdb_warn("stacks: %p not in thread list\n",
				    p.pipe_data[idx]);
	}
	return (DCMD_OK);
}