xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/findstack.c (revision 9a5d73e03cd3312ddb571a748c40a63c58bd66e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ctf.h>
28 
29 #include <sys/types.h>
30 #include <sys/regset.h>
31 #include <sys/stack.h>
32 #include <sys/thread.h>
33 #include <sys/modctl.h>
34 
35 #include "findstack.h"
36 #include "thread.h"
37 #include "sobj.h"
38 
39 typedef struct findstack_info {
40 	uintptr_t	*fsi_stack;	/* place to record frames */
41 
42 	uintptr_t	fsi_sp;		/* stack pointer */
43 	uintptr_t	fsi_pc;		/* pc */
44 	uintptr_t	fsi_sobj_ops;	/* sobj_ops */
45 
46 	uint_t		fsi_tstate;	/* t_state */
47 
48 	uchar_t		fsi_depth;	/* stack depth */
49 	uchar_t		fsi_failed;	/* search failed */
50 	uchar_t		fsi_overflow;	/* stack was deeper than max_depth */
51 	uchar_t		fsi_panic;	/* thread called panic() */
52 
53 	uchar_t		fsi_max_depth;	/* stack frames available */
54 } findstack_info_t;
55 #define	FSI_FAIL_BADTHREAD	1
56 #define	FSI_FAIL_NOTINMEMORY	2
57 #define	FSI_FAIL_THREADCORRUPT	3
58 #define	FSI_FAIL_STACKNOTFOUND	4
59 
/* Default to an unbiased stack pointer when no STACK_BIAS is defined. */
#ifndef STACK_BIAS
#define	STACK_BIAS	0
#endif

/*
 * Debug tracing, emitted only while findstack_debug_on is non-zero.  "x" is
 * a complete, parenthesized mdb_printf() argument list:
 *
 *	fs_dprintf(("fp = %p\n", fp));
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; a bare "if { }" would break, or silently capture
 * the "else", when used as the arm of an unbraced if/else.
 */
#define	fs_dprintf(x)					\
	do {						\
		if (findstack_debug_on) {		\
			mdb_printf("findstack debug: ");	\
			/*CSTYLED*/			\
			mdb_printf x ;			\
		}					\
	} while (0)

/* Flipped by the findstack_debug() dcmd. */
static int findstack_debug_on = 0;
72 
#if defined(__i386) || defined(__amd64)
/*
 * Minimal frame layout used by the crawl code on x86: rw_fp is followed as
 * the link to the next frame, and rw_rtn is recorded as the return address.
 */
struct rwindow {
	uintptr_t rw_fp;
	uintptr_t rw_rtn;
};
#endif

/* Thread stacks larger than this many bytes (1MB) are treated as bogus. */
#define	TOO_BIG_FOR_A_STACK (1024 * 1024)

/*
 * Translate between kernel stack addresses (K) and addresses inside the
 * debugger's local copy of the stack (U).  Both macros rely on variables
 * named "kbase" and "ubase" being in scope where they are expanded.
 */
#define	KTOU(p) ((p) - kbase + ubase)
#define	UTOK(p) ((p) - ubase + kbase)

/* crawl() return value: the frame chain ran all the way to the stack's end. */
#define	CRAWL_FOUNDALL	(-1)
86 
87 /*
88  * Given a stack pointer, try to crawl down it to the bottom.
89  * "frame" is a VA in MDB's address space.
90  *
91  * Returns the number of frames successfully crawled down, or
92  * CRAWL_FOUNDALL if it got to the bottom of the stack.
93  */
94 static int
95 crawl(uintptr_t frame, uintptr_t kbase, uintptr_t ktop, uintptr_t ubase,
96     int kill_fp, findstack_info_t *fsip)
97 {
98 	int levels = 0;
99 
100 	fsip->fsi_depth = 0;
101 	fsip->fsi_overflow = 0;
102 
103 	fs_dprintf(("<0> frame = %p, kbase = %p, ktop = %p, ubase = %p\n",
104 	    frame, kbase, ktop, ubase));
105 	for (;;) {
106 		uintptr_t fp;
107 		long *fpp = (long *)&((struct rwindow *)frame)->rw_fp;
108 
109 		fs_dprintf(("<1> fpp = %p, frame = %p\n", fpp, frame));
110 
111 		if ((frame & (STACK_ALIGN - 1)) != 0)
112 			break;
113 
114 		fp = ((struct rwindow *)frame)->rw_fp + STACK_BIAS;
115 		if (fsip->fsi_depth < fsip->fsi_max_depth)
116 			fsip->fsi_stack[fsip->fsi_depth++] =
117 			    ((struct rwindow *)frame)->rw_rtn;
118 		else
119 			fsip->fsi_overflow = 1;
120 
121 		fs_dprintf(("<2> fp = %p\n", fp));
122 
123 		if (fp == ktop)
124 			return (CRAWL_FOUNDALL);
125 		fs_dprintf(("<3> not at base\n"));
126 
127 #if defined(__i386) || defined(__amd64)
128 		if (ktop - fp == sizeof (struct rwindow)) {
129 			fs_dprintf(("<4> found base\n"));
130 			return (CRAWL_FOUNDALL);
131 		}
132 #endif
133 
134 		fs_dprintf(("<5> fp = %p, kbase = %p, ktop - size = %p\n",
135 		    fp, kbase, ktop - sizeof (struct rwindow)));
136 
137 		if (fp < kbase || fp >= (ktop - sizeof (struct rwindow)))
138 			break;
139 
140 		frame = KTOU(fp);
141 		fs_dprintf(("<6> frame = %p\n", frame));
142 
143 		/*
144 		 * NULL out the old %fp so we don't go down this stack
145 		 * more than once.
146 		 */
147 		if (kill_fp) {
148 			fs_dprintf(("<7> fpp = %p\n", fpp));
149 			*fpp = NULL;
150 		}
151 
152 		fs_dprintf(("<8> levels = %d\n", levels));
153 		levels++;
154 	}
155 
156 	return (levels);
157 }
158 
159 /*
160  * "sp" is a kernel VA.
161  */
162 static int
163 print_stack(uintptr_t sp, uintptr_t pc, uintptr_t addr,
164     int argc, const mdb_arg_t *argv, int free_state)
165 {
166 	int showargs = 0, count, err;
167 
168 	count = mdb_getopts(argc, argv,
169 	    'v', MDB_OPT_SETBITS, TRUE, &showargs, NULL);
170 	argc -= count;
171 	argv += count;
172 
173 	if (argc > 1 || (argc == 1 && argv->a_type != MDB_TYPE_STRING))
174 		return (DCMD_USAGE);
175 
176 	mdb_printf("stack pointer for thread %p%s: %p\n",
177 	    addr, (free_state ? " (TS_FREE)" : ""), sp);
178 	if (pc != 0)
179 		mdb_printf("[ %0?lr %a() ]\n", sp, pc);
180 
181 	mdb_inc_indent(2);
182 	mdb_set_dot(sp);
183 
184 	if (argc == 1)
185 		err = mdb_eval(argv->a_un.a_str);
186 	else if (showargs)
187 		err = mdb_eval("<.$C");
188 	else
189 		err = mdb_eval("<.$C0");
190 
191 	mdb_dec_indent(2);
192 
193 	return ((err == -1) ? DCMD_ABORT : DCMD_OK);
194 }
195 
196 /*ARGSUSED*/
197 static int
198 do_findstack(uintptr_t addr, findstack_info_t *fsip, uint_t print_warnings)
199 {
200 	kthread_t thr;
201 	size_t stksz;
202 	uintptr_t ubase, utop;
203 	uintptr_t kbase, ktop;
204 	uintptr_t win, sp;
205 
206 	fsip->fsi_failed = 0;
207 	fsip->fsi_pc = 0;
208 	fsip->fsi_sp = 0;
209 	fsip->fsi_depth = 0;
210 	fsip->fsi_overflow = 0;
211 
212 	bzero(&thr, sizeof (thr));
213 	if (mdb_ctf_vread(&thr, "kthread_t", addr,
214 	    MDB_CTF_VREAD_IGNORE_ALL) == -1) {
215 		if (print_warnings)
216 			mdb_warn("couldn't read thread at %p\n", addr);
217 		fsip->fsi_failed = FSI_FAIL_BADTHREAD;
218 		return (DCMD_ERR);
219 	}
220 
221 	fsip->fsi_sobj_ops = (uintptr_t)thr.t_sobj_ops;
222 	fsip->fsi_tstate = thr.t_state;
223 	fsip->fsi_panic = !!(thr.t_flag & T_PANIC);
224 
225 	if ((thr.t_schedflag & TS_LOAD) == 0) {
226 		if (print_warnings)
227 			mdb_warn("thread %p isn't in memory\n", addr);
228 		fsip->fsi_failed = FSI_FAIL_NOTINMEMORY;
229 		return (DCMD_ERR);
230 	}
231 
232 	if (thr.t_stk < thr.t_stkbase) {
233 		if (print_warnings)
234 			mdb_warn(
235 			    "stack base or stack top corrupt for thread %p\n",
236 			    addr);
237 		fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
238 		return (DCMD_ERR);
239 	}
240 
241 	kbase = (uintptr_t)thr.t_stkbase;
242 	ktop = (uintptr_t)thr.t_stk;
243 	stksz = ktop - kbase;
244 
245 #ifdef __amd64
246 	/*
247 	 * The stack on amd64 is intentionally misaligned, so ignore the top
248 	 * half-frame.  See thread_stk_init().  When handling traps, the frame
249 	 * is automatically aligned by the hardware, so we only alter ktop if
250 	 * needed.
251 	 */
252 	if ((ktop & (STACK_ALIGN - 1)) != 0)
253 		ktop -= STACK_ENTRY_ALIGN;
254 #endif
255 
256 	/*
257 	 * If the stack size is larger than a meg, assume that it's bogus.
258 	 */
259 	if (stksz > TOO_BIG_FOR_A_STACK) {
260 		if (print_warnings)
261 			mdb_warn("stack size for thread %p is too big to be "
262 			    "reasonable\n", addr);
263 		fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
264 		return (DCMD_ERR);
265 	}
266 
267 	/*
268 	 * This could be (and was) a UM_GC allocation.  Unfortunately,
269 	 * stksz tends to be very large.  As currently implemented, dcmds
270 	 * invoked as part of pipelines don't have their UM_GC-allocated
271 	 * memory freed until the pipeline completes.  With stksz in the
272 	 * neighborhood of 20k, the popular ::walk thread |::findstack
273 	 * pipeline can easily run memory-constrained debuggers (kmdb) out
274 	 * of memory.  This can be changed back to a gc-able allocation when
275 	 * the debugger is changed to free UM_GC memory more promptly.
276 	 */
277 	ubase = (uintptr_t)mdb_alloc(stksz, UM_SLEEP);
278 	utop = ubase + stksz;
279 	if (mdb_vread((caddr_t)ubase, stksz, kbase) != stksz) {
280 		mdb_free((void *)ubase, stksz);
281 		if (print_warnings)
282 			mdb_warn("couldn't read entire stack for thread %p\n",
283 			    addr);
284 		fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
285 		return (DCMD_ERR);
286 	}
287 
288 	/*
289 	 * Try the saved %sp first, if it looks reasonable.
290 	 */
291 	sp = KTOU((uintptr_t)thr.t_sp + STACK_BIAS);
292 	if (sp >= ubase && sp <= utop) {
293 		if (crawl(sp, kbase, ktop, ubase, 0, fsip) == CRAWL_FOUNDALL) {
294 			fsip->fsi_sp = (uintptr_t)thr.t_sp;
295 #if !defined(__i386)
296 			fsip->fsi_pc = (uintptr_t)thr.t_pc;
297 #endif
298 			goto found;
299 		}
300 	}
301 
302 	/*
303 	 * Now walk through the whole stack, starting at the base,
304 	 * trying every possible "window".
305 	 */
306 	for (win = ubase;
307 	    win + sizeof (struct rwindow) <= utop;
308 	    win += sizeof (struct rwindow *)) {
309 		if (crawl(win, kbase, ktop, ubase, 1, fsip) == CRAWL_FOUNDALL) {
310 			fsip->fsi_sp = UTOK(win) - STACK_BIAS;
311 			goto found;
312 		}
313 	}
314 
315 	/*
316 	 * We didn't conclusively find the stack.  So we'll take another lap,
317 	 * and print out anything that looks possible.
318 	 */
319 	if (print_warnings)
320 		mdb_printf("Possible stack pointers for thread %p:\n", addr);
321 	(void) mdb_vread((caddr_t)ubase, stksz, kbase);
322 
323 	for (win = ubase;
324 	    win + sizeof (struct rwindow) <= utop;
325 	    win += sizeof (struct rwindow *)) {
326 		uintptr_t fp = ((struct rwindow *)win)->rw_fp;
327 		int levels;
328 
329 		if ((levels = crawl(win, kbase, ktop, ubase, 1, fsip)) > 1) {
330 			if (print_warnings)
331 				mdb_printf("  %p (%d)\n", fp, levels);
332 		} else if (levels == CRAWL_FOUNDALL) {
333 			/*
334 			 * If this is a live system, the stack could change
335 			 * between the two mdb_vread(ubase, utop, kbase)'s,
336 			 * and we could have a fully valid stack here.
337 			 */
338 			fsip->fsi_sp = UTOK(win) - STACK_BIAS;
339 			goto found;
340 		}
341 	}
342 
343 	fsip->fsi_depth = 0;
344 	fsip->fsi_overflow = 0;
345 	fsip->fsi_failed = FSI_FAIL_STACKNOTFOUND;
346 
347 	mdb_free((void *)ubase, stksz);
348 	return (DCMD_ERR);
349 found:
350 	mdb_free((void *)ubase, stksz);
351 	return (DCMD_OK);
352 }
353 
354 int
355 findstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
356 {
357 	findstack_info_t fsi;
358 	int retval;
359 
360 	if (!(flags & DCMD_ADDRSPEC))
361 		return (DCMD_USAGE);
362 
363 	bzero(&fsi, sizeof (fsi));
364 
365 	if ((retval = do_findstack(addr, &fsi, 1)) != DCMD_OK ||
366 	    fsi.fsi_failed)
367 		return (retval);
368 
369 	return (print_stack(fsi.fsi_sp, fsi.fsi_pc, addr,
370 	    argc, argv, fsi.fsi_tstate == TS_FREE));
371 }
372 
373 /*ARGSUSED*/
374 int
375 findstack_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *av)
376 {
377 	findstack_debug_on ^= 1;
378 
379 	mdb_printf("findstack: debugging is now %s\n",
380 	    findstack_debug_on ? "on" : "off");
381 
382 	return (DCMD_OK);
383 }
384 
/*
 * Convert the ASCII lower-case letters of the NUL-terminated string "p" to
 * upper case, in place.  All other characters are left untouched.
 */
static void
uppercase(char *p)
{
	char c;

	while ((c = *p) != '\0') {
		if (c >= 'a' && c <= 'z')
			*p = c - ('a' - 'A');
		p++;
	}
}
393 
/*
 * Fill "out" (of size "out_sz") with the upper-cased text that
 * sobj_ops_to_text() produces for the sobj_ops address "addr".
 */
static void
sobj_to_text(uintptr_t addr, char *out, size_t out_sz)
{
	sobj_ops_to_text(addr, out, out_sz);

	/* callers get the name in upper case */
	uppercase(out);
}
400 
/* Value stored in *out by text_to_sobj() for the name "ALL". */
#define	SOBJ_ALL	1

/*
 * Translate a synchronization-object type name into an sobj_ops value.
 * "ALL" (matched case-insensitively) yields SOBJ_ALL and a return of 0;
 * any other name is passed to sobj_text_to_ops(), whose result is returned.
 */
static int
text_to_sobj(const char *text, uintptr_t *out)
{
	if (strcasecmp(text, "ALL") != 0)
		return (sobj_text_to_ops(text, out));

	*out = SOBJ_ALL;
	return (0);
}
411 
412 #define	TSTATE_PANIC	-2U
413 static int
414 text_to_tstate(const char *text, uint_t *out)
415 {
416 	if (strcasecmp(text, "panic") == 0)
417 		*out = TSTATE_PANIC;
418 	else if (thread_text_to_state(text, out) != 0) {
419 		mdb_warn("tstate \"%s\" not recognized\n", text);
420 		return (-1);
421 	}
422 	return (0);
423 }
424 
425 static void
426 tstate_to_text(uint_t tstate, uint_t paniced, char *out, size_t out_sz)
427 {
428 	if (paniced)
429 		mdb_snprintf(out, out_sz, "panic");
430 	else
431 		thread_state_to_text(tstate, out, out_sz);
432 	uppercase(out);
433 }
434 
/*
 * One thread's stack as gathered for ::stacks.  Entries with equal stacks
 * are chained through se_dup off a single representative entry, which is
 * the one linked into the hash chain via se_next.
 */
typedef struct stacks_entry {
	struct stacks_entry	*se_next;	/* next entry on hash chain */
	struct stacks_entry	*se_dup;	/* dups of this stack */
	uintptr_t		se_thread;	/* thread address */
	uintptr_t		se_sp;		/* located stack pointer */
	uintptr_t		se_sobj_ops;	/* thread's sobj_ops */
	uint32_t		se_tstate;	/* thread's t_state */
	uint32_t		se_count;	/* # threads w/ this stack */
	uint8_t			se_overflow;	/* stack exceeded max depth */
	uint8_t			se_depth;	/* valid entries in se_stack */
	uint8_t			se_failed;	/* failure reason; FSI_FAIL_* */
	uint8_t			se_panic;	/* thread has panicked */
	uintptr_t		se_stack[1];	/* really se_depth entries */
} stacks_entry_t;

/* Bytes to allocate (and free) for an entry holding (x) stack frames. */
#define	STACKS_ENTRY_SIZE(x) OFFSETOF(stacks_entry_t, se_stack[(x)])
450 
451 #define	STACKS_HSIZE 127
452 
453 /* Maximum stack depth reported in stacks */
454 #define	STACKS_MAX_DEPTH	254
455 
456 typedef struct stacks_info {
457 	size_t		si_count;	/* total stacks_entry_ts (incl dups) */
458 	size_t		si_entries;	/* # entries in hash table */
459 	stacks_entry_t	**si_hash;	/* hash table */
460 
461 	findstack_info_t si_fsi;	/* transient callback state */
462 } stacks_info_t;
463 
464 
465 /* global state cached between invocations */
466 #define	STACKS_STATE_CLEAN	0
467 #define	STACKS_STATE_DIRTY	1
468 #define	STACKS_STATE_DONE	2
469 static uint_t stacks_state = STACKS_STATE_CLEAN;
470 static stacks_entry_t **stacks_hash;
471 static stacks_entry_t **stacks_array;
472 static size_t stacks_array_size;
473 
474 size_t
475 stacks_hash_entry(stacks_entry_t *sep)
476 {
477 	size_t depth = sep->se_depth;
478 	uintptr_t *stack = sep->se_stack;
479 
480 	uint64_t total = depth;
481 
482 	while (depth > 0) {
483 		total += *stack;
484 		stack++; depth--;
485 	}
486 
487 	return (total % STACKS_HSIZE);
488 }
489 
490 /*
491  * This is used to both compare stacks for equality and to sort the final
492  * list of unique stacks.  forsort specifies the latter behavior, which
493  * additionally:
494  *	compares se_count, and
495  *	sorts the stacks by text function name.
496  *
497  * The equality test is independent of se_count, and doesn't care about
498  * relative ordering, so we don't do the extra work of looking up symbols
499  * for the stack addresses.
500  */
501 int
502 stacks_entry_comp_impl(stacks_entry_t *l, stacks_entry_t *r,
503     uint_t forsort)
504 {
505 	int idx;
506 
507 	int depth = MIN(l->se_depth, r->se_depth);
508 
509 	/* no matter what, panic stacks come last. */
510 	if (l->se_panic > r->se_panic)
511 		return (1);
512 	if (l->se_panic < r->se_panic)
513 		return (-1);
514 
515 	if (forsort) {
516 		/* put large counts earlier */
517 		if (l->se_count > r->se_count)
518 			return (-1);
519 		if (l->se_count < r->se_count)
520 			return (1);
521 	}
522 
523 	if (l->se_tstate > r->se_tstate)
524 		return (1);
525 	if (l->se_tstate < r->se_tstate)
526 		return (-1);
527 
528 	if (l->se_failed > r->se_failed)
529 		return (1);
530 	if (l->se_failed < r->se_failed)
531 		return (-1);
532 
533 	for (idx = 0; idx < depth; idx++) {
534 		char lbuf[MDB_SYM_NAMLEN];
535 		char rbuf[MDB_SYM_NAMLEN];
536 
537 		int rval;
538 		uintptr_t laddr = l->se_stack[idx];
539 		uintptr_t raddr = r->se_stack[idx];
540 
541 		if (laddr == raddr)
542 			continue;
543 
544 		if (forsort &&
545 		    mdb_lookup_by_addr(laddr, MDB_SYM_FUZZY,
546 		    lbuf, sizeof (lbuf), NULL) != -1 &&
547 		    mdb_lookup_by_addr(raddr, MDB_SYM_FUZZY,
548 		    rbuf, sizeof (rbuf), NULL) != -1 &&
549 		    (rval = strcmp(lbuf, rbuf)) != 0)
550 			return (rval);
551 
552 		if (laddr > raddr)
553 			return (1);
554 		return (-1);
555 	}
556 
557 	if (l->se_overflow > r->se_overflow)
558 		return (-1);
559 	if (l->se_overflow < r->se_overflow)
560 		return (1);
561 
562 	if (l->se_depth > r->se_depth)
563 		return (1);
564 	if (l->se_depth < r->se_depth)
565 		return (-1);
566 
567 	if (l->se_sobj_ops > r->se_sobj_ops)
568 		return (1);
569 	if (l->se_sobj_ops < r->se_sobj_ops)
570 		return (-1);
571 
572 	return (0);
573 }
574 
575 int
576 stacks_entry_comp(const void *l_arg, const void *r_arg)
577 {
578 	stacks_entry_t * const *lp = l_arg;
579 	stacks_entry_t * const *rp = r_arg;
580 
581 	return (stacks_entry_comp_impl(*lp, *rp, 1));
582 }
583 
584 void
585 stacks_cleanup(int force)
586 {
587 	int idx = 0;
588 	stacks_entry_t *cur, *next;
589 
590 	if (stacks_state == STACKS_STATE_CLEAN)
591 		return;
592 
593 	if (!force && stacks_state == STACKS_STATE_DONE)
594 		return;
595 
596 	/*
597 	 * Until the array is sorted and stable, stacks_hash will be non-NULL.
598 	 * This way, we can get at all of the data, even if qsort() was
599 	 * interrupted while mucking with the array.
600 	 */
601 	if (stacks_hash != NULL) {
602 		for (idx = 0; idx < STACKS_HSIZE; idx++) {
603 			while ((cur = stacks_hash[idx]) != NULL) {
604 				while ((next = cur->se_dup) != NULL) {
605 					cur->se_dup = next->se_dup;
606 					mdb_free(next,
607 					    STACKS_ENTRY_SIZE(next->se_depth));
608 				}
609 				next = cur->se_next;
610 				stacks_hash[idx] = next;
611 				mdb_free(cur, STACKS_ENTRY_SIZE(cur->se_depth));
612 			}
613 		}
614 		if (stacks_array != NULL)
615 			mdb_free(stacks_array,
616 			    stacks_array_size * sizeof (*stacks_array));
617 
618 	} else if (stacks_array != NULL) {
619 		for (idx = 0; idx < stacks_array_size; idx++) {
620 			if ((cur = stacks_array[idx]) != NULL) {
621 				while ((next = cur->se_dup) != NULL) {
622 					cur->se_dup = next->se_dup;
623 					mdb_free(next,
624 					    STACKS_ENTRY_SIZE(next->se_depth));
625 				}
626 				stacks_array[idx] = NULL;
627 				mdb_free(cur, STACKS_ENTRY_SIZE(cur->se_depth));
628 			}
629 		}
630 		mdb_free(stacks_array,
631 		    stacks_array_size * sizeof (*stacks_array));
632 	}
633 
634 	stacks_array_size = 0;
635 	stacks_state = STACKS_STATE_CLEAN;
636 }
637 
638 /*ARGSUSED*/
639 int
640 stacks_thread_cb(uintptr_t addr, const void *ignored, void *cbarg)
641 {
642 	stacks_info_t *sip = cbarg;
643 	findstack_info_t *fsip = &sip->si_fsi;
644 
645 	stacks_entry_t **sepp, *nsep, *sep;
646 	int idx;
647 	size_t depth;
648 
649 	if (do_findstack(addr, fsip, 0) != DCMD_OK &&
650 	    fsip->fsi_failed == FSI_FAIL_BADTHREAD) {
651 		mdb_warn("couldn't read thread at %p\n", addr);
652 		return (WALK_NEXT);
653 	}
654 
655 	sip->si_count++;
656 
657 	depth = fsip->fsi_depth;
658 	nsep = mdb_zalloc(STACKS_ENTRY_SIZE(depth), UM_SLEEP);
659 	nsep->se_thread = addr;
660 	nsep->se_sp = fsip->fsi_sp;
661 	nsep->se_sobj_ops = fsip->fsi_sobj_ops;
662 	nsep->se_tstate = fsip->fsi_tstate;
663 	nsep->se_count = 1;
664 	nsep->se_overflow = fsip->fsi_overflow;
665 	nsep->se_depth = depth;
666 	nsep->se_failed = fsip->fsi_failed;
667 	nsep->se_panic = fsip->fsi_panic;
668 
669 	for (idx = 0; idx < depth; idx++)
670 		nsep->se_stack[idx] = fsip->fsi_stack[idx];
671 
672 	for (sepp = &sip->si_hash[stacks_hash_entry(nsep)];
673 	    (sep = *sepp) != NULL;
674 	    sepp = &sep->se_next) {
675 
676 		if (stacks_entry_comp_impl(sep, nsep, 0) != 0)
677 			continue;
678 
679 		nsep->se_dup = sep->se_dup;
680 		sep->se_dup = nsep;
681 		sep->se_count++;
682 		return (WALK_NEXT);
683 	}
684 
685 	nsep->se_next = NULL;
686 	*sepp = nsep;
687 	sip->si_entries++;
688 
689 	return (WALK_NEXT);
690 }
691 
692 int
693 stacks_run(int verbose)
694 {
695 	stacks_info_t si;
696 	findstack_info_t *fsip = &si.si_fsi;
697 	size_t idx;
698 	stacks_entry_t **cur;
699 
700 	bzero(&si, sizeof (si));
701 
702 	stacks_state = STACKS_STATE_DIRTY;
703 
704 	stacks_hash = si.si_hash =
705 	    mdb_zalloc(STACKS_HSIZE * sizeof (*si.si_hash), UM_SLEEP);
706 	si.si_entries = 0;
707 	si.si_count = 0;
708 
709 	fsip->fsi_max_depth = STACKS_MAX_DEPTH;
710 	fsip->fsi_stack =
711 	    mdb_alloc(fsip->fsi_max_depth * sizeof (*fsip->fsi_stack),
712 	    UM_SLEEP | UM_GC);
713 
714 	if (verbose)
715 		mdb_warn("stacks: processing kernel threads\n");
716 
717 	if (mdb_walk("thread", stacks_thread_cb, &si) != 0) {
718 		mdb_warn("cannot walk \"thread\"");
719 		return (DCMD_ERR);
720 	}
721 
722 	if (verbose)
723 		mdb_warn("stacks: %d unique stacks / %d threads\n",
724 		    si.si_entries, si.si_count);
725 
726 	stacks_array_size = si.si_entries;
727 	stacks_array =
728 	    mdb_zalloc(si.si_entries * sizeof (*stacks_array), UM_SLEEP);
729 	cur = stacks_array;
730 	for (idx = 0; idx < STACKS_HSIZE; idx++) {
731 		stacks_entry_t *sep;
732 		for (sep = si.si_hash[idx]; sep != NULL; sep = sep->se_next)
733 			*(cur++) = sep;
734 	}
735 
736 	if (cur != stacks_array + si.si_entries) {
737 		mdb_warn("stacks: miscounted array size (%d != size: %d)\n",
738 		    (cur - stacks_array), stacks_array_size);
739 		return (DCMD_ERR);
740 	}
741 	qsort(stacks_array, si.si_entries, sizeof (*stacks_array),
742 	    stacks_entry_comp);
743 
744 	/* Now that we're done, free the hash table */
745 	stacks_hash = NULL;
746 	mdb_free(si.si_hash, STACKS_HSIZE * sizeof (*si.si_hash));
747 
748 	stacks_state = STACKS_STATE_DONE;
749 
750 	if (verbose)
751 		mdb_warn("stacks: done\n");
752 
753 	return (DCMD_OK);
754 }
755 
756 static int
757 stacks_has_caller(stacks_entry_t *sep, uintptr_t addr)
758 {
759 	uintptr_t laddr = addr;
760 	uintptr_t haddr = addr + 1;
761 	int idx;
762 	char c[MDB_SYM_NAMLEN];
763 	GElf_Sym sym;
764 
765 	if (mdb_lookup_by_addr(addr, MDB_SYM_FUZZY,
766 	    c, sizeof (c), &sym) != -1 &&
767 	    addr == (uintptr_t)sym.st_value) {
768 		laddr = (uintptr_t)sym.st_value;
769 		haddr = (uintptr_t)sym.st_value + sym.st_size;
770 	}
771 
772 	for (idx = 0; idx < sep->se_depth; idx++)
773 		if (sep->se_stack[idx] >= laddr && sep->se_stack[idx] < haddr)
774 			return (1);
775 
776 	return (0);
777 }
778 
/* Argument block shared by find_module() and its walk callback. */
typedef struct find_module_struct {
	struct modctl *mcp;	/* out: copy of the matching modctl, or NULL */
	const char *name;	/* in: module name being searched for */
} find_module_struct_t;
783 
784 /*ARGSUSED*/
785 int
786 find_module_cb(uintptr_t addr, const void *modctl_arg, void *cbarg)
787 {
788 	find_module_struct_t *sp = cbarg;
789 	char mod_modname[MODMAXNAMELEN + 1];
790 	const struct modctl *mp = modctl_arg;
791 
792 	if (!mp->mod_modname)
793 		return (WALK_NEXT);
794 
795 	if (mdb_readstr(mod_modname, sizeof (mod_modname),
796 	    (uintptr_t)mp->mod_modname) == -1) {
797 		mdb_warn("failed to read mod_modname in \"modctl\" walk");
798 		return (WALK_ERR);
799 	}
800 
801 	if (strcmp(sp->name, mod_modname))
802 		return (WALK_NEXT);
803 
804 	sp->mcp = mdb_alloc(sizeof (*sp->mcp), UM_SLEEP | UM_GC);
805 	bcopy(mp, sp->mcp, sizeof (*sp->mcp));
806 	return (WALK_DONE);
807 }
808 
809 static struct modctl *
810 find_module(const char *name)
811 {
812 	find_module_struct_t mptr;
813 
814 	mptr.name = name;
815 	mptr.mcp = NULL;
816 
817 	if (mdb_walk("modctl", find_module_cb, &mptr) != 0)
818 		mdb_warn("cannot walk \"modctl\"");
819 	return (mptr.mcp);
820 }
821 
822 static int
823 stacks_has_module(stacks_entry_t *sep, struct modctl *mp)
824 {
825 	int idx;
826 
827 	if (mp == NULL)
828 		return (0);
829 
830 	for (idx = 0; idx < sep->se_depth; idx++)
831 		if (sep->se_stack[idx] >= (uintptr_t)mp->mod_text &&
832 		    sep->se_stack[idx] <
833 		    ((uintptr_t)mp->mod_text + mp->mod_text_size))
834 			return (1);
835 	return (0);
836 }
837 
838 
/*
 * Comparator for arrays of uintptr_t, in the form qsort() and bsearch()
 * expect: returns -1, 0 or 1 as *lp is below, equal to, or above *rp.
 */
static int
uintptrcomp(const void *lp, const void *rp)
{
	const uintptr_t *l = lp;
	const uintptr_t *r = rp;

	if (*l == *r)
		return (0);

	return ((*l > *r) ? 1 : -1);
}
850 
/*
 * sobj_type_walk() callback for the ::stacks help text: prints the type's
 * name preceded by a space.  The other arguments are unused.
 */
/*ARGSUSED*/
static void
print_sobj_help(int type, const char *name, const char *ops_name, void *ign)
{
	mdb_printf(" %s", name);
}
857 
858 /*ARGSUSED*/
859 static void
860 print_tstate_help(uint_t state, const char *name, void *ignored)
861 {
862 	mdb_printf(" %s", name);
863 }
864 
865 void
866 stacks_help(void)
867 {
868 	mdb_printf(
869 "::stacks processes all of the thread stacks on the system, grouping\n"
870 "together threads which have the same:\n"
871 "\n"
872 "  * Thread state,\n"
873 "  * Sync object type, and\n"
874 "  * PCs in their stack trace.\n"
875 "\n"
876 "The default output (no address or options) is just a dump of the thread\n"
877 "groups in the system.  For a view of active threads, use \"::stacks -i\",\n"
878 "which filters out FREE threads (interrupt threads which are currently\n"
879 "inactive) and threads sleeping on a CV. (Note that those threads may still\n"
880 "be noteworthy; this is just for a first glance.)  More general filtering\n"
881 "options are described below, in the \"FILTERS\" section.\n"
882 "\n"
883 "::stacks can be used in a pipeline.  The input to ::stacks is one or more\n"
884 "thread pointers.  For example, to get a summary of threads in a process,\n"
885 "you can do:\n"
886 "\n"
887 "  %<b>procp%</b>::walk thread | ::stacks\n"
888 "\n"
889 "When output into a pipe, ::stacks prints all of the threads input,\n"
890 "filtered by the given filtering options.  This means that multiple\n"
891 "::stacks invocations can be piped together to achieve more complicated\n"
892 "filters.  For example, to get threads which have both 'fop_read' and\n"
893 "'cv_wait_sig_swap' in their stack trace, you could do:\n"
894 "\n"
895 "  ::stacks -c fop_read | ::stacks -c cv_wait_sig_swap_core\n"
896 "\n"
897 "To get the full list of threads in each group, use the '-a' flag:\n"
898 "\n"
899 "  ::stacks -a\n"
900 "\n");
901 	mdb_dec_indent(2);
902 	mdb_printf("%<b>OPTIONS%</b>\n");
903 	mdb_inc_indent(2);
904 	mdb_printf("%s",
905 "  -a    Print all of the grouped threads, instead of just a count.\n"
906 "  -f    Force a re-run of the thread stack gathering.\n"
907 "  -v    Be verbose about thread stack gathering.\n"
908 "\n");
909 	mdb_dec_indent(2);
910 	mdb_printf("%<b>FILTERS%</b>\n");
911 	mdb_inc_indent(2);
912 	mdb_printf("%s",
913 "  -i    Show active threads; equivalent to '-S CV -T FREE'.\n"
914 "  -c func[+offset]\n"
915 "        Only print threads whose stacks contain func/func+offset.\n"
916 "  -C func[+offset]\n"
917 "        Only print threads whose stacks do not contain func/func+offset.\n"
918 "  -m module\n"
919 "        Only print threads whose stacks contain functions from module.\n"
920 "  -M module\n"
921 "        Only print threads whose stacks do not contain functions from\n"
922 "        module.\n"
923 "  -s {type | ALL}\n"
924 "        Only print threads which are on a 'type' synchronization object\n"
925 "        (SOBJ).\n"
926 "  -S {type | ALL}\n"
927 "        Only print threads which are not on a 'type' SOBJ.\n"
928 "  -t tstate\n"
929 "        Only print threads which are in thread state 'tstate'.\n"
930 "  -T tstate\n"
931 "        Only print threads which are not in thread state 'tstate'.\n"
932 "\n");
933 	mdb_printf("   SOBJ types:");
934 	sobj_type_walk(print_sobj_help, NULL);
935 	mdb_printf("\n");
936 	mdb_printf("Thread states:");
937 	thread_walk_states(print_tstate_help, NULL);
938 	mdb_printf(" panic\n");
939 }
940 
941 /*ARGSUSED*/
942 int
943 stacks(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
944 {
945 	size_t idx;
946 
947 	char *seen = NULL;
948 
949 	const char *caller_str = NULL;
950 	const char *excl_caller_str = NULL;
951 	uintptr_t caller = 0, excl_caller = 0;
952 	const char *module_str = NULL;
953 	const char *excl_module_str = NULL;
954 	struct modctl *module = NULL, *excl_module = NULL;
955 	const char *sobj = NULL;
956 	const char *excl_sobj = NULL;
957 	uintptr_t sobj_ops = 0, excl_sobj_ops = 0;
958 	const char *tstate_str = NULL;
959 	const char *excl_tstate_str = NULL;
960 	uint_t tstate = -1U;
961 	uint_t excl_tstate = -1U;
962 
963 	uint_t all = 0;
964 	uint_t force = 0;
965 	uint_t interesting = 0;
966 	uint_t verbose = 0;
967 
968 	/*
969 	 * We have a slight behavior difference between having piped
970 	 * input and 'addr::stacks'.  Without a pipe, we assume the
971 	 * thread pointer given is a representative thread, and so
972 	 * we include all similar threads in the system in our output.
973 	 *
974 	 * With a pipe, we filter down to just the threads in our
975 	 * input.
976 	 */
977 	uint_t addrspec = (flags & DCMD_ADDRSPEC);
978 	uint_t only_matching = addrspec && (flags & DCMD_PIPE);
979 
980 	mdb_pipe_t p;
981 
982 	if (mdb_getopts(argc, argv,
983 	    'a', MDB_OPT_SETBITS, TRUE, &all,
984 	    'f', MDB_OPT_SETBITS, TRUE, &force,
985 	    'i', MDB_OPT_SETBITS, TRUE, &interesting,
986 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
987 	    'c', MDB_OPT_STR, &caller_str,
988 	    'C', MDB_OPT_STR, &excl_caller_str,
989 	    'm', MDB_OPT_STR, &module_str,
990 	    'M', MDB_OPT_STR, &excl_module_str,
991 	    's', MDB_OPT_STR, &sobj,
992 	    'S', MDB_OPT_STR, &excl_sobj,
993 	    't', MDB_OPT_STR, &tstate_str,
994 	    'T', MDB_OPT_STR, &excl_tstate_str,
995 	    NULL) != argc)
996 		return (DCMD_USAGE);
997 
998 	if (interesting) {
999 		if (sobj != NULL || excl_sobj != NULL ||
1000 		    tstate_str != NULL || excl_tstate_str != NULL) {
1001 			mdb_warn(
1002 			    "stacks: -i is incompatible with -[sStT]\n");
1003 			return (DCMD_USAGE);
1004 		}
1005 		excl_sobj = "CV";
1006 		excl_tstate_str = "FREE";
1007 	}
1008 
1009 	if (caller_str != NULL) {
1010 		mdb_set_dot(0);
1011 		if (mdb_eval(caller_str) != 0) {
1012 			mdb_warn("stacks: evaluation of \"%s\" failed",
1013 			    caller_str);
1014 			return (DCMD_ABORT);
1015 		}
1016 		caller = mdb_get_dot();
1017 	}
1018 
1019 	if (excl_caller_str != NULL) {
1020 		mdb_set_dot(0);
1021 		if (mdb_eval(excl_caller_str) != 0) {
1022 			mdb_warn("stacks: evaluation of \"%s\" failed",
1023 			    excl_caller_str);
1024 			return (DCMD_ABORT);
1025 		}
1026 		excl_caller = mdb_get_dot();
1027 	}
1028 	mdb_set_dot(addr);
1029 
1030 	if (module_str != NULL &&
1031 	    (module = find_module(module_str)) == NULL) {
1032 		mdb_warn("stacks: module \"%s\" is unknown", module_str);
1033 		return (DCMD_ABORT);
1034 	}
1035 
1036 	if (excl_module_str != NULL &&
1037 	    (excl_module = find_module(excl_module_str)) == NULL) {
1038 		mdb_warn("stacks: module \"%s\" is unknown", excl_module_str);
1039 		return (DCMD_ABORT);
1040 	}
1041 
1042 	if (sobj != NULL &&
1043 	    text_to_sobj(sobj, &sobj_ops) != 0)
1044 		return (DCMD_USAGE);
1045 
1046 	if (excl_sobj != NULL &&
1047 	    text_to_sobj(excl_sobj, &excl_sobj_ops) != 0)
1048 		return (DCMD_USAGE);
1049 
1050 	if (sobj_ops != 0 && excl_sobj_ops != 0) {
1051 		mdb_warn("stacks: only one of -s and -S can be specified\n");
1052 		return (DCMD_USAGE);
1053 	}
1054 
1055 	if (tstate_str != NULL &&
1056 	    text_to_tstate(tstate_str, &tstate) != 0)
1057 		return (DCMD_USAGE);
1058 
1059 	if (excl_tstate_str != NULL &&
1060 	    text_to_tstate(excl_tstate_str, &excl_tstate) != 0)
1061 		return (DCMD_USAGE);
1062 
1063 	if (tstate != -1U && excl_tstate != -1U) {
1064 		mdb_warn("stacks: only one of -t and -T can be specified\n");
1065 		return (DCMD_USAGE);
1066 	}
1067 
1068 	/*
1069 	 * Force a cleanup if we're connected to a live system. Never
1070 	 * do a cleanup after the first invocation around the loop.
1071 	 */
1072 	force |= (mdb_get_state() == MDB_STATE_RUNNING);
1073 	if (force && (flags & (DCMD_LOOPFIRST|DCMD_LOOP)) == DCMD_LOOP)
1074 		force = 0;
1075 
1076 	stacks_cleanup(force);
1077 
1078 	if (stacks_state == STACKS_STATE_CLEAN) {
1079 		int res = stacks_run(verbose);
1080 		if (res != DCMD_OK)
1081 			return (res);
1082 	}
1083 
1084 	if (!all && DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
1085 		mdb_printf("%<u>%-?s %-8s %-?s %8s%</u>\n",
1086 		    "THREAD", "STATE", "SOBJ", "COUNT");
1087 	}
1088 
1089 	/*
1090 	 * If there's an address specified, we're going to further filter
1091 	 * to only entries which have an address in the input.  To reduce
1092 	 * overhead (and make the sorted output come out right), we
1093 	 * use mdb_get_pipe() to grab the entire pipeline of input, then
1094 	 * use qsort() and bsearch() to speed up the search.
1095 	 */
1096 	if (addrspec) {
1097 		mdb_get_pipe(&p);
1098 		if (p.pipe_data == NULL || p.pipe_len == 0) {
1099 			p.pipe_data = &addr;
1100 			p.pipe_len = 1;
1101 		}
1102 		qsort(p.pipe_data, p.pipe_len, sizeof (uintptr_t),
1103 		    uintptrcomp);
1104 
1105 		/* remove any duplicates in the data */
1106 		idx = 0;
1107 		while (idx < p.pipe_len - 1) {
1108 			uintptr_t *data = &p.pipe_data[idx];
1109 			size_t len = p.pipe_len - idx;
1110 
1111 			if (data[0] == data[1]) {
1112 				memmove(data, data + 1,
1113 				    (len - 1) * sizeof (*data));
1114 				p.pipe_len--;
1115 				continue; /* repeat without incrementing idx */
1116 			}
1117 			idx++;
1118 		}
1119 
1120 		seen = mdb_zalloc(p.pipe_len, UM_SLEEP | UM_GC);
1121 	}
1122 
1123 	for (idx = 0; idx < stacks_array_size; idx++) {
1124 		stacks_entry_t *sep = stacks_array[idx];
1125 		stacks_entry_t *cur = sep;
1126 		int frame;
1127 		size_t count = sep->se_count;
1128 
1129 		if (addrspec) {
1130 			stacks_entry_t *head = NULL, *tail = NULL, *sp;
1131 			size_t foundcount = 0;
1132 			/*
1133 			 * We use the now-unused hash chain field se_next to
1134 			 * link together the dups which match our list.
1135 			 */
1136 			for (sp = sep; sp != NULL; sp = sp->se_dup) {
1137 				uintptr_t *entry = bsearch(&sp->se_thread,
1138 				    p.pipe_data, p.pipe_len, sizeof (uintptr_t),
1139 				    uintptrcomp);
1140 				if (entry != NULL) {
1141 					foundcount++;
1142 					seen[entry - p.pipe_data]++;
1143 					if (head == NULL)
1144 						head = sp;
1145 					else
1146 						tail->se_next = sp;
1147 					tail = sp;
1148 					sp->se_next = NULL;
1149 				}
1150 			}
1151 			if (head == NULL)
1152 				continue;	/* no match, skip entry */
1153 
1154 			if (only_matching) {
1155 				cur = sep = head;
1156 				count = foundcount;
1157 			}
1158 		}
1159 
1160 		if (caller != 0 && !stacks_has_caller(sep, caller))
1161 			continue;
1162 		if (excl_caller != 0 && stacks_has_caller(sep, excl_caller))
1163 			continue;
1164 		if (module != 0 && !stacks_has_module(sep, module))
1165 			continue;
1166 		if (excl_module != 0 && stacks_has_module(sep, excl_module))
1167 			continue;
1168 
1169 		if (tstate != -1U) {
1170 			if (tstate == TSTATE_PANIC) {
1171 				if (!sep->se_panic)
1172 					continue;
1173 			} else if (sep->se_panic || sep->se_tstate != tstate)
1174 				continue;
1175 		}
1176 		if (excl_tstate != -1U) {
1177 			if (excl_tstate == TSTATE_PANIC) {
1178 				if (sep->se_panic)
1179 					continue;
1180 			} else if (!sep->se_panic &&
1181 			    sep->se_tstate == excl_tstate)
1182 				continue;
1183 		}
1184 
1185 		if (sobj_ops == SOBJ_ALL) {
1186 			if (sep->se_sobj_ops == 0)
1187 				continue;
1188 		} else if (sobj_ops != 0) {
1189 			if (sobj_ops != sep->se_sobj_ops)
1190 				continue;
1191 		}
1192 
1193 		if (!(interesting && sep->se_panic)) {
1194 			if (excl_sobj_ops == SOBJ_ALL) {
1195 				if (sep->se_sobj_ops != 0)
1196 					continue;
1197 			} else if (excl_sobj_ops != 0) {
1198 				if (excl_sobj_ops == sep->se_sobj_ops)
1199 					continue;
1200 			}
1201 		}
1202 
1203 		if (flags & DCMD_PIPE_OUT) {
1204 			while (sep != NULL) {
1205 				mdb_printf("%lr\n", sep->se_thread);
1206 				sep = only_matching ?
1207 				    sep->se_next : sep->se_dup;
1208 			}
1209 			continue;
1210 		}
1211 
1212 		if (all) {
1213 			mdb_printf("%<u>%-?s %-8s %-?s %8s%</u>\n",
1214 			    "THREAD", "STATE", "SOBJTYPE", "COUNT");
1215 		}
1216 
1217 		do {
1218 			char state[20];
1219 			char sobj[100];
1220 
1221 			tstate_to_text(cur->se_tstate, cur->se_panic,
1222 			    state, sizeof (state));
1223 			sobj_to_text(cur->se_sobj_ops,
1224 			    sobj, sizeof (sobj));
1225 
1226 			if (cur == sep)
1227 				mdb_printf("%?p %-8s %-?s %8d\n",
1228 				    cur->se_thread, state, sobj, count);
1229 			else
1230 				mdb_printf("%?p %-8s %-?s %8s\n",
1231 				    cur->se_thread, state, sobj, "-");
1232 
1233 			cur = only_matching ? cur->se_next : cur->se_dup;
1234 		} while (all && cur != NULL);
1235 
1236 		if (sep->se_failed != 0) {
1237 			char *reason;
1238 			switch (sep->se_failed) {
1239 			case FSI_FAIL_NOTINMEMORY:
1240 				reason = "thread not in memory";
1241 				break;
1242 			case FSI_FAIL_THREADCORRUPT:
1243 				reason = "thread structure stack info corrupt";
1244 				break;
1245 			case FSI_FAIL_STACKNOTFOUND:
1246 				reason = "no consistent stack found";
1247 				break;
1248 			default:
1249 				reason = "unknown failure";
1250 				break;
1251 			}
1252 			mdb_printf("%?s <%s>\n", "", reason);
1253 		}
1254 
1255 		for (frame = 0; frame < sep->se_depth; frame++)
1256 			mdb_printf("%?s %a\n", "", sep->se_stack[frame]);
1257 		if (sep->se_overflow)
1258 			mdb_printf("%?s ... truncated ...\n", "");
1259 		mdb_printf("\n");
1260 	}
1261 
1262 	if (flags & DCMD_ADDRSPEC) {
1263 		for (idx = 0; idx < p.pipe_len; idx++)
1264 			if (seen[idx] == 0)
1265 				mdb_warn("stacks: %p not in thread list\n",
1266 				    p.pipe_data[idx]);
1267 	}
1268 	return (DCMD_OK);
1269 }
1270