xref: /illumos-gate/usr/src/cmd/mdb/common/modules/genunix/thread.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
27  * Copyright (c) 2018, Joyent, Inc.
28  */
29 
30 
31 #include <mdb/mdb_modapi.h>
32 #include <mdb/mdb_ks.h>
33 #include <mdb/mdb_ctf.h>
34 #include <sys/types.h>
35 #include <sys/thread.h>
36 #include <sys/lwp.h>
37 #include <sys/proc.h>
38 #include <sys/cpuvar.h>
39 #include <sys/cpupart.h>
40 #include <sys/disp.h>
41 #include <sys/taskq_impl.h>
42 #include <sys/stack.h>
43 #include "thread.h"
44 
/*
 * Supply a zero STACK_BIAS when none of the headers included above has
 * already defined one.
 */
#if !defined(STACK_BIAS)
#define	STACK_BIAS	0
#endif
48 
49 typedef struct thread_walk {
50 	kthread_t *tw_thread;
51 	uintptr_t tw_last;
52 	uint_t tw_inproc;
53 	uint_t tw_step;
54 } thread_walk_t;
55 
56 int
57 thread_walk_init(mdb_walk_state_t *wsp)
58 {
59 	thread_walk_t *twp = mdb_alloc(sizeof (thread_walk_t), UM_SLEEP);
60 
61 	if (wsp->walk_addr == 0) {
62 		if (mdb_readvar(&wsp->walk_addr, "allthreads") == -1) {
63 			mdb_warn("failed to read 'allthreads'");
64 			mdb_free(twp, sizeof (thread_walk_t));
65 			return (WALK_ERR);
66 		}
67 
68 		twp->tw_inproc = FALSE;
69 
70 	} else {
71 		proc_t pr;
72 
73 		if (mdb_vread(&pr, sizeof (proc_t), wsp->walk_addr) == -1) {
74 			mdb_warn("failed to read proc at %p", wsp->walk_addr);
75 			mdb_free(twp, sizeof (thread_walk_t));
76 			return (WALK_ERR);
77 		}
78 
79 		wsp->walk_addr = (uintptr_t)pr.p_tlist;
80 		twp->tw_inproc = TRUE;
81 	}
82 
83 	twp->tw_thread = mdb_alloc(sizeof (kthread_t), UM_SLEEP);
84 	twp->tw_last = wsp->walk_addr;
85 	twp->tw_step = FALSE;
86 
87 	wsp->walk_data = twp;
88 	return (WALK_NEXT);
89 }
90 
91 int
92 thread_walk_step(mdb_walk_state_t *wsp)
93 {
94 	thread_walk_t *twp = (thread_walk_t *)wsp->walk_data;
95 	int status;
96 
97 	if (wsp->walk_addr == 0)
98 		return (WALK_DONE); /* Proc has 0 threads or allthreads = 0 */
99 
100 	if (twp->tw_step && wsp->walk_addr == twp->tw_last)
101 		return (WALK_DONE); /* We've wrapped around */
102 
103 	if (mdb_vread(twp->tw_thread, sizeof (kthread_t),
104 	    wsp->walk_addr) == -1) {
105 		mdb_warn("failed to read thread at %p", wsp->walk_addr);
106 		return (WALK_DONE);
107 	}
108 
109 	status = wsp->walk_callback(wsp->walk_addr, twp->tw_thread,
110 	    wsp->walk_cbdata);
111 
112 	if (twp->tw_inproc)
113 		wsp->walk_addr = (uintptr_t)twp->tw_thread->t_forw;
114 	else
115 		wsp->walk_addr = (uintptr_t)twp->tw_thread->t_next;
116 
117 	twp->tw_step = TRUE;
118 	return (status);
119 }
120 
121 void
122 thread_walk_fini(mdb_walk_state_t *wsp)
123 {
124 	thread_walk_t *twp = (thread_walk_t *)wsp->walk_data;
125 
126 	mdb_free(twp->tw_thread, sizeof (kthread_t));
127 	mdb_free(twp, sizeof (thread_walk_t));
128 }
129 
130 int
131 deathrow_walk_init(mdb_walk_state_t *wsp)
132 {
133 	if (mdb_layered_walk("thread_deathrow", wsp) == -1) {
134 		mdb_warn("couldn't walk 'thread_deathrow'");
135 		return (WALK_ERR);
136 	}
137 
138 	if (mdb_layered_walk("lwp_deathrow", wsp) == -1) {
139 		mdb_warn("couldn't walk 'lwp_deathrow'");
140 		return (WALK_ERR);
141 	}
142 
143 	return (WALK_NEXT);
144 }
145 
146 int
147 deathrow_walk_step(mdb_walk_state_t *wsp)
148 {
149 	kthread_t t;
150 	uintptr_t addr = wsp->walk_addr;
151 
152 	if (addr == 0)
153 		return (WALK_DONE);
154 
155 	if (mdb_vread(&t, sizeof (t), addr) == -1) {
156 		mdb_warn("couldn't read deathrow thread at %p", addr);
157 		return (WALK_ERR);
158 	}
159 
160 	wsp->walk_addr = (uintptr_t)t.t_forw;
161 
162 	return (wsp->walk_callback(addr, &t, wsp->walk_cbdata));
163 }
164 
165 int
166 thread_deathrow_walk_init(mdb_walk_state_t *wsp)
167 {
168 	if (mdb_readvar(&wsp->walk_addr, "thread_deathrow") == -1) {
169 		mdb_warn("couldn't read symbol 'thread_deathrow'");
170 		return (WALK_ERR);
171 	}
172 
173 	return (WALK_NEXT);
174 }
175 
176 int
177 lwp_deathrow_walk_init(mdb_walk_state_t *wsp)
178 {
179 	if (mdb_readvar(&wsp->walk_addr, "lwp_deathrow") == -1) {
180 		mdb_warn("couldn't read symbol 'lwp_deathrow'");
181 		return (WALK_ERR);
182 	}
183 
184 	return (WALK_NEXT);
185 }
186 
187 
/*
 * Private state shared by the cpu_dispq and cpupart_dispq walkers.
 */
struct dispq_walk {
	int dw_npri;		/* priority queues left, counting this one */
	uintptr_t dw_dispq;	/* address of the dispq_t being walked */
	uintptr_t dw_last;	/* dq_last of the dispq_t being walked */
};

typedef struct dispq_walk dispq_walk_t;
193 
194 int
195 cpu_dispq_walk_init(mdb_walk_state_t *wsp)
196 {
197 	uintptr_t addr = wsp->walk_addr;
198 	dispq_walk_t *dw;
199 	cpu_t cpu;
200 	dispq_t dispq;
201 	disp_t disp;
202 
203 	if (addr == 0) {
204 		mdb_warn("cpu_dispq walk needs a cpu_t address\n");
205 		return (WALK_ERR);
206 	}
207 
208 	if (mdb_vread(&cpu, sizeof (cpu_t), addr) == -1) {
209 		mdb_warn("failed to read cpu_t at %p", addr);
210 		return (WALK_ERR);
211 	}
212 
213 	if (mdb_vread(&disp, sizeof (disp_t), (uintptr_t)cpu.cpu_disp) == -1) {
214 		mdb_warn("failed to read disp_t at %p", cpu.cpu_disp);
215 		return (WALK_ERR);
216 	}
217 
218 	if (mdb_vread(&dispq, sizeof (dispq_t),
219 	    (uintptr_t)disp.disp_q) == -1) {
220 		mdb_warn("failed to read dispq_t at %p", disp.disp_q);
221 		return (WALK_ERR);
222 	}
223 
224 	dw = mdb_alloc(sizeof (dispq_walk_t), UM_SLEEP);
225 
226 	dw->dw_npri = disp.disp_npri;
227 	dw->dw_dispq = (uintptr_t)disp.disp_q;
228 	dw->dw_last = (uintptr_t)dispq.dq_last;
229 
230 	wsp->walk_addr = (uintptr_t)dispq.dq_first;
231 	wsp->walk_data = dw;
232 
233 	return (WALK_NEXT);
234 }
235 
236 int
237 cpupart_dispq_walk_init(mdb_walk_state_t *wsp)
238 {
239 	uintptr_t addr = wsp->walk_addr;
240 	dispq_walk_t *dw;
241 	cpupart_t cpupart;
242 	dispq_t dispq;
243 
244 	if (addr == 0) {
245 		mdb_warn("cpupart_dispq walk needs a cpupart_t address\n");
246 		return (WALK_ERR);
247 	}
248 
249 	if (mdb_vread(&cpupart, sizeof (cpupart_t), addr) == -1) {
250 		mdb_warn("failed to read cpupart_t at %p", addr);
251 		return (WALK_ERR);
252 	}
253 
254 	if (mdb_vread(&dispq, sizeof (dispq_t),
255 	    (uintptr_t)cpupart.cp_kp_queue.disp_q) == -1) {
256 		mdb_warn("failed to read dispq_t at %p",
257 		    cpupart.cp_kp_queue.disp_q);
258 		return (WALK_ERR);
259 	}
260 
261 	dw = mdb_alloc(sizeof (dispq_walk_t), UM_SLEEP);
262 
263 	dw->dw_npri = cpupart.cp_kp_queue.disp_npri;
264 	dw->dw_dispq = (uintptr_t)cpupart.cp_kp_queue.disp_q;
265 	dw->dw_last = (uintptr_t)dispq.dq_last;
266 
267 	wsp->walk_addr = (uintptr_t)dispq.dq_first;
268 	wsp->walk_data = dw;
269 
270 	return (WALK_NEXT);
271 }
272 
273 int
274 dispq_walk_step(mdb_walk_state_t *wsp)
275 {
276 	uintptr_t addr = wsp->walk_addr;
277 	dispq_walk_t *dw = wsp->walk_data;
278 	dispq_t dispq;
279 	kthread_t t;
280 
281 	while (addr == 0) {
282 		if (--dw->dw_npri == 0)
283 			return (WALK_DONE);
284 
285 		dw->dw_dispq += sizeof (dispq_t);
286 
287 		if (mdb_vread(&dispq, sizeof (dispq_t), dw->dw_dispq) == -1) {
288 			mdb_warn("failed to read dispq_t at %p", dw->dw_dispq);
289 			return (WALK_ERR);
290 		}
291 
292 		dw->dw_last = (uintptr_t)dispq.dq_last;
293 		addr = (uintptr_t)dispq.dq_first;
294 	}
295 
296 	if (mdb_vread(&t, sizeof (kthread_t), addr) == -1) {
297 		mdb_warn("failed to read kthread_t at %p", addr);
298 		return (WALK_ERR);
299 	}
300 
301 	if (addr == dw->dw_last)
302 		wsp->walk_addr = 0;
303 	else
304 		wsp->walk_addr = (uintptr_t)t.t_link;
305 
306 	return (wsp->walk_callback(addr, &t, wsp->walk_cbdata));
307 }
308 
309 void
310 dispq_walk_fini(mdb_walk_state_t *wsp)
311 {
312 	mdb_free(wsp->walk_data, sizeof (dispq_walk_t));
313 }
314 
315 struct thread_state {
316 	uint_t ts_state;
317 	const char *ts_name;
318 } thread_states[] = {
319 	{ TS_FREE,	"free" },
320 	{ TS_SLEEP,	"sleep" },
321 	{ TS_RUN,	"run" },
322 	{ TS_ONPROC,	"onproc" },
323 	{ TS_ZOMB,	"zomb" },
324 	{ TS_STOPPED,	"stopped" },
325 	{ TS_WAIT,	"wait" }
326 };
327 #define	NUM_THREAD_STATES (sizeof (thread_states) / sizeof (*thread_states))
328 
329 void
330 thread_state_to_text(uint_t state, char *out, size_t out_sz)
331 {
332 	int idx;
333 
334 	for (idx = 0; idx < NUM_THREAD_STATES; idx++) {
335 		struct thread_state *tsp = &thread_states[idx];
336 		if (tsp->ts_state == state) {
337 			mdb_snprintf(out, out_sz, "%s", tsp->ts_name);
338 			return;
339 		}
340 	}
341 	mdb_snprintf(out, out_sz, "inval/%02x", state);
342 }
343 
344 int
345 thread_text_to_state(const char *state, uint_t *out)
346 {
347 	int idx;
348 
349 	for (idx = 0; idx < NUM_THREAD_STATES; idx++) {
350 		struct thread_state *tsp = &thread_states[idx];
351 		if (strcasecmp(tsp->ts_name, state) == 0) {
352 			*out = tsp->ts_state;
353 			return (0);
354 		}
355 	}
356 	return (-1);
357 }
358 
359 void
360 thread_walk_states(void (*cbfunc)(uint_t, const char *, void *), void *cbarg)
361 {
362 	int idx;
363 
364 	for (idx = 0; idx < NUM_THREAD_STATES; idx++) {
365 		struct thread_state *tsp = &thread_states[idx];
366 		cbfunc(tsp->ts_state, tsp->ts_name, cbarg);
367 	}
368 }
369 
370 #define	TF_INTR		0x01
371 #define	TF_PROC		0x02
372 #define	TF_BLOCK	0x04
373 #define	TF_SIG		0x08
374 #define	TF_DISP		0x10
375 #define	TF_MERGE	0x20
376 
377 /*
378  * Display a kthread_t.
379  * This is a little complicated, as there is a lot of information that
380  * the user could be interested in.  The flags "ipbsd" are used to
381  * indicate which subset of the thread's members are to be displayed
382  * ('i' is the default).  If multiple options are specified, multiple
383  * sets of data will be displayed in a vaguely readable format.  If the
384  * 'm' option is specified, all the selected sets will be merged onto a
385  * single line for the benefit of those using wider-than-normal
386  * terminals.  Having a generic mechanism for doing this would be
387  * really useful, but is a project best left to another day.
388  */
389 
390 int
391 thread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
392 {
393 	kthread_t	t;
394 	uint_t		oflags = 0;
395 	uint_t		fflag = FALSE;
396 	int		first;
397 	char		stbuf[20];
398 
399 	/*
400 	 * "Gracefully" handle printing a boatload of stuff to the
401 	 * screen.  If we are not printing our first set of data, and
402 	 * we haven't been instructed to merge sets together, output a
403 	 * newline and indent such that the thread addresses form a
404 	 * column of their own.
405 	 */
406 #define	SPACER()				\
407 	if (first) {				\
408 		first = FALSE;			\
409 	} else if (!(oflags & TF_MERGE)) {	\
410 		mdb_printf("\n%?s", "");	\
411 	}
412 
413 	if (!(flags & DCMD_ADDRSPEC)) {
414 		if (mdb_walk_dcmd("thread", "thread", argc, argv) == -1) {
415 			mdb_warn("can't walk threads");
416 			return (DCMD_ERR);
417 		}
418 		return (DCMD_OK);
419 	}
420 
421 	if (mdb_getopts(argc, argv,
422 	    'f', MDB_OPT_SETBITS, TRUE, &fflag,
423 	    'i', MDB_OPT_SETBITS, TF_INTR, &oflags,
424 	    'p', MDB_OPT_SETBITS, TF_PROC, &oflags,
425 	    'b', MDB_OPT_SETBITS, TF_BLOCK, &oflags,
426 	    's', MDB_OPT_SETBITS, TF_SIG, &oflags,
427 	    'd', MDB_OPT_SETBITS, TF_DISP, &oflags,
428 	    'm', MDB_OPT_SETBITS, TF_MERGE, &oflags, NULL) != argc)
429 		return (DCMD_USAGE);
430 
431 	/*
432 	 * If no sets were specified, choose the 'i' set.
433 	 */
434 	if (!(oflags & ~TF_MERGE))
435 #ifdef	_LP64
436 		oflags = TF_INTR;
437 #else
438 		oflags = TF_INTR | TF_DISP | TF_MERGE;
439 #endif
440 
441 	/*
442 	 * Print the relevant headers; note use of SPACER().
443 	 */
444 	if (DCMD_HDRSPEC(flags)) {
445 		first = TRUE;
446 		mdb_printf("%<u>%?s%</u>", "ADDR");
447 		mdb_flush();
448 
449 		if (oflags & TF_PROC) {
450 			SPACER();
451 			mdb_printf("%<u> %?s %?s %?s%</u>",
452 			    "PROC", "LWP", "CRED");
453 		}
454 
455 		if (oflags & TF_INTR) {
456 			SPACER();
457 			mdb_printf("%<u> %8s %4s %4s %4s %5s %5s %3s %?s%</u>",
458 			    "STATE", "FLG", "PFLG",
459 			    "SFLG", "PRI", "EPRI", "PIL", "INTR");
460 		}
461 
462 		if (oflags & TF_BLOCK) {
463 			SPACER();
464 			mdb_printf("%<u> %?s %?s %?s %11s%</u>",
465 			    "WCHAN", "TS", "PITS", "SOBJ OPS");
466 		}
467 
468 		if (oflags & TF_SIG) {
469 			SPACER();
470 			mdb_printf("%<u> %?s %16s %16s%</u>",
471 			    "SIGQUEUE", "SIG PEND", "SIG HELD");
472 		}
473 
474 		if (oflags & TF_DISP) {
475 			SPACER();
476 			mdb_printf("%<u> %?s %5s %2s %-6s%</u>",
477 			    "DISPTIME", "BOUND", "PR", "SWITCH");
478 		}
479 		mdb_printf("\n");
480 	}
481 
482 	if (mdb_vread(&t, sizeof (kthread_t), addr) == -1) {
483 		mdb_warn("can't read kthread_t at %#lx", addr);
484 		return (DCMD_ERR);
485 	}
486 
487 	if (fflag && (t.t_state == TS_FREE))
488 		return (DCMD_OK);
489 
490 	first = TRUE;
491 	mdb_printf("%0?lx", addr);
492 
493 	/* process information */
494 	if (oflags & TF_PROC) {
495 		SPACER();
496 		mdb_printf(" %?p %?p %?p", t.t_procp, t.t_lwp, t.t_cred);
497 	}
498 
499 	/* priority/interrupt information */
500 	if (oflags & TF_INTR) {
501 		SPACER();
502 		thread_state_to_text(t.t_state, stbuf, sizeof (stbuf));
503 		if (t.t_intr == NULL) {
504 			mdb_printf(" %-8s %4x %4x %4x %5d %5d %3d %?s",
505 			    stbuf, t.t_flag, t.t_proc_flag, t.t_schedflag,
506 			    t.t_pri, t.t_epri, t.t_pil, "n/a");
507 		} else {
508 			mdb_printf(" %-8s %4x %4x %4x %5d %5d %3d %?p",
509 			    stbuf, t.t_flag, t.t_proc_flag, t.t_schedflag,
510 			    t.t_pri, t.t_epri, t.t_pil, t.t_intr);
511 		}
512 	}
513 
514 	/* blocking information */
515 	if (oflags & TF_BLOCK) {
516 		SPACER();
517 		(void) mdb_snprintf(stbuf, 20, "%a", t.t_sobj_ops);
518 		stbuf[11] = '\0';
519 		mdb_printf(" %?p %?p %?p %11s",
520 		    t.t_wchan, t.t_ts, t.t_prioinv, stbuf);
521 	}
522 
523 	/* signal information */
524 	if (oflags & TF_SIG) {
525 		SPACER();
526 		mdb_printf(" %?p %016llx %016llx",
527 		    t.t_sigqueue, t.t_sig, t.t_hold);
528 	}
529 
530 	/* dispatcher stuff */
531 	if (oflags & TF_DISP) {
532 		SPACER();
533 		mdb_printf(" %?lx %5d %2d ",
534 		    t.t_disp_time, t.t_bind_cpu, t.t_preempt);
535 		if (t.t_disp_time != 0)
536 			mdb_printf("t-%-4d",
537 			    (clock_t)mdb_get_lbolt() - t.t_disp_time);
538 		else
539 			mdb_printf("%-6s", "-");
540 	}
541 
542 	mdb_printf("\n");
543 
544 #undef SPACER
545 
546 	return (DCMD_OK);
547 }
548 
/*
 * Print the ::thread usage text: a short overview followed by the list
 * of options.
 */
void
thread_help(void)
{
	static const char overview[] =
	    "The flags -ipbsd control which information is displayed.  When\n"
	    "combined, the fields are displayed on separate lines unless the\n"
	    "-m option is given.\n"
	    "\n";
	static const char options[] =
	    "\t-b\tprint blocked thread state\n"
	    "\t-d\tprint dispatcher state\n"
	    "\t-f\tignore freed threads\n"
	    "\t-i\tprint basic thread state (default)\n"
	    "\t-m\tdisplay results on a single line\n"
	    "\t-p\tprint process and lwp state\n"
	    "\t-s\tprint signal state\n";

	mdb_printf("%s%s", overview, options);
}
565 
566 /*
567  * Return a string description of the thread, including the ID and the thread
568  * name.
569  *
570  * If ->t_name is NULL, and we're a system thread, we'll do a little more
571  * spelunking to find a useful string to return.
572  */
573 int
574 thread_getdesc(uintptr_t addr, boolean_t include_comm,
575     char *buf, size_t bufsize)
576 {
577 	char name[THREAD_NAME_MAX] = "";
578 	kthread_t t;
579 	proc_t p;
580 
581 	bzero(buf, bufsize);
582 
583 	if (mdb_vread(&t, sizeof (kthread_t), addr) == -1) {
584 		mdb_warn("failed to read kthread_t at %p", addr);
585 		return (-1);
586 	}
587 
588 	if (t.t_tid == 0) {
589 		taskq_t tq;
590 
591 		if (mdb_vread(&tq, sizeof (taskq_t),
592 		    (uintptr_t)t.t_taskq) == -1)
593 			tq.tq_name[0] = '\0';
594 
595 		if (t.t_name != NULL) {
596 			if (mdb_readstr(buf, bufsize,
597 			    (uintptr_t)t.t_name) == -1) {
598 				mdb_warn("error reading thread name");
599 			}
600 		} else if (tq.tq_name[0] != '\0') {
601 			(void) mdb_snprintf(buf, bufsize, "tq:%s", tq.tq_name);
602 		} else {
603 			mdb_snprintf(buf, bufsize, "%a()", t.t_startpc);
604 		}
605 
606 		return (buf[0] == '\0' ? -1 : 0);
607 	}
608 
609 	if (include_comm && mdb_vread(&p, sizeof (proc_t),
610 	    (uintptr_t)t.t_procp) == -1) {
611 		mdb_warn("failed to read proc at %p", t.t_procp);
612 		return (-1);
613 	}
614 
615 	if (t.t_name != NULL) {
616 		if (mdb_readstr(name, sizeof (name), (uintptr_t)t.t_name) == -1)
617 			mdb_warn("error reading thread name");
618 
619 		/*
620 		 * Just to be safe -- if mdb_readstr() succeeds, it always NUL
621 		 * terminates the output, but is unclear what it does on
622 		 * failure.  In that case we attempt to show any partial content
623 		 * w/ the warning in case it's useful, but explicitly
624 		 * NUL-terminate to be safe.
625 		 */
626 		buf[bufsize - 1] = '\0';
627 	}
628 
629 	if (name[0] != '\0') {
630 		if (include_comm) {
631 			(void) mdb_snprintf(buf, bufsize, "%s/%u [%s]",
632 			    p.p_user.u_comm, t.t_tid, name);
633 		} else {
634 			(void) mdb_snprintf(buf, bufsize, "%u [%s]",
635 			    t.t_tid, name);
636 		}
637 	} else {
638 		if (include_comm) {
639 			(void) mdb_snprintf(buf, bufsize, "%s/%u",
640 			    p.p_user.u_comm, t.t_tid);
641 		} else {
642 			(void) mdb_snprintf(buf, bufsize, "%u", t.t_tid);
643 		}
644 	}
645 
646 	return (buf[0] == '\0' ? -1 : 0);
647 }
648 
649 /*
650  * List a combination of kthread_t and proc_t. Add stack traces in verbose mode.
651  */
652 int
653 threadlist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
654 {
655 	int i;
656 	uint_t count =  0;
657 	uint_t verbose = FALSE;
658 	uint_t notaskq = FALSE;
659 	kthread_t t;
660 	char cmd[80];
661 	mdb_arg_t cmdarg;
662 
663 	if (!(flags & DCMD_ADDRSPEC)) {
664 		if (mdb_walk_dcmd("thread", "threadlist", argc, argv) == -1) {
665 			mdb_warn("can't walk threads");
666 			return (DCMD_ERR);
667 		}
668 		return (DCMD_OK);
669 	}
670 
671 	i = mdb_getopts(argc, argv,
672 	    't', MDB_OPT_SETBITS, TRUE, &notaskq,
673 	    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL);
674 
675 	if (i != argc) {
676 		if (i != argc - 1 || !verbose)
677 			return (DCMD_USAGE);
678 
679 		if (argv[i].a_type == MDB_TYPE_IMMEDIATE)
680 			count = (uint_t)argv[i].a_un.a_val;
681 		else
682 			count = (uint_t)mdb_strtoull(argv[i].a_un.a_str);
683 	}
684 
685 	if (DCMD_HDRSPEC(flags)) {
686 		if (verbose)
687 			mdb_printf("%<u>%?s %?s %?s %3s %3s %?s%</u>\n",
688 			    "ADDR", "PROC", "LWP", "CLS", "PRI", "WCHAN");
689 		else
690 			mdb_printf("%<u>%?s %?s %?s %s/%s%</u>\n",
691 			    "ADDR", "PROC", "LWP", "CMD", "LWPID");
692 	}
693 
694 	if (mdb_vread(&t, sizeof (kthread_t), addr) == -1) {
695 		mdb_warn("failed to read kthread_t at %p", addr);
696 		return (DCMD_ERR);
697 	}
698 
699 	if (notaskq && t.t_taskq != NULL)
700 		return (DCMD_OK);
701 
702 	if (t.t_state == TS_FREE)
703 		return (DCMD_OK);
704 
705 	if (!verbose) {
706 		char desc[128];
707 
708 		if (thread_getdesc(addr, B_TRUE, desc, sizeof (desc)) == -1)
709 			return (DCMD_ERR);
710 
711 		mdb_printf("%0?p %?p %?p %s\n", addr, t.t_procp, t.t_lwp, desc);
712 		return (DCMD_OK);
713 	}
714 
715 	mdb_printf("%0?p %?p %?p %3u %3d %?p\n",
716 	    addr, t.t_procp, t.t_lwp, t.t_cid, t.t_pri, t.t_wchan);
717 
718 	mdb_inc_indent(2);
719 
720 	mdb_printf("PC: %a\n", t.t_pc);
721 
722 	mdb_snprintf(cmd, sizeof (cmd), "<.$c%d", count);
723 	cmdarg.a_type = MDB_TYPE_STRING;
724 	cmdarg.a_un.a_str = cmd;
725 
726 	(void) mdb_call_dcmd("findstack", addr, flags, 1, &cmdarg);
727 
728 	mdb_dec_indent(2);
729 
730 	mdb_printf("\n");
731 
732 	return (DCMD_OK);
733 }
734 
/*
 * Print the ::threadlist option summary, one line per option.
 */
void
threadlist_help(void)
{
	mdb_printf(
	    "   -v         print verbose output including C stack trace\n");
	mdb_printf(
	    "   -t         skip threads belonging to a taskq\n");
	mdb_printf(
	    "   count      print no more than count arguments (default 0)\n");
}
743 
/*
 * Compute how much of a stack is in use, as a percentage.
 *
 * t_stk and t_stkbase delimit the stack; the stack is taken to grow down
 * when t_stk is above t_stkbase and up otherwise.  sp is the position to
 * measure.  A position on the unused side of t_stk yields 0, a position
 * beyond t_stkbase yields 100.  In between, the result is the used fraction
 * of the (inclusive) range, plus one, capped at 100.
 */
static size_t
stk_compute_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
{
	const int grows_down = (t_stk > t_stkbase);
	size_t used;
	size_t total;
	size_t pct;

	if (grows_down) {
		if (sp > t_stk)
			return (0);
		if (sp < t_stkbase)
			return (100);

		used = t_stk - sp + 1;
		total = t_stk - t_stkbase + 1;
	} else {
		if (sp < t_stk)
			return (0);
		if (sp > t_stkbase)
			return (100);

		used = sp - t_stk + 1;
		total = t_stkbase - t_stk + 1;
	}

	pct = ((100 * used) / total) + 1;

	return (pct > 100 ? 100 : pct);
}
777 
778 /*
779  * Display kthread stack infos.
780  */
781 int
782 stackinfo(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
783 {
784 	kthread_t t;
785 	uint64_t *ptr;  /* pattern pointer */
786 	caddr_t	start;	/* kernel stack start */
787 	caddr_t end;	/* kernel stack end */
788 	caddr_t ustack;	/* userland copy of kernel stack */
789 	size_t usize;	/* userland copy of kernel stack size */
790 	caddr_t ustart;	/* userland copy of kernel stack, aligned start */
791 	caddr_t uend;	/* userland copy of kernel stack, aligned end */
792 	size_t percent = 0;
793 	uint_t all = FALSE; /* don't show TS_FREE kthread by default */
794 	uint_t history = FALSE;
795 	int i = 0;
796 	unsigned int ukmem_stackinfo;
797 	uintptr_t allthreads;
798 	char tdesc[128] = "";
799 
800 	/* handle options */
801 	if (mdb_getopts(argc, argv,
802 	    'a', MDB_OPT_SETBITS, TRUE, &all,
803 	    'h', MDB_OPT_SETBITS, TRUE, &history, NULL) != argc) {
804 		return (DCMD_USAGE);
805 	}
806 
807 	/* walk all kthread if needed */
808 	if ((history == FALSE) && !(flags & DCMD_ADDRSPEC)) {
809 		if (mdb_walk_dcmd("thread", "stackinfo", argc, argv) == -1) {
810 			mdb_warn("can't walk threads");
811 			return (DCMD_ERR);
812 		}
813 		return (DCMD_OK);
814 	}
815 
816 	/* read 'kmem_stackinfo' */
817 	if (mdb_readsym(&ukmem_stackinfo, sizeof (ukmem_stackinfo),
818 	    "kmem_stackinfo") == -1) {
819 		mdb_warn("failed to read 'kmem_stackinfo'\n");
820 		ukmem_stackinfo = 0;
821 	}
822 
823 	/* read 'allthreads' */
824 	if (mdb_readsym(&allthreads, sizeof (kthread_t *),
825 	    "allthreads") == -1) {
826 		mdb_warn("failed to read 'allthreads'\n");
827 		allthreads = 0;
828 	}
829 
830 	if (history == TRUE) {
831 		kmem_stkinfo_t *log;
832 		uintptr_t kaddr;
833 
834 		mdb_printf("Dead kthreads stack usage history:\n");
835 		if (ukmem_stackinfo == 0) {
836 			mdb_printf("Tunable kmem_stackinfo is unset, history ");
837 			mdb_printf("feature is off.\nUse ::help stackinfo ");
838 			mdb_printf("for more details.\n");
839 			return (DCMD_OK);
840 		}
841 
842 		mdb_printf("%<u>%?s%</u>", "THREAD");
843 		mdb_printf(" %<u>%?s%</u>", "STACK");
844 		mdb_printf("%<u>%s%</u>", "   SIZE  MAX LWP");
845 		mdb_printf("\n");
846 		usize = KMEM_STKINFO_LOG_SIZE * sizeof (kmem_stkinfo_t);
847 		log = (kmem_stkinfo_t *)mdb_alloc(usize, UM_SLEEP);
848 		if (mdb_readsym(&kaddr, sizeof (kaddr),
849 		    "kmem_stkinfo_log") == -1) {
850 			mdb_free((void *)log, usize);
851 			mdb_warn("failed to read 'kmem_stkinfo_log'\n");
852 			return (DCMD_ERR);
853 		}
854 		if (kaddr == 0) {
855 			mdb_free((void *)log, usize);
856 			return (DCMD_OK);
857 		}
858 		if (mdb_vread(log, usize, kaddr) == -1) {
859 			mdb_free((void *)log, usize);
860 			mdb_warn("failed to read %p\n", kaddr);
861 			return (DCMD_ERR);
862 		}
863 		for (i = 0; i < KMEM_STKINFO_LOG_SIZE; i++) {
864 			if (log[i].kthread == NULL) {
865 				continue;
866 			}
867 
868 			(void) thread_getdesc((uintptr_t)log[i].kthread,
869 			    B_TRUE, tdesc, sizeof (tdesc));
870 
871 			mdb_printf("%0?p %0?p %6x %3d%% %s\n",
872 			    log[i].kthread,
873 			    log[i].start,
874 			    (uint_t)log[i].stksz,
875 			    (int)log[i].percent, tdesc);
876 		}
877 		mdb_free((void *)log, usize);
878 		return (DCMD_OK);
879 	}
880 
881 	/* display header */
882 	if (DCMD_HDRSPEC(flags)) {
883 		if (ukmem_stackinfo == 0) {
884 			mdb_printf("Tunable kmem_stackinfo is unset, ");
885 			mdb_printf("MAX value is not available.\n");
886 			mdb_printf("Use ::help stackinfo for more details.\n");
887 		}
888 		mdb_printf("%<u>%?s%</u>", "THREAD");
889 		mdb_printf(" %<u>%?s%</u>", "STACK");
890 		mdb_printf("%<u>%s%</u>", "   SIZE  CUR  MAX LWP");
891 		mdb_printf("\n");
892 	}
893 
894 	/* read kthread */
895 	if (mdb_vread(&t, sizeof (kthread_t), addr) == -1) {
896 		mdb_warn("can't read kthread_t at %#lx\n", addr);
897 		return (DCMD_ERR);
898 	}
899 
900 	if (t.t_state == TS_FREE && all == FALSE) {
901 		return (DCMD_OK);
902 	}
903 
904 	/*
905 	 * Stack grows up or down, see thread_create(),
906 	 * compute stack memory aera start and end (start < end).
907 	 */
908 	if (t.t_stk > t.t_stkbase) {
909 		/* stack grows down */
910 		start = t.t_stkbase;
911 		end = t.t_stk;
912 	} else {
913 		/* stack grows up */
914 		start = t.t_stk;
915 		end = t.t_stkbase;
916 	}
917 
918 	/* display stack info */
919 	mdb_printf("%0?p %0?p", addr, start);
920 
921 	/* (end - start), kernel stack size as found in kthread_t */
922 	if ((end <= start) || ((end - start) > (1024 * 1024))) {
923 		/* negative or stack size > 1 meg, assume bogus */
924 		mdb_warn(" t_stk/t_stkbase problem\n");
925 		return (DCMD_ERR);
926 	}
927 
928 	/* display stack size */
929 	mdb_printf(" %6x", end - start);
930 
931 	/* display current stack usage */
932 	percent = stk_compute_percent(t.t_stk, t.t_stkbase,
933 	    (caddr_t)t.t_sp + STACK_BIAS);
934 
935 	mdb_printf(" %3d%%", percent);
936 	percent = 0;
937 
938 	(void) thread_getdesc(addr, B_TRUE, tdesc, sizeof (tdesc));
939 
940 	if (ukmem_stackinfo == 0) {
941 		mdb_printf("  n/a %s\n", tdesc);
942 		return (DCMD_OK);
943 	}
944 
945 	if ((((uintptr_t)start) & 0x7) != 0) {
946 		start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
947 	}
948 	end = (caddr_t)(((uintptr_t)end) & (~0x7));
949 	/* size to scan in userland copy of kernel stack */
950 	usize = end - start; /* is a multiple of 8 bytes */
951 
952 	/*
953 	 * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
954 	 * alignement for ustart and uend, in boundaries.
955 	 */
956 	ustart = ustack = (caddr_t)mdb_alloc(usize + 8, UM_SLEEP);
957 	if ((((uintptr_t)ustart) & 0x7) != 0) {
958 		ustart = (caddr_t)((((uintptr_t)ustart) & (~0x7)) + 8);
959 	}
960 	uend = ustart + usize;
961 
962 	/* read the kernel stack */
963 	if (mdb_vread(ustart, usize, (uintptr_t)start) != usize) {
964 		mdb_free((void *)ustack, usize + 8);
965 		mdb_printf("\n");
966 		mdb_warn("couldn't read entire stack\n");
967 		return (DCMD_ERR);
968 	}
969 
970 	/* scan the stack */
971 	if (t.t_stk > t.t_stkbase) {
972 		/* stack grows down */
973 #if defined(__i386) || defined(__amd64)
974 		/*
975 		 * 6 longs are pushed on stack, see thread_load(). Skip
976 		 * them, so if kthread has never run, percent is zero.
977 		 * 8 bytes alignement is preserved for a 32 bit kernel,
978 		 * 6 x 4 = 24, 24 is a multiple of 8.
979 		 */
980 		uend -= (6 * sizeof (long));
981 #endif
982 		ptr = (uint64_t *)((void *)ustart);
983 		while (ptr < (uint64_t *)((void *)uend)) {
984 			if (*ptr != KMEM_STKINFO_PATTERN) {
985 				percent = stk_compute_percent(uend,
986 				    ustart, (caddr_t)ptr);
987 				break;
988 			}
989 			ptr++;
990 		}
991 	} else {
992 		/* stack grows up */
993 		ptr = (uint64_t *)((void *)uend);
994 		ptr--;
995 		while (ptr >= (uint64_t *)((void *)ustart)) {
996 			if (*ptr != KMEM_STKINFO_PATTERN) {
997 				percent = stk_compute_percent(ustart,
998 				    uend, (caddr_t)ptr);
999 				break;
1000 			}
1001 			ptr--;
1002 		}
1003 	}
1004 
1005 	/* thread 't0' stack is not created by thread_create() */
1006 	if (addr == allthreads) {
1007 		percent = 0;
1008 	}
1009 	if (percent != 0) {
1010 		mdb_printf(" %3d%%", percent);
1011 	} else {
1012 		mdb_printf("  n/a");
1013 	}
1014 
1015 	mdb_printf(" %s\n", tdesc);
1016 
1017 	mdb_free((void *)ustack, usize + 8);
1018 	return (DCMD_OK);
1019 }
1020 
/*
 * Print the ::stackinfo help text: what the dcmd reports, a worked example,
 * the cases in which MAX is unavailable, and the option list.
 */
void
stackinfo_help(void)
{
	/* What the dcmd shows, with a sample line and its interpretation. */
	mdb_printf(
	    "Shows kernel stacks real utilization, if /etc/system "
	    "kmem_stackinfo tunable\n"
	    "(an unsigned integer) is non zero at kthread creation time. "
	    "For example:\n"
	    "          THREAD            STACK   SIZE  CUR  MAX LWP\n"
	    "ffffff014f5f2c20 ffffff0004153000   4f00   4%%  43%% init/1\n"
	    "The stack size utilization for this kthread is at 4%%"
	    " of its maximum size,\n"
	    "but has already used up to 43%%, stack size is 4f00 bytes.\n");

	/* When the MAX column cannot be computed. */
	mdb_printf(
	    "MAX value can be shown as n/a (not available):\n"
	    "  - for the very first kthread (sched/1)\n"
	    "  - kmem_stackinfo was zero at kthread creation time\n"
	    "  - kthread has not yet run\n"
	    "\n");

	/* Option summary and pointer to further documentation. */
	mdb_printf(
	    "Options:\n"
	    "-a shows also TS_FREE kthreads (interrupt kthreads)\n"
	    "-h shows history, dead kthreads that used their "
	    "kernel stack the most\n"
	    "\nSee illumos Modular Debugger Guide for detailed usage.\n");

	mdb_flush();
}
1058