xref: /illumos-gate/usr/src/cmd/mdb/i86pc/modules/unix/unix.c (revision 49b7860084dbba18bc00b29413d6182197f9fe93)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2015 Joyent, Inc.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ctf.h>
28 #include <sys/cpuvar.h>
29 #include <sys/systm.h>
30 #include <sys/traptrace.h>
31 #include <sys/x_call.h>
32 #include <sys/xc_levels.h>
33 #include <sys/avintr.h>
34 #include <sys/systm.h>
35 #include <sys/trap.h>
36 #include <sys/mutex.h>
37 #include <sys/mutex_impl.h>
38 #include "i86mmu.h"
39 #include "unix_sup.h"
40 #include <sys/apix.h>
41 #include <sys/x86_archext.h>
42 #include <sys/bitmap.h>
43 #include <sys/controlregs.h>
44 
45 #define	TT_HDLR_WIDTH	17
46 
47 
48 /* apix only */
49 static apix_impl_t *d_apixs[NCPU];
50 static int use_apix = 0;
51 
52 static int
53 ttrace_ttr_size_check(void)
54 {
55 	mdb_ctf_id_t ttrtid;
56 	ssize_t ttr_size;
57 
58 	if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
59 	    mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
60 		mdb_warn("failed to determine size of trap_trace_rec_t; "
61 		    "non-TRAPTRACE kernel?\n");
62 		return (0);
63 	}
64 
65 	if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
66 	    sizeof (trap_trace_rec_t)) {
67 		/*
68 		 * On Intel machines, this will happen when TTR_STACK_DEPTH
69 		 * is changed.  This code could be smarter, and could
70 		 * dynamically adapt to different depths, but not until a
71 		 * need for such adaptation is demonstrated.
72 		 */
73 		mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
74 		    "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
75 		return (0);
76 	}
77 
78 	return (1);
79 }
80 
81 int
82 ttrace_walk_init(mdb_walk_state_t *wsp)
83 {
84 	trap_trace_ctl_t *ttcp;
85 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
86 	int i;
87 
88 	if (!ttrace_ttr_size_check())
89 		return (WALK_ERR);
90 
91 	ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
92 
93 	if (wsp->walk_addr != NULL) {
94 		mdb_warn("ttrace only supports global walks\n");
95 		return (WALK_ERR);
96 	}
97 
98 	if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
99 		mdb_warn("symbol 'trap_trace_ctl' not found; "
100 		    "non-TRAPTRACE kernel?\n");
101 		mdb_free(ttcp, ttc_size);
102 		return (WALK_ERR);
103 	}
104 
105 	/*
106 	 * We'll poach the ttc_current pointer (which isn't used for
107 	 * anything) to store a pointer to our current TRAPTRACE record.
108 	 * This allows us to only keep the array of trap_trace_ctl structures
109 	 * as our walker state (ttc_current may be the only kernel data
110 	 * structure member added exclusively to make writing the mdb walker
111 	 * a little easier).
112 	 */
113 	for (i = 0; i < NCPU; i++) {
114 		trap_trace_ctl_t *ttc = &ttcp[i];
115 
116 		if (ttc->ttc_first == NULL)
117 			continue;
118 
119 		/*
120 		 * Assign ttc_current to be the last completed record.
121 		 * Note that the error checking (i.e. in the ttc_next ==
122 		 * ttc_first case) is performed in the step function.
123 		 */
124 		ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
125 	}
126 
127 	wsp->walk_data = ttcp;
128 	return (WALK_NEXT);
129 }
130 
131 int
132 ttrace_walk_step(mdb_walk_state_t *wsp)
133 {
134 	trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
135 	trap_trace_rec_t rec;
136 	int rval, i, recsize = sizeof (trap_trace_rec_t);
137 	hrtime_t latest = 0;
138 
139 	/*
140 	 * Loop through the CPUs, looking for the latest trap trace record
141 	 * (we want to walk through the trap trace records in reverse
142 	 * chronological order).
143 	 */
144 	for (i = 0; i < NCPU; i++) {
145 		ttc = &ttcp[i];
146 
147 		if (ttc->ttc_current == NULL)
148 			continue;
149 
150 		if (ttc->ttc_current < ttc->ttc_first)
151 			ttc->ttc_current = ttc->ttc_limit - recsize;
152 
153 		if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
154 			mdb_warn("couldn't read rec at %p", ttc->ttc_current);
155 			return (WALK_ERR);
156 		}
157 
158 		if (rec.ttr_stamp > latest) {
159 			latest = rec.ttr_stamp;
160 			latest_ttc = ttc;
161 		}
162 	}
163 
164 	if (latest == 0)
165 		return (WALK_DONE);
166 
167 	ttc = latest_ttc;
168 
169 	if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
170 		mdb_warn("couldn't read rec at %p", ttc->ttc_current);
171 		return (WALK_ERR);
172 	}
173 
174 	rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
175 
176 	if (ttc->ttc_current == ttc->ttc_next)
177 		ttc->ttc_current = NULL;
178 	else
179 		ttc->ttc_current -= sizeof (trap_trace_rec_t);
180 
181 	return (rval);
182 }
183 
184 void
185 ttrace_walk_fini(mdb_walk_state_t *wsp)
186 {
187 	mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
188 }
189 
190 static int
191 ttrace_syscall(trap_trace_rec_t *rec)
192 {
193 	GElf_Sym sym;
194 	int sysnum = rec->ttr_sysnum;
195 	uintptr_t addr;
196 	struct sysent sys;
197 
198 	mdb_printf("%-3x", sysnum);
199 
200 	if (rec->ttr_sysnum > NSYSCALL) {
201 		mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
202 		return (0);
203 	}
204 
205 	if (mdb_lookup_by_name("sysent", &sym) == -1) {
206 		mdb_warn("\ncouldn't find 'sysent'");
207 		return (-1);
208 	}
209 
210 	addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
211 
212 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
213 		mdb_warn("\nsysnum %d out-of-range\n", sysnum);
214 		return (-1);
215 	}
216 
217 	if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
218 		mdb_warn("\nfailed to read sysent at %p", addr);
219 		return (-1);
220 	}
221 
222 	mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
223 
224 	return (0);
225 }
226 
227 static int
228 ttrace_interrupt(trap_trace_rec_t *rec)
229 {
230 	GElf_Sym sym;
231 	uintptr_t addr;
232 	struct av_head hd;
233 	struct autovec av;
234 
235 	switch (rec->ttr_regs.r_trapno) {
236 	case T_SOFTINT:
237 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
238 		return (0);
239 	default:
240 		break;
241 	}
242 
243 	mdb_printf("%-3x ", rec->ttr_vector);
244 
245 	if (mdb_lookup_by_name("autovect", &sym) == -1) {
246 		mdb_warn("\ncouldn't find 'autovect'");
247 		return (-1);
248 	}
249 
250 	addr = (uintptr_t)sym.st_value +
251 	    rec->ttr_vector * sizeof (struct av_head);
252 
253 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
254 		mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
255 		return (-1);
256 	}
257 
258 	if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
259 		mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
260 		return (-1);
261 	}
262 
263 	if (hd.avh_link == NULL) {
264 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
265 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
266 		else
267 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
268 	} else {
269 		if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
270 			mdb_warn("couldn't read autovec at %p",
271 			    (uintptr_t)hd.avh_link);
272 		}
273 
274 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
275 	}
276 
277 	return (0);
278 }
279 
280 static int
281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
282 {
283 	struct autovec av;
284 	apix_impl_t apix;
285 	apix_vector_t apix_vector;
286 
287 	switch (rec->ttr_regs.r_trapno) {
288 	case T_SOFTINT:
289 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
290 		return (0);
291 	default:
292 		break;
293 	}
294 
295 	mdb_printf("%-3x ", rec->ttr_vector);
296 
297 	/* Read the per CPU apix entry */
298 	if (mdb_vread(&apix, sizeof (apix_impl_t),
299 	    (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
300 		mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
301 		return (-1);
302 	}
303 	if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
304 	    (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
305 		mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
306 		return (-1);
307 	}
308 	if (apix_vector.v_share == 0) {
309 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
310 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
311 		else
312 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
313 	} else {
314 		if (mdb_vread(&av, sizeof (struct autovec),
315 		    (uintptr_t)(apix_vector.v_autovect)) == -1) {
316 			mdb_warn("couldn't read autovec at %p",
317 			    (uintptr_t)apix_vector.v_autovect);
318 		}
319 
320 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
321 	}
322 
323 	return (0);
324 }
325 
326 
327 static struct {
328 	int tt_trapno;
329 	char *tt_name;
330 } ttrace_traps[] = {
331 	{ T_ZERODIV,	"divide-error" },
332 	{ T_SGLSTP,	"debug-exception" },
333 	{ T_NMIFLT,	"nmi-interrupt" },
334 	{ T_BPTFLT,	"breakpoint" },
335 	{ T_OVFLW,	"into-overflow" },
336 	{ T_BOUNDFLT,	"bound-exceeded" },
337 	{ T_ILLINST,	"invalid-opcode" },
338 	{ T_NOEXTFLT,	"device-not-avail" },
339 	{ T_DBLFLT,	"double-fault" },
340 	{ T_EXTOVRFLT,	"segment-overrun" },
341 	{ T_TSSFLT,	"invalid-tss" },
342 	{ T_SEGFLT,	"segment-not-pres" },
343 	{ T_STKFLT,	"stack-fault" },
344 	{ T_GPFLT,	"general-protectn" },
345 	{ T_PGFLT,	"page-fault" },
346 	{ T_EXTERRFLT,	"error-fault" },
347 	{ T_ALIGNMENT,	"alignment-check" },
348 	{ T_MCE,	"machine-check" },
349 	{ T_SIMDFPE,	"sse-exception" },
350 
351 	{ T_DBGENTR,	"debug-enter" },
352 	{ T_FASTTRAP,	"fasttrap-0xd2" },
353 	{ T_SYSCALLINT,	"syscall-0x91" },
354 	{ T_DTRACE_RET,	"dtrace-ret" },
355 	{ T_SOFTINT,	"softint" },
356 	{ T_INTERRUPT,	"interrupt" },
357 	{ T_FAULT,	"fault" },
358 	{ T_AST,	"ast" },
359 	{ T_SYSCALL,	"syscall" },
360 
361 	{ 0,		NULL }
362 };
363 
364 static int
365 ttrace_trap(trap_trace_rec_t *rec)
366 {
367 	int i;
368 
369 	if (rec->ttr_regs.r_trapno == T_AST)
370 		mdb_printf("%-3s ", "-");
371 	else
372 		mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
373 
374 	for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
375 		if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
376 			break;
377 	}
378 
379 	if (ttrace_traps[i].tt_name == NULL)
380 		mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
381 	else
382 		mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
383 
384 	return (0);
385 }
386 
387 static void
388 ttrace_intr_detail(trap_trace_rec_t *rec)
389 {
390 	mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
391 	    rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
392 }
393 
394 static struct {
395 	uchar_t t_marker;
396 	char *t_name;
397 	int (*t_hdlr)(trap_trace_rec_t *);
398 } ttrace_hdlr[] = {
399 	{ TT_SYSCALL, "sysc", ttrace_syscall },
400 	{ TT_SYSENTER, "syse", ttrace_syscall },
401 	{ TT_SYSC, "asys", ttrace_syscall },
402 	{ TT_SYSC64, "sc64", ttrace_syscall },
403 	{ TT_INTERRUPT, "intr", ttrace_interrupt },
404 	{ TT_TRAP, "trap", ttrace_trap },
405 	{ TT_EVENT, "evnt", ttrace_trap },
406 	{ 0, NULL, NULL }
407 };
408 
409 typedef struct ttrace_dcmd {
410 	processorid_t ttd_cpu;
411 	uint_t ttd_extended;
412 	trap_trace_ctl_t ttd_ttc[NCPU];
413 } ttrace_dcmd_t;
414 
415 #if defined(__amd64)
416 
417 #define	DUMP(reg) #reg, regs->r_##reg
418 #define	THREEREGS	"         %3s: %16lx %3s: %16lx %3s: %16lx\n"
419 
420 static void
421 ttrace_dumpregs(trap_trace_rec_t *rec)
422 {
423 	struct regs *regs = &rec->ttr_regs;
424 
425 	mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
426 	mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
427 	mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
428 	mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
429 	mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
430 	mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
431 	mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
432 	mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
433 	mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
434 	mdb_printf("\n");
435 }
436 
437 #else
438 
439 #define	DUMP(reg) #reg, regs->r_##reg
440 #define	FOURREGS	"         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"
441 
442 static void
443 ttrace_dumpregs(trap_trace_rec_t *rec)
444 {
445 	struct regs *regs = &rec->ttr_regs;
446 
447 	mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
448 	mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
449 	mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
450 	mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
451 	    DUMP(pc), DUMP(cs));
452 	mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
453 	    "cr2", rec->ttr_cr2);
454 	mdb_printf("\n");
455 }
456 
457 #endif	/* __amd64 */
458 
459 int
460 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
461 {
462 	struct regs *regs = &rec->ttr_regs;
463 	processorid_t cpu = -1, i;
464 
465 	for (i = 0; i < NCPU; i++) {
466 		if (addr >= dcmd->ttd_ttc[i].ttc_first &&
467 		    addr < dcmd->ttd_ttc[i].ttc_limit) {
468 			cpu = i;
469 			break;
470 		}
471 	}
472 
473 	if (cpu == -1) {
474 		mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
475 		return (WALK_ERR);
476 	}
477 
478 	if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
479 		return (WALK_NEXT);
480 
481 	mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
482 
483 	for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
484 		if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
485 			continue;
486 		mdb_printf("%4s ", ttrace_hdlr[i].t_name);
487 		if (ttrace_hdlr[i].t_hdlr(rec) == -1)
488 			return (WALK_ERR);
489 	}
490 
491 	mdb_printf(" %a\n", regs->r_pc);
492 
493 	if (dcmd->ttd_extended == FALSE)
494 		return (WALK_NEXT);
495 
496 	if (rec->ttr_marker == TT_INTERRUPT)
497 		ttrace_intr_detail(rec);
498 	else
499 		ttrace_dumpregs(rec);
500 
501 	if (rec->ttr_sdepth > 0) {
502 		for (i = 0; i < rec->ttr_sdepth; i++) {
503 			if (i >= TTR_STACK_DEPTH) {
504 				mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
505 				    "should be <= %d)\n", " ", rec->ttr_sdepth,
506 				    TTR_STACK_DEPTH);
507 				break;
508 			}
509 
510 			mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
511 		}
512 		mdb_printf("\n");
513 	}
514 
515 	return (WALK_NEXT);
516 }
517 
518 int
519 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
520 {
521 	ttrace_dcmd_t dcmd;
522 	trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
523 	trap_trace_rec_t rec;
524 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
525 
526 	if (!ttrace_ttr_size_check())
527 		return (WALK_ERR);
528 
529 	bzero(&dcmd, sizeof (dcmd));
530 	dcmd.ttd_cpu = -1;
531 	dcmd.ttd_extended = FALSE;
532 
533 	if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
534 		mdb_warn("symbol 'trap_trace_ctl' not found; "
535 		    "non-TRAPTRACE kernel?\n");
536 		return (DCMD_ERR);
537 	}
538 
539 	if (mdb_getopts(argc, argv,
540 	    'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended, NULL) != argc)
541 		return (DCMD_USAGE);
542 
543 	if (DCMD_HDRSPEC(flags)) {
544 		mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
545 		    "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
546 		    " EIP");
547 	}
548 
549 	if (flags & DCMD_ADDRSPEC) {
550 		if (addr >= NCPU) {
551 			if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
552 				mdb_warn("couldn't read trap trace record "
553 				    "at %p", addr);
554 				return (DCMD_ERR);
555 			}
556 
557 			if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
558 				return (DCMD_ERR);
559 
560 			return (DCMD_OK);
561 		}
562 		dcmd.ttd_cpu = addr;
563 	}
564 
565 	if (mdb_readvar(&use_apix, "apix_enable") == -1) {
566 		mdb_warn("failed to read apix_enable");
567 		use_apix = 0;
568 	}
569 
570 	if (use_apix) {
571 		if (mdb_readvar(&d_apixs, "apixs") == -1) {
572 			mdb_warn("\nfailed to read apixs.");
573 			return (DCMD_ERR);
574 		}
575 		/* change to apix ttrace interrupt handler */
576 		ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
577 	}
578 
579 	if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
580 		mdb_warn("couldn't walk 'ttrace'");
581 		return (DCMD_ERR);
582 	}
583 
584 	return (DCMD_OK);
585 }
586 
587 /*ARGSUSED*/
588 int
589 mutex_owner_init(mdb_walk_state_t *wsp)
590 {
591 	return (WALK_NEXT);
592 }
593 
594 int
595 mutex_owner_step(mdb_walk_state_t *wsp)
596 {
597 	uintptr_t addr = wsp->walk_addr;
598 	mutex_impl_t mtx;
599 	uintptr_t owner;
600 	kthread_t thr;
601 
602 	if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
603 		return (WALK_ERR);
604 
605 	if (!MUTEX_TYPE_ADAPTIVE(&mtx))
606 		return (WALK_DONE);
607 
608 	if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == NULL)
609 		return (WALK_DONE);
610 
611 	if (mdb_vread(&thr, sizeof (thr), owner) != -1)
612 		(void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
613 
614 	return (WALK_DONE);
615 }
616 
617 static void
618 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
619 {
620 	const char *lastnm;
621 	uint_t lastval;
622 	char type[4];
623 
624 	switch (gate->sgd_type) {
625 	case SDT_SYSIGT:
626 		strcpy(type, "int");
627 		break;
628 	case SDT_SYSTGT:
629 		strcpy(type, "trp");
630 		break;
631 	case SDT_SYSTASKGT:
632 		strcpy(type, "tsk");
633 		break;
634 	default:
635 		(void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
636 	}
637 
638 #if defined(__amd64)
639 	lastnm = "IST";
640 	lastval = gate->sgd_ist;
641 #else
642 	lastnm = "STK";
643 	lastval = gate->sgd_stkcpy;
644 #endif
645 
646 	if (header) {
647 		mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
648 		    "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
649 		    "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
650 	}
651 
652 	mdb_printf("%s", label);
653 
654 	if (gate->sgd_type == SDT_SYSTASKGT)
655 		mdb_printf("%-30s ", "-");
656 	else
657 		mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
658 
659 	mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
660 	    gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
661 }
662 
663 /*ARGSUSED*/
664 static int
665 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
666 {
667 	gate_desc_t gate;
668 
669 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
670 		return (DCMD_USAGE);
671 
672 	if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
673 	    sizeof (gate_desc_t)) {
674 		mdb_warn("failed to read gate descriptor at %p\n", addr);
675 		return (DCMD_ERR);
676 	}
677 
678 	gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
679 
680 	return (DCMD_OK);
681 }
682 
683 /*ARGSUSED*/
684 static int
685 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
686 {
687 	int i;
688 
689 	if (!(flags & DCMD_ADDRSPEC)) {
690 		GElf_Sym idt0_va;
691 		gate_desc_t *idt0;
692 
693 		if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
694 			mdb_warn("failed to find VA of idt0");
695 			return (DCMD_ERR);
696 		}
697 
698 		addr = idt0_va.st_value;
699 		if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
700 			mdb_warn("failed to read idt0 at %p\n", addr);
701 			return (DCMD_ERR);
702 		}
703 
704 		addr = (uintptr_t)idt0;
705 	}
706 
707 	for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
708 		gate_desc_t gate;
709 		char label[6];
710 
711 		if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
712 		    sizeof (gate_desc_t)) {
713 			mdb_warn("failed to read gate descriptor at %p\n",
714 			    addr);
715 			return (DCMD_ERR);
716 		}
717 
718 		(void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
719 		gate_desc_dump(&gate, label, i == 0);
720 	}
721 
722 	return (DCMD_OK);
723 }
724 
/*
 * Help text for the ::htables dcmd.
 */
static void
htables_help(void)
{
	mdb_printf(
	    "Given a (hat_t *), generates the list of all (htable_t *)s\n");
	mdb_printf("that correspond to that address space\n");
}
732 
/*
 * Help text for the ::report_maps dcmd.
 */
static void
report_maps_help(void)
{
	mdb_printf(
	    "Given a PFN, report HAT structures that map the page, or use\n");
	mdb_printf("the page as a pagetable.\n");
	mdb_printf("\n");
	mdb_printf("-m Interpret the PFN as an MFN (machine frame number)\n");
}
742 
/*
 * Help text for the ::ptable dcmd.
 */
static void
ptable_help(void)
{
	mdb_printf(
	    "Given a PFN holding a page table, print its contents, and\n");
	mdb_printf("the address of the corresponding htable structure.\n");
	mdb_printf("\n");
	mdb_printf("-m Interpret the PFN as an MFN (machine frame number)\n");
}
752 
753 /*
754  * NSEC_SHIFT is replicated here (it is not defined in a header file),
755  * but for amusement, the reader is directed to the comment that explains
756  * the rationale for this particular value on x86.  Spoiler:  the value is
757  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
758  * in that comment sounds too familiar, it's because your author also wrote
759  * that code -- some fifteen years prior to this writing in 2011...)
760  */
761 #define	NSEC_SHIFT 5
762 
763 /*ARGSUSED*/
764 static int
765 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
766 {
767 	uint32_t nsec_scale;
768 	hrtime_t tsc = addr, hrt;
769 	unsigned int *tscp = (unsigned int *)&tsc;
770 	uintptr_t scalehrtimef;
771 	uint64_t scale;
772 	GElf_Sym sym;
773 
774 	if (!(flags & DCMD_ADDRSPEC)) {
775 		if (argc != 1)
776 			return (DCMD_USAGE);
777 
778 		switch (argv[0].a_type) {
779 		case MDB_TYPE_STRING:
780 			tsc = mdb_strtoull(argv[0].a_un.a_str);
781 			break;
782 		case MDB_TYPE_IMMEDIATE:
783 			tsc = argv[0].a_un.a_val;
784 			break;
785 		default:
786 			return (DCMD_USAGE);
787 		}
788 	}
789 
790 	if (mdb_readsym(&scalehrtimef,
791 	    sizeof (scalehrtimef), "scalehrtimef") == -1) {
792 		mdb_warn("couldn't read 'scalehrtimef'");
793 		return (DCMD_ERR);
794 	}
795 
796 	if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
797 		mdb_warn("couldn't find 'tsc_scalehrtime'");
798 		return (DCMD_ERR);
799 	}
800 
801 	if (sym.st_value != scalehrtimef) {
802 		mdb_warn("::scalehrtime requires that scalehrtimef "
803 		    "be set to tsc_scalehrtime\n");
804 		return (DCMD_ERR);
805 	}
806 
807 	if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
808 		mdb_warn("couldn't read 'nsec_scale'");
809 		return (DCMD_ERR);
810 	}
811 
812 	scale = (uint64_t)nsec_scale;
813 
814 	hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
815 	hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
816 
817 	mdb_printf("0x%llx\n", hrt);
818 
819 	return (DCMD_OK);
820 }
821 
822 /*
823  * The x86 feature set is implemented as a bitmap array. That bitmap array is
824  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
825  * macro. We have the names for each of these features in unix's text segment
826  * so we do not have to duplicate them and instead just look them up.
827  */
828 /*ARGSUSED*/
829 static int
830 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
831     const mdb_arg_t *argv)
832 {
833 	void *fset;
834 	GElf_Sym sym;
835 	uintptr_t nptr;
836 	char name[128];
837 	int ii;
838 
839 	size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
840 
841 	if (argc != 0)
842 		return (DCMD_USAGE);
843 
844 	if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
845 		mdb_warn("couldn't find x86_feature_names");
846 		return (DCMD_ERR);
847 	}
848 
849 	fset = mdb_zalloc(sz, UM_NOSLEEP);
850 	if (fset == NULL) {
851 		mdb_warn("failed to allocate memory for x86_featureset");
852 		return (DCMD_ERR);
853 	}
854 
855 	if (mdb_readvar(fset, "x86_featureset") != sz) {
856 		mdb_warn("failed to read x86_featureset");
857 		mdb_free(fset, sz);
858 		return (DCMD_ERR);
859 	}
860 
861 	for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
862 		if (!BT_TEST((ulong_t *)fset, ii))
863 			continue;
864 
865 		if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
866 		    sizeof (void *) * ii) != sizeof (char *)) {
867 			mdb_warn("failed to read feature array %d", ii);
868 			mdb_free(fset, sz);
869 			return (DCMD_ERR);
870 		}
871 
872 		if (mdb_readstr(name, sizeof (name), nptr) == -1) {
873 			mdb_warn("failed to read feature %d", ii);
874 			mdb_free(fset, sz);
875 			return (DCMD_ERR);
876 		}
877 		mdb_printf("%s\n", name);
878 	}
879 
880 	mdb_free(fset, sz);
881 	return (DCMD_OK);
882 }
883 
#ifdef _KMDB
/*
 * ::crregs dcmd (kmdb only) -- print %cr0 and %cr4 in hex, each followed by
 * the symbolic names of the flag bits that are set.
 *
 * NOTE(review): cr0 and cr4 are ulong_t but are formatted with %08x and %b;
 * confirm how mdb_printf() consumes those arguments on 64-bit builds.
 */
/* ARGSUSED */
static int
crregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	/* Names for the %cr0 flag bits. */
	static const mdb_bitmask_t cr0_flag_bits[] = {
		{ "PE",		CR0_PE,		CR0_PE },
		{ "MP",		CR0_MP,		CR0_MP },
		{ "EM",		CR0_EM,		CR0_EM },
		{ "TS",		CR0_TS,		CR0_TS },
		{ "ET",		CR0_ET,		CR0_ET },
		{ "NE",		CR0_NE,		CR0_NE },
		{ "WP",		CR0_WP,		CR0_WP },
		{ "AM",		CR0_AM,		CR0_AM },
		{ "NW",		CR0_NW,		CR0_NW },
		{ "CD",		CR0_CD,		CR0_CD },
		{ "PG",		CR0_PG,		CR0_PG },
		{ NULL,		0,		0 }
	};

	/* Names for the %cr4 flag bits. */
	static const mdb_bitmask_t cr4_flag_bits[] = {
		{ "VME",	CR4_VME,	CR4_VME },
		{ "PVI",	CR4_PVI,	CR4_PVI },
		{ "TSD",	CR4_TSD,	CR4_TSD },
		{ "DE",		CR4_DE,		CR4_DE },
		{ "PSE",	CR4_PSE,	CR4_PSE },
		{ "PAE",	CR4_PAE,	CR4_PAE },
		{ "MCE",	CR4_MCE,	CR4_MCE },
		{ "PGE",	CR4_PGE,	CR4_PGE },
		{ "PCE",	CR4_PCE,	CR4_PCE },
		{ "OSFXSR",	CR4_OSFXSR,	CR4_OSFXSR },
		{ "OSXMMEXCPT",	CR4_OSXMMEXCPT,	CR4_OSXMMEXCPT },
		{ "VMXE",	CR4_VMXE,	CR4_VMXE },
		{ "SMXE",	CR4_SMXE,	CR4_SMXE },
		{ "OSXSAVE",	CR4_OSXSAVE,	CR4_OSXSAVE },
		{ "SMEP",	CR4_SMEP,	CR4_SMEP },
		{ NULL,		0,		0 }
	};

	ulong_t cr0 = kmdb_unix_getcr0();
	ulong_t cr4 = kmdb_unix_getcr4();

	mdb_printf("%%cr0 = 0x%08x <%b>\n", cr0, cr0, cr0_flag_bits);
	mdb_printf("%%cr4 = 0x%08x <%b>\n", cr4, cr4, cr4_flag_bits);

	return (DCMD_OK);
}
#endif
931 
932 static const mdb_dcmd_t dcmds[] = {
933 	{ "gate_desc", ":", "dump a gate descriptor", gate_desc },
934 	{ "idt", ":[-v]", "dump an IDT", idt },
935 	{ "ttrace", "[-x]", "dump trap trace buffers", ttrace },
936 	{ "vatopfn", ":[-a as]", "translate address to physical page",
937 	    va2pfn_dcmd },
938 	{ "report_maps", ":[-m]",
939 	    "Given PFN, report mappings / page table usage",
940 	    report_maps_dcmd, report_maps_help },
941 	{ "htables", "", "Given hat_t *, lists all its htable_t * values",
942 	    htables_dcmd, htables_help },
943 	{ "ptable", ":[-m]", "Given PFN, dump contents of a page table",
944 	    ptable_dcmd, ptable_help },
945 	{ "pte", ":[-p XXXXX] [-l N]", "print human readable page table entry",
946 	    pte_dcmd },
947 	{ "pfntomfn", ":", "convert physical page to hypervisor machine page",
948 	    pfntomfn_dcmd },
949 	{ "mfntopfn", ":", "convert hypervisor machine page to physical page",
950 	    mfntopfn_dcmd },
951 	{ "memseg_list", ":", "show memseg list", memseg_list },
952 	{ "scalehrtime", ":",
953 	    "scale an unscaled high-res time", scalehrtime_cmd },
954 	{ "x86_featureset", NULL, "dump the x86_featureset vector",
955 		x86_featureset_cmd },
956 #ifdef _KMDB
957 	{ "crregs", NULL, "dump control registers", crregs_dcmd },
958 #endif
959 	{ NULL }
960 };
961 
962 static const mdb_walker_t walkers[] = {
963 	{ "ttrace", "walks trap trace buffers in reverse chronological order",
964 		ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
965 	{ "mutex_owner", "walks the owner of a mutex",
966 		mutex_owner_init, mutex_owner_step },
967 	{ "memseg", "walk the memseg structures",
968 		memseg_walk_init, memseg_walk_step, memseg_walk_fini },
969 	{ NULL }
970 };
971 
972 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
973 
974 const mdb_modinfo_t *
975 _mdb_init(void)
976 {
977 	return (&modinfo);
978 }
979 
/*
 * Module exit point: calls free_mmu() to release the MMU support code's
 * state.
 */
void
_mdb_fini(void)
{
	free_mmu();
}
985