xref: /illumos-gate/usr/src/cmd/mdb/i86pc/modules/unix/unix.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2018 Joyent, Inc.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ctf.h>
28 #include <sys/cpuvar.h>
29 #include <sys/systm.h>
30 #include <sys/traptrace.h>
31 #include <sys/x_call.h>
32 #include <sys/xc_levels.h>
33 #include <sys/avintr.h>
34 #include <sys/systm.h>
35 #include <sys/trap.h>
36 #include <sys/mutex.h>
37 #include <sys/mutex_impl.h>
38 #include "i86mmu.h"
39 #include "unix_sup.h"
40 #include <sys/apix.h>
41 #include <sys/x86_archext.h>
42 #include <sys/bitmap.h>
43 #include <sys/controlregs.h>
44 
45 #define	TT_HDLR_WIDTH	17
46 
47 
48 /* apix only */
49 static apix_impl_t *d_apixs[NCPU];
50 static int use_apix = 0;
51 
52 static int
53 ttrace_ttr_size_check(void)
54 {
55 	mdb_ctf_id_t ttrtid;
56 	ssize_t ttr_size;
57 
58 	if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
59 	    mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
60 		mdb_warn("failed to determine size of trap_trace_rec_t; "
61 		    "non-TRAPTRACE kernel?\n");
62 		return (0);
63 	}
64 
65 	if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
66 	    sizeof (trap_trace_rec_t)) {
67 		/*
68 		 * On Intel machines, this will happen when TTR_STACK_DEPTH
69 		 * is changed.  This code could be smarter, and could
70 		 * dynamically adapt to different depths, but not until a
71 		 * need for such adaptation is demonstrated.
72 		 */
73 		mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
74 		    "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
75 		return (0);
76 	}
77 
78 	return (1);
79 }
80 
81 int
82 ttrace_walk_init(mdb_walk_state_t *wsp)
83 {
84 	trap_trace_ctl_t *ttcp;
85 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
86 	int i;
87 
88 	if (!ttrace_ttr_size_check())
89 		return (WALK_ERR);
90 
91 	ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
92 
93 	if (wsp->walk_addr != 0) {
94 		mdb_warn("ttrace only supports global walks\n");
95 		return (WALK_ERR);
96 	}
97 
98 	if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
99 		mdb_warn("symbol 'trap_trace_ctl' not found; "
100 		    "non-TRAPTRACE kernel?\n");
101 		mdb_free(ttcp, ttc_size);
102 		return (WALK_ERR);
103 	}
104 
105 	/*
106 	 * We'll poach the ttc_current pointer (which isn't used for
107 	 * anything) to store a pointer to our current TRAPTRACE record.
108 	 * This allows us to only keep the array of trap_trace_ctl structures
109 	 * as our walker state (ttc_current may be the only kernel data
110 	 * structure member added exclusively to make writing the mdb walker
111 	 * a little easier).
112 	 */
113 	for (i = 0; i < NCPU; i++) {
114 		trap_trace_ctl_t *ttc = &ttcp[i];
115 
116 		if (ttc->ttc_first == 0)
117 			continue;
118 
119 		/*
120 		 * Assign ttc_current to be the last completed record.
121 		 * Note that the error checking (i.e. in the ttc_next ==
122 		 * ttc_first case) is performed in the step function.
123 		 */
124 		ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
125 	}
126 
127 	wsp->walk_data = ttcp;
128 	return (WALK_NEXT);
129 }
130 
131 int
132 ttrace_walk_step(mdb_walk_state_t *wsp)
133 {
134 	trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
135 	trap_trace_rec_t rec;
136 	int rval, i, recsize = sizeof (trap_trace_rec_t);
137 	hrtime_t latest = 0;
138 
139 	/*
140 	 * Loop through the CPUs, looking for the latest trap trace record
141 	 * (we want to walk through the trap trace records in reverse
142 	 * chronological order).
143 	 */
144 	for (i = 0; i < NCPU; i++) {
145 		ttc = &ttcp[i];
146 
147 		if (ttc->ttc_current == 0)
148 			continue;
149 
150 		if (ttc->ttc_current < ttc->ttc_first)
151 			ttc->ttc_current = ttc->ttc_limit - recsize;
152 
153 		if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
154 			mdb_warn("couldn't read rec at %p", ttc->ttc_current);
155 			return (WALK_ERR);
156 		}
157 
158 		if (rec.ttr_stamp > latest) {
159 			latest = rec.ttr_stamp;
160 			latest_ttc = ttc;
161 		}
162 	}
163 
164 	if (latest == 0)
165 		return (WALK_DONE);
166 
167 	ttc = latest_ttc;
168 
169 	if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
170 		mdb_warn("couldn't read rec at %p", ttc->ttc_current);
171 		return (WALK_ERR);
172 	}
173 
174 	rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
175 
176 	if (ttc->ttc_current == ttc->ttc_next)
177 		ttc->ttc_current = 0;
178 	else
179 		ttc->ttc_current -= sizeof (trap_trace_rec_t);
180 
181 	return (rval);
182 }
183 
184 void
185 ttrace_walk_fini(mdb_walk_state_t *wsp)
186 {
187 	mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
188 }
189 
190 static int
191 ttrace_syscall(trap_trace_rec_t *rec)
192 {
193 	GElf_Sym sym;
194 	int sysnum = rec->ttr_sysnum;
195 	uintptr_t addr;
196 	struct sysent sys;
197 
198 	mdb_printf("%-3x", sysnum);
199 
200 	if (rec->ttr_sysnum > NSYSCALL) {
201 		mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
202 		return (0);
203 	}
204 
205 	if (mdb_lookup_by_name("sysent", &sym) == -1) {
206 		mdb_warn("\ncouldn't find 'sysent'");
207 		return (-1);
208 	}
209 
210 	addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
211 
212 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
213 		mdb_warn("\nsysnum %d out-of-range\n", sysnum);
214 		return (-1);
215 	}
216 
217 	if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
218 		mdb_warn("\nfailed to read sysent at %p", addr);
219 		return (-1);
220 	}
221 
222 	mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
223 
224 	return (0);
225 }
226 
227 static int
228 ttrace_interrupt(trap_trace_rec_t *rec)
229 {
230 	GElf_Sym sym;
231 	uintptr_t addr;
232 	struct av_head hd;
233 	struct autovec av;
234 
235 	switch (rec->ttr_regs.r_trapno) {
236 	case T_SOFTINT:
237 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
238 		return (0);
239 	default:
240 		break;
241 	}
242 
243 	mdb_printf("%-3x ", rec->ttr_vector);
244 
245 	if (mdb_lookup_by_name("autovect", &sym) == -1) {
246 		mdb_warn("\ncouldn't find 'autovect'");
247 		return (-1);
248 	}
249 
250 	addr = (uintptr_t)sym.st_value +
251 	    rec->ttr_vector * sizeof (struct av_head);
252 
253 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
254 		mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
255 		return (-1);
256 	}
257 
258 	if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
259 		mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
260 		return (-1);
261 	}
262 
263 	if (hd.avh_link == NULL) {
264 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
265 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
266 		else
267 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
268 	} else {
269 		if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
270 			mdb_warn("couldn't read autovec at %p",
271 			    (uintptr_t)hd.avh_link);
272 		}
273 
274 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
275 	}
276 
277 	return (0);
278 }
279 
280 static int
281 ttrace_apix_interrupt(trap_trace_rec_t *rec)
282 {
283 	struct autovec av;
284 	apix_impl_t apix;
285 	apix_vector_t apix_vector;
286 
287 	switch (rec->ttr_regs.r_trapno) {
288 	case T_SOFTINT:
289 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
290 		return (0);
291 	default:
292 		break;
293 	}
294 
295 	mdb_printf("%-3x ", rec->ttr_vector);
296 
297 	/* Read the per CPU apix entry */
298 	if (mdb_vread(&apix, sizeof (apix_impl_t),
299 	    (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
300 		mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
301 		return (-1);
302 	}
303 	if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
304 	    (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
305 		mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
306 		return (-1);
307 	}
308 	if (apix_vector.v_share == 0) {
309 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
310 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
311 		else
312 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
313 	} else {
314 		if (mdb_vread(&av, sizeof (struct autovec),
315 		    (uintptr_t)(apix_vector.v_autovect)) == -1) {
316 			mdb_warn("couldn't read autovec at %p",
317 			    (uintptr_t)apix_vector.v_autovect);
318 		}
319 
320 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
321 	}
322 
323 	return (0);
324 }
325 
326 
327 static struct {
328 	int tt_trapno;
329 	char *tt_name;
330 } ttrace_traps[] = {
331 	{ T_ZERODIV,	"divide-error" },
332 	{ T_SGLSTP,	"debug-exception" },
333 	{ T_NMIFLT,	"nmi-interrupt" },
334 	{ T_BPTFLT,	"breakpoint" },
335 	{ T_OVFLW,	"into-overflow" },
336 	{ T_BOUNDFLT,	"bound-exceeded" },
337 	{ T_ILLINST,	"invalid-opcode" },
338 	{ T_NOEXTFLT,	"device-not-avail" },
339 	{ T_DBLFLT,	"double-fault" },
340 	{ T_EXTOVRFLT,	"segment-overrun" },
341 	{ T_TSSFLT,	"invalid-tss" },
342 	{ T_SEGFLT,	"segment-not-pres" },
343 	{ T_STKFLT,	"stack-fault" },
344 	{ T_GPFLT,	"general-protectn" },
345 	{ T_PGFLT,	"page-fault" },
346 	{ T_EXTERRFLT,	"error-fault" },
347 	{ T_ALIGNMENT,	"alignment-check" },
348 	{ T_MCE,	"machine-check" },
349 	{ T_SIMDFPE,	"sse-exception" },
350 
351 	{ T_DBGENTR,	"debug-enter" },
352 	{ T_FASTTRAP,	"fasttrap-0xd2" },
353 	{ T_SYSCALLINT,	"syscall-0x91" },
354 	{ T_DTRACE_RET,	"dtrace-ret" },
355 	{ T_SOFTINT,	"softint" },
356 	{ T_INTERRUPT,	"interrupt" },
357 	{ T_FAULT,	"fault" },
358 	{ T_AST,	"ast" },
359 	{ T_SYSCALL,	"syscall" },
360 
361 	{ 0,		NULL }
362 };
363 
364 static int
365 ttrace_trap(trap_trace_rec_t *rec)
366 {
367 	int i;
368 
369 	if (rec->ttr_regs.r_trapno == T_AST)
370 		mdb_printf("%-3s ", "-");
371 	else
372 		mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
373 
374 	for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
375 		if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
376 			break;
377 	}
378 
379 	if (ttrace_traps[i].tt_name == NULL)
380 		mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
381 	else
382 		mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
383 
384 	return (0);
385 }
386 
387 static void
388 ttrace_intr_detail(trap_trace_rec_t *rec)
389 {
390 	mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
391 	    rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
392 }
393 
394 static struct {
395 	uchar_t t_marker;
396 	char *t_name;
397 	int (*t_hdlr)(trap_trace_rec_t *);
398 } ttrace_hdlr[] = {
399 	{ TT_SYSCALL, "sysc", ttrace_syscall },
400 	{ TT_SYSENTER, "syse", ttrace_syscall },
401 	{ TT_SYSC, "asys", ttrace_syscall },
402 	{ TT_SYSC64, "sc64", ttrace_syscall },
403 	{ TT_INTERRUPT, "intr", ttrace_interrupt },
404 	{ TT_TRAP, "trap", ttrace_trap },
405 	{ TT_EVENT, "evnt", ttrace_trap },
406 	{ 0, NULL, NULL }
407 };
408 
409 typedef struct ttrace_dcmd {
410 	processorid_t ttd_cpu;
411 	uint_t ttd_extended;
412 	uintptr_t ttd_kthread;
413 	trap_trace_ctl_t ttd_ttc[NCPU];
414 } ttrace_dcmd_t;
415 
416 #if defined(__amd64)
417 
418 #define	DUMP(reg) #reg, regs->r_##reg
419 #define	THREEREGS	"         %3s: %16lx %3s: %16lx %3s: %16lx\n"
420 
421 static void
422 ttrace_dumpregs(trap_trace_rec_t *rec)
423 {
424 	struct regs *regs = &rec->ttr_regs;
425 
426 	mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
427 	mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
428 	mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
429 	mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
430 	mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
431 	mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
432 	mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
433 	mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
434 	mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
435 	mdb_printf("         %3s: %16lx %3s: %16lx\n",
436 	    "fsb", regs->__r_fsbase,
437 	    "gsb", regs->__r_gsbase);
438 	mdb_printf("\n");
439 }
440 
441 #else
442 
443 #define	DUMP(reg) #reg, regs->r_##reg
444 #define	FOURREGS	"         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"
445 
446 static void
447 ttrace_dumpregs(trap_trace_rec_t *rec)
448 {
449 	struct regs *regs = &rec->ttr_regs;
450 
451 	mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
452 	mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
453 	mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
454 	mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
455 	    DUMP(pc), DUMP(cs));
456 	mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
457 	    "cr2", rec->ttr_cr2);
458 	mdb_printf("\n");
459 }
460 
461 #endif	/* __amd64 */
462 
463 int
464 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
465 {
466 	struct regs *regs = &rec->ttr_regs;
467 	processorid_t cpu = -1, i;
468 
469 	for (i = 0; i < NCPU; i++) {
470 		if (addr >= dcmd->ttd_ttc[i].ttc_first &&
471 		    addr < dcmd->ttd_ttc[i].ttc_limit) {
472 			cpu = i;
473 			break;
474 		}
475 	}
476 
477 	if (cpu == -1) {
478 		mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
479 		return (WALK_ERR);
480 	}
481 
482 	if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
483 		return (WALK_NEXT);
484 
485 	if (dcmd->ttd_kthread != 0 &&
486 	    dcmd->ttd_kthread != rec->ttr_curthread)
487 		return (WALK_NEXT);
488 
489 	mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
490 
491 	for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
492 		if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
493 			continue;
494 		mdb_printf("%4s ", ttrace_hdlr[i].t_name);
495 		if (ttrace_hdlr[i].t_hdlr(rec) == -1)
496 			return (WALK_ERR);
497 	}
498 
499 	mdb_printf(" %a\n", regs->r_pc);
500 
501 	if (dcmd->ttd_extended == FALSE)
502 		return (WALK_NEXT);
503 
504 	if (rec->ttr_marker == TT_INTERRUPT)
505 		ttrace_intr_detail(rec);
506 	else
507 		ttrace_dumpregs(rec);
508 
509 	if (rec->ttr_sdepth > 0) {
510 		for (i = 0; i < rec->ttr_sdepth; i++) {
511 			if (i >= TTR_STACK_DEPTH) {
512 				mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
513 				    "should be <= %d)\n", " ", rec->ttr_sdepth,
514 				    TTR_STACK_DEPTH);
515 				break;
516 			}
517 
518 			mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
519 		}
520 		mdb_printf("\n");
521 	}
522 
523 	return (WALK_NEXT);
524 }
525 
526 int
527 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
528 {
529 	ttrace_dcmd_t dcmd;
530 	trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
531 	trap_trace_rec_t rec;
532 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
533 
534 	if (!ttrace_ttr_size_check())
535 		return (WALK_ERR);
536 
537 	bzero(&dcmd, sizeof (dcmd));
538 	dcmd.ttd_cpu = -1;
539 	dcmd.ttd_extended = FALSE;
540 
541 	if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
542 		mdb_warn("symbol 'trap_trace_ctl' not found; "
543 		    "non-TRAPTRACE kernel?\n");
544 		return (DCMD_ERR);
545 	}
546 
547 	if (mdb_getopts(argc, argv,
548 	    'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
549 	    't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
550 		return (DCMD_USAGE);
551 
552 	if (DCMD_HDRSPEC(flags)) {
553 		mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
554 		    "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
555 		    " EIP");
556 	}
557 
558 	if (flags & DCMD_ADDRSPEC) {
559 		if (addr >= NCPU) {
560 			if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
561 				mdb_warn("couldn't read trap trace record "
562 				    "at %p", addr);
563 				return (DCMD_ERR);
564 			}
565 
566 			if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
567 				return (DCMD_ERR);
568 
569 			return (DCMD_OK);
570 		}
571 		dcmd.ttd_cpu = addr;
572 	}
573 
574 	if (mdb_readvar(&use_apix, "apix_enable") == -1) {
575 		mdb_warn("failed to read apix_enable");
576 		use_apix = 0;
577 	}
578 
579 	if (use_apix) {
580 		if (mdb_readvar(&d_apixs, "apixs") == -1) {
581 			mdb_warn("\nfailed to read apixs.");
582 			return (DCMD_ERR);
583 		}
584 		/* change to apix ttrace interrupt handler */
585 		ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
586 	}
587 
588 	if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
589 		mdb_warn("couldn't walk 'ttrace'");
590 		return (DCMD_ERR);
591 	}
592 
593 	return (DCMD_OK);
594 }
595 
596 /*ARGSUSED*/
597 int
598 mutex_owner_init(mdb_walk_state_t *wsp)
599 {
600 	return (WALK_NEXT);
601 }
602 
603 int
604 mutex_owner_step(mdb_walk_state_t *wsp)
605 {
606 	uintptr_t addr = wsp->walk_addr;
607 	mutex_impl_t mtx;
608 	uintptr_t owner;
609 	kthread_t thr;
610 
611 	if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
612 		return (WALK_ERR);
613 
614 	if (!MUTEX_TYPE_ADAPTIVE(&mtx))
615 		return (WALK_DONE);
616 
617 	if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == 0)
618 		return (WALK_DONE);
619 
620 	if (mdb_vread(&thr, sizeof (thr), owner) != -1)
621 		(void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
622 
623 	return (WALK_DONE);
624 }
625 
626 static void
627 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
628 {
629 	const char *lastnm;
630 	uint_t lastval;
631 	char type[4];
632 
633 	switch (gate->sgd_type) {
634 	case SDT_SYSIGT:
635 		strcpy(type, "int");
636 		break;
637 	case SDT_SYSTGT:
638 		strcpy(type, "trp");
639 		break;
640 	case SDT_SYSTASKGT:
641 		strcpy(type, "tsk");
642 		break;
643 	default:
644 		(void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
645 	}
646 
647 #if defined(__amd64)
648 	lastnm = "IST";
649 	lastval = gate->sgd_ist;
650 #else
651 	lastnm = "STK";
652 	lastval = gate->sgd_stkcpy;
653 #endif
654 
655 	if (header) {
656 		mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
657 		    "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
658 		    "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
659 	}
660 
661 	mdb_printf("%s", label);
662 
663 	if (gate->sgd_type == SDT_SYSTASKGT)
664 		mdb_printf("%-30s ", "-");
665 	else
666 		mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
667 
668 	mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
669 	    gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
670 }
671 
672 /*ARGSUSED*/
673 static int
674 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
675 {
676 	gate_desc_t gate;
677 
678 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
679 		return (DCMD_USAGE);
680 
681 	if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
682 	    sizeof (gate_desc_t)) {
683 		mdb_warn("failed to read gate descriptor at %p\n", addr);
684 		return (DCMD_ERR);
685 	}
686 
687 	gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
688 
689 	return (DCMD_OK);
690 }
691 
692 /*ARGSUSED*/
693 static int
694 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
695 {
696 	int i;
697 
698 	if (!(flags & DCMD_ADDRSPEC)) {
699 		GElf_Sym idt0_va;
700 		gate_desc_t *idt0;
701 
702 		if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
703 			mdb_warn("failed to find VA of idt0");
704 			return (DCMD_ERR);
705 		}
706 
707 		addr = idt0_va.st_value;
708 		if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
709 			mdb_warn("failed to read idt0 at %p\n", addr);
710 			return (DCMD_ERR);
711 		}
712 
713 		addr = (uintptr_t)idt0;
714 	}
715 
716 	for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
717 		gate_desc_t gate;
718 		char label[6];
719 
720 		if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
721 		    sizeof (gate_desc_t)) {
722 			mdb_warn("failed to read gate descriptor at %p\n",
723 			    addr);
724 			return (DCMD_ERR);
725 		}
726 
727 		(void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
728 		gate_desc_dump(&gate, label, i == 0);
729 	}
730 
731 	return (DCMD_OK);
732 }
733 
/* Help text for the ::htables dcmd. */
static void
htables_help(void)
{
	static const char text[] =
	    "Given a (hat_t *), generates the list of all (htable_t *)s\n"
	    "that correspond to that address space\n";

	mdb_printf("%s", text);
}
741 
/* Help text for the ::report_maps dcmd. */
static void
report_maps_help(void)
{
	static const char text[] =
	    "Given a PFN, report HAT structures that map the page, or use\n"
	    "the page as a pagetable.\n"
	    "\n"
	    "-m Interpret the PFN as an MFN (machine frame number)\n";

	mdb_printf("%s", text);
}
751 
/* Help text for the ::ptable dcmd. */
static void
ptable_help(void)
{
	static const char text[] =
	    "Given a PFN holding a page table, print its contents, and\n"
	    "the address of the corresponding htable structure.\n"
	    "\n"
	    "-m Interpret the PFN as an MFN (machine frame number)\n"
	    "-l force page table level (3 is top)\n";

	mdb_printf("%s", text);
}
762 
/* Help text for the ::ptmap dcmd. */
static void
ptmap_help(void)
{
	static const char text[] =
	    "Report all mappings represented by the page table hierarchy\n"
	    "rooted at the given cr3 value / physical address.\n"
	    "\n"
	    "-w run ::whatis on mapping start addresses\n";

	mdb_printf("%s", text);
}
772 
/* Long description printed as the help text of ::scalehrtime. */
static const char scalehrtime_desc[] =
	"Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n"
	"are used as both a quick way of accumulating relative time (as for\n"
	"usage) and as a quick way of getting the absolute current time.\n"
	"These uses require slightly different scaling algorithms. By\n"
	"default, if a specified time is greater than half of the unscaled\n"
	"time at the last tick (that is, if the unscaled time represents\n"
	"more than half the time since boot), the timestamp is assumed to\n"
	"be absolute, and the scaling algorithm used mimics that which the\n"
	"kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n"
	"be relative, and the algorithm mimics scalehrtime(). This behavior\n"
	"can be overridden by forcing the unscaled time to be interpreted\n"
	"as relative (via -r) or absolute (via -a).\n";

/* Help callback for the ::scalehrtime dcmd. */
static void
scalehrtime_help(void)
{
	mdb_printf("%s", scalehrtime_desc);
}
792 
793 /*
794  * NSEC_SHIFT is replicated here (it is not defined in a header file),
795  * but for amusement, the reader is directed to the comment that explains
796  * the rationale for this particular value on x86.  Spoiler:  the value is
797  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
798  * in that comment sounds too familiar, it's because your author also wrote
799  * that code -- some fifteen years prior to this writing in 2011...)
800  */
801 #define	NSEC_SHIFT 5
802 
803 /*ARGSUSED*/
804 static int
805 scalehrtime_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
806 {
807 	uint32_t nsec_scale;
808 	hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1;
809 	unsigned int *tscp = (unsigned int *)&tsc;
810 	uintptr_t scalehrtimef;
811 	uint64_t scale;
812 	GElf_Sym sym;
813 	int expected = !(flags & DCMD_ADDRSPEC);
814 	uint_t absolute = FALSE, relative = FALSE;
815 
816 	if (mdb_getopts(argc, argv,
817 	    'a', MDB_OPT_SETBITS, TRUE, &absolute,
818 	    'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected)
819 		return (DCMD_USAGE);
820 
821 	if (absolute && relative) {
822 		mdb_warn("can't specify both -a and -r\n");
823 		return (DCMD_USAGE);
824 	}
825 
826 	if (expected == 1) {
827 		switch (argv[argc - 1].a_type) {
828 		case MDB_TYPE_STRING:
829 			tsc = mdb_strtoull(argv[argc - 1].a_un.a_str);
830 			break;
831 		case MDB_TYPE_IMMEDIATE:
832 			tsc = argv[argc - 1].a_un.a_val;
833 			break;
834 		default:
835 			return (DCMD_USAGE);
836 		}
837 	}
838 
839 	if (mdb_readsym(&scalehrtimef,
840 	    sizeof (scalehrtimef), "scalehrtimef") == -1) {
841 		mdb_warn("couldn't read 'scalehrtimef'");
842 		return (DCMD_ERR);
843 	}
844 
845 	if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
846 		mdb_warn("couldn't find 'tsc_scalehrtime'");
847 		return (DCMD_ERR);
848 	}
849 
850 	if (sym.st_value != scalehrtimef) {
851 		mdb_warn("::scalehrtime requires that scalehrtimef "
852 		    "be set to tsc_scalehrtime\n");
853 		return (DCMD_ERR);
854 	}
855 
856 	if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
857 		mdb_warn("couldn't read 'nsec_scale'");
858 		return (DCMD_ERR);
859 	}
860 
861 	if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) {
862 		mdb_warn("couldn't read 'tsc_last'");
863 		return (DCMD_ERR);
864 	}
865 
866 	if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) {
867 		mdb_warn("couldn't read 'tsc_hrtime_base'");
868 		return (DCMD_ERR);
869 	}
870 
871 	/*
872 	 * If our time is greater than half of tsc_last, we will take our
873 	 * delta against tsc_last, convert it, and add that to (or subtract it
874 	 * from) tsc_hrtime_base.  This mimics what the kernel actually does
875 	 * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much
876 	 * higher precision result than trying to convert a large tsc value.
877 	 */
878 	if (absolute || (tsc > (tsc_last >> 1) && !relative)) {
879 		if (tsc > tsc_last) {
880 			tsc = tsc - tsc_last;
881 		} else {
882 			tsc = tsc_last - tsc;
883 			mult = -1;
884 		}
885 	} else {
886 		base = 0;
887 	}
888 
889 	scale = (uint64_t)nsec_scale;
890 
891 	hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
892 	hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
893 
894 	mdb_printf("0x%llx\n", base + (hrt * mult));
895 
896 	return (DCMD_OK);
897 }
898 
899 /*
900  * The x86 feature set is implemented as a bitmap array. That bitmap array is
901  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
902  * macro. We have the names for each of these features in unix's text segment
903  * so we do not have to duplicate them and instead just look them up.
904  */
905 /*ARGSUSED*/
906 static int
907 x86_featureset_dcmd(uintptr_t addr, uint_t flags, int argc,
908     const mdb_arg_t *argv)
909 {
910 	void *fset;
911 	GElf_Sym sym;
912 	uintptr_t nptr;
913 	char name[128];
914 	int ii;
915 
916 	size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
917 
918 	if (argc != 0)
919 		return (DCMD_USAGE);
920 
921 	if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
922 		mdb_warn("couldn't find x86_feature_names");
923 		return (DCMD_ERR);
924 	}
925 
926 	fset = mdb_zalloc(sz, UM_NOSLEEP);
927 	if (fset == NULL) {
928 		mdb_warn("failed to allocate memory for x86_featureset");
929 		return (DCMD_ERR);
930 	}
931 
932 	if (mdb_readvar(fset, "x86_featureset") != sz) {
933 		mdb_warn("failed to read x86_featureset");
934 		mdb_free(fset, sz);
935 		return (DCMD_ERR);
936 	}
937 
938 	for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
939 		if (!BT_TEST((ulong_t *)fset, ii))
940 			continue;
941 
942 		if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
943 		    sizeof (void *) * ii) != sizeof (char *)) {
944 			mdb_warn("failed to read feature array %d", ii);
945 			mdb_free(fset, sz);
946 			return (DCMD_ERR);
947 		}
948 
949 		if (mdb_readstr(name, sizeof (name), nptr) == -1) {
950 			mdb_warn("failed to read feature %d", ii);
951 			mdb_free(fset, sz);
952 			return (DCMD_ERR);
953 		}
954 		mdb_printf("%s\n", name);
955 	}
956 
957 	mdb_free(fset, sz);
958 	return (DCMD_OK);
959 }
960 
#ifdef _KMDB
/*
 * ::sysregs dcmd (kmdb only): print %cr0, %cr2, %cr3 and %cr4 with their
 * flag bits decoded, followed by the base and limit of the GDT register.
 * When CR4_PCIDE is set, the low bits of %cr3 are shown as a PCID instead
 * of being decoded as flags.
 */
/* ARGSUSED */
static int
sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	static const mdb_bitmask_t cr0_flag_bits[] = {
		{ "PE",		CR0_PE,		CR0_PE },
		{ "MP",		CR0_MP,		CR0_MP },
		{ "EM",		CR0_EM,		CR0_EM },
		{ "TS",		CR0_TS,		CR0_TS },
		{ "ET",		CR0_ET,		CR0_ET },
		{ "NE",		CR0_NE,		CR0_NE },
		{ "WP",		CR0_WP,		CR0_WP },
		{ "AM",		CR0_AM,		CR0_AM },
		{ "NW",		CR0_NW,		CR0_NW },
		{ "CD",		CR0_CD,		CR0_CD },
		{ "PG",		CR0_PG,		CR0_PG },
		{ NULL,		0,		0 }
	};

	static const mdb_bitmask_t cr3_flag_bits[] = {
		{ "PCD",	CR3_PCD,	CR3_PCD },
		{ "PWT",	CR3_PWT,	CR3_PWT },
		{ NULL,		0,		0 }
	};

	static const mdb_bitmask_t cr4_flag_bits[] = {
		{ "VME",	CR4_VME,	CR4_VME },
		{ "PVI",	CR4_PVI,	CR4_PVI },
		{ "TSD",	CR4_TSD,	CR4_TSD },
		{ "DE",		CR4_DE,		CR4_DE },
		{ "PSE",	CR4_PSE,	CR4_PSE },
		{ "PAE",	CR4_PAE,	CR4_PAE },
		{ "MCE",	CR4_MCE,	CR4_MCE },
		{ "PGE",	CR4_PGE,	CR4_PGE },
		{ "PCE",	CR4_PCE,	CR4_PCE },
		{ "OSFXSR",	CR4_OSFXSR,	CR4_OSFXSR },
		{ "OSXMMEXCPT",	CR4_OSXMMEXCPT,	CR4_OSXMMEXCPT },
		{ "VMXE",	CR4_VMXE,	CR4_VMXE },
		{ "SMXE",	CR4_SMXE,	CR4_SMXE },
		{ "PCIDE",	CR4_PCIDE,	CR4_PCIDE },
		{ "OSXSAVE",	CR4_OSXSAVE,	CR4_OSXSAVE },
		{ "SMEP",	CR4_SMEP,	CR4_SMEP },
		{ "SMAP",	CR4_SMAP,	CR4_SMAP },
		{ NULL,		0,		0 }
	};

	ulong_t cr0 = kmdb_unix_getcr0();
	ulong_t cr2 = kmdb_unix_getcr2();
	ulong_t cr3 = kmdb_unix_getcr3();
	ulong_t cr4 = kmdb_unix_getcr4();
	desctbr_t gdtr;

	kmdb_unix_getgdtr(&gdtr);

	mdb_printf("%%cr0 = 0x%lx <%b>\n", cr0, cr0, cr0_flag_bits);
	mdb_printf("%%cr2 = 0x%lx <%a>\n", cr2, cr2);

	if (!(cr4 & CR4_PCIDE)) {
		mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx flags:%b>\n", cr3,
		    cr3 >> MMU_PAGESHIFT, cr3, cr3_flag_bits);
	} else {
		mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx pcid:%lu>\n", cr3,
		    cr3 >> MMU_PAGESHIFT, cr3 & MMU_PAGEOFFSET);
	}

	mdb_printf("%%cr4 = 0x%lx <%b>\n", cr4, cr4, cr4_flag_bits);

	mdb_printf("%%gdtr.base = 0x%lx, %%gdtr.limit = 0x%hx\n",
	    gdtr.dtr_base, gdtr.dtr_limit);

	return (DCMD_OK);
}
#endif
1037 
1038 extern void xcall_help(void);
1039 extern int xcall_dcmd(uintptr_t, uint_t, int, const mdb_arg_t *);
1040 
1041 static const mdb_dcmd_t dcmds[] = {
1042 	{ "gate_desc", ":", "dump a gate descriptor", gate_desc },
1043 	{ "idt", ":[-v]", "dump an IDT", idt },
1044 	{ "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
1045 	{ "vatopfn", ":[-a as]", "translate address to physical page",
1046 	    va2pfn_dcmd },
1047 	{ "report_maps", ":[-m]",
1048 	    "Given PFN, report mappings / page table usage",
1049 	    report_maps_dcmd, report_maps_help },
1050 	{ "htables", "", "Given hat_t *, lists all its htable_t * values",
1051 	    htables_dcmd, htables_help },
1052 	{ "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
1053 	    ptable_dcmd, ptable_help },
1054 	{ "ptmap", ":", "Given a cr3 value, dump all mappings",
1055 	    ptmap_dcmd, ptmap_help },
1056 	{ "pte", ":[-l N]", "print human readable page table entry",
1057 	    pte_dcmd },
1058 	{ "pfntomfn", ":", "convert physical page to hypervisor machine page",
1059 	    pfntomfn_dcmd },
1060 	{ "mfntopfn", ":", "convert hypervisor machine page to physical page",
1061 	    mfntopfn_dcmd },
1062 	{ "memseg_list", ":", "show memseg list", memseg_list },
1063 	{ "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time",
1064 	    scalehrtime_dcmd, scalehrtime_help },
1065 	{ "x86_featureset", NULL, "dump the x86_featureset vector",
1066 		x86_featureset_dcmd },
1067 	{ "xcall", ":", "print CPU cross-call state", xcall_dcmd, xcall_help },
1068 #ifdef _KMDB
1069 	{ "sysregs", NULL, "dump system registers", sysregs_dcmd },
1070 #endif
1071 	{ NULL }
1072 };
1073 
1074 static const mdb_walker_t walkers[] = {
1075 	{ "ttrace", "walks trap trace buffers in reverse chronological order",
1076 		ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
1077 	{ "mutex_owner", "walks the owner of a mutex",
1078 		mutex_owner_init, mutex_owner_step },
1079 	{ "memseg", "walk the memseg structures",
1080 		memseg_walk_init, memseg_walk_step, memseg_walk_fini },
1081 	{ NULL }
1082 };
1083 
1084 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1085 
1086 const mdb_modinfo_t *
1087 _mdb_init(void)
1088 {
1089 	return (&modinfo);
1090 }
1091 
/*
 * Module unload hook: calls free_mmu(), which is defined outside this
 * file.
 */
void
_mdb_fini(void)
{
	free_mmu();
}
1097