xref: /illumos-gate/usr/src/cmd/mdb/i86pc/modules/unix/unix.c (revision 2a295025ab352ac2f6469a947d5b5e2f9379f943)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
24  * Copyright 2019 Joyent, Inc.
25  */
26 
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <mdb/mdb_x86util.h>
30 #include <sys/cpuvar.h>
31 #include <sys/systm.h>
32 #include <sys/traptrace.h>
33 #include <sys/x_call.h>
34 #include <sys/xc_levels.h>
35 #include <sys/avintr.h>
36 #include <sys/systm.h>
37 #include <sys/trap.h>
38 #include <sys/mutex.h>
39 #include <sys/mutex_impl.h>
40 #include "i86mmu.h"
41 #include "unix_sup.h"
42 #include <sys/apix.h>
43 #include <sys/x86_archext.h>
44 #include <sys/bitmap.h>
45 #include <sys/controlregs.h>
46 
47 #define	TT_HDLR_WIDTH	17
48 
49 
50 /* apix only */
51 static apix_impl_t *d_apixs[NCPU];
52 static int use_apix = 0;
53 
54 static int
55 ttrace_ttr_size_check(void)
56 {
57 	mdb_ctf_id_t ttrtid;
58 	ssize_t ttr_size;
59 
60 	if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
61 	    mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
62 		mdb_warn("failed to determine size of trap_trace_rec_t; "
63 		    "non-TRAPTRACE kernel?\n");
64 		return (0);
65 	}
66 
67 	if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
68 	    sizeof (trap_trace_rec_t)) {
69 		/*
70 		 * On Intel machines, this will happen when TTR_STACK_DEPTH
71 		 * is changed.  This code could be smarter, and could
72 		 * dynamically adapt to different depths, but not until a
73 		 * need for such adaptation is demonstrated.
74 		 */
75 		mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
76 		    "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
77 		return (0);
78 	}
79 
80 	return (1);
81 }
82 
83 int
84 ttrace_walk_init(mdb_walk_state_t *wsp)
85 {
86 	trap_trace_ctl_t *ttcp;
87 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
88 	int i;
89 
90 	if (!ttrace_ttr_size_check())
91 		return (WALK_ERR);
92 
93 	ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
94 
95 	if (wsp->walk_addr != 0) {
96 		mdb_warn("ttrace only supports global walks\n");
97 		return (WALK_ERR);
98 	}
99 
100 	if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
101 		mdb_warn("symbol 'trap_trace_ctl' not found; "
102 		    "non-TRAPTRACE kernel?\n");
103 		mdb_free(ttcp, ttc_size);
104 		return (WALK_ERR);
105 	}
106 
107 	/*
108 	 * We'll poach the ttc_current pointer (which isn't used for
109 	 * anything) to store a pointer to our current TRAPTRACE record.
110 	 * This allows us to only keep the array of trap_trace_ctl structures
111 	 * as our walker state (ttc_current may be the only kernel data
112 	 * structure member added exclusively to make writing the mdb walker
113 	 * a little easier).
114 	 */
115 	for (i = 0; i < NCPU; i++) {
116 		trap_trace_ctl_t *ttc = &ttcp[i];
117 
118 		if (ttc->ttc_first == 0)
119 			continue;
120 
121 		/*
122 		 * Assign ttc_current to be the last completed record.
123 		 * Note that the error checking (i.e. in the ttc_next ==
124 		 * ttc_first case) is performed in the step function.
125 		 */
126 		ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
127 	}
128 
129 	wsp->walk_data = ttcp;
130 	return (WALK_NEXT);
131 }
132 
133 int
134 ttrace_walk_step(mdb_walk_state_t *wsp)
135 {
136 	trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
137 	trap_trace_rec_t rec;
138 	int rval, i, recsize = sizeof (trap_trace_rec_t);
139 	hrtime_t latest = 0;
140 
141 	/*
142 	 * Loop through the CPUs, looking for the latest trap trace record
143 	 * (we want to walk through the trap trace records in reverse
144 	 * chronological order).
145 	 */
146 	for (i = 0; i < NCPU; i++) {
147 		ttc = &ttcp[i];
148 
149 		if (ttc->ttc_current == 0)
150 			continue;
151 
152 		if (ttc->ttc_current < ttc->ttc_first)
153 			ttc->ttc_current = ttc->ttc_limit - recsize;
154 
155 		if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
156 			mdb_warn("couldn't read rec at %p", ttc->ttc_current);
157 			return (WALK_ERR);
158 		}
159 
160 		if (rec.ttr_stamp > latest) {
161 			latest = rec.ttr_stamp;
162 			latest_ttc = ttc;
163 		}
164 	}
165 
166 	if (latest == 0)
167 		return (WALK_DONE);
168 
169 	ttc = latest_ttc;
170 
171 	if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
172 		mdb_warn("couldn't read rec at %p", ttc->ttc_current);
173 		return (WALK_ERR);
174 	}
175 
176 	rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
177 
178 	if (ttc->ttc_current == ttc->ttc_next)
179 		ttc->ttc_current = 0;
180 	else
181 		ttc->ttc_current -= sizeof (trap_trace_rec_t);
182 
183 	return (rval);
184 }
185 
186 void
187 ttrace_walk_fini(mdb_walk_state_t *wsp)
188 {
189 	mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
190 }
191 
192 static int
193 ttrace_syscall(trap_trace_rec_t *rec)
194 {
195 	GElf_Sym sym;
196 	int sysnum = rec->ttr_sysnum;
197 	uintptr_t addr;
198 	struct sysent sys;
199 
200 	mdb_printf("%-3x", sysnum);
201 
202 	if (rec->ttr_sysnum > NSYSCALL) {
203 		mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
204 		return (0);
205 	}
206 
207 	if (mdb_lookup_by_name("sysent", &sym) == -1) {
208 		mdb_warn("\ncouldn't find 'sysent'");
209 		return (-1);
210 	}
211 
212 	addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
213 
214 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
215 		mdb_warn("\nsysnum %d out-of-range\n", sysnum);
216 		return (-1);
217 	}
218 
219 	if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
220 		mdb_warn("\nfailed to read sysent at %p", addr);
221 		return (-1);
222 	}
223 
224 	mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
225 
226 	return (0);
227 }
228 
229 static int
230 ttrace_interrupt(trap_trace_rec_t *rec)
231 {
232 	GElf_Sym sym;
233 	uintptr_t addr;
234 	struct av_head hd;
235 	struct autovec av;
236 
237 	switch (rec->ttr_regs.r_trapno) {
238 	case T_SOFTINT:
239 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
240 		return (0);
241 	default:
242 		break;
243 	}
244 
245 	mdb_printf("%-3x ", rec->ttr_vector);
246 
247 	if (mdb_lookup_by_name("autovect", &sym) == -1) {
248 		mdb_warn("\ncouldn't find 'autovect'");
249 		return (-1);
250 	}
251 
252 	addr = (uintptr_t)sym.st_value +
253 	    rec->ttr_vector * sizeof (struct av_head);
254 
255 	if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
256 		mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
257 		return (-1);
258 	}
259 
260 	if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
261 		mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
262 		return (-1);
263 	}
264 
265 	if (hd.avh_link == NULL) {
266 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
267 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
268 		else
269 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
270 	} else {
271 		if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
272 			mdb_warn("couldn't read autovec at %p",
273 			    (uintptr_t)hd.avh_link);
274 		}
275 
276 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
277 	}
278 
279 	return (0);
280 }
281 
282 static int
283 ttrace_apix_interrupt(trap_trace_rec_t *rec)
284 {
285 	struct autovec av;
286 	apix_impl_t apix;
287 	apix_vector_t apix_vector;
288 
289 	switch (rec->ttr_regs.r_trapno) {
290 	case T_SOFTINT:
291 		mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
292 		return (0);
293 	default:
294 		break;
295 	}
296 
297 	mdb_printf("%-3x ", rec->ttr_vector);
298 
299 	/* Read the per CPU apix entry */
300 	if (mdb_vread(&apix, sizeof (apix_impl_t),
301 	    (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
302 		mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
303 		return (-1);
304 	}
305 	if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
306 	    (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
307 		mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
308 		return (-1);
309 	}
310 	if (apix_vector.v_share == 0) {
311 		if (rec->ttr_ipl == XC_CPUPOKE_PIL)
312 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
313 		else
314 			mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
315 	} else {
316 		if (mdb_vread(&av, sizeof (struct autovec),
317 		    (uintptr_t)(apix_vector.v_autovect)) == -1) {
318 			mdb_warn("couldn't read autovec at %p",
319 			    (uintptr_t)apix_vector.v_autovect);
320 		}
321 
322 		mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
323 	}
324 
325 	return (0);
326 }
327 
328 
329 static struct {
330 	int tt_trapno;
331 	char *tt_name;
332 } ttrace_traps[] = {
333 	{ T_ZERODIV,	"divide-error" },
334 	{ T_SGLSTP,	"debug-exception" },
335 	{ T_NMIFLT,	"nmi-interrupt" },
336 	{ T_BPTFLT,	"breakpoint" },
337 	{ T_OVFLW,	"into-overflow" },
338 	{ T_BOUNDFLT,	"bound-exceeded" },
339 	{ T_ILLINST,	"invalid-opcode" },
340 	{ T_NOEXTFLT,	"device-not-avail" },
341 	{ T_DBLFLT,	"double-fault" },
342 	{ T_EXTOVRFLT,	"segment-overrun" },
343 	{ T_TSSFLT,	"invalid-tss" },
344 	{ T_SEGFLT,	"segment-not-pres" },
345 	{ T_STKFLT,	"stack-fault" },
346 	{ T_GPFLT,	"general-protectn" },
347 	{ T_PGFLT,	"page-fault" },
348 	{ T_EXTERRFLT,	"error-fault" },
349 	{ T_ALIGNMENT,	"alignment-check" },
350 	{ T_MCE,	"machine-check" },
351 	{ T_SIMDFPE,	"sse-exception" },
352 
353 	{ T_DBGENTR,	"debug-enter" },
354 	{ T_FASTTRAP,	"fasttrap-0xd2" },
355 	{ T_SYSCALLINT,	"syscall-0x91" },
356 	{ T_DTRACE_RET,	"dtrace-ret" },
357 	{ T_SOFTINT,	"softint" },
358 	{ T_INTERRUPT,	"interrupt" },
359 	{ T_FAULT,	"fault" },
360 	{ T_AST,	"ast" },
361 	{ T_SYSCALL,	"syscall" },
362 
363 	{ 0,		NULL }
364 };
365 
366 static int
367 ttrace_trap(trap_trace_rec_t *rec)
368 {
369 	int i;
370 
371 	if (rec->ttr_regs.r_trapno == T_AST)
372 		mdb_printf("%-3s ", "-");
373 	else
374 		mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
375 
376 	for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
377 		if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
378 			break;
379 	}
380 
381 	if (ttrace_traps[i].tt_name == NULL)
382 		mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
383 	else
384 		mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
385 
386 	return (0);
387 }
388 
389 static void
390 ttrace_intr_detail(trap_trace_rec_t *rec)
391 {
392 	mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
393 	    rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
394 }
395 
396 static struct {
397 	uchar_t t_marker;
398 	char *t_name;
399 	int (*t_hdlr)(trap_trace_rec_t *);
400 } ttrace_hdlr[] = {
401 	{ TT_SYSCALL, "sysc", ttrace_syscall },
402 	{ TT_SYSENTER, "syse", ttrace_syscall },
403 	{ TT_SYSC, "asys", ttrace_syscall },
404 	{ TT_SYSC64, "sc64", ttrace_syscall },
405 	{ TT_INTERRUPT, "intr", ttrace_interrupt },
406 	{ TT_TRAP, "trap", ttrace_trap },
407 	{ TT_EVENT, "evnt", ttrace_trap },
408 	{ 0, NULL, NULL }
409 };
410 
411 typedef struct ttrace_dcmd {
412 	processorid_t ttd_cpu;
413 	uint_t ttd_extended;
414 	uintptr_t ttd_kthread;
415 	trap_trace_ctl_t ttd_ttc[NCPU];
416 } ttrace_dcmd_t;
417 
418 #if defined(__amd64)
419 
420 #define	DUMP(reg) #reg, regs->r_##reg
421 #define	THREEREGS	"         %3s: %16lx %3s: %16lx %3s: %16lx\n"
422 
423 static void
424 ttrace_dumpregs(trap_trace_rec_t *rec)
425 {
426 	struct regs *regs = &rec->ttr_regs;
427 
428 	mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
429 	mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
430 	mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
431 	mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
432 	mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
433 	mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
434 	mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
435 	mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
436 	mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
437 	mdb_printf("         %3s: %16lx %3s: %16lx\n",
438 	    "fsb", regs->__r_fsbase,
439 	    "gsb", regs->__r_gsbase);
440 	mdb_printf("\n");
441 }
442 
443 #else
444 
445 #define	DUMP(reg) #reg, regs->r_##reg
446 #define	FOURREGS	"         %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"
447 
448 static void
449 ttrace_dumpregs(trap_trace_rec_t *rec)
450 {
451 	struct regs *regs = &rec->ttr_regs;
452 
453 	mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
454 	mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
455 	mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
456 	mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
457 	    DUMP(pc), DUMP(cs));
458 	mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
459 	    "cr2", rec->ttr_cr2);
460 	mdb_printf("\n");
461 }
462 
463 #endif	/* __amd64 */
464 
465 int
466 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
467 {
468 	struct regs *regs = &rec->ttr_regs;
469 	processorid_t cpu = -1, i;
470 
471 	for (i = 0; i < NCPU; i++) {
472 		if (addr >= dcmd->ttd_ttc[i].ttc_first &&
473 		    addr < dcmd->ttd_ttc[i].ttc_limit) {
474 			cpu = i;
475 			break;
476 		}
477 	}
478 
479 	if (cpu == -1) {
480 		mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
481 		return (WALK_ERR);
482 	}
483 
484 	if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
485 		return (WALK_NEXT);
486 
487 	if (dcmd->ttd_kthread != 0 &&
488 	    dcmd->ttd_kthread != rec->ttr_curthread)
489 		return (WALK_NEXT);
490 
491 	mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
492 
493 	for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
494 		if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
495 			continue;
496 		mdb_printf("%4s ", ttrace_hdlr[i].t_name);
497 		if (ttrace_hdlr[i].t_hdlr(rec) == -1)
498 			return (WALK_ERR);
499 	}
500 
501 	mdb_printf(" %a\n", regs->r_pc);
502 
503 	if (dcmd->ttd_extended == FALSE)
504 		return (WALK_NEXT);
505 
506 	if (rec->ttr_marker == TT_INTERRUPT)
507 		ttrace_intr_detail(rec);
508 	else
509 		ttrace_dumpregs(rec);
510 
511 	if (rec->ttr_sdepth > 0) {
512 		for (i = 0; i < rec->ttr_sdepth; i++) {
513 			if (i >= TTR_STACK_DEPTH) {
514 				mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
515 				    "should be <= %d)\n", " ", rec->ttr_sdepth,
516 				    TTR_STACK_DEPTH);
517 				break;
518 			}
519 
520 			mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
521 		}
522 		mdb_printf("\n");
523 	}
524 
525 	return (WALK_NEXT);
526 }
527 
528 int
529 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
530 {
531 	ttrace_dcmd_t dcmd;
532 	trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
533 	trap_trace_rec_t rec;
534 	size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
535 
536 	if (!ttrace_ttr_size_check())
537 		return (WALK_ERR);
538 
539 	bzero(&dcmd, sizeof (dcmd));
540 	dcmd.ttd_cpu = -1;
541 	dcmd.ttd_extended = FALSE;
542 
543 	if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
544 		mdb_warn("symbol 'trap_trace_ctl' not found; "
545 		    "non-TRAPTRACE kernel?\n");
546 		return (DCMD_ERR);
547 	}
548 
549 	if (mdb_getopts(argc, argv,
550 	    'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
551 	    't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
552 		return (DCMD_USAGE);
553 
554 	if (DCMD_HDRSPEC(flags)) {
555 		mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
556 		    "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
557 		    " EIP");
558 	}
559 
560 	if (flags & DCMD_ADDRSPEC) {
561 		if (addr >= NCPU) {
562 			if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
563 				mdb_warn("couldn't read trap trace record "
564 				    "at %p", addr);
565 				return (DCMD_ERR);
566 			}
567 
568 			if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
569 				return (DCMD_ERR);
570 
571 			return (DCMD_OK);
572 		}
573 		dcmd.ttd_cpu = addr;
574 	}
575 
576 	if (mdb_readvar(&use_apix, "apix_enable") == -1) {
577 		mdb_warn("failed to read apix_enable");
578 		use_apix = 0;
579 	}
580 
581 	if (use_apix) {
582 		if (mdb_readvar(&d_apixs, "apixs") == -1) {
583 			mdb_warn("\nfailed to read apixs.");
584 			return (DCMD_ERR);
585 		}
586 		/* change to apix ttrace interrupt handler */
587 		ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
588 	}
589 
590 	if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
591 		mdb_warn("couldn't walk 'ttrace'");
592 		return (DCMD_ERR);
593 	}
594 
595 	return (DCMD_OK);
596 }
597 
598 /*ARGSUSED*/
599 int
600 mutex_owner_init(mdb_walk_state_t *wsp)
601 {
602 	return (WALK_NEXT);
603 }
604 
605 int
606 mutex_owner_step(mdb_walk_state_t *wsp)
607 {
608 	uintptr_t addr = wsp->walk_addr;
609 	mutex_impl_t mtx;
610 	uintptr_t owner;
611 	kthread_t thr;
612 
613 	if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
614 		return (WALK_ERR);
615 
616 	if (!MUTEX_TYPE_ADAPTIVE(&mtx))
617 		return (WALK_DONE);
618 
619 	if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == 0)
620 		return (WALK_DONE);
621 
622 	if (mdb_vread(&thr, sizeof (thr), owner) != -1)
623 		(void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
624 
625 	return (WALK_DONE);
626 }
627 
628 static void
629 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
630 {
631 	const char *lastnm;
632 	uint_t lastval;
633 	char type[4];
634 
635 	switch (gate->sgd_type) {
636 	case SDT_SYSIGT:
637 		strcpy(type, "int");
638 		break;
639 	case SDT_SYSTGT:
640 		strcpy(type, "trp");
641 		break;
642 	case SDT_SYSTASKGT:
643 		strcpy(type, "tsk");
644 		break;
645 	default:
646 		(void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
647 	}
648 
649 #if defined(__amd64)
650 	lastnm = "IST";
651 	lastval = gate->sgd_ist;
652 #else
653 	lastnm = "STK";
654 	lastval = gate->sgd_stkcpy;
655 #endif
656 
657 	if (header) {
658 		mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
659 		    "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
660 		    "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
661 	}
662 
663 	mdb_printf("%s", label);
664 
665 	if (gate->sgd_type == SDT_SYSTASKGT)
666 		mdb_printf("%-30s ", "-");
667 	else
668 		mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
669 
670 	mdb_printf("%4x  %d  %c %3s %2x\n", gate->sgd_selector,
671 	    gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
672 }
673 
674 /*ARGSUSED*/
675 static int
676 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
677 {
678 	gate_desc_t gate;
679 
680 	if (argc != 0 || !(flags & DCMD_ADDRSPEC))
681 		return (DCMD_USAGE);
682 
683 	if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
684 	    sizeof (gate_desc_t)) {
685 		mdb_warn("failed to read gate descriptor at %p\n", addr);
686 		return (DCMD_ERR);
687 	}
688 
689 	gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
690 
691 	return (DCMD_OK);
692 }
693 
694 /*ARGSUSED*/
695 static int
696 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
697 {
698 	int i;
699 
700 	if (!(flags & DCMD_ADDRSPEC)) {
701 		GElf_Sym idt0_va;
702 		gate_desc_t *idt0;
703 
704 		if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
705 			mdb_warn("failed to find VA of idt0");
706 			return (DCMD_ERR);
707 		}
708 
709 		addr = idt0_va.st_value;
710 		if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
711 			mdb_warn("failed to read idt0 at %p\n", addr);
712 			return (DCMD_ERR);
713 		}
714 
715 		addr = (uintptr_t)idt0;
716 	}
717 
718 	for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
719 		gate_desc_t gate;
720 		char label[6];
721 
722 		if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
723 		    sizeof (gate_desc_t)) {
724 			mdb_warn("failed to read gate descriptor at %p\n",
725 			    addr);
726 			return (DCMD_ERR);
727 		}
728 
729 		(void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
730 		gate_desc_dump(&gate, label, i == 0);
731 	}
732 
733 	return (DCMD_OK);
734 }
735 
/* Help text for the ::htables dcmd. */
static void
htables_help(void)
{
	mdb_printf("%s%s",
	    "Given a (hat_t *), generates the list of all (htable_t *)s\n",
	    "that correspond to that address space\n");
}
743 
/* Help text for the ::report_maps dcmd. */
static void
report_maps_help(void)
{
	mdb_printf("%s%s%s%s",
	    "Given a PFN, report HAT structures that map the page, or use\n",
	    "the page as a pagetable.\n",
	    "\n",
	    "-m Interpret the PFN as an MFN (machine frame number)\n");
}
753 
/* Help text for the ::ptable dcmd. */
static void
ptable_help(void)
{
	mdb_printf("%s%s%s%s%s",
	    "Given a PFN holding a page table, print its contents, and\n",
	    "the address of the corresponding htable structure.\n",
	    "\n",
	    "-m Interpret the PFN as an MFN (machine frame number)\n",
	    "-l force page table level (3 is top)\n");
}
764 
/* Help text for the ::ptmap dcmd. */
static void
ptmap_help(void)
{
	mdb_printf("%s%s%s%s",
	    "Report all mappings represented by the page table hierarchy\n",
	    "rooted at the given cr3 value / physical address.\n",
	    "\n",
	    "-w run ::whatis on mapping start addresses\n");
}
774 
/* Long description printed by scalehrtime_help(). */
static const char *const scalehrtime_desc =
	"Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n"
	"are used as both a quick way of accumulating relative time (as for\n"
	"usage) and as a quick way of getting the absolute current time.\n"
	"These uses require slightly different scaling algorithms. By\n"
	"default, if a specified time is greater than half of the unscaled\n"
	"time at the last tick (that is, if the unscaled time represents\n"
	"more than half the time since boot), the timestamp is assumed to\n"
	"be absolute, and the scaling algorithm used mimics that which the\n"
	"kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n"
	"be relative, and the algorithm mimics scalehrtime(). This behavior\n"
	"can be overridden by forcing the unscaled time to be interpreted\n"
	"as relative (via -r) or absolute (via -a).\n";

/* Help text for the ::scalehrtime dcmd. */
static void
scalehrtime_help(void)
{
	const char *text = scalehrtime_desc;

	mdb_printf("%s", text);
}
794 
795 /*
796  * NSEC_SHIFT is replicated here (it is not defined in a header file),
797  * but for amusement, the reader is directed to the comment that explains
798  * the rationale for this particular value on x86.  Spoiler:  the value is
799  * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
800  * in that comment sounds too familiar, it's because your author also wrote
801  * that code -- some fifteen years prior to this writing in 2011...)
802  */
803 #define	NSEC_SHIFT 5
804 
805 /*ARGSUSED*/
806 static int
807 scalehrtime_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
808 {
809 	uint32_t nsec_scale;
810 	hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1;
811 	unsigned int *tscp = (unsigned int *)&tsc;
812 	uintptr_t scalehrtimef;
813 	uint64_t scale;
814 	GElf_Sym sym;
815 	int expected = !(flags & DCMD_ADDRSPEC);
816 	uint_t absolute = FALSE, relative = FALSE;
817 
818 	if (mdb_getopts(argc, argv,
819 	    'a', MDB_OPT_SETBITS, TRUE, &absolute,
820 	    'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected)
821 		return (DCMD_USAGE);
822 
823 	if (absolute && relative) {
824 		mdb_warn("can't specify both -a and -r\n");
825 		return (DCMD_USAGE);
826 	}
827 
828 	if (expected == 1) {
829 		switch (argv[argc - 1].a_type) {
830 		case MDB_TYPE_STRING:
831 			tsc = mdb_strtoull(argv[argc - 1].a_un.a_str);
832 			break;
833 		case MDB_TYPE_IMMEDIATE:
834 			tsc = argv[argc - 1].a_un.a_val;
835 			break;
836 		default:
837 			return (DCMD_USAGE);
838 		}
839 	}
840 
841 	if (mdb_readsym(&scalehrtimef,
842 	    sizeof (scalehrtimef), "scalehrtimef") == -1) {
843 		mdb_warn("couldn't read 'scalehrtimef'");
844 		return (DCMD_ERR);
845 	}
846 
847 	if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
848 		mdb_warn("couldn't find 'tsc_scalehrtime'");
849 		return (DCMD_ERR);
850 	}
851 
852 	if (sym.st_value != scalehrtimef) {
853 		mdb_warn("::scalehrtime requires that scalehrtimef "
854 		    "be set to tsc_scalehrtime\n");
855 		return (DCMD_ERR);
856 	}
857 
858 	if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
859 		mdb_warn("couldn't read 'nsec_scale'");
860 		return (DCMD_ERR);
861 	}
862 
863 	if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) {
864 		mdb_warn("couldn't read 'tsc_last'");
865 		return (DCMD_ERR);
866 	}
867 
868 	if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) {
869 		mdb_warn("couldn't read 'tsc_hrtime_base'");
870 		return (DCMD_ERR);
871 	}
872 
873 	/*
874 	 * If our time is greater than half of tsc_last, we will take our
875 	 * delta against tsc_last, convert it, and add that to (or subtract it
876 	 * from) tsc_hrtime_base.  This mimics what the kernel actually does
877 	 * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much
878 	 * higher precision result than trying to convert a large tsc value.
879 	 */
880 	if (absolute || (tsc > (tsc_last >> 1) && !relative)) {
881 		if (tsc > tsc_last) {
882 			tsc = tsc - tsc_last;
883 		} else {
884 			tsc = tsc_last - tsc;
885 			mult = -1;
886 		}
887 	} else {
888 		base = 0;
889 	}
890 
891 	scale = (uint64_t)nsec_scale;
892 
893 	hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
894 	hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
895 
896 	mdb_printf("0x%llx\n", base + (hrt * mult));
897 
898 	return (DCMD_OK);
899 }
900 
901 /*
902  * The x86 feature set is implemented as a bitmap array. That bitmap array is
903  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
904  * macro. We have the names for each of these features in unix's text segment
905  * so we do not have to duplicate them and instead just look them up.
906  */
907 /*ARGSUSED*/
908 static int
909 x86_featureset_dcmd(uintptr_t addr, uint_t flags, int argc,
910     const mdb_arg_t *argv)
911 {
912 	void *fset;
913 	GElf_Sym sym;
914 	uintptr_t nptr;
915 	char name[128];
916 	int ii;
917 
918 	size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
919 
920 	if (argc != 0)
921 		return (DCMD_USAGE);
922 
923 	if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
924 		mdb_warn("couldn't find x86_feature_names");
925 		return (DCMD_ERR);
926 	}
927 
928 	fset = mdb_zalloc(sz, UM_NOSLEEP);
929 	if (fset == NULL) {
930 		mdb_warn("failed to allocate memory for x86_featureset");
931 		return (DCMD_ERR);
932 	}
933 
934 	if (flags & DCMD_ADDRSPEC) {
935 		if (mdb_vread(fset, sz, addr) != sz) {
936 			mdb_warn("failed to read x86_featureset from %p", addr);
937 			mdb_free(fset, sz);
938 			return (DCMD_ERR);
939 		}
940 	} else {
941 		if (mdb_readvar(fset, "x86_featureset") != sz) {
942 			mdb_warn("failed to read x86_featureset");
943 			mdb_free(fset, sz);
944 			return (DCMD_ERR);
945 		}
946 	}
947 
948 	for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
949 		if (!BT_TEST((ulong_t *)fset, ii))
950 			continue;
951 
952 		if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
953 		    sizeof (void *) * ii) != sizeof (char *)) {
954 			mdb_warn("failed to read feature array %d", ii);
955 			mdb_free(fset, sz);
956 			return (DCMD_ERR);
957 		}
958 
959 		if (mdb_readstr(name, sizeof (name), nptr) == -1) {
960 			mdb_printf("unknown feature 0x%x\n", ii);
961 		} else {
962 			mdb_printf("%s\n", name);
963 		}
964 	}
965 
966 	mdb_free(fset, sz);
967 	return (DCMD_OK);
968 }
969 
#ifdef _KMDB
/*
 * ::sysregs dcmd (kmdb only): collect %cr0, %cr2, %cr3, %cr4 and the GDT
 * base and limit through the kmdb_unix_* accessors and print them with
 * mdb_x86_print_sysregs().  Always returns DCMD_OK.
 */
/* ARGSUSED */
static int
sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct sysregs sregs = { 0 };
	desctbr_t gdtr;
#ifdef __amd64
	boolean_t longmode = B_TRUE;
#else
	boolean_t longmode = B_FALSE;
#endif

	/* control registers */
	sregs.sr_cr0 = kmdb_unix_getcr0();
	sregs.sr_cr2 = kmdb_unix_getcr2();
	sregs.sr_cr3 = kmdb_unix_getcr3();
	sregs.sr_cr4 = kmdb_unix_getcr4();

	/* global descriptor table register */
	kmdb_unix_getgdtr(&gdtr);
	sregs.sr_gdtr.d_base = gdtr.dtr_base;
	sregs.sr_gdtr.d_lim = gdtr.dtr_limit;

	mdb_x86_print_sysregs(&sregs, longmode);

	return (DCMD_OK);
}
#endif
997 
998 extern void xcall_help(void);
999 extern int xcall_dcmd(uintptr_t, uint_t, int, const mdb_arg_t *);
1000 
1001 static const mdb_dcmd_t dcmds[] = {
1002 	{ "gate_desc", ":", "dump a gate descriptor", gate_desc },
1003 	{ "idt", ":[-v]", "dump an IDT", idt },
1004 	{ "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
1005 	{ "vatopfn", ":[-a as]", "translate address to physical page",
1006 	    va2pfn_dcmd },
1007 	{ "report_maps", ":[-m]",
1008 	    "Given PFN, report mappings / page table usage",
1009 	    report_maps_dcmd, report_maps_help },
1010 	{ "htables", "", "Given hat_t *, lists all its htable_t * values",
1011 	    htables_dcmd, htables_help },
1012 	{ "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
1013 	    ptable_dcmd, ptable_help },
1014 	{ "ptmap", ":", "Given a cr3 value, dump all mappings",
1015 	    ptmap_dcmd, ptmap_help },
1016 	{ "pte", ":[-l N]", "print human readable page table entry",
1017 	    pte_dcmd },
1018 	{ "pfntomfn", ":", "convert physical page to hypervisor machine page",
1019 	    pfntomfn_dcmd },
1020 	{ "mfntopfn", ":", "convert hypervisor machine page to physical page",
1021 	    mfntopfn_dcmd },
1022 	{ "memseg_list", ":", "show memseg list", memseg_list },
1023 	{ "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time",
1024 	    scalehrtime_dcmd, scalehrtime_help },
1025 	{ "x86_featureset", ":", "dump the x86_featureset vector",
1026 		x86_featureset_dcmd },
1027 	{ "xcall", ":", "print CPU cross-call state", xcall_dcmd, xcall_help },
1028 #ifdef _KMDB
1029 	{ "sysregs", NULL, "dump system registers", sysregs_dcmd },
1030 #endif
1031 	{ NULL }
1032 };
1033 
1034 static const mdb_walker_t walkers[] = {
1035 	{ "ttrace", "walks trap trace buffers in reverse chronological order",
1036 		ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
1037 	{ "mutex_owner", "walks the owner of a mutex",
1038 		mutex_owner_init, mutex_owner_step },
1039 	{ "memseg", "walk the memseg structures",
1040 		memseg_walk_init, memseg_walk_step, memseg_walk_fini },
1041 	{ NULL }
1042 };
1043 
1044 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1045 
1046 const mdb_modinfo_t *
1047 _mdb_init(void)
1048 {
1049 	return (&modinfo);
1050 }
1051 
/*
 * Module unload hook: calls free_mmu(), which is defined outside this file.
 */
void
_mdb_fini(void)
{
	free_mmu();
}
1057