1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
24 * Copyright 2019 Joyent, Inc.
25 */
26
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <mdb/mdb_x86util.h>
30 #include <sys/cpuvar.h>
31 #include <sys/systm.h>
32 #include <sys/traptrace.h>
33 #include <sys/x_call.h>
34 #include <sys/xc_levels.h>
35 #include <sys/avintr.h>
36 #include <sys/systm.h>
37 #include <sys/trap.h>
38 #include <sys/mutex.h>
39 #include <sys/mutex_impl.h>
40 #include "i86mmu.h"
41 #include "unix_sup.h"
42 #include <sys/apix.h>
43 #include <sys/x86_archext.h>
44 #include <sys/bitmap.h>
45 #include <sys/controlregs.h>
46
/* Field width used when printing the HANDLER column of ::ttrace output. */
#define	TT_HDLR_WIDTH	17


/*
 * apix only: d_apixs is this module's copy of the target's "apixs" array and
 * use_apix caches the target's "apix_enable" value.  Both are filled in by
 * the ::ttrace dcmd and consumed by ttrace_apix_interrupt().
 */
static apix_impl_t *d_apixs[NCPU];
static int use_apix = 0;
53
54 static int
ttrace_ttr_size_check(void)55 ttrace_ttr_size_check(void)
56 {
57 mdb_ctf_id_t ttrtid;
58 ssize_t ttr_size;
59
60 if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 ||
61 mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) {
62 mdb_warn("failed to determine size of trap_trace_rec_t; "
63 "non-TRAPTRACE kernel?\n");
64 return (0);
65 }
66
67 if ((ttr_size = mdb_ctf_type_size(ttrtid)) !=
68 sizeof (trap_trace_rec_t)) {
69 /*
70 * On Intel machines, this will happen when TTR_STACK_DEPTH
71 * is changed. This code could be smarter, and could
72 * dynamically adapt to different depths, but not until a
73 * need for such adaptation is demonstrated.
74 */
75 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't "
76 "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t));
77 return (0);
78 }
79
80 return (1);
81 }
82
83 int
ttrace_walk_init(mdb_walk_state_t * wsp)84 ttrace_walk_init(mdb_walk_state_t *wsp)
85 {
86 trap_trace_ctl_t *ttcp;
87 size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
88 int i;
89
90 if (!ttrace_ttr_size_check())
91 return (WALK_ERR);
92
93 ttcp = mdb_zalloc(ttc_size, UM_SLEEP);
94
95 if (wsp->walk_addr != 0) {
96 mdb_warn("ttrace only supports global walks\n");
97 return (WALK_ERR);
98 }
99
100 if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) {
101 mdb_warn("symbol 'trap_trace_ctl' not found; "
102 "non-TRAPTRACE kernel?\n");
103 mdb_free(ttcp, ttc_size);
104 return (WALK_ERR);
105 }
106
107 /*
108 * We'll poach the ttc_current pointer (which isn't used for
109 * anything) to store a pointer to our current TRAPTRACE record.
110 * This allows us to only keep the array of trap_trace_ctl structures
111 * as our walker state (ttc_current may be the only kernel data
112 * structure member added exclusively to make writing the mdb walker
113 * a little easier).
114 */
115 for (i = 0; i < NCPU; i++) {
116 trap_trace_ctl_t *ttc = &ttcp[i];
117
118 if (ttc->ttc_first == 0)
119 continue;
120
121 /*
122 * Assign ttc_current to be the last completed record.
123 * Note that the error checking (i.e. in the ttc_next ==
124 * ttc_first case) is performed in the step function.
125 */
126 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t);
127 }
128
129 wsp->walk_data = ttcp;
130 return (WALK_NEXT);
131 }
132
133 int
ttrace_walk_step(mdb_walk_state_t * wsp)134 ttrace_walk_step(mdb_walk_state_t *wsp)
135 {
136 trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc;
137 trap_trace_rec_t rec;
138 int rval, i, recsize = sizeof (trap_trace_rec_t);
139 hrtime_t latest = 0;
140
141 /*
142 * Loop through the CPUs, looking for the latest trap trace record
143 * (we want to walk through the trap trace records in reverse
144 * chronological order).
145 */
146 for (i = 0; i < NCPU; i++) {
147 ttc = &ttcp[i];
148
149 if (ttc->ttc_current == 0)
150 continue;
151
152 if (ttc->ttc_current < ttc->ttc_first)
153 ttc->ttc_current = ttc->ttc_limit - recsize;
154
155 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
156 mdb_warn("couldn't read rec at %p", ttc->ttc_current);
157 return (WALK_ERR);
158 }
159
160 if (rec.ttr_stamp > latest) {
161 latest = rec.ttr_stamp;
162 latest_ttc = ttc;
163 }
164 }
165
166 if (latest == 0)
167 return (WALK_DONE);
168
169 ttc = latest_ttc;
170
171 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) {
172 mdb_warn("couldn't read rec at %p", ttc->ttc_current);
173 return (WALK_ERR);
174 }
175
176 rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata);
177
178 if (ttc->ttc_current == ttc->ttc_next)
179 ttc->ttc_current = 0;
180 else
181 ttc->ttc_current -= sizeof (trap_trace_rec_t);
182
183 return (rval);
184 }
185
186 void
ttrace_walk_fini(mdb_walk_state_t * wsp)187 ttrace_walk_fini(mdb_walk_state_t *wsp)
188 {
189 mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU);
190 }
191
192 static int
ttrace_syscall(trap_trace_rec_t * rec)193 ttrace_syscall(trap_trace_rec_t *rec)
194 {
195 GElf_Sym sym;
196 int sysnum = rec->ttr_sysnum;
197 uintptr_t addr;
198 struct sysent sys;
199
200 mdb_printf("%-3x", sysnum);
201
202 if (rec->ttr_sysnum > NSYSCALL) {
203 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum);
204 return (0);
205 }
206
207 if (mdb_lookup_by_name("sysent", &sym) == -1) {
208 mdb_warn("\ncouldn't find 'sysent'");
209 return (-1);
210 }
211
212 addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent);
213
214 if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
215 mdb_warn("\nsysnum %d out-of-range\n", sysnum);
216 return (-1);
217 }
218
219 if (mdb_vread(&sys, sizeof (sys), addr) == -1) {
220 mdb_warn("\nfailed to read sysent at %p", addr);
221 return (-1);
222 }
223
224 mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc);
225
226 return (0);
227 }
228
229 static int
ttrace_interrupt(trap_trace_rec_t * rec)230 ttrace_interrupt(trap_trace_rec_t *rec)
231 {
232 GElf_Sym sym;
233 uintptr_t addr;
234 struct av_head hd;
235 struct autovec av;
236
237 switch (rec->ttr_regs.r_trapno) {
238 case T_SOFTINT:
239 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
240 return (0);
241 default:
242 break;
243 }
244
245 mdb_printf("%-3x ", rec->ttr_vector);
246
247 if (mdb_lookup_by_name("autovect", &sym) == -1) {
248 mdb_warn("\ncouldn't find 'autovect'");
249 return (-1);
250 }
251
252 addr = (uintptr_t)sym.st_value +
253 rec->ttr_vector * sizeof (struct av_head);
254
255 if (addr >= (uintptr_t)sym.st_value + sym.st_size) {
256 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector);
257 return (-1);
258 }
259
260 if (mdb_vread(&hd, sizeof (hd), addr) == -1) {
261 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector);
262 return (-1);
263 }
264
265 if (hd.avh_link == NULL) {
266 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
267 mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
268 else
269 mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
270 } else {
271 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) {
272 mdb_warn("couldn't read autovec at %p",
273 (uintptr_t)hd.avh_link);
274 }
275
276 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
277 }
278
279 return (0);
280 }
281
282 static int
ttrace_apix_interrupt(trap_trace_rec_t * rec)283 ttrace_apix_interrupt(trap_trace_rec_t *rec)
284 {
285 struct autovec av;
286 apix_impl_t apix;
287 apix_vector_t apix_vector;
288
289 switch (rec->ttr_regs.r_trapno) {
290 case T_SOFTINT:
291 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)");
292 return (0);
293 default:
294 break;
295 }
296
297 mdb_printf("%-3x ", rec->ttr_vector);
298
299 /* Read the per CPU apix entry */
300 if (mdb_vread(&apix, sizeof (apix_impl_t),
301 (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) {
302 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid);
303 return (-1);
304 }
305 if (mdb_vread(&apix_vector, sizeof (apix_vector_t),
306 (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) {
307 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector);
308 return (-1);
309 }
310 if (apix_vector.v_share == 0) {
311 if (rec->ttr_ipl == XC_CPUPOKE_PIL)
312 mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)");
313 else
314 mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)");
315 } else {
316 if (mdb_vread(&av, sizeof (struct autovec),
317 (uintptr_t)(apix_vector.v_autovect)) == -1) {
318 mdb_warn("couldn't read autovec at %p",
319 (uintptr_t)apix_vector.v_autovect);
320 }
321
322 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector);
323 }
324
325 return (0);
326 }
327
328
/*
 * Table mapping trap numbers (T_* constants) to the short names that
 * ttrace_trap() prints in the handler column.  The table ends with an entry
 * whose tt_name is NULL; since 0 can be a real trap number, consumers must
 * test tt_name rather than tt_trapno to find the end.
 */
static struct {
	int tt_trapno;		/* trap number to match against r_trapno */
	char *tt_name;		/* name to print; NULL terminates the table */
} ttrace_traps[] = {
	{ T_ZERODIV,	"divide-error" },
	{ T_SGLSTP,	"debug-exception" },
	{ T_NMIFLT,	"nmi-interrupt" },
	{ T_BPTFLT,	"breakpoint" },
	{ T_OVFLW,	"into-overflow" },
	{ T_BOUNDFLT,	"bound-exceeded" },
	{ T_ILLINST,	"invalid-opcode" },
	{ T_NOEXTFLT,	"device-not-avail" },
	{ T_DBLFLT,	"double-fault" },
	{ T_EXTOVRFLT,	"segment-overrun" },
	{ T_TSSFLT,	"invalid-tss" },
	{ T_SEGFLT,	"segment-not-pres" },
	{ T_STKFLT,	"stack-fault" },
	{ T_GPFLT,	"general-protectn" },
	{ T_PGFLT,	"page-fault" },
	{ T_EXTERRFLT,	"error-fault" },
	{ T_ALIGNMENT,	"alignment-check" },
	{ T_MCE,	"machine-check" },
	{ T_SIMDFPE,	"sse-exception" },

	{ T_DBGENTR,	"debug-enter" },
	{ T_FASTTRAP,	"fasttrap-0xd2" },
	{ T_SYSCALLINT,	"syscall-0x91" },
	{ T_DTRACE_RET,	"dtrace-ret" },
	{ T_SOFTINT,	"softint" },
	{ T_INTERRUPT,	"interrupt" },
	{ T_FAULT,	"fault" },
	{ T_AST,	"ast" },
	{ T_SYSCALL,	"syscall" },

	{ 0,		NULL }
};
365
366 static int
ttrace_trap(trap_trace_rec_t * rec)367 ttrace_trap(trap_trace_rec_t *rec)
368 {
369 int i;
370
371 if (rec->ttr_regs.r_trapno == T_AST)
372 mdb_printf("%-3s ", "-");
373 else
374 mdb_printf("%-3x ", rec->ttr_regs.r_trapno);
375
376 for (i = 0; ttrace_traps[i].tt_name != NULL; i++) {
377 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno)
378 break;
379 }
380
381 if (ttrace_traps[i].tt_name == NULL)
382 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)");
383 else
384 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name);
385
386 return (0);
387 }
388
389 static void
ttrace_intr_detail(trap_trace_rec_t * rec)390 ttrace_intr_detail(trap_trace_rec_t *rec)
391 {
392 mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector,
393 rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl);
394 }
395
/*
 * Dispatch table used by ttrace_walk(): a record whose ttr_marker equals
 * t_marker is labelled with t_name and decoded by t_hdlr.  The table ends
 * with an entry whose t_hdlr is NULL.
 *
 * The table is deliberately not const: ::ttrace may replace the handler of
 * the TT_INTERRUPT entry at run time when the target uses apix, so keep that
 * entry at its current position.
 */
static struct {
	uchar_t t_marker;			/* value of ttr_marker */
	char *t_name;				/* text for the TYPE column */
	int (*t_hdlr)(trap_trace_rec_t *);	/* returns -1 on failure */
} ttrace_hdlr[] = {
	{ TT_SYSCALL,	"sysc",	ttrace_syscall },
	{ TT_SYSENTER,	"syse",	ttrace_syscall },
	{ TT_SYSC,	"asys",	ttrace_syscall },
	{ TT_SYSC64,	"sc64",	ttrace_syscall },
	{ TT_INTERRUPT,	"intr",	ttrace_interrupt },
	{ TT_TRAP,	"trap",	ttrace_trap },
	{ TT_EVENT,	"evnt",	ttrace_trap },
	{ 0,		NULL,	NULL }
};
410
/*
 * State shared between the ::ttrace dcmd and its walk callback,
 * ttrace_walk().
 */
typedef struct ttrace_dcmd {
	processorid_t ttd_cpu;		/* CPU to show, or -1 for all CPUs */
	uint_t ttd_extended;		/* set by -x: print extended detail */
	uintptr_t ttd_kthread;		/* set by -t: thread filter, 0 = none */
	trap_trace_ctl_t ttd_ttc[NCPU];	/* copy of target's trap_trace_ctl */
} ttrace_dcmd_t;
417
418 #if defined(__amd64)
419
420 #define DUMP(reg) #reg, regs->r_##reg
421 #define THREEREGS " %3s: %16lx %3s: %16lx %3s: %16lx\n"
422
423 static void
ttrace_dumpregs(trap_trace_rec_t * rec)424 ttrace_dumpregs(trap_trace_rec_t *rec)
425 {
426 struct regs *regs = &rec->ttr_regs;
427
428 mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx));
429 mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9));
430 mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp));
431 mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12));
432 mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15));
433 mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs));
434 mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err));
435 mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl));
436 mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2);
437 mdb_printf(" %3s: %16lx %3s: %16lx\n",
438 "fsb", regs->__r_fsbase,
439 "gsb", regs->__r_gsbase);
440 mdb_printf("\n");
441 }
442
443 #else
444
445 #define DUMP(reg) #reg, regs->r_##reg
446 #define FOURREGS " %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n"
447
448 static void
ttrace_dumpregs(trap_trace_rec_t * rec)449 ttrace_dumpregs(trap_trace_rec_t *rec)
450 {
451 struct regs *regs = &rec->ttr_regs;
452
453 mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds));
454 mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp));
455 mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax));
456 mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err),
457 DUMP(pc), DUMP(cs));
458 mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss),
459 "cr2", rec->ttr_cr2);
460 mdb_printf("\n");
461 }
462
463 #endif /* __amd64 */
464
465 int
ttrace_walk(uintptr_t addr,trap_trace_rec_t * rec,ttrace_dcmd_t * dcmd)466 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd)
467 {
468 struct regs *regs = &rec->ttr_regs;
469 processorid_t cpu = -1, i;
470
471 for (i = 0; i < NCPU; i++) {
472 if (addr >= dcmd->ttd_ttc[i].ttc_first &&
473 addr < dcmd->ttd_ttc[i].ttc_limit) {
474 cpu = i;
475 break;
476 }
477 }
478
479 if (cpu == -1) {
480 mdb_warn("couldn't find %p in any trap trace ctl\n", addr);
481 return (WALK_ERR);
482 }
483
484 if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu)
485 return (WALK_NEXT);
486
487 if (dcmd->ttd_kthread != 0 &&
488 dcmd->ttd_kthread != rec->ttr_curthread)
489 return (WALK_NEXT);
490
491 mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp);
492
493 for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) {
494 if (rec->ttr_marker != ttrace_hdlr[i].t_marker)
495 continue;
496 mdb_printf("%4s ", ttrace_hdlr[i].t_name);
497 if (ttrace_hdlr[i].t_hdlr(rec) == -1)
498 return (WALK_ERR);
499 }
500
501 mdb_printf(" %a\n", regs->r_pc);
502
503 if (dcmd->ttd_extended == FALSE)
504 return (WALK_NEXT);
505
506 if (rec->ttr_marker == TT_INTERRUPT)
507 ttrace_intr_detail(rec);
508 else
509 ttrace_dumpregs(rec);
510
511 if (rec->ttr_sdepth > 0) {
512 for (i = 0; i < rec->ttr_sdepth; i++) {
513 if (i >= TTR_STACK_DEPTH) {
514 mdb_printf("%17s*** invalid ttr_sdepth (is %d, "
515 "should be <= %d)\n", " ", rec->ttr_sdepth,
516 TTR_STACK_DEPTH);
517 break;
518 }
519
520 mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]);
521 }
522 mdb_printf("\n");
523 }
524
525 return (WALK_NEXT);
526 }
527
528 int
ttrace(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)529 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
530 {
531 ttrace_dcmd_t dcmd;
532 trap_trace_ctl_t *ttc = dcmd.ttd_ttc;
533 trap_trace_rec_t rec;
534 size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU;
535
536 if (!ttrace_ttr_size_check())
537 return (WALK_ERR);
538
539 bzero(&dcmd, sizeof (dcmd));
540 dcmd.ttd_cpu = -1;
541 dcmd.ttd_extended = FALSE;
542
543 if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) {
544 mdb_warn("symbol 'trap_trace_ctl' not found; "
545 "non-TRAPTRACE kernel?\n");
546 return (DCMD_ERR);
547 }
548
549 if (mdb_getopts(argc, argv,
550 'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended,
551 't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc)
552 return (DCMD_USAGE);
553
554 if (DCMD_HDRSPEC(flags)) {
555 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU",
556 "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER",
557 " EIP");
558 }
559
560 if (flags & DCMD_ADDRSPEC) {
561 if (addr >= NCPU) {
562 if (mdb_vread(&rec, sizeof (rec), addr) == -1) {
563 mdb_warn("couldn't read trap trace record "
564 "at %p", addr);
565 return (DCMD_ERR);
566 }
567
568 if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR)
569 return (DCMD_ERR);
570
571 return (DCMD_OK);
572 }
573 dcmd.ttd_cpu = addr;
574 }
575
576 if (mdb_readvar(&use_apix, "apix_enable") == -1) {
577 mdb_warn("failed to read apix_enable");
578 use_apix = 0;
579 }
580
581 if (use_apix) {
582 if (mdb_readvar(&d_apixs, "apixs") == -1) {
583 mdb_warn("\nfailed to read apixs.");
584 return (DCMD_ERR);
585 }
586 /* change to apix ttrace interrupt handler */
587 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt;
588 }
589
590 if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) {
591 mdb_warn("couldn't walk 'ttrace'");
592 return (DCMD_ERR);
593 }
594
595 return (DCMD_OK);
596 }
597
598 /*ARGSUSED*/
599 int
mutex_owner_init(mdb_walk_state_t * wsp)600 mutex_owner_init(mdb_walk_state_t *wsp)
601 {
602 return (WALK_NEXT);
603 }
604
605 int
mutex_owner_step(mdb_walk_state_t * wsp)606 mutex_owner_step(mdb_walk_state_t *wsp)
607 {
608 uintptr_t addr = wsp->walk_addr;
609 mutex_impl_t mtx;
610 uintptr_t owner;
611 kthread_t thr;
612
613 if (mdb_vread(&mtx, sizeof (mtx), addr) == -1)
614 return (WALK_ERR);
615
616 if (!MUTEX_TYPE_ADAPTIVE(&mtx))
617 return (WALK_DONE);
618
619 if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == 0)
620 return (WALK_DONE);
621
622 if (mdb_vread(&thr, sizeof (thr), owner) != -1)
623 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata);
624
625 return (WALK_DONE);
626 }
627
628 static void
gate_desc_dump(gate_desc_t * gate,const char * label,int header)629 gate_desc_dump(gate_desc_t *gate, const char *label, int header)
630 {
631 const char *lastnm;
632 uint_t lastval;
633 char type[4];
634
635 switch (gate->sgd_type) {
636 case SDT_SYSIGT:
637 strcpy(type, "int");
638 break;
639 case SDT_SYSTGT:
640 strcpy(type, "trp");
641 break;
642 case SDT_SYSTASKGT:
643 strcpy(type, "tsk");
644 break;
645 default:
646 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type);
647 }
648
649 #if defined(__amd64)
650 lastnm = "IST";
651 lastval = gate->sgd_ist;
652 #else
653 lastnm = "STK";
654 lastval = gate->sgd_stkcpy;
655 #endif
656
657 if (header) {
658 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> "
659 "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label),
660 "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm);
661 }
662
663 mdb_printf("%s", label);
664
665 if (gate->sgd_type == SDT_SYSTASKGT)
666 mdb_printf("%-30s ", "-");
667 else
668 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate));
669
670 mdb_printf("%4x %d %c %3s %2x\n", gate->sgd_selector,
671 gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval);
672 }
673
674 /*ARGSUSED*/
675 static int
gate_desc(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)676 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
677 {
678 gate_desc_t gate;
679
680 if (argc != 0 || !(flags & DCMD_ADDRSPEC))
681 return (DCMD_USAGE);
682
683 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
684 sizeof (gate_desc_t)) {
685 mdb_warn("failed to read gate descriptor at %p\n", addr);
686 return (DCMD_ERR);
687 }
688
689 gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags));
690
691 return (DCMD_OK);
692 }
693
694 /*ARGSUSED*/
695 static int
idt(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)696 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
697 {
698 int i;
699
700 if (!(flags & DCMD_ADDRSPEC)) {
701 GElf_Sym idt0_va;
702 gate_desc_t *idt0;
703
704 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) {
705 mdb_warn("failed to find VA of idt0");
706 return (DCMD_ERR);
707 }
708
709 addr = idt0_va.st_value;
710 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) {
711 mdb_warn("failed to read idt0 at %p\n", addr);
712 return (DCMD_ERR);
713 }
714
715 addr = (uintptr_t)idt0;
716 }
717
718 for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) {
719 gate_desc_t gate;
720 char label[6];
721
722 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) !=
723 sizeof (gate_desc_t)) {
724 mdb_warn("failed to read gate descriptor at %p\n",
725 addr);
726 return (DCMD_ERR);
727 }
728
729 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i);
730 gate_desc_dump(&gate, label, i == 0);
731 }
732
733 return (DCMD_OK);
734 }
735
/* Help text for the ::htables dcmd. */
static void
htables_help(void)
{
	static const char text[] =
	    "Given a (hat_t *), generates the list of all (htable_t *)s\n"
	    "that correspond to that address space\n";

	mdb_printf("%s", text);
}
743
/* Help text for the ::report_maps dcmd. */
static void
report_maps_help(void)
{
	static const char text[] =
	    "Given a PFN, report HAT structures that map the page, or use\n"
	    "the page as a pagetable.\n"
	    "\n"
	    "-m Interpret the PFN as an MFN (machine frame number)\n";

	mdb_printf("%s", text);
}
753
/* Help text for the ::ptable dcmd. */
static void
ptable_help(void)
{
	static const char text[] =
	    "Given a PFN holding a page table, print its contents, and\n"
	    "the address of the corresponding htable structure.\n"
	    "\n"
	    "-m Interpret the PFN as an MFN (machine frame number)\n"
	    "-l force page table level (3 is top)\n";

	mdb_printf("%s", text);
}
764
/* Help text for the ::ptmap dcmd. */
static void
ptmap_help(void)
{
	static const char text[] =
	    "Report all mappings represented by the page table hierarchy\n"
	    "rooted at the given cr3 value / physical address.\n"
	    "\n"
	    "-w run ::whatis on mapping start addresses\n";

	mdb_printf("%s", text);
}
774
/*
 * Help text for ::scalehrtime, printed by scalehrtime_help().  It describes
 * how the dcmd chooses between absolute and relative scaling and how -a and
 * -r override that choice.
 */
static const char *const scalehrtime_desc =
	"Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n"
	"are used as both a quick way of accumulating relative time (as for\n"
	"usage) and as a quick way of getting the absolute current time.\n"
	"These uses require slightly different scaling algorithms. By\n"
	"default, if a specified time is greater than half of the unscaled\n"
	"time at the last tick (that is, if the unscaled time represents\n"
	"more than half the time since boot), the timestamp is assumed to\n"
	"be absolute, and the scaling algorithm used mimics that which the\n"
	"kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n"
	"be relative, and the algorithm mimics scalehrtime(). This behavior\n"
	"can be overridden by forcing the unscaled time to be interpreted\n"
	"as relative (via -r) or absolute (via -a).\n";
788
789 static void
scalehrtime_help(void)790 scalehrtime_help(void)
791 {
792 mdb_printf("%s", scalehrtime_desc);
793 }
794
/*
 * NSEC_SHIFT is replicated here (it is not defined in a header file),
 * but for amusement, the reader is directed to the comment that explains
 * the rationale for this particular value on x86.  Spoiler:  the value is
 * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 * in that comment sounds too familiar, it's because your author also wrote
 * that code -- some fifteen years prior to this writing in 2011...)
 *
 * In this file it is used only by the ::scalehrtime dcmd, as the shift
 * applied when multiplying a tick count by the target's nsec_scale.
 */
#define	NSEC_SHIFT 5
804
805 /*ARGSUSED*/
806 static int
scalehrtime_dcmd(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)807 scalehrtime_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
808 {
809 uint32_t nsec_scale;
810 hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1;
811 unsigned int *tscp = (unsigned int *)&tsc;
812 uintptr_t scalehrtimef;
813 uint64_t scale;
814 GElf_Sym sym;
815 int expected = !(flags & DCMD_ADDRSPEC);
816 uint_t absolute = FALSE, relative = FALSE;
817
818 if (mdb_getopts(argc, argv,
819 'a', MDB_OPT_SETBITS, TRUE, &absolute,
820 'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected)
821 return (DCMD_USAGE);
822
823 if (absolute && relative) {
824 mdb_warn("can't specify both -a and -r\n");
825 return (DCMD_USAGE);
826 }
827
828 if (expected == 1) {
829 switch (argv[argc - 1].a_type) {
830 case MDB_TYPE_STRING:
831 tsc = mdb_strtoull(argv[argc - 1].a_un.a_str);
832 break;
833 case MDB_TYPE_IMMEDIATE:
834 tsc = argv[argc - 1].a_un.a_val;
835 break;
836 default:
837 return (DCMD_USAGE);
838 }
839 }
840
841 if (mdb_readsym(&scalehrtimef,
842 sizeof (scalehrtimef), "scalehrtimef") == -1) {
843 mdb_warn("couldn't read 'scalehrtimef'");
844 return (DCMD_ERR);
845 }
846
847 if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
848 mdb_warn("couldn't find 'tsc_scalehrtime'");
849 return (DCMD_ERR);
850 }
851
852 if (sym.st_value != scalehrtimef) {
853 mdb_warn("::scalehrtime requires that scalehrtimef "
854 "be set to tsc_scalehrtime\n");
855 return (DCMD_ERR);
856 }
857
858 if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
859 mdb_warn("couldn't read 'nsec_scale'");
860 return (DCMD_ERR);
861 }
862
863 if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) {
864 mdb_warn("couldn't read 'tsc_last'");
865 return (DCMD_ERR);
866 }
867
868 if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) {
869 mdb_warn("couldn't read 'tsc_hrtime_base'");
870 return (DCMD_ERR);
871 }
872
873 /*
874 * If our time is greater than half of tsc_last, we will take our
875 * delta against tsc_last, convert it, and add that to (or subtract it
876 * from) tsc_hrtime_base. This mimics what the kernel actually does
877 * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much
878 * higher precision result than trying to convert a large tsc value.
879 */
880 if (absolute || (tsc > (tsc_last >> 1) && !relative)) {
881 if (tsc > tsc_last) {
882 tsc = tsc - tsc_last;
883 } else {
884 tsc = tsc_last - tsc;
885 mult = -1;
886 }
887 } else {
888 base = 0;
889 }
890
891 scale = (uint64_t)nsec_scale;
892
893 hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
894 hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
895
896 mdb_printf("0x%llx\n", base + (hrt * mult));
897
898 return (DCMD_OK);
899 }
900
901 /*
902 * The x86 feature set is implemented as a bitmap array. That bitmap array is
903 * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
904 * macro. We have the names for each of these features in unix's text segment
905 * so we do not have to duplicate them and instead just look them up.
906 */
907 /*ARGSUSED*/
908 static int
x86_featureset_dcmd(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)909 x86_featureset_dcmd(uintptr_t addr, uint_t flags, int argc,
910 const mdb_arg_t *argv)
911 {
912 void *fset;
913 GElf_Sym sym;
914 uintptr_t nptr;
915 char name[128];
916 int ii;
917
918 size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES);
919
920 if (argc != 0)
921 return (DCMD_USAGE);
922
923 if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) {
924 mdb_warn("couldn't find x86_feature_names");
925 return (DCMD_ERR);
926 }
927
928 fset = mdb_zalloc(sz, UM_NOSLEEP);
929 if (fset == NULL) {
930 mdb_warn("failed to allocate memory for x86_featureset");
931 return (DCMD_ERR);
932 }
933
934 if (flags & DCMD_ADDRSPEC) {
935 if (mdb_vread(fset, sz, addr) != sz) {
936 mdb_warn("failed to read x86_featureset from %p", addr);
937 mdb_free(fset, sz);
938 return (DCMD_ERR);
939 }
940 } else {
941 if (mdb_readvar(fset, "x86_featureset") != sz) {
942 mdb_warn("failed to read x86_featureset");
943 mdb_free(fset, sz);
944 return (DCMD_ERR);
945 }
946 }
947
948 for (ii = 0; ii < NUM_X86_FEATURES; ii++) {
949 if (!BT_TEST((ulong_t *)fset, ii))
950 continue;
951
952 if (mdb_vread(&nptr, sizeof (char *), sym.st_value +
953 sizeof (void *) * ii) != sizeof (char *)) {
954 mdb_warn("failed to read feature array %d", ii);
955 mdb_free(fset, sz);
956 return (DCMD_ERR);
957 }
958
959 if (mdb_readstr(name, sizeof (name), nptr) == -1) {
960 mdb_printf("unknown feature 0x%x\n", ii);
961 } else {
962 mdb_printf("%s\n", name);
963 }
964 }
965
966 mdb_free(fset, sz);
967 return (DCMD_OK);
968 }
969
#ifdef _KMDB
/*
 * ::sysregs dcmd (kmdb only): collect %cr0, %cr2, %cr3, %cr4 and the GDT
 * base and limit through the kmdb_unix_* accessors and print them with
 * mdb_x86_print_sysregs().  Always returns DCMD_OK.
 */
/* ARGSUSED */
static int
sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	struct sysregs sr = { 0 };
	desctbr_t gdt;
#ifdef __amd64
	boolean_t is_long = B_TRUE;
#else
	boolean_t is_long = B_FALSE;
#endif

	kmdb_unix_getgdtr(&gdt);
	sr.sr_gdtr.d_lim = gdt.dtr_limit;
	sr.sr_gdtr.d_base = gdt.dtr_base;

	sr.sr_cr0 = kmdb_unix_getcr0();
	sr.sr_cr2 = kmdb_unix_getcr2();
	sr.sr_cr3 = kmdb_unix_getcr3();
	sr.sr_cr4 = kmdb_unix_getcr4();

	mdb_x86_print_sysregs(&sr, is_long);

	return (DCMD_OK);
}
#endif
997
998 extern void xcall_help(void);
999 extern int xcall_dcmd(uintptr_t, uint_t, int, const mdb_arg_t *);
1000
1001 static const mdb_dcmd_t dcmds[] = {
1002 { "gate_desc", ":", "dump a gate descriptor", gate_desc },
1003 { "idt", ":[-v]", "dump an IDT", idt },
1004 { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
1005 { "vatopfn", ":[-a as]", "translate address to physical page",
1006 va2pfn_dcmd },
1007 { "report_maps", ":[-m]",
1008 "Given PFN, report mappings / page table usage",
1009 report_maps_dcmd, report_maps_help },
1010 { "htables", "", "Given hat_t *, lists all its htable_t * values",
1011 htables_dcmd, htables_help },
1012 { "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
1013 ptable_dcmd, ptable_help },
1014 { "ptmap", ":", "Given a cr3 value, dump all mappings",
1015 ptmap_dcmd, ptmap_help },
1016 { "pte", ":[-l N]", "print human readable page table entry",
1017 pte_dcmd },
1018 { "pfntomfn", ":", "convert physical page to hypervisor machine page",
1019 pfntomfn_dcmd },
1020 { "mfntopfn", ":", "convert hypervisor machine page to physical page",
1021 mfntopfn_dcmd },
1022 { "memseg_list", ":", "show memseg list", memseg_list },
1023 { "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time",
1024 scalehrtime_dcmd, scalehrtime_help },
1025 { "x86_featureset", ":", "dump the x86_featureset vector",
1026 x86_featureset_dcmd },
1027 { "xcall", ":", "print CPU cross-call state", xcall_dcmd, xcall_help },
1028 #ifdef _KMDB
1029 { "sysregs", NULL, "dump system registers", sysregs_dcmd },
1030 #endif
1031 { NULL }
1032 };
1033
/*
 * Walkers exported by this module.  Each entry gives the walker's name, a
 * one-line description, and its init, step and (optional) fini functions.
 * mutex_owner has no fini function.
 */
static const mdb_walker_t walkers[] = {
	{ "ttrace", "walks trap trace buffers in reverse chronological order",
		ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
	{ "mutex_owner", "walks the owner of a mutex",
		mutex_owner_init, mutex_owner_step },
	{ "memseg", "walk the memseg structures",
		memseg_walk_init, memseg_walk_step, memseg_walk_fini },
	{ NULL }
};
1043
/* Module description returned by _mdb_init(): API version, dcmds, walkers. */
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1045
1046 const mdb_modinfo_t *
_mdb_init(void)1047 _mdb_init(void)
1048 {
1049 return (&modinfo);
1050 }
1051
/*
 * Module exit point: hand control to free_mmu() so the MMU support code can
 * clean up after itself.
 */
void
_mdb_fini(void)
{
	/* free_mmu() is defined outside this file. */
	free_mmu();
}
1057