1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2018 Joyent, Inc. 24 */ 25 26 #include <mdb/mdb_modapi.h> 27 #include <mdb/mdb_ctf.h> 28 #include <sys/cpuvar.h> 29 #include <sys/systm.h> 30 #include <sys/traptrace.h> 31 #include <sys/x_call.h> 32 #include <sys/xc_levels.h> 33 #include <sys/avintr.h> 34 #include <sys/systm.h> 35 #include <sys/trap.h> 36 #include <sys/mutex.h> 37 #include <sys/mutex_impl.h> 38 #include "i86mmu.h" 39 #include "unix_sup.h" 40 #include <sys/apix.h> 41 #include <sys/x86_archext.h> 42 #include <sys/bitmap.h> 43 #include <sys/controlregs.h> 44 45 #define TT_HDLR_WIDTH 17 46 47 48 /* apix only */ 49 static apix_impl_t *d_apixs[NCPU]; 50 static int use_apix = 0; 51 52 static int 53 ttrace_ttr_size_check(void) 54 { 55 mdb_ctf_id_t ttrtid; 56 ssize_t ttr_size; 57 58 if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 || 59 mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) { 60 mdb_warn("failed to determine size of trap_trace_rec_t; " 61 "non-TRAPTRACE kernel?\n"); 62 return (0); 63 } 64 65 if ((ttr_size = mdb_ctf_type_size(ttrtid)) != 66 sizeof (trap_trace_rec_t)) { 67 /* 68 * On Intel machines, this will happen when TTR_STACK_DEPTH 69 * is changed. This code could be smarter, and could 70 * dynamically adapt to different depths, but not until a 71 * need for such adaptation is demonstrated. 72 */ 73 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't " 74 "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t)); 75 return (0); 76 } 77 78 return (1); 79 } 80 81 int 82 ttrace_walk_init(mdb_walk_state_t *wsp) 83 { 84 trap_trace_ctl_t *ttcp; 85 size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU; 86 int i; 87 88 if (!ttrace_ttr_size_check()) 89 return (WALK_ERR); 90 91 ttcp = mdb_zalloc(ttc_size, UM_SLEEP); 92 93 if (wsp->walk_addr != 0) { 94 mdb_warn("ttrace only supports global walks\n"); 95 return (WALK_ERR); 96 } 97 98 if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) { 99 mdb_warn("symbol 'trap_trace_ctl' not found; " 100 "non-TRAPTRACE kernel?\n"); 101 mdb_free(ttcp, ttc_size); 102 return (WALK_ERR); 103 } 104 105 /* 106 * We'll poach the ttc_current pointer (which isn't used for 107 * anything) to store a pointer to our current TRAPTRACE record. 108 * This allows us to only keep the array of trap_trace_ctl structures 109 * as our walker state (ttc_current may be the only kernel data 110 * structure member added exclusively to make writing the mdb walker 111 * a little easier). 112 */ 113 for (i = 0; i < NCPU; i++) { 114 trap_trace_ctl_t *ttc = &ttcp[i]; 115 116 if (ttc->ttc_first == 0) 117 continue; 118 119 /* 120 * Assign ttc_current to be the last completed record. 121 * Note that the error checking (i.e. in the ttc_next == 122 * ttc_first case) is performed in the step function. 123 */ 124 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t); 125 } 126 127 wsp->walk_data = ttcp; 128 return (WALK_NEXT); 129 } 130 131 int 132 ttrace_walk_step(mdb_walk_state_t *wsp) 133 { 134 trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc; 135 trap_trace_rec_t rec; 136 int rval, i, recsize = sizeof (trap_trace_rec_t); 137 hrtime_t latest = 0; 138 139 /* 140 * Loop through the CPUs, looking for the latest trap trace record 141 * (we want to walk through the trap trace records in reverse 142 * chronological order). 143 */ 144 for (i = 0; i < NCPU; i++) { 145 ttc = &ttcp[i]; 146 147 if (ttc->ttc_current == 0) 148 continue; 149 150 if (ttc->ttc_current < ttc->ttc_first) 151 ttc->ttc_current = ttc->ttc_limit - recsize; 152 153 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) { 154 mdb_warn("couldn't read rec at %p", ttc->ttc_current); 155 return (WALK_ERR); 156 } 157 158 if (rec.ttr_stamp > latest) { 159 latest = rec.ttr_stamp; 160 latest_ttc = ttc; 161 } 162 } 163 164 if (latest == 0) 165 return (WALK_DONE); 166 167 ttc = latest_ttc; 168 169 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) { 170 mdb_warn("couldn't read rec at %p", ttc->ttc_current); 171 return (WALK_ERR); 172 } 173 174 rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata); 175 176 if (ttc->ttc_current == ttc->ttc_next) 177 ttc->ttc_current = 0; 178 else 179 ttc->ttc_current -= sizeof (trap_trace_rec_t); 180 181 return (rval); 182 } 183 184 void 185 ttrace_walk_fini(mdb_walk_state_t *wsp) 186 { 187 mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU); 188 } 189 190 static int 191 ttrace_syscall(trap_trace_rec_t *rec) 192 { 193 GElf_Sym sym; 194 int sysnum = rec->ttr_sysnum; 195 uintptr_t addr; 196 struct sysent sys; 197 198 mdb_printf("%-3x", sysnum); 199 200 if (rec->ttr_sysnum > NSYSCALL) { 201 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum); 202 return (0); 203 } 204 205 if (mdb_lookup_by_name("sysent", &sym) == -1) { 206 mdb_warn("\ncouldn't find 'sysent'"); 207 return (-1); 208 } 209 210 addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent); 211 212 if (addr >= (uintptr_t)sym.st_value + sym.st_size) { 213 mdb_warn("\nsysnum %d out-of-range\n", sysnum); 214 return (-1); 215 } 216 217 if (mdb_vread(&sys, sizeof (sys), addr) == -1) { 218 mdb_warn("\nfailed to read sysent at %p", addr); 219 return (-1); 220 } 221 222 mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc); 223 224 return (0); 225 } 226 227 static int 228 ttrace_interrupt(trap_trace_rec_t *rec) 229 { 230 GElf_Sym sym; 231 uintptr_t addr; 232 struct av_head hd; 233 struct autovec av; 234 235 switch (rec->ttr_regs.r_trapno) { 236 case T_SOFTINT: 237 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)"); 238 return (0); 239 default: 240 break; 241 } 242 243 mdb_printf("%-3x ", rec->ttr_vector); 244 245 if (mdb_lookup_by_name("autovect", &sym) == -1) { 246 mdb_warn("\ncouldn't find 'autovect'"); 247 return (-1); 248 } 249 250 addr = (uintptr_t)sym.st_value + 251 rec->ttr_vector * sizeof (struct av_head); 252 253 if (addr >= (uintptr_t)sym.st_value + sym.st_size) { 254 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector); 255 return (-1); 256 } 257 258 if (mdb_vread(&hd, sizeof (hd), addr) == -1) { 259 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector); 260 return (-1); 261 } 262 263 if (hd.avh_link == NULL) { 264 if (rec->ttr_ipl == XC_CPUPOKE_PIL) 265 mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)"); 266 else 267 mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)"); 268 } else { 269 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) { 270 mdb_warn("couldn't read autovec at %p", 271 (uintptr_t)hd.avh_link); 272 } 273 274 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector); 275 } 276 277 return (0); 278 } 279 280 static int 281 ttrace_apix_interrupt(trap_trace_rec_t *rec) 282 { 283 struct autovec av; 284 apix_impl_t apix; 285 apix_vector_t apix_vector; 286 287 switch (rec->ttr_regs.r_trapno) { 288 case T_SOFTINT: 289 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)"); 290 return (0); 291 default: 292 break; 293 } 294 295 mdb_printf("%-3x ", rec->ttr_vector); 296 297 /* Read the per CPU apix entry */ 298 if (mdb_vread(&apix, sizeof (apix_impl_t), 299 (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) { 300 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid); 301 return (-1); 302 } 303 if (mdb_vread(&apix_vector, sizeof (apix_vector_t), 304 (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) { 305 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector); 306 return (-1); 307 } 308 if (apix_vector.v_share == 0) { 309 if (rec->ttr_ipl == XC_CPUPOKE_PIL) 310 mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)"); 311 else 312 mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)"); 313 } else { 314 if (mdb_vread(&av, sizeof (struct autovec), 315 (uintptr_t)(apix_vector.v_autovect)) == -1) { 316 mdb_warn("couldn't read autovec at %p", 317 (uintptr_t)apix_vector.v_autovect); 318 } 319 320 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector); 321 } 322 323 return (0); 324 } 325 326 327 static struct { 328 int tt_trapno; 329 char *tt_name; 330 } ttrace_traps[] = { 331 { T_ZERODIV, "divide-error" }, 332 { T_SGLSTP, "debug-exception" }, 333 { T_NMIFLT, "nmi-interrupt" }, 334 { T_BPTFLT, "breakpoint" }, 335 { T_OVFLW, "into-overflow" }, 336 { T_BOUNDFLT, "bound-exceeded" }, 337 { T_ILLINST, "invalid-opcode" }, 338 { T_NOEXTFLT, "device-not-avail" }, 339 { T_DBLFLT, "double-fault" }, 340 { T_EXTOVRFLT, "segment-overrun" }, 341 { T_TSSFLT, "invalid-tss" }, 342 { T_SEGFLT, "segment-not-pres" }, 343 { T_STKFLT, "stack-fault" }, 344 { T_GPFLT, "general-protectn" }, 345 { T_PGFLT, "page-fault" }, 346 { T_EXTERRFLT, "error-fault" }, 347 { T_ALIGNMENT, "alignment-check" }, 348 { T_MCE, "machine-check" }, 349 { T_SIMDFPE, "sse-exception" }, 350 351 { T_DBGENTR, "debug-enter" }, 352 { T_FASTTRAP, "fasttrap-0xd2" }, 353 { T_SYSCALLINT, "syscall-0x91" }, 354 { T_DTRACE_RET, "dtrace-ret" }, 355 { T_SOFTINT, "softint" }, 356 { T_INTERRUPT, "interrupt" }, 357 { T_FAULT, "fault" }, 358 { T_AST, "ast" }, 359 { T_SYSCALL, "syscall" }, 360 361 { 0, NULL } 362 }; 363 364 static int 365 ttrace_trap(trap_trace_rec_t *rec) 366 { 367 int i; 368 369 if (rec->ttr_regs.r_trapno == T_AST) 370 mdb_printf("%-3s ", "-"); 371 else 372 mdb_printf("%-3x ", rec->ttr_regs.r_trapno); 373 374 for (i = 0; ttrace_traps[i].tt_name != NULL; i++) { 375 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno) 376 break; 377 } 378 379 if (ttrace_traps[i].tt_name == NULL) 380 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)"); 381 else 382 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name); 383 384 return (0); 385 } 386 387 static void 388 ttrace_intr_detail(trap_trace_rec_t *rec) 389 { 390 mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector, 391 rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl); 392 } 393 394 static struct { 395 uchar_t t_marker; 396 char *t_name; 397 int (*t_hdlr)(trap_trace_rec_t *); 398 } ttrace_hdlr[] = { 399 { TT_SYSCALL, "sysc", ttrace_syscall }, 400 { TT_SYSENTER, "syse", ttrace_syscall }, 401 { TT_SYSC, "asys", ttrace_syscall }, 402 { TT_SYSC64, "sc64", ttrace_syscall }, 403 { TT_INTERRUPT, "intr", ttrace_interrupt }, 404 { TT_TRAP, "trap", ttrace_trap }, 405 { TT_EVENT, "evnt", ttrace_trap }, 406 { 0, NULL, NULL } 407 }; 408 409 typedef struct ttrace_dcmd { 410 processorid_t ttd_cpu; 411 uint_t ttd_extended; 412 uintptr_t ttd_kthread; 413 trap_trace_ctl_t ttd_ttc[NCPU]; 414 } ttrace_dcmd_t; 415 416 #if defined(__amd64) 417 418 #define DUMP(reg) #reg, regs->r_##reg 419 #define THREEREGS " %3s: %16lx %3s: %16lx %3s: %16lx\n" 420 421 static void 422 ttrace_dumpregs(trap_trace_rec_t *rec) 423 { 424 struct regs *regs = &rec->ttr_regs; 425 426 mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx)); 427 mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9)); 428 mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp)); 429 mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12)); 430 mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15)); 431 mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs)); 432 mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err)); 433 mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl)); 434 mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2); 435 mdb_printf(" %3s: %16lx %3s: %16lx\n", 436 "fsb", regs->__r_fsbase, 437 "gsb", regs->__r_gsbase); 438 mdb_printf("\n"); 439 } 440 441 #else 442 443 #define DUMP(reg) #reg, regs->r_##reg 444 #define FOURREGS " %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n" 445 446 static void 447 ttrace_dumpregs(trap_trace_rec_t *rec) 448 { 449 struct regs *regs = &rec->ttr_regs; 450 451 mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds)); 452 mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp)); 453 mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax)); 454 mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err), 455 DUMP(pc), DUMP(cs)); 456 mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss), 457 "cr2", rec->ttr_cr2); 458 mdb_printf("\n"); 459 } 460 461 #endif /* __amd64 */ 462 463 int 464 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd) 465 { 466 struct regs *regs = &rec->ttr_regs; 467 processorid_t cpu = -1, i; 468 469 for (i = 0; i < NCPU; i++) { 470 if (addr >= dcmd->ttd_ttc[i].ttc_first && 471 addr < dcmd->ttd_ttc[i].ttc_limit) { 472 cpu = i; 473 break; 474 } 475 } 476 477 if (cpu == -1) { 478 mdb_warn("couldn't find %p in any trap trace ctl\n", addr); 479 return (WALK_ERR); 480 } 481 482 if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu) 483 return (WALK_NEXT); 484 485 if (dcmd->ttd_kthread != 0 && 486 dcmd->ttd_kthread != rec->ttr_curthread) 487 return (WALK_NEXT); 488 489 mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp); 490 491 for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) { 492 if (rec->ttr_marker != ttrace_hdlr[i].t_marker) 493 continue; 494 mdb_printf("%4s ", ttrace_hdlr[i].t_name); 495 if (ttrace_hdlr[i].t_hdlr(rec) == -1) 496 return (WALK_ERR); 497 } 498 499 mdb_printf(" %a\n", regs->r_pc); 500 501 if (dcmd->ttd_extended == FALSE) 502 return (WALK_NEXT); 503 504 if (rec->ttr_marker == TT_INTERRUPT) 505 ttrace_intr_detail(rec); 506 else 507 ttrace_dumpregs(rec); 508 509 if (rec->ttr_sdepth > 0) { 510 for (i = 0; i < rec->ttr_sdepth; i++) { 511 if (i >= TTR_STACK_DEPTH) { 512 mdb_printf("%17s*** invalid ttr_sdepth (is %d, " 513 "should be <= %d)\n", " ", rec->ttr_sdepth, 514 TTR_STACK_DEPTH); 515 break; 516 } 517 518 mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]); 519 } 520 mdb_printf("\n"); 521 } 522 523 return (WALK_NEXT); 524 } 525 526 int 527 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 528 { 529 ttrace_dcmd_t dcmd; 530 trap_trace_ctl_t *ttc = dcmd.ttd_ttc; 531 trap_trace_rec_t rec; 532 size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU; 533 534 if (!ttrace_ttr_size_check()) 535 return (WALK_ERR); 536 537 bzero(&dcmd, sizeof (dcmd)); 538 dcmd.ttd_cpu = -1; 539 dcmd.ttd_extended = FALSE; 540 541 if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) { 542 mdb_warn("symbol 'trap_trace_ctl' not found; " 543 "non-TRAPTRACE kernel?\n"); 544 return (DCMD_ERR); 545 } 546 547 if (mdb_getopts(argc, argv, 548 'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended, 549 't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc) 550 return (DCMD_USAGE); 551 552 if (DCMD_HDRSPEC(flags)) { 553 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU", 554 "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER", 555 " EIP"); 556 } 557 558 if (flags & DCMD_ADDRSPEC) { 559 if (addr >= NCPU) { 560 if (mdb_vread(&rec, sizeof (rec), addr) == -1) { 561 mdb_warn("couldn't read trap trace record " 562 "at %p", addr); 563 return (DCMD_ERR); 564 } 565 566 if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR) 567 return (DCMD_ERR); 568 569 return (DCMD_OK); 570 } 571 dcmd.ttd_cpu = addr; 572 } 573 574 if (mdb_readvar(&use_apix, "apix_enable") == -1) { 575 mdb_warn("failed to read apix_enable"); 576 use_apix = 0; 577 } 578 579 if (use_apix) { 580 if (mdb_readvar(&d_apixs, "apixs") == -1) { 581 mdb_warn("\nfailed to read apixs."); 582 return (DCMD_ERR); 583 } 584 /* change to apix ttrace interrupt handler */ 585 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt; 586 } 587 588 if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) { 589 mdb_warn("couldn't walk 'ttrace'"); 590 return (DCMD_ERR); 591 } 592 593 return (DCMD_OK); 594 } 595 596 /*ARGSUSED*/ 597 int 598 mutex_owner_init(mdb_walk_state_t *wsp) 599 { 600 return (WALK_NEXT); 601 } 602 603 int 604 mutex_owner_step(mdb_walk_state_t *wsp) 605 { 606 uintptr_t addr = wsp->walk_addr; 607 mutex_impl_t mtx; 608 uintptr_t owner; 609 kthread_t thr; 610 611 if (mdb_vread(&mtx, sizeof (mtx), addr) == -1) 612 return (WALK_ERR); 613 614 if (!MUTEX_TYPE_ADAPTIVE(&mtx)) 615 return (WALK_DONE); 616 617 if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == 0) 618 return (WALK_DONE); 619 620 if (mdb_vread(&thr, sizeof (thr), owner) != -1) 621 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata); 622 623 return (WALK_DONE); 624 } 625 626 static void 627 gate_desc_dump(gate_desc_t *gate, const char *label, int header) 628 { 629 const char *lastnm; 630 uint_t lastval; 631 char type[4]; 632 633 switch (gate->sgd_type) { 634 case SDT_SYSIGT: 635 strcpy(type, "int"); 636 break; 637 case SDT_SYSTGT: 638 strcpy(type, "trp"); 639 break; 640 case SDT_SYSTASKGT: 641 strcpy(type, "tsk"); 642 break; 643 default: 644 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type); 645 } 646 647 #if defined(__amd64) 648 lastnm = "IST"; 649 lastval = gate->sgd_ist; 650 #else 651 lastnm = "STK"; 652 lastval = gate->sgd_stkcpy; 653 #endif 654 655 if (header) { 656 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> " 657 "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label), 658 "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm); 659 } 660 661 mdb_printf("%s", label); 662 663 if (gate->sgd_type == SDT_SYSTASKGT) 664 mdb_printf("%-30s ", "-"); 665 else 666 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate)); 667 668 mdb_printf("%4x %d %c %3s %2x\n", gate->sgd_selector, 669 gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval); 670 } 671 672 /*ARGSUSED*/ 673 static int 674 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 675 { 676 gate_desc_t gate; 677 678 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) 679 return (DCMD_USAGE); 680 681 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) != 682 sizeof (gate_desc_t)) { 683 mdb_warn("failed to read gate descriptor at %p\n", addr); 684 return (DCMD_ERR); 685 } 686 687 gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags)); 688 689 return (DCMD_OK); 690 } 691 692 /*ARGSUSED*/ 693 static int 694 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 695 { 696 int i; 697 698 if (!(flags & DCMD_ADDRSPEC)) { 699 GElf_Sym idt0_va; 700 gate_desc_t *idt0; 701 702 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) { 703 mdb_warn("failed to find VA of idt0"); 704 return (DCMD_ERR); 705 } 706 707 addr = idt0_va.st_value; 708 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) { 709 mdb_warn("failed to read idt0 at %p\n", addr); 710 return (DCMD_ERR); 711 } 712 713 addr = (uintptr_t)idt0; 714 } 715 716 for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) { 717 gate_desc_t gate; 718 char label[6]; 719 720 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) != 721 sizeof (gate_desc_t)) { 722 mdb_warn("failed to read gate descriptor at %p\n", 723 addr); 724 return (DCMD_ERR); 725 } 726 727 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i); 728 gate_desc_dump(&gate, label, i == 0); 729 } 730 731 return (DCMD_OK); 732 } 733 734 static void 735 htables_help(void) 736 { 737 mdb_printf( 738 "Given a (hat_t *), generates the list of all (htable_t *)s\n" 739 "that correspond to that address space\n"); 740 } 741 742 static void 743 report_maps_help(void) 744 { 745 mdb_printf( 746 "Given a PFN, report HAT structures that map the page, or use\n" 747 "the page as a pagetable.\n" 748 "\n" 749 "-m Interpret the PFN as an MFN (machine frame number)\n"); 750 } 751 752 static void 753 ptable_help(void) 754 { 755 mdb_printf( 756 "Given a PFN holding a page table, print its contents, and\n" 757 "the address of the corresponding htable structure.\n" 758 "\n" 759 "-m Interpret the PFN as an MFN (machine frame number)\n" 760 "-l force page table level (3 is top)\n"); 761 } 762 763 static void 764 ptmap_help(void) 765 { 766 mdb_printf( 767 "Report all mappings represented by the page table hierarchy\n" 768 "rooted at the given cr3 value / physical address.\n" 769 "\n" 770 "-w run ::whatis on mapping start addresses\n"); 771 } 772 773 static const char *const scalehrtime_desc = 774 "Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n" 775 "are used as both a quick way of accumulating relative time (as for\n" 776 "usage) and as a quick way of getting the absolute current time.\n" 777 "These uses require slightly different scaling algorithms. By\n" 778 "default, if a specified time is greater than half of the unscaled\n" 779 "time at the last tick (that is, if the unscaled time represents\n" 780 "more than half the time since boot), the timestamp is assumed to\n" 781 "be absolute, and the scaling algorithm used mimics that which the\n" 782 "kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n" 783 "be relative, and the algorithm mimics scalehrtime(). This behavior\n" 784 "can be overridden by forcing the unscaled time to be interpreted\n" 785 "as relative (via -r) or absolute (via -a).\n"; 786 787 static void 788 scalehrtime_help(void) 789 { 790 mdb_printf("%s", scalehrtime_desc); 791 } 792 793 /* 794 * NSEC_SHIFT is replicated here (it is not defined in a header file), 795 * but for amusement, the reader is directed to the comment that explains 796 * the rationale for this particular value on x86. Spoiler: the value is 797 * selected to accommodate 60 MHz Pentiums! (And a confession: if the voice 798 * in that comment sounds too familiar, it's because your author also wrote 799 * that code -- some fifteen years prior to this writing in 2011...) 800 */ 801 #define NSEC_SHIFT 5 802 803 /*ARGSUSED*/ 804 static int 805 scalehrtime_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 806 { 807 uint32_t nsec_scale; 808 hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1; 809 unsigned int *tscp = (unsigned int *)&tsc; 810 uintptr_t scalehrtimef; 811 uint64_t scale; 812 GElf_Sym sym; 813 int expected = !(flags & DCMD_ADDRSPEC); 814 uint_t absolute = FALSE, relative = FALSE; 815 816 if (mdb_getopts(argc, argv, 817 'a', MDB_OPT_SETBITS, TRUE, &absolute, 818 'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected) 819 return (DCMD_USAGE); 820 821 if (absolute && relative) { 822 mdb_warn("can't specify both -a and -r\n"); 823 return (DCMD_USAGE); 824 } 825 826 if (expected == 1) { 827 switch (argv[argc - 1].a_type) { 828 case MDB_TYPE_STRING: 829 tsc = mdb_strtoull(argv[argc - 1].a_un.a_str); 830 break; 831 case MDB_TYPE_IMMEDIATE: 832 tsc = argv[argc - 1].a_un.a_val; 833 break; 834 default: 835 return (DCMD_USAGE); 836 } 837 } 838 839 if (mdb_readsym(&scalehrtimef, 840 sizeof (scalehrtimef), "scalehrtimef") == -1) { 841 mdb_warn("couldn't read 'scalehrtimef'"); 842 return (DCMD_ERR); 843 } 844 845 if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) { 846 mdb_warn("couldn't find 'tsc_scalehrtime'"); 847 return (DCMD_ERR); 848 } 849 850 if (sym.st_value != scalehrtimef) { 851 mdb_warn("::scalehrtime requires that scalehrtimef " 852 "be set to tsc_scalehrtime\n"); 853 return (DCMD_ERR); 854 } 855 856 if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) { 857 mdb_warn("couldn't read 'nsec_scale'"); 858 return (DCMD_ERR); 859 } 860 861 if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) { 862 mdb_warn("couldn't read 'tsc_last'"); 863 return (DCMD_ERR); 864 } 865 866 if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) { 867 mdb_warn("couldn't read 'tsc_hrtime_base'"); 868 return (DCMD_ERR); 869 } 870 871 /* 872 * If our time is greater than half of tsc_last, we will take our 873 * delta against tsc_last, convert it, and add that to (or subtract it 874 * from) tsc_hrtime_base. This mimics what the kernel actually does 875 * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much 876 * higher precision result than trying to convert a large tsc value. 877 */ 878 if (absolute || (tsc > (tsc_last >> 1) && !relative)) { 879 if (tsc > tsc_last) { 880 tsc = tsc - tsc_last; 881 } else { 882 tsc = tsc_last - tsc; 883 mult = -1; 884 } 885 } else { 886 base = 0; 887 } 888 889 scale = (uint64_t)nsec_scale; 890 891 hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT; 892 hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT); 893 894 mdb_printf("0x%llx\n", base + (hrt * mult)); 895 896 return (DCMD_OK); 897 } 898 899 /* 900 * The x86 feature set is implemented as a bitmap array. That bitmap array is 901 * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES) 902 * macro. We have the names for each of these features in unix's text segment 903 * so we do not have to duplicate them and instead just look them up. 904 */ 905 /*ARGSUSED*/ 906 static int 907 x86_featureset_dcmd(uintptr_t addr, uint_t flags, int argc, 908 const mdb_arg_t *argv) 909 { 910 void *fset; 911 GElf_Sym sym; 912 uintptr_t nptr; 913 char name[128]; 914 int ii; 915 916 size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES); 917 918 if (argc != 0) 919 return (DCMD_USAGE); 920 921 if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) { 922 mdb_warn("couldn't find x86_feature_names"); 923 return (DCMD_ERR); 924 } 925 926 fset = mdb_zalloc(sz, UM_NOSLEEP); 927 if (fset == NULL) { 928 mdb_warn("failed to allocate memory for x86_featureset"); 929 return (DCMD_ERR); 930 } 931 932 if (mdb_readvar(fset, "x86_featureset") != sz) { 933 mdb_warn("failed to read x86_featureset"); 934 mdb_free(fset, sz); 935 return (DCMD_ERR); 936 } 937 938 for (ii = 0; ii < NUM_X86_FEATURES; ii++) { 939 if (!BT_TEST((ulong_t *)fset, ii)) 940 continue; 941 942 if (mdb_vread(&nptr, sizeof (char *), sym.st_value + 943 sizeof (void *) * ii) != sizeof (char *)) { 944 mdb_warn("failed to read feature array %d", ii); 945 mdb_free(fset, sz); 946 return (DCMD_ERR); 947 } 948 949 if (mdb_readstr(name, sizeof (name), nptr) == -1) { 950 mdb_warn("failed to read feature %d", ii); 951 mdb_free(fset, sz); 952 return (DCMD_ERR); 953 } 954 mdb_printf("%s\n", name); 955 } 956 957 mdb_free(fset, sz); 958 return (DCMD_OK); 959 } 960 961 #ifdef _KMDB 962 /* ARGSUSED */ 963 static int 964 sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 965 { 966 ulong_t cr0, cr2, cr3, cr4; 967 desctbr_t gdtr; 968 969 static const mdb_bitmask_t cr0_flag_bits[] = { 970 { "PE", CR0_PE, CR0_PE }, 971 { "MP", CR0_MP, CR0_MP }, 972 { "EM", CR0_EM, CR0_EM }, 973 { "TS", CR0_TS, CR0_TS }, 974 { "ET", CR0_ET, CR0_ET }, 975 { "NE", CR0_NE, CR0_NE }, 976 { "WP", CR0_WP, CR0_WP }, 977 { "AM", CR0_AM, CR0_AM }, 978 { "NW", CR0_NW, CR0_NW }, 979 { "CD", CR0_CD, CR0_CD }, 980 { "PG", CR0_PG, CR0_PG }, 981 { NULL, 0, 0 } 982 }; 983 984 static const mdb_bitmask_t cr3_flag_bits[] = { 985 { "PCD", CR3_PCD, CR3_PCD }, 986 { "PWT", CR3_PWT, CR3_PWT }, 987 { NULL, 0, 0, } 988 }; 989 990 static const mdb_bitmask_t cr4_flag_bits[] = { 991 { "VME", CR4_VME, CR4_VME }, 992 { "PVI", CR4_PVI, CR4_PVI }, 993 { "TSD", CR4_TSD, CR4_TSD }, 994 { "DE", CR4_DE, CR4_DE }, 995 { "PSE", CR4_PSE, CR4_PSE }, 996 { "PAE", CR4_PAE, CR4_PAE }, 997 { "MCE", CR4_MCE, CR4_MCE }, 998 { "PGE", CR4_PGE, CR4_PGE }, 999 { "PCE", CR4_PCE, CR4_PCE }, 1000 { "OSFXSR", CR4_OSFXSR, CR4_OSFXSR }, 1001 { "OSXMMEXCPT", CR4_OSXMMEXCPT, CR4_OSXMMEXCPT }, 1002 { "VMXE", CR4_VMXE, CR4_VMXE }, 1003 { "SMXE", CR4_SMXE, CR4_SMXE }, 1004 { "PCIDE", CR4_PCIDE, CR4_PCIDE }, 1005 { "OSXSAVE", CR4_OSXSAVE, CR4_OSXSAVE }, 1006 { "SMEP", CR4_SMEP, CR4_SMEP }, 1007 { "SMAP", CR4_SMAP, CR4_SMAP }, 1008 { NULL, 0, 0 } 1009 }; 1010 1011 cr0 = kmdb_unix_getcr0(); 1012 cr2 = kmdb_unix_getcr2(); 1013 cr3 = kmdb_unix_getcr3(); 1014 cr4 = kmdb_unix_getcr4(); 1015 1016 kmdb_unix_getgdtr(&gdtr); 1017 1018 mdb_printf("%%cr0 = 0x%lx <%b>\n", cr0, cr0, cr0_flag_bits); 1019 mdb_printf("%%cr2 = 0x%lx <%a>\n", cr2, cr2); 1020 1021 if ((cr4 & CR4_PCIDE)) { 1022 mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx pcid:%lu>\n", cr3, 1023 cr3 >> MMU_PAGESHIFT, cr3 & MMU_PAGEOFFSET); 1024 } else { 1025 mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx flags:%b>\n", cr3, 1026 cr3 >> MMU_PAGESHIFT, cr3, cr3_flag_bits); 1027 } 1028 1029 mdb_printf("%%cr4 = 0x%lx <%b>\n", cr4, cr4, cr4_flag_bits); 1030 1031 mdb_printf("%%gdtr.base = 0x%lx, %%gdtr.limit = 0x%hx\n", 1032 gdtr.dtr_base, gdtr.dtr_limit); 1033 1034 return (DCMD_OK); 1035 } 1036 #endif 1037 1038 extern void xcall_help(void); 1039 extern int xcall_dcmd(uintptr_t, uint_t, int, const mdb_arg_t *); 1040 1041 static const mdb_dcmd_t dcmds[] = { 1042 { "gate_desc", ":", "dump a gate descriptor", gate_desc }, 1043 { "idt", ":[-v]", "dump an IDT", idt }, 1044 { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace }, 1045 { "vatopfn", ":[-a as]", "translate address to physical page", 1046 va2pfn_dcmd }, 1047 { "report_maps", ":[-m]", 1048 "Given PFN, report mappings / page table usage", 1049 report_maps_dcmd, report_maps_help }, 1050 { "htables", "", "Given hat_t *, lists all its htable_t * values", 1051 htables_dcmd, htables_help }, 1052 { "ptable", ":[-lm]", "Given PFN, dump contents of a page table", 1053 ptable_dcmd, ptable_help }, 1054 { "ptmap", ":", "Given a cr3 value, dump all mappings", 1055 ptmap_dcmd, ptmap_help }, 1056 { "pte", ":[-l N]", "print human readable page table entry", 1057 pte_dcmd }, 1058 { "pfntomfn", ":", "convert physical page to hypervisor machine page", 1059 pfntomfn_dcmd }, 1060 { "mfntopfn", ":", "convert hypervisor machine page to physical page", 1061 mfntopfn_dcmd }, 1062 { "memseg_list", ":", "show memseg list", memseg_list }, 1063 { "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time", 1064 scalehrtime_dcmd, scalehrtime_help }, 1065 { "x86_featureset", NULL, "dump the x86_featureset vector", 1066 x86_featureset_dcmd }, 1067 { "xcall", ":", "print CPU cross-call state", xcall_dcmd, xcall_help }, 1068 #ifdef _KMDB 1069 { "sysregs", NULL, "dump system registers", sysregs_dcmd }, 1070 #endif 1071 { NULL } 1072 }; 1073 1074 static const mdb_walker_t walkers[] = { 1075 { "ttrace", "walks trap trace buffers in reverse chronological order", 1076 ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini }, 1077 { "mutex_owner", "walks the owner of a mutex", 1078 mutex_owner_init, mutex_owner_step }, 1079 { "memseg", "walk the memseg structures", 1080 memseg_walk_init, memseg_walk_step, memseg_walk_fini }, 1081 { NULL } 1082 }; 1083 1084 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers }; 1085 1086 const mdb_modinfo_t * 1087 _mdb_init(void) 1088 { 1089 return (&modinfo); 1090 } 1091 1092 void 1093 _mdb_fini(void) 1094 { 1095 free_mmu(); 1096 } 1097