1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. 24 * Copyright 2019 Joyent, Inc. 25 */ 26 27 #include <mdb/mdb_modapi.h> 28 #include <mdb/mdb_ctf.h> 29 #include <mdb/mdb_x86util.h> 30 #include <sys/cpuvar.h> 31 #include <sys/systm.h> 32 #include <sys/traptrace.h> 33 #include <sys/x_call.h> 34 #include <sys/xc_levels.h> 35 #include <sys/avintr.h> 36 #include <sys/systm.h> 37 #include <sys/trap.h> 38 #include <sys/mutex.h> 39 #include <sys/mutex_impl.h> 40 #include "i86mmu.h" 41 #include "unix_sup.h" 42 #include <sys/apix.h> 43 #include <sys/x86_archext.h> 44 #include <sys/bitmap.h> 45 #include <sys/controlregs.h> 46 47 #define TT_HDLR_WIDTH 17 48 49 50 /* apix only */ 51 static apix_impl_t *d_apixs[NCPU]; 52 static int use_apix = 0; 53 54 static int 55 ttrace_ttr_size_check(void) 56 { 57 mdb_ctf_id_t ttrtid; 58 ssize_t ttr_size; 59 60 if (mdb_ctf_lookup_by_name("trap_trace_rec_t", &ttrtid) != 0 || 61 mdb_ctf_type_resolve(ttrtid, &ttrtid) != 0) { 62 mdb_warn("failed to determine size of trap_trace_rec_t; " 63 "non-TRAPTRACE kernel?\n"); 64 return (0); 65 } 66 67 if ((ttr_size = mdb_ctf_type_size(ttrtid)) != 68 sizeof (trap_trace_rec_t)) { 69 /* 70 * On Intel machines, this will happen when TTR_STACK_DEPTH 71 * is changed. This code could be smarter, and could 72 * dynamically adapt to different depths, but not until a 73 * need for such adaptation is demonstrated. 74 */ 75 mdb_warn("size of trap_trace_rec_t (%d bytes) doesn't " 76 "match expected %d\n", ttr_size, sizeof (trap_trace_rec_t)); 77 return (0); 78 } 79 80 return (1); 81 } 82 83 int 84 ttrace_walk_init(mdb_walk_state_t *wsp) 85 { 86 trap_trace_ctl_t *ttcp; 87 size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU; 88 int i; 89 90 if (!ttrace_ttr_size_check()) 91 return (WALK_ERR); 92 93 ttcp = mdb_zalloc(ttc_size, UM_SLEEP); 94 95 if (wsp->walk_addr != 0) { 96 mdb_warn("ttrace only supports global walks\n"); 97 return (WALK_ERR); 98 } 99 100 if (mdb_readsym(ttcp, ttc_size, "trap_trace_ctl") == -1) { 101 mdb_warn("symbol 'trap_trace_ctl' not found; " 102 "non-TRAPTRACE kernel?\n"); 103 mdb_free(ttcp, ttc_size); 104 return (WALK_ERR); 105 } 106 107 /* 108 * We'll poach the ttc_current pointer (which isn't used for 109 * anything) to store a pointer to our current TRAPTRACE record. 110 * This allows us to only keep the array of trap_trace_ctl structures 111 * as our walker state (ttc_current may be the only kernel data 112 * structure member added exclusively to make writing the mdb walker 113 * a little easier). 114 */ 115 for (i = 0; i < NCPU; i++) { 116 trap_trace_ctl_t *ttc = &ttcp[i]; 117 118 if (ttc->ttc_first == 0) 119 continue; 120 121 /* 122 * Assign ttc_current to be the last completed record. 123 * Note that the error checking (i.e. in the ttc_next == 124 * ttc_first case) is performed in the step function. 125 */ 126 ttc->ttc_current = ttc->ttc_next - sizeof (trap_trace_rec_t); 127 } 128 129 wsp->walk_data = ttcp; 130 return (WALK_NEXT); 131 } 132 133 int 134 ttrace_walk_step(mdb_walk_state_t *wsp) 135 { 136 trap_trace_ctl_t *ttcp = wsp->walk_data, *ttc, *latest_ttc; 137 trap_trace_rec_t rec; 138 int rval, i, recsize = sizeof (trap_trace_rec_t); 139 hrtime_t latest = 0; 140 141 /* 142 * Loop through the CPUs, looking for the latest trap trace record 143 * (we want to walk through the trap trace records in reverse 144 * chronological order). 145 */ 146 for (i = 0; i < NCPU; i++) { 147 ttc = &ttcp[i]; 148 149 if (ttc->ttc_current == 0) 150 continue; 151 152 if (ttc->ttc_current < ttc->ttc_first) 153 ttc->ttc_current = ttc->ttc_limit - recsize; 154 155 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) { 156 mdb_warn("couldn't read rec at %p", ttc->ttc_current); 157 return (WALK_ERR); 158 } 159 160 if (rec.ttr_stamp > latest) { 161 latest = rec.ttr_stamp; 162 latest_ttc = ttc; 163 } 164 } 165 166 if (latest == 0) 167 return (WALK_DONE); 168 169 ttc = latest_ttc; 170 171 if (mdb_vread(&rec, sizeof (rec), ttc->ttc_current) == -1) { 172 mdb_warn("couldn't read rec at %p", ttc->ttc_current); 173 return (WALK_ERR); 174 } 175 176 rval = wsp->walk_callback(ttc->ttc_current, &rec, wsp->walk_cbdata); 177 178 if (ttc->ttc_current == ttc->ttc_next) 179 ttc->ttc_current = 0; 180 else 181 ttc->ttc_current -= sizeof (trap_trace_rec_t); 182 183 return (rval); 184 } 185 186 void 187 ttrace_walk_fini(mdb_walk_state_t *wsp) 188 { 189 mdb_free(wsp->walk_data, sizeof (trap_trace_ctl_t) * NCPU); 190 } 191 192 static int 193 ttrace_syscall(trap_trace_rec_t *rec) 194 { 195 GElf_Sym sym; 196 int sysnum = rec->ttr_sysnum; 197 uintptr_t addr; 198 struct sysent sys; 199 200 mdb_printf("%-3x", sysnum); 201 202 if (rec->ttr_sysnum > NSYSCALL) { 203 mdb_printf(" %-*d", TT_HDLR_WIDTH, rec->ttr_sysnum); 204 return (0); 205 } 206 207 if (mdb_lookup_by_name("sysent", &sym) == -1) { 208 mdb_warn("\ncouldn't find 'sysent'"); 209 return (-1); 210 } 211 212 addr = (uintptr_t)sym.st_value + sysnum * sizeof (struct sysent); 213 214 if (addr >= (uintptr_t)sym.st_value + sym.st_size) { 215 mdb_warn("\nsysnum %d out-of-range\n", sysnum); 216 return (-1); 217 } 218 219 if (mdb_vread(&sys, sizeof (sys), addr) == -1) { 220 mdb_warn("\nfailed to read sysent at %p", addr); 221 return (-1); 222 } 223 224 mdb_printf(" %-*a", TT_HDLR_WIDTH, sys.sy_callc); 225 226 return (0); 227 } 228 229 static int 230 ttrace_interrupt(trap_trace_rec_t *rec) 231 { 232 GElf_Sym sym; 233 uintptr_t addr; 234 struct av_head hd; 235 struct autovec av; 236 237 switch (rec->ttr_regs.r_trapno) { 238 case T_SOFTINT: 239 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)"); 240 return (0); 241 default: 242 break; 243 } 244 245 mdb_printf("%-3x ", rec->ttr_vector); 246 247 if (mdb_lookup_by_name("autovect", &sym) == -1) { 248 mdb_warn("\ncouldn't find 'autovect'"); 249 return (-1); 250 } 251 252 addr = (uintptr_t)sym.st_value + 253 rec->ttr_vector * sizeof (struct av_head); 254 255 if (addr >= (uintptr_t)sym.st_value + sym.st_size) { 256 mdb_warn("\nav_head for vec %x is corrupt\n", rec->ttr_vector); 257 return (-1); 258 } 259 260 if (mdb_vread(&hd, sizeof (hd), addr) == -1) { 261 mdb_warn("\ncouldn't read av_head for vec %x", rec->ttr_vector); 262 return (-1); 263 } 264 265 if (hd.avh_link == NULL) { 266 if (rec->ttr_ipl == XC_CPUPOKE_PIL) 267 mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)"); 268 else 269 mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)"); 270 } else { 271 if (mdb_vread(&av, sizeof (av), (uintptr_t)hd.avh_link) == -1) { 272 mdb_warn("couldn't read autovec at %p", 273 (uintptr_t)hd.avh_link); 274 } 275 276 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector); 277 } 278 279 return (0); 280 } 281 282 static int 283 ttrace_apix_interrupt(trap_trace_rec_t *rec) 284 { 285 struct autovec av; 286 apix_impl_t apix; 287 apix_vector_t apix_vector; 288 289 switch (rec->ttr_regs.r_trapno) { 290 case T_SOFTINT: 291 mdb_printf("%-3s %-*s", "-", TT_HDLR_WIDTH, "(fakesoftint)"); 292 return (0); 293 default: 294 break; 295 } 296 297 mdb_printf("%-3x ", rec->ttr_vector); 298 299 /* Read the per CPU apix entry */ 300 if (mdb_vread(&apix, sizeof (apix_impl_t), 301 (uintptr_t)d_apixs[rec->ttr_cpuid]) == -1) { 302 mdb_warn("\ncouldn't read apix[%d]", rec->ttr_cpuid); 303 return (-1); 304 } 305 if (mdb_vread(&apix_vector, sizeof (apix_vector_t), 306 (uintptr_t)apix.x_vectbl[rec->ttr_vector]) == -1) { 307 mdb_warn("\ncouldn't read apix_vector_t[%d]", rec->ttr_vector); 308 return (-1); 309 } 310 if (apix_vector.v_share == 0) { 311 if (rec->ttr_ipl == XC_CPUPOKE_PIL) 312 mdb_printf("%-*s", TT_HDLR_WIDTH, "(cpupoke)"); 313 else 314 mdb_printf("%-*s", TT_HDLR_WIDTH, "(spurious)"); 315 } else { 316 if (mdb_vread(&av, sizeof (struct autovec), 317 (uintptr_t)(apix_vector.v_autovect)) == -1) { 318 mdb_warn("couldn't read autovec at %p", 319 (uintptr_t)apix_vector.v_autovect); 320 } 321 322 mdb_printf("%-*a", TT_HDLR_WIDTH, av.av_vector); 323 } 324 325 return (0); 326 } 327 328 329 static struct { 330 int tt_trapno; 331 char *tt_name; 332 } ttrace_traps[] = { 333 { T_ZERODIV, "divide-error" }, 334 { T_SGLSTP, "debug-exception" }, 335 { T_NMIFLT, "nmi-interrupt" }, 336 { T_BPTFLT, "breakpoint" }, 337 { T_OVFLW, "into-overflow" }, 338 { T_BOUNDFLT, "bound-exceeded" }, 339 { T_ILLINST, "invalid-opcode" }, 340 { T_NOEXTFLT, "device-not-avail" }, 341 { T_DBLFLT, "double-fault" }, 342 { T_EXTOVRFLT, "segment-overrun" }, 343 { T_TSSFLT, "invalid-tss" }, 344 { T_SEGFLT, "segment-not-pres" }, 345 { T_STKFLT, "stack-fault" }, 346 { T_GPFLT, "general-protectn" }, 347 { T_PGFLT, "page-fault" }, 348 { T_EXTERRFLT, "error-fault" }, 349 { T_ALIGNMENT, "alignment-check" }, 350 { T_MCE, "machine-check" }, 351 { T_SIMDFPE, "sse-exception" }, 352 353 { T_DBGENTR, "debug-enter" }, 354 { T_FASTTRAP, "fasttrap-0xd2" }, 355 { T_SYSCALLINT, "syscall-0x91" }, 356 { T_DTRACE_RET, "dtrace-ret" }, 357 { T_SOFTINT, "softint" }, 358 { T_INTERRUPT, "interrupt" }, 359 { T_FAULT, "fault" }, 360 { T_AST, "ast" }, 361 { T_SYSCALL, "syscall" }, 362 363 { 0, NULL } 364 }; 365 366 static int 367 ttrace_trap(trap_trace_rec_t *rec) 368 { 369 int i; 370 371 if (rec->ttr_regs.r_trapno == T_AST) 372 mdb_printf("%-3s ", "-"); 373 else 374 mdb_printf("%-3x ", rec->ttr_regs.r_trapno); 375 376 for (i = 0; ttrace_traps[i].tt_name != NULL; i++) { 377 if (rec->ttr_regs.r_trapno == ttrace_traps[i].tt_trapno) 378 break; 379 } 380 381 if (ttrace_traps[i].tt_name == NULL) 382 mdb_printf("%-*s", TT_HDLR_WIDTH, "(unknown)"); 383 else 384 mdb_printf("%-*s", TT_HDLR_WIDTH, ttrace_traps[i].tt_name); 385 386 return (0); 387 } 388 389 static void 390 ttrace_intr_detail(trap_trace_rec_t *rec) 391 { 392 mdb_printf("\tirq %x ipl %d oldpri %d basepri %d\n", rec->ttr_vector, 393 rec->ttr_ipl, rec->ttr_pri, rec->ttr_spl); 394 } 395 396 static struct { 397 uchar_t t_marker; 398 char *t_name; 399 int (*t_hdlr)(trap_trace_rec_t *); 400 } ttrace_hdlr[] = { 401 { TT_SYSCALL, "sysc", ttrace_syscall }, 402 { TT_SYSENTER, "syse", ttrace_syscall }, 403 { TT_SYSC, "asys", ttrace_syscall }, 404 { TT_SYSC64, "sc64", ttrace_syscall }, 405 { TT_INTERRUPT, "intr", ttrace_interrupt }, 406 { TT_TRAP, "trap", ttrace_trap }, 407 { TT_EVENT, "evnt", ttrace_trap }, 408 { 0, NULL, NULL } 409 }; 410 411 typedef struct ttrace_dcmd { 412 processorid_t ttd_cpu; 413 uint_t ttd_extended; 414 uintptr_t ttd_kthread; 415 trap_trace_ctl_t ttd_ttc[NCPU]; 416 } ttrace_dcmd_t; 417 418 #if defined(__amd64) 419 420 #define DUMP(reg) #reg, regs->r_##reg 421 #define THREEREGS " %3s: %16lx %3s: %16lx %3s: %16lx\n" 422 423 static void 424 ttrace_dumpregs(trap_trace_rec_t *rec) 425 { 426 struct regs *regs = &rec->ttr_regs; 427 428 mdb_printf(THREEREGS, DUMP(rdi), DUMP(rsi), DUMP(rdx)); 429 mdb_printf(THREEREGS, DUMP(rcx), DUMP(r8), DUMP(r9)); 430 mdb_printf(THREEREGS, DUMP(rax), DUMP(rbx), DUMP(rbp)); 431 mdb_printf(THREEREGS, DUMP(r10), DUMP(r11), DUMP(r12)); 432 mdb_printf(THREEREGS, DUMP(r13), DUMP(r14), DUMP(r15)); 433 mdb_printf(THREEREGS, DUMP(ds), DUMP(es), DUMP(fs)); 434 mdb_printf(THREEREGS, DUMP(gs), "trp", regs->r_trapno, DUMP(err)); 435 mdb_printf(THREEREGS, DUMP(rip), DUMP(cs), DUMP(rfl)); 436 mdb_printf(THREEREGS, DUMP(rsp), DUMP(ss), "cr2", rec->ttr_cr2); 437 mdb_printf(" %3s: %16lx %3s: %16lx\n", 438 "fsb", regs->__r_fsbase, 439 "gsb", regs->__r_gsbase); 440 mdb_printf("\n"); 441 } 442 443 #else 444 445 #define DUMP(reg) #reg, regs->r_##reg 446 #define FOURREGS " %3s: %08x %3s: %08x %3s: %08x %3s: %08x\n" 447 448 static void 449 ttrace_dumpregs(trap_trace_rec_t *rec) 450 { 451 struct regs *regs = &rec->ttr_regs; 452 453 mdb_printf(FOURREGS, DUMP(gs), DUMP(fs), DUMP(es), DUMP(ds)); 454 mdb_printf(FOURREGS, DUMP(edi), DUMP(esi), DUMP(ebp), DUMP(esp)); 455 mdb_printf(FOURREGS, DUMP(ebx), DUMP(edx), DUMP(ecx), DUMP(eax)); 456 mdb_printf(FOURREGS, "trp", regs->r_trapno, DUMP(err), 457 DUMP(pc), DUMP(cs)); 458 mdb_printf(FOURREGS, DUMP(efl), "usp", regs->r_uesp, DUMP(ss), 459 "cr2", rec->ttr_cr2); 460 mdb_printf("\n"); 461 } 462 463 #endif /* __amd64 */ 464 465 int 466 ttrace_walk(uintptr_t addr, trap_trace_rec_t *rec, ttrace_dcmd_t *dcmd) 467 { 468 struct regs *regs = &rec->ttr_regs; 469 processorid_t cpu = -1, i; 470 471 for (i = 0; i < NCPU; i++) { 472 if (addr >= dcmd->ttd_ttc[i].ttc_first && 473 addr < dcmd->ttd_ttc[i].ttc_limit) { 474 cpu = i; 475 break; 476 } 477 } 478 479 if (cpu == -1) { 480 mdb_warn("couldn't find %p in any trap trace ctl\n", addr); 481 return (WALK_ERR); 482 } 483 484 if (dcmd->ttd_cpu != -1 && cpu != dcmd->ttd_cpu) 485 return (WALK_NEXT); 486 487 if (dcmd->ttd_kthread != 0 && 488 dcmd->ttd_kthread != rec->ttr_curthread) 489 return (WALK_NEXT); 490 491 mdb_printf("%3d %15llx ", cpu, rec->ttr_stamp); 492 493 for (i = 0; ttrace_hdlr[i].t_hdlr != NULL; i++) { 494 if (rec->ttr_marker != ttrace_hdlr[i].t_marker) 495 continue; 496 mdb_printf("%4s ", ttrace_hdlr[i].t_name); 497 if (ttrace_hdlr[i].t_hdlr(rec) == -1) 498 return (WALK_ERR); 499 } 500 501 mdb_printf(" %a\n", regs->r_pc); 502 503 if (dcmd->ttd_extended == FALSE) 504 return (WALK_NEXT); 505 506 if (rec->ttr_marker == TT_INTERRUPT) 507 ttrace_intr_detail(rec); 508 else 509 ttrace_dumpregs(rec); 510 511 if (rec->ttr_sdepth > 0) { 512 for (i = 0; i < rec->ttr_sdepth; i++) { 513 if (i >= TTR_STACK_DEPTH) { 514 mdb_printf("%17s*** invalid ttr_sdepth (is %d, " 515 "should be <= %d)\n", " ", rec->ttr_sdepth, 516 TTR_STACK_DEPTH); 517 break; 518 } 519 520 mdb_printf("%17s %a()\n", " ", rec->ttr_stack[i]); 521 } 522 mdb_printf("\n"); 523 } 524 525 return (WALK_NEXT); 526 } 527 528 int 529 ttrace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 530 { 531 ttrace_dcmd_t dcmd; 532 trap_trace_ctl_t *ttc = dcmd.ttd_ttc; 533 trap_trace_rec_t rec; 534 size_t ttc_size = sizeof (trap_trace_ctl_t) * NCPU; 535 536 if (!ttrace_ttr_size_check()) 537 return (WALK_ERR); 538 539 bzero(&dcmd, sizeof (dcmd)); 540 dcmd.ttd_cpu = -1; 541 dcmd.ttd_extended = FALSE; 542 543 if (mdb_readsym(ttc, ttc_size, "trap_trace_ctl") == -1) { 544 mdb_warn("symbol 'trap_trace_ctl' not found; " 545 "non-TRAPTRACE kernel?\n"); 546 return (DCMD_ERR); 547 } 548 549 if (mdb_getopts(argc, argv, 550 'x', MDB_OPT_SETBITS, TRUE, &dcmd.ttd_extended, 551 't', MDB_OPT_UINTPTR, &dcmd.ttd_kthread, NULL) != argc) 552 return (DCMD_USAGE); 553 554 if (DCMD_HDRSPEC(flags)) { 555 mdb_printf("%3s %15s %4s %2s %-*s%s\n", "CPU", 556 "TIMESTAMP", "TYPE", "Vec", TT_HDLR_WIDTH, "HANDLER", 557 " EIP"); 558 } 559 560 if (flags & DCMD_ADDRSPEC) { 561 if (addr >= NCPU) { 562 if (mdb_vread(&rec, sizeof (rec), addr) == -1) { 563 mdb_warn("couldn't read trap trace record " 564 "at %p", addr); 565 return (DCMD_ERR); 566 } 567 568 if (ttrace_walk(addr, &rec, &dcmd) == WALK_ERR) 569 return (DCMD_ERR); 570 571 return (DCMD_OK); 572 } 573 dcmd.ttd_cpu = addr; 574 } 575 576 if (mdb_readvar(&use_apix, "apix_enable") == -1) { 577 mdb_warn("failed to read apix_enable"); 578 use_apix = 0; 579 } 580 581 if (use_apix) { 582 if (mdb_readvar(&d_apixs, "apixs") == -1) { 583 mdb_warn("\nfailed to read apixs."); 584 return (DCMD_ERR); 585 } 586 /* change to apix ttrace interrupt handler */ 587 ttrace_hdlr[4].t_hdlr = ttrace_apix_interrupt; 588 } 589 590 if (mdb_walk("ttrace", (mdb_walk_cb_t)ttrace_walk, &dcmd) == -1) { 591 mdb_warn("couldn't walk 'ttrace'"); 592 return (DCMD_ERR); 593 } 594 595 return (DCMD_OK); 596 } 597 598 /*ARGSUSED*/ 599 int 600 mutex_owner_init(mdb_walk_state_t *wsp) 601 { 602 return (WALK_NEXT); 603 } 604 605 int 606 mutex_owner_step(mdb_walk_state_t *wsp) 607 { 608 uintptr_t addr = wsp->walk_addr; 609 mutex_impl_t mtx; 610 uintptr_t owner; 611 kthread_t thr; 612 613 if (mdb_vread(&mtx, sizeof (mtx), addr) == -1) 614 return (WALK_ERR); 615 616 if (!MUTEX_TYPE_ADAPTIVE(&mtx)) 617 return (WALK_DONE); 618 619 if ((owner = (uintptr_t)MUTEX_OWNER(&mtx)) == 0) 620 return (WALK_DONE); 621 622 if (mdb_vread(&thr, sizeof (thr), owner) != -1) 623 (void) wsp->walk_callback(owner, &thr, wsp->walk_cbdata); 624 625 return (WALK_DONE); 626 } 627 628 static void 629 gate_desc_dump(gate_desc_t *gate, const char *label, int header) 630 { 631 const char *lastnm; 632 uint_t lastval; 633 char type[4]; 634 635 switch (gate->sgd_type) { 636 case SDT_SYSIGT: 637 strcpy(type, "int"); 638 break; 639 case SDT_SYSTGT: 640 strcpy(type, "trp"); 641 break; 642 case SDT_SYSTASKGT: 643 strcpy(type, "tsk"); 644 break; 645 default: 646 (void) mdb_snprintf(type, sizeof (type), "%3x", gate->sgd_type); 647 } 648 649 #if defined(__amd64) 650 lastnm = "IST"; 651 lastval = gate->sgd_ist; 652 #else 653 lastnm = "STK"; 654 lastval = gate->sgd_stkcpy; 655 #endif 656 657 if (header) { 658 mdb_printf("%*s%<u>%-30s%</u> %<u>%-4s%</u> %<u>%3s%</u> " 659 "%<u>%1s%</u> %<u>%3s%</u> %<u>%3s%</u>\n", strlen(label), 660 "", "HANDLER", "SEL", "DPL", "P", "TYP", lastnm); 661 } 662 663 mdb_printf("%s", label); 664 665 if (gate->sgd_type == SDT_SYSTASKGT) 666 mdb_printf("%-30s ", "-"); 667 else 668 mdb_printf("%-30a ", GATESEG_GETOFFSET(gate)); 669 670 mdb_printf("%4x %d %c %3s %2x\n", gate->sgd_selector, 671 gate->sgd_dpl, (gate->sgd_p ? '+' : ' '), type, lastval); 672 } 673 674 /*ARGSUSED*/ 675 static int 676 gate_desc(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 677 { 678 gate_desc_t gate; 679 680 if (argc != 0 || !(flags & DCMD_ADDRSPEC)) 681 return (DCMD_USAGE); 682 683 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) != 684 sizeof (gate_desc_t)) { 685 mdb_warn("failed to read gate descriptor at %p\n", addr); 686 return (DCMD_ERR); 687 } 688 689 gate_desc_dump(&gate, "", DCMD_HDRSPEC(flags)); 690 691 return (DCMD_OK); 692 } 693 694 /*ARGSUSED*/ 695 static int 696 idt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 697 { 698 int i; 699 700 if (!(flags & DCMD_ADDRSPEC)) { 701 GElf_Sym idt0_va; 702 gate_desc_t *idt0; 703 704 if (mdb_lookup_by_name("idt0", &idt0_va) < 0) { 705 mdb_warn("failed to find VA of idt0"); 706 return (DCMD_ERR); 707 } 708 709 addr = idt0_va.st_value; 710 if (mdb_vread(&idt0, sizeof (idt0), addr) != sizeof (idt0)) { 711 mdb_warn("failed to read idt0 at %p\n", addr); 712 return (DCMD_ERR); 713 } 714 715 addr = (uintptr_t)idt0; 716 } 717 718 for (i = 0; i < NIDT; i++, addr += sizeof (gate_desc_t)) { 719 gate_desc_t gate; 720 char label[6]; 721 722 if (mdb_vread(&gate, sizeof (gate_desc_t), addr) != 723 sizeof (gate_desc_t)) { 724 mdb_warn("failed to read gate descriptor at %p\n", 725 addr); 726 return (DCMD_ERR); 727 } 728 729 (void) mdb_snprintf(label, sizeof (label), "%3d: ", i); 730 gate_desc_dump(&gate, label, i == 0); 731 } 732 733 return (DCMD_OK); 734 } 735 736 static void 737 htables_help(void) 738 { 739 mdb_printf( 740 "Given a (hat_t *), generates the list of all (htable_t *)s\n" 741 "that correspond to that address space\n"); 742 } 743 744 static void 745 report_maps_help(void) 746 { 747 mdb_printf( 748 "Given a PFN, report HAT structures that map the page, or use\n" 749 "the page as a pagetable.\n" 750 "\n" 751 "-m Interpret the PFN as an MFN (machine frame number)\n"); 752 } 753 754 static void 755 ptable_help(void) 756 { 757 mdb_printf( 758 "Given a PFN holding a page table, print its contents, and\n" 759 "the address of the corresponding htable structure.\n" 760 "\n" 761 "-m Interpret the PFN as an MFN (machine frame number)\n" 762 "-l force page table level (3 is top)\n"); 763 } 764 765 static void 766 ptmap_help(void) 767 { 768 mdb_printf( 769 "Report all mappings represented by the page table hierarchy\n" 770 "rooted at the given cr3 value / physical address.\n" 771 "\n" 772 "-w run ::whatis on mapping start addresses\n"); 773 } 774 775 static const char *const scalehrtime_desc = 776 "Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n" 777 "are used as both a quick way of accumulating relative time (as for\n" 778 "usage) and as a quick way of getting the absolute current time.\n" 779 "These uses require slightly different scaling algorithms. By\n" 780 "default, if a specified time is greater than half of the unscaled\n" 781 "time at the last tick (that is, if the unscaled time represents\n" 782 "more than half the time since boot), the timestamp is assumed to\n" 783 "be absolute, and the scaling algorithm used mimics that which the\n" 784 "kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n" 785 "be relative, and the algorithm mimics scalehrtime(). This behavior\n" 786 "can be overridden by forcing the unscaled time to be interpreted\n" 787 "as relative (via -r) or absolute (via -a).\n"; 788 789 static void 790 scalehrtime_help(void) 791 { 792 mdb_printf("%s", scalehrtime_desc); 793 } 794 795 /* 796 * NSEC_SHIFT is replicated here (it is not defined in a header file), 797 * but for amusement, the reader is directed to the comment that explains 798 * the rationale for this particular value on x86. Spoiler: the value is 799 * selected to accommodate 60 MHz Pentiums! (And a confession: if the voice 800 * in that comment sounds too familiar, it's because your author also wrote 801 * that code -- some fifteen years prior to this writing in 2011...) 802 */ 803 #define NSEC_SHIFT 5 804 805 /*ARGSUSED*/ 806 static int 807 scalehrtime_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 808 { 809 uint32_t nsec_scale; 810 hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1; 811 unsigned int *tscp = (unsigned int *)&tsc; 812 uintptr_t scalehrtimef; 813 uint64_t scale; 814 GElf_Sym sym; 815 int expected = !(flags & DCMD_ADDRSPEC); 816 uint_t absolute = FALSE, relative = FALSE; 817 818 if (mdb_getopts(argc, argv, 819 'a', MDB_OPT_SETBITS, TRUE, &absolute, 820 'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected) 821 return (DCMD_USAGE); 822 823 if (absolute && relative) { 824 mdb_warn("can't specify both -a and -r\n"); 825 return (DCMD_USAGE); 826 } 827 828 if (expected == 1) { 829 switch (argv[argc - 1].a_type) { 830 case MDB_TYPE_STRING: 831 tsc = mdb_strtoull(argv[argc - 1].a_un.a_str); 832 break; 833 case MDB_TYPE_IMMEDIATE: 834 tsc = argv[argc - 1].a_un.a_val; 835 break; 836 default: 837 return (DCMD_USAGE); 838 } 839 } 840 841 if (mdb_readsym(&scalehrtimef, 842 sizeof (scalehrtimef), "scalehrtimef") == -1) { 843 mdb_warn("couldn't read 'scalehrtimef'"); 844 return (DCMD_ERR); 845 } 846 847 if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) { 848 mdb_warn("couldn't find 'tsc_scalehrtime'"); 849 return (DCMD_ERR); 850 } 851 852 if (sym.st_value != scalehrtimef) { 853 mdb_warn("::scalehrtime requires that scalehrtimef " 854 "be set to tsc_scalehrtime\n"); 855 return (DCMD_ERR); 856 } 857 858 if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) { 859 mdb_warn("couldn't read 'nsec_scale'"); 860 return (DCMD_ERR); 861 } 862 863 if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) { 864 mdb_warn("couldn't read 'tsc_last'"); 865 return (DCMD_ERR); 866 } 867 868 if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) { 869 mdb_warn("couldn't read 'tsc_hrtime_base'"); 870 return (DCMD_ERR); 871 } 872 873 /* 874 * If our time is greater than half of tsc_last, we will take our 875 * delta against tsc_last, convert it, and add that to (or subtract it 876 * from) tsc_hrtime_base. This mimics what the kernel actually does 877 * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much 878 * higher precision result than trying to convert a large tsc value. 879 */ 880 if (absolute || (tsc > (tsc_last >> 1) && !relative)) { 881 if (tsc > tsc_last) { 882 tsc = tsc - tsc_last; 883 } else { 884 tsc = tsc_last - tsc; 885 mult = -1; 886 } 887 } else { 888 base = 0; 889 } 890 891 scale = (uint64_t)nsec_scale; 892 893 hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT; 894 hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT); 895 896 mdb_printf("0x%llx\n", base + (hrt * mult)); 897 898 return (DCMD_OK); 899 } 900 901 /* 902 * The x86 feature set is implemented as a bitmap array. That bitmap array is 903 * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES) 904 * macro. We have the names for each of these features in unix's text segment 905 * so we do not have to duplicate them and instead just look them up. 906 */ 907 /*ARGSUSED*/ 908 static int 909 x86_featureset_dcmd(uintptr_t addr, uint_t flags, int argc, 910 const mdb_arg_t *argv) 911 { 912 void *fset; 913 GElf_Sym sym; 914 uintptr_t nptr; 915 char name[128]; 916 int ii; 917 918 size_t sz = sizeof (uchar_t) * BT_SIZEOFMAP(NUM_X86_FEATURES); 919 920 if (argc != 0) 921 return (DCMD_USAGE); 922 923 if (mdb_lookup_by_name("x86_feature_names", &sym) == -1) { 924 mdb_warn("couldn't find x86_feature_names"); 925 return (DCMD_ERR); 926 } 927 928 fset = mdb_zalloc(sz, UM_NOSLEEP); 929 if (fset == NULL) { 930 mdb_warn("failed to allocate memory for x86_featureset"); 931 return (DCMD_ERR); 932 } 933 934 if (mdb_readvar(fset, "x86_featureset") != sz) { 935 mdb_warn("failed to read x86_featureset"); 936 mdb_free(fset, sz); 937 return (DCMD_ERR); 938 } 939 940 for (ii = 0; ii < NUM_X86_FEATURES; ii++) { 941 if (!BT_TEST((ulong_t *)fset, ii)) 942 continue; 943 944 if (mdb_vread(&nptr, sizeof (char *), sym.st_value + 945 sizeof (void *) * ii) != sizeof (char *)) { 946 mdb_warn("failed to read feature array %d", ii); 947 mdb_free(fset, sz); 948 return (DCMD_ERR); 949 } 950 951 if (mdb_readstr(name, sizeof (name), nptr) == -1) { 952 mdb_warn("failed to read feature %d", ii); 953 mdb_free(fset, sz); 954 return (DCMD_ERR); 955 } 956 mdb_printf("%s\n", name); 957 } 958 959 mdb_free(fset, sz); 960 return (DCMD_OK); 961 } 962 963 #ifdef _KMDB 964 /* ARGSUSED */ 965 static int 966 sysregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 967 { 968 struct sysregs sregs = { 0 }; 969 desctbr_t gdtr; 970 boolean_t longmode = B_FALSE; 971 972 #ifdef __amd64 973 longmode = B_TRUE; 974 #endif 975 976 sregs.sr_cr0 = kmdb_unix_getcr0(); 977 sregs.sr_cr2 = kmdb_unix_getcr2(); 978 sregs.sr_cr3 = kmdb_unix_getcr3(); 979 sregs.sr_cr4 = kmdb_unix_getcr4(); 980 981 kmdb_unix_getgdtr(&gdtr); 982 sregs.sr_gdtr.d_base = gdtr.dtr_base; 983 sregs.sr_gdtr.d_lim = gdtr.dtr_limit; 984 985 mdb_x86_print_sysregs(&sregs, longmode); 986 987 return (DCMD_OK); 988 } 989 #endif 990 991 extern void xcall_help(void); 992 extern int xcall_dcmd(uintptr_t, uint_t, int, const mdb_arg_t *); 993 994 static const mdb_dcmd_t dcmds[] = { 995 { "gate_desc", ":", "dump a gate descriptor", gate_desc }, 996 { "idt", ":[-v]", "dump an IDT", idt }, 997 { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace }, 998 { "vatopfn", ":[-a as]", "translate address to physical page", 999 va2pfn_dcmd }, 1000 { "report_maps", ":[-m]", 1001 "Given PFN, report mappings / page table usage", 1002 report_maps_dcmd, report_maps_help }, 1003 { "htables", "", "Given hat_t *, lists all its htable_t * values", 1004 htables_dcmd, htables_help }, 1005 { "ptable", ":[-lm]", "Given PFN, dump contents of a page table", 1006 ptable_dcmd, ptable_help }, 1007 { "ptmap", ":", "Given a cr3 value, dump all mappings", 1008 ptmap_dcmd, ptmap_help }, 1009 { "pte", ":[-l N]", "print human readable page table entry", 1010 pte_dcmd }, 1011 { "pfntomfn", ":", "convert physical page to hypervisor machine page", 1012 pfntomfn_dcmd }, 1013 { "mfntopfn", ":", "convert hypervisor machine page to physical page", 1014 mfntopfn_dcmd }, 1015 { "memseg_list", ":", "show memseg list", memseg_list }, 1016 { "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time", 1017 scalehrtime_dcmd, scalehrtime_help }, 1018 { "x86_featureset", NULL, "dump the x86_featureset vector", 1019 x86_featureset_dcmd }, 1020 { "xcall", ":", "print CPU cross-call state", xcall_dcmd, xcall_help }, 1021 #ifdef _KMDB 1022 { "sysregs", NULL, "dump system registers", sysregs_dcmd }, 1023 #endif 1024 { NULL } 1025 }; 1026 1027 static const mdb_walker_t walkers[] = { 1028 { "ttrace", "walks trap trace buffers in reverse chronological order", 1029 ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini }, 1030 { "mutex_owner", "walks the owner of a mutex", 1031 mutex_owner_init, mutex_owner_step }, 1032 { "memseg", "walk the memseg structures", 1033 memseg_walk_init, memseg_walk_step, memseg_walk_fini }, 1034 { NULL } 1035 }; 1036 1037 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers }; 1038 1039 const mdb_modinfo_t * 1040 _mdb_init(void) 1041 { 1042 return (&modinfo); 1043 } 1044 1045 void 1046 _mdb_fini(void) 1047 { 1048 free_mmu(); 1049 } 1050