1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 29 /* All Rights Reserved */ 30 /* */ 31 /* Copyright (c) 1987, 1988 Microsoft Corporation */ 32 /* All Rights Reserved */ 33 /* */ 34 35 #pragma ident "%Z%%M% %I% %E% SMI" 36 37 #include <sys/types.h> 38 #include <sys/sysmacros.h> 39 #include <sys/param.h> 40 #include <sys/signal.h> 41 #include <sys/systm.h> 42 #include <sys/user.h> 43 #include <sys/proc.h> 44 #include <sys/disp.h> 45 #include <sys/class.h> 46 #include <sys/core.h> 47 #include <sys/syscall.h> 48 #include <sys/cpuvar.h> 49 #include <sys/vm.h> 50 #include <sys/sysinfo.h> 51 #include <sys/fault.h> 52 #include <sys/stack.h> 53 #include <sys/mmu.h> 54 #include <sys/psw.h> 55 #include <sys/regset.h> 56 #include <sys/fp.h> 57 #include <sys/trap.h> 58 #include <sys/kmem.h> 59 #include <sys/vtrace.h> 60 #include <sys/cmn_err.h> 61 #include <sys/prsystm.h> 62 #include <sys/mutex_impl.h> 63 #include <sys/machsystm.h> 64 #include <sys/archsystm.h> 65 #include <sys/sdt.h> 66 #include <sys/avintr.h> 67 #include <sys/kobj.h> 68 69 #include <vm/hat.h> 70 71 #include <vm/seg_kmem.h> 72 #include <vm/as.h> 73 #include <vm/seg.h> 74 #include <vm/hat_pte.h> 75 76 #include <sys/procfs.h> 77 78 #include <sys/reboot.h> 79 #include <sys/debug.h> 80 #include <sys/debugreg.h> 81 #include <sys/modctl.h> 82 #include <sys/aio_impl.h> 83 #include <sys/tnf.h> 84 #include <sys/tnf_probe.h> 85 #include <sys/cred.h> 86 #include <sys/mman.h> 87 #include <sys/x86_archext.h> 88 #include <sys/copyops.h> 89 #include <c2/audit.h> 90 #include <sys/ftrace.h> 91 #include <sys/panic.h> 92 #include <sys/traptrace.h> 93 #include <sys/ontrap.h> 94 #include <sys/cpc_impl.h> 95 96 #define USER 0x10000 /* user-mode flag added to trap type */ 97 98 static const char *trap_type_mnemonic[] = { 99 "de", "db", "2", "bp", 100 "of", "br", "ud", "nm", 101 "df", "9", "ts", "np", 102 "ss", "gp", "pf", "15", 103 "mf", "ac", "mc", "xf" 104 }; 105 106 static const char *trap_type[] = { 107 "Divide error", /* trap id 0 */ 108 "Debug", /* trap id 1 */ 109 "NMI interrupt", /* trap id 2 */ 110 "Breakpoint", /* trap id 3 */ 111 "Overflow", /* trap id 4 */ 112 "BOUND range exceeded", /* trap id 5 */ 113 "Invalid opcode", /* trap id 6 */ 114 "Device not available", /* trap id 7 */ 115 "Double fault", /* trap id 8 */ 116 "Coprocessor segment overrun", /* trap id 9 */ 117 "Invalid TSS", /* trap id 10 */ 118 "Segment not present", /* trap id 11 */ 119 "Stack segment fault", /* trap id 12 */ 120 "General protection", /* trap id 13 */ 121 "Page fault", /* trap id 14 */ 122 "Reserved", /* trap id 15 */ 123 "x87 floating point error", /* trap id 16 */ 124 "Alignment check", /* trap id 17 */ 125 "Machine check", /* trap id 18 */ 126 "SIMD floating point exception", /* trap id 19 */ 127 }; 128 129 #define TRAP_TYPES (sizeof (trap_type) / sizeof (trap_type[0])) 130 131 int tudebug = 0; 132 int tudebugbpt = 0; 133 int tudebugfpe = 0; 134 int tudebugsse = 0; 135 136 #if defined(TRAPDEBUG) || defined(lint) 137 int tdebug = 0; 138 int lodebug = 0; 139 int faultdebug = 0; 140 #else 141 #define tdebug 0 142 #define lodebug 0 143 #define faultdebug 0 144 #endif /* defined(TRAPDEBUG) || defined(lint) */ 145 146 #if defined(TRAPTRACE) 147 static void dump_ttrace(void); 148 #endif /* TRAPTRACE */ 149 static void dumpregs(struct regs *); 150 static void showregs(uint_t, struct regs *, caddr_t); 151 static void dump_tss(void); 152 static int kern_gpfault(struct regs *); 153 154 struct trap_info { 155 struct regs *trap_regs; 156 uint_t trap_type; 157 caddr_t trap_addr; 158 }; 159 160 /*ARGSUSED*/ 161 static int 162 die(uint_t type, struct regs *rp, caddr_t addr, processorid_t cpuid) 163 { 164 struct trap_info ti; 165 const char *trap_name, *trap_mnemonic; 166 167 if (type < TRAP_TYPES) { 168 trap_name = trap_type[type]; 169 trap_mnemonic = trap_type_mnemonic[type]; 170 } else { 171 trap_name = "trap"; 172 trap_mnemonic = "-"; 173 } 174 175 #ifdef TRAPTRACE 176 TRAPTRACE_FREEZE; 177 #endif 178 179 ti.trap_regs = rp; 180 ti.trap_type = type & ~USER; 181 ti.trap_addr = addr; 182 183 curthread->t_panic_trap = &ti; 184 185 if (type == T_PGFLT && addr < (caddr_t)KERNELBASE) { 186 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p " 187 "occurred in module \"%s\" due to %s", 188 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr, 189 mod_containing_pc((caddr_t)rp->r_pc), 190 addr < (caddr_t)PAGESIZE ? 191 "a NULL pointer dereference" : 192 "an illegal access to a user address"); 193 } else 194 panic("BAD TRAP: type=%x (#%s %s) rp=%p addr=%p", 195 type, trap_mnemonic, trap_name, (void *)rp, (void *)addr); 196 return (0); 197 } 198 199 /* 200 * Rewrite the instruction at pc to be an int $T_SYSCALLINT instruction. 201 * 202 * int <vector> is two bytes: 0xCD <vector> 203 */ 204 205 #define SLOW_SCALL_SIZE 2 206 207 static int 208 rewrite_syscall(caddr_t pc) 209 { 210 uchar_t instr[SLOW_SCALL_SIZE] = { 0xCD, T_SYSCALLINT }; 211 212 if (uwrite(curthread->t_procp, instr, SLOW_SCALL_SIZE, 213 (uintptr_t)pc) != 0) 214 return (1); 215 216 return (0); 217 } 218 219 /* 220 * Test to see if the instruction at pc is sysenter or syscall. The second 221 * argument should be the x86 feature flag corresponding to the expected 222 * instruction. 223 * 224 * sysenter is two bytes: 0x0F 0x34 225 * syscall is two bytes: 0x0F 0x05 226 */ 227 228 #define FAST_SCALL_SIZE 2 229 230 static int 231 instr_is_fast_syscall(caddr_t pc, int which) 232 { 233 uchar_t instr[FAST_SCALL_SIZE]; 234 235 ASSERT(which == X86_SEP || which == X86_ASYSC); 236 237 if (copyin_nowatch(pc, (caddr_t)instr, FAST_SCALL_SIZE) != 0 || 238 instr[0] != 0x0F) 239 return (0); 240 241 if ((which == X86_SEP && instr[1] == 0x34) || 242 (which == X86_ASYSC && instr[1] == 0x05)) 243 return (1); 244 245 return (0); 246 } 247 248 /* 249 * Test to see if the instruction at pc is a system call instruction. 250 * 251 * The bytes of an lcall instruction used for the syscall trap. 252 * static uchar_t lcall[7] = { 0x9a, 0, 0, 0, 0, 0x7, 0 }; 253 * static uchar_t lcallalt[7] = { 0x9a, 0, 0, 0, 0, 0x27, 0 }; 254 */ 255 256 #define LCALLSIZE 7 257 258 static int 259 instr_is_syscall(caddr_t pc) 260 { 261 uchar_t instr[LCALLSIZE]; 262 263 if (copyin_nowatch(pc, (caddr_t)instr, LCALLSIZE) == 0 && 264 instr[0] == 0x9a && 265 instr[1] == 0 && 266 instr[2] == 0 && 267 instr[3] == 0 && 268 instr[4] == 0 && 269 (instr[5] == 0x7 || instr[5] == 0x27) && 270 instr[6] == 0) 271 return (1); 272 273 return (0); 274 } 275 276 #ifdef __amd64 277 278 /* 279 * In the first revisions of AMD64 CPUs produced by AMD, the LAHF and 280 * SAHF instructions were not implemented in 64bit mode. Later revisions 281 * did implement these instructions. An extension to the cpuid instruction 282 * was added to check for the capability of executing these instructions 283 * in 64bit mode. 284 * 285 * Intel originally did not implement these instructions in EM64T either, 286 * but added them in later revisions. 287 * 288 * So, there are different chip revisions by both vendors out there that 289 * may or may not implement these instructions. The easy solution is to 290 * just always emulate these instructions on demand. 291 * 292 * SAHF == store %ah in the lower 8 bits of %rflags (opcode 0x9e) 293 * LAHF == load the lower 8 bits of %rflags into %ah (opcode 0x9f) 294 */ 295 296 #define LSAHFSIZE 1 297 298 static int 299 instr_is_lsahf(caddr_t pc, uchar_t *instr) 300 { 301 if (copyin_nowatch(pc, (caddr_t)instr, LSAHFSIZE) == 0 && 302 (*instr == 0x9e || *instr == 0x9f)) 303 return (1); 304 return (0); 305 } 306 307 /* 308 * Emulate the LAHF and SAHF instructions. The reference manuals define 309 * these instructions to always load/store bit 1 as a 1, and bits 3 and 5 310 * as a 0. The other, defined, bits are copied (the PS_ICC bits and PS_P). 311 * 312 * Note that %ah is bits 8-15 of %rax. 313 */ 314 static void 315 emulate_lsahf(struct regs *rp, uchar_t instr) 316 { 317 if (instr == 0x9e) { 318 /* sahf. Copy bits from %ah to flags. */ 319 rp->r_ps = (rp->r_ps & ~0xff) | 320 ((rp->r_rax >> 8) & PSL_LSAHFMASK) | PS_MB1; 321 } else { 322 /* lahf. Copy bits from flags to %ah. */ 323 rp->r_rax = (rp->r_rax & ~0xff00) | 324 (((rp->r_ps & PSL_LSAHFMASK) | PS_MB1) << 8); 325 } 326 rp->r_pc += LSAHFSIZE; 327 } 328 #endif /* __amd64 */ 329 330 #ifdef OPTERON_ERRATUM_91 331 332 /* 333 * Test to see if the instruction at pc is a prefetch instruction. 334 * 335 * The first byte of prefetch instructions is always 0x0F. 336 * The second byte is 0x18 for regular prefetch or 0x0D for AMD 3dnow prefetch. 337 * The third byte is between 0 and 3 inclusive. 338 */ 339 340 #define PREFETCHSIZE 3 341 342 static int 343 cmp_to_prefetch(uchar_t *p) 344 { 345 if (*p == 0x0F && (*(p+1) == 0x18 || *(p+1) == 0x0D) && *(p+2) <= 3) 346 return (1); 347 return (0); 348 } 349 350 static int 351 instr_is_prefetch(caddr_t pc) 352 { 353 uchar_t instr[PREFETCHSIZE]; 354 int error; 355 356 error = copyin_nowatch(pc, (caddr_t)instr, PREFETCHSIZE); 357 358 if (error == 0 && cmp_to_prefetch(instr)) 359 return (1); 360 return (0); 361 } 362 363 #endif /* OPTERON_ERRATUM_91 */ 364 365 /* 366 * Called from the trap handler when a processor trap occurs. 367 * 368 * Note: All user-level traps that might call stop() must exit 369 * trap() by 'goto out' or by falling through. 370 */ 371 void 372 trap(struct regs *rp, caddr_t addr, processorid_t cpuid) 373 { 374 kthread_t *cur_thread = curthread; 375 enum seg_rw rw; 376 unsigned type; 377 proc_t *p = ttoproc(cur_thread); 378 klwp_t *lwp = ttolwp(cur_thread); 379 uintptr_t lofault; 380 faultcode_t pagefault(), res, errcode; 381 enum fault_type fault_type; 382 k_siginfo_t siginfo; 383 uint_t fault = 0; 384 int mstate; 385 int sicode = 0; 386 int watchcode; 387 int watchpage; 388 caddr_t vaddr; 389 size_t sz; 390 int ta; 391 #ifdef __amd64 392 uchar_t instr; 393 #endif 394 395 ASSERT_STACK_ALIGNED(); 396 397 type = rp->r_trapno; 398 CPU_STATS_ADDQ(CPU, sys, trap, 1); 399 400 ASSERT(cur_thread->t_schedflag & TS_DONT_SWAP); 401 402 if (type == T_PGFLT) { 403 404 errcode = rp->r_err; 405 if (errcode & PF_ERR_WRITE) 406 rw = S_WRITE; 407 else if ((caddr_t)rp->r_pc == addr || 408 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC))) 409 rw = S_EXEC; 410 else 411 rw = S_READ; 412 413 #if defined(__i386) 414 /* 415 * Pentium Pro work-around 416 */ 417 if ((errcode & PF_ERR_PROT) && pentiumpro_bug4046376) { 418 uint_t attr; 419 uint_t priv_violation; 420 uint_t access_violation; 421 422 if (hat_getattr(addr < (caddr_t)kernelbase ? 423 curproc->p_as->a_hat : kas.a_hat, addr, &attr) 424 == -1) { 425 errcode &= ~PF_ERR_PROT; 426 } else { 427 priv_violation = (errcode & PF_ERR_USER) && 428 !(attr & PROT_USER); 429 access_violation = (errcode & PF_ERR_WRITE) && 430 !(attr & PROT_WRITE); 431 if (!priv_violation && !access_violation) 432 goto cleanup; 433 } 434 } 435 #endif /* __i386 */ 436 437 } 438 439 if (tdebug) 440 showregs(type, rp, addr); 441 442 if (USERMODE(rp->r_cs)) { 443 /* 444 * Set up the current cred to use during this trap. u_cred 445 * no longer exists. t_cred is used instead. 446 * The current process credential applies to the thread for 447 * the entire trap. If trapping from the kernel, this 448 * should already be set up. 449 */ 450 if (cur_thread->t_cred != p->p_cred) { 451 cred_t *oldcred = cur_thread->t_cred; 452 /* 453 * DTrace accesses t_cred in probe context. t_cred 454 * must always be either NULL, or point to a valid, 455 * allocated cred structure. 456 */ 457 cur_thread->t_cred = crgetcred(); 458 crfree(oldcred); 459 } 460 ASSERT(lwp != NULL); 461 type |= USER; 462 ASSERT(lwptoregs(lwp) == rp); 463 lwp->lwp_state = LWP_SYS; 464 465 switch (type) { 466 case T_PGFLT + USER: 467 if ((caddr_t)rp->r_pc == addr) 468 mstate = LMS_TFAULT; 469 else 470 mstate = LMS_DFAULT; 471 break; 472 default: 473 mstate = LMS_TRAP; 474 break; 475 } 476 /* Kernel probe */ 477 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */, 478 tnf_microstate, state, mstate); 479 mstate = new_mstate(cur_thread, mstate); 480 481 bzero(&siginfo, sizeof (siginfo)); 482 } 483 484 switch (type) { 485 case T_PGFLT + USER: 486 case T_SGLSTP: 487 case T_SGLSTP + USER: 488 case T_BPTFLT + USER: 489 break; 490 491 default: 492 FTRACE_2("trap(): type=0x%lx, regs=0x%lx", 493 (ulong_t)type, (ulong_t)rp); 494 break; 495 } 496 497 switch (type) { 498 default: 499 if (type & USER) { 500 if (tudebug) 501 showregs(type, rp, (caddr_t)0); 502 printf("trap: Unknown trap type %d in user mode\n", 503 type & ~USER); 504 siginfo.si_signo = SIGILL; 505 siginfo.si_code = ILL_ILLTRP; 506 siginfo.si_addr = (caddr_t)rp->r_pc; 507 siginfo.si_trapno = type & ~USER; 508 fault = FLTILL; 509 break; 510 } else { 511 (void) die(type, rp, addr, cpuid); 512 /*NOTREACHED*/ 513 } 514 515 case T_PGFLT: /* system page fault */ 516 /* 517 * If we're under on_trap() protection (see <sys/ontrap.h>), 518 * set ot_trap and longjmp back to the on_trap() call site. 519 */ 520 if ((cur_thread->t_ontrap != NULL) && 521 (cur_thread->t_ontrap->ot_prot & OT_DATA_ACCESS)) { 522 curthread->t_ontrap->ot_trap |= OT_DATA_ACCESS; 523 longjmp(&curthread->t_ontrap->ot_jmpbuf); 524 } 525 526 /* 527 * See if we can handle as pagefault. Save lofault 528 * across this. Here we assume that an address 529 * less than KERNELBASE is a user fault. 530 * We can do this as copy.s routines verify that the 531 * starting address is less than KERNELBASE before 532 * starting and because we know that we always have 533 * KERNELBASE mapped as invalid to serve as a "barrier". 534 */ 535 lofault = cur_thread->t_lofault; 536 cur_thread->t_lofault = 0; 537 538 mstate = new_mstate(cur_thread, LMS_KFAULT); 539 540 if (addr < (caddr_t)kernelbase) { 541 res = pagefault(addr, 542 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 0); 543 if (res == FC_NOMAP && 544 addr < p->p_usrstack && 545 grow(addr)) 546 res = 0; 547 } else { 548 res = pagefault(addr, 549 (errcode & PF_ERR_PROT)? F_PROT: F_INVAL, rw, 1); 550 } 551 (void) new_mstate(cur_thread, mstate); 552 553 /* 554 * Restore lofault. If we resolved the fault, exit. 555 * If we didn't and lofault wasn't set, die. 556 */ 557 cur_thread->t_lofault = lofault; 558 if (res == 0) 559 goto cleanup; 560 561 #if defined(OPTERON_ERRATUM_93) && defined(_LP64) 562 if (lofault == 0 && opteron_erratum_93) { 563 /* 564 * Workaround for Opteron Erratum 93. On return from 565 * a System Managment Interrupt at a HLT instruction 566 * the %rip might be truncated to a 32 bit value. 567 * BIOS is supposed to fix this, but some don't. 568 * If this occurs we simply restore the high order bits. 569 * The HLT instruction is 1 byte of 0xf4. 570 */ 571 uintptr_t rip = rp->r_pc; 572 573 if ((rip & 0xfffffffful) == rip) { 574 rip |= 0xfffffffful << 32; 575 if (hat_getpfnum(kas.a_hat, (caddr_t)rip) != 576 PFN_INVALID && 577 (*(uchar_t *)rip == 0xf4 || 578 *(uchar_t *)(rip - 1) == 0xf4)) { 579 rp->r_pc = rip; 580 goto cleanup; 581 } 582 } 583 } 584 #endif /* OPTERON_ERRATUM_93 && _LP64 */ 585 586 #ifdef OPTERON_ERRATUM_91 587 if (lofault == 0 && opteron_erratum_91) { 588 /* 589 * Workaround for Opteron Erratum 91. Prefetches may 590 * generate a page fault (they're not supposed to do 591 * that!). If this occurs we simply return back to the 592 * instruction. 593 */ 594 caddr_t pc = (caddr_t)rp->r_pc; 595 596 /* 597 * If the faulting PC is not mapped, this is a 598 * legitimate kernel page fault that must result in a 599 * panic. If the faulting PC is mapped, it could contain 600 * a prefetch instruction. Check for that here. 601 */ 602 if (hat_getpfnum(kas.a_hat, pc) != PFN_INVALID) { 603 if (cmp_to_prefetch((uchar_t *)pc)) { 604 #ifdef DEBUG 605 cmn_err(CE_WARN, "Opteron erratum 91 " 606 "occurred: kernel prefetch" 607 " at %p generated a page fault!", 608 (void *)rp->r_pc); 609 #endif /* DEBUG */ 610 goto cleanup; 611 } 612 } 613 (void) die(type, rp, addr, cpuid); 614 } 615 #endif /* OPTERON_ERRATUM_91 */ 616 617 if (lofault == 0) 618 (void) die(type, rp, addr, cpuid); 619 620 /* 621 * Cannot resolve fault. Return to lofault. 622 */ 623 if (lodebug) { 624 showregs(type, rp, addr); 625 traceregs(rp); 626 } 627 if (FC_CODE(res) == FC_OBJERR) 628 res = FC_ERRNO(res); 629 else 630 res = EFAULT; 631 rp->r_r0 = res; 632 rp->r_pc = cur_thread->t_lofault; 633 goto cleanup; 634 635 case T_PGFLT + USER: /* user page fault */ 636 if (faultdebug) { 637 char *fault_str; 638 639 switch (rw) { 640 case S_READ: 641 fault_str = "read"; 642 break; 643 case S_WRITE: 644 fault_str = "write"; 645 break; 646 case S_EXEC: 647 fault_str = "exec"; 648 break; 649 default: 650 fault_str = ""; 651 break; 652 } 653 printf("user %s fault: addr=0x%lx errcode=0x%x\n", 654 fault_str, (uintptr_t)addr, errcode); 655 } 656 657 #if defined(OPTERON_ERRATUM_100) && defined(_LP64) 658 /* 659 * Workaround for AMD erratum 100 660 * 661 * A 32-bit process may receive a page fault on a non 662 * 32-bit address by mistake. The range of the faulting 663 * address will be 664 * 665 * 0xffffffff80000000 .. 0xffffffffffffffff or 666 * 0x0000000100000000 .. 0x000000017fffffff 667 * 668 * The fault is always due to an instruction fetch, however 669 * the value of r_pc should be correct (in 32 bit range), 670 * so we ignore the page fault on the bogus address. 671 */ 672 if (p->p_model == DATAMODEL_ILP32 && 673 (0xffffffff80000000 <= (uintptr_t)addr || 674 (0x100000000 <= (uintptr_t)addr && 675 (uintptr_t)addr <= 0x17fffffff))) { 676 if (!opteron_erratum_100) 677 panic("unexpected erratum #100"); 678 if (rp->r_pc <= 0xffffffff) 679 goto out; 680 } 681 #endif /* OPTERON_ERRATUM_100 && _LP64 */ 682 683 ASSERT(!(curthread->t_flag & T_WATCHPT)); 684 watchpage = (pr_watch_active(p) && pr_is_watchpage(addr, rw)); 685 #ifdef __i386 686 /* 687 * In 32-bit mode, the lcall (system call) instruction fetches 688 * one word from the stack, at the stack pointer, because of the 689 * way the call gate is constructed. This is a bogus 690 * read and should not be counted as a read watchpoint. 691 * We work around the problem here by testing to see if 692 * this situation applies and, if so, simply jumping to 693 * the code in locore.s that fields the system call trap. 694 * The registers on the stack are already set up properly 695 * due to the match between the call gate sequence and the 696 * trap gate sequence. We just have to adjust the pc. 697 */ 698 if (watchpage && addr == (caddr_t)rp->r_sp && 699 rw == S_READ && instr_is_syscall((caddr_t)rp->r_pc)) { 700 extern void watch_syscall(void); 701 702 rp->r_pc += LCALLSIZE; 703 watch_syscall(); /* never returns */ 704 /* NOTREACHED */ 705 } 706 #endif /* __i386 */ 707 vaddr = addr; 708 if (!watchpage || (sz = instr_size(rp, &vaddr, rw)) <= 0) 709 fault_type = (errcode & PF_ERR_PROT)? F_PROT: F_INVAL; 710 else if ((watchcode = pr_is_watchpoint(&vaddr, &ta, 711 sz, NULL, rw)) != 0) { 712 if (ta) { 713 do_watch_step(vaddr, sz, rw, 714 watchcode, rp->r_pc); 715 fault_type = F_INVAL; 716 } else { 717 bzero(&siginfo, sizeof (siginfo)); 718 siginfo.si_signo = SIGTRAP; 719 siginfo.si_code = watchcode; 720 siginfo.si_addr = vaddr; 721 siginfo.si_trapafter = 0; 722 siginfo.si_pc = (caddr_t)rp->r_pc; 723 fault = FLTWATCH; 724 break; 725 } 726 } else { 727 /* XXX pr_watch_emul() never succeeds (for now) */ 728 if (rw != S_EXEC && pr_watch_emul(rp, vaddr, rw)) 729 goto out; 730 do_watch_step(vaddr, sz, rw, 0, 0); 731 fault_type = F_INVAL; 732 } 733 734 res = pagefault(addr, fault_type, rw, 0); 735 736 /* 737 * If pagefault() succeeded, ok. 738 * Otherwise attempt to grow the stack. 739 */ 740 if (res == 0 || 741 (res == FC_NOMAP && 742 addr < p->p_usrstack && 743 grow(addr))) { 744 lwp->lwp_lastfault = FLTPAGE; 745 lwp->lwp_lastfaddr = addr; 746 if (prismember(&p->p_fltmask, FLTPAGE)) { 747 bzero(&siginfo, sizeof (siginfo)); 748 siginfo.si_addr = addr; 749 (void) stop_on_fault(FLTPAGE, &siginfo); 750 } 751 goto out; 752 } else if (res == FC_PROT && addr < p->p_usrstack && 753 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC))) { 754 report_stack_exec(p, addr); 755 } 756 757 #ifdef OPTERON_ERRATUM_91 758 /* 759 * Workaround for Opteron Erratum 91. Prefetches may generate a 760 * page fault (they're not supposed to do that!). If this 761 * occurs we simply return back to the instruction. 762 * 763 * We rely on copyin to properly fault in the page with r_pc. 764 */ 765 if (opteron_erratum_91 && 766 addr != (caddr_t)rp->r_pc && 767 instr_is_prefetch((caddr_t)rp->r_pc)) { 768 #ifdef DEBUG 769 cmn_err(CE_WARN, "Opteron erratum 91 occurred: " 770 "prefetch at %p in pid %d generated a trap!", 771 (void *)rp->r_pc, p->p_pid); 772 #endif /* DEBUG */ 773 goto out; 774 } 775 #endif /* OPTERON_ERRATUM_91 */ 776 777 if (tudebug) 778 showregs(type, rp, addr); 779 /* 780 * In the case where both pagefault and grow fail, 781 * set the code to the value provided by pagefault. 782 * We map all errors returned from pagefault() to SIGSEGV. 783 */ 784 bzero(&siginfo, sizeof (siginfo)); 785 siginfo.si_addr = addr; 786 switch (FC_CODE(res)) { 787 case FC_HWERR: 788 case FC_NOSUPPORT: 789 siginfo.si_signo = SIGBUS; 790 siginfo.si_code = BUS_ADRERR; 791 fault = FLTACCESS; 792 break; 793 case FC_ALIGN: 794 siginfo.si_signo = SIGBUS; 795 siginfo.si_code = BUS_ADRALN; 796 fault = FLTACCESS; 797 break; 798 case FC_OBJERR: 799 if ((siginfo.si_errno = FC_ERRNO(res)) != EINTR) { 800 siginfo.si_signo = SIGBUS; 801 siginfo.si_code = BUS_OBJERR; 802 fault = FLTACCESS; 803 } 804 break; 805 default: /* FC_NOMAP or FC_PROT */ 806 siginfo.si_signo = SIGSEGV; 807 siginfo.si_code = 808 (res == FC_NOMAP)? SEGV_MAPERR : SEGV_ACCERR; 809 fault = FLTBOUNDS; 810 break; 811 } 812 break; 813 814 case T_ILLINST + USER: /* invalid opcode fault */ 815 /* 816 * If the syscall instruction is disabled due to LDT usage, a 817 * user program that attempts to execute it will trigger a #ud 818 * trap. Check for that case here. If this occurs on a CPU which 819 * doesn't even support syscall, the result of all of this will 820 * be to emulate that particular instruction. 821 */ 822 if (p->p_ldt != NULL && 823 instr_is_fast_syscall((caddr_t)rp->r_pc, X86_ASYSC)) { 824 if (rewrite_syscall((caddr_t)rp->r_pc) == 0) 825 goto out; 826 #ifdef DEBUG 827 else 828 cmn_err(CE_WARN, "failed to rewrite syscall " 829 "instruction in process %d", 830 curthread->t_procp->p_pid); 831 #endif /* DEBUG */ 832 } 833 834 #ifdef __amd64 835 /* 836 * Emulate the LAHF and SAHF instructions if needed. 837 * See the instr_is_lsahf function for details. 838 */ 839 if (p->p_model == DATAMODEL_LP64 && 840 instr_is_lsahf((caddr_t)rp->r_pc, &instr)) { 841 emulate_lsahf(rp, instr); 842 goto out; 843 } 844 #endif 845 846 /*FALLTHROUGH*/ 847 848 if (tudebug) 849 showregs(type, rp, (caddr_t)0); 850 siginfo.si_signo = SIGILL; 851 siginfo.si_code = ILL_ILLOPC; 852 siginfo.si_addr = (caddr_t)rp->r_pc; 853 fault = FLTILL; 854 break; 855 856 case T_ZERODIV + USER: /* integer divide by zero */ 857 if (tudebug && tudebugfpe) 858 showregs(type, rp, (caddr_t)0); 859 siginfo.si_signo = SIGFPE; 860 siginfo.si_code = FPE_INTDIV; 861 siginfo.si_addr = (caddr_t)rp->r_pc; 862 fault = FLTIZDIV; 863 break; 864 865 case T_OVFLW + USER: /* integer overflow */ 866 if (tudebug && tudebugfpe) 867 showregs(type, rp, (caddr_t)0); 868 siginfo.si_signo = SIGFPE; 869 siginfo.si_code = FPE_INTOVF; 870 siginfo.si_addr = (caddr_t)rp->r_pc; 871 fault = FLTIOVF; 872 break; 873 874 case T_NOEXTFLT + USER: /* math coprocessor not available */ 875 if (tudebug && tudebugfpe) 876 showregs(type, rp, addr); 877 if (fpnoextflt(rp)) { 878 siginfo.si_signo = SIGFPE; 879 siginfo.si_code = ILL_ILLOPC; 880 siginfo.si_addr = (caddr_t)rp->r_pc; 881 fault = FLTFPE; 882 } 883 break; 884 885 case T_EXTOVRFLT: /* extension overrun fault */ 886 /* check if we took a kernel trap on behalf of user */ 887 { 888 extern void ndptrap_frstor(void); 889 if (rp->r_pc != (uintptr_t)ndptrap_frstor) 890 (void) die(type, rp, addr, cpuid); 891 type |= USER; 892 } 893 /*FALLTHROUGH*/ 894 case T_EXTOVRFLT + USER: /* extension overrun fault */ 895 if (tudebug && tudebugfpe) 896 showregs(type, rp, addr); 897 if (fpextovrflt(rp)) { 898 siginfo.si_signo = SIGSEGV; 899 siginfo.si_code = SEGV_MAPERR; 900 siginfo.si_addr = (caddr_t)rp->r_pc; 901 fault = FLTBOUNDS; 902 } 903 break; 904 905 case T_EXTERRFLT: /* x87 floating point exception pending */ 906 /* check if we took a kernel trap on behalf of user */ 907 { 908 extern void ndptrap_frstor(void); 909 if (rp->r_pc != (uintptr_t)ndptrap_frstor) 910 (void) die(type, rp, addr, cpuid); 911 type |= USER; 912 } 913 /*FALLTHROUGH*/ 914 915 case T_EXTERRFLT + USER: /* x87 floating point exception pending */ 916 if (tudebug && tudebugfpe) 917 showregs(type, rp, addr); 918 if (sicode = fpexterrflt(rp)) { 919 siginfo.si_signo = SIGFPE; 920 siginfo.si_code = sicode; 921 siginfo.si_addr = (caddr_t)rp->r_pc; 922 fault = FLTFPE; 923 } 924 break; 925 926 case T_SIMDFPE + USER: /* SSE and SSE2 exceptions */ 927 if (tudebug && tudebugsse) 928 showregs(type, rp, addr); 929 if ((x86_feature & (X86_SSE|X86_SSE2)) == 0) { 930 /* 931 * There are rumours that some user instructions 932 * on older CPUs can cause this trap to occur; in 933 * which case send a SIGILL instead of a SIGFPE. 934 */ 935 siginfo.si_signo = SIGILL; 936 siginfo.si_code = ILL_ILLTRP; 937 siginfo.si_addr = (caddr_t)rp->r_pc; 938 siginfo.si_trapno = type & ~USER; 939 fault = FLTILL; 940 } else if ((sicode = fpsimderrflt(rp)) != 0) { 941 siginfo.si_signo = SIGFPE; 942 siginfo.si_code = sicode; 943 siginfo.si_addr = (caddr_t)rp->r_pc; 944 fault = FLTFPE; 945 } 946 break; 947 948 case T_BPTFLT: /* breakpoint trap */ 949 /* 950 * Kernel breakpoint traps should only happen when kmdb is 951 * active, and even then, it'll have interposed on the IDT, so 952 * control won't get here. If it does, we've hit a breakpoint 953 * without the debugger, which is very strange, and very 954 * fatal. 955 */ 956 if (tudebug && tudebugbpt) 957 showregs(type, rp, (caddr_t)0); 958 959 (void) die(type, rp, addr, cpuid); 960 break; 961 962 case T_SGLSTP: /* single step/hw breakpoint exception */ 963 if (tudebug && tudebugbpt) 964 showregs(type, rp, (caddr_t)0); 965 966 /* Now evaluate how we got here */ 967 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) { 968 /* 969 * i386 single-steps even through lcalls which 970 * change the privilege level. So we take a trap at 971 * the first instruction in privileged mode. 972 * 973 * Set a flag to indicate that upon completion of 974 * the system call, deal with the single-step trap. 975 * 976 * The same thing happens for sysenter, too. 977 */ 978 #if defined(__amd64) 979 if (rp->r_pc == (uintptr_t)sys_sysenter) { 980 /* 981 * Adjust the pc so that we don't execute the 982 * swapgs instruction at the head of the 983 * handler and completely confuse things. 984 */ 985 rp->r_pc = (uintptr_t) 986 _sys_sysenter_post_swapgs; 987 #elif defined(__i386) 988 if (rp->r_pc == (uintptr_t)sys_call || 989 rp->r_pc == (uintptr_t)sys_sysenter) { 990 #endif 991 rp->r_ps &= ~PS_T; /* turn off trace */ 992 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING; 993 cur_thread->t_post_sys = 1; 994 aston(curthread); 995 goto cleanup; 996 } 997 } 998 /* XXX - needs review on debugger interface? */ 999 if (boothowto & RB_DEBUG) 1000 debug_enter((char *)NULL); 1001 else 1002 (void) die(type, rp, addr, cpuid); 1003 break; 1004 1005 case T_NMIFLT: /* NMI interrupt */ 1006 printf("Unexpected NMI in system mode\n"); 1007 goto cleanup; 1008 1009 case T_NMIFLT + USER: /* NMI interrupt */ 1010 printf("Unexpected NMI in user mode\n"); 1011 break; 1012 1013 case T_GPFLT: /* general protection violation */ 1014 #if defined(__amd64) 1015 /* 1016 * On amd64, we can get a #gp from referencing addresses 1017 * in the virtual address hole e.g. from a copyin. 1018 */ 1019 1020 /* 1021 * If we're under on_trap() protection (see <sys/ontrap.h>), 1022 * set ot_trap and longjmp back to the on_trap() call site. 1023 */ 1024 if ((cur_thread->t_ontrap != NULL) && 1025 (cur_thread->t_ontrap->ot_prot & OT_DATA_ACCESS)) { 1026 curthread->t_ontrap->ot_trap |= OT_DATA_ACCESS; 1027 longjmp(&curthread->t_ontrap->ot_jmpbuf); 1028 } 1029 1030 /* 1031 * If we're under lofault protection (copyin etc.), 1032 * longjmp back to lofault with an EFAULT. 1033 */ 1034 if (cur_thread->t_lofault) { 1035 /* 1036 * Fault is not resolvable, so just return to lofault 1037 */ 1038 if (lodebug) { 1039 showregs(type, rp, addr); 1040 traceregs(rp); 1041 } 1042 rp->r_r0 = EFAULT; 1043 rp->r_pc = cur_thread->t_lofault; 1044 goto cleanup; 1045 } 1046 /*FALLTHROUGH*/ 1047 #endif 1048 case T_STKFLT: /* stack fault */ 1049 case T_TSSFLT: /* invalid TSS fault */ 1050 case T_SEGFLT: /* segment not present fault */ 1051 if (tudebug) 1052 showregs(type, rp, (caddr_t)0); 1053 if (kern_gpfault(rp)) 1054 (void) die(type, rp, addr, cpuid); 1055 goto cleanup; 1056 /*FALLTHROUGH*/ 1057 1058 /* 1059 * ONLY 32-bit PROCESSES can USE a PRIVATE LDT! 64-bit apps should have 1060 * no legacy need for them, so we put a stop to it here. 1061 * 1062 * So: not-present fault is ONLY valid for 32-bit processes with a private LDT 1063 * trying to do a system call. Emulate it. 1064 * 1065 * #gp fault is ONLY valid for 32-bit processes also, which DO NOT have private 1066 * LDT, and are trying to do a system call. Emulate it. 1067 */ 1068 case T_SEGFLT + USER: /* segment not present fault */ 1069 case T_GPFLT + USER: /* general protection violation */ 1070 #ifdef _SYSCALL32_IMPL 1071 if (p->p_model != DATAMODEL_NATIVE) { 1072 #endif /* _SYSCALL32_IMPL */ 1073 if (instr_is_syscall((caddr_t)rp->r_pc)) { 1074 if (type == T_SEGFLT + USER) 1075 ASSERT(p->p_ldt != NULL); 1076 1077 if ((p->p_ldt == NULL && type == T_GPFLT + USER) || 1078 type == T_SEGFLT + USER) { 1079 1080 /* 1081 * The user attempted a system call via the obsolete 1082 * call gate mechanism. Because the process doesn't have 1083 * an LDT (i.e. the ldtr contains 0), a #gp results. 1084 * Emulate the syscall here, just as we do above for a 1085 * #np trap. 1086 */ 1087 1088 /* 1089 * Since this is a not-present trap, rp->r_pc points to 1090 * the trapping lcall instruction. We need to bump it 1091 * to the next insn so the app can continue on. 1092 */ 1093 rp->r_pc += LCALLSIZE; 1094 lwp->lwp_regs = rp; 1095 1096 /* 1097 * Normally the microstate of the LWP is forced back to 1098 * LMS_USER by the syscall handlers. Emulate that 1099 * behavior here. 1100 */ 1101 mstate = LMS_USER; 1102 1103 dosyscall(); 1104 goto out; 1105 } 1106 } 1107 #ifdef _SYSCALL32_IMPL 1108 } 1109 #endif /* _SYSCALL32_IMPL */ 1110 /* 1111 * If the current process is using a private LDT and the 1112 * trapping instruction is sysenter, the sysenter instruction 1113 * has been disabled on the CPU because it destroys segment 1114 * registers. If this is the case, rewrite the instruction to 1115 * be a safe system call and retry it. If this occurs on a CPU 1116 * which doesn't even support sysenter, the result of all of 1117 * this will be to emulate that particular instruction. 1118 */ 1119 if (p->p_ldt != NULL && 1120 instr_is_fast_syscall((caddr_t)rp->r_pc, X86_SEP)) { 1121 if (rewrite_syscall((caddr_t)rp->r_pc) == 0) 1122 goto out; 1123 #ifdef DEBUG 1124 else 1125 cmn_err(CE_WARN, "failed to rewrite sysenter " 1126 "instruction in process %d", 1127 curthread->t_procp->p_pid); 1128 #endif /* DEBUG */ 1129 } 1130 /*FALLTHROUGH*/ 1131 1132 case T_BOUNDFLT + USER: /* bound fault */ 1133 case T_STKFLT + USER: /* stack fault */ 1134 case T_TSSFLT + USER: /* invalid TSS fault */ 1135 if (tudebug) 1136 showregs(type, rp, (caddr_t)0); 1137 siginfo.si_signo = SIGSEGV; 1138 siginfo.si_code = SEGV_MAPERR; 1139 siginfo.si_addr = (caddr_t)rp->r_pc; 1140 fault = FLTBOUNDS; 1141 break; 1142 1143 case T_ALIGNMENT + USER: /* user alignment error (486) */ 1144 if (tudebug) 1145 showregs(type, rp, (caddr_t)0); 1146 bzero(&siginfo, sizeof (siginfo)); 1147 siginfo.si_signo = SIGBUS; 1148 siginfo.si_code = BUS_ADRALN; 1149 siginfo.si_addr = (caddr_t)rp->r_pc; 1150 fault = FLTACCESS; 1151 break; 1152 1153 case T_SGLSTP + USER: /* single step/hw breakpoint exception */ 1154 if (tudebug && tudebugbpt) 1155 showregs(type, rp, (caddr_t)0); 1156 1157 /* Was it single-stepping? */ 1158 if (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP) { 1159 pcb_t *pcb = &lwp->lwp_pcb; 1160 1161 rp->r_ps &= ~PS_T; 1162 /* 1163 * If both NORMAL_STEP and WATCH_STEP are in effect, 1164 * give precedence to NORMAL_STEP. If neither is set, 1165 * user must have set the PS_T bit in %efl; treat this 1166 * as NORMAL_STEP. 1167 */ 1168 if ((pcb->pcb_flags & NORMAL_STEP) || 1169 !(pcb->pcb_flags & WATCH_STEP)) { 1170 siginfo.si_signo = SIGTRAP; 1171 siginfo.si_code = TRAP_TRACE; 1172 siginfo.si_addr = (caddr_t)rp->r_pc; 1173 fault = FLTTRACE; 1174 if (pcb->pcb_flags & WATCH_STEP) 1175 (void) undo_watch_step(NULL); 1176 } else { 1177 fault = undo_watch_step(&siginfo); 1178 } 1179 pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP); 1180 } else { 1181 cmn_err(CE_WARN, 1182 "Unexpected INT 1 in user mode, dr6=%lx", 1183 lwp->lwp_pcb.pcb_drstat); 1184 } 1185 break; 1186 1187 case T_BPTFLT + USER: /* breakpoint trap */ 1188 if (tudebug && tudebugbpt) 1189 showregs(type, rp, (caddr_t)0); 1190 /* 1191 * int 3 (the breakpoint instruction) leaves the pc referring 1192 * to the address one byte after the breakpointed address. 1193 * If the P_PR_BPTADJ flag has been set via /proc, We adjust 1194 * it back so it refers to the breakpointed address. 1195 */ 1196 if (p->p_proc_flag & P_PR_BPTADJ) 1197 rp->r_pc--; 1198 siginfo.si_signo = SIGTRAP; 1199 siginfo.si_code = TRAP_BRKPT; 1200 siginfo.si_addr = (caddr_t)rp->r_pc; 1201 fault = FLTBPT; 1202 break; 1203 1204 case T_AST: 1205 /* 1206 * This occurs only after the cs register has been made to 1207 * look like a kernel selector, either through debugging or 1208 * possibly by functions like setcontext(). The thread is 1209 * about to cause a general protection fault at common_iret() 1210 * in locore. We let that happen immediately instead of 1211 * doing the T_AST processing. 1212 */ 1213 goto cleanup; 1214 1215 case T_AST + USER: /* profiling or resched pseudo trap */ 1216 if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW) { 1217 lwp->lwp_pcb.pcb_flags &= ~CPC_OVERFLOW; 1218 if (kcpc_overflow_ast()) { 1219 /* 1220 * Signal performance counter overflow 1221 */ 1222 if (tudebug) 1223 showregs(type, rp, (caddr_t)0); 1224 bzero(&siginfo, sizeof (siginfo)); 1225 siginfo.si_signo = SIGEMT; 1226 siginfo.si_code = EMT_CPCOVF; 1227 siginfo.si_addr = (caddr_t)rp->r_pc; 1228 fault = FLTCPCOVF; 1229 } 1230 } 1231 break; 1232 } 1233 1234 /* 1235 * We can't get here from a system trap 1236 */ 1237 ASSERT(type & USER); 1238 1239 if (fault) { 1240 /* 1241 * Remember the fault and fault adddress 1242 * for real-time (SIGPROF) profiling. 1243 */ 1244 lwp->lwp_lastfault = fault; 1245 lwp->lwp_lastfaddr = siginfo.si_addr; 1246 1247 DTRACE_PROC2(fault, int, fault, ksiginfo_t *, &siginfo); 1248 1249 /* 1250 * If a debugger has declared this fault to be an 1251 * event of interest, stop the lwp. Otherwise just 1252 * deliver the associated signal. 1253 */ 1254 if (siginfo.si_signo != SIGKILL && 1255 prismember(&p->p_fltmask, fault) && 1256 stop_on_fault(fault, &siginfo) == 0) 1257 siginfo.si_signo = 0; 1258 } 1259 1260 if (siginfo.si_signo) 1261 trapsig(&siginfo, (fault == FLTCPCOVF)? 0 : 1); 1262 1263 if (lwp->lwp_oweupc) 1264 profil_tick(rp->r_pc); 1265 1266 if (cur_thread->t_astflag | cur_thread->t_sig_check) { 1267 /* 1268 * Turn off the AST flag before checking all the conditions that 1269 * may have caused an AST. This flag is on whenever a signal or 1270 * unusual condition should be handled after the next trap or 1271 * syscall. 1272 */ 1273 astoff(cur_thread); 1274 /* 1275 * If a single-step trap occurred on a syscall (see above) 1276 * recognize it now. Do this before checking for signals 1277 * because deferred_singlestep_trap() may generate a SIGTRAP to 1278 * the LWP or may otherwise mark the LWP to call issig(FORREAL). 1279 */ 1280 if (lwp->lwp_pcb.pcb_flags & DEBUG_PENDING) 1281 deferred_singlestep_trap((caddr_t)rp->r_pc); 1282 1283 cur_thread->t_sig_check = 0; 1284 1285 mutex_enter(&p->p_lock); 1286 if (curthread->t_proc_flag & TP_CHANGEBIND) { 1287 timer_lwpbind(); 1288 curthread->t_proc_flag &= ~TP_CHANGEBIND; 1289 } 1290 mutex_exit(&p->p_lock); 1291 1292 /* 1293 * for kaio requests that are on the per-process poll queue, 1294 * aiop->aio_pollq, they're AIO_POLL bit is set, the kernel 1295 * should copyout their result_t to user memory. by copying 1296 * out the result_t, the user can poll on memory waiting 1297 * for the kaio request to complete. 1298 */ 1299 if (p->p_aio) 1300 aio_cleanup(0); 1301 /* 1302 * If this LWP was asked to hold, call holdlwp(), which will 1303 * stop. holdlwps() sets this up and calls pokelwps() which 1304 * sets the AST flag. 1305 * 1306 * Also check TP_EXITLWP, since this is used by fresh new LWPs 1307 * through lwp_rtt(). That flag is set if the lwp_create(2) 1308 * syscall failed after creating the LWP. 1309 */ 1310 if (ISHOLD(p)) 1311 holdlwp(); 1312 1313 /* 1314 * All code that sets signals and makes ISSIG evaluate true must 1315 * set t_astflag afterwards. 1316 */ 1317 if (ISSIG_PENDING(cur_thread, lwp, p)) { 1318 if (issig(FORREAL)) 1319 psig(); 1320 cur_thread->t_sig_check = 1; 1321 } 1322 1323 if (cur_thread->t_rprof != NULL) { 1324 realsigprof(0, 0); 1325 cur_thread->t_sig_check = 1; 1326 } 1327 1328 /* 1329 * /proc can't enable/disable the trace bit itself 1330 * because that could race with the call gate used by 1331 * system calls via "lcall". If that happened, an 1332 * invalid EFLAGS would result. prstep()/prnostep() 1333 * therefore schedule an AST for the purpose. 1334 */ 1335 if (lwp->lwp_pcb.pcb_flags & REQUEST_STEP) { 1336 lwp->lwp_pcb.pcb_flags &= ~REQUEST_STEP; 1337 rp->r_ps |= PS_T; 1338 } 1339 if (lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP) { 1340 lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP; 1341 rp->r_ps &= ~PS_T; 1342 } 1343 } 1344 1345 out: /* We can't get here from a system trap */ 1346 ASSERT(type & USER); 1347 1348 if (ISHOLD(p)) 1349 holdlwp(); 1350 1351 /* 1352 * Set state to LWP_USER here so preempt won't give us a kernel 1353 * priority if it occurs after this point. Call CL_TRAPRET() to 1354 * restore the user-level priority. 1355 * 1356 * It is important that no locks (other than spinlocks) be entered 1357 * after this point before returning to user mode (unless lwp_state 1358 * is set back to LWP_SYS). 1359 */ 1360 lwp->lwp_state = LWP_USER; 1361 1362 if (cur_thread->t_trapret) { 1363 cur_thread->t_trapret = 0; 1364 thread_lock(cur_thread); 1365 CL_TRAPRET(cur_thread); 1366 thread_unlock(cur_thread); 1367 } 1368 if (CPU->cpu_runrun) 1369 preempt(); 1370 (void) new_mstate(cur_thread, mstate); 1371 1372 /* Kernel probe */ 1373 TNF_PROBE_1(thread_state, "thread", /* CSTYLED */, 1374 tnf_microstate, state, LMS_USER); 1375 1376 return; 1377 1378 cleanup: /* system traps end up here */ 1379 ASSERT(!(type & USER)); 1380 } 1381 1382 /* 1383 * Patch non-zero to disable preemption of threads in the kernel. 1384 */ 1385 int IGNORE_KERNEL_PREEMPTION = 0; /* XXX - delete this someday */ 1386 1387 struct kpreempt_cnts { /* kernel preemption statistics */ 1388 int kpc_idle; /* executing idle thread */ 1389 int kpc_intr; /* executing interrupt thread */ 1390 int kpc_clock; /* executing clock thread */ 1391 int kpc_blocked; /* thread has blocked preemption (t_preempt) */ 1392 int kpc_notonproc; /* thread is surrendering processor */ 1393 int kpc_inswtch; /* thread has ratified scheduling decision */ 1394 int kpc_prilevel; /* processor interrupt level is too high */ 1395 int kpc_apreempt; /* asynchronous preemption */ 1396 int kpc_spreempt; /* synchronous preemption */ 1397 } kpreempt_cnts; 1398 1399 /* 1400 * kernel preemption: forced rescheduling, preempt the running kernel thread. 1401 * the argument is old PIL for an interrupt, 1402 * or the distingished value KPREEMPT_SYNC. 1403 */ 1404 void 1405 kpreempt(int asyncspl) 1406 { 1407 kthread_t *cur_thread = curthread; 1408 1409 if (IGNORE_KERNEL_PREEMPTION) { 1410 aston(CPU->cpu_dispthread); 1411 return; 1412 } 1413 1414 /* 1415 * Check that conditions are right for kernel preemption 1416 */ 1417 do { 1418 if (cur_thread->t_preempt) { 1419 /* 1420 * either a privileged thread (idle, panic, interrupt) 1421 * or will check when t_preempt is lowered 1422 */ 1423 if (cur_thread->t_pri < 0) 1424 kpreempt_cnts.kpc_idle++; 1425 else if (cur_thread->t_flag & T_INTR_THREAD) { 1426 kpreempt_cnts.kpc_intr++; 1427 if (cur_thread->t_pil == CLOCK_LEVEL) 1428 kpreempt_cnts.kpc_clock++; 1429 } else 1430 kpreempt_cnts.kpc_blocked++; 1431 aston(CPU->cpu_dispthread); 1432 return; 1433 } 1434 if (cur_thread->t_state != TS_ONPROC || 1435 cur_thread->t_disp_queue != CPU->cpu_disp) { 1436 /* this thread will be calling swtch() shortly */ 1437 kpreempt_cnts.kpc_notonproc++; 1438 if (CPU->cpu_thread != CPU->cpu_dispthread) { 1439 /* already in swtch(), force another */ 1440 kpreempt_cnts.kpc_inswtch++; 1441 siron(); 1442 } 1443 return; 1444 } 1445 if (getpil() >= DISP_LEVEL) { 1446 /* 1447 * We can't preempt this thread if it is at 1448 * a PIL >= DISP_LEVEL since it may be holding 1449 * a spin lock (like sched_lock). 1450 */ 1451 siron(); /* check back later */ 1452 kpreempt_cnts.kpc_prilevel++; 1453 return; 1454 } 1455 1456 if (asyncspl != KPREEMPT_SYNC) 1457 kpreempt_cnts.kpc_apreempt++; 1458 else 1459 kpreempt_cnts.kpc_spreempt++; 1460 1461 cur_thread->t_preempt++; 1462 preempt(); 1463 cur_thread->t_preempt--; 1464 } while (CPU->cpu_kprunrun); 1465 } 1466 1467 /* 1468 * Print out debugging info. 1469 */ 1470 static void 1471 showregs(uint_t type, struct regs *rp, caddr_t addr) 1472 { 1473 int s; 1474 1475 s = spl7(); 1476 type &= ~USER; 1477 if (u.u_comm[0]) 1478 printf("%s: ", u.u_comm); 1479 if (type < TRAP_TYPES) 1480 printf("#%s %s\n", trap_type_mnemonic[type], trap_type[type]); 1481 else 1482 switch (type) { 1483 case T_SYSCALL: 1484 printf("Syscall Trap:\n"); 1485 break; 1486 case T_AST: 1487 printf("AST\n"); 1488 break; 1489 default: 1490 printf("Bad Trap = %d\n", type); 1491 break; 1492 } 1493 if (type == T_PGFLT) { 1494 printf("Bad %s fault at addr=0x%lx\n", 1495 USERMODE(rp->r_cs) ? "user": "kernel", (uintptr_t)addr); 1496 } else if (addr) { 1497 printf("addr=0x%lx\n", (uintptr_t)addr); 1498 } 1499 1500 printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n", 1501 (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ? 1502 ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps); 1503 1504 #if defined(__lint) 1505 /* 1506 * this clause can be deleted when lint bug 4870403 is fixed 1507 * (lint thinks that bit 32 is illegal in a %b format string) 1508 */ 1509 printf("cr0: %x cr4: %b\n", 1510 (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4); 1511 #else 1512 printf("cr0: %b cr4: %b\n", 1513 (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4); 1514 #endif 1515 1516 #if defined(__amd64) 1517 printf("cr2: %lx cr3: %lx cr8: %lx\n", getcr2(), getcr3(), getcr8()); 1518 #elif defined(__i386) 1519 printf("cr2: %lx cr3: %lx\n", getcr2(), getcr3()); 1520 #endif 1521 1522 dumpregs(rp); 1523 splx(s); 1524 } 1525 1526 static void 1527 dumpregs(struct regs *rp) 1528 { 1529 #if defined(__amd64) 1530 const char fmt[] = "\t%3s: %16lx %3s: %16lx %3s: %16lx\n"; 1531 1532 printf(fmt, "rdi", rp->r_rdi, "rsi", rp->r_rsi, "rdx", rp->r_rdx); 1533 printf(fmt, "rcx", rp->r_rcx, " r8", rp->r_r8, " r9", rp->r_r9); 1534 printf(fmt, "rax", rp->r_rax, "rbx", rp->r_rbx, "rbp", rp->r_rbp); 1535 printf(fmt, "r10", rp->r_r10, "r11", rp->r_r11, "r12", rp->r_r12); 1536 printf(fmt, "r13", rp->r_r13, "r14", rp->r_r14, "r15", rp->r_r15); 1537 1538 printf(fmt, "fsb", rp->r_fsbase, "gsb", rp->r_gsbase, " ds", rp->r_ds); 1539 printf(fmt, " es", rp->r_es, " fs", rp->r_fs, " gs", rp->r_gs); 1540 1541 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err, "rip", rp->r_rip); 1542 printf(fmt, " cs", rp->r_cs, "rfl", rp->r_rfl, "rsp", rp->r_rsp); 1543 1544 printf("\t%3s: %16lx\n", " ss", rp->r_ss); 1545 1546 #elif defined(__i386) 1547 const char fmt[] = "\t%3s: %8lx %3s: %8lx %3s: %8lx %3s: %8lx\n"; 1548 1549 printf(fmt, " gs", rp->r_gs, " fs", rp->r_fs, 1550 " es", rp->r_es, " ds", rp->r_ds); 1551 printf(fmt, "edi", rp->r_edi, "esi", rp->r_esi, 1552 "ebp", rp->r_ebp, "esp", rp->r_esp); 1553 printf(fmt, "ebx", rp->r_ebx, "edx", rp->r_edx, 1554 "ecx", rp->r_ecx, "eax", rp->r_eax); 1555 printf(fmt, "trp", rp->r_trapno, "err", rp->r_err, 1556 "eip", rp->r_eip, " cs", rp->r_cs); 1557 printf("\t%3s: %8lx %3s: %8lx %3s: %8lx\n", 1558 "efl", rp->r_efl, "usp", rp->r_uesp, " ss", rp->r_ss); 1559 1560 #endif /* __i386 */ 1561 } 1562 1563 /* 1564 * Handle #gp faults in kernel mode. 1565 * 1566 * One legitimate way this can happen is if we attempt to update segment 1567 * registers to naughty values on the way out of the kernel. 1568 * 1569 * This can happen in a couple of ways: someone - either accidentally or 1570 * on purpose - creates (setcontext(2), lwp_create(2)) or modifies 1571 * (signal(2)) a ucontext that contains silly segment register values. 1572 * Or someone - either accidentally or on purpose - modifies the prgregset_t 1573 * of a subject process via /proc to contain silly segment register values. 1574 * 1575 * (The unfortunate part is that we can end up discovering the bad segment 1576 * register value in the middle of an 'iret' after we've popped most of the 1577 * stack. So it becomes quite difficult to associate an accurate ucontext 1578 * with the lwp, because the act of taking the #gp trap overwrites most of 1579 * what we were going to send the lwp.) 1580 * 1581 * OTOH if it turns out that's -not- the problem, and we're -not- an lwp 1582 * trying to return to user mode and we get a #gp fault, then we need 1583 * to die() -- which will happen if we return non-zero from this routine. 1584 */ 1585 static int 1586 kern_gpfault(struct regs *rp) 1587 { 1588 kthread_t *t = curthread; 1589 proc_t *p = ttoproc(t); 1590 klwp_t *lwp = ttolwp(t); 1591 struct regs tmpregs, *trp = NULL; 1592 caddr_t pc = (caddr_t)rp->r_pc; 1593 int v; 1594 1595 extern void _sys_rtt(), sr_sup(); 1596 1597 #if defined(__amd64) 1598 extern void _update_sregs(), _update_sregs_done(); 1599 static const uint8_t iretq_insn[2] = { 0x48, 0xcf }; 1600 1601 #elif defined(__i386) 1602 static const uint8_t iret_insn[1] = { 0xcf }; 1603 1604 /* 1605 * Note carefully the appallingly awful dependency between 1606 * the instruction sequence used in __SEGREGS_POP and these 1607 * instructions encoded here. 1608 * 1609 * XX64 Add some commentary to locore.s/privregs.h to document this. 1610 */ 1611 static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 }; 1612 static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 }; 1613 static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 }; 1614 static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc }; 1615 #endif 1616 /* 1617 * if we're not an lwp, or the pc range is outside _sys_rtt, then 1618 * we should immediately be die()ing horribly 1619 */ 1620 if (lwp == NULL || 1621 (uintptr_t)pc < (uintptr_t)_sys_rtt || 1622 (uintptr_t)pc > (uintptr_t)sr_sup) 1623 return (1); 1624 1625 /* 1626 * So at least we're in the right part of the kernel. 1627 * 1628 * Disassemble the instruction at the faulting pc. 1629 * Once we know what it is, we carefully reconstruct the stack 1630 * based on the order in which the stack is deconstructed in 1631 * _sys_rtt. Ew. 1632 */ 1633 1634 #if defined(__amd64) 1635 1636 if (bcmp(pc, iretq_insn, sizeof (iretq_insn)) == 0) { 1637 /* 1638 * We took the #gp while trying to perform the iretq. 1639 * This means that either %cs or %ss are bad. 1640 * All we know for sure is that most of the general 1641 * registers have been restored, including the 1642 * segment registers, and all we have left on the 1643 * topmost part of the lwp's stack are the 1644 * registers that the iretq was unable to consume. 1645 * 1646 * All the rest of the state was crushed by the #gp 1647 * which pushed -its- registers atop our old save area 1648 * (because we had to decrement the stack pointer, sigh) so 1649 * all that we can try and do is to reconstruct the 1650 * crushed frame from the #gp trap frame itself. 1651 */ 1652 trp = &tmpregs; 1653 trp->r_ss = lwptoregs(lwp)->r_ss; 1654 trp->r_sp = lwptoregs(lwp)->r_sp; 1655 trp->r_ps = lwptoregs(lwp)->r_ps; 1656 trp->r_cs = lwptoregs(lwp)->r_cs; 1657 trp->r_pc = lwptoregs(lwp)->r_pc; 1658 bcopy(rp, trp, offsetof(struct regs, r_pc)); 1659 1660 /* 1661 * Validate simple math 1662 */ 1663 ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc); 1664 ASSERT(trp->r_err == rp->r_err); 1665 1666 } else if ((lwp->lwp_pcb.pcb_flags & RUPDATE_PENDING) != 0 && 1667 pc >= (caddr_t)_update_sregs && 1668 pc < (caddr_t)_update_sregs_done) { 1669 /* 1670 * This is the common case -- we're trying to load 1671 * a bad segment register value in the only section 1672 * of kernel code that ever loads segment registers. 1673 * 1674 * We don't need to do anything at this point because 1675 * the pcb contains all the pending segment register 1676 * state, and the regs are still intact because we 1677 * didn't adjust the stack pointer yet. Given the fidelity 1678 * of all this, we could conceivably send a signal 1679 * to the lwp, rather than core-ing. 1680 */ 1681 trp = lwptoregs(lwp); 1682 ASSERT((caddr_t)trp == (caddr_t)rp->r_sp); 1683 } 1684 1685 #elif defined(__i386) 1686 1687 if (bcmp(pc, iret_insn, sizeof (iret_insn)) == 0) { 1688 /* 1689 * We took the #gp while trying to perform the iret. 1690 * This means that either %cs or %ss are bad. 1691 * All we know for sure is that most of the general 1692 * registers have been restored, including the 1693 * segment registers, and all we have left on the 1694 * topmost part of the lwp's stack are the registers that 1695 * the iret was unable to consume. 1696 * 1697 * All the rest of the state was crushed by the #gp 1698 * which pushed -its- registers atop our old save area 1699 * (because we had to decrement the stack pointer, sigh) so 1700 * all that we can try and do is to reconstruct the 1701 * crushed frame from the #gp trap frame itself. 1702 */ 1703 trp = &tmpregs; 1704 trp->r_ss = lwptoregs(lwp)->r_ss; 1705 trp->r_sp = lwptoregs(lwp)->r_sp; 1706 trp->r_ps = lwptoregs(lwp)->r_ps; 1707 trp->r_cs = lwptoregs(lwp)->r_cs; 1708 trp->r_pc = lwptoregs(lwp)->r_pc; 1709 bcopy(rp, trp, offsetof(struct regs, r_pc)); 1710 1711 ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc); 1712 ASSERT(trp->r_err == rp->r_err); 1713 1714 } else { 1715 /* 1716 * Segment registers are reloaded in _sys_rtt 1717 * via the following sequence: 1718 * 1719 * movw 0(%esp), %gs 1720 * movw 4(%esp), %fs 1721 * movw 8(%esp), %es 1722 * movw 12(%esp), %ds 1723 * addl $16, %esp 1724 * 1725 * Thus if any of them fault, we know the user 1726 * registers are left unharmed on the stack. 1727 */ 1728 if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 || 1729 bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 || 1730 bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 || 1731 bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0) 1732 trp = lwptoregs(lwp); 1733 } 1734 #endif /* __amd64 */ 1735 1736 if (trp == NULL) 1737 return (1); 1738 1739 /* 1740 * If we get to here, we're reasonably confident that we've 1741 * correctly decoded what happened on the way out of the kernel. 1742 * Rewrite the lwp's registers so that we can create a core dump 1743 * the (at least vaguely) represents the mcontext we were 1744 * being asked to restore when things went so terribly wrong. 1745 */ 1746 1747 /* 1748 * Make sure that we have a meaningful %trapno and %err. 1749 */ 1750 trp->r_trapno = rp->r_trapno; 1751 trp->r_err = rp->r_err; 1752 1753 if ((caddr_t)trp != (caddr_t)lwptoregs(lwp)) 1754 bcopy(trp, lwptoregs(lwp), sizeof (*trp)); 1755 1756 mutex_enter(&p->p_lock); 1757 lwp->lwp_cursig = SIGSEGV; 1758 mutex_exit(&p->p_lock); 1759 1760 /* 1761 * Terminate all LWPs but don't discard them. If another lwp beat us to 1762 * the punch by calling exit(), evaporate now. 1763 */ 1764 proc_is_exiting(p); 1765 if (exitlwps(1) != 0) { 1766 mutex_enter(&p->p_lock); 1767 lwp_exit(); 1768 } 1769 1770 #ifdef C2_AUDIT 1771 if (audit_active) /* audit core dump */ 1772 audit_core_start(SIGSEGV); 1773 #endif 1774 v = core(SIGSEGV, B_FALSE); 1775 #ifdef C2_AUDIT 1776 if (audit_active) /* audit core dump */ 1777 audit_core_finish(v ? CLD_KILLED : CLD_DUMPED); 1778 #endif 1779 exit(v ? CLD_KILLED : CLD_DUMPED, SIGSEGV); 1780 return (0); 1781 } 1782 1783 /* 1784 * dump_tss() - Display the TSS structure 1785 */ 1786 1787 #if defined(__amd64) 1788 1789 static void 1790 dump_tss(void) 1791 { 1792 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */ 1793 struct tss *tss = CPU->cpu_tss; 1794 1795 printf(tss_fmt, "tss_rsp0", (void *)tss->tss_rsp0); 1796 printf(tss_fmt, "tss_rsp1", (void *)tss->tss_rsp1); 1797 printf(tss_fmt, "tss_rsp2", (void *)tss->tss_rsp2); 1798 1799 printf(tss_fmt, "tss_ist1", (void *)tss->tss_ist1); 1800 printf(tss_fmt, "tss_ist2", (void *)tss->tss_ist2); 1801 printf(tss_fmt, "tss_ist3", (void *)tss->tss_ist3); 1802 printf(tss_fmt, "tss_ist4", (void *)tss->tss_ist4); 1803 printf(tss_fmt, "tss_ist5", (void *)tss->tss_ist5); 1804 printf(tss_fmt, "tss_ist6", (void *)tss->tss_ist6); 1805 printf(tss_fmt, "tss_ist7", (void *)tss->tss_ist7); 1806 } 1807 1808 #elif defined(__i386) 1809 1810 static void 1811 dump_tss(void) 1812 { 1813 const char tss_fmt[] = "tss.%s:\t0x%p\n"; /* Format string */ 1814 struct tss *tss = CPU->cpu_tss; 1815 1816 printf(tss_fmt, "tss_link", (void *)(uintptr_t)tss->tss_link); 1817 printf(tss_fmt, "tss_esp0", (void *)(uintptr_t)tss->tss_esp0); 1818 printf(tss_fmt, "tss_ss0", (void *)(uintptr_t)tss->tss_ss0); 1819 printf(tss_fmt, "tss_esp1", (void *)(uintptr_t)tss->tss_esp1); 1820 printf(tss_fmt, "tss_ss1", (void *)(uintptr_t)tss->tss_ss1); 1821 printf(tss_fmt, "tss_esp2", (void *)(uintptr_t)tss->tss_esp2); 1822 printf(tss_fmt, "tss_ss2", (void *)(uintptr_t)tss->tss_ss2); 1823 printf(tss_fmt, "tss_cr3", (void *)(uintptr_t)tss->tss_cr3); 1824 printf(tss_fmt, "tss_eip", (void *)(uintptr_t)tss->tss_eip); 1825 printf(tss_fmt, "tss_eflags", (void *)(uintptr_t)tss->tss_eflags); 1826 printf(tss_fmt, "tss_eax", (void *)(uintptr_t)tss->tss_eax); 1827 printf(tss_fmt, "tss_ebx", (void *)(uintptr_t)tss->tss_ebx); 1828 printf(tss_fmt, "tss_ecx", (void *)(uintptr_t)tss->tss_ecx); 1829 printf(tss_fmt, "tss_edx", (void *)(uintptr_t)tss->tss_edx); 1830 printf(tss_fmt, "tss_esp", (void *)(uintptr_t)tss->tss_esp); 1831 } 1832 1833 #endif /* __amd64 */ 1834 1835 #if defined(TRAPTRACE) 1836 1837 int ttrace_nrec = 0; /* number of records to dump out */ 1838 int ttrace_dump_nregs = 5; /* dump out this many records with regs too */ 1839 1840 /* 1841 * Dump out the last ttrace_nrec traptrace records on each CPU 1842 */ 1843 static void 1844 dump_ttrace(void) 1845 { 1846 trap_trace_ctl_t *ttc; 1847 trap_trace_rec_t *rec; 1848 uintptr_t current; 1849 int i, j, k; 1850 int n = NCPU; 1851 #if defined(__amd64) 1852 const char banner[] = 1853 "\ncpu address timestamp " 1854 "type vc handler pc\n"; 1855 const char fmt1[] = "%3d %016lx %12llx "; 1856 #elif defined(__i386) 1857 const char banner[] = 1858 "\ncpu address timestamp type vc handler pc\n"; 1859 const char fmt1[] = "%3d %08lx %12llx "; 1860 #endif 1861 const char fmt2[] = "%4s %3x "; 1862 const char fmt3[] = "%8s "; 1863 1864 if (ttrace_nrec == 0) 1865 return; 1866 1867 printf(banner); 1868 1869 for (i = 0; i < n; i++) { 1870 ttc = &trap_trace_ctl[i]; 1871 if (ttc->ttc_first == NULL) 1872 continue; 1873 1874 current = ttc->ttc_next - sizeof (trap_trace_rec_t); 1875 for (j = 0; j < ttrace_nrec; j++) { 1876 struct sysent *sys; 1877 struct autovec *vec; 1878 extern struct av_head autovect[]; 1879 int type; 1880 ulong_t off; 1881 char *sym, *stype; 1882 1883 if (current < ttc->ttc_first) 1884 current = 1885 ttc->ttc_limit - sizeof (trap_trace_rec_t); 1886 1887 if (current == NULL) 1888 continue; 1889 1890 rec = (trap_trace_rec_t *)current; 1891 1892 if (rec->ttr_stamp == 0) 1893 break; 1894 1895 printf(fmt1, i, (uintptr_t)rec, rec->ttr_stamp); 1896 1897 switch (rec->ttr_marker) { 1898 case TT_SYSCALL: 1899 case TT_SYSENTER: 1900 case TT_SYSC: 1901 case TT_SYSC64: 1902 #if defined(__amd64) 1903 sys = &sysent32[rec->ttr_sysnum]; 1904 switch (rec->ttr_marker) { 1905 case TT_SYSC64: 1906 sys = &sysent[rec->ttr_sysnum]; 1907 /*FALLTHROUGH*/ 1908 #elif defined(__i386) 1909 sys = &sysent[rec->ttr_sysnum]; 1910 switch (rec->ttr_marker) { 1911 case TT_SYSC64: 1912 #endif 1913 case TT_SYSC: 1914 stype = "sysc"; /* syscall */ 1915 break; 1916 case TT_SYSCALL: 1917 stype = "lcal"; /* lcall */ 1918 break; 1919 case TT_SYSENTER: 1920 stype = "syse"; /* sysenter */ 1921 break; 1922 default: 1923 break; 1924 } 1925 printf(fmt2, "sysc", rec->ttr_sysnum); 1926 if (sys != NULL) { 1927 sym = kobj_getsymname( 1928 (uintptr_t)sys->sy_callc, 1929 &off); 1930 if (sym != NULL) 1931 printf("%s ", sym); 1932 else 1933 printf("%p ", sys->sy_callc); 1934 } else { 1935 printf("unknown "); 1936 } 1937 break; 1938 1939 case TT_INTERRUPT: 1940 printf(fmt2, "intr", rec->ttr_vector); 1941 vec = (&autovect[rec->ttr_vector])->avh_link; 1942 if (vec != NULL) { 1943 sym = kobj_getsymname( 1944 (uintptr_t)vec->av_vector, &off); 1945 if (sym != NULL) 1946 printf("%s ", sym); 1947 else 1948 printf("%p ", vec->av_vector); 1949 } else { 1950 printf("unknown "); 1951 } 1952 break; 1953 1954 case TT_TRAP: 1955 type = rec->ttr_regs.r_trapno; 1956 printf(fmt2, "trap", type); 1957 printf("#%s ", type < TRAP_TYPES ? 1958 trap_type_mnemonic[type] : "trap"); 1959 break; 1960 1961 default: 1962 break; 1963 } 1964 1965 sym = kobj_getsymname(rec->ttr_regs.r_pc, &off); 1966 if (sym != NULL) 1967 printf("%s+%lx\n", sym, off); 1968 else 1969 printf("%lx\n", rec->ttr_regs.r_pc); 1970 1971 if (ttrace_dump_nregs-- > 0) { 1972 int s; 1973 1974 if (rec->ttr_marker == TT_INTERRUPT) 1975 printf( 1976 "\t\tipl %x spl %x pri %x\n", 1977 rec->ttr_ipl, 1978 rec->ttr_spl, 1979 rec->ttr_pri); 1980 1981 dumpregs(&rec->ttr_regs); 1982 1983 printf("\t%3s: %p\n\n", " ct", 1984 (void *)rec->ttr_curthread); 1985 1986 /* 1987 * print out the pc stack that we recorded 1988 * at trap time (if any) 1989 */ 1990 for (s = 0; s < rec->ttr_sdepth; s++) { 1991 uintptr_t fullpc; 1992 1993 if (s >= TTR_STACK_DEPTH) { 1994 printf("ttr_sdepth corrupt\n"); 1995 break; 1996 } 1997 1998 fullpc = (uintptr_t)rec->ttr_stack[s]; 1999 2000 sym = kobj_getsymname(fullpc, &off); 2001 if (sym != NULL) 2002 printf("-> %s+0x%lx()\n", 2003 sym, off); 2004 else 2005 printf("-> 0x%lx()\n", fullpc); 2006 } 2007 printf("\n"); 2008 } 2009 current -= sizeof (trap_trace_rec_t); 2010 } 2011 } 2012 } 2013 2014 #endif /* TRAPTRACE */ 2015 2016 void 2017 panic_showtrap(struct trap_info *tip) 2018 { 2019 showregs(tip->trap_type, tip->trap_regs, tip->trap_addr); 2020 2021 #if defined(TRAPTRACE) 2022 dump_ttrace(); 2023 #endif /* TRAPTRACE */ 2024 2025 if (tip->trap_type == T_DBLFLT) 2026 dump_tss(); 2027 } 2028 2029 void 2030 panic_savetrap(panic_data_t *pdp, struct trap_info *tip) 2031 { 2032 panic_saveregs(pdp, tip->trap_regs); 2033 } 2034