1/*- 2 * Copyright (c) 1989, 1990 William F. Jolitz. 3 * Copyright (c) 1990 The Regents of the University of California. 4 * Copyright (c) 2007, 2018 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by A. Joseph Koshy under 8 * sponsorship from the FreeBSD Foundation and Google, Inc. 9 * Portions of this software were developed by Konstantin Belousov 10 * <kib@FreeBSD.org> under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 */

#include "opt_apic.h"
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
#include "opt_hyperv.h"

#include "assym.inc"

#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/trap.h>

#ifdef KDTRACE_HOOKS
	.bss
	/*
	 * Hook pointer filled in by DTrace at runtime; when non-zero, the
	 * kernel #UD handler (IDTVEC(ill) below) jumps through it.
	 */
	.globl	dtrace_invop_jump_addr
	.align	4
	.type	dtrace_invop_jump_addr, @object
	.size	dtrace_invop_jump_addr, 4
dtrace_invop_jump_addr:
	.zero	4
	/*
	 * Address DTrace jumps back to when the faulting instruction was
	 * not one of its probes (set to norm_ill in IDTVEC(ill)).
	 * NOTE(review): .size declares 4 bytes but .zero reserves 8 —
	 * looks inconsistent; confirm intent before changing either.
	 */
	.globl	dtrace_invop_calltrap_addr
	.align	4
	.type	dtrace_invop_calltrap_addr, @object
	.size	dtrace_invop_calltrap_addr, 4
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
ENTRY(start_exceptions)
	/*
	 * Physical copy of the idle page-table directory pointer, placed
	 * inside the trampoline so handlers can reach it PC-relatively
	 * even while running with a user %cr3.
	 */
	.globl	tramp_idleptd
tramp_idleptd:	.long	0

/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYS386IGT.  Interrupts are disabled
 * by hardware to not allow interrupts until code switched to the kernel
 * address space and the kernel thread stack.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  The amount of state depends on the type of trap
 * and whether the trap crossed rings or not.  See i386/include/frame.h.
 * At the very least the current EFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The cpu does not push the general registers, we must do that, and we
 * must restore them prior to calling 'iret'.
 The cpu adjusts the %cs and
 * %ss segment registers, but does not mess with %ds, %es, or %fs.  Thus we
 * must load them with appropriate values for supervisor mode operation.
 *
 * This code is not executed at the linked address, it is copied to the
 * trampoline area.  As the consequence, all code there and in included files
 * must be PIC.
 */

#define	TRAP(a)		pushl $(a) ; jmp alltraps

IDTVEC(div)
	pushl $0; TRAP(T_DIVIDE)	/* $0 = dummy error code */
IDTVEC(bpt)
	pushl $0; TRAP(T_BPTFLT)
IDTVEC(dtrace_ret)
	pushl $0; TRAP(T_DTRACE_RET)
IDTVEC(ofl)
	pushl $0; TRAP(T_OFLOW)
IDTVEC(bnd)
	pushl $0; TRAP(T_BOUND)
#ifndef KDTRACE_HOOKS
IDTVEC(ill)
	pushl $0; TRAP(T_PRIVINFLT)
#endif
IDTVEC(dna)
	pushl $0; TRAP(T_DNA)
IDTVEC(fpusegm)
	pushl $0; TRAP(T_FPOPFLT)
IDTVEC(tss)
	TRAP(T_TSSFLT)			/* cpu pushes a real error code */
IDTVEC(missing)
	pushl	$T_SEGNPFLT
	jmp	irettraps
IDTVEC(stk)
	pushl	$T_STKFLT
	jmp	irettraps
IDTVEC(prot)
	pushl	$T_PROTFLT
	jmp	irettraps
IDTVEC(page)
	/* User-mode (or vm86, or below-trampoline EIP) faults go to upf. */
	testl	$PSL_VM, TF_EFLAGS-TF_ERR(%esp)
	jnz	upf
	testb	$SEL_RPL_MASK, TF_CS-TF_ERR(%esp)
	jnz	upf
	cmpl	$PMAP_TRM_MIN_ADDRESS, TF_EIP-TF_ERR(%esp)
	jb	upf

	/*
	 * This is a handshake between copyout_fast.s and page fault
	 * handler.  We check for page fault occurring at the special
	 * places in the copyout fast path, where page fault can
	 * legitimately happen while accessing either user space or
	 * kernel pageable memory, and return control to *%edx.
	 * We switch to the idleptd page table from a user page table,
	 * if needed.
	 */
	pushl	%eax
	/*
	 * Convert the faulting runtime EIP to its link-time address so
	 * it can be compared against the pf_x*/pf_y* link-time symbols:
	 * the trampoline runs relocated, and helper 5f subtracts the
	 * runtime address of label 1 while $1f adds its linked address.
	 */
	movl	TF_EIP-TF_ERR+4(%esp), %eax
	addl	$1f, %eax
	call	5f
1:	cmpl	$pf_x1, %eax
	je	2f
	cmpl	$pf_x2, %eax
	je	2f
	cmpl	$pf_x3, %eax
	je	2f
	cmpl	$pf_x4, %eax
	je	2f
	cmpl	$pf_x5, %eax
	je	2f
	cmpl	$pf_x6, %eax
	je	2f
	cmpl	$pf_x7, %eax
	je	2f
	cmpl	$pf_x8, %eax
	je	2f
	cmpl	$pf_y1, %eax
	je	4f
	cmpl	$pf_y2, %eax
	je	4f
	jmp	upf_eax
	/*
	 * pf_x* site: switch to the idle page table.  Helper 6f converts
	 * the linked address of tramp_idleptd to its runtime address.
	 */
2:	movl	$tramp_idleptd, %eax
	subl	$3f, %eax
	call	6f
3:	movl	(%eax), %eax
	movl	%eax, %cr3
	/* Resume the copyout fast path at its recovery point, *%edx. */
4:	popl	%eax
	movl	%edx, TF_EIP-TF_ERR(%esp)
	addl	$4, %esp			/* discard error code */
	iret
	/* 5: %eax -= runtime address of caller's next insn (bias out). */
5:	subl	(%esp), %eax
	retl
	/* 6: %eax += runtime address of caller's next insn (bias in). */
6:	addl	(%esp), %eax
	retl

upf_eax:popl	%eax
upf:	pushl	$T_PAGEFLT
	jmp	alltraps
IDTVEC(rsvd_pti)
IDTVEC(rsvd)
	pushl $0; TRAP(T_RESERVED)
IDTVEC(fpu)
	pushl $0; TRAP(T_ARITHTRAP)
IDTVEC(align)
	TRAP(T_ALIGNFLT)
IDTVEC(xmm)
	pushl $0; TRAP(T_XMMFLT)

	/*
	 * All traps except ones for syscalls or invalid segment,
	 * jump to alltraps.  If
	 * interrupts were enabled when the trap occurred, then interrupts
	 * are enabled now if the trap was through a trap gate, else
	 * disabled if the trap was through an interrupt gate.  Note that
	 * int0x80_syscall is a trap gate.  Interrupt gates are used by
	 * page faults, non-maskable interrupts, debug and breakpoint
	 * exceptions.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	PUSH_FRAME2
alltraps_with_regs_pushed:
	SET_KERNEL_SREGS
	cld
	KENTER
calltrap:
	pushl	%esp			/* pass trapframe pointer */
	movl	$trap,%eax		/* indirect call: must be PIC */
	call	*%eax
	add	$4, %esp

	/*
	 * Return via doreti to handle ASTs.
	 */
	jmp	doreti

	/*
	 * Entry for segment faults (T_SEGNPFLT/T_STKFLT/T_PROTFLT) that
	 * may have been raised by iret itself during return to usermode.
	 */
	.globl	irettraps
	.type	irettraps,@function
irettraps:
	testl	$PSL_VM, TF_EFLAGS-TF_TRAPNO(%esp)
	jnz	alltraps
	testb	$SEL_RPL_MASK, TF_CS-TF_TRAPNO(%esp)
	jnz	alltraps

	/*
	 * Kernel mode.
	 * The special case there is the kernel mode with user %cr3 and
	 * trampoline stack.  We need to copy both current frame and the
	 * hardware portion of the frame we tried to return to, to the
	 * normal stack.  This logic must follow the stack unwind order
	 * in doreti.
	 */
	PUSH_FRAME2
	SET_KERNEL_SREGS
	cld
	call	1f
1:	popl	%ebx			/* %ebx = runtime address of 1b */
	/*
	 * Below, %ecx = number of bytes to copy: the current frame plus
	 * the already-unwound portion of the frame we tried to return to,
	 * depending on how far doreti got before faulting.
	 */
	leal	(doreti_iret - 1b)(%ebx), %edx
	cmpl	%edx, TF_EIP(%esp)
	jne	2f
	/* -8 because exception did not switch ring */
	movl	$(2 * TF_SZ - TF_EIP - 8), %ecx
	jmp	5f
2:	leal	(doreti_popl_ds - 1b)(%ebx), %edx
	cmpl	%edx, TF_EIP(%esp)
	jne	3f
	movl	$(2 * TF_SZ - TF_DS - 8), %ecx
	jmp	5f
3:	leal	(doreti_popl_es - 1b)(%ebx), %edx
	cmpl	%edx, TF_EIP(%esp)
	jne	4f
	movl	$(2 * TF_SZ - TF_ES - 8), %ecx
	jmp	5f
4:	leal	(doreti_popl_fs - 1b)(%ebx), %edx
	cmpl	%edx, TF_EIP(%esp)
	jne	calltrap
	movl	$(2 * TF_SZ - TF_FS - 8), %ecx
5:	cmpl	$PMAP_TRM_MIN_ADDRESS, %esp	/* trampoline stack ? */
	jb	calltrap	/* if not, no need to change stacks */
	movl	(tramp_idleptd - 1b)(%ebx), %eax
	movl	%eax, %cr3
	movl	PCPU(KESP0), %edx
	subl	%ecx, %edx
	movl	%edx, %edi
	movl	%esp, %esi
	rep; movsb			/* copy frames to the normal stack */
	movl	%edx, %esp
	/* kernel mode, normal */
	jmp	calltrap

/*
 * Privileged instruction fault.
 */
#ifdef KDTRACE_HOOKS
	SUPERALIGN_TEXT
IDTVEC(ill)
	/*
	 * Check if this is a user fault.  If so, just handle it as a normal
	 * trap.
	 */
	testl	$PSL_VM, 8(%esp)	/* and vm86 mode. */
	jnz	norm_ill
	cmpl	$GSEL_KPL, 4(%esp)	/* Check the code segment */
	jne	norm_ill

	/*
	 * Check if a DTrace hook is registered.  The trampoline cannot
	 * be instrumented.
	 */
	cmpl	$0, dtrace_invop_jump_addr
	je	norm_ill

	/*
	 * This is a kernel instruction fault that might have been caused
	 * by a DTrace provider.
	 */
	pushal
	cld

	/*
	 * Set our jump address for the jump back in the event that
	 * the exception wasn't caused by DTrace at all.
	 */
	movl	$norm_ill, dtrace_invop_calltrap_addr

	/* Jump to the code hooked in by DTrace. */
	jmpl	*dtrace_invop_jump_addr

	/*
	 * Process the instruction fault in the normal way.
	 */
norm_ill:
	pushl	$0
	pushl	$T_PRIVINFLT
	jmp	alltraps
#endif

/*
 * See comment in the handler for the kernel case T_TRCTRAP in trap.c.
 * The exception handler must be ready to execute with wrong %cr3.
 * We save original %cr3 in frame->tf_err, similarly to NMI and MCE
 * handlers.
 */
IDTVEC(dbg)
	pushl	$0
	pushl	$T_TRCTRAP
	PUSH_FRAME2
	SET_KERNEL_SREGS
	cld
	movl	%cr3, %eax
	movl	%eax, TF_ERR(%esp)	/* stash entry %cr3 in tf_err */
	call	1f
1:	popl	%eax
	movl	(tramp_idleptd - 1b)(%eax), %eax
	movl	%eax, %cr3		/* always run on the idle page table */
	testl	$PSL_VM, TF_EFLAGS(%esp)
	jnz	dbg_user
	testb	$SEL_RPL_MASK,TF_CS(%esp)
	jz	calltrap
dbg_user:
	NMOVE_STACKS
	movl	$handle_ibrs_entry,%eax
	call	*%eax
	pushl	%esp
	movl	$trap,%eax
	call	*%eax
	add	$4, %esp
	/* Clear T_TRCTRAP so doreti takes the normal (non-NMI) exit. */
	movl	$T_RESERVED, TF_TRAPNO(%esp)
	jmp	doreti

IDTVEC(mchk)
	pushl	$0
	pushl	$T_MCHK
	jmp	nmi_mchk_common

IDTVEC(nmi)
	pushl	$0
	pushl	$T_NMI
nmi_mchk_common:
	PUSH_FRAME2
	SET_KERNEL_SREGS
	cld
	/*
	 * Save %cr3 into tf_err.  There is no good place to put it.
	 * Always reload %cr3, since we might have interrupted the
	 * kernel entry or exit.
	 * Do not switch to the thread kernel stack, otherwise we might
	 * obliterate the previous context partially copied from the
	 * trampoline stack.
	 * Do not re-enable IBRS, there is no good place to store
	 * previous state if we come from the kernel.
	 */
	movl	%cr3, %eax
	movl	%eax, TF_ERR(%esp)
	call	1f
1:	popl	%eax
	movl	(tramp_idleptd - 1b)(%eax), %eax
	movl	%eax, %cr3
	jmp	calltrap

/*
 * Trap gate entry for syscalls (int 0x80).
 * This is used by FreeBSD ELF executables, "new" a.out executables, and all
 * Linux executables.
 *
 * Even though the name says 'int0x80', this is actually a trap gate, not an
 * interrupt gate.  Thus interrupts are enabled on entry just as they are for
 * a normal syscall.
 */
	SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
	pushl	$2			/* sizeof "int 0x80" */
	pushl	$0			/* tf_trapno */
	PUSH_FRAME2
	SET_KERNEL_SREGS
	cld
	MOVE_STACKS
	movl	$handle_ibrs_entry,%eax
	call	*%eax
	sti
	pushl	%esp			/* pass trapframe pointer */
	movl	$syscall, %eax
	call	*%eax
	add	$4, %esp
	jmp	doreti

ENTRY(fork_trampoline)
	pushl	%esp			/* trapframe pointer */
	pushl	%ebx			/* arg1 */
	pushl	%esi			/* function */
	movl	$fork_exit, %eax
	call	*%eax
	addl	$12,%esp
	/* cut from syscall */

	/*
	 * Return via doreti to handle ASTs.
	 */
	jmp	doreti


	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#ifdef DEV_ATPIC
#include <i386/i386/atpic_vector.S>
#endif

#if defined(DEV_APIC) && defined(DEV_ATPIC)
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#endif

#ifdef DEV_APIC
#include <i386/i386/apic_vector.S>
#endif

#ifdef DEV_HYPERV
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include <dev/hyperv/vmbus/i386/vmbus_vector.S>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include <i386/i386/vm86bios.S>

	.text

#include <i386/i386/copyout_fast.S>

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
	.globl	doreti
doreti:
doreti_next:
	/*
	 * Check if ASTs can be handled now.  ASTs cannot be safely
	 * processed when returning from an NMI.
	 */
	cmpb	$T_NMI,TF_TRAPNO(%esp)
#ifdef HWPMC_HOOKS
	je	doreti_nmi
#else
	je	doreti_exit
#endif
	/*
	 * PSL_VM must be checked first since segment registers only
	 * have an RPL in non-VM86 mode.
	 * ASTs can not be handled now if we are in a vm86 call.
	 */
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jz	doreti_notvm86
	movl	PCPU(CURPCB),%ecx
	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)
	jz	doreti_ast
	jmp	doreti_popl_fs

doreti_notvm86:
	testb	$SEL_RPL_MASK,TF_CS(%esp)	/* are we returning to user mode? */
	jz	doreti_exit	/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movl	PCPU(CURTHREAD),%eax
	cmpl	$0,TD_AST(%eax)
	je	doreti_exit
	sti
	pushl	%esp			/* pass a pointer to the trapframe */
	movl	$ast, %eax
	call	*%eax
	add	$4,%esp
	jmp	doreti_ast		/* re-check: ast() may post new ASTs */

	/*
	 * doreti_exit: pop registers, iret.
	 *
	 * The segment register pop is a special case, since it may
	 * fault if (for example) a sigreturn specifies bad segment
	 * registers.  The fault is handled in trap.c.
	 */
doreti_exit:
	/*
	 * NMI/MCE/trace-trap entries saved the interrupted %cr3 in
	 * tf_err; they must restore it directly instead of taking the
	 * normal stack-switch path below.
	 */
	cmpl	$T_NMI, TF_TRAPNO(%esp)
	je	doreti_iret_nmi
	cmpl	$T_MCHK, TF_TRAPNO(%esp)
	je	doreti_iret_nmi
	cmpl	$T_TRCTRAP, TF_TRAPNO(%esp)
	je	doreti_iret_nmi
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jnz	1f	/* PCB_VM86CALL is not set */
	testl	$SEL_RPL_MASK, TF_CS(%esp)
	jz	doreti_popl_fs
1:	movl	$handle_ibrs_exit,%eax
	call	*%eax
	movl	mds_handler,%eax
	call	*%eax
	/*
	 * Copy the frame to the trampoline stack, then switch to the
	 * user %cr3 before the final register pops and iret.
	 */
	movl	%esp, %esi
	movl	PCPU(TRAMPSTK), %edx
	movl	$TF_SZ, %ecx
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jz	2f	/* PCB_VM86CALL is not set */
	addl	$VM86_STACK_SPACE, %ecx
2:	subl	%ecx, %edx
	movl	%edx, %edi
	rep; movsb
	movl	%edx, %esp
	movl	PCPU(CURPCB),%eax
	movl	PCB_CR3(%eax), %eax
	movl	%eax, %cr3

	/*
	 * The labels below are exported so trap.c and irettraps can
	 * recognize faults at these exact addresses.
	 */
	.globl	doreti_popl_fs
doreti_popl_fs:
	popl	%fs
	.globl	doreti_popl_es
doreti_popl_es:
	popl	%es
	.globl	doreti_popl_ds
doreti_popl_ds:
	popl	%ds
	popal
	addl	$8,%esp			/* skip tf_trapno and tf_err */
	.globl	doreti_iret
doreti_iret:
	iret

doreti_iret_nmi:
	movl	TF_ERR(%esp), %eax	/* %cr3 saved at handler entry */
	movl	%eax, %cr3
	jmp	doreti_popl_fs

	/*
	 * doreti_iret_fault and friends.  Alternative return code for
	 * the case where we get a fault in the doreti_exit code
	 * above.  trap() (i386/i386/trap.c) catches this specific
	 * case, and continues in the corresponding place in the code
	 * below.
	 *
	 * If the fault occurred during return to usermode, we recreate
	 * the trap frame and call trap() to send a signal.  Otherwise
	 * the kernel was tricked into fault by attempt to restore invalid
	 * usermode segment selectors on return from nested fault or
	 * interrupt, where interrupted kernel entry code not yet loaded
	 * kernel selectors.  In the latter case, emulate iret and zero
	 * the invalid selector.
	 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	pushl	$0	/* tf_err */
	pushl	$0	/* tf_trapno XXXKIB: provide more useful value ? */
	pushal
	pushl	$0
	movw	%ds,(%esp)
	.globl	doreti_popl_ds_fault
doreti_popl_ds_fault:
	testb	$SEL_RPL_MASK,TF_CS-TF_DS(%esp)
	jz	doreti_popl_ds_kfault
	pushl	$0
	movw	%es,(%esp)
	.globl	doreti_popl_es_fault
doreti_popl_es_fault:
	testb	$SEL_RPL_MASK,TF_CS-TF_ES(%esp)
	jz	doreti_popl_es_kfault
	pushl	$0
	movw	%fs,(%esp)
	.globl	doreti_popl_fs_fault
doreti_popl_fs_fault:
	testb	$SEL_RPL_MASK,TF_CS-TF_FS(%esp)
	jz	doreti_popl_fs_kfault
	movl	$0,TF_ERR(%esp)	/* XXX should be the error code */
	movl	$T_PROTFLT,TF_TRAPNO(%esp)
	SET_KERNEL_SREGS
	jmp	calltrap

	/* Kernel-mode fault: zero the bad selector and retry the pop. */
doreti_popl_ds_kfault:
	movl	$0,(%esp)
	jmp	doreti_popl_ds
doreti_popl_es_kfault:
	movl	$0,(%esp)
	jmp	doreti_popl_es
doreti_popl_fs_kfault:
	movl	$0,(%esp)
	jmp	doreti_popl_fs

#ifdef HWPMC_HOOKS
doreti_nmi:
	/*
	 * Since we are returning from an NMI, check if the current trap
	 * was from user mode and if so whether the current thread
	 * needs a user call chain capture.
	 */
	testl	$PSL_VM, TF_EFLAGS(%esp)
	jnz	doreti_exit
	testb	$SEL_RPL_MASK,TF_CS(%esp)
	jz	doreti_exit
	movl	PCPU(CURTHREAD),%eax	/* curthread present? */
	orl	%eax,%eax
	jz	doreti_exit
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%eax)	/* flagged for capture? */
	jz	doreti_exit
	/*
	 * Switch to thread stack.  Reset tf_trapno to not indicate NMI,
	 * to cause normal userspace exit.
	 */
	movl	$T_RESERVED, TF_TRAPNO(%esp)
	NMOVE_STACKS
	/*
	 * Take the processor out of NMI mode by executing a fake "iret".
	 */
	pushfl
	pushl	%cs
	call	1f
1:	popl	%eax			/* PIC: runtime address of 1b */
	leal	(outofnmi-1b)(%eax),%eax
	pushl	%eax
	iret
outofnmi:
	/*
	 * Call the callchain capture hook after turning interrupts back on.
	 */
	movl	pmc_hook,%ecx
	orl	%ecx,%ecx
	jz	doreti_exit
	pushl	%esp			/* frame pointer */
	pushl	$PMC_FN_USER_CALLCHAIN	/* command */
	movl	PCPU(CURTHREAD),%eax
	pushl	%eax			/* curthread */
	sti
	call	*%ecx
	addl	$12,%esp
	jmp	doreti_ast
#endif

ENTRY(end_exceptions)