1/*- 2 * Copyright (c) 1989, 1990 William F. Jolitz. 3 * Copyright (c) 1990 The Regents of the University of California. 4 * Copyright (c) 2007-2018 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by A. Joseph Koshy under 8 * sponsorship from the FreeBSD Foundation and Google, Inc. 9 * 10 * Portions of this software were developed by 11 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from 12 * the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include "assym.inc"

#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#ifdef KDTRACE_HOOKS
	.bss
	/*
	 * Pointer to the DTrace invalid-opcode hook.  Non-zero when a
	 * hook is registered; tested in the KDTRACE #BP dispatch below.
	 */
	.globl	dtrace_invop_jump_addr
	.align	8
	.type	dtrace_invop_jump_addr,@object
	.size	dtrace_invop_jump_addr,8
dtrace_invop_jump_addr:
	.zero	8
	/*
	 * Address the DTrace hook jumps back to when the breakpoint was
	 * not caused by DTrace; set to 'calltrap' before entering the hook.
	 */
	.globl	dtrace_invop_calltrap_addr
	.align	8
	.type	dtrace_invop_calltrap_addr,@object
	.size	dtrace_invop_calltrap_addr,8
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
#ifdef HWPMC_HOOKS
	ENTRY(start_exceptions)
#endif

/*****************************************************************************/
/* Trap handling							     */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
 * state on the stack but also disables interrupts.  This is important for
 * us for the use of the swapgs instruction.  We cannot be interrupted
 * until the GS.base value is correct.  For most traps, we automatically
 * then enable interrupts if the interrupted context had them enabled.
 * This is equivalent to the i386 port's use of SDT_SYS386TGT.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.
See amd64/include/frame.h.
 * This includes the current RFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The CPU does not push the general registers, so we must do that, and we
 * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
 * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base for
 * for the kernel mode operation shortly, without changes to the selector
 * loaded.  Since superuser long mode works with any selectors loaded into
 * segment registers other then %cs, which makes them mostly unused in long
 * mode, and kernel does not reference %fs, leave them alone.  The segment
 * registers are reloaded on return to the usermode.
 */

/*
 * Traps that we leave interrupts disabled for.
 * \l      - vector name; defines X\l plus PTI kernel/user entry labels.
 * \trapno - trap number stored into tf_trapno.
 * tf_addr and tf_err are zeroed since the CPU supplies neither here.
 */
	.macro	TRAP_NOEN	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u
\l\()_pti_k:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_noen_k
\l\()_pti_u:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_noen_u

	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* from kernel (RPL 0)? */
	jz	alltraps_noen_k
	swapgs					/* from user: load kernel GS.base */
	lfence
	jmp	alltraps_noen_u
	.endm

	TRAP_NOEN	bpt, T_BPTFLT
#ifdef KDTRACE_HOOKS
	TRAP_NOEN	dtrace_ret, T_DTRACE_RET
#endif

/* Regular traps; The cpu does not supply tf_err for these.
*/
	/*
	 * Interrupt-enabling trap entries: dispatch to alltraps_k/u,
	 * which re-enable interrupts if the interrupted context had them.
	 * tf_err is zeroed; the CPU does not push an error code for these.
	 */
	.macro	TRAP	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u
\l\()_pti_k:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_k
\l\()_pti_u:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_u

	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* from kernel (RPL 0)? */
	jz	alltraps_k
	swapgs					/* from user: load kernel GS.base */
	lfence
	jmp	alltraps_u
	.endm

	TRAP	div, T_DIVIDE
	TRAP	ofl, T_OFLOW
	TRAP	bnd, T_BOUND
	TRAP	ill, T_PRIVINFLT
	TRAP	dna, T_DNA
	TRAP	fpusegm, T_FPOPFLT
	TRAP	rsvd, T_RESERVED
	TRAP	fpu, T_ARITHTRAP
	TRAP	xmm, T_XMMFLT

/* This group of traps have tf_err already pushed by the cpu. */
	.macro	TRAP_ERR	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u,has_err=1
\l\()_pti_k:
	subq	$TF_ERR,%rsp			/* error code already on stack */
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	jmp	alltraps_k
\l\()_pti_u:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	jmp	alltraps_u
	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* from kernel (RPL 0)? */
	jz	alltraps_k
	swapgs
	lfence
	jmp	alltraps_u
	.endm

	TRAP_ERR	tss, T_TSSFLT
	TRAP_ERR	align, T_ALIGNFLT

	/*
	 * alltraps_u/k entry points.
	 * SWAPGS must be already performed by prologue,
	 * if this is the first time in the kernel from userland.
	 * Re-enable interrupts if they were enabled before the trap.
	 * This approximates SDT_SYS386TGT on the i386 port.
 */
	SUPERALIGN_TEXT
	.globl	alltraps_u
	.type	alltraps_u,@function
alltraps_u:
	/* Entered from user mode: prologue already did swapgs. */
	movq	%rdi,TF_RDI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	call	handle_ibrs_entry		/* speculation-control entry hook */
	jmp	alltraps_save_segs
	SUPERALIGN_TEXT
	.globl	alltraps_k
	.type	alltraps_k,@function
alltraps_k:
	/* Entered from kernel mode: GS.base already correct. */
	lfence
	movq	%rdi,TF_RDI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
alltraps_save_segs:
	SAVE_SEGS
	testl	$PSL_I,TF_RFLAGS(%rsp)		/* interrupts enabled before trap? */
	jz	alltraps_pushregs_no_rax
	sti
alltraps_pushregs_no_rax:
	/* Complete the trapframe: remaining GP registers. */
	movq	%rsi,TF_RSI(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	/* Clear direction and alignment-check flags for kernel C code. */
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace Function Boundary Trace (fbt) probes are triggered
	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
	 * interrupt.  For all other trap types, just handle them in
	 * the usual way.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jnz	calltrap			/* ignore userland traps */
	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
	jne	calltrap

	/* Check if there is no DTrace hook registered. */
	cmpq	$0,dtrace_invop_jump_addr
	je	calltrap

	/*
	 * Set our jump address for the jump back in the event that
	 * the breakpoint wasn't caused by DTrace at all.
	 */
	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)

	/* Jump to the code hooked in by DTrace.
*/
	jmpq	*dtrace_invop_jump_addr
#endif
	.globl	calltrap
	.type	calltrap,@function
calltrap:
	/* Common C-level trap dispatch; trapframe pointer is the argument. */
	KMSAN_ENTER
	movq	%rsp, %rdi
	call	trap_check
	KMSAN_LEAVE
	jmp	doreti			/* Handle any pending ASTs */

	/*
	 * alltraps_noen_u/k entry points.
	 * Again, SWAPGS must be already performed by prologue, if needed.
	 * Unlike alltraps above, we want to leave the interrupts disabled.
	 * This corresponds to SDT_SYS386IGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps_noen_u
	.type	alltraps_noen_u,@function
alltraps_noen_u:
	movq	%rdi,TF_RDI(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	jmp	alltraps_noen_save_segs
	SUPERALIGN_TEXT
	.globl	alltraps_noen_k
	.type	alltraps_noen_k,@function
alltraps_noen_k:
	lfence
	movq	%rdi,TF_RDI(%rsp)
alltraps_noen_save_segs:
	SAVE_SEGS
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* from kernel? skip IBRS entry */
	jz	alltraps_pushregs_no_rax
	call	handle_ibrs_entry
	jmp	alltraps_pushregs_no_rax

/*
 * Double fault: runs with its own IST stack; builds the full trapframe
 * by hand, loads the canonical GS.base stored above the frame, switches
 * to the kernel page table, and never returns (halts after the handler).
 */
IDTVEC(dblfault)
	subq	$TF_ERR,%rsp
	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	/* Canonical GS.base is stored just above the frame; load it. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_GSBASE,%ecx
	wrmsr
	movq	%cr3,%rax
	movq	%rax,PCPU(SAVED_UCR3)
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax		/* KCR3 == ~0 means PTI disabled */
	je	2f
	movq	%rax,%cr3
2:	KMSAN_ENTER
	movq	%rsp,%rdi
	call	dblfault_handler
	KMSAN_LEAVE
3:	hlt
	jmp	3b

	ALIGN_TEXT
IDTVEC(page_pti)
	testb	$SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp)
	jz	page_k
	swapgs
	lfence
	pushq	%rax
	movq	%cr3,%rax
	movq	%rax,PCPU(SAVED_UCR3)	/* preserve faulting %cr3 for copyout */
	cmpq	$~0,PCPU(UCR3)
	jne	1f
	popq	%rax
	jmp	page_u
1:	pushq	%rdx
	PTI_UUENTRY has_err=1
	jmp	page_u
	ALIGN_TEXT
IDTVEC(page)
	testb	$SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) /* Did we come from kernel? */
	jnz	page_u_swapgs		/* already running with kernel GS.base */
page_k:
	lfence
	subq	$TF_ERR,%rsp
	movq	%rdi,TF_RDI(%rsp)	/* free up GP registers */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	jmp	page_cr2
	ALIGN_TEXT
page_u_swapgs:
	swapgs
	lfence
page_u:
	subq	$TF_ERR,%rsp
	movq	%rdi,TF_RDI(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	movq	PCPU(SAVED_UCR3),%rax
	movq	%rax,PCB_SAVED_UCR3(%rdi)
	call	handle_ibrs_entry
page_cr2:
	movq	%cr2,%rdi		/* preserve %cr2 before .. */
	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
	SAVE_SEGS
	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rax
	sti
	jmp	alltraps_pushregs_no_rax

	/*
	 * We have to special-case this one.  If we get a trap in doreti() at
	 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
	 * to do a special the swapgs in this case even coming from the kernel.
	 * XXX linux has a trap handler for their equivalent of load_gs().
	 *
	 * On the stack, we have the hardware interrupt frame to return
	 * to usermode (faulted) and another frame with error code, for
	 * fault.  For PTI, copy both frames to the main thread stack.
 * Handle the potential 16-byte alignment adjustment incurred
	 * during the second fault by copying both frames independently
	 * while unwinding the stack in between.
	 */
	/*
	 * Protection-style fault entry (#NP/#SS/#GP): CPU pushes an error
	 * code.  \name\()_pti_doreti handles the doreti_iret re-fault case
	 * by copying both frames onto the main thread stack.
	 */
	.macro PROTF_ENTRY name,trapno
\name\()_pti_doreti:
	swapgs
	lfence
	cmpq	$~0,PCPU(UCR3)		/* PTI disabled? */
	je	1f
	pushq	%rax
	pushq	%rdx
	movq	PCPU(KCR3),%rax
	movq	%rax,%cr3
	movq	PCPU(RSP0),%rax
	subq	$2*PTI_SIZE-3*8,%rax	/* no err, %rax, %rdx in faulted frame */
	MOVE_STACKS	(PTI_SIZE / 8)
	addq	$PTI_SIZE,%rax
	movq	PTI_RSP(%rsp),%rsp
	MOVE_STACKS	(PTI_SIZE / 8 - 3)
	subq	$PTI_SIZE,%rax
	movq	%rax,%rsp
	popq	%rdx
	popq	%rax
1:	swapgs
	jmp	X\name
IDTVEC(\name\()_pti)
	cmpq	$doreti_iret,PTI_RIP-2*8(%rsp)
	je	\name\()_pti_doreti
	testb	$SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
	jz	X\name			/* lfence is not needed until %gs: use */
	PTI_UENTRY has_err=1
	swapgs	/* fence provided by PTI_UENTRY */
IDTVEC(\name)
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	jmp	prot_addrf
	.endm

	PROTF_ENTRY	missing, T_SEGNPFLT
	PROTF_ENTRY	stk, T_STKFLT
	PROTF_ENTRY	prot, T_PROTFLT

prot_addrf:
	movq	$0,TF_ADDR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	leaq	doreti_iret(%rip),%rdi
	cmpq	%rdi,TF_RIP(%rsp)
	je	5f			/* kernel but with user gsbase!! */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel?
*/
	jz	6f			/* already running with kernel GS.base */
	/* From user: snapshot user FS/GS base before swapgs, if FSGSBASE. */
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	2f
	rdfsbase %rax
	rdgsbase %rdx
2:	swapgs
	lfence
	movq	PCPU(CURPCB),%rdi
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	4f
	movq	%rax,PCB_FSBASE(%rdi)
	movq	%rdx,PCB_GSBASE(%rdi)
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* full iret from user #gp */
4:	call	handle_ibrs_entry
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rax
	sti
	jmp	alltraps_pushregs_no_rax

5:	swapgs				/* faulted at doreti_iret: user gsbase */
6:	lfence
	movq	PCPU(CURPCB),%rdi
	jmp	4b

/*
 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
 * and the new privilege level.  We are still running on the old user stack
 * pointer.  We have to juggle a few things around to find our stack etc.
 * swapgs gives us access to our PCPU space only.
 *
 * We do not support invoking this from a custom segment registers,
 * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
 */
	SUPERALIGN_TEXT
IDTVEC(fast_syscall_pti)
	swapgs
	cmpq	$~0,PCPU(UCR3)		/* PTI disabled? */
	je	fast_syscall_common
	movq	%rax,PCPU(SCRATCH_RAX)
	movq	PCPU(KCR3),%rax
	movq	%rax,%cr3		/* switch to kernel page table */
	movq	PCPU(SCRATCH_RAX),%rax
	jmp	fast_syscall_common
	SUPERALIGN_TEXT
IDTVEC(fast_syscall)
	swapgs
fast_syscall_common:
	movq	%rsp,PCPU(SCRATCH_RSP)
	movq	PCPU(RSP0),%rsp
	/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
	subq	$TF_SIZE,%rsp
	/* defer TF_RSP till we have a spare register */
	movq	%r11,TF_RFLAGS(%rsp)	/* syscall saved %rflags in %r11 */
	movq	%rcx,TF_RIP(%rsp)	/* %rcx original value is in %r10 */
	movq	PCPU(SCRATCH_RSP),%r11	/* %r11 already saved */
	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
	/*
	 * Save a few arg registers early to free them for use in
	 * handle_ibrs_entry().  %r10 is especially tricky.
 * It is not an
	 * arg register, but it holds the arg register %rcx.  Profiling
	 * preserves %rcx, but may clobber %r10.  Profiling may also
	 * clobber %r11, but %r11 (original %eflags) has been saved.
	 */
	movq	%rax,TF_RAX(%rsp)	/* syscall number */
	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
	SAVE_SEGS
	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%r11
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
	sti
	movq	$KUDSEL,TF_SS(%rsp)
	movq	$KUCSEL,TF_CS(%rsp)
	movq	$2,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
	movq	%r8,TF_R8(%rsp)		/* arg 5 */
	movq	%r9,TF_R9(%rsp)		/* arg 6 */
	movq	%rbx,TF_RBX(%rsp)	/* C preserved */
	movq	%rbp,TF_RBP(%rsp)	/* C preserved */
	movq	%r12,TF_R12(%rsp)	/* C preserved */
	movq	%r13,TF_R13(%rsp)	/* C preserved */
	movq	%r14,TF_R14(%rsp)	/* C preserved */
	movq	%r15,TF_R15(%rsp)	/* C preserved */
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	PCPU(CURTHREAD),%rdi
	movq	%rsp,TD_FRAME(%rdi)
	movl	TF_RFLAGS(%rsp),%esi
	andl	$PSL_T,%esi		/* pass single-step flag to C code */
	call	amd64_syscall
1:	movq	PCPU(CURPCB),%rax
	/* Disable interrupts before testing PCB_FULL_IRET. */
	cli
	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
	jnz	4f
	/* Check for and handle AST's on return to userland. */
	movq	PCPU(CURTHREAD),%rax
	cmpl	$0,TD_AST(%rax)
	jne	3f
	call	handle_ibrs_exit
	callq	*mds_handler
	/* Restore preserved registers.
*/
	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
	movq	TF_RAX(%rsp),%rax	/* return value 1 */
	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
	movq	TF_RIP(%rsp),%rcx	/* original %rip */
	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
	xorl	%r8d,%r8d		/* zero the rest of GPRs */
	xorl	%r10d,%r10d
	cmpq	$~0,PCPU(UCR3)		/* PTI: switch back to user page table */
	je	2f
	movq	PCPU(UCR3),%r9
	andq	PCPU(UCR3_LOAD_MASK),%r9
	movq	%r9,%cr3
2:	xorl	%r9d,%r9d
	movq	$PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
	swapgs
	sysretq

3:	/* AST scheduled. */
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	1b

4:	/* Requested full context restore, use doreti for that. */
	jmp	doreti

/*
 * Here for CYA insurance, in case a "syscall" instruction gets
 * issued from 32 bit compatibility mode. MSR_CSTAR has to point
 * to *something* if EFER_SCE is enabled.
 */
IDTVEC(fast_syscall32)
	sysret

/*
 * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
 * generation of exception until the next instruction is executed,
 * which might be a kernel entry.  So we must execute the handler
 * on IST stack and be ready for non-kernel GSBASE.
 */
IDTVEC(dbg)
	subq	$TF_RIP,%rsp
	movl	$(T_TRCTRAP),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	dbg_fromuserspace
	lfence
	/*
	 * We've interrupted the kernel.  See comment in NMI handler about
	 * registers use.
	 */
	movq	%cr2,%r15		/* preserve %cr2 across trap() */
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12		/* preserve interrupted GS.base */
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%cr3,%r13		/* preserve interrupted %cr3 */
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	2f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d		/* preserve SPEC_CTRL low half */
	call	handle_ibrs_entry
2:	movq	%rsp,%rdi
	call	trap
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	3f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
3:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%r13,%cr3
	movq	%r15,%cr2
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret
dbg_fromuserspace:
	/*
	 * Switch to kernel GSBASE and kernel page table, and copy frame
	 * from the IST stack to the normal kernel stack, since trap()
	 * re-enables interrupts, and since we might trap on DB# while
	 * in trap().
 */
	swapgs
	lfence
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax		/* KCR3 == ~0 means PTI disabled */
	je	1f
	movq	%rax,%cr3
1:	movq	PCPU(RSP0),%rax
	movl	$TF_SIZE,%ecx
	subq	%rcx,%rax
	movq	%rax,%rdi
	movq	%rsp,%rsi
	rep;movsb			/* copy trapframe to kernel stack */
	movq	%rax,%rsp
	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%rdi
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	3f
	rdfsbase %rax
	movq	%rax,PCB_FSBASE(%rdi)
	movl	$MSR_KGSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	orq	%rdx,%rax
	movq	%rax,PCB_GSBASE(%rdi)	/* user GS.base is in KGSBASE now */
3:	jmp	calltrap

/*
 * NMI handling is special.
 *
 * First, NMIs do not respect the state of the processor's RFLAGS.IF
 * bit.  The NMI handler may be entered at any time, including when
 * the processor is in a critical section with RFLAGS.IF == 0.
 * The processor's GS.base value could be invalid on entry to the
 * handler.
 *
 * Second, the processor treats NMIs specially, blocking further NMIs
 * until an 'iretq' instruction is executed.  We thus need to execute
 * the NMI handler with interrupts disabled, to prevent a nested interrupt
 * from executing an 'iretq' instruction and inadvertently taking the
 * processor out of NMI mode.
 *
 * Third, the NMI handler runs on its own stack (tss_ist2).  The canonical
 * GS.base value for the processor is stored just above the bottom of its
 * NMI stack.  For NMIs taken from kernel mode, the current value in
 * the processor's GS.base is saved at entry to C-preserved register %r12,
 * the canonical value for GS.base is then loaded into the processor, and
 * the saved value is restored at exit time.  For NMIs taken from user mode,
 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
756 */ 757 758IDTVEC(nmi) 759 subq $TF_RIP,%rsp 760 movl $(T_NMI),TF_TRAPNO(%rsp) 761 movq $0,TF_ADDR(%rsp) 762 movq $0,TF_ERR(%rsp) 763 movq %rdi,TF_RDI(%rsp) 764 movq %rsi,TF_RSI(%rsp) 765 movq %rdx,TF_RDX(%rsp) 766 movq %rcx,TF_RCX(%rsp) 767 movq %r8,TF_R8(%rsp) 768 movq %r9,TF_R9(%rsp) 769 movq %rax,TF_RAX(%rsp) 770 movq %rbx,TF_RBX(%rsp) 771 movq %rbp,TF_RBP(%rsp) 772 movq %r10,TF_R10(%rsp) 773 movq %r11,TF_R11(%rsp) 774 movq %r12,TF_R12(%rsp) 775 movq %r13,TF_R13(%rsp) 776 movq %r14,TF_R14(%rsp) 777 movq %r15,TF_R15(%rsp) 778 SAVE_SEGS 779 movl $TF_HASSEGS,TF_FLAGS(%rsp) 780 pushfq 781 andq $~(PSL_D | PSL_AC),(%rsp) 782 popfq 783 xorl %ebx,%ebx 784 testb $SEL_RPL_MASK,TF_CS(%rsp) 785 jnz nmi_fromuserspace 786 /* 787 * We've interrupted the kernel. Preserve in callee-saved regs: 788 * GS.base in %r12, 789 * %cr3 in %r13, 790 * possibly lower half of MSR_IA32_SPEC_CTL in %r14d, 791 * %cr2 in %r15. 792 */ 793 lfence 794 movq %cr2,%r15 795 movl $MSR_GSBASE,%ecx 796 rdmsr 797 movq %rax,%r12 798 shlq $32,%rdx 799 orq %rdx,%r12 800 /* Retrieve and load the canonical value for GS.base. 
*/
	movq	TF_SIZE(%rsp),%rdx	/* stored just above the IST frame */
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax		/* KCR3 == ~0 means PTI disabled */
	je	1f
	movq	%rax,%cr3
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	nmi_calltrap
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d		/* preserve SPEC_CTRL low half */
	call	handle_ibrs_entry
	jmp	nmi_calltrap
nmi_fromuserspace:
	incl	%ebx			/* remember: return to userland */
	swapgs
	lfence
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%rdi
	testq	%rdi,%rdi		/* curpcb may be NULL early in boot */
	jz	3f
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	3f
	rdfsbase %rax
	movq	%rax,PCB_FSBASE(%rdi)
	movl	$MSR_KGSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	orq	%rdx,%rax
	movq	%rax,PCB_GSBASE(%rdi)
3:
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
	KMSAN_ENTER
	movq	%rsp,%rdi
	call	trap
	KMSAN_LEAVE
#ifdef HWPMC_HOOKS
	/*
	 * Capture a userspace callchain if needed.
	 *
	 * - Check if the current trap was from user mode.
	 * - Check if the current thread is valid.
	 * - Check if the thread requires a user call chain to be
	 *   captured.
	 *
	 * We are still in NMI mode at this point.
	 */
	testl	%ebx,%ebx
	jz	nocallchain	/* not from userspace */
	movq	PCPU(CURTHREAD),%rax
	orq	%rax,%rax	/* curthread present? */
	jz	nocallchain
	/*
	 * Move execution to the regular kernel stack, because we
	 * committed to return through doreti.
 */
	movq	%rsp,%rsi	/* source stack pointer */
	movq	$TF_SIZE,%rcx
	movq	PCPU(RSP0),%rdx
	subq	%rcx,%rdx
	movq	%rdx,%rdi	/* destination stack pointer */
	shrq	$3,%rcx		/* trap frame size in long words */
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	rep
	movsq			/* copy trapframe */
	movq	%rdx,%rsp	/* we are on the regular kstack */

	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
	jz	nocallchain
	/*
	 * A user callchain is to be captured, so:
	 * - Take the processor out of "NMI" mode by faking an "iret",
	 *   to allow for nested NMI interrupts.
	 * - Enable interrupts, so that copyin() can work.
	 */
	movl	%ss,%eax
	pushq	%rax		/* tf_ss */
	pushq	%rdx		/* tf_rsp (on kernel stack) */
	pushfq			/* tf_rflags */
	movl	%cs,%eax
	pushq	%rax		/* tf_cs */
	pushq	$outofnmi	/* tf_rip */
	iretq
outofnmi:
	/*
	 * At this point the processor has exited NMI mode and is running
	 * with interrupts turned off on the normal kernel stack.
	 *
	 * If a pending NMI gets recognized at or after this point, it
	 * will cause a kernel callchain to be traced.
	 *
	 * We turn interrupts back on, and call the user callchain capture hook.
	 */
	movq	pmc_hook,%rax
	orq	%rax,%rax	/* hook registered? */
	jz	nocallchain
	movq	PCPU(CURTHREAD),%rdi		/* thread */
	movq	$PMC_FN_USER_CALLCHAIN,%rsi	/* command */
	movq	%rsp,%rdx			/* frame */
	sti
	call	*%rax
	cli
nocallchain:
#endif
	testl	%ebx,%ebx	/* %ebx != 0 => return to userland */
	jnz	doreti_exit
	/*
	 * Restore speculation control MSR, if preserved.
	 */
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	1f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
*/
1:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx		/* %r12 holds the interrupted GS.base */
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	cmpb	$0, nmi_flush_l1d_sw(%rip)
	je	2f
	call	flush_l1d_sw		/* bhyve L1TF assist */
2:	movq	%r13,%cr3		/* restore interrupted %cr3 */
	movq	%r15,%cr2		/* restore interrupted %cr2 */
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret

/*
 * MC# handling is similar to NMI.
 *
 * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
 * can occur at any time with a GS.base value that does not correspond
 * to the privilege level in CS.
 *
 * Machine checks are not unblocked by iretq, but it is best to run
 * the handler with interrupts disabled since the exception may have
 * interrupted a critical section.
 *
 * The MC# handler runs on its own stack (tss_ist3).  The canonical
 * GS.base value for the processor is stored just above the bottom of
 * its MC# stack.  For exceptions taken from kernel mode, the current
 * value in the processor's GS.base is saved at entry to C-preserved
 * register %r12, the canonical value for GS.base is then loaded into
 * the processor, and the saved value is restored at exit time.  For
 * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
 * are used for swapping GS.base.
964 */ 965 966IDTVEC(mchk) 967 subq $TF_RIP,%rsp 968 movl $(T_MCHK),TF_TRAPNO(%rsp) 969 movq $0,TF_ADDR(%rsp) 970 movq $0,TF_ERR(%rsp) 971 movq %rdi,TF_RDI(%rsp) 972 movq %rsi,TF_RSI(%rsp) 973 movq %rdx,TF_RDX(%rsp) 974 movq %rcx,TF_RCX(%rsp) 975 movq %r8,TF_R8(%rsp) 976 movq %r9,TF_R9(%rsp) 977 movq %rax,TF_RAX(%rsp) 978 movq %rbx,TF_RBX(%rsp) 979 movq %rbp,TF_RBP(%rsp) 980 movq %r10,TF_R10(%rsp) 981 movq %r11,TF_R11(%rsp) 982 movq %r12,TF_R12(%rsp) 983 movq %r13,TF_R13(%rsp) 984 movq %r14,TF_R14(%rsp) 985 movq %r15,TF_R15(%rsp) 986 SAVE_SEGS 987 movl $TF_HASSEGS,TF_FLAGS(%rsp) 988 pushfq 989 andq $~(PSL_D | PSL_AC),(%rsp) 990 popfq 991 xorl %ebx,%ebx 992 testb $SEL_RPL_MASK,TF_CS(%rsp) 993 jnz mchk_fromuserspace 994 /* 995 * We've interrupted the kernel. See comment in NMI handler about 996 * registers use. 997 */ 998 movq %cr2,%r15 999 movl $MSR_GSBASE,%ecx 1000 rdmsr 1001 movq %rax,%r12 1002 shlq $32,%rdx 1003 orq %rdx,%r12 1004 /* Retrieve and load the canonical value for GS.base. */ 1005 movq TF_SIZE(%rsp),%rdx 1006 movl %edx,%eax 1007 shrq $32,%rdx 1008 wrmsr 1009 movq %cr3,%r13 1010 movq PCPU(KCR3),%rax 1011 cmpq $~0,%rax 1012 je 1f 1013 movq %rax,%cr3 10141: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) 1015 je mchk_calltrap 1016 movl $MSR_IA32_SPEC_CTRL,%ecx 1017 rdmsr 1018 movl %eax,%r14d 1019 call handle_ibrs_entry 1020 jmp mchk_calltrap 1021mchk_fromuserspace: 1022 incl %ebx 1023 swapgs 1024 movq %cr3,%r13 1025 movq PCPU(KCR3),%rax 1026 cmpq $~0,%rax 1027 je 1f 1028 movq %rax,%cr3 10291: call handle_ibrs_entry 1030/* Note: this label is also used by ddb and gdb: */ 1031mchk_calltrap: 1032 KMSAN_ENTER 1033 movq %rsp,%rdi 1034 call mca_intr 1035 KMSAN_LEAVE 1036 testl %ebx,%ebx /* %ebx != 0 => return to userland */ 1037 jnz doreti_exit 1038 /* 1039 * Restore speculation control MSR, if preserved. 
 */
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	1f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
1:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%r13,%cr3		/* restore interrupted %cr3 */
	movq	%r15,%cr2		/* restore interrupted %cr2 */
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret

/*
 * First return path of a newly forked thread: cpu_fork() arranged for
 * %r12/%rbx to carry the callout and its argument.
 */
ENTRY(fork_trampoline)
	movq	%r12,%rdi		/* function */
	movq	%rbx,%rsi		/* arg1 */
	movq	%rsp,%rdx		/* trapframe pointer */
	call	fork_exit
	jmp	doreti			/* Handle any ASTs */

/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files doesn't need to know that they are
 * included.
 */

#ifdef COMPAT_FREEBSD32
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/ia32/ia32_exception.S>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include <amd64/amd64/apic_vector.S>

#ifdef DEV_ATPIC
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/amd64/atpic_vector.S>
#endif

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
	.globl	doreti
doreti:
	/*
	 * Check if ASTs can be handled now.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode?
*/ 1116 jz doreti_exit /* can't handle ASTs now if not */ 1117 1118doreti_ast: 1119 /* 1120 * Check for ASTs atomically with returning. Disabling CPU 1121 * interrupts provides sufficient locking even in the SMP case, 1122 * since we will be informed of any new ASTs by an IPI. 1123 */ 1124 cli 1125 movq PCPU(CURTHREAD),%rax 1126 cmpl $0,TD_AST(%rax) 1127 je doreti_exit 1128 sti 1129 movq %rsp,%rdi /* pass a pointer to the trapframe */ 1130 call ast 1131 jmp doreti_ast 1132 1133 /* 1134 * doreti_exit: pop registers, iret. 1135 * 1136 * The segment register pop is a special case, since it may 1137 * fault if (for example) a sigreturn specifies bad segment 1138 * registers. The fault is handled in trap.c. 1139 */ 1140doreti_exit: 1141 movq PCPU(CURPCB),%r8 1142 1143 /* 1144 * Do not reload segment registers for kernel. 1145 * Since we do not reload segments registers with sane 1146 * values on kernel entry, descriptors referenced by 1147 * segments registers might be not valid. This is fatal 1148 * for user mode, but is not a problem for the kernel. 
1149 */ 1150 testb $SEL_RPL_MASK,TF_CS(%rsp) 1151 jz ld_regs 1152 testl $PCB_FULL_IRET,PCB_FLAGS(%r8) 1153 jz ld_regs 1154 andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) 1155 testl $TF_HASSEGS,TF_FLAGS(%rsp) 1156 je set_segs 1157 1158do_segs: 1159 /* Restore %fs and fsbase */ 1160 movw TF_FS(%rsp),%ax 1161 .globl ld_fs 1162ld_fs: 1163 movw %ax,%fs 1164 movl $MSR_FSBASE,%ecx 1165 movl PCB_FSBASE(%r8),%eax 1166 movl PCB_FSBASE+4(%r8),%edx 1167 .globl ld_fsbase 1168ld_fsbase: 1169 wrmsr 1170 /* Restore %gs and gsbase */ 1171 movw TF_GS(%rsp),%si 1172 pushfq 1173 cli 1174 movl $MSR_GSBASE,%ecx 1175 /* Save current kernel %gs base into %r12d:%r13d */ 1176 rdmsr 1177 movl %eax,%r12d 1178 movl %edx,%r13d 1179 .globl ld_gs 1180ld_gs: 1181 movw %si,%gs 1182 /* Restore kernel %gs base */ 1183 movl %r12d,%eax 1184 movl %r13d,%edx 1185 wrmsr 1186 popfq 1187 /* 1188 * Restore user %gs base, either from PCB if used for TLS, or 1189 * from the previously saved msr read. 1190 */ 1191 movl $MSR_KGSBASE,%ecx 1192 movl PCB_GSBASE(%r8),%eax 1193 movl PCB_GSBASE+4(%r8),%edx 1194 .globl ld_gsbase 1195ld_gsbase: 1196 wrmsr /* May trap if non-canonical, but only for TLS. */ 1197 .globl ld_es 1198ld_es: 1199 movw TF_ES(%rsp),%es 1200 .globl ld_ds 1201ld_ds: 1202 movw TF_DS(%rsp),%ds 1203ld_regs: 1204 RESTORE_REGS 1205 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ 1206 jz 2f /* keep running with kernel GS.base */ 1207 cli 1208 call handle_ibrs_exit_rs 1209 callq *mds_handler 1210 cmpq $~0,PCPU(UCR3) 1211 je 1f 1212 pushq %rdx 1213 movq PCPU(PTI_RSP0),%rdx 1214 subq $PTI_SIZE,%rdx 1215 movq %rax,PTI_RAX(%rdx) 1216 popq %rax 1217 movq %rax,PTI_RDX(%rdx) 1218 movq TF_RIP(%rsp),%rax 1219 movq %rax,PTI_RIP(%rdx) 1220 movq TF_CS(%rsp),%rax 1221 movq %rax,PTI_CS(%rdx) 1222 movq TF_RFLAGS(%rsp),%rax 1223 movq %rax,PTI_RFLAGS(%rdx) 1224 movq TF_RSP(%rsp),%rax 1225 movq %rax,PTI_RSP(%rdx) 1226 movq TF_SS(%rsp),%rax 1227 movq %rax,PTI_SS(%rdx) 1228 movq PCPU(UCR3),%rax 1229 andq PCPU(UCR3_LOAD_MASK),%rax 1230 movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK) 1231 swapgs 1232 movq %rdx,%rsp 1233 movq %rax,%cr3 1234 popq %rdx 1235 popq %rax 1236 addq $8,%rsp 1237 jmp doreti_iret 12381: swapgs 12392: addq $TF_RIP,%rsp 1240 .globl doreti_iret 1241doreti_iret: 1242 iretq 1243 1244set_segs: 1245 movw $KUDSEL,%ax 1246 movw %ax,TF_DS(%rsp) 1247 movw %ax,TF_ES(%rsp) 1248 movw $KUF32SEL,TF_FS(%rsp) 1249 movw $KUG32SEL,TF_GS(%rsp) 1250 jmp do_segs 1251 1252 /* 1253 * doreti_iret_fault. Alternative return code for 1254 * the case where we get a fault in the doreti_exit code 1255 * above. trap() (amd64/amd64/trap.c) catches this specific 1256 * case, sends the process a signal and continues in the 1257 * corresponding place in the code below. 
 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	/*
	 * The faulting iretq pushed a fresh fault frame on top of the
	 * stack; rebuild a full trapframe around it so the fault can be
	 * delivered through the normal calltrap path.
	 */
	subq	$TF_RIP,%rsp	/* space including tf_err, tf_trapno */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	call	handle_ibrs_entry
	/* Re-enable interrupts only if the fault came from user mode. */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	1f
	sti
1:
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ERR(%rsp)	/* XXX should be the error code */
	movq	$0,TF_ADDR(%rsp)
	jmp	calltrap

	/*
	 * Recovery points for faults taken at the ld_ds/ld_es/ld_fs/
	 * ld_gs/ld_fsbase/ld_gsbase labels in doreti_exit.  trap.c
	 * resumes execution here with the original trapframe still on
	 * the stack; each stub reports T_PROTFLT via trap(), then
	 * replaces the offending value with a known-good default and
	 * retries the return through doreti.
	 */
	ALIGN_TEXT
	.globl	ds_load_fault
ds_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_DS(%rsp)	/* force default user data sel */
	jmp	doreti

	ALIGN_TEXT
	.globl	es_load_fault
es_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_ES(%rsp)	/* force default user data sel */
	jmp	doreti

	ALIGN_TEXT
	.globl	fs_load_fault
fs_load_fault:
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	%rsp,%rdi
	call	trap
	movw	$KUF32SEL,TF_FS(%rsp)	/* force default 32-bit TLS sel */
	jmp	doreti

	ALIGN_TEXT
	.globl	gs_load_fault
gs_load_fault:
	/*
	 * Balance the pushfq done before ld_gs in doreti_exit: the
	 * saved flags are still on the stack above the trapframe.
	 */
	popfq
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUG32SEL,TF_GS(%rsp)	/* force default 32-bit TLS sel */
	jmp	doreti

	ALIGN_TEXT
	.globl	fsbase_load_fault
fsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	/* Zero the PCB's fs base so the retried wrmsr cannot fault. */
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_FSBASE(%r8)
	jmp	doreti

	ALIGN_TEXT
	.globl	gsbase_load_fault
gsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	/* Zero the PCB's gs base so the retried wrmsr cannot fault. */
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_GSBASE(%r8)
	jmp	doreti

#ifdef HWPMC_HOOKS
	ENTRY(end_exceptions)
#endif