1/*- 2 * Copyright (c) 1989, 1990 William F. Jolitz. 3 * Copyright (c) 1990 The Regents of the University of California. 4 * Copyright (c) 2007-2018 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by A. Joseph Koshy under 8 * sponsorship from the FreeBSD Foundation and Google, Inc. 9 * 10 * Portions of this software were developed by 11 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from 12 * the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39#include "opt_atpic.h" 40#include "opt_hwpmc_hooks.h" 41 42#include "assym.inc" 43 44#include <machine/psl.h> 45#include <machine/asmacros.h> 46#include <machine/trap.h> 47#include <machine/specialreg.h> 48#include <machine/pmap.h> 49 50#ifdef KDTRACE_HOOKS 51 .bss 52 .globl dtrace_invop_jump_addr 53 .align 8 54 .type dtrace_invop_jump_addr,@object 55 .size dtrace_invop_jump_addr,8 56dtrace_invop_jump_addr: 57 .zero 8 58 .globl dtrace_invop_calltrap_addr 59 .align 8 60 .type dtrace_invop_calltrap_addr,@object 61 .size dtrace_invop_calltrap_addr,8 62dtrace_invop_calltrap_addr: 63 .zero 8 64#endif 65 .text 66#ifdef HWPMC_HOOKS 67 ENTRY(start_exceptions) 68#endif 69 70/*****************************************************************************/ 71/* Trap handling */ 72/*****************************************************************************/ 73/* 74 * Trap and fault vector routines. 75 * 76 * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes 77 * state on the stack but also disables interrupts. This is important for 78 * us for the use of the swapgs instruction. We cannot be interrupted 79 * until the GS.base value is correct. For most traps, we automatically 80 * then enable interrupts if the interrupted context had them enabled. 81 * This is equivalent to the i386 port's use of SDT_SYS386TGT. 82 * 83 * The cpu will push a certain amount of state onto the kernel stack for 84 * the current process. 
 * See amd64/include/frame.h.
 * This includes the current RFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The CPU does not push the general registers, so we must do that, and we
 * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
 * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base
 * for the kernel mode operation shortly, without changes to the selector
 * loaded.  Since superuser long mode works with any selectors loaded into
 * segment registers other than %cs, which makes them mostly unused in long
 * mode, and the kernel does not reference %fs, leave them alone.  The
 * segment registers are reloaded on return to the usermode.
 */

/*
 * Traps that we leave interrupts disabled for.
 *
 * TRAP_NOEN emits three entry points for trap \l with trap number
 * \trapno: two PTI trampoline targets (\l_pti_k for kernel-mode entry,
 * \l_pti_u for user-mode entry, dispatched by PTI_ENTRY) and the plain
 * X\l IDT entry.  Each builds the software part of the trapframe
 * (dummy tf_err/tf_addr, since the CPU pushes no error code for these
 * vectors) and joins the common alltraps_noen code.
 */
	.macro	TRAP_NOEN	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u
\l\()_pti_k:
	/* Entered from kernel mode via the PTI trampoline. */
	subq	$TF_RIP,%rsp		/* extend frame below hw-pushed regs */
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)		/* dummy error code */
	jmp	alltraps_noen_k
\l\()_pti_u:
	/* Entered from user mode via the PTI trampoline. */
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_noen_u

	.globl	X\l
	.type	X\l,@function
X\l:
	/* Non-PTI entry; must distinguish user vs. kernel origin itself. */
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* non-zero RPL => from user mode */
	jz	alltraps_noen_k
	swapgs				/* install kernel GS.base */
	lfence				/* block speculation past the swapgs */
	jmp	alltraps_noen_u
	.endm

	TRAP_NOEN	bpt, T_BPTFLT
#ifdef KDTRACE_HOOKS
	TRAP_NOEN	dtrace_ret, T_DTRACE_RET
#endif

/*
 * Regular traps; the cpu does not supply tf_err for these either, so a
 * dummy error code is pushed.  Unlike TRAP_NOEN, the common code these
 * jump to re-enables interrupts if the interrupted context had them on.
 */
	.macro	TRAP	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u
\l\()_pti_k:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_k
\l\()_pti_u:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_u

	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* from user mode? */
	jz	alltraps_k
	swapgs
	lfence
	jmp	alltraps_u
	.endm

	TRAP	div, T_DIVIDE
	TRAP	ofl, T_OFLOW
	TRAP	bnd, T_BOUND
	TRAP	ill, T_PRIVINFLT
	TRAP	dna, T_DNA
	TRAP	fpusegm, T_FPOPFLT
	TRAP	rsvd, T_RESERVED
	TRAP	fpu, T_ARITHTRAP
	TRAP	xmm, T_XMMFLT

/*
 * This group of traps have tf_err already pushed by the cpu, so the
 * frame is extended only down to TF_ERR rather than TF_RIP.
 */
	.macro	TRAP_ERR	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u,has_err=1
\l\()_pti_k:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	jmp	alltraps_k
\l\()_pti_u:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	jmp	alltraps_u
	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	alltraps_k
	swapgs
	lfence
	jmp	alltraps_u
	.endm

	TRAP_ERR	tss, T_TSSFLT
	TRAP_ERR	align, T_ALIGNFLT

	/*
	 * alltraps_u/k entry points.
	 * SWAPGS must be already performed by prologue,
	 * if this is the first time in the kernel from userland.
	 * Re-enable interrupts if they were enabled before the trap.
	 * This approximates SDT_SYS386TGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps_u
	.type	alltraps_u,@function
alltraps_u:
	/* Trap from user mode: kernel GS.base already installed by prologue. */
	movq	%rdi,TF_RDI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	call	handle_ibrs_entry	/* speculation mitigation on kernel entry */
	jmp	alltraps_save_segs
	SUPERALIGN_TEXT
	.globl	alltraps_k
	.type	alltraps_k,@function
alltraps_k:
	lfence				/* fence the mispredictable user/kernel branch */
	movq	%rdi,TF_RDI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
alltraps_save_segs:
	SAVE_SEGS
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* interrupts enabled before the trap? */
	jz	alltraps_pushregs_no_rax
	sti
alltraps_pushregs_no_rax:
	/* Save the remaining general registers to complete the trapframe. */
	movq	%rsi,TF_RSI(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)	/* clear DF and AC for kernel code */
	popfq
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace Function Boundary Trace (fbt) probes are triggered
	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
	 * interrupt.  For all other trap types, just handle them in
	 * the usual way.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jnz	calltrap		/* ignore userland traps */
	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
	jne	calltrap

	/* Check if there is no DTrace hook registered. */
	cmpq	$0,dtrace_invop_jump_addr
	je	calltrap

	/*
	 * Set our jump address for the jump back in the event that
	 * the breakpoint wasn't caused by DTrace at all.
	 */
	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)

	/* Jump to the code hooked in by DTrace. */
	jmpq	*dtrace_invop_jump_addr
#endif
	.globl	calltrap
	.type	calltrap,@function
calltrap:
	KMSAN_ENTER
	movq	%rsp, %rdi		/* pass trapframe pointer */
	call	trap_check
	KMSAN_LEAVE
	jmp	doreti			/* Handle any pending ASTs */

	/*
	 * alltraps_noen_u/k entry points.
	 * Again, SWAPGS must be already performed by prologue, if needed.
	 * Unlike alltraps above, we want to leave the interrupts disabled.
	 * This corresponds to SDT_SYS386IGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps_noen_u
	.type	alltraps_noen_u,@function
alltraps_noen_u:
	movq	%rdi,TF_RDI(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	jmp	alltraps_noen_save_segs
	SUPERALIGN_TEXT
	.globl	alltraps_noen_k
	.type	alltraps_noen_k,@function
alltraps_noen_k:
	lfence
	movq	%rdi,TF_RDI(%rsp)
alltraps_noen_save_segs:
	SAVE_SEGS
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	alltraps_pushregs_no_rax
	call	handle_ibrs_entry	/* only needed when entered from user mode */
	jmp	alltraps_pushregs_no_rax

/*
 * Double fault handler.  Runs on its own IST stack with a possibly
 * invalid GS.base, so the canonical GS.base value is reloaded from the
 * word stored just above the trapframe (the same scheme the NMI handler
 * comment below describes).  dblfault_handler() does not return.
 */
IDTVEC(dblfault)
	subq	$TF_ERR,%rsp
	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	/* Load the canonical GS.base stored just above this IST frame. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_GSBASE,%ecx
	wrmsr
	/* Switch to the kernel page table if PTI is active. */
	movq	%cr3,%rax
	movq	%rax,PCPU(SAVED_UCR3)
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	2f
	movq	%rax,%cr3
2:	KMSAN_ENTER
	movq	%rsp,%rdi
	call	dblfault_handler
	KMSAN_LEAVE
3:	hlt				/* never returns; spin on hlt */
	jmp	3b

	ALIGN_TEXT
IDTVEC(page_pti)
	/* PTI #PF entry: still on the trampoline stack at this point. */
	testb	$SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp)
	jz	page_k
	swapgs
	lfence
	pushq	%rax
	movq	%cr3,%rax
	movq	%rax,PCPU(SAVED_UCR3)	/* faulting %cr3, copied to PCB below */
	cmpq	$~0,PCPU(UCR3)
	jne	1f
	popq	%rax
	jmp	page_u
1:	pushq	%rdx
	PTI_UUENTRY has_err=1
	jmp	page_u
	ALIGN_TEXT
IDTVEC(page)
	testb	$SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) /* Did we come from kernel? */
	jnz	page_u_swapgs		/* already running with kernel GS.base */
page_k:
	lfence
	subq	$TF_ERR,%rsp
	movq	%rdi,TF_RDI(%rsp)	/* free up GP registers */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	jmp	page_cr2
	ALIGN_TEXT
page_u_swapgs:
	swapgs
	lfence
page_u:
	subq	$TF_ERR,%rsp
	movq	%rdi,TF_RDI(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	movq	PCPU(SAVED_UCR3),%rax
	movq	%rax,PCB_SAVED_UCR3(%rdi)
	call	handle_ibrs_entry
page_cr2:
	movq	%cr2,%rdi		/* preserve %cr2 before .. */
	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
	SAVE_SEGS
	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rax
	sti
	jmp	alltraps_pushregs_no_rax

	/*
	 * We have to special-case this one.  If we get a trap in doreti() at
	 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
	 * to do the swapgs specially in this case even coming from the kernel.
	 * XXX linux has a trap handler for their equivalent of load_gs().
	 *
	 * On the stack, we have the hardware interrupt frame to return
	 * to usermode (faulted) and another frame with error code, for
	 * fault.  For PTI, copy both frames to the main thread stack.
	 * Handle the potential 16-byte alignment adjustment incurred
	 * during the second fault by copying both frames independently
	 * while unwinding the stack in between.
	 */
	.macro PROTF_ENTRY name,trapno
\name\()_pti_doreti:
	/* Faulted at doreti's iretq: kernel %rip but user GS.base/%cr3. */
	swapgs
	lfence
	cmpq	$~0,PCPU(UCR3)
	je	1f			/* PTI not active for this pmap */
	pushq	%rax
	pushq	%rdx
	movq	PCPU(KCR3),%rax
	movq	%rax,%cr3
	movq	PCPU(RSP0),%rax
	subq	$2*PTI_SIZE-3*8,%rax	/* no err, %rax, %rdx in faulted frame */
	MOVE_STACKS	(PTI_SIZE / 8)
	addq	$PTI_SIZE,%rax
	movq	PTI_RSP(%rsp),%rsp
	MOVE_STACKS	(PTI_SIZE / 8 - 3)
	subq	$PTI_SIZE,%rax
	movq	%rax,%rsp
	popq	%rdx
	popq	%rax
1:	swapgs				/* X\name expects the entry GS state */
	jmp	X\name
IDTVEC(\name\()_pti)
	cmpq	$doreti_iret,PTI_RIP-2*8(%rsp)
	je	\name\()_pti_doreti
	testb	$SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
	jz	X\name			/* lfence is not needed until %gs: use */
	PTI_UENTRY has_err=1
	swapgs				/* fence provided by PTI_UENTRY */
IDTVEC(\name)
	subq	$TF_ERR,%rsp		/* CPU already pushed the error code */
	movl	$\trapno,TF_TRAPNO(%rsp)
	jmp	prot_addrf
	.endm

	PROTF_ENTRY	missing, T_SEGNPFLT
	PROTF_ENTRY	stk, T_STKFLT
	PROTF_ENTRY	prot, T_PROTFLT

/*
 * Common tail for the protection-style faults above.  Distinguishes the
 * doreti_iret special case and user vs. kernel origin, and records the
 * user %fs/%gs bases into the PCB when the FSGSBASE instructions are
 * available.
 */
prot_addrf:
	movq	$0,TF_ADDR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	leaq	doreti_iret(%rip),%rdi
	cmpq	%rdi,TF_RIP(%rsp)
	je	5f			/* kernel but with user gsbase!! */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	6f			/* already running with kernel GS.base */
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	2f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	1f
	rdfsbase %rax			/* user fsbase, stored to PCB below */
1:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	2f
	rdgsbase %rdx			/* user gsbase, stored to PCB below */
2:	swapgs
	lfence
	movq	PCPU(CURPCB),%rdi
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	4f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	3f
	movq	%rax,PCB_FSBASE(%rdi)
3:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	4f
	movq	%rdx,PCB_GSBASE(%rdi)
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi) /* full iret from user #gp */
4:	call	handle_ibrs_entry
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rax
	sti
	jmp	alltraps_pushregs_no_rax

5:	swapgs
6:	lfence
	movq	PCPU(CURPCB),%rdi
	jmp	4b

/*
 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
 * and the new privilege level.  We are still running on the old user stack
 * pointer.  We have to juggle a few things around to find our stack etc.
 * swapgs gives us access to our PCPU space only.
 *
 * We do not support invoking this from custom segment registers,
 * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
 */
	SUPERALIGN_TEXT
IDTVEC(fast_syscall_pti)
	swapgs
	cmpq	$~0,PCPU(UCR3)
	je	fast_syscall_common	/* PTI disabled for this pmap */
	movq	%rax,PCPU(SCRATCH_RAX)
	movq	PCPU(KCR3),%rax
	movq	%rax,%cr3		/* switch to the kernel page table */
	movq	PCPU(SCRATCH_RAX),%rax
	jmp	fast_syscall_common
	SUPERALIGN_TEXT
IDTVEC(fast_syscall)
	swapgs
fast_syscall_common:
	movq	%rsp,PCPU(SCRATCH_RSP)
	movq	PCPU(RSP0),%rsp		/* switch to the kernel stack */
	/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
	subq	$TF_SIZE,%rsp
	/* defer TF_RSP till we have a spare register */
	movq	%r11,TF_RFLAGS(%rsp)	/* syscall placed rflags in %r11 */
	movq	%rcx,TF_RIP(%rsp)	/* %rcx original value is in %r10 */
	movq	PCPU(SCRATCH_RSP),%r11	/* %r11 already saved */
	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
	/*
	 * Save a few arg registers early to free them for use in
	 * handle_ibrs_entry().  %r10 is especially tricky.  It is not an
	 * arg register, but it holds the arg register %rcx.  Profiling
	 * preserves %rcx, but may clobber %r10.  Profiling may also
	 * clobber %r11, but %r11 (original %eflags) has been saved.
	 */
	movq	%rax,TF_RAX(%rsp)	/* syscall number */
	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
	SAVE_SEGS
	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%r11
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
	sti
	movq	$KUDSEL,TF_SS(%rsp)
	movq	$KUCSEL,TF_CS(%rsp)
	movq	$2,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
	movq	%r8,TF_R8(%rsp)		/* arg 5 */
	movq	%r9,TF_R9(%rsp)		/* arg 6 */
	movq	%rbx,TF_RBX(%rsp)	/* C preserved */
	movq	%rbp,TF_RBP(%rsp)	/* C preserved */
	movq	%r12,TF_R12(%rsp)	/* C preserved */
	movq	%r13,TF_R13(%rsp)	/* C preserved */
	movq	%r14,TF_R14(%rsp)	/* C preserved */
	movq	%r15,TF_R15(%rsp)	/* C preserved */
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	PCPU(CURTHREAD),%rdi
	movq	%rsp,TD_FRAME(%rdi)
	movl	TF_RFLAGS(%rsp),%esi
	andl	$PSL_T,%esi		/* second arg: PSL_T from user rflags */
	call	amd64_syscall
1:	movq	PCPU(CURPCB),%rax
	/* Disable interrupts before testing PCB_FULL_IRET. */
	cli
	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
	jnz	4f
	/* Check for and handle AST's on return to userland. */
	movq	PCPU(CURTHREAD),%rax
	cmpl	$0,TD_AST(%rax)
	jne	3f
	call	handle_ibrs_exit
	callq	*mds_handler
	/* Restore preserved registers.
 */
	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
	movq	TF_RAX(%rsp),%rax	/* return value 1 */
	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
	movq	TF_RIP(%rsp),%rcx	/* original %rip */
	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
	xorl	%r8d,%r8d		/* zero the rest of GPRs */
	xorl	%r10d,%r10d
	cmpq	$~0,PCPU(UCR3)
	je	2f			/* PTI disabled; stay on kernel %cr3 */
	movq	PCPU(UCR3),%r9
	andq	PCPU(UCR3_LOAD_MASK),%r9
	movq	%r9,%cr3		/* back to the user page table */
2:	xorl	%r9d,%r9d
	movq	$PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
	swapgs
	sysretq				/* fast return path for plain syscalls */

3:	/* AST scheduled. */
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	1b

4:	/* Requested full context restore, use doreti for that. */
	jmp	doreti

/*
 * Here for CYA insurance, in case a "syscall" instruction gets
 * issued from 32 bit compatibility mode.  MSR_CSTAR has to point
 * to *something* if EFER_SCE is enabled.
 */
IDTVEC(fast_syscall32)
	sysret

/*
 * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
 * generation of exception until the next instruction is executed,
 * which might be a kernel entry.  So we must execute the handler
 * on IST stack and be ready for non-kernel GSBASE.
 */
IDTVEC(dbg)
	subq	$TF_RIP,%rsp
	movl	$(T_TRCTRAP),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	dbg_fromuserspace
	lfence
	/*
	 * We've interrupted the kernel.  See comment in NMI handler about
	 * registers use.
	 */
	movq	%cr2,%r15		/* preserve %cr2 around trap() */
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12		/* %r12 = interrupted GS.base */
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr				/* %ecx still holds MSR_GSBASE */
	movq	%cr3,%r13		/* %r13 = interrupted %cr3 */
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	2f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d		/* %r14d = entry SPEC_CTRL low half */
	call	handle_ibrs_entry
2:	movq	%rsp,%rdi
	call	trap
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	3f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
3:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%r13,%cr3
	movq	%r15,%cr2
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret
dbg_fromuserspace:
	/*
	 * Switch to kernel GSBASE and kernel page table, and copy frame
	 * from the IST stack to the normal kernel stack, since trap()
	 * re-enables interrupts, and since we might trap on DB# while
	 * in trap().
	 */
	swapgs
	lfence
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	movq	PCPU(RSP0),%rax
	movl	$TF_SIZE,%ecx
	subq	%rcx,%rax
	movq	%rax,%rdi
	movq	%rsp,%rsi
	rep;movsb			/* copy the trapframe off the IST stack */
	movq	%rax,%rsp
	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%rdi
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	3f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	2f
	rdfsbase %rax
	movq	%rax,PCB_FSBASE(%rdi)
2:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	3f
	movl	$MSR_KGSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	orq	%rdx,%rax
	movq	%rax,PCB_GSBASE(%rdi)
3:	jmp	calltrap

/*
 * NMI handling is special.
 *
 * First, NMIs do not respect the state of the processor's RFLAGS.IF
 * bit.  The NMI handler may be entered at any time, including when
 * the processor is in a critical section with RFLAGS.IF == 0.
 * The processor's GS.base value could be invalid on entry to the
 * handler.
 *
 * Second, the processor treats NMIs specially, blocking further NMIs
 * until an 'iretq' instruction is executed.  We thus need to execute
 * the NMI handler with interrupts disabled, to prevent a nested interrupt
 * from executing an 'iretq' instruction and inadvertently taking the
 * processor out of NMI mode.
 *
 * Third, the NMI handler runs on its own stack (tss_ist2).  The canonical
 * GS.base value for the processor is stored just above the bottom of its
 * NMI stack.  For NMIs taken from kernel mode, the current value in
 * the processor's GS.base is saved at entry to C-preserved register %r12,
 * the canonical value for GS.base is then loaded into the processor, and
 * the saved value is restored at exit time.  For NMIs taken from user mode,
 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
768 */ 769 770IDTVEC(nmi) 771 subq $TF_RIP,%rsp 772 movl $(T_NMI),TF_TRAPNO(%rsp) 773 movq $0,TF_ADDR(%rsp) 774 movq $0,TF_ERR(%rsp) 775 movq %rdi,TF_RDI(%rsp) 776 movq %rsi,TF_RSI(%rsp) 777 movq %rdx,TF_RDX(%rsp) 778 movq %rcx,TF_RCX(%rsp) 779 movq %r8,TF_R8(%rsp) 780 movq %r9,TF_R9(%rsp) 781 movq %rax,TF_RAX(%rsp) 782 movq %rbx,TF_RBX(%rsp) 783 movq %rbp,TF_RBP(%rsp) 784 movq %r10,TF_R10(%rsp) 785 movq %r11,TF_R11(%rsp) 786 movq %r12,TF_R12(%rsp) 787 movq %r13,TF_R13(%rsp) 788 movq %r14,TF_R14(%rsp) 789 movq %r15,TF_R15(%rsp) 790 SAVE_SEGS 791 movl $TF_HASSEGS,TF_FLAGS(%rsp) 792 pushfq 793 andq $~(PSL_D | PSL_AC),(%rsp) 794 popfq 795 xorl %ebx,%ebx 796 testb $SEL_RPL_MASK,TF_CS(%rsp) 797 jnz nmi_fromuserspace 798 /* 799 * We've interrupted the kernel. Preserve in callee-saved regs: 800 * GS.base in %r12, 801 * %cr3 in %r13, 802 * possibly lower half of MSR_IA32_SPEC_CTL in %r14d, 803 * %cr2 in %r15. 804 */ 805 lfence 806 movq %cr2,%r15 807 movl $MSR_GSBASE,%ecx 808 rdmsr 809 movq %rax,%r12 810 shlq $32,%rdx 811 orq %rdx,%r12 812 /* Retrieve and load the canonical value for GS.base. 
*/ 813 movq TF_SIZE(%rsp),%rdx 814 movl %edx,%eax 815 shrq $32,%rdx 816 wrmsr 817 movq %cr3,%r13 818 movq PCPU(KCR3),%rax 819 cmpq $~0,%rax 820 je 1f 821 movq %rax,%cr3 8221: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) 823 je nmi_calltrap 824 movl $MSR_IA32_SPEC_CTRL,%ecx 825 rdmsr 826 movl %eax,%r14d 827 call handle_ibrs_entry 828 jmp nmi_calltrap 829nmi_fromuserspace: 830 incl %ebx 831 swapgs 832 lfence 833 movq %cr3,%r13 834 movq PCPU(KCR3),%rax 835 cmpq $~0,%rax 836 je 1f 837 movq %rax,%cr3 8381: call handle_ibrs_entry 839 movq PCPU(CURPCB),%rdi 840 testq %rdi,%rdi 841 jz 3f 842 orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) 843 testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) 844 jz 3f 845 cmpw $KUF32SEL,TF_FS(%rsp) 846 jne 2f 847 rdfsbase %rax 848 movq %rax,PCB_FSBASE(%rdi) 8492: cmpw $KUG32SEL,TF_GS(%rsp) 850 jne 3f 851 movl $MSR_KGSBASE,%ecx 852 rdmsr 853 shlq $32,%rdx 854 orq %rdx,%rax 855 movq %rax,PCB_GSBASE(%rdi) 8563: 857/* Note: this label is also used by ddb and gdb: */ 858nmi_calltrap: 859 KMSAN_ENTER 860 movq %rsp,%rdi 861 call trap 862 KMSAN_LEAVE 863#ifdef HWPMC_HOOKS 864 /* 865 * Capture a userspace callchain if needed. 866 * 867 * - Check if the current trap was from user mode. 868 * - Check if the current thread is valid. 869 * - Check if the thread requires a user call chain to be 870 * captured. 871 * 872 * We are still in NMI mode at this point. 873 */ 874 testl %ebx,%ebx 875 jz nocallchain /* not from userspace */ 876 movq PCPU(CURTHREAD),%rax 877 orq %rax,%rax /* curthread present? */ 878 jz nocallchain 879 /* 880 * Move execution to the regular kernel stack, because we 881 * committed to return through doreti. 
882 */ 883 movq %rsp,%rsi /* source stack pointer */ 884 movq $TF_SIZE,%rcx 885 movq PCPU(RSP0),%rdx 886 subq %rcx,%rdx 887 movq %rdx,%rdi /* destination stack pointer */ 888 shrq $3,%rcx /* trap frame size in long words */ 889 pushfq 890 andq $~(PSL_D | PSL_AC),(%rsp) 891 popfq 892 rep 893 movsq /* copy trapframe */ 894 movq %rdx,%rsp /* we are on the regular kstack */ 895 896 testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */ 897 jz nocallchain 898 /* 899 * A user callchain is to be captured, so: 900 * - Take the processor out of "NMI" mode by faking an "iret", 901 * to allow for nested NMI interrupts. 902 * - Enable interrupts, so that copyin() can work. 903 */ 904 movl %ss,%eax 905 pushq %rax /* tf_ss */ 906 pushq %rdx /* tf_rsp (on kernel stack) */ 907 pushfq /* tf_rflags */ 908 movl %cs,%eax 909 pushq %rax /* tf_cs */ 910 pushq $outofnmi /* tf_rip */ 911 iretq 912outofnmi: 913 /* 914 * At this point the processor has exited NMI mode and is running 915 * with interrupts turned off on the normal kernel stack. 916 * 917 * If a pending NMI gets recognized at or after this point, it 918 * will cause a kernel callchain to be traced. 919 * 920 * We turn interrupts back on, and call the user callchain capture hook. 921 */ 922 movq pmc_hook,%rax 923 orq %rax,%rax 924 jz nocallchain 925 movq PCPU(CURTHREAD),%rdi /* thread */ 926 movq $PMC_FN_USER_CALLCHAIN,%rsi /* command */ 927 movq %rsp,%rdx /* frame */ 928 sti 929 call *%rax 930 cli 931nocallchain: 932#endif 933 testl %ebx,%ebx /* %ebx != 0 => return to userland */ 934 jnz doreti_exit 935 /* 936 * Restore speculation control MSR, if preserved. 937 */ 938 testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) 939 je 1f 940 movl %r14d,%eax 941 xorl %edx,%edx 942 movl $MSR_IA32_SPEC_CTRL,%ecx 943 wrmsr 944 /* 945 * Put back the preserved MSR_GSBASE value. 
946 */ 9471: movl $MSR_GSBASE,%ecx 948 movq %r12,%rdx 949 movl %edx,%eax 950 shrq $32,%rdx 951 wrmsr 952 cmpb $0, nmi_flush_l1d_sw(%rip) 953 je 2f 954 call flush_l1d_sw /* bhyve L1TF assist */ 9552: movq %r13,%cr3 956 movq %r15,%cr2 957 RESTORE_REGS 958 addq $TF_RIP,%rsp 959 jmp doreti_iret 960 961/* 962 * MC# handling is similar to NMI. 963 * 964 * As with NMIs, machine check exceptions do not respect RFLAGS.IF and 965 * can occur at any time with a GS.base value that does not correspond 966 * to the privilege level in CS. 967 * 968 * Machine checks are not unblocked by iretq, but it is best to run 969 * the handler with interrupts disabled since the exception may have 970 * interrupted a critical section. 971 * 972 * The MC# handler runs on its own stack (tss_ist3). The canonical 973 * GS.base value for the processor is stored just above the bottom of 974 * its MC# stack. For exceptions taken from kernel mode, the current 975 * value in the processor's GS.base is saved at entry to C-preserved 976 * register %r12, the canonical value for GS.base is then loaded into 977 * the processor, and the saved value is restored at exit time. For 978 * exceptions taken from user mode, the cheaper 'SWAPGS' instructions 979 * are used for swapping GS.base. 
980 */ 981 982IDTVEC(mchk) 983 subq $TF_RIP,%rsp 984 movl $(T_MCHK),TF_TRAPNO(%rsp) 985 movq $0,TF_ADDR(%rsp) 986 movq $0,TF_ERR(%rsp) 987 movq %rdi,TF_RDI(%rsp) 988 movq %rsi,TF_RSI(%rsp) 989 movq %rdx,TF_RDX(%rsp) 990 movq %rcx,TF_RCX(%rsp) 991 movq %r8,TF_R8(%rsp) 992 movq %r9,TF_R9(%rsp) 993 movq %rax,TF_RAX(%rsp) 994 movq %rbx,TF_RBX(%rsp) 995 movq %rbp,TF_RBP(%rsp) 996 movq %r10,TF_R10(%rsp) 997 movq %r11,TF_R11(%rsp) 998 movq %r12,TF_R12(%rsp) 999 movq %r13,TF_R13(%rsp) 1000 movq %r14,TF_R14(%rsp) 1001 movq %r15,TF_R15(%rsp) 1002 SAVE_SEGS 1003 movl $TF_HASSEGS,TF_FLAGS(%rsp) 1004 pushfq 1005 andq $~(PSL_D | PSL_AC),(%rsp) 1006 popfq 1007 xorl %ebx,%ebx 1008 testb $SEL_RPL_MASK,TF_CS(%rsp) 1009 jnz mchk_fromuserspace 1010 /* 1011 * We've interrupted the kernel. See comment in NMI handler about 1012 * registers use. 1013 */ 1014 movq %cr2,%r15 1015 movl $MSR_GSBASE,%ecx 1016 rdmsr 1017 movq %rax,%r12 1018 shlq $32,%rdx 1019 orq %rdx,%r12 1020 /* Retrieve and load the canonical value for GS.base. */ 1021 movq TF_SIZE(%rsp),%rdx 1022 movl %edx,%eax 1023 shrq $32,%rdx 1024 wrmsr 1025 movq %cr3,%r13 1026 movq PCPU(KCR3),%rax 1027 cmpq $~0,%rax 1028 je 1f 1029 movq %rax,%cr3 10301: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) 1031 je mchk_calltrap 1032 movl $MSR_IA32_SPEC_CTRL,%ecx 1033 rdmsr 1034 movl %eax,%r14d 1035 call handle_ibrs_entry 1036 jmp mchk_calltrap 1037mchk_fromuserspace: 1038 incl %ebx 1039 swapgs 1040 movq %cr3,%r13 1041 movq PCPU(KCR3),%rax 1042 cmpq $~0,%rax 1043 je 1f 1044 movq %rax,%cr3 10451: call handle_ibrs_entry 1046/* Note: this label is also used by ddb and gdb: */ 1047mchk_calltrap: 1048 KMSAN_ENTER 1049 movq %rsp,%rdi 1050 call mca_intr 1051 KMSAN_LEAVE 1052 testl %ebx,%ebx /* %ebx != 0 => return to userland */ 1053 jnz doreti_exit 1054 /* 1055 * Restore speculation control MSR, if preserved. 
1056 */ 1057 testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) 1058 je 1f 1059 movl %r14d,%eax 1060 xorl %edx,%edx 1061 movl $MSR_IA32_SPEC_CTRL,%ecx 1062 wrmsr 1063 /* 1064 * Put back the preserved MSR_GSBASE value. 1065 */ 10661: movl $MSR_GSBASE,%ecx 1067 movq %r12,%rdx 1068 movl %edx,%eax 1069 shrq $32,%rdx 1070 wrmsr 1071 movq %r13,%cr3 1072 movq %r15,%cr2 1073 RESTORE_REGS 1074 addq $TF_RIP,%rsp 1075 jmp doreti_iret 1076 1077ENTRY(fork_trampoline) 1078 movq %r12,%rdi /* function */ 1079 movq %rbx,%rsi /* arg1 */ 1080 movq %rsp,%rdx /* trapframe pointer */ 1081 call fork_exit 1082 jmp doreti /* Handle any ASTs */ 1083 1084/* 1085 * To efficiently implement classification of trap and interrupt handlers 1086 * for profiling, there must be only trap handlers between the labels btrap 1087 * and bintr, and only interrupt handlers between the labels bintr and 1088 * eintr. This is implemented (partly) by including files that contain 1089 * some of the handlers. Before including the files, set up a normal asm 1090 * environment so that the included files doesn't need to know that they are 1091 * included. 1092 */ 1093 1094#ifdef COMPAT_FREEBSD32 1095 .data 1096 .p2align 4 1097 .text 1098 SUPERALIGN_TEXT 1099 1100#include <amd64/ia32/ia32_exception.S> 1101#endif 1102 1103 .data 1104 .p2align 4 1105 .text 1106 SUPERALIGN_TEXT 1107#include <amd64/amd64/apic_vector.S> 1108 1109#ifdef DEV_ATPIC 1110 .data 1111 .p2align 4 1112 .text 1113 SUPERALIGN_TEXT 1114 1115#include <amd64/amd64/atpic_vector.S> 1116#endif 1117 1118/* 1119 * void doreti(struct trapframe) 1120 * 1121 * Handle return from interrupts, traps and syscalls. 1122 */ 1123 .text 1124 SUPERALIGN_TEXT 1125 .type doreti,@function 1126 .globl doreti 1127doreti: 1128 /* 1129 * Check if ASTs can be handled now. 1130 */ 1131 testb $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? 
*/ 1132 jz doreti_exit /* can't handle ASTs now if not */ 1133 1134doreti_ast: 1135 /* 1136 * Check for ASTs atomically with returning. Disabling CPU 1137 * interrupts provides sufficient locking even in the SMP case, 1138 * since we will be informed of any new ASTs by an IPI. 1139 */ 1140 cli 1141 movq PCPU(CURTHREAD),%rax 1142 cmpl $0,TD_AST(%rax) 1143 je doreti_exit 1144 sti 1145 movq %rsp,%rdi /* pass a pointer to the trapframe */ 1146 call ast 1147 jmp doreti_ast 1148 1149 /* 1150 * doreti_exit: pop registers, iret. 1151 * 1152 * The segment register pop is a special case, since it may 1153 * fault if (for example) a sigreturn specifies bad segment 1154 * registers. The fault is handled in trap.c. 1155 */ 1156doreti_exit: 1157 movq PCPU(CURPCB),%r8 1158 1159 /* 1160 * Do not reload segment registers for kernel. 1161 * Since we do not reload segments registers with sane 1162 * values on kernel entry, descriptors referenced by 1163 * segments registers might be not valid. This is fatal 1164 * for user mode, but is not a problem for the kernel. 
1165 */ 1166 testb $SEL_RPL_MASK,TF_CS(%rsp) 1167 jz ld_regs 1168 testl $PCB_FULL_IRET,PCB_FLAGS(%r8) 1169 jz ld_regs 1170 andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) 1171 testl $TF_HASSEGS,TF_FLAGS(%rsp) 1172 je set_segs 1173 1174do_segs: 1175 /* Restore %fs and fsbase */ 1176 movw TF_FS(%rsp),%ax 1177 .globl ld_fs 1178ld_fs: 1179 movw %ax,%fs 1180 cmpw $KUF32SEL,%ax 1181 jne 1f 1182 movl $MSR_FSBASE,%ecx 1183 movl PCB_FSBASE(%r8),%eax 1184 movl PCB_FSBASE+4(%r8),%edx 1185 .globl ld_fsbase 1186ld_fsbase: 1187 wrmsr 11881: 1189 /* Restore %gs and gsbase */ 1190 movw TF_GS(%rsp),%si 1191 pushfq 1192 cli 1193 movl $MSR_GSBASE,%ecx 1194 /* Save current kernel %gs base into %r12d:%r13d */ 1195 rdmsr 1196 movl %eax,%r12d 1197 movl %edx,%r13d 1198 .globl ld_gs 1199ld_gs: 1200 movw %si,%gs 1201 /* Save user %gs base into %r14d:%r15d */ 1202 rdmsr 1203 movl %eax,%r14d 1204 movl %edx,%r15d 1205 /* Restore kernel %gs base */ 1206 movl %r12d,%eax 1207 movl %r13d,%edx 1208 wrmsr 1209 popfq 1210 /* 1211 * Restore user %gs base, either from PCB if used for TLS, or 1212 * from the previously saved msr read. 1213 */ 1214 movl $MSR_KGSBASE,%ecx 1215 cmpw $KUG32SEL,%si 1216 jne 1f 1217 movl PCB_GSBASE(%r8),%eax 1218 movl PCB_GSBASE+4(%r8),%edx 1219 jmp ld_gsbase 12201: 1221 movl %r14d,%eax 1222 movl %r15d,%edx 1223 .globl ld_gsbase 1224ld_gsbase: 1225 wrmsr /* May trap if non-canonical, but only for TLS. */ 1226 .globl ld_es 1227ld_es: 1228 movw TF_ES(%rsp),%es 1229 .globl ld_ds 1230ld_ds: 1231 movw TF_DS(%rsp),%ds 1232ld_regs: 1233 RESTORE_REGS 1234 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ 1235 jz 2f /* keep running with kernel GS.base */ 1236 cli 1237 call handle_ibrs_exit_rs 1238 callq *mds_handler 1239 cmpq $~0,PCPU(UCR3) 1240 je 1f 1241 pushq %rdx 1242 movq PCPU(PTI_RSP0),%rdx 1243 subq $PTI_SIZE,%rdx 1244 movq %rax,PTI_RAX(%rdx) 1245 popq %rax 1246 movq %rax,PTI_RDX(%rdx) 1247 movq TF_RIP(%rsp),%rax 1248 movq %rax,PTI_RIP(%rdx) 1249 movq TF_CS(%rsp),%rax 1250 movq %rax,PTI_CS(%rdx) 1251 movq TF_RFLAGS(%rsp),%rax 1252 movq %rax,PTI_RFLAGS(%rdx) 1253 movq TF_RSP(%rsp),%rax 1254 movq %rax,PTI_RSP(%rdx) 1255 movq TF_SS(%rsp),%rax 1256 movq %rax,PTI_SS(%rdx) 1257 movq PCPU(UCR3),%rax 1258 andq PCPU(UCR3_LOAD_MASK),%rax 1259 movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK) 1260 swapgs 1261 movq %rdx,%rsp 1262 movq %rax,%cr3 1263 popq %rdx 1264 popq %rax 1265 addq $8,%rsp 1266 jmp doreti_iret 12671: swapgs 12682: addq $TF_RIP,%rsp 1269 .globl doreti_iret 1270doreti_iret: 1271 iretq 1272 1273set_segs: 1274 movw $KUDSEL,%ax 1275 movw %ax,TF_DS(%rsp) 1276 movw %ax,TF_ES(%rsp) 1277 movw $KUF32SEL,TF_FS(%rsp) 1278 movw $KUG32SEL,TF_GS(%rsp) 1279 jmp do_segs 1280 1281 /* 1282 * doreti_iret_fault. Alternative return code for 1283 * the case where we get a fault in the doreti_exit code 1284 * above. trap() (amd64/amd64/trap.c) catches this specific 1285 * case, sends the process a signal and continues in the 1286 * corresponding place in the code below. 
 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	/*
	 * The iretq in doreti_iret faulted; the CPU pushed a fresh iret
	 * frame.  Rebuild a full trapframe around it and re-enter trap()
	 * as a protection fault.
	 */
	subq	$TF_RIP,%rsp	/* space including tf_err, tf_trapno */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	call	handle_ibrs_entry	/* re-enter kernel IBRS mode */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	1f
	sti				/* faulting frame was user mode */
1:
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ERR(%rsp)	/* XXX should be the error code */
	movq	$0,TF_ADDR(%rsp)
	jmp	calltrap

	/*
	 * Continuation for a fault at ld_ds: re-run trap() with
	 * T_PROTFLT and substitute the default user %ds selector.
	 */
	ALIGN_TEXT
	.globl	ds_load_fault
ds_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_DS(%rsp)
	jmp	doreti

	/* Continuation for a fault at ld_es; as above, for %es. */
	ALIGN_TEXT
	.globl	es_load_fault
es_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* re-enable intrs if frame had them */
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_ES(%rsp)
	jmp	doreti

	/* Continuation for a fault at ld_fs; substitute the TLS selector. */
	ALIGN_TEXT
	.globl	fs_load_fault
fs_load_fault:
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	%rsp,%rdi
	call	trap
	movw	$KUF32SEL,TF_FS(%rsp)
	jmp	doreti

	/*
	 * Continuation for a fault at ld_gs.  That load runs under
	 * pushfq/cli in doreti_exit, so first discard the saved flags
	 * still on the stack.
	 */
	ALIGN_TEXT
	.globl	gs_load_fault
gs_load_fault:
	popfq
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	doreti

	/*
	 * Continuation for a fault at ld_fsbase (non-canonical base):
	 * zero the PCB copy so the bad value is not reloaded.
	 */
	ALIGN_TEXT
	.globl	fsbase_load_fault
fsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_FSBASE(%r8)
	jmp	doreti

	/* Continuation for a fault at ld_gsbase; as above, for GS base. */
	ALIGN_TEXT
	.globl	gsbase_load_fault
gsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_GSBASE(%r8)
	jmp	doreti

#ifdef HWPMC_HOOKS
	/* End marker pairing start_exceptions, for hwpmc PC classification. */
	ENTRY(end_exceptions)
#endif