/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * This file contains the trampolines that are used by KPTI in order to be
 * able to take interrupts/traps/etc while on the "user" page table.
 *
 * We don't map the full kernel text into the user page table: instead we
 * map this one small section of trampolines (which compiles to ~13 pages).
 * These trampolines are always set in the IDT (so they will run no matter
 * whether we're on the kernel or user page table), and their primary job is
 * to pivot us to the kernel %cr3 and %rsp without ruining everything.
 *
 * All of these interrupts use the amd64 IST feature when we have KPTI
 * enabled, meaning that they will execute with their %rsp set to a known
 * location, even if we take them in the kernel.
 *
 * Over in desctbls.c (for cpu0) and mp_pc.c (other cpus) we set up the IST
 * stack to point at &cpu->cpu_m.mcpu_kpti.kf_tr_rsp. You can see the mcpu_kpti
 * (a struct kpti_frame) defined in machcpuvar.h. This struct is set up to be
 * page-aligned, and we map the page it's on into both page tables. Using a
 * struct attached to the cpu_t also means that we can use %rsp-relative
 * addressing to find anything on the cpu_t, so we don't have to touch %gs or
 * GSBASE at all on incoming interrupt trampolines (which can get pretty hairy).
 *
 * This little struct is where the CPU will push the actual interrupt frame.
 * Then, in the trampoline, we change %cr3, then figure out our destination
 * stack pointer and "pivot" to it (set %rsp and re-push the CPU's interrupt
 * frame). Then we jump to the regular ISR in the kernel text and carry on as
 * normal.
 *
 * We leave the original frame and any spilled regs behind in the kpti_frame
 * lazily until we want to return to userland. Then, we clear any spilled
 * regs from it, and overwrite the rest with our iret frame. When switching
 * this cpu to a different process (in hat_switch), we bzero the whole region
 * to make sure nothing can leak between processes.
 *
 * When we're returning back to the original place we took the interrupt later
 * (especially if it was in userland), we have to jmp back to the "return
 * trampolines" here, since when we set %cr3 back to the user value, we need to
 * be executing from code here in these shared pages and not the main kernel
 * text again. Even though it should be fine to iret directly from kernel text
 * when returning to kernel code, we make things jmp to a trampoline here just
 * for consistency.
 *
 * Note that with IST, it's very important that we must always have pivoted
 * away from the IST stack before we can possibly take any other interrupt
 * on the same IST (unless it's an end-of-the-world fault and we don't care
 * about coming back from it ever).
 *
 * This is particularly relevant to the dbgtrap/brktrap trampolines, as they
 * regularly have to happen from within trampoline code (e.g. in the sysenter
 * single-step case) and then return to the world normally. As a result, these
 * two are IST'd to their own kpti_frame right above the normal one (in the
 * same page), so they don't clobber their parent interrupt.
 *
 * To aid with debugging, we also IST the page fault (#PF/pftrap), general
 * protection fault (#GP/gptrap) and stack fault (#SS/stktrap) interrupts to
 * their own separate kpti_frame. This ensures that if we take one of these
 * due to a bug in trampoline code, we preserve the original trampoline
 * state that caused the trap.
 *
 * NMI, MCE and dblfault interrupts are also taken on their own dedicated IST
 * stacks, since they can interrupt another ISR at any time. These stacks are
 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in
 * their trampolines (and do it unconditionally), and don't bother pivoting
 * away. We're either going into the panic() path, or we're going to return
 * straight away without rescheduling, so it's fine to not be on our real
 * kthread stack (and some of the state we want to go find it with might be
 * corrupt!)
 *
 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
 * point at the PML4 for kas early in boot and never touch it again. Hopefully
 * it survives whatever corruption brings down the rest of the kernel!
 *
 * Syscalls are different from interrupts (at least in the SYSENTER/SYSCALL64
 * cases) in that they do not push an interrupt frame (and also have some other
 * effects). In the syscall trampolines, we assume that we can only be taking
 * the call from userland and use swapgs and an unconditional overwrite of %cr3.
 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
 * existing %rsp pivot untouched) -- instead we spill registers into
 * %gs:CPU_KPTI_* as we need to.
 *
 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see
 * hat_switch().
 */
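
/*
 * As a rough sketch only (the macros and trampolines below are what actually
 * runs), the common interrupt entry path through these pages is:
 *
 *	1. The CPU, via the IST, pushes %ss/%rsp/%rflags/%cs/%rip (and
 *	   possibly an error code) directly into this cpu's kpti_frame.
 *	2. The trampoline spills %r13/%r14 into the frame and saves the
 *	   incoming %cr3, then loads the kernel %cr3 (if one is set).
 *	3. It picks a destination stack: the top of the kthread stack if we
 *	   interrupted userland (or trampoline code), or the interrupted
 *	   %rsp if we were already in the kernel.
 *	4. It re-pushes the interrupt frame on that stack, restores
 *	   %r13/%r14, and jmps to the normal ISR in kernel text.
 */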

/*
 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
 * fix bugs here check to see if they should be fixed there as well.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/machbrand.h>
#include <sys/param.h>

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/model.h>
#include <sys/panic.h>

#if defined(__xpv)
#include <sys/hypervisor.h>
#endif

#include "assym.h"

	.data
	DGDEF3(kpti_enable, 8, 8)
	.fill	1, 8, 1

#if DEBUG
	.data
_bad_ts_panic_msg:
	.string "kpti_trampolines.s: tr_iret_user but CR0.TS set"
#endif

.section ".text";
.align MMU_PAGESIZE

.global kpti_tramp_start
kpti_tramp_start:
	nop

/* This will be set by mlsetup, and then double-checked later */
.global kpti_safe_cr3
kpti_safe_cr3:
	.quad 0
	SET_SIZE(kpti_safe_cr3)

/* startup_kmem() will overwrite this */
.global kpti_kbase
kpti_kbase:
	.quad KERNELBASE
	SET_SIZE(kpti_kbase)

#define	SET_KERNEL_CR3(spillreg)		\
	mov	%cr3, spillreg;			\
	mov	spillreg, %gs:CPU_KPTI_TR_CR3;	\
	mov	%gs:CPU_KPTI_KCR3, spillreg;	\
	cmp	$0, spillreg;			\
	je	2f;				\
	mov	spillreg, %cr3;			\
2:

#if DEBUG
#define	SET_USER_CR3(spillreg)			\
	mov	%cr3, spillreg;			\
	mov	spillreg, %gs:CPU_KPTI_TR_CR3;	\
	mov	%gs:CPU_KPTI_UCR3, spillreg;	\
	mov	spillreg, %cr3
#else
#define	SET_USER_CR3(spillreg)			\
	mov	%gs:CPU_KPTI_UCR3, spillreg;	\
	mov	spillreg, %cr3
#endif

#define	PIVOT_KPTI_STK(spillreg)		\
	mov	%rsp, spillreg;			\
	mov	%gs:CPU_KPTI_RET_RSP, %rsp;	\
	pushq	T_FRAMERET_SS(spillreg);	\
	pushq	T_FRAMERET_RSP(spillreg);	\
	pushq	T_FRAMERET_RFLAGS(spillreg);	\
	pushq	T_FRAMERET_CS(spillreg);	\
	pushq	T_FRAMERET_RIP(spillreg)
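
/*
 * An aside on the "Get our cpu_t" step in the trampolines below (a sketch,
 * assuming CPU_KPTI_START is the assym offset of the page-aligned KPTI area
 * within the cpu_t, per the header comment): because %rsp points into the
 * kpti_frame, the cpu_t can be recovered with arithmetic alone,
 *
 *	cpu = (cpu_t *)((rsp & ~(MMU_PAGESIZE - 1)) - CPU_KPTI_START);
 *
 * which is why these entry paths never need to touch %gs or GSBASE.
 */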

#define	INTERRUPT_TRAMPOLINE_P(errpush)		\
	pushq	%r13;				\
	pushq	%r14;				\
	subq	$KPTI_R14, %rsp;		\
	/* Save current %cr3. */		\
	mov	%cr3, %r14;			\
	mov	%r14, KPTI_TR_CR3(%rsp);	\
						\
	cmpw	$KCS_SEL, KPTI_CS(%rsp);	\
	je	3f;				\
1:						\
	/* Change to the "kernel" %cr3 */	\
	mov	KPTI_KCR3(%rsp), %r14;		\
	cmp	$0, %r14;			\
	je	2f;				\
	mov	%r14, %cr3;			\
2:						\
	/* Get our cpu_t in %r13 */		\
	mov	%rsp, %r13;			\
	and	$(~(MMU_PAGESIZE - 1)), %r13;	\
	subq	$CPU_KPTI_START, %r13;		\
	/* Use top of the kthread stk */	\
	mov	CPU_THREAD(%r13), %r14;		\
	mov	T_STACK(%r14), %r14;		\
	addq	$REGSIZE+MINFRAME, %r14;	\
	jmp	4f;				\
3:						\
	/* Check the %rsp in the frame. */	\
	/* Is it above kernel base? */		\
	mov	kpti_kbase, %r14;		\
	cmp	%r14, KPTI_RSP(%rsp);		\
	jb	1b;				\
	/* Use the %rsp from the trap frame */	\
	mov	KPTI_RSP(%rsp), %r14;		\
	and	$(~0xf), %r14;			\
4:						\
	mov	%rsp, %r13;			\
	/* %r14 contains our destination stk */	\
	mov	%r14, %rsp;			\
	pushq	KPTI_SS(%r13);			\
	pushq	KPTI_RSP(%r13);			\
	pushq	KPTI_RFLAGS(%r13);		\
	pushq	KPTI_CS(%r13);			\
	pushq	KPTI_RIP(%r13);			\
	errpush;				\
	mov	KPTI_R14(%r13), %r14;		\
	mov	KPTI_R13(%r13), %r13

#define	INTERRUPT_TRAMPOLINE_NOERR		\
	INTERRUPT_TRAMPOLINE_P(/**/)

#define	INTERRUPT_TRAMPOLINE			\
	INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))

/*
 * This is used for all interrupts that can plausibly be taken inside another
 * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS).
 *
 * We also use this for #NP, even though it uses the standard IST: the
 * additional %rsp checks below will catch when we get an exception doing an
 * iret to userspace with a bad %cs/%ss. This appears as a kernel trap, and
 * only later gets redirected via kern_gpfault().
 *
 * We check for whether we took the interrupt while in another trampoline, in
 * which case we need to use the kthread stack.
 */
#define	DBG_INTERRUPT_TRAMPOLINE_P(errpush)	\
	pushq	%r13;				\
	pushq	%r14;				\
	subq	$KPTI_R14, %rsp;		\
	/* Check for clobbering */		\
	cmpq	$0, KPTI_FLAG(%rsp);		\
	je	1f;				\
	/* Don't worry, this totally works */	\
	int	$8;				\
1:						\
	movq	$1, KPTI_FLAG(%rsp);		\
	/* Save current %cr3. */		\
	mov	%cr3, %r14;			\
	mov	%r14, KPTI_TR_CR3(%rsp);	\
						\
	cmpw	$KCS_SEL, KPTI_CS(%rsp);	\
	je	4f;				\
2:						\
	/* Change to the "kernel" %cr3 */	\
	mov	KPTI_KCR3(%rsp), %r14;		\
	cmp	$0, %r14;			\
	je	3f;				\
	mov	%r14, %cr3;			\
3:						\
	/* Get our cpu_t in %r13 */		\
	mov	%rsp, %r13;			\
	and	$(~(MMU_PAGESIZE - 1)), %r13;	\
	subq	$CPU_KPTI_START, %r13;		\
	/* Use top of the kthread stk */	\
	mov	CPU_THREAD(%r13), %r14;		\
	mov	T_STACK(%r14), %r14;		\
	addq	$REGSIZE+MINFRAME, %r14;	\
	jmp	6f;				\
4:						\
	/* Check the %rsp in the frame. */	\
	/* Is it above kernel base? */		\
	/* If not, treat as user. */		\
	mov	kpti_kbase, %r14;		\
	cmp	%r14, KPTI_RSP(%rsp);		\
	jb	2b;				\
	/* Is it within the kpti_frame page? */	\
	/* If it is, treat as user interrupt */	\
	mov	%rsp, %r13;			\
	and	$(~(MMU_PAGESIZE - 1)), %r13;	\
	mov	KPTI_RSP(%rsp), %r14;		\
	and	$(~(MMU_PAGESIZE - 1)), %r14;	\
	cmp	%r13, %r14;			\
	je	2b;				\
	/* Were we in trampoline code? */	\
	leaq	kpti_tramp_start, %r14;		\
	cmp	%r14, KPTI_RIP(%rsp);		\
	jb	5f;				\
	leaq	kpti_tramp_end, %r14;		\
	cmp	%r14, KPTI_RIP(%rsp);		\
	ja	5f;				\
	/* If we were, change %cr3: we might */	\
	/* have interrupted before it did. */	\
	mov	KPTI_KCR3(%rsp), %r14;		\
	mov	%r14, %cr3;			\
5:						\
	/* Use the %rsp from the trap frame */	\
	mov	KPTI_RSP(%rsp), %r14;		\
	and	$(~0xf), %r14;			\
6:						\
	mov	%rsp, %r13;			\
	/* %r14 contains our destination stk */	\
	mov	%r14, %rsp;			\
	pushq	KPTI_SS(%r13);			\
	pushq	KPTI_RSP(%r13);			\
	pushq	KPTI_RFLAGS(%r13);		\
	pushq	KPTI_CS(%r13);			\
	pushq	KPTI_RIP(%r13);			\
	errpush;				\
	mov	KPTI_R14(%r13), %r14;		\
	movq	$0, KPTI_FLAG(%r13);		\
	mov	KPTI_R13(%r13), %r13

#define	DBG_INTERRUPT_TRAMPOLINE_NOERR		\
	DBG_INTERRUPT_TRAMPOLINE_P(/**/)

#define	DBG_INTERRUPT_TRAMPOLINE		\
	DBG_INTERRUPT_TRAMPOLINE_P(pushq KPTI_ERR(%r13))

	/*
	 * These labels (_start and _end) are used by trap.c to determine if
	 * we took an interrupt like an NMI during the return process.
	 */
.global tr_sysc_ret_start
tr_sysc_ret_start:

	/*
	 * Syscall return trampolines.
	 *
	 * These are expected to be called on the kernel %gs. tr_sysret[ql] are
	 * called after %rsp is changed back to the user value, so we have no
	 * stack to work with. tr_sysexit has a kernel stack (but has to
	 * preserve rflags, soooo).
	 */
	ENTRY_NP(tr_sysretq)
	cmpq	$1, kpti_enable
	jne	1f

	mov	%r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
1:
	swapgs
	sysretq
	SET_SIZE(tr_sysretq)

	ENTRY_NP(tr_sysretl)
	cmpq	$1, kpti_enable
	jne	1f

	mov	%r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
1:
	SWAPGS
	SYSRETL
	SET_SIZE(tr_sysretl)

	ENTRY_NP(tr_sysexit)
	/*
	 * Note: we want to preserve RFLAGS across this branch, since sysexit
	 * (unlike sysret above) does not restore RFLAGS for us.
	 *
	 * We still have the real kernel stack (sysexit does restore that), so
	 * we can use pushfq/popfq.
	 */
	pushfq

	cmpq	$1, kpti_enable
	jne	1f

	/* Have to pop it back off now before we change %cr3! */
	popfq
	mov	%r13, %gs:CPU_KPTI_R13
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
	jmp	2f
1:
	popfq
2:
	swapgs
	sti
	SYSEXITL
	SET_SIZE(tr_sysexit)

.global tr_sysc_ret_end
tr_sysc_ret_end:

	/*
	 * Syscall entry trampolines.
	 */

#if DEBUG
#define	MK_SYSCALL_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_##isr);			\
	swapgs;					\
	mov	%r13, %gs:CPU_KPTI_R13;		\
	mov	%cr3, %r13;			\
	mov	%r13, %gs:CPU_KPTI_TR_CR3;	\
	mov	%gs:CPU_KPTI_KCR3, %r13;	\
	mov	%r13, %cr3;			\
	mov	%gs:CPU_KPTI_R13, %r13;		\
	swapgs;					\
	jmp	isr;				\
	SET_SIZE(tr_##isr)
#else
#define	MK_SYSCALL_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_##isr);			\
	swapgs;					\
	mov	%r13, %gs:CPU_KPTI_R13;		\
	mov	%gs:CPU_KPTI_KCR3, %r13;	\
	mov	%r13, %cr3;			\
	mov	%gs:CPU_KPTI_R13, %r13;		\
	swapgs;					\
	jmp	isr;				\
	SET_SIZE(tr_##isr)
#endif

	MK_SYSCALL_TRAMPOLINE(sys_syscall)
	MK_SYSCALL_TRAMPOLINE(sys_syscall32)
	MK_SYSCALL_TRAMPOLINE(brand_sys_syscall)
	MK_SYSCALL_TRAMPOLINE(brand_sys_syscall32)
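
	/*
	 * For orientation only: ignoring the ENTRY_NP/SET_SIZE bookkeeping,
	 * the non-DEBUG expansion of, say, MK_SYSCALL_TRAMPOLINE(sys_syscall)
	 * above is just
	 *
	 *	tr_sys_syscall:
	 *		swapgs
	 *		mov	%r13, %gs:CPU_KPTI_R13
	 *		mov	%gs:CPU_KPTI_KCR3, %r13
	 *		mov	%r13, %cr3
	 *		mov	%gs:CPU_KPTI_R13, %r13
	 *		swapgs
	 *		jmp	sys_syscall
	 *
	 * i.e. spill one register, switch to the kernel %cr3, restore it and
	 * hand off to the usual handler -- no stack pivot, per the header
	 * comment.
	 */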

	/*
	 * SYSENTER is special. The CPU is really not very helpful when it
	 * comes to preserving and restoring state with it, and as a result
	 * we have to do all of it by hand. So, since we want to preserve
	 * RFLAGS, we have to be very careful in these trampolines to not
	 * clobber any bits in it. That means no cmpqs or branches!
	 */
	ENTRY_NP(tr_sys_sysenter)
	swapgs
	mov	%r13, %gs:CPU_KPTI_R13
#if DEBUG
	mov	%cr3, %r13
	mov	%r13, %gs:CPU_KPTI_TR_CR3
#endif
	mov	%gs:CPU_KPTI_KCR3, %r13
	mov	%r13, %cr3
	mov	%gs:CPU_KPTI_R13, %r13
	jmp	_sys_sysenter_post_swapgs
	SET_SIZE(tr_sys_sysenter)

	ENTRY_NP(tr_brand_sys_sysenter)
	swapgs
	mov	%r13, %gs:CPU_KPTI_R13
#if DEBUG
	mov	%cr3, %r13
	mov	%r13, %gs:CPU_KPTI_TR_CR3
#endif
	mov	%gs:CPU_KPTI_KCR3, %r13
	mov	%r13, %cr3
	mov	%gs:CPU_KPTI_R13, %r13
	jmp	_brand_sys_sysenter_post_swapgs
	SET_SIZE(tr_brand_sys_sysenter)

#define	MK_SYSCALL_INT_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_##isr);			\
	swapgs;					\
	mov	%r13, %gs:CPU_KPTI_R13;		\
	SET_KERNEL_CR3(%r13);			\
	mov	%gs:CPU_THREAD, %r13;		\
	mov	T_STACK(%r13), %r13;		\
	addq	$REGSIZE+MINFRAME, %r13;	\
	mov	%r13, %rsp;			\
	pushq	%gs:CPU_KPTI_SS;		\
	pushq	%gs:CPU_KPTI_RSP;		\
	pushq	%gs:CPU_KPTI_RFLAGS;		\
	pushq	%gs:CPU_KPTI_CS;		\
	pushq	%gs:CPU_KPTI_RIP;		\
	mov	%gs:CPU_KPTI_R13, %r13;		\
	swapgs;					\
	jmp	isr;				\
	SET_SIZE(tr_##isr)

	MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
	MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)

	/*
	 * Interrupt/trap return trampolines
	 */

.global tr_intr_ret_start
tr_intr_ret_start:

	ENTRY_NP(tr_iret_auto)
	cmpq	$1, kpti_enable
	jne	tr_iret_kernel
	cmpw	$KCS_SEL, T_FRAMERET_CS(%rsp)
	je	tr_iret_kernel
	jmp	tr_iret_user
	SET_SIZE(tr_iret_auto)

	ENTRY_NP(tr_iret_kernel)
	/*
	 * Yes, this does nothing extra. But this way we know if we see iret
	 * elsewhere, then we've failed to properly consider trampolines there.
	 */
	iretq
	SET_SIZE(tr_iret_kernel)

	ENTRY_NP(tr_iret_user)
#if DEBUG
	/*
	 * Panic if we find CR0.TS set. We're still on the kernel stack and
	 * %cr3, but we do need to swap back to the kernel gs. (We don't worry
	 * about swapgs speculation here.)
	 */
	pushq	%rax
	mov	%cr0, %rax
	testq	$CR0_TS, %rax
	jz	1f
	swapgs
	popq	%rax
	leaq	_bad_ts_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	pushq	%rbp
	movq	%rsp, %rbp
	call	panic
1:
	popq	%rax
#endif

	cmpq	$1, kpti_enable
	jne	1f

	/*
	 * KPTI enabled: we're on the user gsbase at this point, so we
	 * need to swap back so we can pivot stacks.
	 *
	 * The swapgs lfence mitigation is probably not needed here
	 * since a mis-speculation of the above branch would imply KPTI
	 * is disabled, but we'll do so anyway.
	 */
	swapgs
	lfence
	mov	%r13, %gs:CPU_KPTI_R13
	PIVOT_KPTI_STK(%r13)
	SET_USER_CR3(%r13)
	mov	%gs:CPU_KPTI_R13, %r13
	/* Zero these to make sure they didn't leak from a kernel trap. */
	movq	$0, %gs:CPU_KPTI_R13
	movq	$0, %gs:CPU_KPTI_R14
	/* And back to user gsbase again. */
	swapgs
1:
	iretq
	SET_SIZE(tr_iret_user)

	/*
	 * This special return trampoline is for KDI's use only (with kmdb).
	 *
	 * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR
	 * instead. This trampoline runs after GSBASE has already been changed
	 * back to the userland value (so we can't use %gs).
	 *
	 * Instead, the caller gives us a pointer to the kpti_dbg frame in
	 * %r13. The KPTI_R13 member of the kpti_dbg frame has already been
	 * set to what the real %r13 should be before we IRET.
	 *
	 * Additionally, KDI keeps a copy of the incoming %cr3 value when it
	 * took an interrupt, and has put that back in the kpti_dbg area for us
	 * to use, so we don't do any sniffing of %cs here. This is important
	 * so that debugging code that changes %cr3 is possible.
	 */
	ENTRY_NP(tr_iret_kdi)
	movq	%r14, KPTI_R14(%r13)	/* %r14 has to be preserved by us */

	movq	%rsp, %r14	/* original %rsp is pointing at IRET frame */
	leaq	KPTI_TOP(%r13), %rsp
	pushq	T_FRAMERET_SS(%r14)
	pushq	T_FRAMERET_RSP(%r14)
	pushq	T_FRAMERET_RFLAGS(%r14)
	pushq	T_FRAMERET_CS(%r14)
	pushq	T_FRAMERET_RIP(%r14)

	movq	KPTI_TR_CR3(%r13), %r14
	movq	%r14, %cr3

	movq	KPTI_R14(%r13), %r14
	movq	KPTI_R13(%r13), %r13	/* preserved by our caller */

	iretq
	SET_SIZE(tr_iret_kdi)

.global tr_intr_ret_end
tr_intr_ret_end:

	/*
	 * Interrupt/trap entry trampolines
	 */

	/* CPU pushed an error code, and ISR wants one */
#define	MK_INTR_TRAMPOLINE(isr)			\
	ENTRY_NP(tr_##isr);			\
	INTERRUPT_TRAMPOLINE;			\
	jmp	isr;				\
	SET_SIZE(tr_##isr)

	/* CPU didn't push an error code, and ISR doesn't want one */
#define	MK_INTR_TRAMPOLINE_NOERR(isr)		\
	ENTRY_NP(tr_##isr);			\
	push	$0;				\
	INTERRUPT_TRAMPOLINE_NOERR;		\
	jmp	isr;				\
	SET_SIZE(tr_##isr)

	/* CPU pushed an error code, and ISR wants one */
#define	MK_DBG_INTR_TRAMPOLINE(isr)		\
	ENTRY_NP(tr_##isr);			\
	DBG_INTERRUPT_TRAMPOLINE;		\
	jmp	isr;				\
	SET_SIZE(tr_##isr)

	/* CPU didn't push an error code, and ISR doesn't want one */
#define	MK_DBG_INTR_TRAMPOLINE_NOERR(isr)	\
	ENTRY_NP(tr_##isr);			\
	push	$0;				\
	DBG_INTERRUPT_TRAMPOLINE_NOERR;		\
	jmp	isr;				\
	SET_SIZE(tr_##isr)

	MK_INTR_TRAMPOLINE_NOERR(div0trap)
	MK_DBG_INTR_TRAMPOLINE_NOERR(dbgtrap)
	MK_DBG_INTR_TRAMPOLINE_NOERR(brktrap)
	MK_INTR_TRAMPOLINE_NOERR(ovflotrap)
	MK_INTR_TRAMPOLINE_NOERR(boundstrap)
	MK_INTR_TRAMPOLINE_NOERR(invoptrap)
	MK_INTR_TRAMPOLINE_NOERR(ndptrap)
	MK_INTR_TRAMPOLINE(invtsstrap)
	MK_DBG_INTR_TRAMPOLINE(segnptrap)
	MK_DBG_INTR_TRAMPOLINE(stktrap)
	MK_DBG_INTR_TRAMPOLINE(gptrap)
	MK_DBG_INTR_TRAMPOLINE(pftrap)
	MK_INTR_TRAMPOLINE_NOERR(resvtrap)
	MK_INTR_TRAMPOLINE_NOERR(ndperr)
	MK_INTR_TRAMPOLINE(achktrap)
	MK_INTR_TRAMPOLINE_NOERR(xmtrap)
	MK_INTR_TRAMPOLINE_NOERR(invaltrap)
	MK_INTR_TRAMPOLINE_NOERR(fasttrap)
	MK_INTR_TRAMPOLINE_NOERR(dtrace_ret)

	/*
	 * These are special because they can interrupt other traps, and
	 * each other. We don't need to pivot their stacks, because they have
	 * dedicated IST stack space, but we need to change %cr3.
	 */
	ENTRY_NP(tr_nmiint)
	pushq	%r13
	mov	kpti_safe_cr3, %r13
	mov	%r13, %cr3
	popq	%r13
	jmp	nmiint
	SET_SIZE(tr_nmiint)

#if !defined(__xpv)
	ENTRY_NP(tr_syserrtrap)
	/*
	 * If we got here we should always have a zero error code pushed.
	 * The INT $0x8 instr doesn't seem to push one, though, which we use
	 * as an emergency panic in the other trampolines. So adjust things
	 * here.
	 */
	cmpq	$0, (%rsp)
	je	1f
	pushq	$0
1:
	pushq	%r13
	mov	kpti_safe_cr3, %r13
	mov	%r13, %cr3
	popq	%r13
	jmp	syserrtrap
	SET_SIZE(tr_syserrtrap)
#endif

	ENTRY_NP(tr_mcetrap)
	pushq	%r13
	mov	kpti_safe_cr3, %r13
	mov	%r13, %cr3
	popq	%r13
	jmp	mcetrap
	SET_SIZE(tr_mcetrap)

	/*
	 * Interrupts start at 32
	 */
#define MKIVCT(n)			\
	ENTRY_NP(tr_ivct##n)		\
	push	$0;			\
	INTERRUPT_TRAMPOLINE;		\
	push	$n - 0x20;		\
	jmp	cmnint;			\
	SET_SIZE(tr_ivct##n)

	MKIVCT(32);	MKIVCT(33);	MKIVCT(34);	MKIVCT(35);
	MKIVCT(36);	MKIVCT(37);	MKIVCT(38);	MKIVCT(39);
	MKIVCT(40);	MKIVCT(41);	MKIVCT(42);	MKIVCT(43);
	MKIVCT(44);	MKIVCT(45);	MKIVCT(46);	MKIVCT(47);
	MKIVCT(48);	MKIVCT(49);	MKIVCT(50);	MKIVCT(51);
	MKIVCT(52);	MKIVCT(53);	MKIVCT(54);	MKIVCT(55);
	MKIVCT(56);	MKIVCT(57);	MKIVCT(58);	MKIVCT(59);
	MKIVCT(60);	MKIVCT(61);	MKIVCT(62);	MKIVCT(63);
	MKIVCT(64);	MKIVCT(65);	MKIVCT(66);	MKIVCT(67);
	MKIVCT(68);	MKIVCT(69);	MKIVCT(70);	MKIVCT(71);
	MKIVCT(72);	MKIVCT(73);	MKIVCT(74);	MKIVCT(75);
	MKIVCT(76);	MKIVCT(77);	MKIVCT(78);	MKIVCT(79);
	MKIVCT(80);	MKIVCT(81);	MKIVCT(82);	MKIVCT(83);
	MKIVCT(84);	MKIVCT(85);	MKIVCT(86);	MKIVCT(87);
	MKIVCT(88);	MKIVCT(89);	MKIVCT(90);	MKIVCT(91);
	MKIVCT(92);	MKIVCT(93);	MKIVCT(94);	MKIVCT(95);
	MKIVCT(96);	MKIVCT(97);	MKIVCT(98);	MKIVCT(99);
	MKIVCT(100);	MKIVCT(101);	MKIVCT(102);	MKIVCT(103);
	MKIVCT(104);	MKIVCT(105);	MKIVCT(106);	MKIVCT(107);
	MKIVCT(108);	MKIVCT(109);	MKIVCT(110);	MKIVCT(111);
	MKIVCT(112);	MKIVCT(113);	MKIVCT(114);	MKIVCT(115);
	MKIVCT(116);	MKIVCT(117);	MKIVCT(118);	MKIVCT(119);
	MKIVCT(120);	MKIVCT(121);	MKIVCT(122);	MKIVCT(123);
	MKIVCT(124);	MKIVCT(125);	MKIVCT(126);	MKIVCT(127);
	MKIVCT(128);	MKIVCT(129);	MKIVCT(130);	MKIVCT(131);
	MKIVCT(132);	MKIVCT(133);	MKIVCT(134);	MKIVCT(135);
	MKIVCT(136);	MKIVCT(137);	MKIVCT(138);	MKIVCT(139);
	MKIVCT(140);	MKIVCT(141);	MKIVCT(142);	MKIVCT(143);
	MKIVCT(144);	MKIVCT(145);	MKIVCT(146);	MKIVCT(147);
	MKIVCT(148);	MKIVCT(149);	MKIVCT(150);	MKIVCT(151);
	MKIVCT(152);	MKIVCT(153);	MKIVCT(154);	MKIVCT(155);
	MKIVCT(156);	MKIVCT(157);	MKIVCT(158);	MKIVCT(159);
	MKIVCT(160);	MKIVCT(161);	MKIVCT(162);	MKIVCT(163);
	MKIVCT(164);	MKIVCT(165);	MKIVCT(166);	MKIVCT(167);
	MKIVCT(168);	MKIVCT(169);	MKIVCT(170);	MKIVCT(171);
	MKIVCT(172);	MKIVCT(173);	MKIVCT(174);	MKIVCT(175);
	MKIVCT(176);	MKIVCT(177);	MKIVCT(178);	MKIVCT(179);
	MKIVCT(180);	MKIVCT(181);	MKIVCT(182);	MKIVCT(183);
	MKIVCT(184);	MKIVCT(185);	MKIVCT(186);	MKIVCT(187);
	MKIVCT(188);	MKIVCT(189);	MKIVCT(190);	MKIVCT(191);
	MKIVCT(192);	MKIVCT(193);	MKIVCT(194);	MKIVCT(195);
	MKIVCT(196);	MKIVCT(197);	MKIVCT(198);	MKIVCT(199);
	MKIVCT(200);	MKIVCT(201);	MKIVCT(202);	MKIVCT(203);
	MKIVCT(204);	MKIVCT(205);	MKIVCT(206);	MKIVCT(207);
	MKIVCT(208);	MKIVCT(209);	MKIVCT(210);	MKIVCT(211);
	MKIVCT(212);	MKIVCT(213);	MKIVCT(214);	MKIVCT(215);
	MKIVCT(216);	MKIVCT(217);	MKIVCT(218);	MKIVCT(219);
	MKIVCT(220);	MKIVCT(221);	MKIVCT(222);	MKIVCT(223);
	MKIVCT(224);	MKIVCT(225);	MKIVCT(226);	MKIVCT(227);
	MKIVCT(228);	MKIVCT(229);	MKIVCT(230);	MKIVCT(231);
	MKIVCT(232);	MKIVCT(233);	MKIVCT(234);	MKIVCT(235);
	MKIVCT(236);	MKIVCT(237);	MKIVCT(238);	MKIVCT(239);
	MKIVCT(240);	MKIVCT(241);	MKIVCT(242);	MKIVCT(243);
	MKIVCT(244);	MKIVCT(245);	MKIVCT(246);	MKIVCT(247);
	MKIVCT(248);	MKIVCT(249);	MKIVCT(250);	MKIVCT(251);
	MKIVCT(252);	MKIVCT(253);	MKIVCT(254);	MKIVCT(255);

	/*
	 * We're PCIDE, but we don't have INVPCID. The only way to invalidate a
	 * PCID other than the current one, then, is to load its cr3 then
	 * invlpg. But loading kf_user_cr3 means we can no longer access our
	 * caller's text mapping (or indeed, its stack). So this little helper
	 * has to live within our trampoline text region.
	 *
	 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
	 */
	ENTRY_NP(tr_mmu_flush_user_range)
	push	%rbx
	/* When we read cr3, it never has the NOINVL bit set. */
	mov	%cr3, %rax
	movq	$CR3_NOINVL_BIT, %rbx
	orq	%rbx, %rax

	mov	%rcx, %cr3
	add	%rdi, %rsi
.align	ASM_ENTRY_ALIGN
1:
	invlpg	(%rdi)
	add	%rdx, %rdi
	cmp	%rsi, %rdi
	jb	1b
	mov	%rax, %cr3
	pop	%rbx
	retq
	SET_SIZE(tr_mmu_flush_user_range)
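
	/*
	 * A hedged sketch of the C-side view of the helper above (the
	 * prototype and argument names are illustrative only; the real
	 * declaration and caller live elsewhere in the kernel):
	 *
	 *	extern void tr_mmu_flush_user_range(uint64_t addr, size_t len,
	 *	    size_t pgsz, uint64_t user_cr3);
	 *
	 *	tr_mmu_flush_user_range(va, len, MMU_PAGESIZE, kf_user_cr3);
	 *
	 * addr/len/pgsz/cr3 arrive in %rdi/%rsi/%rdx/%rcx per the usual
	 * calling convention, matching the register usage above.
	 */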

.align MMU_PAGESIZE
.global kpti_tramp_end
kpti_tramp_end:
	nop