/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H

#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>

#include <trace/events/tlb.h>

#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
#include <asm/debugreg.h>

extern atomic64_t last_mm_ctx_id;

#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
}
#endif	/* !CONFIG_PARAVIRT_XXL */

#ifdef CONFIG_PERF_EVENTS

DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);

static inline void load_mm_cr4(struct mm_struct *mm)
{
	if (static_branch_unlikely(&rdpmc_always_available_key) ||
	    atomic_read(&mm->context.perf_rdpmc_allowed))
		cr4_set_bits(X86_CR4_PCE);
	else
		cr4_clear_bits(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
 * ldt_structs can be allocated, used, and freed, but they are never
 * modified while live.
 */
struct ldt_struct {
	/*
	 * Xen requires page-aligned LDTs with special permissions.  This is
	 * needed to prevent us from installing evil descriptors such as
	 * call gates.  On native, we could merge the ldt_struct and LDT
	 * allocations, but it's not worth trying to optimize.
	 */
	struct desc_struct	*entries;
	unsigned int		nr_entries;

	/*
	 * If PTI is in use, then the entries array is not mapped while we're
	 * in user mode.  The whole array will be aliased at the address
	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
	 * and map, and enable a new LDT without invalidating the mapping
	 * of an older, still-in-use LDT.
	 *
	 * slot will be -1 if this LDT doesn't have an alias mapping.
	 */
	int			slot;
};

/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

static inline void *ldt_slot_va(int slot)
{
	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}
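
/*
 * For illustration: with the usual x86 values of LDT_ENTRIES == 8192 and
 * LDT_ENTRY_SIZE == 8, LDT_SLOT_STRIDE works out to 64 KiB, so the two
 * PTI alias slots are:
 *
 *	ldt_slot_va(0) == (void *)LDT_BASE_ADDR
 *	ldt_slot_va(1) == (void *)(LDT_BASE_ADDR + 0x10000)
 */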

/*
 * Used for LDT copy/destruction.
 */
static inline void init_new_context_ldt(struct mm_struct *mm)
{
	mm->context.ldt = NULL;
	init_rwsem(&mm->context.ldt_usr_sem);
}
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
void ldt_arch_exit_mmap(struct mm_struct *mm);
#else	/* CONFIG_MODIFY_LDT_SYSCALL */
static inline void init_new_context_ldt(struct mm_struct *mm) { }
static inline int ldt_dup_context(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
	return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif

static inline void load_mm_ldt(struct mm_struct *mm)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	struct ldt_struct *ldt;

	/* READ_ONCE synchronizes with smp_store_release */
	ldt = READ_ONCE(mm->context.ldt);

	/*
	 * Any change to mm->context.ldt is followed by an IPI to all
	 * CPUs with the mm active.  The LDT will not be freed until
	 * after the IPI is handled by all such CPUs.  This means that,
	 * if the ldt_struct changes before we return, the values we see
	 * will be safe, and the new values will be loaded before we run
	 * any user code.
	 *
	 * NB: don't try to convert this to use RCU without extreme care.
	 * We would still need IRQs off, because we don't want to change
	 * the local LDT after an IPI loaded a newer value than the one
	 * that we can see.
	 */

	if (unlikely(ldt)) {
		if (static_cpu_has(X86_FEATURE_PTI)) {
			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
				/*
				 * Whoops -- either the new LDT isn't mapped
				 * (if slot == -1) or is mapped into a bogus
				 * slot (if slot > 1).
				 */
				clear_LDT();
				return;
			}

			/*
			 * If page table isolation is enabled, ldt->entries
			 * will not be mapped in the userspace pagetables.
			 * Tell the CPU to access the LDT through the alias
			 * at ldt_slot_va(ldt->slot).
			 */
			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
		} else {
			set_ldt(ldt->entries, ldt->nr_entries);
		}
	} else {
		clear_LDT();
	}
#else
	clear_LDT();
#endif
}
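
/*
 * For reference, the writer side that the READ_ONCE in load_mm_ldt() pairs
 * with looks roughly like the sketch below (the real code lives in
 * arch/x86/kernel/ldt.c; the names here are only illustrative):
 *
 *	smp_store_release(&mm->context.ldt, new_ldt);
 *	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
 *
 * i.e. publish the new ldt_struct first, then IPI every CPU running this
 * mm so it reloads LDTR via load_mm_ldt() before the old LDT is freed.
 */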

static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	/*
	 * Load the LDT if either the old or new mm had an LDT.
	 *
	 * An mm will never go from having an LDT to not having an LDT.  Two
	 * mms never share an LDT, so we don't gain anything by checking to
	 * see whether the LDT changed.  There's also no guarantee that
	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
	 * then prev->context.ldt will also be non-NULL.
	 *
	 * If we really cared, we could optimize the case where prev == next
	 * and we're exiting lazy mode.  Most of the time, if this happens,
	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
	 * used by legacy code and emulators where we don't need this level of
	 * performance.
	 *
	 * This uses | instead of || because it generates better code.
	 */
	if (unlikely((unsigned long)prev->context.ldt |
		     (unsigned long)next->context.ldt))
		load_mm_ldt(next);
#endif

	DEBUG_LOCKS_WARN_ON(preemptible());
}

void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

/*
 * Init a new mm.  Used on mm copies, like at fork()
 * and on mm's that are brand-new, like at execve().
 */
static inline int init_new_context(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	mutex_init(&mm->context.lock);

	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
	atomic64_set(&mm->context.tlb_gen, 0);

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
		/* pkey 0 is the default and allocated implicitly */
		mm->context.pkey_allocation_map = 0x1;
		/* -1 means unallocated or invalid */
		mm->context.execute_only_pkey = -1;
	}
#endif
	init_new_context_ldt(mm);
	return 0;
}
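
/*
 * For illustration: pkey_allocation_map is treated as a bitmap with bit k
 * set when protection key k is in use, so the 0x1 above marks only the
 * default pkey 0 as allocated and leaves keys 1-15 free for later
 * allocation (e.g. via the pkey_alloc() syscall path).
 */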

static inline void destroy_context(struct mm_struct *mm)
{
	destroy_context_ldt(mm);
}

extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
		      struct task_struct *tsk);

extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			       struct task_struct *tsk);
#define switch_mm_irqs_off switch_mm_irqs_off

#define activate_mm(prev, next)			\
do {						\
	paravirt_activate_mm((prev), (next));	\
	switch_mm((prev), (next), NULL);	\
} while (0);

#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)			\
do {						\
	lazy_load_gs(0);			\
} while (0)
#else
#define deactivate_mm(tsk, mm)			\
do {						\
	load_gs_index(0);			\
	loadsegment(fs, 0);			\
} while (0)
#endif

static inline void arch_dup_pkeys(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return;

	/* Duplicate the oldmm pkey state in mm: */
	mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
	mm->context.execute_only_pkey   = oldmm->context.execute_only_pkey;
#endif
}

static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	arch_dup_pkeys(oldmm, mm);
	paravirt_arch_dup_mmap(oldmm, mm);
	return ldt_dup_context(oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	paravirt_arch_exit_mmap(mm);
	ldt_arch_exit_mmap(mm);
}

#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return	!IS_ENABLED(CONFIG_IA32_EMULATION) ||
		!(mm->context.ia32_compat == TIF_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return false;
}
#endif

static inline void arch_bprm_mm_init(struct mm_struct *mm,
				     struct vm_area_struct *vma)
{
	mpx_mm_init(mm);
}

static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
			      unsigned long start, unsigned long end)
{
	/*
	 * mpx_notify_unmap() goes and reads a rarely-hot
	 * cacheline in the mm_struct.  That can be expensive
	 * enough to be seen in profiles.
	 *
	 * The mpx_notify_unmap() call and its contents have been
	 * observed to affect munmap() performance on hardware
	 * where MPX is not present.
	 *
	 * The unlikely() optimizes for the fast case: no MPX
	 * in the CPU, or no MPX use in the process.  Even if
	 * we get this wrong (in the unlikely event that MPX
	 * is widely enabled on some system) the overhead of
	 * MPX itself (reading bounds tables) is expected to
	 * overwhelm the overhead of getting this unlikely()
	 * consistently wrong.
	 */
	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
		mpx_notify_unmap(mm, vma, start, end);
}

/*
 * We only want to enforce protection keys on the current process
 * because we effectively have no access to PKRU for other
 * processes or any way to tell *which* PKRU in a threaded
 * process we could use.
 *
 * So do not enforce things if the VMA is not from the current
 * mm, or if we are in a kernel thread.
 */
static inline bool vma_is_foreign(struct vm_area_struct *vma)
{
	if (!current->mm)
		return true;
	/*
	 * Should PKRU be enforced on the access to this VMA?  If
	 * the VMA is from another process, then PKRU has no
	 * relevance and should not be enforced.
	 */
	if (current->mm != vma->vm_mm)
		return true;

	return false;
}

static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
					     bool write, bool execute, bool foreign)
{
	/* pkeys never affect instruction fetches */
	if (execute)
		return true;
	/* allow access if the VMA is not one from this process */
	if (foreign || vma_is_foreign(vma))
		return true;
	return __pkru_allows_pkey(vma_pkey(vma), write);
}
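
/*
 * For illustration, assuming the architectural PKRU layout (two bits per
 * key: access-disable at bit 2*pkey, write-disable at bit 2*pkey + 1):
 * with PKRU == 0x8, only the write-disable bit of pkey 1 is set, so for a
 * VMA of the current mm tagged with pkey 1:
 *
 *	arch_vma_access_permitted(vma, false, false, false) == true   (read)
 *	arch_vma_access_permitted(vma, true,  false, false) == false  (write)
 */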

/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs to be used very carefully.
 */
static inline unsigned long __get_current_cr3_fast(void)
{
	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
		this_cpu_read(cpu_tlbstate.loaded_mm_asid));

	/* For now, be very restrictive about when this can be called. */
	VM_WARN_ON(in_nmi() || preemptible());

	VM_BUG_ON(cr3 != __read_cr3());
	return cr3;
}
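
/*
 * A minimal usage sketch (illustrative only): a caller that briefly points
 * CR3 somewhere else can snapshot and restore the current value without
 * paying for a CR3 read:
 *
 *	unsigned long cr3 = __get_current_cr3_fast();
 *	...					(run with a different CR3)
 *	write_cr3(cr3);
 */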

typedef struct {
	struct mm_struct *mm;
} temp_mm_state_t;

/*
 * Using a temporary mm allows setting up temporary mappings that are not
 * accessible by other CPUs. Such mappings are needed to perform sensitive
 * memory writes that override the kernel memory protections (e.g., W^X),
 * without exposing the temporary page-table mappings that are required for
 * these write operations to other CPUs. Using a temporary mm also avoids
 * TLB shootdowns when the mapping is torn down.
 *
 * Context: The temporary mm needs to be used exclusively by a single core.
 *          To harden security, IRQs must be disabled while the temporary mm
 *          is loaded, thereby preventing interrupt handler bugs from
 *          overriding the kernel memory protection.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();
	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	/*
	 * If breakpoints are enabled, disable them while the temporary mm is
	 * used. Userspace might set up watchpoints on addresses that are used
	 * in the temporary mm, which would lead to wrong signals being sent or
	 * crashes.
	 *
	 * Note that breakpoints are not disabled selectively, which also causes
	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
	 * undesirable, but still seems reasonable as the code that runs in the
	 * temporary mm should be short.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_disable();

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.mm, current);

	/*
	 * Restore the breakpoints if they were disabled before the temporary mm
	 * was loaded.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();
}
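
/*
 * For illustration, the expected calling pattern looks roughly like this
 * (my_temp_mm is a caller-provided mm used only on this CPU; the real
 * users, e.g. the kernel text poking code, live outside this header):
 *
 *	temp_mm_state_t prev;
 *	unsigned long flags;
 *
 *	local_irq_save(flags);
 *	prev = use_temporary_mm(my_temp_mm);
 *	... write through mappings that only my_temp_mm makes visible ...
 *	unuse_temporary_mm(prev);
 *	local_irq_restore(flags);
 */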

#endif /* _ASM_X86_MMU_CONTEXT_H */