/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H

#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>

#include <trace/events/tlb.h>

#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
#include <asm/debugreg.h>

extern atomic64_t last_mm_ctx_id;

#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
}
#endif /* !CONFIG_PARAVIRT_XXL */

#ifdef CONFIG_PERF_EVENTS

DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);

static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
{
	if (static_branch_unlikely(&rdpmc_always_available_key) ||
	    (!static_branch_unlikely(&rdpmc_never_available_key) &&
	     atomic_read(&mm->context.perf_rdpmc_allowed)))
		cr4_set_bits_irqsoff(X86_CR4_PCE);
	else
		cr4_clear_bits_irqsoff(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
 * ldt_structs can be allocated, used, and freed, but they are never
 * modified while live.
 */
struct ldt_struct {
	/*
	 * Xen requires page-aligned LDTs with special permissions.  This is
	 * needed to prevent us from installing evil descriptors such as
	 * call gates.  On native, we could merge the ldt_struct and LDT
	 * allocations, but it's not worth trying to optimize.
	 */
	struct desc_struct	*entries;
	unsigned int		nr_entries;

	/*
	 * If PTI is in use, then the entries array is not mapped while we're
	 * in user mode.  The whole array will be aliased at the address
	 * given by ldt_slot_va(slot).  We use two slots so that we can
	 * allocate and map a new LDT without invalidating the mapping
	 * of an older, still-in-use LDT.
	 *
	 * slot will be -1 if this LDT doesn't have an alias mapping.
	 */
	int			slot;
};

/*
 * Used for LDT copy/destruction.
 */
static inline void init_new_context_ldt(struct mm_struct *mm)
{
	mm->context.ldt = NULL;
	init_rwsem(&mm->context.ldt_usr_sem);
}
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
void ldt_arch_exit_mmap(struct mm_struct *mm);
#else	/* CONFIG_MODIFY_LDT_SYSCALL */
static inline void init_new_context_ldt(struct mm_struct *mm) { }
static inline int ldt_dup_context(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
	return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
extern void load_mm_ldt(struct mm_struct *mm);
extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
static inline void load_mm_ldt(struct mm_struct *mm)
{
	clear_LDT();
}
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
	DEBUG_LOCKS_WARN_ON(preemptible());
}
#endif

extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

/*
 * Init a new mm.  Used on mm copies, like at fork()
 * and on mm's that are brand-new, like at execve().
 */
static inline int init_new_context(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	mutex_init(&mm->context.lock);

	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
	atomic64_set(&mm->context.tlb_gen, 0);

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
		/* pkey 0 is the default and allocated implicitly */
		mm->context.pkey_allocation_map = 0x1;
		/* -1 means unallocated or invalid */
		mm->context.execute_only_pkey = -1;
	}
#endif
	init_new_context_ldt(mm);
	return 0;
}
static inline void destroy_context(struct mm_struct *mm)
{
	destroy_context_ldt(mm);
}
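
/*
 * Purely illustrative sketch, not code that is built here: the
 * pkey_allocation_map set up in init_new_context() above is a bitmap with one
 * bit per protection key, and bit 0 is pre-set because pkey 0 is implicitly
 * allocated for every mm.  Allocating or freeing some other key 'pkey' is,
 * roughly, just bit manipulation of that map (the real helpers live in
 * <asm/pkeys.h>):
 *
 *	mm->context.pkey_allocation_map |=  (1U << pkey);	// allocate
 *	mm->context.pkey_allocation_map &= ~(1U << pkey);	// free
 */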

extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
		      struct task_struct *tsk);

extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			       struct task_struct *tsk);
#define switch_mm_irqs_off switch_mm_irqs_off

#define activate_mm(prev, next)			\
do {						\
	paravirt_activate_mm((prev), (next));	\
	switch_mm((prev), (next), NULL);	\
} while (0);

#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)			\
do {						\
	lazy_load_gs(0);			\
} while (0)
#else
#define deactivate_mm(tsk, mm)			\
do {						\
	load_gs_index(0);			\
	loadsegment(fs, 0);			\
} while (0)
#endif

static inline void arch_dup_pkeys(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return;

	/* Duplicate the oldmm pkey state in mm: */
	mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
	mm->context.execute_only_pkey   = oldmm->context.execute_only_pkey;
#endif
}

static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	arch_dup_pkeys(oldmm, mm);
	paravirt_arch_dup_mmap(oldmm, mm);
	return ldt_dup_context(oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	paravirt_arch_exit_mmap(mm);
	ldt_arch_exit_mmap(mm);
}

#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return	!IS_ENABLED(CONFIG_IA32_EMULATION) ||
		!(mm->context.ia32_compat == TIF_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return false;
}
#endif

static inline void arch_bprm_mm_init(struct mm_struct *mm,
		struct vm_area_struct *vma)
{
	mpx_mm_init(mm);
}

static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
			      unsigned long end)
{
	/*
	 * mpx_notify_unmap() goes and reads a rarely-hot
	 * cacheline in the mm_struct.  That can be expensive
	 * enough to be seen in profiles.
	 *
	 * The mpx_notify_unmap() call and its contents have been
	 * observed to affect munmap() performance on hardware
	 * where MPX is not present.
	 *
	 * The unlikely() optimizes for the fast case: no MPX
	 * in the CPU, or no MPX use in the process.  Even if
	 * we get this wrong (in the unlikely event that MPX
	 * is widely enabled on some system) the overhead of
	 * MPX itself (reading bounds tables) is expected to
	 * overwhelm the overhead of getting this unlikely()
	 * consistently wrong.
	 */
	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
		mpx_notify_unmap(mm, start, end);
}

/*
 * We only want to enforce protection keys on the current process
 * because we effectively have no access to PKRU for other
 * processes or any way to tell *which* PKRU in a threaded
 * process we could use.
 *
 * So do not enforce things if the VMA is not from the current
 * mm, or if we are in a kernel thread.
 */
static inline bool vma_is_foreign(struct vm_area_struct *vma)
{
	if (!current->mm)
		return true;
	/*
	 * Should PKRU be enforced on the access to this VMA?  If
	 * the VMA is from another process, then PKRU has no
	 * relevance and should not be enforced.
	 */
	if (current->mm != vma->vm_mm)
		return true;

	return false;
}

static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
		bool write, bool execute, bool foreign)
{
	/* pkeys never affect instruction fetches */
	if (execute)
		return true;
	/* allow access if the VMA is not one from this process */
	if (foreign || vma_is_foreign(vma))
		return true;
	return __pkru_allows_pkey(vma_pkey(vma), write);
}
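
/*
 * Illustrative user-space sketch (hypothetical, not kernel code; 'addr' and
 * 'len' are assumed to describe an existing mapping) of the kind of policy
 * that arch_vma_access_permitted()/__pkru_allows_pkey() enforce on the kernel
 * side, for instance when get_user_pages() checks a VMA:
 *
 *	int pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
 *	pkey_mprotect(addr, len, PROT_READ | PROT_WRITE, pkey);
 *	*(volatile char *)addr = 1;	// denied while PKRU forbids writes
 */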

/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs to be used very carefully.
 */
static inline unsigned long __get_current_cr3_fast(void)
{
	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
		this_cpu_read(cpu_tlbstate.loaded_mm_asid));

	/* For now, be very restrictive about when this can be called. */
	VM_WARN_ON(in_nmi() || preemptible());

	VM_BUG_ON(cr3 != __read_cr3());
	return cr3;
}
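
/*
 * Rough sketch of the intended calling pattern (illustrative only; a real
 * user such as KVM caches the value, and the whole sequence must run with
 * preemption disabled, as the VM_WARN_ON() above insists):
 *
 *	unsigned long cr3 = __get_current_cr3_fast();
 *	... briefly run with some other CR3 value ...
 *	write_cr3(cr3);
 */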

typedef struct {
	struct mm_struct *mm;
} temp_mm_state_t;

/*
 * Using a temporary mm allows one to set temporary mappings that are not
 * accessible by other CPUs. Such mappings are needed to perform sensitive
 * memory writes that override the kernel memory protections (e.g., W^X),
 * without exposing the temporary page-table mappings that are required for
 * these write operations to other CPUs. Using a temporary mm also avoids TLB
 * shootdowns when the mapping is torn down.
 *
 * Context: The temporary mm needs to be used exclusively by a single core. To
 *          harden security, IRQs must be disabled while the temporary mm is
 *          loaded, thereby preventing interrupt handler bugs from overriding
 *          the kernel memory protection.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();
	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	/*
	 * If breakpoints are enabled, disable them while the temporary mm is
	 * used. Userspace might set up watchpoints on addresses that are used
	 * in the temporary mm, which would lead to wrong signals being sent or
	 * crashes.
	 *
	 * Note that breakpoints are not disabled selectively, which also causes
	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
	 * undesirable, but still seems reasonable as the code that runs in the
	 * temporary mm should be short.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_disable();

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.mm, current);

	/*
	 * Restore the breakpoints if they were disabled before the temporary mm
	 * was loaded.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();
}
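
/*
 * Sketch of the intended calling pattern (illustrative only; 'patching_mm' is
 * a hypothetical mm that maps the page(s) to be written, the way the text
 * poking code sets one up):
 *
 *	temp_mm_state_t prev;
 *	unsigned long flags;
 *
 *	local_irq_save(flags);
 *	prev = use_temporary_mm(patching_mm);
 *
 *	... write through the mapping that only patching_mm provides ...
 *
 *	unuse_temporary_mm(prev);
 *	local_irq_restore(flags);
 */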

#endif /* _ASM_X86_MMU_CONTEXT_H */