/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H

#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>

#include <trace/events/tlb.h>

#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>

extern atomic64_t last_mm_ctx_id;

#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
}
#endif	/* !CONFIG_PARAVIRT_XXL */

#ifdef CONFIG_PERF_EVENTS

DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);

static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
{
	if (static_branch_unlikely(&rdpmc_always_available_key) ||
	    (!static_branch_unlikely(&rdpmc_never_available_key) &&
	     atomic_read(&mm->context.perf_rdpmc_allowed)))
		cr4_set_bits_irqsoff(X86_CR4_PCE);
	else
		cr4_clear_bits_irqsoff(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
 * ldt_structs can be allocated, used, and freed, but they are never
 * modified while live.
 */
struct ldt_struct {
	/*
	 * Xen requires page-aligned LDTs with special permissions.  This is
	 * needed to prevent us from installing evil descriptors such as
	 * call gates.  On native, we could merge the ldt_struct and LDT
	 * allocations, but it's not worth trying to optimize.
	 */
	struct desc_struct	*entries;
	unsigned int		nr_entries;

	/*
	 * If PTI is in use, then the entries array is not mapped while we're
	 * in user mode.  The whole array will be aliased at the address
	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
	 * and map, and enable a new LDT without invalidating the mapping
	 * of an older, still-in-use LDT.
	 *
	 * slot will be -1 if this LDT doesn't have an alias mapping.
	 */
	int			slot;
};
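
/*
 * Illustrative sketch only (an assumption for clarity, not part of this
 * header's API): the two alias slots are meant to alternate on an LDT
 * update roughly as follows, with the real update path living in
 * arch/x86/kernel/ldt.c:
 *
 *	new_ldt = allocate and fill a new ldt_struct;
 *	map new_ldt->entries at the slot the old LDT is *not* using
 *		(slot 0 if there is no old LDT);
 *	install new_ldt;	// the old alias stays valid throughout
 *	free the old ldt_struct and tear down its alias mapping;
 */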

/*
 * Used for LDT copy/destruction.
 */
static inline void init_new_context_ldt(struct mm_struct *mm)
{
	mm->context.ldt = NULL;
	init_rwsem(&mm->context.ldt_usr_sem);
}
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
void ldt_arch_exit_mmap(struct mm_struct *mm);
#else	/* CONFIG_MODIFY_LDT_SYSCALL */
static inline void init_new_context_ldt(struct mm_struct *mm) { }
static inline int ldt_dup_context(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
	return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif

#ifdef CONFIG_MODIFY_LDT_SYSCALL
extern void load_mm_ldt(struct mm_struct *mm);
extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
static inline void load_mm_ldt(struct mm_struct *mm)
{
	clear_LDT();
}
static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
	DEBUG_LOCKS_WARN_ON(preemptible());
}
#endif

extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

/*
 * Init a new mm.  Used on mm copies, like at fork()
 * and on mm's that are brand-new, like at execve().
 */
static inline int init_new_context(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	mutex_init(&mm->context.lock);

	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
	atomic64_set(&mm->context.tlb_gen, 0);

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
		/* pkey 0 is the default and allocated implicitly */
		mm->context.pkey_allocation_map = 0x1;
		/* -1 means unallocated or invalid */
		mm->context.execute_only_pkey = -1;
	}
#endif
	init_new_context_ldt(mm);
	return 0;
}
static inline void destroy_context(struct mm_struct *mm)
{
	destroy_context_ldt(mm);
}

extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
		      struct task_struct *tsk);

extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			       struct task_struct *tsk);
#define switch_mm_irqs_off switch_mm_irqs_off

#define activate_mm(prev, next)			\
do {						\
	paravirt_activate_mm((prev), (next));	\
	switch_mm((prev), (next), NULL);	\
} while (0);

#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)			\
do {						\
	lazy_load_gs(0);			\
} while (0)
#else
#define deactivate_mm(tsk, mm)			\
do {						\
	load_gs_index(0);			\
	loadsegment(fs, 0);			\
} while (0)
#endif

static inline void arch_dup_pkeys(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return;

	/* Duplicate the oldmm pkey state in mm: */
	mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
	mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
#endif
}

static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	arch_dup_pkeys(oldmm, mm);
	paravirt_arch_dup_mmap(oldmm, mm);
	return ldt_dup_context(oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	paravirt_arch_exit_mmap(mm);
	ldt_arch_exit_mmap(mm);
}

#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return	!IS_ENABLED(CONFIG_IA32_EMULATION) ||
		!(mm->context.ia32_compat == TIF_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return false;
}
#endif

static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
			      unsigned long end)
{
}

/*
 * We only want to enforce protection keys on the current process
 * because we effectively have no access to PKRU for other
 * processes or any way to tell *which* PKRU in a threaded
 * process we could use.
 *
 * So do not enforce things if the VMA is not from the current
 * mm, or if we are in a kernel thread.
 */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
		bool write, bool execute, bool foreign)
{
	/* pkeys never affect instruction fetches */
	if (execute)
		return true;
	/* allow access if the VMA is not one from this process */
	if (foreign || vma_is_foreign(vma))
		return true;
	return __pkru_allows_pkey(vma_pkey(vma), write);
}

/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs to be used very carefully.
 */
static inline unsigned long __get_current_cr3_fast(void)
{
	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
		this_cpu_read(cpu_tlbstate.loaded_mm_asid));

	/* For now, be very restrictive about when this can be called. */
	VM_WARN_ON(in_nmi() || preemptible());

	VM_BUG_ON(cr3 != __read_cr3());
	return cr3;
}

typedef struct {
	struct mm_struct *mm;
} temp_mm_state_t;

/*
 * Using a temporary mm makes it possible to set temporary mappings that are
 * not accessible by other CPUs. Such mappings are needed to perform sensitive
 * memory writes that override the kernel memory protections (e.g., W^X),
 * without exposing the temporary page-table mappings that are required for
 * these write operations to other CPUs. Using a temporary mm also avoids TLB
 * shootdowns when the mapping is torn down.
 *
 * Context: The temporary mm needs to be used exclusively by a single core. To
 *          harden security, IRQs must be disabled while the temporary mm is
 *          loaded, thereby preventing interrupt handler bugs from overriding
 *          the kernel memory protection.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();
	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	/*
	 * If breakpoints are enabled, disable them while the temporary mm is
	 * used. Userspace might set up watchpoints on addresses that are used
	 * in the temporary mm, which would lead to wrong signals being sent or
	 * crashes.
	 *
	 * Note that breakpoints are not disabled selectively, which also causes
	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
	 * undesirable, but still seems reasonable as the code that runs in the
	 * temporary mm should be short.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_disable();

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.mm, current);

	/*
	 * Restore the breakpoints if they were disabled before the temporary mm
	 * was loaded.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();
}
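
/*
 * Illustrative sketch of the intended use_temporary_mm()/unuse_temporary_mm()
 * pairing; it is not compiled here, and "poking_mm" merely stands in for
 * whatever dedicated mm the caller has set aside (the kernel's text-poking
 * code uses this pattern):
 *
 *	temp_mm_state_t prev;
 *	unsigned long flags;
 *
 *	local_irq_save(flags);
 *	prev = use_temporary_mm(poking_mm);
 *
 *	... write through a mapping that exists only in poking_mm ...
 *
 *	unuse_temporary_mm(prev);
 *	local_irq_restore(flags);
 */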

#endif /* _ASM_X86_MMU_CONTEXT_H */