// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014, The Linux Foundation. All rights reserved.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mem_encrypt.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/pagewalk.h>

#include <asm/cacheflush.h>
#include <asm/pgtable-prot.h>
#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/kfence.h>

struct page_change_data {
	pgprot_t set_mask;
	pgprot_t clear_mask;
};

static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk)
{
	struct page_change_data *masks = walk->private;

	/*
	 * Some users clear and set bits which alias each other (e.g. PTE_NG
	 * and PTE_PRESENT_INVALID). It is therefore important that we always
	 * clear first, then set.
	 */
	val &= ~(pgprot_val(masks->clear_mask));
	val |= (pgprot_val(masks->set_mask));

	return val;
}

static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pud_t val = pudp_get(pud);

	if (pud_leaf(val)) {
		if (WARN_ON_ONCE((next - addr) != PUD_SIZE))
			return -EINVAL;
		val = __pud(set_pageattr_masks(pud_val(val), walk));
		set_pud(pud, val);
		walk->action = ACTION_CONTINUE;
	}

	return 0;
}

static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pmd_t val = pmdp_get(pmd);

	if (pmd_leaf(val)) {
		if (WARN_ON_ONCE((next - addr) != PMD_SIZE))
			return -EINVAL;
		val = __pmd(set_pageattr_masks(pmd_val(val), walk));
		set_pmd(pmd, val);
		walk->action = ACTION_CONTINUE;
	}

	return 0;
}

static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pte_t val = __ptep_get(pte);

	val = __pte(set_pageattr_masks(pte_val(val), walk));
	__set_pte(pte, val);

	return 0;
}

static const struct mm_walk_ops pageattr_ops = {
	.pud_entry	= pageattr_pud_entry,
	.pmd_entry	= pageattr_pmd_entry,
	.pte_entry	= pageattr_pte_entry,
};

bool rodata_full __ro_after_init = true;

bool can_set_direct_map(void)
{
	/*
	 * rodata_full and DEBUG_PAGEALLOC require the linear map to be
	 * mapped at page granularity, so that it is possible to
	 * protect/unprotect single pages.
	 *
	 * KFENCE pool requires page-granular mapping if initialized late.
	 *
	 * Realms need to make pages shared/protected at page granularity.
	 */
	return rodata_full || debug_pagealloc_enabled() ||
	       arm64_kfence_can_set_direct_map() || is_realm_world();
}

static int update_range_prot(unsigned long start, unsigned long size,
			     pgprot_t set_mask, pgprot_t clear_mask)
{
	struct page_change_data data;
	int ret;

	data.set_mask = set_mask;
	data.clear_mask = clear_mask;

	ret = split_kernel_leaf_mapping(start, start + size);
	if (WARN_ON_ONCE(ret))
		return ret;

	lazy_mmu_mode_enable();

	/*
	 * The caller must ensure that the range we are operating on does not
	 * partially overlap a block mapping or a contiguous mapping. Any such
	 * case must be eliminated by splitting the mapping.
	 */
	ret = walk_kernel_page_table_range_lockless(start, start + size,
						    &pageattr_ops, NULL, &data);
	lazy_mmu_mode_disable();

	return ret;
}
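/*
 * For illustration: update_range_prot() applies a set/clear mask pair but
 * deliberately leaves the TLB maintenance to the caller, which is why the
 * *_noflush() helpers further down can use it directly. Making a single
 * page read-only by hand would look roughly like the sketch below (real
 * users should go through the set_memory_*() helpers instead):
 *
 *	ret = update_range_prot(addr, PAGE_SIZE,
 *				__pgprot(PTE_RDONLY),
 *				__pgprot(PTE_WRITE));
 *	if (!ret)
 *		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 */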
static int __change_memory_common(unsigned long start, unsigned long size,
				  pgprot_t set_mask, pgprot_t clear_mask)
{
	int ret;

	ret = update_range_prot(start, size, set_mask, clear_mask);

	/*
	 * If the memory is being switched from present-invalid to valid
	 * without changing any other bits then a TLBI isn't required as a
	 * non-valid entry cannot be cached in the TLB.
	 */
	if (pgprot_val(set_mask) != PTE_PRESENT_VALID_KERNEL ||
	    pgprot_val(clear_mask) != PTE_PRESENT_INVALID)
		flush_tlb_kernel_range(start, start + size);
	return ret;
}

static int change_memory_common(unsigned long addr, int numpages,
				pgprot_t set_mask, pgprot_t clear_mask)
{
	unsigned long start = addr;
	unsigned long size = PAGE_SIZE * numpages;
	unsigned long end = start + size;
	struct vm_struct *area;
	int ret;

	if (!PAGE_ALIGNED(addr)) {
		start &= PAGE_MASK;
		end = start + size;
		WARN_ON_ONCE(1);
	}

	/*
	 * Kernel VA mappings are always live, and splitting live section
	 * mappings into page mappings may cause TLB conflicts. This means
	 * we have to ensure that changing the permission bits of the range
	 * we are operating on does not result in such splitting.
	 *
	 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
	 * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
	 * mappings are updated and splitting is never needed.
	 *
	 * So check whether the [addr, addr + size) interval is entirely
	 * covered by precisely one VM area that has the VM_ALLOC flag set.
	 */
	area = find_vm_area((void *)addr);
	if (!area ||
	    ((unsigned long)kasan_reset_tag((void *)end) >
	     (unsigned long)kasan_reset_tag(area->addr) + area->size) ||
	    ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
		return -EINVAL;

	if (!numpages)
		return 0;

	/*
	 * If we are manipulating read-only permissions, apply the same
	 * change to the linear mapping of the pages that back this VM area.
	 */
	if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
			    pgprot_val(clear_mask) == PTE_RDONLY)) {
		unsigned long idx = ((unsigned long)kasan_reset_tag((void *)start) -
				     (unsigned long)kasan_reset_tag(area->addr))
				    >> PAGE_SHIFT;
		for (; numpages; idx++, numpages--) {
			ret = __change_memory_common((u64)page_address(area->pages[idx]),
						     PAGE_SIZE, set_mask, clear_mask);
			if (ret)
				return ret;
		}
	}

	/*
	 * Get rid of potentially aliasing lazily unmapped vm areas that may
	 * have permissions set that deviate from the ones we are setting
	 * here.
	 */
	vm_unmap_aliases();

	return __change_memory_common(start, size, set_mask, clear_mask);
}
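/*
 * For illustration: the exported set_memory_*() helpers below are thin
 * wrappers around change_memory_common() with fixed mask pairs. The
 * typical usage pattern (a sketch, assuming a page-aligned vmalloc()
 * buffer) is to tighten permissions once a region has been populated:
 *
 *	buf = vmalloc(nr_pages * PAGE_SIZE);
 *	... fill buf ...
 *	err = set_memory_ro((unsigned long)buf, nr_pages);
 *
 * Because change_memory_common() only accepts plain VM_ALLOC areas, the
 * underlying mappings are page-granular and never need splitting.
 */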
int set_memory_ro(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
				    __pgprot(PTE_RDONLY),
				    __pgprot(PTE_WRITE));
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
				    __pgprot(PTE_WRITE),
				    __pgprot(PTE_RDONLY));
}

int set_memory_nx(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
				    __pgprot(PTE_PXN),
				    __pgprot(PTE_MAYBE_GP));
}

int set_memory_x(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
				    __pgprot(PTE_MAYBE_GP),
				    __pgprot(PTE_PXN));
}

int set_memory_valid(unsigned long addr, int numpages, int enable)
{
	if (enable)
		return __change_memory_common(addr, PAGE_SIZE * numpages,
					      __pgprot(PTE_PRESENT_VALID_KERNEL),
					      __pgprot(PTE_PRESENT_INVALID));
	else
		return __change_memory_common(addr, PAGE_SIZE * numpages,
					      __pgprot(PTE_PRESENT_INVALID),
					      __pgprot(PTE_PRESENT_VALID_KERNEL));
}

int set_direct_map_invalid_noflush(struct page *page)
{
	pgprot_t clear_mask = __pgprot(PTE_PRESENT_VALID_KERNEL);
	pgprot_t set_mask = __pgprot(PTE_PRESENT_INVALID);

	if (!can_set_direct_map())
		return 0;

	return update_range_prot((unsigned long)page_address(page),
				 PAGE_SIZE, set_mask, clear_mask);
}

int set_direct_map_default_noflush(struct page *page)
{
	pgprot_t set_mask = __pgprot(PTE_PRESENT_VALID_KERNEL | PTE_WRITE);
	pgprot_t clear_mask = __pgprot(PTE_PRESENT_INVALID | PTE_RDONLY);

	if (!can_set_direct_map())
		return 0;

	return update_range_prot((unsigned long)page_address(page),
				 PAGE_SIZE, set_mask, clear_mask);
}

static int __set_memory_enc_dec(unsigned long addr,
				int numpages,
				bool encrypt)
{
	unsigned long set_prot = 0, clear_prot = 0;
	phys_addr_t start, end;
	int ret;

	if (!is_realm_world())
		return 0;

	if (!__is_lm_address(addr))
		return -EINVAL;

	start = __virt_to_phys(addr);
	end = start + numpages * PAGE_SIZE;

	if (encrypt)
		clear_prot = PROT_NS_SHARED;
	else
		set_prot = PROT_NS_SHARED;

	/*
	 * Break the mapping before we make any changes to avoid stale TLB
	 * entries or Synchronous External Aborts caused by RIPAS_EMPTY.
	 */
	ret = __change_memory_common(addr, PAGE_SIZE * numpages,
				     __pgprot(set_prot | PTE_PRESENT_INVALID),
				     __pgprot(clear_prot | PTE_PRESENT_VALID_KERNEL));

	if (ret)
		return ret;

	if (encrypt)
		ret = rsi_set_memory_range_protected(start, end);
	else
		ret = rsi_set_memory_range_shared(start, end);

	if (ret)
		return ret;

	return __change_memory_common(addr, PAGE_SIZE * numpages,
				      __pgprot(PTE_PRESENT_VALID_KERNEL),
				      __pgprot(PTE_PRESENT_INVALID));
}
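/*
 * For illustration, the sequence performed by __set_memory_enc_dec() when
 * decrypting (sharing) a range is, in sketch form:
 *
 *	1. make the PTEs present-invalid and set PROT_NS_SHARED
 *	   (break-before-make, so no stale TLB entry can be used);
 *	2. rsi_set_memory_range_shared(start, end) to change the RIPAS
 *	   state of the underlying granules;
 *	3. make the PTEs valid again.
 *
 * Encryption runs the same three steps, but with PROT_NS_SHARED cleared
 * in step 1 and rsi_set_memory_range_protected() in step 2.
 */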
static int realm_set_memory_encrypted(unsigned long addr, int numpages)
{
	int ret = __set_memory_enc_dec(addr, numpages, true);

	/*
	 * If the request to change state fails, then the only sensible
	 * course of action for the caller is to leak the memory.
	 */
	WARN(ret, "Failed to encrypt memory, %d pages will be leaked",
	     numpages);

	return ret;
}

static int realm_set_memory_decrypted(unsigned long addr, int numpages)
{
	int ret = __set_memory_enc_dec(addr, numpages, false);

	WARN(ret, "Failed to decrypt memory, %d pages will be leaked",
	     numpages);

	return ret;
}

static const struct arm64_mem_crypt_ops realm_crypt_ops = {
	.encrypt	= realm_set_memory_encrypted,
	.decrypt	= realm_set_memory_decrypted,
};

int realm_register_memory_enc_ops(void)
{
	return arm64_mem_crypt_ops_register(&realm_crypt_ops);
}

int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (!can_set_direct_map())
		return 0;

	return set_memory_valid(addr, nr, valid);
}

#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * This is - apart from the return value - doing the same thing as the
 * set_direct_map_valid_noflush() function above; it is kept as a separate
 * entry point because it implements the DEBUG_PAGEALLOC hook.
 */
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (!can_set_direct_map())
		return;

	set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */

/*
 * This function is used to determine if a linear map page has been marked
 * as not-valid. Walk the page table and check the PTE_VALID bit.
 *
 * Because this is only called on the kernel linear map, p?d_sect() implies
 * p?d_present(). When debug_pagealloc is enabled, section mappings are
 * disabled.
 */
bool kernel_page_present(struct page *page)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep;
	unsigned long addr = (unsigned long)page_address(page);

	pgdp = pgd_offset_k(addr);
	if (pgd_none(READ_ONCE(*pgdp)))
		return false;

	p4dp = p4d_offset(pgdp, addr);
	if (p4d_none(READ_ONCE(*p4dp)))
		return false;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return false;
	if (pud_leaf(pud))
		return pud_valid(pud);

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return false;
	if (pmd_leaf(pmd))
		return pmd_valid(pmd);

	ptep = pte_offset_kernel(pmdp, addr);
	return pte_valid(__ptep_get(ptep));
}
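/*
 * For illustration: hibernation is a typical consumer of
 * kernel_page_present(). Its copy loop checks whether a linear map page is
 * accessible before touching it, along the lines of this sketch:
 *
 *	if (kernel_page_present(page))
 *		copy_page(dst, page_address(page));
 *	else
 *		... temporarily map the page, then copy ...
 *
 * Pages made invalid via set_direct_map_invalid_noflush() or
 * DEBUG_PAGEALLOC would otherwise fault on access.
 */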